Source code for easygraph.functions.graph_embedding.sdne

from argparse import ArgumentDefaultsHelpFormatter
from argparse import ArgumentParser

import easygraph as eg
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils import data
from torch.utils.data.dataloader import DataLoader


def parse_args():
    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter, conflict_handler="resolve"
    )
    parser.add_argument(
        "--output", default="node.emb", help="Output representation file"
    )
    parser.add_argument(
        "--workers", default=8, type=int, help="Number of parallel processes."
    )
    parser.add_argument(
        "--weighted",
        action="store_true",
        default=False,
        help="Treat graph as weighted",
    )
    parser.add_argument(
        "--epochs", default=400, type=int, help="The training epochs of SDNE"
    )
    parser.add_argument(
        "--dropout",
        default=0.05,
        type=float,
        help="Dropout rate (1 - keep probability)",
    )
    parser.add_argument(
        "--weight-decay",
        type=float,
        default=5e-4,
        help="Weight for L2 loss on embedding matrix",
    )
    parser.add_argument("--lr", default=0.006, type=float, help="learning rate")
    parser.add_argument(
        "--alpha", default=1e-2, type=float, help="alpha is a hyperparameter in SDNE"
    )
    parser.add_argument(
        "--beta", default=5.0, type=float, help="beta is a hyperparameter in SDNE"
    )
    parser.add_argument(
        "--nu1", default=1e-5, type=float, help="nu1 is a hyperparameter in SDNE"
    )
    parser.add_argument(
        "--nu2", default=1e-4, type=float, help="nu2 is a hyperparameter in SDNE"
    )
    parser.add_argument("--bs", default=100, type=int, help="batch size of SDNE")
    parser.add_argument("--nhid0", default=1000, type=int, help="The first dim")
    parser.add_argument("--nhid1", default=128, type=int, help="The second dim")
    parser.add_argument(
        "--step_size", default=10, type=int, help="The step size for lr"
    )
    parser.add_argument("--gamma", default=0.9, type=float, help="The gamma for lr")
    args = parser.parse_args()
    return args
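# Note (sketch, not part of the original module): these flags are meant for
# running this file as a script, e.g.
#
#     python sdne.py --epochs 200 --lr 0.01 --nhid0 256 --nhid1 64 --bs 128
#
# The parsed Namespace mirrors the keyword arguments of SDNE and SDNE.train()
# below; --workers, --weighted and --weight-decay are parsed but not used by
# the code in this module.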
class Dataload(data.Dataset):
    def __init__(self, Adj, Node):
        self.Adj = Adj
        self.Node = Node

    def __getitem__(self, index):
        return index
        # adj_batch = self.Adj[index]
        # adj_mat = adj_batch[index]
        # b_mat = torch.ones_like(adj_batch)
        # b_mat[adj_batch != 0] = self.Beta
        # return adj_batch, adj_mat, b_mat

    def __len__(self):
        return self.Node
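# Sketch (illustrative only): ``Dataload`` yields bare node indices; batching
# and the actual slicing of the adjacency matrix happen later, in
# ``SDNE.train()``.
#
#     loader = DataLoader(Dataload(Adj, Node), batch_size=100, shuffle=True)
#     for index in loader:        # ``index`` is a LongTensor of node ids
#         adj_batch = Adj[index]  # adjacency rows of the batch nodes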
def get_adj(g):
    # Collect the (u, v) endpoints of every edge as an integer array.
    edges = list(g.edges)
    edges = [(edges[i][0], edges[i][1]) for i in range(len(edges))]
    # print(edges)
    edges = np.array([np.array(i) for i in edges])
    min_node, max_node = edges.min(), edges.max()
    # Node labels are assumed to be consecutive integers, either 0-indexed or
    # 1-indexed; the offset below maps them onto matrix positions.
    if min_node == 0:
        Node = max_node + 1
    else:
        Node = max_node
    Adj = np.zeros([Node, Node], dtype=int)
    for i in range(edges.shape[0]):
        g.add_edge(edges[i][0], edges[i][1])
        if min_node == 0:
            Adj[edges[i][0], edges[i][1]] = 1
            Adj[edges[i][1], edges[i][0]] = 1
        else:
            Adj[edges[i][0] - 1, edges[i][1] - 1] = 1
            Adj[edges[i][1] - 1, edges[i][0] - 1] = 1
    Adj = torch.FloatTensor(Adj)
    return Adj, Node
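# Sketch (illustrative only): building the dense adjacency used by SDNE from a
# small hand-built EasyGraph graph. get_adj() expects consecutive integer node
# labels, either 0-indexed or 1-indexed.
#
#     g = eg.Graph()
#     for u, v in [(0, 1), (1, 2), (2, 3), (3, 0)]:
#         g.add_edge(u, v)
#     Adj, Node = get_adj(g)  # Adj: 4 x 4 FloatTensor, Node: 4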
class SDNE(nn.Module):
    """
    Graph embedding via SDNE.

    Parameters
    ----------
    graph : easygraph.Graph or easygraph.DiGraph
        The graph to embed.
    node_size : int
        Number of nodes in the graph.
    nhid0, nhid1 : int
        Dimensions of the two hidden layers, e.g. 128 and 64.
    dropout : float
        Regularization parameter, default: 0.06.
    alpha, beta : float
        SDNE hyperparameters, default: 2e-2, 10.0.

    The ``train`` method additionally takes:

    epochs : int
        Number of training epochs, default: 100.
    lr : float
        Learning rate, default: 0.006.
    bs : int
        Batch size, default: 100.
    step_size, gamma : int, float
        Step size and decay factor of the learning-rate scheduler,
        default: 10, 0.9.
    nu1, nu2 : float
        L1 and L2 regularization weights, default: 1e-5, 1e-4.
    device : str or torch.device
        e.g. ``torch.device("cuda:0" if torch.cuda.is_available() else "cpu")``,
        default: "cpu".
    output : str
        Output representation file, default: "out.emb".

    Examples
    --------
    >>> import easygraph as eg
    >>> model = eg.SDNE(graph=g, node_size=len(g.nodes), nhid0=128, nhid1=64, dropout=0.025, alpha=2e-2, beta=10)
    >>> emb = model.train(model, epochs, lr, bs, step_size, gamma, nu1, nu2, device, output)

    Reference
    ---------
    .. [1] Wang, D., Cui, P., & Zhu, W. (2016, August). Structural deep network
       embedding. In Proceedings of the 22nd ACM SIGKDD international conference
       on Knowledge discovery and data mining (pp. 1225-1234).
       https://www.kdd.org/kdd2016/papers/files/rfp0191-wangAemb.pdf
    """

    def __init__(
        self, graph, node_size, nhid0, nhid1, dropout=0.06, alpha=2e-2, beta=10.0
    ):
        super(SDNE, self).__init__()
        self.encode0 = nn.Linear(node_size, nhid0)
        self.encode1 = nn.Linear(nhid0, nhid1)
        self.decode0 = nn.Linear(nhid1, nhid0)
        self.decode1 = nn.Linear(nhid0, node_size)
        self.dropout = dropout
        self.alpha = alpha
        self.beta = beta
        self.graph = graph
    def forward(self, adj_batch, adj_mat, b_mat):
        # Encoder: two non-linear layers produce the node embeddings.
        t0 = F.leaky_relu(self.encode0(adj_batch))
        t0 = F.leaky_relu(self.encode1(t0))
        embedding = t0
        # Decoder: reconstruct the adjacency rows from the embeddings.
        t0 = F.leaky_relu(self.decode0(t0))
        t0 = F.leaky_relu(self.decode1(t0))
        # First-order proximity loss: pairwise squared embedding distances
        # weighted by the adjacency of the batch.
        embedding_norm = torch.sum(embedding * embedding, dim=1, keepdim=True)
        L_1st = torch.sum(
            adj_mat
            * (
                embedding_norm
                - 2 * torch.mm(embedding, torch.transpose(embedding, dim0=0, dim1=1))
                + torch.transpose(embedding_norm, dim0=0, dim1=1)
            )
        )
        # Second-order proximity loss: reconstruction error, with non-zero
        # adjacency entries up-weighted by b_mat.
        L_2nd = torch.sum(((adj_batch - t0) * b_mat) * ((adj_batch - t0) * b_mat))
        return L_1st, self.alpha * L_2nd, L_1st + self.alpha * L_2nd
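    # The two terms above correspond, up to the weighting chosen in this
    # implementation (here ``alpha`` scales the second-order term), to the
    # SDNE objectives of Wang et al. (2016):
    #
    #     L_1st = sum_{i,j} A_ij * ||y_i - y_j||^2      (first-order proximity)
    #     L_2nd = || (X_hat - X) * B ||_F^2             (second-order proximity)
    #
    # where y_i are the rows of ``embedding``, X the adjacency rows
    # ``adj_batch``, X_hat their reconstruction ``t0``, ``*`` the element-wise
    # product, and B the penalty matrix ``b_mat`` built from ``beta`` in
    # ``train()``.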
    def train(
        self,
        model,
        epochs=100,
        lr=0.006,
        bs=100,
        step_size=10,
        gamma=0.9,
        nu1=1e-5,
        nu2=1e-4,
        device="cpu",
        output="out.emb",
    ):
        Adj, Node = get_adj(self.graph)
        model = model.to(device)
        opt = optim.Adam(model.parameters(), lr=lr)
        scheduler = torch.optim.lr_scheduler.StepLR(
            opt, step_size=step_size, gamma=gamma
        )
        Data = Dataload(Adj, Node)
        Data = DataLoader(
            Data,
            batch_size=bs,
            shuffle=True,
        )
        for epoch in range(1, epochs + 1):
            loss_sum, loss_L1, loss_L2, loss_reg = 0, 0, 0, 0
            for index in Data:
                # Slice out the adjacency rows/columns of the batch nodes and
                # move the batch tensors onto the training device.
                adj_batch = Adj[index]
                adj_mat = adj_batch[:, index]
                b_mat = torch.ones_like(adj_batch)
                b_mat[adj_batch != 0] = self.beta
                adj_batch = adj_batch.to(device)
                adj_mat = adj_mat.to(device)
                b_mat = b_mat.to(device)
                opt.zero_grad()
                L_1st, L_2nd, L_all = model(adj_batch, adj_mat, b_mat)
                # L1 (nu1) and L2 (nu2) regularization over all parameters.
                L_reg = 0
                for param in model.parameters():
                    L_reg += nu1 * torch.sum(torch.abs(param)) + nu2 * torch.sum(
                        param * param
                    )
                Loss = L_all + L_reg
                Loss.backward()
                opt.step()
                loss_sum += Loss.item()
                loss_L1 += L_1st.item()
                loss_L2 += L_2nd.item()
                loss_reg += L_reg.item()
            scheduler.step()
            # print("The lr for epoch %d is %f" % (epoch, scheduler.get_lr()[0]))
            print("loss for epoch %d is:" % epoch)
            print("loss_sum is %f" % loss_sum)
            print("loss_L1 is %f" % loss_L1)
            print("loss_L2 is %f" % loss_L2)
            print("loss_reg is %f" % loss_reg)
        # model.eval()
        embedding = model.savector(Adj.to(device))
        outVec = embedding.detach().cpu().numpy()
        np.savetxt(output, outVec)
        return outVec
    def savector(self, adj):
        t0 = self.encode0(adj)
        t0 = self.encode1(t0)
        return t0
# if __name__ == '__main__':
#     args = parse_args()
#     print(args)
#     dataset = eg.CiteseerGraphDataset(force_reload=True)  # Download CiteseerGraphDataset contained in EasyGraph
#     num_classes = dataset.num_classes
#     g = dataset[0]
#     print(g)
#     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#     adj, node = get_adj(g)
#     # labels = g.ndata['label']
#     nhid0, nhid1, dropout, alpha = args.nhid0, args.nhid1, args.dropout, args.alpha
#     model = SDNE(node, nhid0, nhid1, dropout, alpha, graph=g)
#     print(model)
#
#     emb = model.train(args, device)
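# Minimal runnable sketch (not part of the original module): the commented-out
# driver above depends on downloading the Citeseer dataset, so this variant
# exercises the same pipeline on a tiny hand-built graph instead. All numbers
# here (layer sizes, epochs, file name) are illustrative.
if __name__ == "__main__":
    g = eg.Graph()
    for u, v in [(1, 2), (2, 3), (3, 4), (4, 1), (1, 3)]:
        g.add_edge(u, v)
    adj, node = get_adj(g)  # node == 4 for the 1-indexed labels above
    model = SDNE(
        graph=g, node_size=node, nhid0=32, nhid1=16, dropout=0.05, alpha=2e-2, beta=10.0
    )
    emb = model.train(
        model,
        epochs=20,
        lr=0.006,
        bs=4,
        step_size=10,
        gamma=0.9,
        nu1=1e-5,
        nu2=1e-4,
        device="cpu",
        output="node.emb",
    )
    print(emb.shape)  # (4, 16): one 16-dimensional embedding per node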