Source code for cogdl.models.emb.grarep

import numpy as np
import networkx as nx
import scipy.sparse as sp
import scipy.sparse.linalg  # noqa: F401 -- makes sp.linalg.svds available
from sklearn import preprocessing
from .. import BaseModel


class GraRep(BaseModel):
    r"""The GraRep model from the `"GraRep: Learning Graph Representations with
    Global Structural Information" <http://dl.acm.org/citation.cfm?doid=2806416.2806512>`_ paper.

    Args:
        hidden_size (int) : The dimension of the node representation.
        step (int) : The maximum order of the transition probability matrix.

    A minimal usage sketch is included at the end of this module.
    """
    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # fmt: off
        parser.add_argument("--step", type=int, default=5,
                            help="Number of matrix powers used in GraRep. Default is 5.")
        parser.add_argument("--hidden-size", type=int, default=128)
        # fmt: on
    @classmethod
    def build_model_from_args(cls, args):
        return cls(args.hidden_size, args.step)
    def __init__(self, dimension, step):
        super(GraRep, self).__init__()
        self.dimension = dimension
        self.step = step
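    # The `dimension` columns of the final embedding are split evenly across the
    # `step` matrix powers; the k = 0 block absorbs the rounding remainder so the
    # concatenation is exactly `dimension` wide (e.g. dimension=128, step=5 gives
    # blocks of 28, 25, 25, 25 and 25 columns).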
    def forward(self, graph, return_dict=False):
        self.G = graph.to_networkx()
        self.num_node = self.G.number_of_nodes()
        A = np.asarray(nx.adjacency_matrix(self.G).todense(), dtype=float)
        A = preprocessing.normalize(A, "l1")

        log_beta = np.log(1.0 / self.num_node)
        A_list = [A]
        T_list = [sum(A).tolist()]
        temp = A
        # calculate A^1, A^2, ..., A^step, respectively
        for i in range(self.step - 1):
            temp = temp.dot(A)
            A_list.append(temp)
            T_list.append(sum(temp).tolist())

        final_emb = np.zeros((self.num_node, 1))
        for k in range(self.step):
            # shifted positive log-probability:
            # X_k[i, j] = max(log(A^k[i, j] / sum_i A^k[i, j]) - log(1 / N), 0)
            for j in range(A.shape[1]):
                A_list[k][:, j] = np.log(A_list[k][:, j] / T_list[k][j] + 1e-20) - log_beta
                for i in range(A.shape[0]):
                    A_list[k][i, j] = max(A_list[k][i, j], 0)

            # concatenate all k-step representations
            if k == 0:
                dimension = self.dimension - int(self.dimension / self.step) * (self.step - 1)
                final_emb = self._get_embedding(A_list[k], dimension)
            else:
                W = self._get_embedding(A_list[k], self.dimension / self.step)
                final_emb = np.hstack((final_emb, W))

        embeddings = final_emb
        if return_dict:
            features_matrix = dict()
            for vid, node in enumerate(self.G.nodes()):
                features_matrix[node] = embeddings[vid]
        else:
            features_matrix = np.zeros((graph.num_nodes, embeddings.shape[1]))
            nx_nodes = list(self.G.nodes())
            features_matrix[nx_nodes] = embeddings[np.arange(graph.num_nodes)]
        return features_matrix
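    # Each shifted log-probability matrix X_k is factorised with a truncated SVD,
    # X_k ≈ U_d S_d V_d^T, and only W_k = U_d * sqrt(S_d), row-l2-normalised, is
    # kept as the k-step representation (see `_get_embedding` below).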
    def _get_embedding(self, matrix, dimension):
        # embedding from the truncated SVD: scale ut by sqrt(s), then l2-normalize the rows
        ut, s, _ = sp.linalg.svds(matrix, int(dimension))
        emb_matrix = ut * np.sqrt(s)
        emb_matrix = preprocessing.normalize(emb_matrix, "l2")
        return emb_matrix
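A minimal usage sketch, not part of cogdl itself: ``forward`` only relies on a graph object exposing ``to_networkx()`` and ``num_nodes``, so a hypothetical ``_ToyGraph`` stand-in around a networkx graph is used below; in real use the graph built by the cogdl data pipeline would be passed instead. With a cogdl checkout the block can be run as ``python -m cogdl.models.emb.grarep``.

if __name__ == "__main__":
    # Rough demo under the assumptions above; `_ToyGraph` is hypothetical and
    # only mimics the two attributes that GraRep.forward() actually touches.
    class _ToyGraph:
        def __init__(self, nx_graph):
            self._g = nx.convert_node_labels_to_integers(nx_graph)
            self.num_nodes = self._g.number_of_nodes()

        def to_networkx(self):
            return self._g

    toy = _ToyGraph(nx.karate_club_graph())
    model = GraRep(dimension=16, step=4)  # 16-dim embedding from powers A^1..A^4
    emb = model.forward(toy)              # ndarray of shape (num_nodes, 16)
    print(emb.shape)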