import numpy as np
import networkx as nx
import scipy.sparse as sp
from sklearn import preprocessing
from .. import BaseModel, register_model
[docs]@register_model("grarep")
class GraRep(BaseModel):
r"""The GraRep model from the `"Grarep: Learning graph representations with global structural information"
<http://dl.acm.org/citation.cfm?doid=2806416.2806512>`_ paper.
Args:
hidden_size (int) : The dimension of node representation.
step (int) : The maximum order of transitition probability.
"""
@staticmethod
[docs] def add_args(parser):
"""Add model-specific arguments to the parser."""
# fmt: off
parser.add_argument('--step', type=int, default=5,
help='Number of matrix step in GraRep. Default is 5.')
# fmt: on
@classmethod
[docs] def build_model_from_args(cls, args):
return cls(args.hidden_size, args.step)
def __init__(self, dimension, step):
super(GraRep, self).__init__()
self.dimension = dimension
self.step = step
[docs] def train(self, G):
self.G = G
self.num_node = G.number_of_nodes()
A = np.asarray(nx.adjacency_matrix(self.G).todense(), dtype=float)
A = preprocessing.normalize(A, "l1")
log_beta = np.log(1.0 / self.num_node)
A_list = [A]
T_list = [sum(A).tolist()]
temp = A
# calculate A^1, A^2, ... , A^step, respectively
for i in range(self.step - 1):
temp = temp.dot(A)
A_list.append(A)
T_list.append(sum(temp).tolist())
final_emb = np.zeros((self.num_node, 1))
for k in range(self.step):
for j in range(A.shape[1]):
A_list[k][:, j] = (
np.log(A_list[k][:, j] / T_list[k][j] + 1e-20) - log_beta
)
for i in range(A.shape[0]):
A_list[k][i, j] = max(A_list[k][i, j], 0)
# concatenate all k-step representations
if k == 0:
dimension = self.dimension - int(self.dimension / self.step) * (
self.step - 1
)
final_emb = self._get_embedding(A_list[k], dimension)
else:
W = self._get_embedding(A_list[k], self.dimension / self.step)
final_emb = np.hstack((final_emb, W))
self.embeddings = final_emb
return self.embeddings
[docs] def _get_embedding(self, matrix, dimension):
# get embedding from svd and process normalization for ut
ut, s, _ = sp.linalg.svds(matrix, int(dimension))
emb_matrix = ut * np.sqrt(s)
emb_matrix = preprocessing.normalize(emb_matrix, "l2")
return emb_matrix