Source code for cogdl.models.emb.dngr

import time

import networkx as nx
import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg  # ensure sp.linalg.svds is available in all SciPy versions
from sklearn import preprocessing
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

from .. import BaseModel, register_model


class DNGR_layer(nn.Module):
    """Denoising autoencoder used by DNGR: a two-layer Tanh encoder that maps
    each PPMI row to a low-dimensional code, with a mirrored decoder."""

    def __init__(self, num_node, hidden_size1, hidden_size2):
        super(DNGR_layer, self).__init__()
        self.num_node = num_node
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.encoder = nn.Sequential(
            nn.Linear(self.num_node, self.hidden_size1),
            nn.Tanh(),
            nn.Linear(self.hidden_size1, self.hidden_size2),
            nn.Tanh(),
        )
        self.decoder = nn.Sequential(
            nn.Linear(self.hidden_size2, self.hidden_size1),
            nn.Tanh(),
            nn.Linear(self.hidden_size1, self.num_node),
            nn.Tanh(),
        )
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded
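
# A minimal shape check for DNGR_layer (an illustrative sketch, not part of
# the original module): for a hypothetical graph with 100 nodes, the encoder
# maps each PPMI row 100 -> hidden_size1 -> hidden_size2 and the decoder
# mirrors it back:
#
#     layer = DNGR_layer(num_node=100, hidden_size1=64, hidden_size2=16)
#     encoded, decoded = layer(torch.rand(100, 100))
#     assert encoded.shape == (100, 16) and decoded.shape == (100, 100)
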
@register_model("dngr")
class DNGR(BaseModel):
    r"""The DNGR model from the `"Deep Neural Networks for Learning Graph Representations"
    <https://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/download/12423/11715>`_ paper.

    Args:
        hidden_size1 (int) : The size of the first hidden layer.
        hidden_size2 (int) : The size of the second hidden layer.
        noise (float) : Denoising rate of the DAE.
        alpha (float) : Hyperparameter in DNGR.
        step (int) : The max step in random surfing.
        max_epoch (int) : The max number of epochs in the training step.
        lr (float) : Learning rate in DNGR.
    """

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # fmt: off
        parser.add_argument("--hidden-size1", type=int, default=1000,
                            help="Hidden size in the first layer of the auto-encoder")
        parser.add_argument("--hidden-size2", type=int, default=128,
                            help="Hidden size in the second layer of the auto-encoder")
        parser.add_argument("--noise", type=float, default=0.2,
                            help="Denoising rate of the DAE")
        parser.add_argument("--alpha", type=float, default=0.98,
                            help="alpha is a hyperparameter in DNGR")
        parser.add_argument("--step", type=int, default=10,
                            help="step is a hyperparameter in DNGR")
        # fmt: on

    @classmethod
    def build_model_from_args(cls, args):
        return cls(
            args.hidden_size1,
            args.hidden_size2,
            args.noise,
            args.alpha,
            args.step,
            args.max_epoch,
            args.lr,
            args.cpu,
        )
    def __init__(self, hidden_size1, hidden_size2, noise, alpha, step, max_epoch, lr, cpu):
        super(DNGR, self).__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.noise = noise
        self.alpha = alpha
        self.step = step
        self.max_epoch = max_epoch
        self.lr = lr
        self.cpu = cpu
        self.device = torch.device("cpu" if self.cpu else "cuda")
    def scale_matrix(self, mat):
        # Remove self-loops, then normalize by node degree so that each row
        # of a symmetric adjacency matrix sums to 1 (a transition matrix).
        mat = mat - np.diag(np.diag(mat))
        D_inv = np.diagflat(np.reciprocal(np.sum(mat, axis=0)))
        mat = np.dot(D_inv, mat)
        return mat
    def random_surfing(self, adj_matrix):
        # Random surfing: iterate P_k = alpha * P_{k-1} A + (1 - alpha) * P_0
        # for `step` hops and accumulate the transition probabilities in M.
        adj_matrix = self.scale_matrix(adj_matrix)
        P0 = np.eye(self.num_node, dtype="float32")
        M = np.zeros((self.num_node, self.num_node), dtype="float32")
        P = np.eye(self.num_node, dtype="float32")
        for _ in range(self.step):
            P = self.alpha * np.dot(P, adj_matrix) + (1 - self.alpha) * P0
            M = M + P
        return M
    def get_ppmi_matrix(self, mat):
        # Positive Pointwise Mutual Information (PPMI) matrix:
        # PPMI_ij = max(0, log(D * M_ij / (row_i * col_j))), with the NaN/inf
        # entries produced by log(0) clamped to zero.
        mat = self.random_surfing(mat)
        M = self.scale_matrix(mat)
        col_s = np.sum(M, axis=0).reshape(1, self.num_node)
        row_s = np.sum(M, axis=1).reshape(self.num_node, 1)
        D = np.sum(col_s)
        rowcol_s = np.dot(row_s, col_s)
        PPMI = np.log(np.divide(D * M, rowcol_s))
        PPMI[np.isnan(PPMI)] = 0.0
        PPMI[np.isinf(PPMI)] = 0.0
        PPMI[np.isneginf(PPMI)] = 0.0
        PPMI[PPMI < 0] = 0.0
        return PPMI
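
    # Worked toy example of the PPMI formula above (hypothetical, for
    # illustration only): for a scaled matrix M = [[0.0, 0.5], [0.5, 0.0]],
    # D = 1.0 and all row/column sums are 0.5, so the off-diagonal entries
    # become log(1.0 * 0.5 / (0.5 * 0.5)) = log(2) ≈ 0.693, while the
    # diagonal log(0) = -inf entries are clamped to 0 by the masks above.
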
    def get_denoised_matrix(self, mat):
        # Corrupt the input for the denoising autoencoder by zeroing each
        # entry with probability `noise`.
        return mat * (np.random.random(mat.shape) > self.noise)
    def get_emb(self, matrix):
        # Alternative shallow embedding: truncated SVD of the PPMI matrix,
        # L2-normalized row-wise.
        ut, s, _ = sp.linalg.svds(matrix, self.hidden_size2)
        emb_matrix = ut * np.sqrt(s)
        emb_matrix = preprocessing.normalize(emb_matrix, "l2")
        return emb_matrix
    def train(self, G):
        self.num_node = G.number_of_nodes()
        # Cast to float: with an integer adjacency matrix, np.reciprocal in
        # scale_matrix would floor the degree inverses to zero.
        A = np.asarray(nx.adjacency_matrix(G).todense(), dtype=np.float32)
        PPMI = self.get_ppmi_matrix(A)
        print("PPMI matrix computed")
        # return self.get_emb(PPMI)
        input_mat = torch.from_numpy(self.get_denoised_matrix(PPMI).astype(np.float32))
        model = DNGR_layer(self.num_node, self.hidden_size1, self.hidden_size2)
        input_mat = input_mat.to(self.device)
        model = model.to(self.device)

        opt = torch.optim.Adam(model.parameters(), lr=self.lr)
        loss_func = nn.MSELoss()
        epoch_iter = tqdm(range(self.max_epoch))
        for epoch in epoch_iter:
            opt.zero_grad()
            encoded, decoded = model(input_mat)
            loss = loss_func(decoded, input_mat)
            loss.backward()
            epoch_iter.set_description(f"Epoch: {epoch:03d}, Loss: {loss.item():.8f}")
            opt.step()

        # The final embedding is the encoder output on the (denoised) PPMI matrix.
        embedding, _ = model(input_mat)
        return embedding.detach().cpu().numpy()
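
if __name__ == "__main__":
    # Minimal smoke test (an illustrative sketch, not part of the original
    # module): embed Zachary's karate club graph on the CPU. The max_epoch
    # and lr values below are hypothetical; in cogdl they come from shared
    # command-line arguments rather than this file's add_args.
    model = DNGR(
        hidden_size1=1000,
        hidden_size2=128,
        noise=0.2,
        alpha=0.98,
        step=10,
        max_epoch=200,
        lr=0.001,
        cpu=True,
    )
    emb = model.train(nx.karate_club_graph())
    print(emb.shape)  # expected: (34, 128)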