Source code for easygraph.functions.structural_holes.SHII_metric

import math
import random

import easygraph as eg
import numpy as np


[docs] class NodeParams: def __init__(self, active, inWeight, threshold): self.active = active self.inWeight = inWeight self.threshold = threshold
[docs] def structural_hole_influence_index( G_original, S, C, model, variant=False, seedRatio=0.05, randSeedIter=10, countIterations=100, Directed=True, ): """Returns the SHII metric of each seed. Parameters ---------- G_original: easygraph.Graph or easygraph.DiGraph S: list of int A list of nodes which are structural hole spanners. C: list of list Each list includes the nodes in one community. model: string Propagation Model. Should be IC or LT. variant: bool, default is False Whether returns variant SHII ml_metrics or not. variant SHII = # of the influenced outsider / # of the influenced insiders SHII = # of the influenced outsiders / # of the total influenced nodes seedRatio: float, default is 0.05 # of sampled seeds / # of nodes of the community that the given SHS belongs to. randSeedIter: int, default is 10 How many iterations to sample seeds. countIterations: int default is 100 Number of monte carlo simulations to be used. Directed: bool, default is True Whether the graph is directed or not. Returns ------- seed_shii_pair : dict the SHII metric of each seed Examples -------- # >>> structural_hole_influence_index(G, [3, 20, 9], Com, 'LT', seedRatio=0.1, Directed=False) References ---------- .. [1] https://dl.acm.org/doi/pdf/10.1145/2939672.2939807 .. [2] https://github.com/LifangHe/KDD16_HAM/tree/master/SHII_metric """ if not Directed: G = eg.DiGraph() for edge in G_original.edges: G.add_edge(edge[0], edge[1]) G.add_edge(edge[1], edge[0]) else: G = G_original.copy() # form pair like {node_1:community_label_1,node_2:community_label_2} node_label_pair = {} for community_label in range(len(C)): for node_i in range(len(C[community_label])): node_label_pair[C[community_label][node_i]] = community_label # print(node_label_pair) seed_shii_pair = {} for community_label in range(len(C)): nodesInCommunity = [] seedSetInCommunity = [] for node in node_label_pair.keys(): if node_label_pair[node] == community_label: nodesInCommunity.append(node) if node in S: seedSetInCommunity.append(node) seedSetSize = int(math.ceil(len(nodesInCommunity) * seedRatio)) if len(seedSetInCommunity) == 0: continue for seed in seedSetInCommunity: print(">>>>>> processing seed ", seed, " now.") oneSeedSet = [] if node not in oneSeedSet: oneSeedSet.append(seed) seedNeighborSet = [] # using BFS to add neighbors of the SH spanner to the seedNeighborSet as seed candidates queue = [] queue.append(seed) while len(queue) > 0: cur_node = queue[0] count_neighbor = 0 for neighbor in G.neighbors(node=cur_node): if neighbor not in seedNeighborSet: seedNeighborSet.append(neighbor) count_neighbor = count_neighbor + 1 if count_neighbor > 0: if ( len(queue) == 1 and len(oneSeedSet) + len(seedNeighborSet) < seedSetSize ): for node in seedNeighborSet: if node not in oneSeedSet: oneSeedSet.append(node) queue.append(node) seedNeighborSet.clear() queue.pop(0) avg_censor_score_1 = 0.0 avg_censor_score_2 = 0.0 for randIter in range(randSeedIter): if randIter % 5 == 0: print("seed ", seed, ": ", randIter, " in ", randSeedIter) randSeedSet = [] for node in oneSeedSet: randSeedSet.append(node) seedNeighbors = [] for node in seedNeighborSet: seedNeighbors.append(node) while len(seedNeighbors) > 0 and len(randSeedSet) < seedSetSize: r = random.randint(0, len(seedNeighbors) - 1) if seedNeighbors[r] not in randSeedSet: randSeedSet.append(seedNeighbors[r]) seedNeighbors.pop(r) if model == "IC": censor_score_1, censor_score_2 = _independent_cascade( G, randSeedSet, community_label, countIterations, node_label_pair, ) elif model == "LT": censor_score_1, censor_score_2 = _linear_threshold( G, randSeedSet, community_label, countIterations, node_label_pair, ) avg_censor_score_1 += censor_score_1 / randSeedIter avg_censor_score_2 += censor_score_2 / randSeedIter # print("seed ", seed, " avg_censor_score in ", randIter, "is ", censor_score_1 / randSeedIter) if variant: seed_shii_pair[seed] = avg_censor_score_2 else: seed_shii_pair[seed] = avg_censor_score_1 return seed_shii_pair
def _independent_cascade(G, S, community_label, countIterations, node_label_pair): avg_result_1 = 0 avg_result_2 = 0 N = G.number_of_nodes() for b in range(countIterations): # print(b, " in ", countIterations) p_vw = np.zeros((N, N)) # 节点被激活时,激活其它节点的概率,a对b的影响等于b对a的影响 for random_i in range(N): for random_j in range(random_i + 1, N): num = random.random() p_vw[random_i][random_j] = num p_vw[random_j][random_i] = num Q = [] activeNodes = [] for v in S: Q.append(v) activeNodes.append(v) while len(Q) > 0: v = Q[0] for neighbor in G.neighbors(node=v): if neighbor not in activeNodes: toss = random.random() + 0.1 if v <= 0 or neighbor <= 0: print(v, neighbor) # if toss>0.5: # activeNodes.append(neighbor) # Q.append(neighbor) if toss >= p_vw[v - 1][neighbor - 1]: activeNodes.append(neighbor) Q.append(neighbor) Q.pop(0) self_cov = 0 total_cov = 0 uniqueActiveNodes = [] for i in activeNodes: if i not in uniqueActiveNodes: uniqueActiveNodes.append(i) for v in uniqueActiveNodes: total_cov += 1 if node_label_pair[v] == community_label: self_cov += 1 censor_score_1 = (total_cov - self_cov) / total_cov censor_score_2 = (total_cov - self_cov) / self_cov avg_result_1 += censor_score_1 / countIterations avg_result_2 += censor_score_2 / countIterations return avg_result_1, avg_result_2 def _linear_threshold(G, S, community_label, countIterations, node_label_pair): tol = 0.00001 avg_result_1 = 0 avg_result_2 = 0 for b in range(countIterations): activeNodes = [] # T is the set of nodes that are to be processed T = [] Q = {} for v in S: activeNodes.append(v) for neighbor in G.neighbors(node=v): if neighbor not in S: weight_degree = 1.0 / float(G.in_degree()[neighbor]) if neighbor not in Q.keys(): np = NodeParams(False, weight_degree, random.random()) Q[neighbor] = np T.append(neighbor) else: Q[neighbor].inWeight += weight_degree while len(T) > 0: u = T[0] if Q[u].inWeight >= Q[u].threshold + tol and not Q[u].active: activeNodes.append(u) Q[u].active = True for neighbor in G.neighbors(node=u): if neighbor in S: continue weight_degree = 1.0 / float(G.in_degree()[neighbor]) if neighbor not in Q.keys(): np = NodeParams(False, weight_degree, random.random()) Q[neighbor] = np T.append(neighbor) else: if not Q[neighbor].active: T.append(neighbor) Q[neighbor].inWeight += weight_degree if Q[neighbor].inWeight - 1 > tol: print("Error: the inweight for a node is > 1.") T.pop(0) T.clear() Q.clear() self_cov = 0 total_cov = 0 uniqueActiveNodes = [] for i in activeNodes: if i not in uniqueActiveNodes: uniqueActiveNodes.append(i) for v in uniqueActiveNodes: total_cov += 1 if node_label_pair[v] == community_label: self_cov += 1 censor_score_1 = (total_cov - self_cov) / total_cov # ==> SHII censor_score_2 = (total_cov - self_cov) / self_cov avg_result_1 += censor_score_1 / countIterations avg_result_2 += censor_score_2 / countIterations return avg_result_1, avg_result_2 if __name__ == "__main__": G = eg.datasets.get_graph_karateclub() Com = [] t1 = [1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 13, 14, 17, 18, 20, 22] Com.append(t1) t2 = [9, 10, 15, 16, 19, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] Com.append(t2) print("community_label:", Com) result = structural_hole_influence_index( G, [3, 20, 9], Com, "IC", seedRatio=0.1, Directed=False ) print(result)