from typing import List
from easygraph.utils import *
__all__ = ["get_structural_holes_MaxD"]
@not_implemented_for("multigraph")
def get_community_kernel(G, C: List[frozenset], weight="weight"):
"""
To get community kernels with most degrees.
Parameters
----------
G : graph
An undirected graph.
C : int
#communities
Returns
-------
kernels
"""
area = []
for i in range(len(G)):
area.append(0)
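    # area[v - 1] is a bitmask of community membership: bit i is set exactly when
    # node v (node ids run from 1 to n) belongs to community C[i].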
for i, cc in enumerate(C):
for each_node in cc:
area[each_node - 1] += 1 << i # node_id from 1 to n.
kernels = []
    for i in range(len(C)):
        mask = 1 << i
q = []
p = []
for i in range(len(G)):
if (area[i] & mask) == mask:
q.append((G.degree(weight=weight)[i + 1], i + 1))
q.sort()
q.reverse()
        for i in range(
            max(int(len(q) / 100), min(2, len(q)))
        ):  # keep the top 1% of the community's nodes by degree, but at least two when available
p.append(q[i][1])
kernels.append(p)
    if len(kernels) < 2:
        print("ERROR: At least two communities are required.")
for i in range(len(kernels)):
if len(kernels[i]) == 0:
print("Community %d is too small." % i)
return None
return kernels
def get_structural_holes_MaxD(G, k, C: List[frozenset]):
"""Structural hole spanners detection via MaxD method.
Both **HIS** and **MaxD** are methods in [1]_.
The authors developed these two methods to find the structural holes spanners,
based on theory of information diffusion.
Parameters
----------
k : int
Top-`k` structural hole spanners
C : list of frozenset
Each frozenset denotes a community of nodes.
Returns
-------
get_structural_holes_MaxD : list
Top-`k` structural hole spanners
Examples
--------
>>> get_structural_holes_MaxD(G,
... k = 5, # To find top five structural holes spanners.
... C = [frozenset([1,2,3]), frozenset([4,5,6])] # Two communities
... )
References
----------
.. [1] https://www.aminer.cn/structural-hole
"""
_init_data()
G_index, index_of_node, node_of_index = G.to_index_node_graph(begin_index=1)
C_index = []
for cmnt in C:
cmnt_index = []
for node in cmnt:
cmnt_index.append(index_of_node[node])
C_index.append(frozenset(cmnt_index))
kernels = get_community_kernel(G_index, C_index)
c = len(kernels)
    build_network(kernels, c, G_index)
    n = len(G_index)
    sflow = []
    # save[i] is True while node i is still in the network, i.e. it has not been
    # removed as a chosen spanner (or temporarily blocked as a candidate).
    save = [True] * n
q = []
ans_list = []
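    # Each round: recompute the max flow, rank the remaining nodes by the flow
    # passing through them plus their weighted degree, shortlist the top-k as
    # candidates, and permanently remove the single best candidate.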
for step in range(k):
q.clear()
sflow.clear()
for i in range(n):
sflow.append(0)
max_flow(n, kernels, save)
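        # Sum the outgoing flow of every copy of a node; index i % n maps each
        # copy in the layered network back to the original node.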
for i in range(n * (c - 1)):
k_ = head[i]
while k_ >= 0:
if flow[k_] > 0:
sflow[i % n] += flow[k_]
k_ = nex[k_]
for i in range(n):
            if not save[i]:
                q.append((-1, i))
            else:
                q.append((sflow[i] + G_index.degree(weight="weight")[i + 1], i))
q.sort()
q.reverse()
candidates = []
for i in range(n):
            if save[q[i][1]] and len(candidates) < k:
candidates.append(q[i][1])
ret = pick_candidates(n, candidates, kernels, save)
ans_list.append(ret[1] + 1)
del sflow
del q
for i in range(len(ans_list)):
ans_list[i] = node_of_index[ans_list[i]]
return ans_list
def pick_candidates(n, candidates, kernels, save):
"""
detect candidates.
Parameters
----------
n : #nodes
candidates : A list of candidates.
kernels : A list of kernels
save : A bool list of visited candidates for max_flow.
Returns
-------
A tuple of min_cut, best_candidate of this round.
"""
for i in range(len(candidates)):
save[candidates[i]] = False
old_flow = max_flow(n, kernels, save)
global prev_flow
prev_flow.clear()
for i in range(nedge):
prev_flow.append(flow[i])
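    # Re-add all candidates, then remove one candidate at a time: warm-starting
    # from prev_flow, keep the candidate whose sole removal yields the smallest
    # additional flow (i.e. the tightest cut).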
mcut = 100000000
best_key = -1
for i in range(len(candidates)):
key = candidates[i]
for j in range(len(candidates)):
save[candidates[j]] = True
save[key] = False
tp = max_flow(n, kernels, save, prev_flow)
if tp < mcut:
mcut = tp
best_key = key
for i in range(len(candidates)):
save[candidates[i]] = True
save[best_key] = False
return (old_flow + mcut, best_key)
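
# Global state for the Dinic max-flow solver. Edges are stored as a forward-star
# list: head[u] is the first edge leaving u, nex[e] the next edge with the same
# tail, point[e] the target of edge e, and capa[e] / flow[e] its capacity and
# current flow. dist holds BFS levels, work the current-arc pointers, dsave the
# nodes still allowed in the network, and oo acts as infinity.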
head = []
point = []
nex = []
flow = []
capa = []
dist = []
work = []
dsave = []
src = 0
dest = 0
node = 0
nedge = 0
prev_flow = []
oo = 1000000000
def _init_data():
global head, point, nex, flow, capa
global dist, work, dsave
global src, dest, node, nedge, prev_flow, oo
head = []
point = []
nex = []
flow = []
capa = []
dist = []
work = []
dsave = []
src = 0
dest = 0
node = 0
nedge = 0
prev_flow = []
oo = 1000000000
def dinic_bfs():
"""
using BFS to find augmenting basic.
Returns
-------
A bool, whether found a augmenting basic or not.
"""
global dist, dest, src, node
dist.clear()
for i in range(node):
dist.append(-1)
dist[src] = 0
Q = []
Q.append(src)
cl = 0
while cl < len(Q):
k_ = Q[cl]
i = head[k_]
while i >= 0:
            if flow[i] < capa[i] and dsave[point[i]] and dist[point[i]] < 0:
dist[point[i]] = dist[k_] + 1
Q.append(point[i])
i = nex[i]
cl += 1
return dist[dest] >= 0
def dinic_dfs(x, exp):
"""
using DFS to calc the augmenting basic and refresh network.
Parameters
----------
x : current node.
exp : current flow.
Returns
-------
current flow.
"""
if x == dest:
return exp
res = 0
i = work[x]
global flow
while i >= 0:
v = point[i]
tmp = 0
if flow[i] < capa[i] and dist[v] == dist[x] + 1:
tmp = dinic_dfs(v, min(exp, capa[i] - flow[i]))
if tmp > 0:
flow[i] += tmp
flow[i ^ 1] -= tmp
res += tmp
exp -= tmp
if exp == 0:
break
i = nex[i]
return res
def dinic_flow():
"""
Dinic algorithm to calc max_flow.
Returns
-------
max_flow.
"""
result = 0
global work
while dinic_bfs():
work.clear()
for i in range(node):
work.append(head[i])
result += dinic_dfs(src, oo)
return result
def max_flow(n, kernels, save, prev_flow=None):
"""
Calculate max_flow.
Parameters
----------
n : #nodes
kernels : A list of kernels.
save : A bool list of visited nodes.
prev_flow : A list of previous flows.
Returns
-------
max_flow
"""
global dsave, node
dsave.clear()
for i in range(node):
dsave.append(True)
    if prev_flow is not None:
        for i in range(nedge):
            flow[i] = prev_flow[i]
    else:
        for i in range(nedge):
            flow[i] = 0
    c = len(kernels)
    # Propagate node removal (save[i] == False) to every copy of node i
    # in each of the c - 1 layers of the network.
    for i in range(n):
        for k_ in range(c - 1):
            dsave[k_ * n + i] = save[i]
ret = dinic_flow()
return ret
def init_MaxD(_node, _src, _dest):
"""
Initialize a network.
Parameters
----------
_node : #nodes
_src : the source node
_dest : the destiny node
Returns
-------
void
"""
global node, src, dest
node = _node
src = _src
dest = _dest
    global point, capa, flow, nex, head, nedge
    head.clear()
    for i in range(node):
        head.append(-1)
    nedge = 0
point.clear()
capa.clear()
flow.clear()
nex.clear()
return
def addedge(u, v, c1, c2):
"""
Add an edge(u,v) with capacity c1 and inverse capacity c2.
Parameters
----------
u : node u
v : node v
c1 : capacity c1
c2 : capacity c2
Returns
-------
void
"""
global nedge
global point, capa, flow, nex, head
point.append(v)
capa.append(c1)
flow.append(0)
nex.append(head[u])
head[u] = nedge
nedge += 1
point.append(u)
capa.append(c2)
flow.append(0)
nex.append(head[v])
head[v] = nedge
nedge += 1
return
def build_network(kernels, c, G):
"""
build a network.
Parameters
----------
kernels : A list of kernels.
c : #communities.
G : graph
An undirected graph.
Returns
-------
void
"""
n = len(G)
init_MaxD(n * (c - 1) + 2, n * (c - 1), n * (c - 1) + 1)
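    # For every community k >= 1, add one copy of G at offset `base`: the source
    # feeds that community's kernel and the kernels of all previous communities
    # feed the sink, so a min cut measures how hard it is to disconnect the
    # kernels once spanner nodes are removed.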
base = 0
for k_iter in range(c):
S1 = set()
S2 = set()
for i in range(c):
for j in range(len(kernels[i])):
if i == k_iter:
S1.add(kernels[i][j])
elif i < k_iter:
S2.add(kernels[i][j])
if len(S1) == 0 or len(S2) == 0:
continue
for edges in G.edges:
addedge(base + edges[0] - 1, base + edges[1] - 1, 1, 1)
addedge(base + edges[1] - 1, base + edges[0] - 1, 1, 1)
for i in S1:
if i not in S2:
addedge(src, base + i - 1, n, 0)
for i in S2:
if i not in S1:
addedge(base + i - 1, dest, n, 0)
base += n
return
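

# Minimal usage sketch (not part of the library API): a quick smoke test of
# get_structural_holes_MaxD on a toy two-community graph. It assumes easygraph
# exposes `easygraph.Graph` with an `add_edges` method that accepts a list of
# (u, v) tuples; adjust the construction to your installed version if needed.
if __name__ == "__main__":
    import easygraph as eg

    G = eg.Graph()
    # Two triangles bridged by the edge (3, 4); nodes 3 and 4 are the natural
    # structural hole spanners between the two communities.
    G.add_edges([(1, 2), (2, 3), (1, 3), (4, 5), (5, 6), (4, 6), (3, 4)])
    print(
        get_structural_holes_MaxD(
            G,
            k=2,
            C=[frozenset([1, 2, 3]), frozenset([4, 5, 6])],
        )
    )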