import easygraph as eg
import numpy as np
import scipy.sparse as sparse
__all__ = [
"localAssort",
]
def localAssort(
edgelist, node_attr, pr=np.arange(0.0, 1.0, 0.1), undir=True, missingValue=-1
):
"""Calculate the multiscale assortativity.
Node indices in the edge list and the node attribute values must both start from 0.
Parameters
----------
edgelist : array_like
the network represented as an edge list,
i.e., a E x 2 array of node pairs
node_attr : array_like
n length array of node attribute values
pr : array, optional
array of (1 - restart probability) values for the random walk used to
calculate the personalised PageRank. The largest of these values
determines the accuracy of the TotalRank vector: max(pr) -> 1 is more
accurate (default: [0, .1, .2, .3, .4, .5, .6, .7, .8, .9])
undir : bool, optional
indicate if network is undirected (default: True)
missingValue : int, optional
token to indicate missing attribute values (default: -1)
Returns
-------
assortM : array_like
n x len(pr) array of local assortativities; each column corresponds to
one value of the input restart probabilities, pr. Only returned if more
than one restart probability is given (i.e., len(pr) > 1); otherwise
None is returned in its place.
assortT : array_like
n length array of multiscale assortativities
Z : array_like
n length array of per-node confidence scores
References
----------
For full details see [1]_
.. [1] Peel, L., Delvenne, J. C., & Lambiotte, R. (2018). "Multiscale
mixing patterns in networks." PNAS, 115(16), 4057-4062.
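Examples
--------
An illustrative call on a small hypothetical graph (the edge list and
attribute values below are made up for demonstration):
>>> import numpy as np
>>> edgelist = np.array([[0, 1], [1, 2], [2, 0], [2, 3]])
>>> node_attr = np.array([0, 0, 1, 1])
>>> assortM, assortT, Z = localAssort(edgelist, node_attr)
>>> assortT.shape
(4,)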
"""
# number of nodes
n = len(node_attr)
# number of nodes with complete attribute
ncomp = (node_attr != missingValue).sum()
# number of edges
m = len(edgelist)
# construct adjacency matrix and calculate degree sequence
A, degree = createA(edgelist, n, undir)
# construct diagonal inverse degree matrix
D = sparse.diags(1.0 / degree, 0, format="csc")
# construct transition matrix (row normalised adjacency matrix)
W = D @ A
# number of distinct node categories
c = len(np.unique(node_attr))
if ncomp < n:
c -= 1
# calculate node weights for how "complete" the
# metadata is around the node
Z = np.zeros(n)
Z[node_attr == missingValue] = 1.0
Z = (W @ Z) / degree
# indicator array if node has attribute data (or missing)
hasAttribute = node_attr != missingValue
# calculate global expected values
values = np.ones(ncomp)
yi = (hasAttribute).nonzero()[0]
yj = node_attr[hasAttribute]
Y = sparse.coo_matrix((values, (yi, yj)), shape=(n, c)).tocsc()
eij_glob = np.array(Y.T @ (A @ Y).todense())
eij_glob /= np.sum(eij_glob)
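# expected fraction of edges joining same-attribute nodes under random mixing,
# sum_g a_g * b_g (the baseline term in Newman's assortativity)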
ab_glob = np.sum(eij_glob.sum(1) * eij_glob.sum(0))
# initialise outputs
assortM = np.empty((n, len(pr)))
assortT = np.empty(n)
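# precompute the neighbour attribute distributions: WY[v, g] is the fraction of
# node v's neighbours (out-neighbours if directed) carrying attribute value g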
WY = (W @ Y).tocsc()
for i in range(n):
pis, ti, it = calculateRWRrange(W, i, pr, n)
if len(pr) > 1:
for ii, pri in enumerate(pr):
pi = pis[:, ii]
YPI = sparse.coo_matrix(
(
pi[hasAttribute],
(node_attr[hasAttribute], np.arange(n)[hasAttribute]),
),
shape=(c, n),
).tocsr()
trace_e = (YPI.dot(WY).toarray()).trace()
assortM[i, ii] = trace_e
YPI = sparse.coo_matrix(
(ti[hasAttribute], (node_attr[hasAttribute], np.arange(n)[hasAttribute])),
shape=(c, n),
).tocsr()
e_gh = (YPI @ WY).toarray()
e_gh_sum = e_gh.sum()
Z[i] = e_gh_sum
e_gh /= e_gh_sum
trace_e = e_gh.trace()
assortT[i] = trace_e
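# normalise the traces into assortativities following Newman's formula,
# r = (trace(e) - sum_g a_g b_g) / (1 - sum_g a_g b_g)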
assortT -= ab_glob
np.divide(assortT, 1.0 - ab_glob, out=assortT, where=(1.0 - ab_glob) != 0)
if len(pr) > 1:
assortM -= ab_glob
np.divide(assortM, 1.0 - ab_glob, out=assortM, where=(1.0 - ab_glob) != 0)
return assortM, assortT, Z
return None, assortT, Z
def createA(E, n, undir=True):
"""Create adjacency matrix and degree sequence."""
if undir:
G = eg.Graph()
else:
G = eg.DiGraph()
G.add_nodes_from(range(n))
for e in E:
G.add_edge(e[0], e[1])
A = eg.to_scipy_sparse_matrix(G)
degree = np.array(A.sum(1)).flatten()
return A, degree
def calculateRWRrange(W, i, alphas, n, maxIter=1000):
"""
Calculate the personalised TotalRank and personalised PageRank vectors.
Parameters
----------
W : array_like
transition matrix (row normalised adjacency matrix)
i : int
index of the personalisation node
alphas : array_like
array of (1 - restart probability) values
n : int
number of nodes in the network
maxIter : int, optional
maximum number of iterations (default: 1000)
Returns
-------
pPageRank_all : array_like
personalised PageRank for all input alpha values (only calculated if
more than one alpha given as input, i.e., len(alphas) > 1)
pTotalRank : array_like
personalised TotalRank (personalised PageRank with alpha integrated
out)
it : int
number of iterations
References
----------
See [2]_ and [3]_ for further details.
.. [2] Boldi, P. (2005). "TotalRank: Ranking without damping." In Special
interest tracks and posters of the 14th international conference on
World Wide Web (pp. 898-899).
.. [3] Boldi, P., Santini, M., & Vigna, S. (2007). "A deeper investigation
of PageRank as a function of the damping factor." In Dagstuhl Seminar
Proceedings. Schloss Dagstuhl-Leibniz-Zentrum für Informatik.
"""
alpha0 = alphas.max()
WT = alpha0 * W.T
diff = 1
it = 1
# initialise PageRank vectors
pPageRank = np.zeros(n)
pPageRank_all = np.zeros((n, len(alphas)))
pPageRank[i] = 1
pPageRank_all[i, :] = 1
pPageRank_old = pPageRank.copy()
pTotalRank = pPageRank.copy()
oneminusalpha0 = 1 - alpha0
while diff > 1e-9:
# calculate personalised PageRank via power iteration
pPageRank = WT @ pPageRank
pPageRank[i] += oneminusalpha0
# calculate difference in pPageRank from previous iteration
delta_pPageRank = pPageRank - pPageRank_old
# Eq. [S23] Ref. [1]
pTotalRank += (delta_pPageRank) / ((it + 1) * (alpha0**it))
# only calculate personalised pageranks if more than one alpha
if len(alphas) > 1:
pPageRank_all += np.outer((delta_pPageRank), (alphas / alpha0) ** it)
# calculate convergence criteria
diff = np.sum((delta_pPageRank) ** 2) / n
it += 1
if it > maxIter:
print(i, "max iterations exceeded")
diff = 0
pPageRank_old = pPageRank.copy()
return pPageRank_all, pTotalRank, it
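# The block below is a minimal, self-contained usage sketch (illustrative only,
# not part of the published API): it builds a small hypothetical graph in which
# node 4 carries the default missing-value token, then prints the results.
if __name__ == "__main__":
    example_edges = np.array([[0, 1], [1, 2], [2, 0], [2, 3], [3, 4], [4, 2]])
    example_attr = np.array([0, 0, 1, 1, -1])  # node 4 has no attribute data
    assortM, assortT, Z = localAssort(example_edges, example_attr)
    print("per-node multiscale assortativity:", assortT)
    if assortM is not None:
        print("assortM shape (nodes x restart probabilities):", assortM.shape)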