Source code for easygraph.functions.basic.localassort

import easygraph as eg
import numpy as np
import scipy.sparse as sparse


__all__ = [
    "localAssort",
]


def localAssort(
    edgelist, node_attr, pr=np.arange(0.0, 1.0, 0.1), undir=True, missingValue=-1
):
    """Calculate the multiscale assortativity.

    Node indices and node attribute values must both start from 0.

    Parameters
    ----------
    edgelist : array_like
        the network represented as an edge list, i.e., an E x 2 array of node pairs
    node_attr : array_like
        n length array of node attribute values
    pr : array, optional
        array of one minus restart probabilities for the random walk used to
        calculate the personalised PageRank. The largest of these values
        determines the accuracy of the TotalRank vector: max(pr) -> 1 is more
        accurate (default: [0, .1, .2, .3, .4, .5, .6, .7, .8, .9])
    undir : bool, optional
        indicates whether the network is undirected (default: True)
    missingValue : int, optional
        token used to indicate missing attribute values (default: -1)

    Returns
    -------
    assortM : array_like
        n x len(pr) array of local assortativities; each column corresponds to
        one of the input restart probabilities in pr. Only returned if more
        than one restart probability is given (i.e., len(pr) > 1), otherwise
        None is returned in its place.
    assortT : array_like
        n length array of multiscale assortativities
    Z : array_like
        n length array of per-node confidence scores

    References
    ----------
    For full details see [1]_

    .. [1] Peel, L., Delvenne, J. C., & Lambiotte, R. (2018). "Multiscale
       mixing patterns in networks." PNAS, 115(16), 4057-4062.
    """
    # number of nodes
    n = len(node_attr)
    # number of nodes with a complete (non-missing) attribute
    ncomp = (node_attr != missingValue).sum()
    # number of edges
    m = len(edgelist)

    # construct adjacency matrix and calculate degree sequence
    A, degree = createA(edgelist, n, undir)

    # construct diagonal inverse degree matrix
    D = sparse.diags(1.0 / degree, 0, format="csc")

    # construct transition matrix (row-normalised adjacency matrix)
    W = D @ A

    # number of distinct node categories
    c = len(np.unique(node_attr))
    if ncomp < n:
        c -= 1

    # calculate node weights for how "complete" the metadata is around the node
    Z = np.zeros(n)
    Z[node_attr == missingValue] = 1.0
    Z = (W @ Z) / degree

    # indicator array for whether a node has attribute data
    hasAttribute = node_attr != missingValue

    # calculate global expected values
    values = np.ones(ncomp)
    yi = hasAttribute.nonzero()[0]
    yj = node_attr[hasAttribute]
    Y = sparse.coo_matrix((values, (yi, yj)), shape=(n, c)).tocsc()
    eij_glob = np.array(Y.T @ (A @ Y).todense())
    eij_glob /= np.sum(eij_glob)
    ab_glob = np.sum(eij_glob.sum(1) * eij_glob.sum(0))

    # initialise outputs
    assortM = np.empty((n, len(pr)))
    assortT = np.empty(n)

    WY = (W @ Y).tocsc()

    for i in range(n):
        pis, ti, it = calculateRWRrange(W, i, pr, n)
        if len(pr) > 1:
            for ii, pri in enumerate(pr):
                pi = pis[:, ii]
                YPI = sparse.coo_matrix(
                    (
                        pi[hasAttribute],
                        (node_attr[hasAttribute], np.arange(n)[hasAttribute]),
                    ),
                    shape=(c, n),
                ).tocsr()
                trace_e = (YPI.dot(WY).toarray()).trace()
                assortM[i, ii] = trace_e

        YPI = sparse.coo_matrix(
            (ti[hasAttribute], (node_attr[hasAttribute], np.arange(n)[hasAttribute])),
            shape=(c, n),
        ).tocsr()
        e_gh = (YPI @ WY).toarray()
        e_gh_sum = e_gh.sum()
        Z[i] = e_gh_sum
        e_gh /= e_gh_sum
        trace_e = e_gh.trace()
        assortT[i] = trace_e

    assortT -= ab_glob
    np.divide(assortT, 1.0 - ab_glob, out=assortT, where=ab_glob != 0)

    if len(pr) > 1:
        assortM -= ab_glob
        np.divide(assortM, 1.0 - ab_glob, out=assortM, where=ab_glob != 0)
        return assortM, assortT, Z
    return None, assortT, Z
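
A minimal usage sketch, not part of the module itself: the toy edge list and
attribute vector below are invented purely for illustration, and the snippet
assumes easygraph, numpy and scipy are installed so that the module shown in
the title can be imported.

    import numpy as np

    from easygraph.functions.basic.localassort import localAssort

    # hypothetical toy network: two attribute groups of three nodes each,
    # joined by a single bridge edge (node and attribute indices start at 0)
    edgelist = np.array([[0, 1], [1, 2], [2, 0], [3, 4], [4, 5], [5, 3], [2, 3]])
    node_attr = np.array([0, 0, 0, 1, 1, 1])

    assortM, assortT, Z = localAssort(edgelist, node_attr)

    print(assortT.shape)  # (6,)    one multiscale assortativity per node
    print(assortM.shape)  # (6, 10) one column per default restart probability in pr
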
def createA(E, n, undir=True):
    """Create the adjacency matrix and degree sequence."""
    if undir:
        G = eg.Graph()
    else:
        G = eg.DiGraph()
    G.add_nodes_from(range(n))
    for e in E:
        G.add_edge(e[0], e[1])
    A = eg.to_scipy_sparse_matrix(G)
    degree = np.array(A.sum(1)).flatten()
    return A, degree


def calculateRWRrange(W, i, alphas, n, maxIter=1000):
    """Calculate the personalised TotalRank and personalised PageRank vectors.

    Parameters
    ----------
    W : array_like
        transition matrix (row-normalised adjacency matrix)
    i : int
        index of the personalisation node
    alphas : array_like
        array of (1 - restart probabilities)
    n : int
        number of nodes in the network
    maxIter : int, optional
        maximum number of iterations (default: 1000)

    Returns
    -------
    pPageRank_all : array_like
        personalised PageRank vectors for all input alpha values (only
        calculated if more than one alpha is given as input, i.e.,
        len(alphas) > 1)
    pTotalRank : array_like
        personalised TotalRank (personalised PageRank with alpha integrated out)
    it : int
        number of iterations

    References
    ----------
    See [2]_ and [3]_ for further details.

    .. [2] Boldi, P. (2005). "TotalRank: Ranking without damping." In Special
       interest tracks and posters of the 14th international conference on
       World Wide Web (pp. 898-899).
    .. [3] Boldi, P., Santini, M., & Vigna, S. (2007). "A deeper investigation
       of PageRank as a function of the damping factor." In Dagstuhl Seminar
       Proceedings. Schloss Dagstuhl-Leibniz-Zentrum für Informatik.
    """
    alpha0 = alphas.max()
    WT = alpha0 * W.T
    diff = 1
    it = 1

    # initialise PageRank vectors
    pPageRank = np.zeros(n)
    pPageRank_all = np.zeros((n, len(alphas)))
    pPageRank[i] = 1
    pPageRank_all[i, :] = 1
    pPageRank_old = pPageRank.copy()
    pTotalRank = pPageRank.copy()

    oneminusalpha0 = 1 - alpha0

    while diff > 1e-9:
        # calculate personalised PageRank via power iteration
        pPageRank = WT @ pPageRank
        pPageRank[i] += oneminusalpha0

        # calculate the change in pPageRank since the previous iteration
        delta_pPageRank = pPageRank - pPageRank_old

        # Eq. [S23] of Ref. [1]
        pTotalRank += delta_pPageRank / ((it + 1) * (alpha0**it))

        # only calculate the personalised PageRanks if more than one alpha is given
        if len(alphas) > 1:
            pPageRank_all += np.outer(delta_pPageRank, (alphas / alpha0) ** it)

        # calculate convergence criterion
        diff = np.sum(delta_pPageRank**2) / n
        it += 1
        if it > maxIter:
            print(i, "max iterations exceeded")
            diff = 0
        pPageRank_old = pPageRank.copy()

    return pPageRank_all, pTotalRank, it
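
To illustrate what the helper computes, here is a small self-contained check of
calculateRWRrange on an invented 3-node cycle (again a sketch, assuming the same
module path as above). Every personalised PageRank column and the personalised
TotalRank vector is a probability distribution over the nodes, so each should
sum to one.

    import numpy as np
    import scipy.sparse as sparse

    from easygraph.functions.basic.localassort import calculateRWRrange

    # toy 3-node cycle: build the row-normalised transition matrix W expected as input
    A = sparse.csr_matrix(
        np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]], dtype=float)
    )
    degree = np.array(A.sum(1)).flatten()
    W = sparse.diags(1.0 / degree, 0, format="csc") @ A

    alphas = np.arange(0.0, 1.0, 0.1)
    pPageRank_all, pTotalRank, it = calculateRWRrange(W, 0, alphas, 3)

    # each column is the personalised PageRank for one alpha; both it and the
    # TotalRank vector are probability distributions over the three nodes
    print(np.allclose(pPageRank_all.sum(axis=0), 1.0))  # True
    print(np.isclose(pTotalRank.sum(), 1.0))            # True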