Source code for easygraph.datasets.get_sample_graph

import easygraph as eg


# import progressbar


# Public names exported by ``from <this module> import *``.
__all__ = [
    "get_graph_karateclub",
    "get_graph_blogcatalog",
    "get_graph_youtube",
    "get_graph_flickr",
]


def get_graph_karateclub():
    """Returns the undirected graph of Karate Club.

    Nodes are labelled ``1`` to ``34``. Every node carries a ``"club"``
    attribute whose value is either ``"Mr. Hi"`` or ``"Officer"``.

    Returns
    -------
    get_graph_karateclub : easygraph.Graph
        The undirected graph instance of karate club from dataset:
        http://vlado.fmf.uni-lj.si/pub/networks/data/Ucinet/UciData.htm

    References
    ----------
    .. [1] http://vlado.fmf.uni-lj.si/pub/networks/data/Ucinet/UciData.htm
    """
    member_count = 34
    # 0-based member indices (i.e. row/column positions in the matrix below)
    # of the members that belong to the "Mr. Hi" club.
    club1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 16, 17, 19, 21}

    G = eg.Graph(name="Zachary's Karate Club")
    # Add every member up front so that the node set does not depend on the
    # edge list; node labels are 1-based.
    for node in range(member_count):
        G.add_node(node + 1)

    # 0/1 adjacency matrix: one text line per member, one entry per member.
    zacharydat = """\
0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0
1 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0
1 1 0 1 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0
1 1 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1
0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 1
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1
0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1
0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 1
0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 1 1 0 1
0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 0"""

    # Row/column positions are 0-based; shift by one to get the node labels.
    for row, line in enumerate(zacharydat.split("\n")):
        for col, entry in enumerate(line.split()):
            if int(entry) == 1:
                G.add_edge(row + 1, col + 1)

    # Add the name of each member's club as a node attribute.
    # ``club1`` is 0-based while node labels are 1-based, so convert the
    # label back to an index before the membership test.
    for v in G:
        G.nodes[v]["club"] = "Mr. Hi" if (v - 1) in club1 else "Officer"
    return G
def get_graph_blogcatalog():
    """Returns the undirected graph of blogcatalog.

    The data is read from ``./samples/blogcatalog.mat`` (relative to the
    current working directory), using the ``"network"`` entry of that file.
    Node labels are the string form of the matrix row/column indices.

    Returns
    -------
    get_graph_blogcatalog : easygraph.Graph
        The undirected graph instance of blogcatalog from dataset:
        https://github.com/phanein/deepwalk/blob/master/example_graphs/blogcatalog.mat

    References
    ----------
    .. [1] https://github.com/phanein/deepwalk/blob/master/example_graphs/blogcatalog.mat
    """
    from collections import defaultdict

    from scipy.io import loadmat

    network = loadmat("./samples/blogcatalog.mat")["network"]
    # presumably a scipy sparse matrix (it must provide ``tocoo``) -- verify
    # against the dataset file.
    coo = network.tocoo()

    # Group the column indices of the stored entries by their row index.
    # Stored values are ignored: only the presence of an entry matters.
    neighbours = defaultdict(set)
    for src, dst in zip(coo.row, coo.col):
        neighbours[src].add(dst)

    G = eg.Graph()
    for src, targets in neighbours.items():
        u = str(src)
        for dst in targets:
            G.add_edge(u, str(dst))
    return G
def get_graph_youtube():
    """Returns the undirected graph of Youtube dataset.

    The compressed edge list is downloaded to
    ``./samples/youtube-links.txt.gz``, decompressed to
    ``./samples/youtube-links.txt`` (both relative to the current working
    directory) and loaded with ``Graph.add_edges_from_file``.

    Returns
    -------
    get_graph_youtube : easygraph.Graph
        The undirected graph instance of Youtube from dataset:
        http://socialnetworks.mpi-sws.mpg.de/data/youtube-links.txt.gz

    References
    ----------
    .. [1] http://socialnetworks.mpi-sws.mpg.de/data/youtube-links.txt.gz
    """
    import gzip
    import os
    import shutil

    from urllib import request

    url = "http://socialnetworks.mpi-sws.mpg.de/data/youtube-links.txt.gz"
    zipped_data_path = "./samples/youtube-links.txt.gz"
    unzipped_data_path = "./samples/youtube-links.txt"

    # urlretrieve does not create missing parent directories.
    os.makedirs(os.path.dirname(zipped_data_path), exist_ok=True)

    # Download .gz file
    print("Downloading Youtube dataset...")
    request.urlretrieve(url, zipped_data_path, _show_progress)

    # Unzip: stream the data so the whole dataset is never held in memory,
    # and let the context managers close both files even on failure.
    with gzip.open(zipped_data_path, "rb") as zipped_file:
        with open(unzipped_data_path, "wb") as unzipped_file:
            shutil.copyfileobj(zipped_file, unzipped_file)

    # Returns graph
    G = eg.Graph()
    G.add_edges_from_file(file=unzipped_data_path)
    return G
def get_graph_flickr():
    """Returns the undirected graph of Flickr dataset.

    The compressed edge list is downloaded to
    ``./samples/flickr-links.txt.gz``, decompressed to
    ``./samples/flickr-links.txt`` (both relative to the current working
    directory) and loaded with ``Graph.add_edges_from_file``.

    Returns
    -------
    get_graph_flickr : easygraph.Graph
        The undirected graph instance of Flickr from dataset:
        http://socialnetworks.mpi-sws.mpg.de/data/flickr-links.txt.gz

    References
    ----------
    .. [1] http://socialnetworks.mpi-sws.mpg.de/data/flickr-links.txt.gz
    """
    import gzip
    import os
    import shutil

    from urllib import request

    url = "http://socialnetworks.mpi-sws.mpg.de/data/flickr-links.txt.gz"
    zipped_data_path = "./samples/flickr-links.txt.gz"
    unzipped_data_path = "./samples/flickr-links.txt"

    # urlretrieve does not create missing parent directories.
    os.makedirs(os.path.dirname(zipped_data_path), exist_ok=True)

    # Download .gz file
    print("Downloading Flickr dataset...")
    request.urlretrieve(url, zipped_data_path, _show_progress)

    # Unzip: stream the data so the whole dataset is never held in memory,
    # and let the context managers close both files even on failure.
    with gzip.open(zipped_data_path, "rb") as zipped_file:
        with open(unzipped_data_path, "wb") as unzipped_file:
            shutil.copyfileobj(zipped_file, unzipped_file)

    # Returns graph
    G = eg.Graph()
    G.add_edges_from_file(file=unzipped_data_path)
    return G
# Progress bar of the download currently in flight; ``None`` between downloads.
_pbar = None


class _TextProgressBar:
    """Plain-text stand-in offering the start/update/finish calls used below.

    Used when the optional ``progressbar`` package cannot be imported.
    """

    def __init__(self, maxval):
        self.maxval = maxval

    @staticmethod
    def _write(text):
        import sys

        sys.stdout.write(text)
        sys.stdout.flush()

    def start(self):
        self.update(0)

    def update(self, value):
        # "\r" rewrites the same terminal line on every update.
        self._write("\r{:3d}%".format(100 * value // self.maxval))

    def finish(self):
        self._write("\r100%\n")


def _show_progress(block_num, block_size, total_size):
    """Report hook for ``urllib.request.urlretrieve`` that shows progress.

    Parameters
    ----------
    block_num : int
        Number of blocks transferred so far.
    block_size : int
        Size of one block in bytes.
    total_size : int
        Total size of the download in bytes; non-positive when unknown.
    """
    global _pbar

    # Without a known total there is nothing meaningful to display.
    if total_size <= 0:
        return

    if _pbar is None:
        # ``progressbar`` is optional and not imported at module level, so
        # import it lazily and fall back to plain text when it is missing.
        try:
            import progressbar
        except ImportError:
            _pbar = _TextProgressBar(maxval=total_size)
        else:
            _pbar = progressbar.ProgressBar(maxval=total_size)
        _pbar.start()

    downloaded = block_num * block_size
    if downloaded < total_size:
        _pbar.update(downloaded)
    else:
        # Last block: close the bar and reset so the next download starts fresh.
        _pbar.finish()
        _pbar = None