Source code for easygraph.datasets.twitter_ego
import gzip
import os
import easygraph as eg
from easygraph.datasets.graph_dataset_base import EasyGraphBuiltinDataset
from easygraph.datasets.utils import download
from easygraph.datasets.utils import extract_archive
[docs]
class TwitterEgoDataset(EasyGraphBuiltinDataset):
    r"""
    Twitter Ego Network Dataset

    The Twitter dataset was collected from public sources and contains a
    large combined ego-network of Twitter users. As reported by SNAP, the
    combined network has 81,306 nodes and 1,768,149 directed edges.

    NOTE: the edge list is loaded into an undirected ``eg.Graph``, so edge
    direction is discarded and reciprocal pairs collapse into a single
    edge; the resulting edge count is therefore lower than the SNAP figure.

    Source: J. McAuley and J. Leskovec, Stanford SNAP, 2012
    URL: https://snap.stanford.edu/data/egonets-Twitter.html
    File used: https://snap.stanford.edu/data/twitter_combined.txt.gz
    """

    def __init__(self):
        super().__init__(
            name="twitter_ego",
            url="https://snap.stanford.edu/data/twitter_combined.txt.gz",
            force_reload=False,
        )

    def download(self):
        """Fetch the gzipped edge list into ``self.raw_path`` and extract it.

        The archive is stored as ``twitter_combined.txt.gz`` under
        ``self.raw_path``; the ``download`` and ``extract_archive`` helpers
        from ``easygraph.datasets.utils`` do the actual work.
        """
        gz_path = os.path.join(self.raw_path, "twitter_combined.txt.gz")
        download(self.url, path=gz_path)
        # NOTE(review): presumably this leaves twitter_combined.txt next to
        # the archive; process() decompresses it itself if it does not.
        extract_archive(gz_path, self.raw_path)

    def process(self):
        """Build the graph from the raw edge list.

        Decompresses ``twitter_combined.txt.gz`` to ``twitter_combined.txt``
        if the text file is not already present, then reads one
        whitespace-separated ``u v`` integer pair per line and adds it as an
        edge. Blank lines are skipped.

        Sets ``self._graphs`` (single-element list), ``self._graph`` and
        ``self._processed``.

        Raises:
            ValueError: if a non-blank line does not consist of exactly two
                integers.
        """
        gz_path = os.path.join(self.raw_path, "twitter_combined.txt.gz")
        txt_path = os.path.join(self.raw_path, "twitter_combined.txt")

        if not os.path.exists(txt_path):
            # Decompress to a temporary name first so that an interrupted
            # run cannot leave a truncated file that later runs would
            # mistake for the complete edge list.
            tmp_path = txt_path + ".tmp"
            with gzip.open(gz_path, "rt") as f_in, open(tmp_path, "w") as f_out:
                f_out.writelines(f_in)
            os.replace(tmp_path, txt_path)

        G = eg.Graph()
        with open(txt_path, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate empty / trailing blank lines.
                    continue
                u, v = map(int, fields)
                G.add_edge(u, v)

        self._graphs = [G]
        self._graph = G
        self._processed = True

    def __getitem__(self, idx):
        """Return the dataset's graph.

        When ``self._graph`` is set it is returned regardless of ``idx``
        (the dataset holds a single graph). Otherwise ``self._graphs[idx]``
        is returned if that list is non-empty, else ``None``.
        """
        if self._graph is not None:
            return self._graph
        if self._graphs:
            return self._graphs[idx]
        return None