Source code for easygraph.datasets.roadnet

"""RoadNet-CA Dataset

This dataset represents the road network of California.
Nodes correspond to intersections, and edges represent roads connecting them.

The data is undirected and unweighted. No features or labels are provided.

Statistics:
- Nodes: 1,965,206
- Edges: 2,766,607
- Features: None
- Labels: None

Reference:
J. Leskovec and A. Krevl, “SNAP Datasets: Stanford Large Network Dataset Collection,”
https://snap.stanford.edu/data/roadNet-CA.html
"""

import gzip
import os
import shutil

import easygraph as eg

from easygraph.classes.graph import Graph

from .graph_dataset_base import EasyGraphBuiltinDataset
from .utils import download


[docs] class RoadNetCADataset(EasyGraphBuiltinDataset): r"""Road network of California (RoadNet-CA) Nodes are road intersections and edges are roads connecting them. Parameters ---------- raw_dir : str, optional Directory to store the raw downloaded files. Default: None force_reload : bool, optional Whether to re-download and process the dataset. Default: False verbose : bool, optional Whether to print detailed processing logs. Default: True transform : callable, optional Optional transform to apply on the graph. Examples -------- >>> from easygraph.datasets import RoadNetCADataset >>> dataset = RoadNetCADataset() >>> g = dataset[0] >>> print("Nodes:", g.number_of_nodes()) >>> print("Edges:", g.number_of_edges()) """ def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): name = "roadNet-CA" url = "https://snap.stanford.edu/data/roadNet-CA.txt.gz" super(RoadNetCADataset, self).__init__( name=name, url=url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, )
[docs] def download(self): r"""Download and decompress the .txt.gz file.""" compressed_path = os.path.join(self.raw_dir, self.name + ".txt.gz") extracted_path = os.path.join(self.raw_path, self.name + ".txt") download(self.url, path=compressed_path) if not os.path.exists(self.raw_path): os.makedirs(self.raw_path) with gzip.open(compressed_path, "rb") as f_in: with open(extracted_path, "wb") as f_out: shutil.copyfileobj(f_in, f_out)
[docs] def process(self): graph = eg.Graph() # Undirected road network edge_list_path = os.path.join(self.raw_path, self.name + ".txt") with open(edge_list_path, "r") as f: for line in f: if line.startswith("#") or line.strip() == "": continue u, v = map(int, line.strip().split()) graph.add_edge(u, v) self._g = graph self._num_nodes = graph.number_of_nodes() self._num_edges = graph.number_of_edges() if self.verbose: print("Finished loading RoadNet-CA dataset.") print(f" NumNodes: {self._num_nodes}") print(f" NumEdges: {self._num_edges}")
def __getitem__(self, idx): assert idx == 0, "RoadNetCADataset only contains one graph" return self._g if self._transform is None else self._transform(self._g) def __len__(self): return 1