# Source code for easygraph.datasets.dynamic.hospital_lyon
import json
import os
from easygraph.classes.hypergraph import Hypergraph
from easygraph.datasets.dynamic.load_dataset import request_json_from_url
from easygraph.datasets.graph_dataset_base import EasyGraphDataset
from easygraph.datasets.utils import _get_eg_url
from easygraph.datasets.utils import tensor
class Hospital_Lyon(EasyGraphDataset):
    """Dataset wrapper that downloads the ``hospital-lyon`` JSON file and
    converts it into a single :class:`Hypergraph`.

    Parameters
    ----------
    raw_dir : str, optional
        Forwarded to ``EasyGraphDataset``; ``download`` writes the JSON file
        into ``self.raw_dir``.
    force_reload : bool
        Forwarded to ``EasyGraphDataset``.
    verbose : bool
        Forwarded to ``EasyGraphDataset``.
    transform : callable, optional
        Forwarded to ``EasyGraphDataset``; when set, ``__getitem__`` returns
        ``transform(graph)`` instead of the graph itself.
    save_dir : str
        Forwarded to ``EasyGraphDataset``.
    """

    _urls = {
        "hospital_lyon": "easygraph-data-hospital-lyon/-/raw/main/hospital-lyon.json?ref_type=heads&inline=false",
    }

    def __init__(
        self,
        raw_dir=None,
        force_reload=False,
        verbose=True,
        transform=None,
        save_dir="./",
    ):
        name = "hospital_lyon"
        # Assigned through the ``url`` property setter defined at the end of
        # the class, before the base initializer runs.
        self.url = _get_eg_url(self._urls[name])
        super().__init__(
            name=name,
            url=self.url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
            save_dir=save_dir,
        )

    def preprocess(self, data, max_order=None, is_dynamic=True):
        """Build a hypergraph from the decoded dataset dictionary.

        Parameters
        ----------
        data : dict
            Must provide the keys ``"node-data"`` (node name -> property
            dict), ``"edge-dict"`` (edge id -> list of node names) and
            ``"edge-data"`` (edge id -> property dict). Each node property
            dict is modified in place: a ``"name"`` entry is added.
        max_order : int, optional
            When given, hyperedges with more than ``max_order + 1`` members
            are skipped. ``None`` disables the filter; ``0`` is honoured as a
            real limit.
        is_dynamic : bool
            When true, every hyperedge is added under the group named by its
            ``"timestamp"`` property and that timestamp is collected.

        Returns
        -------
        tuple
            ``(G, timestamp_lst)``: the hypergraph and the timestamps of the
            added hyperedges in insertion order (empty when ``is_dynamic`` is
            false).

        Raises
        ------
        TypeError
            If an edge id cannot be converted to ``int``.
        KeyError
            If a hyperedge references a node missing from ``"node-data"``,
            or an edge has no entry in ``"edge-data"``.
        """
        # Node identifiers in the file are not contiguous, so every node name
        # is mapped to a dense 0-based index first.
        timestamp_lst = []
        node_data = data["node-data"]
        G = Hypergraph(num_v=len(node_data))

        name_dict = {}
        for node_index, (node_name, node_props) in enumerate(node_data.items()):
            name_dict[node_name] = node_index
            node_props["name"] = node_name
            G.v_property[node_index] = node_props

        e_property_dict = data["edge-data"]
        rows = []
        cols = []
        column = 0  # counts only the hyperedges that are actually kept
        for raw_edge_id, members in data["edge-dict"].items():
            # ``is not None`` (rather than truthiness) so max_order=0 filters.
            if max_order is not None and len(members) > max_order + 1:
                continue
            try:
                # Property lookup uses the canonical ``str(int(id))`` form.
                edge_key = str(int(raw_edge_id))
            except ValueError as e:
                raise TypeError(
                    f"Failed to convert the edge with ID {raw_edge_id} to type int."
                ) from e

            # An unknown node name raises KeyError here.
            edge = [name_dict[n] for n in members]
            rows.extend(edge)
            cols.extend([column] * len(edge))
            column += 1

            edge_props = e_property_dict[edge_key]
            if is_dynamic:
                G.add_hyperedges(
                    e_list=edge,
                    e_property=edge_props,
                    group_name=edge_props["timestamp"],
                )
                timestamp_lst.append(edge_props["timestamp"])
            else:
                G.add_hyperedges(e_list=edge, e_property=edge_props)

        # Parallel (node index, hyperedge column) lists describing membership.
        G._rows = rows
        G._cols = cols
        return G, timestamp_lst

    @property
    def url(self):
        """URL the dataset file is requested from."""
        return self._url

    @property
    def save_name(self):
        """Base file name (without extension) of the stored JSON file."""
        return self.name

    def __getitem__(self, idx):
        """Return the only graph of the dataset (``idx`` must be ``0``),
        passed through the transform when one is configured."""
        assert idx == 0, "This dataset has only one graph"
        if self._transform is None:
            return self._g
        return self._transform(self._g)

    def load(self):
        """Read the stored JSON file from ``save_path`` into ``self.load_data``."""
        graph_path = os.path.join(self.save_path, self.save_name + ".json")
        with open(graph_path, "r", encoding="utf-8") as f:
            self.load_data = json.load(f)

    def has_cache(self):
        """Return whether the JSON file exists under ``save_path``."""
        graph_path = os.path.join(self.save_path, self.save_name + ".json")
        return os.path.exists(graph_path)

    def download(self):
        """Populate ``self.load_data`` from the stored file when present,
        otherwise request it from ``self.url`` and write it to ``raw_dir``."""
        if self.has_cache():
            self.load()
            return
        # NOTE(review): the file is written under ``raw_dir`` while
        # ``has_cache``/``load`` look under ``save_path``; confirm against the
        # base class that these resolve to the same directory.
        root = self.raw_dir
        data = request_json_from_url(self.url)
        with open(
            os.path.join(root, self.save_name + ".json"), "w", encoding="utf-8"
        ) as f:
            json.dump(data, f)
        self.load_data = data

    def process(self):
        """Convert ``self.load_data`` into ``self._g`` and attach features.

        The graph is built in dynamic mode; ``ndata["hyperedge_feature"]`` is
        set to the tensor ``1..k`` where ``k`` is the number of collected
        timestamps.
        """
        self._g, edge_feature_list = self.preprocess(self.load_data, is_dynamic=True)
        self._g.ndata["hyperedge_feature"] = tensor(
            range(1, len(edge_feature_list) + 1)
        )

    @url.setter
    def url(self, value):
        self._url = value