Source code for easygraph.datasets.hypergraph.coauthorship

from functools import partial
from typing import Optional

from easygraph.datapipe import load_from_pickle
from easygraph.datapipe import norm_ft
from easygraph.datapipe import to_bool_tensor
from easygraph.datapipe import to_long_tensor
from easygraph.datapipe import to_tensor
from easygraph.datasets.hypergraph.hypergraph_dataset_base import BaseData


class CoauthorshipCora(BaseData):
    r"""The Co-authorship Cora dataset is a citation network dataset for the vertex classification task.

    More details can be found in the `HyperGCN <https://papers.nips.cc/paper/2019/file/1efa39bcaec6f3900149160693694536-Paper.pdf>`_ paper.

    The content of the Co-authorship Cora dataset includes the following:

    - ``num_classes``: The number of classes: :math:`7`.
    - ``num_vertices``: The number of vertices: :math:`2,708`.
    - ``num_edges``: The number of edges: :math:`1,072`.
    - ``dim_features``: The dimension of features: :math:`1,433`.
    - ``features``: The vertex feature matrix. ``torch.Tensor`` with size :math:`(2,708 \times 1,433)`.
    - ``edge_list``: The edge list. ``List`` with length :math:`1,072`.
    - ``labels``: The label list. ``torch.LongTensor`` with size :math:`(2,708, )`.
    - ``train_mask``: The train mask. ``torch.BoolTensor`` with size :math:`(2,708, )`.
    - ``val_mask``: The validation mask. ``torch.BoolTensor`` with size :math:`(2,708, )`.
    - ``test_mask``: The test mask. ``torch.BoolTensor`` with size :math:`(2,708, )`.

    Args:
        ``data_root`` (``str``, optional): The directory in which the data is stored.
            If set to ``None``, the data is auto-downloaded from the server and saved
            into the default directory ``~/.dhg/datasets/``. Defaults to ``None``.
    """

    def __init__(self, data_root: Optional[str] = None) -> None:
        super().__init__("coauthorship_cora", data_root)

        def pickled(filename, md5, *preprocess):
            # Descriptor for one pickle-backed item. The "preprocess" key is
            # only present when at least one preprocessing step is supplied.
            entry = {
                "upon": [{"filename": filename, "md5": md5}],
                "loader": load_from_pickle,
            }
            if preprocess:
                entry["preprocess"] = list(preprocess)
            return entry

        # Scalar statistics first, then the file-backed items in the same
        # order as before; the finished mapping is assigned in one step.
        content = {
            "num_classes": 7,
            "num_vertices": 2708,
            "num_edges": 1072,
            "dim_features": 1433,
        }
        content["features"] = pickled(
            "features.pkl",
            "14257c0e24b4eb741b469a351e524785",
            to_tensor,
            partial(norm_ft, ord=1),
        )
        content["edge_list"] = pickled(
            "edge_list.pkl",
            "a17ff337f1b9099f5a9d4d670674e146",
        )
        content["labels"] = pickled(
            "labels.pkl",
            "c8d11c452e0be69f79a47dd839279117",
            to_long_tensor,
        )
        content["train_mask"] = pickled(
            "train_mask.pkl",
            "111db6c6f986be2908378df7bdca7a9b",
            to_bool_tensor,
        )
        # NOTE(review): val_mask.pkl and test_mask.pkl are registered with the
        # same md5 -- confirm the two files are really meant to be identical.
        content["val_mask"] = pickled(
            "val_mask.pkl",
            "ffab1055193ffb2fe74822bb575d332a",
            to_bool_tensor,
        )
        content["test_mask"] = pickled(
            "test_mask.pkl",
            "ffab1055193ffb2fe74822bb575d332a",
            to_bool_tensor,
        )
        self._content = content
class CoauthorshipDBLP(BaseData):
    r"""The Co-authorship DBLP dataset is a citation network dataset for the vertex classification task.

    More details can be found in the `HyperGCN <https://papers.nips.cc/paper/2019/file/1efa39bcaec6f3900149160693694536-Paper.pdf>`_ paper.

    The content of the Co-authorship DBLP dataset includes the following:

    - ``num_classes``: The number of classes: :math:`6`.
    - ``num_vertices``: The number of vertices: :math:`41,302`.
    - ``num_edges``: The number of edges: :math:`22,363`.
    - ``dim_features``: The dimension of features: :math:`1,425`.
    - ``features``: The vertex feature matrix. ``torch.Tensor`` with size :math:`(41,302 \times 1,425)`.
    - ``edge_list``: The edge list. ``List`` with length :math:`22,363`.
    - ``labels``: The label list. ``torch.LongTensor`` with size :math:`(41,302, )`.
    - ``train_mask``: The train mask. ``torch.BoolTensor`` with size :math:`(41,302, )`.
    - ``val_mask``: The validation mask. ``torch.BoolTensor`` with size :math:`(41,302, )`.
    - ``test_mask``: The test mask. ``torch.BoolTensor`` with size :math:`(41,302, )`.

    Args:
        ``data_root`` (``str``, optional): The directory in which the data is stored.
            If set to ``None``, the data is auto-downloaded from the server and saved
            into the default directory ``~/.dhg/datasets/``. Defaults to ``None``.
    """

    def __init__(self, data_root: Optional[str] = None) -> None:
        super().__init__("coauthorship_dblp", data_root)

        def pickled(filename, md5, *preprocess):
            # Descriptor for one pickle-backed item. The "preprocess" key is
            # only present when at least one preprocessing step is supplied.
            entry = {
                "upon": [{"filename": filename, "md5": md5}],
                "loader": load_from_pickle,
            }
            if preprocess:
                entry["preprocess"] = list(preprocess)
            return entry

        # Scalar statistics first, then the file-backed items in the same
        # order as before; the finished mapping is assigned in one step.
        content = {
            "num_classes": 6,
            "num_vertices": 41302,
            "num_edges": 22363,
            "dim_features": 1425,
        }
        content["features"] = pickled(
            "features.pkl",
            "b78fd31b2586d1e19a40b3f6cd9cc2e7",
            to_tensor,
            partial(norm_ft, ord=1),
        )
        content["edge_list"] = pickled(
            "edge_list.pkl",
            "c6bf5f9f3b9683bcc9b7bcc9eb8707d8",
        )
        content["labels"] = pickled(
            "labels.pkl",
            "2e7a792ea018028d582af8f02f2058ca",
            to_long_tensor,
        )
        content["train_mask"] = pickled(
            "train_mask.pkl",
            "a842b795c7cac4c2f98a56cf599bc1de",
            to_bool_tensor,
        )
        # NOTE(review): val_mask.pkl and test_mask.pkl are registered with the
        # same md5 -- confirm the two files are really meant to be identical.
        content["val_mask"] = pickled(
            "val_mask.pkl",
            "2ec4b7df7c5e6b355067a22c391ad578",
            to_bool_tensor,
        )
        content["test_mask"] = pickled(
            "test_mask.pkl",
            "2ec4b7df7c5e6b355067a22c391ad578",
            to_bool_tensor,
        )
        self._content = content