from functools import partial
from typing import Optional
from easygraph.datapipe import load_from_pickle
from easygraph.datapipe import norm_ft
from easygraph.datapipe import to_bool_tensor
from easygraph.datapipe import to_long_tensor
from easygraph.datapipe import to_tensor
from easygraph.datasets.hypergraph.hypergraph_dataset_base import BaseData
class CoauthorshipCora(BaseData):
    r"""The Co-authorship Cora dataset is a citation network dataset for the vertex classification task.
    More details see the `HyperGCN <https://papers.nips.cc/paper/2019/file/1efa39bcaec6f3900149160693694536-Paper.pdf>`_ paper.

    The content of the Co-authorship Cora dataset includes the following:

    - ``num_classes``: The number of classes: :math:`7`.
    - ``num_vertices``: The number of vertices: :math:`2,708`.
    - ``num_edges``: The number of edges: :math:`1,072`.
    - ``dim_features``: The dimension of features: :math:`1,433`.
    - ``features``: The vertex feature matrix. ``torch.Tensor`` with size :math:`(2,708 \times 1,433)`.
    - ``edge_list``: The edge list. ``List`` with length :math:`1,072`.
    - ``labels``: The label list. ``torch.LongTensor`` with size :math:`(2,708, )`.
    - ``train_mask``: The train mask. ``torch.BoolTensor`` with size :math:`(2,708, )`.
    - ``val_mask``: The validation mask. ``torch.BoolTensor`` with size :math:`(2,708, )`.
    - ``test_mask``: The test mask. ``torch.BoolTensor`` with size :math:`(2,708, )`.

    Args:
        ``data_root`` (``str``, optional): The ``data_root`` has stored the data. If set to ``None``, this function will auto-download from server and save into the default direction ``~/.dhg/datasets/``. Defaults to ``None``.
    """

    def __init__(self, data_root: Optional[str] = None) -> None:
        super().__init__("coauthorship_cora", data_root)
        # Declarative manifest consumed by BaseData: each item either is a
        # plain scalar or describes a pickled file ("upon") with its md5
        # checksum, a loader, and an optional preprocessing pipeline.
        self._content = {
            "num_classes": 7,
            "num_vertices": 2708,
            "num_edges": 1072,
            "dim_features": 1433,
            "features": {
                "upon": [
                    {
                        "filename": "features.pkl",
                        "md5": "14257c0e24b4eb741b469a351e524785",
                    }
                ],
                "loader": load_from_pickle,
                # L1-normalize each feature row after converting to a tensor.
                "preprocess": [to_tensor, partial(norm_ft, ord=1)],
            },
            "edge_list": {
                "upon": [
                    {
                        "filename": "edge_list.pkl",
                        "md5": "a17ff337f1b9099f5a9d4d670674e146",
                    }
                ],
                # Kept as a plain Python list; no tensor conversion needed.
                "loader": load_from_pickle,
            },
            "labels": {
                "upon": [
                    {
                        "filename": "labels.pkl",
                        "md5": "c8d11c452e0be69f79a47dd839279117",
                    }
                ],
                "loader": load_from_pickle,
                "preprocess": [to_long_tensor],
            },
            "train_mask": {
                "upon": [
                    {
                        "filename": "train_mask.pkl",
                        "md5": "111db6c6f986be2908378df7bdca7a9b",
                    }
                ],
                "loader": load_from_pickle,
                "preprocess": [to_bool_tensor],
            },
            "val_mask": {
                "upon": [
                    {
                        "filename": "val_mask.pkl",
                        "md5": "ffab1055193ffb2fe74822bb575d332a",
                    }
                ],
                "loader": load_from_pickle,
                "preprocess": [to_bool_tensor],
            },
            "test_mask": {
                "upon": [
                    {
                        "filename": "test_mask.pkl",
                        # NOTE(review): identical md5 to val_mask.pkl above —
                        # either the split files are genuinely identical
                        # upstream or one checksum was copy-pasted; verify
                        # against the hosted dataset.
                        "md5": "ffab1055193ffb2fe74822bb575d332a",
                    }
                ],
                "loader": load_from_pickle,
                "preprocess": [to_bool_tensor],
            },
        }
class CoauthorshipDBLP(BaseData):
    r"""The Co-authorship DBLP dataset is a citation network dataset for the vertex classification task.
    More details see the `HyperGCN <https://papers.nips.cc/paper/2019/file/1efa39bcaec6f3900149160693694536-Paper.pdf>`_ paper.

    The content of the Co-authorship DBLP dataset includes the following:

    - ``num_classes``: The number of classes: :math:`6`.
    - ``num_vertices``: The number of vertices: :math:`41,302`.
    - ``num_edges``: The number of edges: :math:`22,363`.
    - ``dim_features``: The dimension of features: :math:`1,425`.
    - ``features``: The vertex feature matrix. ``torch.Tensor`` with size :math:`(41,302 \times 1,425)`.
    - ``edge_list``: The edge list. ``List`` with length :math:`22,363`.
    - ``labels``: The label list. ``torch.LongTensor`` with size :math:`(41,302, )`.
    - ``train_mask``: The train mask. ``torch.BoolTensor`` with size :math:`(41,302, )`.
    - ``val_mask``: The validation mask. ``torch.BoolTensor`` with size :math:`(41,302, )`.
    - ``test_mask``: The test mask. ``torch.BoolTensor`` with size :math:`(41,302, )`.

    Args:
        ``data_root`` (``str``, optional): The ``data_root`` has stored the data. If set to ``None``, this function will auto-download from server and save into the default direction ``~/.dhg/datasets/``. Defaults to ``None``.
    """

    def __init__(self, data_root: Optional[str] = None) -> None:
        super().__init__("coauthorship_dblp", data_root)
        # Declarative manifest consumed by BaseData: scalars plus, for each
        # downloadable item, its pickled file ("upon") with md5 checksum, a
        # loader, and an optional preprocessing pipeline.
        self._content = {
            "num_classes": 6,
            "num_vertices": 41302,
            "num_edges": 22363,
            "dim_features": 1425,
            "features": {
                "upon": [
                    {
                        "filename": "features.pkl",
                        "md5": "b78fd31b2586d1e19a40b3f6cd9cc2e7",
                    }
                ],
                "loader": load_from_pickle,
                # L1-normalize each feature row after converting to a tensor.
                "preprocess": [to_tensor, partial(norm_ft, ord=1)],
            },
            "edge_list": {
                "upon": [
                    {
                        "filename": "edge_list.pkl",
                        "md5": "c6bf5f9f3b9683bcc9b7bcc9eb8707d8",
                    }
                ],
                # Kept as a plain Python list; no tensor conversion needed.
                "loader": load_from_pickle,
            },
            "labels": {
                "upon": [
                    {
                        "filename": "labels.pkl",
                        "md5": "2e7a792ea018028d582af8f02f2058ca",
                    }
                ],
                "loader": load_from_pickle,
                "preprocess": [to_long_tensor],
            },
            "train_mask": {
                "upon": [
                    {
                        "filename": "train_mask.pkl",
                        "md5": "a842b795c7cac4c2f98a56cf599bc1de",
                    }
                ],
                "loader": load_from_pickle,
                "preprocess": [to_bool_tensor],
            },
            "val_mask": {
                "upon": [
                    {
                        "filename": "val_mask.pkl",
                        "md5": "2ec4b7df7c5e6b355067a22c391ad578",
                    }
                ],
                "loader": load_from_pickle,
                "preprocess": [to_bool_tensor],
            },
            "test_mask": {
                "upon": [
                    {
                        "filename": "test_mask.pkl",
                        # NOTE(review): identical md5 to val_mask.pkl above —
                        # either the split files are genuinely identical
                        # upstream or one checksum was copy-pasted; verify
                        # against the hosted dataset.
                        "md5": "2ec4b7df7c5e6b355067a22c391ad578",
                    }
                ],
                "loader": load_from_pickle,
                "preprocess": [to_bool_tensor],
            },
        }