Source code for easygraph.datasets.hypergraph.House_Committees

import requests

from easygraph.utils.exception import EasyGraphError


[docs]def request_text_from_url(url): """ Requests text data from the specified URL. Args: url (str): The URL from which to request the text data. Returns: str: The text content of the response if the request is successful. Raises: EasyGraphError: If a connection error occurs during the request or if the HTTP response status code indicates a failure. """ try: r = requests.get(url) except requests.ConnectionError: raise EasyGraphError("Connection Error!") if r.ok: return r.text else: raise EasyGraphError(f"Error: HTTP response {r.status_code}")
[docs]class House_Committees: """ A class for loading and processing the House Committees hypergraph dataset. This class fetches hyperedge, node label, node name, and label name data from predefined URLs, processes the data, and generates a hypergraph representation. It also provides access to various dataset attributes through properties and indexing. Attributes: data_root (str): The root URL for the data. If `data_root` is provided during initialization, it is set to "https://"; otherwise, it is `None`. hyperedges_path (str): The URL of the file containing hyperedge information. node_labels_path (str): The URL of the file containing node label information. node_names_path (str): The URL of the file containing node name information. label_names_path (str): The URL of the file containing label name information. _hyperedges (list): A list of tuples representing hyperedges. _node_labels (list): A list of node labels. _label_names (list): A list of label names. _node_names (list): A list of node names. _content (dict): A dictionary containing dataset statistics and data, including the number of classes, vertices, edges, the edge list, and node labels. """ def __init__(self, data_root=None): """ Initializes a new instance of the `House_Committees` class. Args: data_root (str, optional): The root URL for the data. If provided, it is set to "https://"; otherwise, it is `None`. Defaults to `None`. """ self.data_root = "https://" if data_root is not None else data_root self.hyperedges_path = "https://gitlab.com/easy-graph/easygraph-data-house-committees/-/raw/main/hyperedges-house-committees.txt?inline=false" self.node_labels_path = "https://gitlab.com/easy-graph/easygraph-data-house-committees/-/raw/main/node-labels-house-committees.txt?ref_type=heads&inline=false" self.node_names_path = "https://gitlab.com/easy-graph/easygraph-data-house-committees/-/raw/main/node-names-house-committees.txt?ref_type=heads&inline=false" self.label_names_path = "https://gitlab.com/easy-graph/easygraph-data-house-committees/-/raw/main/label-names-house-committees.txt?ref_type=heads&inline=false" self._hyperedges = [] self._node_labels = [] self._label_names = [] self._node_names = [] self.generate_hypergraph( hyperedges_path=self.hyperedges_path, node_labels_path=self.node_labels_path, node_names_path=self.node_names_path, label_names_path=self.label_names_path, ) self._content = { "num_classes": len(self._label_names), "num_vertices": len(self._node_labels), "num_edges": len(self._hyperedges), "edge_list": self._hyperedges, "labels": self._node_labels, }
[docs] def process_label_txt(self, data_str, delimiter="\n", transform_fun=str): """ Processes a string containing label data into a list of transformed values. Args: data_str (str): The input string containing label data. delimiter (str, optional): The delimiter used to split the input string. Defaults to "\n". transform_fun (callable, optional): A function used to transform each label value. Defaults to the `str` function. Returns: list: A list of transformed label values. """ data_str = data_str.strip() data_lst = data_str.split(delimiter) final_lst = [] for data in data_lst: data = data.strip() data = transform_fun(data) final_lst.append(data) return final_lst
def __getitem__(self, key: str): """ Retrieves a value from the `_content` dictionary using the specified key. Args: key (str): The key used to access the `_content` dictionary. Returns: Any: The value corresponding to the key in the `_content` dictionary. """ return self._content[key] @property def node_labels(self): """ Gets the list of node labels. Returns: list: A list of node labels. """ return self._node_labels @property def node_names(self): """ Gets the list of node names. Returns: list: A list of node names. """ return self._node_names @property def label_names(self): """ Gets the list of label names. Returns: list: A list of label names. """ return self._label_names @property def hyperedges(self): """ Gets the list of hyperedges. Returns: list: A list of tuples representing hyperedges. """ return self._hyperedges
[docs] def generate_hypergraph( self, hyperedges_path=None, node_labels_path=None, node_names_path=None, label_names_path=None, ): """ Generates a hypergraph by fetching and processing data from the specified URLs. Args: hyperedges_path (str, optional): The URL of the file containing hyperedge information. Defaults to `None`. node_labels_path (str, optional): The URL of the file containing node label information. Defaults to `None`. node_names_path (str, optional): The URL of the file containing node name information. Defaults to `None`. label_names_path (str, optional): The URL of the file containing label name information. Defaults to `None`. """ def fun(data): """ Converts a string to an integer and subtracts 1. Args: data (str): The input string to be converted. Returns: int: The converted integer value minus 1. """ data = int(data) - 1 return data hyperedges_info = request_text_from_url(hyperedges_path) hyperedges_info = hyperedges_info.strip() hyperedges_lst = hyperedges_info.split("\n") for hyperedge in hyperedges_lst: hyperedge = hyperedge.strip() hyperedge = [int(i) - 1 for i in hyperedge.split(",")] self._hyperedges.append(tuple(hyperedge)) # print(self.hyperedges) node_labels_info = request_text_from_url(node_labels_path) process_node_labels_info = self.process_label_txt( node_labels_info, transform_fun=fun ) self._node_labels = process_node_labels_info # print("process_node_labels_info:", process_node_labels_info) node_names_info = request_text_from_url(node_names_path) process_node_names_info = self.process_label_txt(node_names_info) self._node_names = process_node_names_info # print("process_node_names_info:", process_node_names_info) label_names_info = request_text_from_url(label_names_path) process_label_names_info = self.process_label_txt(label_names_info) self._label_names = process_label_names_info