Source code for easygraph.utils.download

import hashlib
import warnings

from functools import wraps
from pathlib import Path

import requests


__all__ = [
    "check_file",
    "download_file",
    "download_and_check",
]


[docs]def download_file(url: str, file_path: Path): r"""Download a file from a url. Args: ``url`` (``str``): the url of the file ``file_path`` (``str``): the path to the file """ file_path.parent.mkdir(parents=True, exist_ok=True) r = requests.get(url, stream=True, verify=True) if r.status_code != 200: raise requests.HTTPError(f"{url} is not accessible.") with open(file_path, "wb") as f: for chunk in r.iter_content(chunk_size=1024): if chunk: f.write(chunk)
[docs]def check_file(file_path: Path, md5: str): r"""Check if a file is valid. Args: ``file_path`` (``Path``): The local path of the file. ``md5`` (``str``): The md5 of the file. Raises: FileNotFoundError: Not found the file. """ if not file_path.exists(): raise FileNotFoundError(f"{file_path} does not exist.") else: with open(file_path, "rb") as f: data = f.read() cur_md5 = hashlib.md5(data).hexdigest() return cur_md5 == md5
def _retry(n: int, exception_type=requests.HTTPError): r"""A decorator for retrying a function for n times. Args: ``n`` (``int``): The number of times to retry. """ def decorator(fetcher): @wraps(fetcher) def wrapper(*args, **kwargs): for i in range(n - 1): try: return fetcher(*args, **kwargs) except exception_type as e: warnings.warn(f"Retry downloading({i + 1}/{n}): {str(e)}") except Exception as e: raise e return fetcher(*args, **kwargs) # raise FileNotFoundError return wrapper return decorator
[docs]@_retry(3) def download_and_check(url: str, file_path: Path, md5: str): r"""Download a file from a url and check its integrity. Args: ``url`` (``str``): The url of the file. ``file_path`` (``Path``): The path to the file. ``md5`` (``str``): The md5 of the file. """ if not file_path.exists(): download_file(url, file_path) if not check_file(file_path, md5): file_path.unlink() raise ValueError( f"{file_path} is corrupted. We will delete it, and try to download it" " again." ) return True