Source code for easygraph.utils.download
import hashlib
import warnings
from functools import wraps
from pathlib import Path
import requests
# Public names exported by a star-import of this module.
__all__ = [
"check_file",
"download_file",
"download_and_check",
]
def download_file(url: str, file_path: Path):
    r"""Download a file from a url.

    The body is streamed into a temporary sibling file (``<name>.part``) that
    is renamed to ``file_path`` only after the transfer has finished, so an
    interrupted download never leaves a truncated file at ``file_path``.

    Args:
        ``url`` (``str``): The url of the file.
        ``file_path`` (``Path``): The local path the file is saved to. Missing parent directories are created.

    Raises:
        requests.HTTPError: The server did not answer with status code 200.
    """
    file_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = file_path.with_name(file_path.name + ".part")
    # Using the response as a context manager guarantees that the streamed
    # connection is released even when the status check or the write fails.
    with requests.get(url, stream=True, verify=True) as r:
        if r.status_code != 200:
            raise requests.HTTPError(f"{url} is not accessible.")
        try:
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        except BaseException:
            # Also covers KeyboardInterrupt: discard the partial data, then
            # let the original exception propagate unchanged.
            if tmp_path.exists():
                tmp_path.unlink()
            raise
    # Publish the complete file in a single step.
    tmp_path.replace(file_path)
def check_file(file_path: Path, md5: str):
    r"""Check if a file is valid by comparing its md5 digest.

    The file is hashed in fixed-size chunks so that large files are never
    loaded into memory as a whole.

    Args:
        ``file_path`` (``Path``): The local path of the file.
        ``md5`` (``str``): The expected md5 hex digest of the file.

    Returns:
        ``bool``: ``True`` if the md5 hex digest of the file equals ``md5``, ``False`` otherwise.

    Raises:
        FileNotFoundError: Not found the file.
    """
    if not file_path.exists():
        raise FileNotFoundError(f"{file_path} does not exist.")
    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        # ``f.read`` returns b"" at end of file, which stops the iterator.
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest() == md5
def _retry(n: int, exception_type=requests.HTTPError):
    r"""Build a decorator that calls the wrapped function up to ``n`` times.

    A failure of type ``exception_type`` on any attempt but the last one is
    reported as a warning and the call is repeated. Any other exception, and
    any exception raised by the last attempt, propagates to the caller.

    Args:
        ``n`` (``int``): The maximum number of attempts.
        ``exception_type``: The exception class (or tuple of classes) that triggers another attempt. Defaults to ``requests.HTTPError``.
    """

    def decorator(fetcher):
        @wraps(fetcher)
        def wrapper(*args, **kwargs):
            # Attempts 1 .. n-1 are guarded; a matching failure only warns.
            for attempt in range(1, n):
                try:
                    return fetcher(*args, **kwargs)
                except exception_type as err:
                    warnings.warn(f"Retry downloading({attempt}/{n}): {str(err)}")
            # Last attempt: whatever it raises reaches the caller.
            return fetcher(*args, **kwargs)

        return wrapper

    return decorator
# ValueError is retried as well: a checksum mismatch deletes the file, so the
# next attempt downloads it again, which is what the error message announces.
@_retry(3, exception_type=(requests.HTTPError, ValueError))
def download_and_check(url: str, file_path: Path, md5: str):
    r"""Download a file from a url and check its integrity.

    The download is skipped when ``file_path`` already exists. A file whose
    md5 digest does not match is deleted, and the whole procedure is attempted
    up to three times before the error is propagated.

    Args:
        ``url`` (``str``): The url of the file.
        ``file_path`` (``Path``): The path to the file.
        ``md5`` (``str``): The md5 of the file.

    Returns:
        ``bool``: ``True`` once the file exists and its md5 digest matches.

    Raises:
        requests.HTTPError: The url is still not accessible on the last attempt.
        ValueError: The file is still corrupted on the last attempt.
    """
    if not file_path.exists():
        download_file(url, file_path)
    if not check_file(file_path, md5):
        # Remove the bad file so the next attempt fetches a fresh copy.
        file_path.unlink()
        raise ValueError(
            f"{file_path} is corrupted. We will delete it, and try to download it"
            " again."
        )
    return True