# -*- coding: utf-8 -*-
import os
import requests
from math import ceil
import tqdm
import hashlib
[docs]
def download_file(url, filename=None, chunk_size=1024, verbose=False,
                  md5sum=True):
    """
    Download file with progressbar.

    Based on
    `<https://gist.github.com/ruxi/5d6803c116ec1130d484a4ab8c00c603>`_
    (MIT License).

    Parameters
    ----------
    url : str
        URL to download from.
    filename : str, optional
        Local destination filename (including path).
        By default, the file is stored under the current folder and the name
        given by the last part of `url`.
    chunk_size : int, optional
        Number of bytes in a chunk.
        Default: `1024`
    verbose : bool, optional
        Whether to print additional information.
        Default: `False`
    md5sum : bool, optional
        Whether to compute and return the MD5 checksum (hex-digest).
        Default: `True`

    Returns
    -------
    md5sum : str or `None`
        Hex-digest of the MD5 hash, if ``md5sum=True``, otherwise `None`.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status (4xx/5xx), so that
        an error page is never stored as if it were the requested file.
    """
    local_filename = filename or os.path.join(".", url.split('/')[-1])
    hash_md5 = hashlib.md5() if md5sum else None

    # The context manager releases the streamed connection even when the
    # download is interrupted by an exception.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()

        content_length = r.headers.get('Content-Length')
        is_chunked = (
            r.headers.get('Transfer-Encoding', 'identity') == 'chunked')
        if is_chunked or content_length is None:
            # Total size unknown: the progress bar only counts chunks.
            num_chunks = None
            if verbose:
                print("downloading")
        else:
            file_size = int(content_length)
            # Integer ceiling division (no float round-trip).
            num_chunks = -(-file_size // chunk_size)
            if verbose:
                print("downloading {:d} bytes".format(file_size))

        with open(local_filename, 'wb') as fp:
            for chunk in tqdm.tqdm(r.iter_content(chunk_size=chunk_size),
                                   total=num_chunks,
                                   unit='chunks',
                                   desc=local_filename,
                                   leave=True):
                if chunk:  # skip empty chunks
                    fp.write(chunk)
                    if hash_md5 is not None:
                        hash_md5.update(chunk)

    if hash_md5 is None:
        return None
    return hash_md5.hexdigest()
[docs]
def compute_md5sum(filename, show_pbar=True):
    """
    Compute MD5 checksum of an existing file.

    The file is read in fixed-size chunks until end-of-file, so the whole
    content is hashed even if the size reported by the file system does not
    match the number of bytes that can actually be read.

    Parameters
    ----------
    filename : str
        Filename including path.
    show_pbar : bool
        Show tqdm progress bar.
        Default: `True`

    Returns
    -------
    md5sum : str
        Hex-digest of the MD5 hash.
    """
    CHUNK_SIZE = 1024
    hash_md5 = hashlib.md5()
    with open(filename, 'rb') as fp:
        # ``read`` returns b'' at end-of-file, which stops the iterator.
        chunks = iter(lambda: fp.read(CHUNK_SIZE), b'')
        if show_pbar:
            # The expected chunk count is only used for the progress
            # display; it does not limit how much of the file is read.
            file_size = os.stat(filename).st_size
            num_chunks = -(-file_size // CHUNK_SIZE)  # integer ceil
            chunks = tqdm.tqdm(chunks, total=num_chunks, unit='chunks',
                               desc='md5sum')
        for chunk in chunks:
            hash_md5.update(chunk)
    return hash_md5.hexdigest()