Source code for dival.util.zenodo_download

# -*- coding: utf-8 -*-
import os
import requests
from dival.util.input import input_yes_no
from dival.util.download import download_file, compute_md5sum


def _confirm(auto_yes):
    """Answer a yes/no question whose prompt has already been printed.

    If ``auto_yes`` is true, "y" is echoed and ``True`` is returned without
    waiting for user input; otherwise the answer of ``input_yes_no()`` is
    returned.
    """
    if auto_yes:
        print("y")
        return True
    return input_yes_no()


def download_zenodo_record(record_id, base_path='', md5sum_check=True,
                           auto_yes=False):
    """
    Download a zenodo record.

    Unfortunately, downloads cannot be resumed, so this method is only
    recommended for stable internet connections.

    Parameters
    ----------
    record_id : str
        Record id.
    base_path : str, optional
        Path to store the downloaded files in. Default is the current folder.
    md5sum_check : bool, optional
        Whether to check the MD5 sum of each downloaded file.
        Default: `True`
    auto_yes : bool, optional
        Whether to answer user input questions with "y" by default.
        User input questions are:

            If ``md5sum_check=True``, in case of a checksum mismatch:
            whether to retry downloading.

            If ``md5sum_check=False``, in case of an existing file of
            correct size: whether to re-download.

        Note that with ``md5sum_check=True`` and ``auto_yes=True`` a file
        whose checksum never matches is re-downloaded indefinitely.
        Default: `False`

    Returns
    -------
    success : bool
        If ``md5sum_check=True``, whether all sums matched. Otherwise the
        returned value is always ``True``.

    Raises
    ------
    requests.HTTPError
        If the request for the record metadata returns an HTTP error status
        (e.g. the record does not exist).
    """
    r = requests.get('https://zenodo.org/api/records/{}'.format(record_id))
    # Surface HTTP errors directly instead of failing later with an
    # unrelated ``KeyError`` when the error payload has no 'files' entry.
    r.raise_for_status()
    files = r.json()['files']
    success = True
    for i, f in enumerate(files):
        url = f['links']['self']
        filename = f['key']
        path = os.path.join(base_path, filename)
        size = f['size']
        size_kb = size / 1000
        checksum = f['checksum']
        try:
            size_existing = os.stat(path).st_size
        except OSError:
            # file is missing or inaccessible; -1 never equals a real size
            size_existing = -1
        if size_existing == size:
            if md5sum_check:
                print("File {:d}/{:d}, '{}', {}KB already exists with correct "
                      "size. Will check md5 sum now.".format(
                          i+1, len(files), filename, size_kb))
                md5sum_existing = compute_md5sum(path)
                # the record stores checksums in the form 'md5:<hexdigest>'
                md5sum_matches = ("md5:" + md5sum_existing == checksum)
                if md5sum_matches:
                    print("skipping file {}, md5 checksum matches".format(
                        filename))
                    continue
                print("existing file {} will be overwritten, md5 checksum "
                      "does not match".format(filename))
            else:
                print("File {:d}/{:d}, '{}', {}KB already exists with correct "
                      "size. Re-download this file? (y)/n".format(
                          i+1, len(files), filename, size_kb))
                if not _confirm(auto_yes):
                    print("skipping existing file {}".format(filename))
                    continue
        print("downloading file {:d}/{:d}: '{}', {}KB".format(
            i+1, len(files), filename, size_kb))
        if md5sum_check:
            retry = True
            md5sum_matches = False
            # download until the checksum matches or the user declines
            while retry and not md5sum_matches:
                md5sum = download_file(url, path, md5sum=True)
                md5sum_matches = ("md5:" + md5sum == checksum)
                if not md5sum_matches:
                    print("md5 checksum does not match for file '{}'. Retry "
                          "downloading? (y)/n".format(filename))
                    retry = _confirm(auto_yes)
            if not md5sum_matches:
                # the user gave up on this file: abort the whole record
                success = False
                print('record download aborted')
                break
        else:
            download_file(url, path, md5sum=False)
    return success