diff --git a/floatcsep/model.py b/floatcsep/model.py index 5afe124..7a00a0d 100644 --- a/floatcsep/model.py +++ b/floatcsep/model.py @@ -196,10 +196,23 @@ def get_source(self) -> None: os.makedirs(container, exist_ok=True) + if expected_file.exists() and expected_file.is_file() and not self.force_stage: + return + + os.makedirs(container, exist_ok=True) + + if expected_file.exists() and expected_file.is_file() and not self.force_stage: + return + if self.giturl: from_git(self.giturl, str(container), branch=self.repo_hash, force=self.force_stage) elif self.zenodo_id: - from_zenodo(self.zenodo_id, str(container), force=True) + from_zenodo( + self.zenodo_id, + str(container), + force=self.force_stage, + keys=[expected_file.name], + ) else: pass @@ -297,7 +310,13 @@ def stage( and those to be generated, as well as input catalog and arguments file. """ - if self.force_stage or not self.registry.path.exists(): + need_source = ( + self.force_stage + or not self.registry.path.exists() + or (self.registry.path.is_dir() and not any(self.registry.path.iterdir())) + ) + + if need_source: os.makedirs(self.registry.dir, exist_ok=True) self.get_source(self.zenodo_id, self.giturl, branch=self.repo_hash) @@ -338,7 +357,7 @@ def get_source(self, zenodo_id: int = None, giturl: str = None, **kwargs) -> Non if self.giturl: from_git(self.giturl, target_dir.as_posix(), branch=self.repo_hash, force=False) elif self.zenodo_id: - from_zenodo(self.zenodo_id, target_dir.as_posix(), force=True) + from_zenodo(self.zenodo_id, target_dir.as_posix(), force=self.force_stage) else: pass diff --git a/floatcsep/utils/accessors.py b/floatcsep/utils/accessors.py index 998fabb..a2ef4a7 100644 --- a/floatcsep/utils/accessors.py +++ b/floatcsep/utils/accessors.py @@ -1,66 +1,100 @@ +import time import git import requests import hashlib import os -import sys import shutil -def from_zenodo(record_id, folder, force=False): - """ - Download data from a Zenodo repository. +def from_zenodo(record_id, folder, force=False, keys=None): + record_url = f"https://zenodo.org/api/records/{record_id}" + max_tries = 5 - Downloads if file does not exist, checksum has changed in local respect to url or force + os.makedirs(folder, exist_ok=True) - Args: - record_id: corresponding to the Zenodo repository - folder: where the repository files will be downloaded - force: force download even if file exists and checksum passes + for attempt in range(1, max_tries + 1): + r = requests.get(record_url, timeout=30, headers={"User-Agent": "floatcsep"}) - Returns: - """ - # Grab the urls and filenames and checksums - r = requests.get(f"https://zenodo.org/api/records/{record_id}", timeout=30) - download_urls = [f["links"]["self"] for f in r.json()["files"]] - filenames = [(f["key"], f["checksum"]) for f in r.json()["files"]] + if r.status_code == 200: + break + + if r.status_code == 403: + text = (r.text or "").lower() + if "unusual traffic" in text or " None: - """ - Downloads files (from zenodo). - - Args: - url (str): the url where the file is located - filename (str): the filename required. - """ - progress_bar_length = 72 - block_size = 1024 - - r = requests.get(url, timeout=30, stream=True) - total_size = r.headers.get("content-length", False) - if not total_size: - with requests.head(url, timeout=30) as h: - try: - total_size = int(h.headers.get("Content-Length", 0)) - except TypeError: - total_size = 0 - else: - total_size = int(total_size) - download_size = 0 + os.makedirs(os.path.dirname(filename) or ".", exist_ok=True) + + r = requests.get(url, timeout=30, stream=True, headers={"User-Agent": "floatcsep"}) + r.raise_for_status() + + cl = r.headers.get("Content-Length") or r.headers.get("content-length") + try: + total_size = int(cl) if cl else 0 + except ValueError: + total_size = 0 + + base = os.path.basename(filename) if total_size: - print(f"Downloading file with size of {total_size / block_size:.3f} kB") + print(f"{base} ({total_size / (1024 * 1024):.2f} MB)") else: - print("Downloading file with unknown size") + print(f"{base}") + with open(filename, "wb") as f: - for data in r.iter_content(chunk_size=block_size): - download_size += len(data) + for data in r.iter_content(chunk_size=1024 * 64): + if not data: + continue f.write(data) - if total_size: - progress = int(progress_bar_length * download_size / total_size) - sys.stdout.write( - "\r[{}{}] {:.1f}%".format( - "█" * progress, - "." * (progress_bar_length - progress), - 100 * download_size / total_size, - ) - ) - sys.stdout.flush() - sys.stdout.write("\n") + + print(f"Complete: {base}") def check_hash(filename, checksum): - """Checks if existing file hash matches checksum from url.""" algorithm, value = checksum.split(":") if not os.path.exists(filename): return value, "invalid" diff --git a/tests/integration/test_model_accessors.py b/tests/integration/test_model_accessors.py index c4fdf03..f871078 100644 --- a/tests/integration/test_model_accessors.py +++ b/tests/integration/test_model_accessors.py @@ -216,9 +216,7 @@ def test_zenodo_fail(self): model = self.init_model(name=name, model_path=path_, zenodo_id=13117711) - with self.assertRaises( - Exception - ): # Mostly for FileNotFound, but connection errors can also arise + with self.assertRaises(FileNotFoundError): model.get_source() shutil.rmtree(dir_, ignore_errors=True) diff --git a/tests/unit/test_accessors.py b/tests/unit/test_accessors.py index e4bfa00..dbcb743 100644 --- a/tests/unit/test_accessors.py +++ b/tests/unit/test_accessors.py @@ -88,7 +88,7 @@ def test_zenodo_query(self): self._assert_files_ok() return try: - from_zenodo(4739912, zenodo_dir()) + from_zenodo(4739912, zenodo_dir(), keys=["dummy.txt", "dummy.tar"]) except Exception as e: self.skipTest(f"Zenodo flaky/unavailable: {e!r}") diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py index af508b1..0173164 100644 --- a/tests/unit/test_model.py +++ b/tests/unit/test_model.py @@ -283,7 +283,7 @@ def test_get_source( mock_from_zenodo.assert_called_once_with( self.model.zenodo_id, self.mock_registry_instance.path.as_posix(), - force=True, + force=False, ) mock_from_git.assert_not_called() diff --git a/tutorials/case_d/models.yml b/tutorials/case_d/models.yml index aa19ea6..ca8c11f 100644 --- a/tutorials/case_d/models.yml +++ b/tutorials/case_d/models.yml @@ -5,20 +5,10 @@ N10L11: TEAM=N10L11.csv N25L11: TEAM=N25L11.csv N50L11: TEAM=N50L11.csv - N100L11: TEAM=N100L11.csv - SN10L11: TEAM=SN10L11.csv - SN25L11: TEAM=SN25L11.csv - SN50L11: TEAM=SN50L11.csv - SN100L11: TEAM=SN100L11.csv - WHEEL: zenodo_id: 6255575 path: models/wheel flavours: N10L11: WHEEL=N10L11.csv N25L11: WHEEL=N25L11.csv - N50L11: WHEEL=N50L11.csv - N100L11: WHEEL=N100L11.csv - SN10L11: WHEEL=SN10L11.csv - SN25L11: WHEEL=SN25L11.csv - SN50L11: WHEEL=SN50L11.csv - SN100L11: WHEEL=SN100L11.csv \ No newline at end of file + N50L11: WHEEL=N50L11.csv \ No newline at end of file diff --git a/tutorials/case_i/pymock/Dockerfile b/tutorials/case_i/pymock/Dockerfile index 46f7a43..dd07e26 100644 --- a/tutorials/case_i/pymock/Dockerfile +++ b/tutorials/case_i/pymock/Dockerfile @@ -28,7 +28,7 @@ RUN python3 -m venv $VIRTUAL_ENV && pip install --upgrade pip setuptools wheel # Copy the repository from the local machine to the Docker container. ## *Only the needed folders/files for the model build* COPY --chown=$USER_UID:$USER_GID pymock/ ./pymock/ -COPY --chown=$USER_UID:$USER_GID setup.cfg run.py setup.py ./ +COPY --chown=$USER_UID:$USER_GID setup.cfg setup.py ./ # Install the pymock package. ## *Uses pip to install setup.cfg and requirements/instructions therein* diff --git a/tutorials/case_i/pymock_slow/Dockerfile b/tutorials/case_i/pymock_slow/Dockerfile index 46f7a43..dd07e26 100644 --- a/tutorials/case_i/pymock_slow/Dockerfile +++ b/tutorials/case_i/pymock_slow/Dockerfile @@ -28,7 +28,7 @@ RUN python3 -m venv $VIRTUAL_ENV && pip install --upgrade pip setuptools wheel # Copy the repository from the local machine to the Docker container. ## *Only the needed folders/files for the model build* COPY --chown=$USER_UID:$USER_GID pymock/ ./pymock/ -COPY --chown=$USER_UID:$USER_GID setup.cfg run.py setup.py ./ +COPY --chown=$USER_UID:$USER_GID setup.cfg setup.py ./ # Install the pymock package. ## *Uses pip to install setup.cfg and requirements/instructions therein*