From 118062a6717a61b150bd35c490bc8e26f200e096 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 9 May 2026 00:10:10 -0400 Subject: [PATCH 1/2] Allow offline imports with bundled data certification --- src/policyengine/provenance/manifest.py | 23 ++++++-- tests/test_release_manifests.py | 73 +++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 11 deletions(-) diff --git a/src/policyengine/provenance/manifest.py b/src/policyengine/provenance/manifest.py index aa4e644b..32120920 100644 --- a/src/policyengine/provenance/manifest.py +++ b/src/policyengine/provenance/manifest.py @@ -236,11 +236,17 @@ def get_data_release_manifest(country_id: str) -> DataReleaseManifest: if token: headers["Authorization"] = f"Bearer {token}" - response = requests.get( - https_release_manifest_uri(country_manifest.data_package), - headers=headers, - timeout=HF_REQUEST_TIMEOUT_SECONDS, - ) + try: + response = requests.get( + https_release_manifest_uri(country_manifest.data_package), + headers=headers, + timeout=HF_REQUEST_TIMEOUT_SECONDS, + ) + except requests.RequestException as exc: + raise DataReleaseManifestUnavailableError( + "Could not fetch the data release manifest from Hugging Face." + ) from exc + if response.status_code in (401, 403): raise DataReleaseManifestUnavailableError( "Could not fetch the data release manifest from Hugging Face. " @@ -250,7 +256,12 @@ def get_data_release_manifest(country_id: str) -> DataReleaseManifest: raise DataReleaseManifestUnavailableError( "No data release manifest was published for this data package." ) - response.raise_for_status() + try: + response.raise_for_status() + except requests.RequestException as exc: + raise DataReleaseManifestUnavailableError( + "Could not fetch the data release manifest from Hugging Face." + ) from exc return DataReleaseManifest.model_validate_json(response.text) diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py index 769efabe..fa7f7582 100644 --- a/tests/test_release_manifests.py +++ b/tests/test_release_manifests.py @@ -1,7 +1,10 @@ """Tests for bundled compatibility manifests and data release manifests.""" import json +import os import re +import subprocess +import sys from pathlib import Path from unittest.mock import MagicMock, patch @@ -316,6 +319,22 @@ def test__given_private_manifest_unavailable__then_bundled_certification_is_used assert certification == get_release_manifest("us").certification + def test__given_manifest_request_timeout__then_bundled_certification_is_used( + self, + ): + get_data_release_manifest.cache_clear() + + with patch( + "policyengine.provenance.manifest.requests.get", + side_effect=Timeout("network timeout"), + ): + certification = certify_data_release_compatibility( + "us", + runtime_model_version="1.687.0", + ) + + assert certification == get_release_manifest("us").certification + def test__given_private_manifest_unavailable_and_fingerprint_mismatch__then_fails( self, ): @@ -348,13 +367,13 @@ def test__given_private_manifest_unavailable_and_fingerprint_mismatch__then_fail else: raise AssertionError("Expected fingerprint mismatch to fail") - def test__given_manifest_fetch_failure__then_certification_does_not_fallback( + def test__given_manifest_fetch_failure_and_version_mismatch__then_fallback_fails( self, ): get_data_release_manifest.cache_clear() with patch( - "policyengine.provenance.manifest.get_data_release_manifest", + "policyengine.provenance.manifest.requests.get", side_effect=Timeout("network timeout"), ): try: @@ -362,10 +381,54 @@ def test__given_manifest_fetch_failure__then_certification_does_not_fallback( "us", runtime_model_version="1.602.0", ) - except Timeout as error: - assert "network timeout" in str(error) + except DataReleaseManifestUnavailableError as error: + assert "Could not fetch" in str(error) else: - raise AssertionError("Expected timeout to propagate") + raise AssertionError("Expected offline mismatched version to fail") + + def test__given_offline_hf__then_us_import_uses_bundled_certification( + self, + tmp_path, + ): + sitecustomize = tmp_path / "sitecustomize.py" + sitecustomize.write_text( + "\n".join( + [ + "import requests", + "from requests import Timeout", + "", + "def offline_get(*args, **kwargs):", + " raise Timeout('offline')", + "", + "requests.get = offline_get", + ] + ) + ) + env = os.environ.copy() + existing_pythonpath = env.get("PYTHONPATH") + env["PYTHONPATH"] = ( + f"{tmp_path}{os.pathsep}{existing_pythonpath}" + if existing_pythonpath + else str(tmp_path) + ) + + result = subprocess.run( + [ + sys.executable, + "-c", + ( + "import policyengine.tax_benefit_models.us as us; " + "print(us.model.data_certification.certified_by)" + ), + ], + capture_output=True, + text=True, + check=False, + env=env, + ) + + assert result.returncode == 0, result.stderr + assert "policyengine.py bundled manifest" in result.stdout def test__given_mismatched_version_and_fingerprint__then_certification_fails(self): get_data_release_manifest.cache_clear() From 5344759816b8b91a61febbe048f1d2d88d68af20 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 9 May 2026 00:11:27 -0400 Subject: [PATCH 2/2] Add offline import changelog --- changelog.d/offline-import.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/offline-import.fixed.md diff --git a/changelog.d/offline-import.fixed.md b/changelog.d/offline-import.fixed.md new file mode 100644 index 00000000..99423774 --- /dev/null +++ b/changelog.d/offline-import.fixed.md @@ -0,0 +1 @@ +Country model imports now work without network access when the bundled release manifest already certifies the installed country package version. Hugging Face release-manifest transport failures fall back to bundled data certification only when the runtime model version and data build fingerprint gates still match.