From 2750c83cd6f949253339fdeb4ef0bef3499618f6 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 17 May 2026 22:57:29 -0400 Subject: [PATCH] Allow TRO fallback for pinned certified datasets --- src/policyengine/provenance/trace.py | 10 ++++---- tests/test_trace_tro.py | 34 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/policyengine/provenance/trace.py b/src/policyengine/provenance/trace.py index d120c0a8..ff0e39dc 100644 --- a/src/policyengine/provenance/trace.py +++ b/src/policyengine/provenance/trace.py @@ -330,10 +330,12 @@ def build_trace_tro_from_release_bundle( else None ) if data_release_manifest is not None and dataset_artifact is None: - raise ValueError( - "Data release manifest does not include the certified dataset " - f"'{certified_artifact.dataset}'." - ) + if certified_artifact.sha256 is None: + raise ValueError( + "Data release manifest does not include the certified dataset " + f"'{certified_artifact.dataset}'." + ) + data_release_manifest = None dataset_sha256 = certified_artifact.sha256 or ( dataset_artifact.sha256 if dataset_artifact is not None else None ) diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py index c4ca6623..f84fcde5 100644 --- a/tests/test_trace_tro.py +++ b/tests/test_trace_tro.py @@ -334,6 +334,40 @@ def test__given_rewritten_data_manifest__then_tro_hashes_original_source( assert data_manifest_artifact["trov:sha256"] == source_sha256 + def test__given_data_manifest_without_certified_dataset__then_falls_back( + self, + ): + data_manifest = _us_data_release_manifest() + data_manifest.artifacts.pop("enhanced_cps_2024") + + tro = build_trace_tro_from_release_bundle( + get_release_manifest("us"), + data_manifest, + fetch_pypi=_fake_fetch_pypi, + ) + + artifacts = tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"] + artifact_ids = {a["@id"].rsplit("/", 1)[-1] for a in artifacts} + assert "dataset" in artifact_ids + assert "data_release_manifest" not in artifact_ids + + def test__given_no_dataset_hash_source__then_raises(self): + country_manifest = get_release_manifest("us").model_copy(deep=True) + assert country_manifest.certified_data_artifact is not None + country_manifest.certified_data_artifact.sha256 = None + data_manifest = _us_data_release_manifest() + data_manifest.artifacts.pop("enhanced_cps_2024") + + with pytest.raises( + ValueError, + match="Data release manifest does not include the certified dataset", + ): + build_trace_tro_from_release_bundle( + country_manifest, + data_manifest, + fetch_pypi=_fake_fetch_pypi, + ) + def test__given_certification__then_fields_are_machine_readable( self, us_bundle_tro ):