From 9c38d0db9d5770a8f28199a3d715c34ed0c55bb8 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 21 Apr 2026 11:38:41 -0400 Subject: [PATCH 1/6] Add TRACE case study writeup for AEA / TRACE grant team Working draft describing the PolicyEngine use case for TRACE, prepared after the 2026-04-21 meeting with Lars Vilhuber, Tara Watson, John Sabelhaus, Tim Clark, and Casper. Structured around the reframe that emerged in the meeting: TRACE should wrap PolicyEngine infrastructure (the us-data build pipeline and policyengine.org webapp runs) rather than be embedded in the end-user Python package. Covers: - Which PolicyEngine surfaces warrant institutional certification - The precise claims a TRO lets us make (rules, data, reform, inputs, outputs including per-household frame, institutional attestation) - UK data as the strongest TRACE case for us - Three concrete implementation workstreams with linked issues - What TRACE gets from us as a case study (infrastructure-certifying vs author-certifying; microdata provenance; pe:* extension discipline) - Three open questions (per-household frame default, retention and durable addressing, signing and key rotation) Lars explicitly asked for this kind of writeup during the meeting to feed the TRACE grant proposal and vocabulary design work. Co-Authored-By: Claude Opus 4.7 (1M context) --- changelog.d/trace-case-study-for-aea.added.md | 1 + docs/trace-case-study.md | 79 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 changelog.d/trace-case-study-for-aea.added.md create mode 100644 docs/trace-case-study.md diff --git a/changelog.d/trace-case-study-for-aea.added.md b/changelog.d/trace-case-study-for-aea.added.md new file mode 100644 index 00000000..d52fefd5 --- /dev/null +++ b/changelog.d/trace-case-study-for-aea.added.md @@ -0,0 +1 @@ +Added `docs/trace-case-study.md`, a working draft describing the PolicyEngine TRACE use case for Lars Vilhuber (AEA Data Editor) and the TRACE project team. 
Covers which PolicyEngine surfaces warrant institutional certification, the precise claims a TRO lets us make, UK data as the strongest case, the three concrete workstreams (us-data build TROs, policyengine-api webapp-run TROs, policyengine-app "Cite this result" UI), and open questions we want feedback on. diff --git a/docs/trace-case-study.md b/docs/trace-case-study.md new file mode 100644 index 00000000..a093841e --- /dev/null +++ b/docs/trace-case-study.md @@ -0,0 +1,79 @@ +# PolicyEngine as a TRACE case study + +_Working draft, April 2026 — prepared after a 2026-04-21 meeting with Lars Vilhuber (AEA Data Editor), Tara Watson (Brookings), John Sabelhaus, Tim Clark, and Casper (TRACE project)._ + +## What TRACE is for, in the PolicyEngine case + +TRACE (TRAnsparency CErtified) defines a standards-based vocabulary — TROv 0.1 at `https://w3id.org/trace/trov/0.1#` — for documenting analytical artifacts by content hash under a SHACL-validatable JSON-LD grammar. A Transparent Research Object (TRO) binds inputs, code, and outputs so that a reader who cannot re-run the analysis can still verify that a specific set of files produced a specific set of results. + +The question we walked into the meeting with was: where in the PolicyEngine stack does TRACE add real value? + +The answer we walked out with is narrower and cleaner than what we had been building toward. TRACE is not a feature of the `policyengine` Python package for researchers running simulations on their own hardware. For that use case, readers who want to check a paper's numbers can just `pip install` the same pins and rerun. TRACE in that loop is documentation, not credibility. + +TRACE matters in exactly the places where the reader cannot easily re-run the analysis: + +1. 
**The calibrated microdata build.** Each `enhanced_cps_YYYY.h5` that we publish to Hugging Face is derived from inputs that the public cannot all access directly (IRS-PUF requires agreeing to IRS's terms of use; the build itself takes hours on Modal with specific GPU configurations). Every release emits a TRO that binds the upstream input fingerprints, the build code, and the output h5 under canonical TROv 0.1. + +2. **Simulation runs through policyengine.org.** When a researcher uses the webapp to score a reform, we run the simulation on our infrastructure against our pinned calibrated data, and we return the result. A paper that cites that result is asking its readers to trust PolicyEngine's institutional attestation — not to trust that the researcher reproduced a Python pipeline faithfully on their own laptop. A TRO signed by PolicyEngine and served from our infrastructure makes that institutional attestation explicit and machine-verifiable. + +## The precise claims a PolicyEngine TRO lets us make + +Before TRACE, a paper citing a PolicyEngine result could say: "PolicyEngine-US computed an EITC expansion impact of $X using `policyengine-us==1.653.3` and `policyengine-us-data==1.85.2`." The reader had to take it on faith that those versions, run on that reform, actually produced $X — or install the pins and try it themselves, which presumes the researcher's environment was not modified. + +With a TRO emitted by policyengine.org, the paper cites a URL. That URL resolves to a JSON-LD document which the reader can validate with a stock tool. Inside the TRO, pinned by SHA-256: + +- The **rules bundle**: wheel hashes for `policyengine`, `policyengine-us`, and their transitive Python dependencies at the version resolved at run time. +- The **calibrated microdata**: the `enhanced_cps_2024.h5` SHA-256 and the `DataReleaseManifest` that describes how it was built. +- The **reform**: the full reform JSON submitted by the user, content-hashed. 
+- The **inputs**: for a household-level simulation, the household JSON the user entered; for an economy-wide simulation, the configuration payload. +- The **outputs**: a content-hashed `results.json` carrying the aggregate metrics the webapp displays, and — for US runs, where the underlying microdata is already public — the full per-household weighted simulation frame as parquet, so downstream researchers can compute custom splits, subgroup analyses, or variables not reported in the paper without re-running the simulation. +- The **institutional attestation**: CI/deploy run URL, git SHA, cloud region, timestamp, and a signature by a PolicyEngine service account. + +The claims the TRO supports are, in plain language: + +1. _These were the rules, this was the calibrated microdata, and these were the inputs that produced those outputs._ +2. _PolicyEngine as an institution ran this simulation; the researcher did not modify the code between our servers and their paper._ +3. _Any future reader can recover the full per-household counterfactual frame for re-analysis, bounded only by what we legally can redistribute._ + +The third claim is the one Sabelhaus surfaced specifically and that we think is underused in microsimulation publishing today. Papers cite aggregate numbers; reviewers and follow-up work want distributions, state-level breakdowns, variables the paper did not headline. A TRO-bound per-household output lets downstream researchers do that custom analysis as re-aggregation rather than re-simulation. + +## UK data and the strongest TRACE case we have + +In our US work the underlying calibrated h5 is already public on Hugging Face, so a local rerun is in principle possible. That weakens the TRACE value proposition on US — a reader motivated enough to verify could just `pip install` the pins and try it themselves. The TRO still buys institutional attestation (the researcher did not modify the code), but re-running is not materially blocked. 
+ +In our UK work the underlying microdata is UK Data Service–licensed and cannot be redistributed. A US researcher who wants to verify a UK PolicyEngine result cannot, even in principle, re-run it on their own machine, because they cannot acquire the inputs on any reasonable timescale. The only credibility path is an institution we trust having run the simulation against data that institution legally controls. That is exactly the central-bank-researcher scenario Lars described in the meeting, and it is the strongest fit for TRACE in the PolicyEngine stack. + +Two TRACE features that would make this work cleaner when they land: + +- **External-DOI pinning.** Rather than requiring restricted inputs to be redistributable, allow a TRO to pin by external identifier (UKDS study number + checksum, IRS-PUF agreement number + checksum). This lets a validator confirm that the run references the artifact that a qualified researcher could, in principle, acquire. +- **OS and compute-environment capture.** For multi-hour runs on specialized hardware, the Python-package pins do not fully determine reproducibility. Capturing the OS, Python version, and cloud-region provenance in the TRO closes that loop. + +Both of these are on the TRACE roadmap per the meeting. We will adopt as they ship. + +## What PolicyEngine is building in response + +Three concrete workstreams, each tracked as a GitHub issue: + +- **`policyengine-us-data`**: each `enhanced_cps_YYYY.h5` release already emits a build TRO. We will verify these TROs are published alongside the h5 and cross-linked from the Hugging Face dataset card so they are discoverable. (us-data PR #746 shipped the emission; issue #808 addresses a parallel licensing-documentation correction.) +- **`policyengine-api`**: emit a TRACE TRO for every webapp simulation run, signed by a PolicyEngine service account, persisted to GCS with durable URL, and exposed on the result response. (Issue #3485.) 
+- **`policyengine-app`**: surface the TRO as a "Cite this result" action with a citation download panel, an always-visible rules-vs-data version badge so the "rules changed or data changed?" question is answerable at a glance, and shareable permalinks that resolve the same numbers forever. (Issue #2830, blocked on the api work.) + +Documentation for researchers is being updated (household-api-docs PR #7) to put the webapp-run citation flow ahead of the local-Python-CLI flow, matching the framing that emerged in the meeting. + +## What TRACE gets from us as a case study + +A few things we think are worth surfacing to the TRACE project directly: + +1. **A use case that is infrastructure-certifying, not author-certifying.** The canonical TRACE scenario is a researcher bundling their code and data. Ours is a web service signing runs on behalf of researchers. The distinction matters for how institutional attestation gets represented in the vocabulary and for what SHACL shapes reject. +2. **Microdata provenance as a first-class artifact class.** Our build pipeline takes hours on specialized hardware and draws on half a dozen upstream sources with varying access levels. The TROv concept of `ArtifactComposition` handles this well, but concrete experience with a working microsimulation build may be useful input as the vocabulary evolves. +3. **A live stress test for `pe:*` extension discipline.** We have a working example of mapping institutionally-specific certification metadata (`pe:certifiedForModelVersion`, `pe:compatibilityBasis`, `pe:emittedIn`, `pe:ciRunUrl`, `pe:ciGitSha`) onto the TRACE core without polluting TROv shapes. If any of those generalize, we would contribute them upstream. + +We will keep notes as the implementation proceeds. The TRACE team is welcome to any of this material as part of their grant work. 
+ +## Open questions + +- **Per-household frame as default or opt-in.** We think default-on for countries with public microdata (US) and default-off for countries with restricted microdata (UK). The choice affects TRO file size, privacy posture, and what a reviewer of a UK PolicyEngine paper can actually re-analyze. +- **Retention and addressing of webapp-run TROs.** These become permanent citations. We need to commit to durable URLs, content-addressing, and a policy on how or whether they ever get pruned. +- **Signing key and key rotation.** A PolicyEngine service-account signature is straightforward to implement; the longer-term concern is what happens when we rotate keys or restructure the service. Chain-of-trust design deserves more thought. + +Feedback welcomed from Lars, Tim, Casper, Tara, John — and anyone else reading. From 3ea0fb6d217fa289cbbf59801c060abc6e1ec1e5 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 21 Apr 2026 11:47:24 -0400 Subject: [PATCH 2/6] Apply codex review to TRACE case study: soften UK, add non-scenarios and adjacent work, clarify institution-backed self-attestation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review of the 2026-04-21 meeting transcript vs. this writeup flagged four issues: 1. UK was oversold as 'the strongest' or 'only' TRACE case. Transcript supports 'a strong case' but not 'the strongest' — and we are considering a recalibrated UK variant that would partly lift the restriction anyway. 2. Missing explicit non-scenario section. The meeting was emphatic that researcher-laptop TRO emission, transitive dep tracing, and plain version-identification are NOT TRACE's job for us. 3. Missing adjacent workstreams that came up but are not TRACE-solved: preservation-grade archiving (HuggingFace vs Zenodo), PolicyEngine- specific TRACE vocabulary contribution, and non-TRACE version- identification work (Casper's point). 4. 
'Institutional certification' language oversold what PolicyEngine actually provides. An institution certifying its own runs 'carries technically no difference' from an author certifying their own runs; the value comes from institutional reputation and structured evidence, not from a cryptographic equivalent of arm's-length independence. Also: back off the claim, which the transcript doesn't support, that the per-household frame is 'the highest-value downstream artifact'; flag it as an open design question. Drop 'transitive Python deps' from the rules-bundle section per transcript explicitly saying TRACE has not built that in. Add three additional open-question items (retention + preservation, key trust model, production-runtime binding) surfaced by codex. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/trace-case-study.md | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/docs/trace-case-study.md b/docs/trace-case-study.md index a093841e..1496aa24 100644 --- a/docs/trace-case-study.md +++ b/docs/trace-case-study.md @@ -22,7 +22,7 @@ Before TRACE, a paper citing a PolicyEngine result could say: "PolicyEngine-US c With a TRO emitted by policyengine.org, the paper cites a URL. That URL resolves to a JSON-LD document which the reader can validate with a stock tool. Inside the TRO, pinned by SHA-256: -- The **rules bundle**: wheel hashes for `policyengine`, `policyengine-us`, and their transitive Python dependencies at the version resolved at run time. +- The **rules bundle**: wheel hashes for `policyengine` and `policyengine-us` at the version resolved at run time. (We do not pin transitive Python dependencies inside the TRO — TRACE has explicitly not built that in, and a verifier who wants to reconstruct the full environment can resolve the declared dependencies against a public index.) - The **calibrated microdata**: the `enhanced_cps_2024.h5` SHA-256 and the `DataReleaseManifest` that describes how it was built. 
- The **reform**: the full reform JSON submitted by the user, content-hashed. - The **inputs**: for a household-level simulation, the household JSON the user entered; for an economy-wide simulation, the configuration payload. @@ -35,20 +35,35 @@ The claims the TRO supports are, in plain language: 2. _PolicyEngine as an institution ran this simulation; the researcher did not modify the code between our servers and their paper._ 3. _Any future reader can recover the full per-household counterfactual frame for re-analysis, bounded only by what we legally can redistribute._ -The third claim is the one Sabelhaus surfaced specifically and that we think is underused in microsimulation publishing today. Papers cite aggregate numbers; reviewers and follow-up work want distributions, state-level breakdowns, variables the paper did not headline. A TRO-bound per-household output lets downstream researchers do that custom analysis as re-aggregation rather than re-simulation. +The third claim deserves a design-question flag: whether the webapp TRO binds the full per-household counterfactual frame by default, or only on request, is something we have not settled. There is a real tension — papers cite aggregates; reviewers and follow-up work want distributions, state-level breakdowns, variables the paper did not headline; but a always-default full frame has file-size and privacy-posture costs, especially in restricted-data countries. We intend to make the trade-off deliberately rather than defaulting to either extreme. -## UK data and the strongest TRACE case we have +One framing point worth being careful about: what PolicyEngine provides is *institution-backed self-attestation*, not arms-length third-party certification. The arms-length property — that the verifier of a claim is structurally independent of the party being audited — is genuinely absent when PolicyEngine both runs the simulation and signs the TRO. 
What the TRO buys in that case is structured evidence that a reader (or a reviewer) can query, backed by institutional reputation, not cryptographic independence. That is a real step up from "trust me, I ran it" — but we should not market it as more than it is. + +## UK data as a strong case for TRACE In our US work the underlying calibrated h5 is already public on Hugging Face, so a local rerun is in principle possible. That weakens the TRACE value proposition on US — a reader motivated enough to verify could just `pip install` the pins and try it themselves. The TRO still buys institutional attestation (the researcher did not modify the code), but re-running is not materially blocked. -In our UK work the underlying microdata is UK Data Service–licensed and cannot be redistributed. A US researcher who wants to verify a UK PolicyEngine result cannot, even in principle, re-run it on their own machine, because they cannot acquire the inputs on any reasonable timescale. The only credibility path is an institution we trust having run the simulation against data that institution legally controls. That is exactly the central-bank-researcher scenario Lars described in the meeting, and it is the strongest fit for TRACE in the PolicyEngine stack. +In our UK work the underlying microdata is UK Data Service–licensed and cannot be redistributed. A researcher who wants to verify a UK PolicyEngine result cannot re-run it on their own machine on any reasonable timescale, because they cannot acquire the inputs easily. Institutional attestation is a particularly strong credibility path here, which is why the meeting flagged this kind of scenario as where TRACE adds the most value. + +One caveat worth naming explicitly: we are considering publishing a re-calibrated UK variant derived entirely from public-use inputs, which would partially lift the restriction. If that lands, the US and UK cases converge again. 
And the TRACE project's own plans for external-identifier pinning (UKDS study number + checksum, IRS-PUF agreement number + checksum) — not yet firmed up in TROv at time of writing — would provide an even cleaner mechanism for binding restricted-input provenance without redistribution. + +## What is explicitly NOT a TRACE case for us + +It is worth being equally clear about where TRACE does *not* add value for PolicyEngine, so we do not accidentally scope it there: + +- **A researcher running `policyengine.py` locally and emitting their own TRO.** Readers can `pip install` the same pins and rerun themselves. A TRO is bookkeeping, not a credibility upgrade. The TRO emission helpers in `policyengine.py` exist because they are reused by the two cases above, not because local emission is the flagship user experience. +- **Tracing transitive Python dependencies.** TRACE has, per the meeting, explicitly not built this in, and we should not either. The code documents its declared dependencies; a verifier can resolve them against a public index. +- **Anything that replaces plain version-and-vintage identification.** Much of what matters for reproducibility is just showing "they used that file with that version." That is documentation, not TRACE — and it is often enough on its own, especially for researchers running the Python package against public-use inputs. + +## Adjacent workstreams TRACE does not cover -Two TRACE features that would make this work cleaner when they land: +Several reproducibility commitments came up in the meeting that are TRACE-adjacent rather than TRACE-solved. Flagging them so they do not get lost: -- **External-DOI pinning.** Rather than requiring restricted inputs to be redistributable, allow a TRO to pin by external identifier (UKDS study number + checksum, IRS-PUF agreement number + checksum). This lets a validator confirm that the run references the artifact that a qualified researcher could, in principle, acquire. 
-- **OS and compute-environment capture.** For multi-hour runs on specialized hardware, the Python-package pins do not fully determine reproducibility. Capturing the OS, Python version, and cloud-region provenance in the TRO closes that loop. +- **Preservation-grade archiving.** Hugging Face, where our calibrated h5 artifacts are hosted today, does not publish a preservation commitment comparable to Zenodo or a CLOCKSS / LOCKSS participant. For a TRO citation URL to be durable decades from now, the artifacts it pins need to live somewhere with an explicit long-term preservation policy. Zenodo as a secondary / mirror target is worth serious consideration. +- **PolicyEngine-specific TRACE vocabulary contribution.** We already use `pe:*` extension fields; as we implement and find patterns that generalize (e.g., institution-backed self-attestation, microdata-build provenance, infrastructure-run attestation), contributing those upstream to TROv vocabulary design is in scope. +- **Plain version-identification work outside TRACE.** Version badges, shareable permalinks that resolve to the same numbers, a "why did this number move?" diff view between release pairs. These are separate deliverables that are on our app roadmap; TRACE is not the right frame for them. -Both of these are on the TRACE roadmap per the meeting. We will adopt as they ship. +Both external-identifier pinning and OS / compute-environment capture are on the TRACE roadmap and would help when they land. We will adopt as they ship. ## What PolicyEngine is building in response @@ -72,8 +87,9 @@ We will keep notes as the implementation proceeds. The TRACE team is welcome to ## Open questions -- **Per-household frame as default or opt-in.** We think default-on for countries with public microdata (US) and default-off for countries with restricted microdata (UK). The choice affects TRO file size, privacy posture, and what a reviewer of a UK PolicyEngine paper can actually re-analyze. 
-- **Retention and addressing of webapp-run TROs.** These become permanent citations. We need to commit to durable URLs, content-addressing, and a policy on how or whether they ever get pruned. -- **Signing key and key rotation.** A PolicyEngine service-account signature is straightforward to implement; the longer-term concern is what happens when we rotate keys or restructure the service. Chain-of-trust design deserves more thought. +- **Per-household frame as default or opt-in.** The meeting did not reach consensus on this; we flagged it as unsettled. Default-on has downstream-analysis utility but file-size and privacy-posture costs. Default-off makes TROs smaller but forces downstream researchers to rerun the simulation for any custom split. Design choice should be made deliberately with trade-offs listed, not defaulted to either extreme. +- **Retention and addressing of webapp-run TROs.** These become permanent citations. Commitments needed on durable URLs, content-addressing, migration policy for storage-provider changes, and whether we ever prune. Zenodo as a secondary / mirror target is worth serious consideration — Hugging Face does not publish a preservation commitment, and a TRO URL that 404s in 2040 on a third-party host we cannot repair is a worse outcome than one that 404s in a PolicyEngine-controlled bucket, which we could at least restore. +- **Signing key and key trust model.** A PolicyEngine service-account signature is straightforward to implement; the harder question is how a reader in 2040 verifies the signature belongs to PolicyEngine. Options include a published keychain rooted in a DNS TXT record, a Sigstore-style transparency log, or GCP workload-identity with short-lived signatures. Chain-of-trust design deserves more thought than "we sign it with a service account." +- **Binding to the actual production runtime.** CI run URL + git SHA documents how the container that ran the simulation was *built*. 
The TRO should additionally bind the running container image SHA, cloud region, and pod / function instance at execution time. Otherwise the TRO only attests to a build, not a run. Feedback welcomed from Lars, Tim, Casper, Tara, John — and anyone else reading. From 8f82cd21453ad35ec76776b97ed626d009d9d71e Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Tue, 21 Apr 2026 16:33:11 -0400 Subject: [PATCH 3/6] Soften assertive language in TRACE case study per codex review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex flagged that the writeup slipped from 'claims we want to make' into 'claims we can make now' for service-account signatures, durable URLs, and per-household frames — three things the transcript does not actually settle. Changes: - Reframe section title from 'The precise claims a PolicyEngine TRO lets us make' to 'The claims a PolicyEngine TRO should let us make'. Every present-tense claim about what a TRO 'lets us' do is softened to what a TRO 'would let us' do, conditional on the design questions still being settled. - Per-household frame: drop the 'for US runs the TRO includes the full frame' assertion; replace with explicit open-design-question framing. Cite the transcript exchange for traceability. - Signing mechanism: remove the claim that a service-account signature is the answer. List service-account + DNS-keychain + Sigstore as options under consideration. - Institutional-attestation claim gains a caveat that the service- account signature is 'one implementation, not the only one.' - Workstream list for policyengine-api#3485 is rewritten from 'signed by a PolicyEngine service account, persisted to GCS with durable URL' (asserts design decisions that have not been made) to explicitly naming the strawman and the alternatives. 
- The two workstreams the writeup describes gain an explicit live / not-yet-live marker: us-data build TRO emission is live (us-data#746 shipped); webapp-run emission + Cite UI is not (api#3485, app#2830, api#3486). The open-questions section already handled this correctly; this change aligns the main body with that section so the writeup is internally consistent. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/trace-case-study.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/trace-case-study.md b/docs/trace-case-study.md index 1496aa24..a6b96119 100644 --- a/docs/trace-case-study.md +++ b/docs/trace-case-study.md @@ -12,30 +12,30 @@ The answer we walked out with is narrower and cleaner than what we had been buil TRACE matters in exactly the places where the reader cannot easily re-run the analysis: -1. **The calibrated microdata build.** Each `enhanced_cps_YYYY.h5` that we publish to Hugging Face is derived from inputs that the public cannot all access directly (IRS-PUF requires agreeing to IRS's terms of use; the build itself takes hours on Modal with specific GPU configurations). Every release emits a TRO that binds the upstream input fingerprints, the build code, and the output h5 under canonical TROv 0.1. +1. **The calibrated microdata build.** Each `enhanced_cps_YYYY.h5` that we publish to Hugging Face is derived from inputs that the public cannot all access directly (IRS-PUF requires agreeing to IRS's terms of use; the build itself takes hours on Modal with specific GPU configurations). Each release emits a TRO that binds the upstream input fingerprints, the build code, and the output h5 under canonical TROv 0.1. **This is live today** — us-data PR #746 shipped the emission — though cross-linking from the Hugging Face dataset card is still in flight. -2. 
**Simulation runs through policyengine.org.** When a researcher uses the webapp to score a reform, we run the simulation on our infrastructure against our pinned calibrated data, and we return the result. A paper that cites that result is asking its readers to trust PolicyEngine's institutional attestation — not to trust that the researcher reproduced a Python pipeline faithfully on their own laptop. A TRO signed by PolicyEngine and served from our infrastructure makes that institutional attestation explicit and machine-verifiable. +2. **Simulation runs through policyengine.org.** When a researcher uses the webapp to score a reform, we run the simulation on our infrastructure against our pinned calibrated data and return the result. A paper that cites that result is asking its readers to trust PolicyEngine's institutional attestation — not to trust that the researcher reproduced a Python pipeline faithfully on their own laptop. A TRO signed by PolicyEngine and served from our infrastructure would make that institutional attestation explicit and machine-verifiable. **This is not yet live** — backend emission is scoped in policyengine-api#3485, the "Cite this result" UI in policyengine-app#2830, both blocked on a pe.py v4 migration (api#3486, draft in #3487). This document describes the intended shape of the workflow, not its current state. -## The precise claims a PolicyEngine TRO lets us make +## The claims a PolicyEngine TRO should let us make Before TRACE, a paper citing a PolicyEngine result could say: "PolicyEngine-US computed an EITC expansion impact of $X using `policyengine-us==1.653.3` and `policyengine-us-data==1.85.2`." The reader had to take it on faith that those versions, run on that reform, actually produced $X — or install the pins and try it themselves, which presumes the researcher's environment was not modified. -With a TRO emitted by policyengine.org, the paper cites a URL. 
That URL resolves to a JSON-LD document which the reader can validate with a stock tool. Inside the TRO, pinned by SHA-256: +A TRO emitted by policyengine.org would let the paper cite a URL instead. That URL would resolve to a JSON-LD document the reader can validate with a stock tool. The artifact set we are designing toward, pinned by SHA-256: - The **rules bundle**: wheel hashes for `policyengine` and `policyengine-us` at the version resolved at run time. (We do not pin transitive Python dependencies inside the TRO — TRACE has explicitly not built that in, and a verifier who wants to reconstruct the full environment can resolve the declared dependencies against a public index.) - The **calibrated microdata**: the `enhanced_cps_2024.h5` SHA-256 and the `DataReleaseManifest` that describes how it was built. - The **reform**: the full reform JSON submitted by the user, content-hashed. - The **inputs**: for a household-level simulation, the household JSON the user entered; for an economy-wide simulation, the configuration payload. -- The **outputs**: a content-hashed `results.json` carrying the aggregate metrics the webapp displays, and — for US runs, where the underlying microdata is already public — the full per-household weighted simulation frame as parquet, so downstream researchers can compute custom splits, subgroup analyses, or variables not reported in the paper without re-running the simulation. -- The **institutional attestation**: CI/deploy run URL, git SHA, cloud region, timestamp, and a signature by a PolicyEngine service account. +- The **outputs**: a content-hashed `results.json` carrying the aggregate metrics the webapp displays. Whether to *also* bind a full per-household weighted simulation frame is an open design question (see below) — it would enable downstream custom splits without re-running the simulation, at a file-size and privacy-posture cost that varies by country. 
+- The **institutional attestation**: CI/deploy run URL, git SHA, cloud region, timestamp, and a cryptographic signature. The signing mechanism is not yet settled (see open questions); options under consideration include a GCP workload-identity short-lived signature, a published keychain rooted in a DNS TXT record at policyengine.org, or a Sigstore-style transparency log. -The claims the TRO supports are, in plain language: +Claims we believe such a TRO *should* support, in plain language: -1. _These were the rules, this was the calibrated microdata, and these were the inputs that produced those outputs._ -2. _PolicyEngine as an institution ran this simulation; the researcher did not modify the code between our servers and their paper._ -3. _Any future reader can recover the full per-household counterfactual frame for re-analysis, bounded only by what we legally can redistribute._ +1. _These were the rules, this was the calibrated microdata, and these were the inputs that produced those outputs._ — This is the artifact-composition claim; TROv core supports it. +2. _PolicyEngine as an institution ran this simulation; the researcher did not modify the code between our servers and their paper._ — This requires the institutional-attestation design to be nailed down. The service-account signature we envision is one implementation; it is not the only one. +3. _Any future reader can recover the full per-household counterfactual frame for re-analysis, bounded only by what we legally can redistribute._ — This depends on the per-household-frame default-or-opt-in design question below. -The third claim deserves a design-question flag: whether the webapp TRO binds the full per-household counterfactual frame by default, or only on request, is something we have not settled. 
There is a real tension — papers cite aggregates; reviewers and follow-up work want distributions, state-level breakdowns, variables the paper did not headline; but a always-default full frame has file-size and privacy-posture costs, especially in restricted-data countries. We intend to make the trade-off deliberately rather than defaulting to either extreme. +The per-household frame question deserves a specific flag: whether the webapp TRO binds the full per-household counterfactual frame by default, or only on request, is unsettled. Papers cite aggregates; reviewers and follow-up work want distributions, state-level breakdowns, variables the paper did not headline; but an always-default full frame has file-size and privacy-posture costs, especially in restricted-data countries. We intend to make the trade-off deliberately rather than defaulting to either extreme. Transcript note: this came up in the meeting (Sabelhaus on what the microdata contains beyond the summary, Max on whether the full frame belongs in a TRO); no consensus on "default-on" emerged. One framing point worth being careful about: what PolicyEngine provides is *institution-backed self-attestation*, not arms-length third-party certification. The arms-length property — that the verifier of a claim is structurally independent of the party being audited — is genuinely absent when PolicyEngine both runs the simulation and signs the TRO. What the TRO buys in that case is structured evidence that a reader (or a reviewer) can query, backed by institutional reputation, not cryptographic independence. That is a real step up from "trust me, I ran it" — but we should not market it as more than it is. @@ -70,7 +70,7 @@ Both external-identifier pinning and OS / compute-environment capture are on the Three concrete workstreams, each tracked as a GitHub issue: - **`policyengine-us-data`**: each `enhanced_cps_YYYY.h5` release already emits a build TRO. 
We will verify these TROs are published alongside the h5 and cross-linked from the Hugging Face dataset card so they are discoverable. (us-data PR #746 shipped the emission; issue #808 addresses a parallel licensing-documentation correction.) -- **`policyengine-api`**: emit a TRACE TRO for every webapp simulation run, signed by a PolicyEngine service account, persisted to GCS with durable URL, and exposed on the result response. (Issue #3485.) +- **`policyengine-api`**: emit a TRACE TRO for every webapp simulation run. The exact signing mechanism and persistence store are open design questions — service-account + GCS is the current strawman, but a Zenodo / Sigstore / DNS-rooted-keychain alternative is under consideration, especially for long-term durability. (Issue #3485; prerequisite v4 migration in #3487.) - **`policyengine-app`**: surface the TRO as a "Cite this result" action with a citation download panel, an always-visible rules-vs-data version badge so the "rules changed or data changed?" question is answerable at a glance, and shareable permalinks that resolve the same numbers forever. (Issue #2830, blocked on the api work.) Documentation for researchers is being updated (household-api-docs PR #7) to put the webapp-run citation flow ahead of the local-Python-CLI flow, matching the framing that emerged in the meeting. 
From 6d03fbc61cc9fe9e75247634627a710a28a15015 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 26 Apr 2026 06:58:08 -0400 Subject: [PATCH 4/6] Harden TRACE reproducibility artifacts --- pyproject.toml | 4 +- scripts/generate_trace_tros.py | 24 +-- src/policyengine/cli.py | 6 +- .../core/tax_benefit_model_version.py | 15 +- .../data/release_manifests/us.json | 20 +- .../data/schemas/trace_tro.schema.json | 4 +- src/policyengine/provenance/bundle.py | 20 +- src/policyengine/provenance/trace.py | 189 ++++++++++++++---- src/policyengine/results/trace_tro.py | 30 +++ tests/test_bundle_refresh.py | 1 + tests/test_models.py | 6 +- tests/test_release_manifests.py | 14 +- tests/test_trace_tro.py | 130 ++++++++++++ tests/test_us_regions.py | 6 +- uv.lock | 22 +- 15 files changed, 388 insertions(+), 103 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 843cb262..2cfefa1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ uk = [ ] us = [ "policyengine_core>=3.25.0", - "policyengine-us==1.653.3", + "policyengine-us==1.667.1", ] dev = [ "pytest", @@ -61,7 +61,7 @@ dev = [ "ruff>=0.9.0", "policyengine_core>=3.25.0", "policyengine-uk==2.88.0", - "policyengine-us==1.653.3", + "policyengine-us==1.667.1", "towncrier>=24.8.0", "mypy>=1.11.0", "pytest-cov>=5.0.0", diff --git a/scripts/generate_trace_tros.py b/scripts/generate_trace_tros.py index f9533bd9..57013643 100644 --- a/scripts/generate_trace_tros.py +++ b/scripts/generate_trace_tros.py @@ -3,14 +3,9 @@ Writes ``data/release_manifests/{country}.trace.tro.jsonld`` for each country whose bundled manifest ships in the wheel. Run this before releasing a new ``policyengine.py`` version so the packaged TRO -matches the pinned bundle. Requires HTTPS access to the data release -manifest (and ``HUGGING_FACE_TOKEN`` for private country data). - -If a country previously had a TRO on disk and the new run cannot -regenerate it (e.g. 
a missing secret or an unreachable HF endpoint), -the script exits non-zero so the release workflow blocks rather than -silently shipping a stale/missing TRO. If no bundled release manifests -are found at all, the script exits 0 with a notice (nothing to do). +matches the pinned bundle. The richer data release manifest is included +when available; otherwise the TRO still binds the certified dataset +sha256 and URI pinned in the bundled release manifest. """ from __future__ import annotations @@ -47,14 +42,11 @@ def regenerate_all() -> tuple[list[Path], list[tuple[str, Path, str]]]: try: data_release_manifest = get_data_release_manifest(country_id) except DataReleaseManifestUnavailableError as exc: - if tro_path.exists(): - regressions.append((country_id, tro_path, str(exc))) - else: - print( - f"skipped {country_id}: {exc}", - file=sys.stderr, - ) - continue + data_release_manifest = None + print( + f"warning: {country_id}: {exc}; writing limited TRO", + file=sys.stderr, + ) tro = build_trace_tro_from_release_bundle( country_manifest, data_release_manifest, diff --git a/src/policyengine/cli.py b/src/policyengine/cli.py index 3a659643..a282c718 100644 --- a/src/policyengine/cli.py +++ b/src/policyengine/cli.py @@ -19,6 +19,7 @@ from typing import Optional, Sequence from policyengine.provenance.manifest import ( + DataReleaseManifestUnavailableError, get_data_release_manifest, get_release_manifest, ) @@ -69,7 +70,10 @@ def _parser() -> argparse.ArgumentParser: def _emit_bundle_tro(country_id: str, out: Optional[Path]) -> int: country_manifest = get_release_manifest(country_id) - data_release_manifest = get_data_release_manifest(country_id) + try: + data_release_manifest = get_data_release_manifest(country_id) + except DataReleaseManifestUnavailableError: + data_release_manifest = None tro = build_trace_tro_from_release_bundle( country_manifest, data_release_manifest, diff --git a/src/policyengine/core/tax_benefit_model_version.py 
b/src/policyengine/core/tax_benefit_model_version.py index 5eb8f525..530d75a8 100644 --- a/src/policyengine/core/tax_benefit_model_version.py +++ b/src/policyengine/core/tax_benefit_model_version.py @@ -7,6 +7,7 @@ from policyengine.provenance.manifest import ( CountryReleaseManifest, DataCertification, + DataReleaseManifestUnavailableError, PackageVersion, get_data_release_manifest, ) @@ -214,16 +215,20 @@ def release_bundle(self) -> dict[str, Optional[str]]: def trace_tro(self) -> dict: """Build a TRACE TRO for this certified bundle. - Fetches the published data release manifest so the TRO can pin - the exact dataset sha256. Requires a bundled release manifest. + Uses the published data release manifest when available. If it + has not been published, the TRO falls back to the certified + dataset sha256 and URI pinned in the bundled release manifest. """ if self.release_manifest is None: raise ValueError( "TRACE TRO export requires a bundled country release manifest." ) - data_release_manifest = get_data_release_manifest( - self.release_manifest.country_id - ) + try: + data_release_manifest = get_data_release_manifest( + self.release_manifest.country_id + ) + except DataReleaseManifestUnavailableError: + data_release_manifest = None return build_trace_tro_from_release_bundle( self.release_manifest, data_release_manifest, diff --git a/src/policyengine/data/release_manifests/us.json b/src/policyengine/data/release_manifests/us.json index 0016aa8a..fa58fcfa 100644 --- a/src/policyengine/data/release_manifests/us.json +++ b/src/policyengine/data/release_manifests/us.json @@ -5,30 +5,30 @@ "policyengine_version": "4.0.0", "model_package": { "name": "policyengine-us", - "version": "1.653.3", - "sha256": "67a49b98d85c060b24d547a569e91a6703c0fc9c41299c1c67f4ecfac75c67c6", - "wheel_url": "https://files.pythonhosted.org/packages/02/07/25f39a2bfa1ff210cd8e78826c47c03b9040a98a83f4eed59c434c1ed862/policyengine_us-1.653.3-py3-none-any.whl" + "version": "1.667.1", + "sha256": 
"e05f846473314ea37179b1cdebe0497fa550a4197414317d9858c822c1f9672c", + "wheel_url": "https://files.pythonhosted.org/packages/26/bc/ed46f19002c14e541e7b0b107cb47a47a5b24b45dad64bfc60405effb814/policyengine_us-1.667.1-py3-none-any.whl" }, "data_package": { "name": "policyengine-us-data", - "version": "1.73.0", + "version": "1.78.2", "repo_id": "policyengine/policyengine-us-data" }, "certified_data_artifact": { "data_package": { "name": "policyengine-us-data", - "version": "1.73.0" + "version": "1.78.2" }, - "build_id": "policyengine-us-data-1.73.0", + "build_id": "policyengine-us-data-1.78.2", "dataset": "enhanced_cps_2024", - "uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.73.0", - "sha256": "18cdc668d05311c32ae37364abcea89b0221c27154559667e951c7b19f5b5cbd" + "uri": "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2", + "sha256": "4e92b340c3ea3e200ed5d55edf752ee1a13baf787442956fb67d25242fed13b5" }, "certification": { "compatibility_basis": "matching_data_build_fingerprint", - "data_build_id": "policyengine-us-data-1.73.0", + "data_build_id": "policyengine-us-data-1.78.2", "built_with_model_version": "1.647.0", - "certified_for_model_version": "1.653.3", + "certified_for_model_version": "1.667.1", "certified_by": "policyengine.py bundled manifest" }, "default_dataset": "enhanced_cps_2024", diff --git a/src/policyengine/data/schemas/trace_tro.schema.json b/src/policyengine/data/schemas/trace_tro.schema.json index a6fe1905..244f2d01 100644 --- a/src/policyengine/data/schemas/trace_tro.schema.json +++ b/src/policyengine/data/schemas/trace_tro.schema.json @@ -72,7 +72,7 @@ }, "trov:hasLocation": { "type": "string", - "pattern": "^(https://[^\\s]+$|data/release_manifests/[a-z]{2,3}\\.json$|reform\\.json$|results\\.json$|bundle\\.trace\\.tro\\.jsonld(#[a-f0-9]{64})?$)" + "pattern": "^(https://[^\\s]+$|data/release_manifests/[a-z]{2,3}\\.json$|(bundle\\.trace\\.tro\\.jsonld(#[a-f0-9]{64})?)|[a-z_]+\\.json$)" } } }, @@ -174,7 +174,7 @@ 
}, "pe:emittedIn": { "type": "string", - "enum": ["local", "github-actions"] + "enum": ["local", "github-actions", "policyengine-api"] } } } diff --git a/src/policyengine/provenance/bundle.py b/src/policyengine/provenance/bundle.py index 54e32ebd..fa432913 100644 --- a/src/policyengine/provenance/bundle.py +++ b/src/policyengine/provenance/bundle.py @@ -45,6 +45,7 @@ from policyengine.provenance.manifest import ( CountryReleaseManifest, get_release_manifest, + https_dataset_uri, ) # --------------------------------------------------------------------------- @@ -102,7 +103,11 @@ def _hf_dataset_sha256(repo_id: str, path: str, revision: str) -> str: Uses the ``HUGGING_FACE_TOKEN`` env var for private repos. Streams the file in 8 MiB chunks so memory usage stays flat. """ - url = f"https://huggingface.co/datasets/{repo_id}/resolve/{revision}/{path}" + url = https_dataset_uri( + repo_id=repo_id, + path_in_repo=path, + revision=revision, + ) headers = {"User-Agent": "policyengine.py"} token = os.environ.get("HUGGING_FACE_TOKEN") or os.environ.get("HF_TOKEN") if token: @@ -299,20 +304,27 @@ def _bump_pyproject_pin( def regenerate_trace_tro(country: str, manifest_dir: Path = MANIFEST_DIR) -> Path: """Regenerate ``{country}.trace.tro.jsonld`` from the country's - release manifest + the live data-release manifest on HF. + release manifest plus the live data-release manifest on HF when + that manifest is available. Thin wrapper around the same code path ``scripts/generate_trace_tros.py`` uses; exposed here so the refresh function can chain ``refresh_release_bundle(...)`` with TRO regeneration in one call. 
""" - from policyengine.provenance.manifest import get_data_release_manifest + from policyengine.provenance.manifest import ( + DataReleaseManifestUnavailableError, + get_data_release_manifest, + ) from policyengine.provenance.trace import ( build_trace_tro_from_release_bundle, serialize_trace_tro, ) release = get_release_manifest(country) - data_release = get_data_release_manifest(country) + try: + data_release = get_data_release_manifest(country) + except DataReleaseManifestUnavailableError: + data_release = None tro = build_trace_tro_from_release_bundle(release, data_release) out_path = manifest_dir / f"{country}.trace.tro.jsonld" out_path.write_bytes(serialize_trace_tro(tro)) diff --git a/src/policyengine/provenance/trace.py b/src/policyengine/provenance/trace.py index 2a48b38a..719424d5 100644 --- a/src/policyengine/provenance/trace.py +++ b/src/policyengine/provenance/trace.py @@ -69,6 +69,25 @@ def _artifact_mime_type(path_or_uri: str) -> Optional[str]: return _MIME_TYPES.get(suffix) +def _dataset_location_from_uri(uri: str) -> str: + if not uri.startswith("hf://"): + return uri + + without_scheme = uri[5:] + if "@" not in without_scheme: + return uri + path_without_revision, revision = without_scheme.rsplit("@", 1) + parts = path_without_revision.split("/", 2) + if len(parts) != 3: + return uri + repo_id = f"{parts[0]}/{parts[1]}" + return https_dataset_uri( + repo_id=repo_id, + path_in_repo=parts[2], + revision=revision, + ) + + def canonical_json_bytes(value: Mapping) -> bytes: """Canonical JSON serialization used for every content hash in the TRO. 
@@ -268,7 +287,7 @@ def _assemble_tro_node( def build_trace_tro_from_release_bundle( country_manifest: CountryReleaseManifest, - data_release_manifest: DataReleaseManifest, + data_release_manifest: Optional[DataReleaseManifest], *, certification: Optional[DataCertification] = None, bundle_manifest_path: Optional[str] = None, @@ -280,8 +299,11 @@ def build_trace_tro_from_release_bundle( ) -> dict: """Build a TRACE TRO for a certified runtime bundle. - Artifacts in the composition: bundle manifest, data release manifest, - certified dataset, and (when resolvable) the country model wheel. + Artifacts in the composition: bundle manifest, certified dataset, + and (when resolvable) the country model wheel. When the external + data release manifest is available, it is included as an additional + artifact; when it has not been published, the TRO still binds the + dataset sha256 and URI pinned in the bundled country manifest. Certification metadata is encoded as structured ``pe:*`` fields on the :class:`trov:TransparentResearchPerformance` node. @@ -302,13 +324,19 @@ def build_trace_tro_from_release_bundle( "Country release manifest does not define a certified artifact." ) - dataset_artifact = data_release_manifest.artifacts.get(certified_artifact.dataset) - if dataset_artifact is None: + dataset_artifact = ( + data_release_manifest.artifacts.get(certified_artifact.dataset) + if data_release_manifest is not None + else None + ) + if data_release_manifest is not None and dataset_artifact is None: raise ValueError( "Data release manifest does not include the certified dataset " f"'{certified_artifact.dataset}'." 
) - dataset_sha256 = certified_artifact.sha256 or dataset_artifact.sha256 + dataset_sha256 = certified_artifact.sha256 or ( + dataset_artifact.sha256 if dataset_artifact is not None else None + ) if dataset_sha256 is None: raise ValueError( "Neither the country release manifest nor the data release manifest " @@ -319,21 +347,31 @@ def build_trace_tro_from_release_bundle( bundle_manifest_path or f"data/release_manifests/{country_manifest.country_id}.json" ) - data_manifest_location = data_release_manifest_path or https_release_manifest_uri( - country_manifest.data_package + data_manifest_location = ( + data_release_manifest_path or https_release_manifest_uri(country_manifest.data_package) + if data_release_manifest is not None + else None ) - dataset_location = https_dataset_uri( - repo_id=dataset_artifact.repo_id, - path_in_repo=dataset_artifact.path, - revision=dataset_artifact.revision, + dataset_location = ( + https_dataset_uri( + repo_id=dataset_artifact.repo_id, + path_in_repo=dataset_artifact.path, + revision=dataset_artifact.revision, + ) + if dataset_artifact is not None + else _dataset_location_from_uri(certified_artifact.uri) ) bundle_manifest_hash = hashlib.sha256( canonical_json_bytes(country_manifest.model_dump(mode="json")) ).hexdigest() - data_release_manifest_hash = hashlib.sha256( - canonical_json_bytes(data_release_manifest.model_dump(mode="json")) - ).hexdigest() + data_release_manifest_hash = ( + hashlib.sha256( + canonical_json_bytes(data_release_manifest.model_dump(mode="json")) + ).hexdigest() + if data_release_manifest is not None + else None + ) model_wheel_sha, model_wheel_https = _resolve_model_wheel( country_manifest, @@ -350,22 +388,28 @@ def build_trace_tro_from_release_bundle( "mime_type": "application/json", "name": f"policyengine.py bundle manifest for {country_manifest.country_id}", }, - { - "id": "data_release_manifest", - "hash": data_release_manifest_hash, - "location": data_manifest_location, - "mime_type": "application/json", 
- "name": f"{country_manifest.data_package.name} release manifest " - f"{country_manifest.data_package.version}", - }, { "id": "dataset", "hash": dataset_sha256, "location": dataset_location, - "mime_type": _artifact_mime_type(dataset_artifact.path), + "mime_type": _artifact_mime_type( + dataset_artifact.path if dataset_artifact is not None else certified_artifact.uri + ), "name": certified_artifact.dataset, }, ] + if data_release_manifest_hash is not None and data_manifest_location is not None: + artifact_specs.insert( + 1, + { + "id": "data_release_manifest", + "hash": data_release_manifest_hash, + "location": data_manifest_location, + "mime_type": "application/json", + "name": f"{country_manifest.data_package.name} release manifest " + f"{country_manifest.data_package.version}", + }, + ) if model_wheel_sha is not None: artifact_specs.append( { @@ -405,25 +449,33 @@ def build_trace_tro_from_release_bundle( certification=effective_certification, started_at=( data_release_manifest.build.built_at - if data_release_manifest.build is not None + if ( + data_release_manifest is not None + and data_release_manifest.build is not None + ) else country_manifest.published_at ), ended_at=country_manifest.published_at, ) + if data_release_manifest is None: + performance["pe:dataReleaseManifestStatus"] = "unavailable" tro_node = _assemble_tro_node( tro_name=f"policyengine {country_manifest.country_id} certified bundle TRO", tro_description=( f"TRACE TRO for certified runtime bundle " f"{country_manifest.bundle_id or country_manifest.country_id} " - f"covering the bundle manifest, the country data release " - f"manifest, the certified dataset artifact, and the country " - f"model wheel." + f"covering the bundle manifest, the certified dataset " + f"artifact, the country model wheel, and the country data " + f"release manifest when it is available." 
), created_at=country_manifest.published_at or ( data_release_manifest.build.built_at - if data_release_manifest.build is not None + if ( + data_release_manifest is not None + and data_release_manifest.build is not None + ) else None ), creator=POLICYENGINE_ORGANIZATION, @@ -535,22 +587,37 @@ def build_simulation_trace_tro( results_payload: Mapping, reform_payload: Optional[Mapping] = None, reform_name: Optional[str] = None, + input_payload: Optional[Mapping] = None, + input_name: Optional[str] = None, + request_payload: Optional[Mapping] = None, + runtime_payload: Optional[Mapping] = None, + runtime_environment: Optional[Mapping[str, Any]] = None, + emission_context: Optional[Mapping[str, Any]] = None, simulation_id: Optional[str] = None, created_at: Optional[str] = None, started_at: Optional[str] = None, results_location: Optional[str] = None, reform_location: Optional[str] = None, + input_location: Optional[str] = None, + request_location: Optional[str] = None, + runtime_location: Optional[str] = None, bundle_tro_location: Optional[str] = None, bundle_tro_url: Optional[str] = None, ) -> dict: - """Build a per-simulation TRO chaining a bundle TRO to a results payload. - - The simulation TRO composition pins: the bundle TRO itself, the - reform JSON (if provided), and the ``results.json`` payload. The - ``bundle_tro_url`` field is recorded on the performance node under - ``pe:bundleTroUrl`` so a verifier can cross-check the bundle TRO - hash against bytes fetched from a canonical location rather than - trusting the caller's dict. + """Build a per-simulation TRO chaining a bundle TRO to a web/API run. + + The simulation TRO composition always pins the certified bundle TRO and + ``results.json`` payload. API callers should also pass the reform, + input/request payload, and runtime payload so the TRO binds the specific + production request that produced the output. 
This is intentionally broader + than a researcher-local helper: the valuable TRACE surface for PolicyEngine + is an institution-operated web/API run that readers cannot simply rerun on + their own machines. + + The ``bundle_tro_url`` field is recorded on the performance node under + ``pe:bundleTroUrl`` so a verifier can cross-check the bundle TRO hash + against bytes fetched from a canonical location rather than trusting the + caller's dict. """ bundle_reference = extract_bundle_tro_reference(bundle_tro) bundle_hash = hashlib.sha256(canonical_json_bytes(bundle_tro)).hexdigest() @@ -578,6 +645,43 @@ def build_simulation_trace_tro( "name": reform_name or "reform", } ) + if input_payload is not None: + input_hash = hashlib.sha256(canonical_json_bytes(input_payload)).hexdigest() + artifact_specs.append( + { + "id": "input", + "hash": input_hash, + "location": input_location or "input.json", + "mime_type": "application/json", + "name": input_name or "simulation input", + } + ) + if request_payload is not None: + request_hash = hashlib.sha256( + canonical_json_bytes(request_payload) + ).hexdigest() + artifact_specs.append( + { + "id": "request", + "hash": request_hash, + "location": request_location or "request.json", + "mime_type": "application/json", + "name": "API request payload", + } + ) + if runtime_payload is not None: + runtime_hash = hashlib.sha256( + canonical_json_bytes(runtime_payload) + ).hexdigest() + artifact_specs.append( + { + "id": "runtime", + "hash": runtime_hash, + "location": runtime_location or "runtime.json", + "mime_type": "application/json", + "name": "runtime environment", + } + ) artifact_specs.append( { "id": "results", @@ -611,14 +715,21 @@ def build_simulation_trace_tro( performance["trov:startedAtTime"] = started_at or created_at if created_at is not None: performance["trov:endedAtTime"] = created_at - performance.update(_emission_context()) + if runtime_environment is not None: + for key, value in runtime_environment.items(): + if value 
is None: + continue + performance[f"pe:{key}"] = value + performance.update(dict(emission_context or _emission_context())) tro_node = _assemble_tro_node( tro_name=f"policyengine simulation TRO ({simulation_slug})", tro_description=( "TRACE TRO for a PolicyEngine simulation result. Composition " "pins the certified runtime bundle TRO, the reform " - "specification (where applicable), and the results.json payload." + "specification (where applicable), the request/input payloads " + "(where supplied), the runtime environment (where supplied), " + "and the results.json payload." ), created_at=created_at, creator=POLICYENGINE_ORGANIZATION, diff --git a/src/policyengine/results/trace_tro.py b/src/policyengine/results/trace_tro.py index 85c7aed8..ed0b51e1 100644 --- a/src/policyengine/results/trace_tro.py +++ b/src/policyengine/results/trace_tro.py @@ -28,9 +28,18 @@ def build_results_trace_tro( bundle_tro: Mapping, reform_payload: Optional[Mapping] = None, reform_name: Optional[str] = None, + input_payload: Optional[Mapping] = None, + input_name: Optional[str] = None, + request_payload: Optional[Mapping] = None, + runtime_payload: Optional[Mapping] = None, + runtime_environment: Optional[Mapping] = None, + emission_context: Optional[Mapping] = None, simulation_id: Optional[str] = None, results_location: Optional[str] = None, reform_location: Optional[str] = None, + input_location: Optional[str] = None, + request_location: Optional[str] = None, + runtime_location: Optional[str] = None, bundle_tro_location: Optional[str] = None, bundle_tro_url: Optional[str] = None, ) -> dict: @@ -49,10 +58,19 @@ def build_results_trace_tro( results_payload=results.model_dump(mode="json"), reform_payload=reform_payload, reform_name=reform_name, + input_payload=input_payload, + input_name=input_name, + request_payload=request_payload, + runtime_payload=runtime_payload, + runtime_environment=runtime_environment, + emission_context=emission_context, simulation_id=slug, 
created_at=results.metadata.generated_at, results_location=results_location, reform_location=reform_location, + input_location=input_location, + request_location=request_location, + runtime_location=runtime_location, bundle_tro_location=bundle_tro_location, bundle_tro_url=bundle_tro_url, ) @@ -66,6 +84,12 @@ def write_results_with_trace_tro( bundle_tro_url: str, reform_payload: Optional[Mapping] = None, reform_name: Optional[str] = None, + input_payload: Optional[Mapping] = None, + input_name: Optional[str] = None, + request_payload: Optional[Mapping] = None, + runtime_payload: Optional[Mapping] = None, + runtime_environment: Optional[Mapping] = None, + emission_context: Optional[Mapping] = None, tro_suffix: str = ".trace.tro.jsonld", ) -> dict[str, Path]: """Write ``results.json`` and a sibling per-simulation TRACE TRO. @@ -82,6 +106,12 @@ def write_results_with_trace_tro( bundle_tro=bundle_tro, reform_payload=reform_payload, reform_name=reform_name, + input_payload=input_payload, + input_name=input_name, + request_payload=request_payload, + runtime_payload=runtime_payload, + runtime_environment=runtime_environment, + emission_context=emission_context, results_location=results_path.name, bundle_tro_url=bundle_tro_url, ) diff --git a/tests/test_bundle_refresh.py b/tests/test_bundle_refresh.py index 38537fea..aa796a43 100644 --- a/tests/test_bundle_refresh.py +++ b/tests/test_bundle_refresh.py @@ -181,6 +181,7 @@ def fake_urlopen(request, *args, **kwargs): url = request.full_url if "huggingface.co" in url: assert "@" not in url # URI revision is in the URL path + assert "/datasets/" not in url assert "1.83.4" in url return _FakeHFResponse(hf_bytes) raise AssertionError(f"Unexpected URL: {url}") diff --git a/tests/test_models.py b/tests/test_models.py index a9023c56..2bf5755c 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -113,12 +113,12 @@ def test_has_release_manifest_metadata(self): assert us_latest.release_manifest is not None assert 
us_latest.release_manifest.country_id == "us" assert us_latest.model_package.name == "policyengine-us" - assert us_latest.model_package.version == "1.653.3" + assert us_latest.model_package.version == "1.667.1" assert us_latest.data_package.name == "policyengine-us-data" - assert us_latest.data_package.version == "1.73.0" + assert us_latest.data_package.version == "1.78.2" assert ( us_latest.default_dataset_uri - == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.73.0" + == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2" ) def test_has_hundreds_of_parameters(self): diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py index d59a24ad..244fe672 100644 --- a/tests/test_release_manifests.py +++ b/tests/test_release_manifests.py @@ -49,19 +49,19 @@ def test__given_us_manifest__then_has_pinned_model_and_data_packages(self): assert manifest.country_id == "us" assert manifest.policyengine_version == "4.0.0" assert manifest.model_package.name == "policyengine-us" - assert manifest.model_package.version == "1.653.3" + assert manifest.model_package.version == "1.667.1" assert manifest.data_package.name == "policyengine-us-data" - assert manifest.data_package.version == "1.73.0" + assert manifest.data_package.version == "1.78.2" assert manifest.data_package.repo_id == "policyengine/policyengine-us-data" assert manifest.certified_data_artifact is not None assert ( - manifest.certified_data_artifact.build_id == "policyengine-us-data-1.73.0" + manifest.certified_data_artifact.build_id == "policyengine-us-data-1.78.2" ) assert manifest.certified_data_artifact.dataset == "enhanced_cps_2024" assert manifest.certification is not None - assert manifest.certification.data_build_id == "policyengine-us-data-1.73.0" + assert manifest.certification.data_build_id == "policyengine-us-data-1.78.2" assert manifest.certification.built_with_model_version == "1.647.0" - assert manifest.certification.certified_for_model_version == 
"1.653.3" + assert manifest.certification.certified_for_model_version == "1.667.1" def test__given_uk_manifest__then_has_pinned_model_and_data_packages(self): manifest = get_release_manifest("uk") @@ -92,7 +92,7 @@ def test__given_us_dataset_name__then_resolves_to_versioned_hf_url(self): assert ( resolved - == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.73.0" + == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2" ) def test__given_uk_dataset_name__then_resolves_to_versioned_hf_url(self): @@ -302,7 +302,7 @@ def test__given_private_manifest_unavailable__then_bundled_certification_is_used ): certification = certify_data_release_compatibility( "us", - runtime_model_version="1.653.3", + runtime_model_version="1.667.1", ) assert certification == get_release_manifest("us").certification diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py index fd673afd..b2de5491 100644 --- a/tests/test_trace_tro.py +++ b/tests/test_trace_tro.py @@ -20,12 +20,14 @@ from policyengine.core.tax_benefit_model_version import TaxBenefitModelVersion from policyengine.provenance.manifest import ( DataReleaseManifest, + DataReleaseManifestUnavailableError, get_data_release_manifest, get_release_manifest, ) from policyengine.provenance.trace import ( POLICYENGINE_ORGANIZATION, TRACE_TROV_NAMESPACE, + build_simulation_trace_tro, build_trace_tro_from_release_bundle, compute_trace_composition_fingerprint, extract_bundle_tro_reference, @@ -228,6 +230,38 @@ def test__given_manifest_dataset_sha__then_data_release_sha_not_required(self): dataset = next(a for a in artifacts if a["@id"].endswith("dataset")) assert dataset["trov:sha256"] == "d" * 64 + def test__given_missing_data_release_manifest__then_emits_limited_bundle_tro( + self, tro_schema + ): + tro = build_trace_tro_from_release_bundle( + get_release_manifest("us"), + None, + fetch_pypi=_fake_fetch_pypi, + ) + + artifacts = tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"] + artifact_ids = 
{a["@id"].rsplit("/", 1)[-1] for a in artifacts} + assert artifact_ids == {"bundle_manifest", "dataset", "model_wheel"} + assert ( + tro["@graph"][0]["trov:hasPerformance"][ + "pe:dataReleaseManifestStatus" + ] + == "unavailable" + ) + locations = tro["@graph"][0]["trov:hasArrangement"][0][ + "trov:hasArtifactLocation" + ] + dataset_location = next( + loc + for loc in locations + if loc["@id"].endswith("dataset") + ) + assert dataset_location["trov:hasLocation"].startswith( + "https://huggingface.co/" + ) + errors = list(Draft202012Validator(tro_schema).iter_errors(tro)) + assert errors == [], [error.message for error in errors] + def test__given_artifact_locations__then_all_paths_are_https_or_local( self, us_bundle_tro ): @@ -479,6 +513,37 @@ def test__given_trace_tro_property__then_emits_valid_tro(self): assert tro["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION + def test__given_trace_tro_property_without_data_release_manifest__then_falls_back( + self, tro_schema + ): + manifest = get_release_manifest("us") + model_version = TaxBenefitModelVersion( + model=TaxBenefitModel(id="us"), + version=manifest.model_package.version, + release_manifest=manifest, + model_package=manifest.model_package, + data_package=manifest.data_package, + default_dataset_uri=manifest.default_dataset_uri, + data_certification=manifest.certification, + ) + + with patch( + "policyengine.core.tax_benefit_model_version.get_data_release_manifest", + side_effect=DataReleaseManifestUnavailableError("missing"), + ): + with patch( + "policyengine.provenance.trace.fetch_pypi_wheel_metadata", + side_effect=_fake_fetch_pypi, + ): + tro = model_version.trace_tro + + artifacts = tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"] + artifact_ids = {a["@id"].rsplit("/", 1)[-1] for a in artifacts} + assert "dataset" in artifact_ids + assert "data_release_manifest" not in artifact_ids + errors = list(Draft202012Validator(tro_schema).iter_errors(tro)) + assert errors == [], 
[error.message for error in errors] + class TestSimulationTRO: """Per-simulation TROs chained from a bundle TRO.""" @@ -550,6 +615,50 @@ def test__given_no_reform__then_only_bundle_and_results_are_pinned( "composition/1/artifact/results", } + def test__given_web_run_payloads__then_binds_request_input_and_runtime( + self, tro_schema, us_bundle_tro + ): + tro = build_simulation_trace_tro( + bundle_tro=us_bundle_tro, + simulation_id="run-123", + reform_payload={"gov.irs.credits.ctc.amount.base[0].amount": 3000}, + input_payload={"country": "us", "region": "state/CA"}, + request_payload={"path": "/us/economy/2/over/1"}, + runtime_payload={ + "container_image_sha": "sha256:abc", + "cloud_region": "us-central1", + }, + runtime_environment={ + "runId": "run-123", + "executionId": "job-456", + "containerImageSha": "sha256:abc", + "cloudRegion": "us-central1", + }, + emission_context={"pe:emittedIn": "policyengine-api"}, + results_payload={"budgetary_impact": 1}, + created_at="2026-04-18T12:05:00Z", + started_at="2026-04-18T12:00:00Z", + ) + + artifact_ids = { + a["@id"] + for a in tro["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"] + } + assert artifact_ids == { + "composition/1/artifact/bundle_tro", + "composition/1/artifact/reform", + "composition/1/artifact/input", + "composition/1/artifact/request", + "composition/1/artifact/runtime", + "composition/1/artifact/results", + } + performance = tro["@graph"][0]["trov:hasPerformance"] + assert performance["pe:emittedIn"] == "policyengine-api" + assert performance["pe:runId"] == "run-123" + assert performance["pe:containerImageSha"] == "sha256:abc" + errors = list(Draft202012Validator(tro_schema).iter_errors(tro)) + assert errors == [], [error.message for error in errors] + def test__given_bundle_tro_url__then_performance_records_it(self, us_bundle_tro): tro = build_results_trace_tro( self._results(), @@ -651,6 +760,27 @@ def test__given_trace_tro_stdout__then_writes_canonical_json( assert 
payload["@graph"][0]["schema:creator"] == POLICYENGINE_ORGANIZATION assert payload["@graph"][0]["trov:hasPerformance"]["pe:emittedIn"] == "local" + def test__given_missing_data_release_manifest__then_trace_tro_still_writes( + self, capsysbinary, monkeypatch + ): + monkeypatch.delenv("GITHUB_ACTIONS", raising=False) + + with patch( + "policyengine.cli.get_data_release_manifest", + side_effect=DataReleaseManifestUnavailableError("missing"), + ): + with patch( + "policyengine.provenance.trace.fetch_pypi_wheel_metadata", + side_effect=_fake_fetch_pypi, + ): + exit_code = cli_main(["trace-tro", "us"]) + + assert exit_code == 0 + payload = json.loads(capsysbinary.readouterr().out) + artifacts = payload["@graph"][0]["trov:hasComposition"]["trov:hasArtifact"] + artifact_ids = {a["@id"].rsplit("/", 1)[-1] for a in artifacts} + assert artifact_ids == {"bundle_manifest", "dataset", "model_wheel"} + def test__given_out_path__then_writes_to_file(self, tmp_path, monkeypatch): monkeypatch.delenv("GITHUB_ACTIONS", raising=False) out = tmp_path / "nested" / "us.trace.tro.jsonld" diff --git a/tests/test_us_regions.py b/tests/test_us_regions.py index 7c038556..a730faf7 100644 --- a/tests/test_us_regions.py +++ b/tests/test_us_regions.py @@ -105,7 +105,7 @@ def test__given_us_registry__then_has_national_region(self): assert national.region_type == "national" assert ( national.dataset_path - == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.73.0" + == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5@1.78.2" ) def test__given_us_registry__then_has_51_states(self): @@ -134,7 +134,7 @@ def test__given_california_region__then_has_correct_format(self): assert ca.parent_code == "us" assert ( ca.dataset_path - == "hf://policyengine/policyengine-us-data/states/CA.h5@1.73.0" + == "hf://policyengine/policyengine-us-data/states/CA.h5@1.78.2" ) assert ca.state_code == "CA" assert ca.state_name == "California" @@ -167,7 +167,7 @@ def 
test__given_ca_first_district__then_has_correct_format(self): assert ca01.parent_code == "state/ca" assert ( ca01.dataset_path - == "hf://policyengine/policyengine-us-data/districts/CA-01.h5@1.73.0" + == "hf://policyengine/policyengine-us-data/districts/CA-01.h5@1.78.2" ) assert ca01.state_code == "CA" assert not ca01.requires_filter diff --git a/uv.lock b/uv.lock index 65678a22..3c7c1be6 100644 --- a/uv.lock +++ b/uv.lock @@ -2417,9 +2417,10 @@ wheels = [ [[package]] name = "policyengine" -version = "4.3.0" +version = "4.3.1" source = { editable = "." } dependencies = [ + { name = "jsonschema" }, { name = "microdf-python" }, { name = "packaging" }, { name = "pandas" }, @@ -2434,7 +2435,6 @@ dev = [ { name = "build" }, { name = "furo" }, { name = "itables" }, - { name = "jsonschema" }, { name = "jupyter-book" }, { name = "mypy", version = "1.19.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "mypy", version = "1.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, @@ -2472,7 +2472,7 @@ requires-dist = [ { name = "build", marker = "extra == 'dev'" }, { name = "furo", marker = "extra == 'dev'" }, { name = "itables", marker = "extra == 'dev'" }, - { name = "jsonschema", marker = "extra == 'dev'", specifier = ">=4.0.0" }, + { name = "jsonschema", specifier = ">=4.0.0" }, { name = "jupyter-book", marker = "extra == 'dev'" }, { name = "microdf-python", specifier = ">=1.2.1" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.0" }, @@ -2486,8 +2486,8 @@ requires-dist = [ { name = "policyengine-core", marker = "extra == 'us'", specifier = ">=3.25.0" }, { name = "policyengine-uk", marker = "extra == 'dev'", specifier = "==2.88.0" }, { name = "policyengine-uk", marker = "extra == 'uk'", specifier = "==2.88.0" }, - { name = "policyengine-us", marker = "extra == 'dev'", specifier = "==1.653.3" }, - { name = "policyengine-us", marker = "extra == 'us'", 
specifier = "==1.653.3" }, + { name = "policyengine-us", marker = "extra == 'dev'", specifier = "==1.667.1" }, + { name = "policyengine-us", marker = "extra == 'us'", specifier = "==1.667.1" }, { name = "psutil", specifier = ">=5.9.0" }, { name = "pydantic", specifier = ">=2.0.0" }, { name = "pytest", marker = "extra == 'dev'" }, @@ -2502,7 +2502,7 @@ provides-extras = ["plotting", "graph", "uk", "us", "dev"] [[package]] name = "policyengine-core" -version = "3.25.0" +version = "3.25.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dpath" }, @@ -2526,9 +2526,9 @@ dependencies = [ { name = "standard-imghdr" }, { name = "wheel" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/e3/40f11fe87ae718f88359dba6bf5971a1bb9b322dd48069c7881db9006791/policyengine_core-3.25.0.tar.gz", hash = "sha256:3b59a59046465d2f5c959cfe278c598e7deaa94d04a4134f4742d9f24cdbd6de", size = 464281, upload-time = "2026-04-18T00:28:02.949Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d1/ed/117c487e09bc2d5dae1d39baef2d317158433f66004f38f19f6ed82110eb/policyengine_core-3.25.2.tar.gz", hash = "sha256:de80b64b969e3c6b5a3046e29e5b9f2ce56c82f874064f65556fa60c8c423f17", size = 466431, upload-time = "2026-04-21T17:37:40.654Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/f8/fd60f3c7f02d27c5c83a713cd7779707b4ddff6cd76143a9b4def1c7ec4d/policyengine_core-3.25.0-py3-none-any.whl", hash = "sha256:397127f8842dea12638880c231e6bdec346fb9c9259b7775bb060c06a3b0190b", size = 230805, upload-time = "2026-04-18T00:28:01.311Z" }, + { url = "https://files.pythonhosted.org/packages/a2/22/b2297927a2091a22267c112e8d5d5d2b374d0f1ce67f257816fee1388170/policyengine_core-3.25.2-py3-none-any.whl", hash = "sha256:c884961937940e16730fb473d486728ed8c66250dc65df15257ad611e6655b09", size = 231186, upload-time = "2026-04-21T17:37:39.148Z" }, ] [[package]] @@ -2550,7 +2550,7 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.653.3" +version 
= "1.667.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, @@ -2562,9 +2562,9 @@ dependencies = [ { name = "tables", version = "3.11.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a2/60/5b736fa238559857fbf29168933c809eaada9abf006d26910b7958f5748e/policyengine_us-1.653.3.tar.gz", hash = "sha256:8a5c33997b7aefa2061d0dafce837b130e8ebdb0b9f83ae8c236f80cbf1805d6", size = 9180339, upload-time = "2026-04-18T12:06:45.764Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c1/ab/be097c4b67fb56f29bdbc24baebb2fcb5babf497ac1b019e19baef5252a1/policyengine_us-1.667.1.tar.gz", hash = "sha256:5350f45a27a1bfcf01c9758cc2e7f6b7a40151e14751e2915608ae9e6493f1c7", size = 9313590, upload-time = "2026-04-25T23:59:57.078Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/07/25f39a2bfa1ff210cd8e78826c47c03b9040a98a83f4eed59c434c1ed862/policyengine_us-1.653.3-py3-none-any.whl", hash = "sha256:67a49b98d85c060b24d547a569e91a6703c0fc9c41299c1c67f4ecfac75c67c6", size = 9445650, upload-time = "2026-04-18T12:06:43.163Z" }, + { url = "https://files.pythonhosted.org/packages/26/bc/ed46f19002c14e541e7b0b107cb47a47a5b24b45dad64bfc60405effb814/policyengine_us-1.667.1-py3-none-any.whl", hash = "sha256:e05f846473314ea37179b1cdebe0497fa550a4197414317d9858c822c1f9672c", size = 9595732, upload-time = "2026-04-25T23:59:53.662Z" }, ] [[package]] From bb7a4f3b0071874fdb79915ac8c565d48c1256fe Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 26 Apr 2026 06:59:58 -0400 Subject: [PATCH 5/6] Format TRACE reproducibility changes --- src/policyengine/provenance/trace.py | 15 +++++++-------- tests/test_trace_tro.py | 8 ++------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/policyengine/provenance/trace.py b/src/policyengine/provenance/trace.py index 719424d5..341addb5 100644 --- 
a/src/policyengine/provenance/trace.py +++ b/src/policyengine/provenance/trace.py @@ -348,7 +348,8 @@ def build_trace_tro_from_release_bundle( or f"data/release_manifests/{country_manifest.country_id}.json" ) data_manifest_location = ( - data_release_manifest_path or https_release_manifest_uri(country_manifest.data_package) + data_release_manifest_path + or https_release_manifest_uri(country_manifest.data_package) if data_release_manifest is not None else None ) @@ -393,7 +394,9 @@ def build_trace_tro_from_release_bundle( "hash": dataset_sha256, "location": dataset_location, "mime_type": _artifact_mime_type( - dataset_artifact.path if dataset_artifact is not None else certified_artifact.uri + dataset_artifact.path + if dataset_artifact is not None + else certified_artifact.uri ), "name": certified_artifact.dataset, }, @@ -657,9 +660,7 @@ def build_simulation_trace_tro( } ) if request_payload is not None: - request_hash = hashlib.sha256( - canonical_json_bytes(request_payload) - ).hexdigest() + request_hash = hashlib.sha256(canonical_json_bytes(request_payload)).hexdigest() artifact_specs.append( { "id": "request", @@ -670,9 +671,7 @@ def build_simulation_trace_tro( } ) if runtime_payload is not None: - runtime_hash = hashlib.sha256( - canonical_json_bytes(runtime_payload) - ).hexdigest() + runtime_hash = hashlib.sha256(canonical_json_bytes(runtime_payload)).hexdigest() artifact_specs.append( { "id": "runtime", diff --git a/tests/test_trace_tro.py b/tests/test_trace_tro.py index b2de5491..1070b9bf 100644 --- a/tests/test_trace_tro.py +++ b/tests/test_trace_tro.py @@ -243,18 +243,14 @@ def test__given_missing_data_release_manifest__then_emits_limited_bundle_tro( artifact_ids = {a["@id"].rsplit("/", 1)[-1] for a in artifacts} assert artifact_ids == {"bundle_manifest", "dataset", "model_wheel"} assert ( - tro["@graph"][0]["trov:hasPerformance"][ - "pe:dataReleaseManifestStatus" - ] + tro["@graph"][0]["trov:hasPerformance"]["pe:dataReleaseManifestStatus"] == 
"unavailable" ) locations = tro["@graph"][0]["trov:hasArrangement"][0][ "trov:hasArtifactLocation" ] dataset_location = next( - loc - for loc in locations - if loc["@id"].endswith("dataset") + loc for loc in locations if loc["@id"].endswith("dataset") ) assert dataset_location["trov:hasLocation"].startswith( "https://huggingface.co/" From 2146d72368b0effc5404bbdd0f2fce64e39b8589 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 26 Apr 2026 07:11:28 -0400 Subject: [PATCH 6/6] Update US model surface snapshot --- .../household_calculator_snapshots/us_model_surface.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/fixtures/household_calculator_snapshots/us_model_surface.json b/tests/fixtures/household_calculator_snapshots/us_model_surface.json index eaf4352e..c2fbc7b2 100644 --- a/tests/fixtures/household_calculator_snapshots/us_model_surface.json +++ b/tests/fixtures/household_calculator_snapshots/us_model_surface.json @@ -5,7 +5,7 @@ "has_income_tax": true, "has_region_registry": true, "model_package_name": "policyengine-us", - "num_parameters_bucketed_100s": 777, - "num_variables_bucketed_100s": 46, + "num_parameters_bucketed_100s": 783, + "num_variables_bucketed_100s": 47, "region_registry_country": "us" }