From b925062b24beba5c7efad66ed8fda9fb0afd4a56 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 30 May 2026 06:48:29 -0400 Subject: [PATCH 1/2] Document that Census TAX_ID is replaced by our tax-unit construction _create_tax_unit_table overwrites the raw Census ASEC TAX_ID with our own construct_tax_units() assignment, retaining the original as CENSUS_TAX_ID. Add a comment explaining the why so the intent (we build tax units ourselves; the Census value is kept only as the validation baseline + required raw-schema column) is legible at the call site. Co-Authored-By: Claude Opus 4.8 (1M context) --- policyengine_us_data/datasets/cps/census_cps.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/policyengine_us_data/datasets/cps/census_cps.py b/policyengine_us_data/datasets/cps/census_cps.py index 37f85fb86..5fc335789 100644 --- a/policyengine_us_data/datasets/cps/census_cps.py +++ b/policyengine_us_data/datasets/cps/census_cps.py @@ -160,6 +160,14 @@ def _create_tax_unit_table( person: pd.DataFrame, mode: str | None = None, ) -> pd.DataFrame: + # The raw Census ASEC TAX_ID (a documented filing-unit grouping) is NOT + # used as our tax unit. We build tax units ourselves with + # construct_tax_units() (default mode "policyengine", which applies PE + # filing/dependency rules) and overwrite TAX_ID with that assignment + # below. The original Census value is preserved as CENSUS_TAX_ID so it + # stays available as the ground-truth baseline our construction is + # validated against (see validation/cps_tax_unit_validation.py) and is a + # required raw-schema column (see _validate_raw_cps_schema in cps.py). person["CENSUS_TAX_ID"] = person["TAX_ID"] mode = mode or self.tax_unit_construction_mode constructed_person, tax_unit_df = construct_tax_units( @@ -167,6 +175,7 @@ def _create_tax_unit_table( year=self.time_period, mode=mode, ) + # Replace Census TAX_ID with our constructed tax-unit assignment. person["TAX_ID"] = constructed_person["TAX_ID"].values return tax_unit_df[["TAX_ID"]] From 31501943e3a6491144ca99305c67e374af2928da Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 30 May 2026 06:48:53 -0400 Subject: [PATCH 2/2] Add changelog fragment for #1154 Co-Authored-By: Claude Opus 4.8 (1M context) --- changelog.d/1154.changed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/1154.changed.md diff --git a/changelog.d/1154.changed.md b/changelog.d/1154.changed.md new file mode 100644 index 000000000..ec305bc65 --- /dev/null +++ b/changelog.d/1154.changed.md @@ -0,0 +1 @@ +Documented in `CensusCPS._create_tax_unit_table` that the raw Census ASEC `TAX_ID` is replaced by our `construct_tax_units()` assignment, with the original retained as `CENSUS_TAX_ID` for validation.