Skip to content

Commit 65fb7ac

Browse files
MaxGhenisclaude
andcommitted
Add unified PE-parity calibration with income/benefit targets
- Add unified_calibration.py: Multi-target calibrator for PE parity - Supports geographic (state, CD), income, and benefit targets - IPF-based calibration with bounded weight adjustments - 0% error on state populations, 0-4% on income/benefits - Add build_enhanced_cps.py: Extract full CPS from PE-US - 34 income/benefit columns (vs 8 in minimal CPS) - Person-level weights for proper aggregation - Update __init__.py with new exports: - PETargets, UnifiedCalibrator, calibrate_to_pe_targets Calibration results (65 targets): - 51 state populations: 0.00% error - SNAP/SSI/EITC spending: 0.00% error - Self-employment income: 4.01% error - Known gaps: capital gains (96%), dividends (73%) due to CPS limitations 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 28c27c4 commit 65fb7ac

4 files changed

Lines changed: 512 additions & 1 deletion

File tree

docs/pe_parity_status.md

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,38 @@ Tracking microplex calibration target coverage vs PolicyEngine-US.
88
|--------|-----------|--------------|-----|
99
| **Total Targets** | 1,764 | 1,661 | +103 |
1010
| **National** | 1,764 | 30 | +1,734 |
11-
| **State-level** | 0 | 1,631 | -1,631 |
11+
| **State-level** | 51 | 1,631 | -1,580 |
12+
13+
## ✅ Working Calibration (2024-12-29)
14+
15+
Successfully calibrating CPS to 65 targets:
16+
- **51 state populations**: 0% error
17+
- **14 income/benefit targets**: 0-4% error on most
18+
19+
```
20+
Target Computed Target Error
21+
─────────────────────────────────────────────────────────────
22+
State populations (51) 331.4M 331.4M 0.00%
23+
rental_income $46.0B $46.0B 0.00%
24+
self_employment_income $418.9B $436.4B 4.01%
25+
unemployment_compensation $200.3B $208.0B 3.72%
26+
taxable_pension_income $827.6B $827.6B 0.00%
27+
alimony_income $8.5B $8.5B 0.00%
28+
snap $103.1B $103.1B 0.00%
29+
ssi $78.5B $78.5B 0.00%
30+
eitc $72.7B $72.7B 0.00%
31+
```
32+
33+
### Known Gaps (CPS Data Limitations)
34+
35+
| Target | Error | Reason |
36+
|--------|-------|--------|
37+
| capital_gains | 96% | CPS has limited capital gains data |
38+
| social_security | 35% | Underreported in CPS |
39+
| employment_income | 21% | Underreported in CPS |
40+
| dividend_income | 73% | Underreported in CPS |
41+
42+
These require income imputation (like PE's enhanced CPS) to fix.
1243

1344
## Target Categories
1445

scripts/build_enhanced_cps.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
#!/usr/bin/env python3
2+
"""Build enhanced CPS microdata with full income/benefit columns from PE-US."""
3+
4+
import pandas as pd
5+
import numpy as np
6+
from pathlib import Path
7+
from policyengine_us import Microsimulation
8+
9+
10+
def build_enhanced_cps(year: int = 2024) -> tuple[pd.DataFrame, pd.DataFrame]:
11+
"""Build enhanced CPS person and household data from PE-US.
12+
13+
Returns:
14+
Tuple of (persons_df, households_df)
15+
"""
16+
print(f"Loading PE-US microsimulation for {year}...")
17+
sim = Microsimulation()
18+
19+
# Person-level variables
20+
person_vars = [
21+
# IDs and weights
22+
'person_id', 'household_id', 'tax_unit_id', 'spm_unit_id',
23+
'person_weight',
24+
25+
# Demographics
26+
'age', 'is_male', 'race', 'is_hispanic',
27+
'marital_status', 'is_disabled',
28+
29+
# Employment
30+
'employment_income', 'self_employment_income',
31+
'is_employed', 'hours_worked_per_week',
32+
33+
# Investment income
34+
'dividend_income', 'interest_income', 'rental_income',
35+
'short_term_capital_gains', 'long_term_capital_gains',
36+
37+
# Retirement income
38+
'social_security', 'pension_income',
39+
'taxable_pension_income', 'tax_exempt_pension_income',
40+
41+
# Other income
42+
'ssi', 'unemployment_compensation', 'alimony_income',
43+
'farm_income', 'partnership_s_corp_income',
44+
45+
# Benefits
46+
'snap', 'ssi', 'tanf', 'wic',
47+
'medicaid', 'medicare',
48+
49+
# Tax credits
50+
'eitc', 'ctc', 'cdcc',
51+
52+
# Geography
53+
'state_fips',
54+
]
55+
56+
# Household-level variables
57+
household_vars = [
58+
'household_id', 'household_weight',
59+
'household_size', 'household_income',
60+
'state_fips',
61+
]
62+
63+
# Load person data
64+
print("Loading person variables...")
65+
person_data = {}
66+
for var in person_vars:
67+
try:
68+
vals = sim.calculate(var, year)
69+
person_data[var] = vals
70+
except Exception as e:
71+
print(f" Warning: {var} not available: {e}")
72+
73+
persons_df = pd.DataFrame(person_data)
74+
print(f" Persons: {len(persons_df):,} rows, {len(persons_df.columns)} columns")
75+
76+
# Load household data
77+
print("Loading household variables...")
78+
household_data = {}
79+
for var in household_vars:
80+
try:
81+
vals = sim.calculate(var, year)
82+
household_data[var] = vals
83+
except Exception as e:
84+
print(f" Warning: {var} not available: {e}")
85+
86+
households_df = pd.DataFrame(household_data)
87+
print(f" Households: {len(households_df):,} rows, {len(households_df.columns)} columns")
88+
89+
return persons_df, households_df
90+
91+
92+
def compute_calibration_totals(persons_df: pd.DataFrame) -> dict:
93+
"""Compute weighted totals for calibration targets.
94+
95+
Returns:
96+
Dict of target_name -> (computed_value, pe_target_value, error_pct)
97+
"""
98+
from microplex.pe_targets import PETargets
99+
100+
pe = PETargets()
101+
pe_national = pe.get_national_targets()
102+
103+
# Weight column
104+
weight = persons_df.get('person_weight', pd.Series([1] * len(persons_df)))
105+
106+
# Mapping of PE target names to our columns
107+
income_map = {
108+
'employment_income': 'employment_income',
109+
'self_employment_income': 'self_employment_income',
110+
'social_security': 'social_security',
111+
'dividend_income': 'dividend_income',
112+
'interest_income': 'interest_income',
113+
'rental_income': 'rental_income',
114+
'pension_income': 'pension_income',
115+
'ssi': 'ssi',
116+
'unemployment_compensation': 'unemployment_compensation',
117+
}
118+
119+
results = {}
120+
121+
for pe_name, col_name in income_map.items():
122+
if col_name in persons_df.columns:
123+
computed = (persons_df[col_name] * weight).sum()
124+
125+
# Find PE target
126+
pe_row = pe_national[pe_national['name'] == pe_name]
127+
if not pe_row.empty:
128+
target = pe_row.iloc[0]['value']
129+
error = abs(computed - target) / target * 100
130+
results[pe_name] = {
131+
'computed': computed,
132+
'target': target,
133+
'error_pct': error
134+
}
135+
136+
return results
137+
138+
139+
def main():
140+
# Build enhanced CPS
141+
persons_df, households_df = build_enhanced_cps(2024)
142+
143+
# Save to parquet
144+
out_dir = Path("data")
145+
persons_df.to_parquet(out_dir / "cps_enhanced_persons.parquet", index=False)
146+
households_df.to_parquet(out_dir / "cps_enhanced_households.parquet", index=False)
147+
148+
print(f"\n✅ Saved enhanced CPS data")
149+
print(f" Persons: {out_dir / 'cps_enhanced_persons.parquet'}")
150+
print(f" Households: {out_dir / 'cps_enhanced_households.parquet'}")
151+
152+
# Compute and compare calibration totals
153+
print("\n=== CALIBRATION COMPARISON ===")
154+
results = compute_calibration_totals(persons_df)
155+
156+
print(f"\n{'Variable':<30} {'Computed':>15} {'Target':>15} {'Error':>10}")
157+
print("-" * 75)
158+
159+
for name, vals in sorted(results.items(), key=lambda x: -x[1]['target']):
160+
computed = vals['computed']
161+
target = vals['target']
162+
error = vals['error_pct']
163+
164+
comp_str = f"${computed/1e9:.1f}B"
165+
tgt_str = f"${target/1e9:.1f}B"
166+
err_str = f"{error:.1f}%"
167+
168+
print(f"{name:<30} {comp_str:>15} {tgt_str:>15} {err_str:>10}")
169+
170+
171+
if __name__ == "__main__":
172+
main()

src/microplex/__init__.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,16 @@
8181
create_synthesizer,
8282
HAS_STATMATCH,
8383
)
84+
from microplex.pe_targets import (
85+
PETargets,
86+
get_pe_targets,
87+
create_calibration_targets,
88+
)
89+
from microplex.unified_calibration import (
90+
UnifiedCalibrator,
91+
CalibrationTarget,
92+
calibrate_to_pe_targets,
93+
)
8494

8595
__version__ = "0.1.0"
8696

@@ -138,4 +148,11 @@
138148
"DisabilityTransitionModel",
139149
"MarriageTransition",
140150
"DivorceTransition",
151+
# PE Parity
152+
"PETargets",
153+
"get_pe_targets",
154+
"create_calibration_targets",
155+
"UnifiedCalibrator",
156+
"CalibrationTarget",
157+
"calibrate_to_pe_targets",
141158
]

0 commit comments

Comments
 (0)