diff --git a/policyengine_us_data/db/etl_national_targets.py b/policyengine_us_data/db/etl_national_targets.py index 56a6d0e92..8bab5c78f 100644 --- a/policyengine_us_data/db/etl_national_targets.py +++ b/policyengine_us_data/db/etl_national_targets.py @@ -121,6 +121,18 @@ }, ] +CBO_LONG_TERM_CAPITAL_GAINS_TARGET = { + "variable": "long_term_capital_gains", + "parameter": "net_capital_gain", + "source": "CBO Revenue Projections", + "notes": ( + "CBO detailed AGI-by-source net capital gains, used directly as the " + "aggregate long-term capital gains target because CPS-reported capital " + "gains are underreported and preferential-rate reforms operate on " + "long-term gains" + ), +} + def _register_target_variable(session: Session, variable: str) -> None: from policyengine_us.system import system @@ -428,7 +440,7 @@ def extract_national_targets(year: int = DEFAULT_YEAR): - tax_expenditure_targets: Variables targeted via repeal-based tax expenditures - conditional_count_targets: Enrollment counts requiring constraints - cbo_targets: List of CBO projection targets - - irs_soi_targets: List of IRS SOI aggregate targets + - capital_gains_targets: List of CBO capital gains aggregate targets - treasury_targets: Empty compatibility list; EITC Treasury outlays are diagnostics, not claim calibration targets. - time_period: The target year @@ -821,25 +833,25 @@ def extract_national_targets(year: int = DEFAULT_YEAR): f"{variable_name} (param: {param_name}): {e}" ) - # IRS SOI aggregate targets - use time_period derived from dataset. - irs_soi_targets = [] + # Capital gains aggregate targets - use time_period derived from dataset. + capital_gains_targets = [] try: - value = tax_benefit_system.parameters( - time_period - ).calibration.gov.irs.soi._children["long_term_capital_gains"] - irs_soi_targets.append( + target = CBO_LONG_TERM_CAPITAL_GAINS_TARGET + value = income_by_source._children[target["parameter"]] + capital_gains_targets.append( { - "variable": "long_term_capital_gains", + "variable": target["variable"], "value": float(value), - "source": "IRS SOI", - "notes": ( - "IRS SOI total long-term capital gains, uprated by policyengine-us" - ), + "source": target["source"], + "notes": target["notes"], "year": time_period, } ) except (KeyError, AttributeError) as e: - print(f"Warning: Could not extract IRS SOI LTCG parameter: {e}") + print( + "Warning: Could not extract CBO net capital gains target " + f"for long-term capital gains: {e}" + ) # Treasury/CBO EITC figures are fiscal-year refundable-outlay concepts, # not tax-year claim controls. Keep them out of calibration targets. @@ -851,7 +863,8 @@ def extract_national_targets(year: int = DEFAULT_YEAR): "tax_expenditure_targets": tax_expenditure_targets, "conditional_count_targets": conditional_count_targets, "cbo_targets": cbo_targets, - "irs_soi_targets": irs_soi_targets, + "capital_gains_targets": capital_gains_targets, + "irs_soi_targets": [], "treasury_targets": treasury_targets, "time_period": time_period, } @@ -888,6 +901,7 @@ def transform_national_targets(raw_targets): all_direct_targets = ( raw_targets["direct_sum_targets"] + cbo_non_tax + + raw_targets.get("capital_gains_targets", []) + raw_targets.get("irs_soi_targets", []) ) diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index d088ac516..ea7c55a0f 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -322,10 +322,11 @@ def _cbo_program_target_value(sim, variable_name: str, time_period): "unemployment_compensation", ) -IRS_SOI_AGGREGATE_TARGETS = [ - # This complements the net capital gains target with the source-specific - # control used by downstream preferential-rate reforms. - ("long_term_capital_gains", ["long_term_capital_gains"], "long_term_capital_gains"), +CBO_CAPITAL_GAINS_TARGETS = [ + # This complements the net capital gains target with the variable used by + # downstream preferential-rate reforms, while using CBO's net capital gains + # projection directly as the aggregate control. + ("long_term_capital_gains", ["long_term_capital_gains"], "net_capital_gain"), ] EITC_NATIONAL_GEO_ID = "0100000US" @@ -1062,15 +1063,17 @@ def _sum_household_variables(sim, variable_names): ) -def _add_irs_soi_aggregate_targets(loss_matrix, targets_list, sim, time_period): - soi = sim.tax_benefit_system.parameters(time_period).calibration.gov.irs.soi +def _add_cbo_capital_gains_targets(loss_matrix, targets_list, sim, time_period): + income_by_source = sim.tax_benefit_system.parameters( + time_period + ).calibration.gov.cbo.income_by_source - for label_suffix, pe_variables, soi_param_name in IRS_SOI_AGGREGATE_TARGETS: - label = f"nation/irs/soi/{label_suffix}" + for label_suffix, pe_variables, cbo_param_name in CBO_CAPITAL_GAINS_TARGETS: + label = f"nation/cbo/income_by_source/{label_suffix}" loss_matrix[label] = _sum_household_variables(sim, pe_variables) if any(pd.isna(loss_matrix[label])): raise ValueError(f"Missing values for {label}") - targets_list.append(soi._children[soi_param_name]) + targets_list.append(income_by_source._children[cbo_param_name]) return targets_list, loss_matrix @@ -1387,10 +1390,10 @@ def build_loss_matrix(dataset: type, time_period): time_period, ) - # IRS SOI aggregate capital-gains targets. This adds a long-term gains - # control on top of the CBO net capital gains aggregate, which is important - # for reforms that change preferential LTCG rates. - targets_array, loss_matrix = _add_irs_soi_aggregate_targets( + # CBO aggregate capital-gains targets. This adds a long-term gains control + # on top of the net capital gains aggregate, which is important for reforms + # that change preferential LTCG rates. + targets_array, loss_matrix = _add_cbo_capital_gains_targets( loss_matrix, targets_array, sim, diff --git a/tests/unit/calibration/test_loss_targets.py b/tests/unit/calibration/test_loss_targets.py index 3e7ee8baf..05dfc72d1 100644 --- a/tests/unit/calibration/test_loss_targets.py +++ b/tests/unit/calibration/test_loss_targets.py @@ -23,7 +23,7 @@ _add_bls_ce_targets, _add_ctc_targets, _add_education_credit_targets, - _add_irs_soi_aggregate_targets, + _add_cbo_capital_gains_targets, _add_medicare_enrollment_target, _add_real_estate_tax_targets, _add_ssi_recipient_targets, @@ -262,10 +262,10 @@ def __init__(self): parameters=lambda period: SimpleNamespace( calibration=SimpleNamespace( gov=SimpleNamespace( - irs=SimpleNamespace( - soi=SimpleNamespace( + cbo=SimpleNamespace( + income_by_source=SimpleNamespace( _children={ - "long_term_capital_gains": 1_650.0, + "net_capital_gain": 1_650.0, } ) ) @@ -763,10 +763,10 @@ def test_transfer_balance_targets_use_absolute_error_scale(): np.testing.assert_array_equal(denominator, np.array([1e9, 11.0])) -def test_add_irs_soi_capital_gains_targets(): +def test_add_cbo_capital_gains_targets(): sim = _FakeCapitalGainsSimulation() - targets, loss_matrix = _add_irs_soi_aggregate_targets( + targets, loss_matrix = _add_cbo_capital_gains_targets( pd.DataFrame(), [], sim, @@ -775,7 +775,7 @@ def test_add_irs_soi_capital_gains_targets(): assert targets == [1_650.0] np.testing.assert_array_equal( - loss_matrix["nation/irs/soi/long_term_capital_gains"], + loss_matrix["nation/cbo/income_by_source/long_term_capital_gains"], np.array([100.0, 0.0, 50.0], dtype=np.float32), ) assert sim.calculate_calls == [ diff --git a/tests/unit/test_etl_national_targets.py b/tests/unit/test_etl_national_targets.py index a72d21d34..ff85968ed 100644 --- a/tests/unit/test_etl_national_targets.py +++ b/tests/unit/test_etl_national_targets.py @@ -977,6 +977,11 @@ class FakeTaxBenefitSystem: for target in raw_targets["tax_filer_targets"] if target["variable"] == "self_employment_income" ] + long_term_capital_gains_targets = [ + target + for target in raw_targets["capital_gains_targets"] + if target["variable"] == "long_term_capital_gains" + ] assert gross_wage_targets == [ { @@ -1032,3 +1037,12 @@ class FakeTaxBenefitSystem: "year": 2024, } ] + assert long_term_capital_gains_targets == [ + { + "variable": "long_term_capital_gains", + "value": 1_290_900_000_000, + "source": "CBO Revenue Projections", + "notes": etl_national_targets.CBO_LONG_TERM_CAPITAL_GAINS_TARGET["notes"], + "year": 2024, + } + ] diff --git a/tests/unit/test_income_target_mappings.py b/tests/unit/test_income_target_mappings.py index 17217f6e6..cad8ae219 100644 --- a/tests/unit/test_income_target_mappings.py +++ b/tests/unit/test_income_target_mappings.py @@ -47,6 +47,15 @@ def test_cbo_income_by_source_targets_match_between_legacy_and_target_db(): assert legacy_targets == db_targets +def test_cbo_ltcg_target_matches_between_legacy_and_target_db(): + legacy_variable, legacy_inputs, legacy_parameter = loss.CBO_CAPITAL_GAINS_TARGETS[0] + db_target = etl_national_targets.CBO_LONG_TERM_CAPITAL_GAINS_TARGET + + assert legacy_variable == db_target["variable"] == "long_term_capital_gains" + assert legacy_inputs == ["long_term_capital_gains"] + assert legacy_parameter == db_target["parameter"] == "net_capital_gain" + + def test_bea_nipa_direct_sum_targets_match_between_legacy_and_target_db(): assert ( loss.BEA_NIPA_WAGES_AND_SALARIES_2024