diff --git a/src/microplex_us/pipelines/us.py b/src/microplex_us/pipelines/us.py index 962968f..5f64850 100644 --- a/src/microplex_us/pipelines/us.py +++ b/src/microplex_us/pipelines/us.py @@ -285,6 +285,7 @@ "taxable_interest_income", "tax_exempt_interest_income", "capital_gains", + "long_term_capital_gains_before_response", "long_term_capital_gains", "short_term_capital_gains", "non_sch_d_capital_gains", @@ -303,6 +304,7 @@ ) PUF_SUPPORT_CLONE_TOP_TAIL_SCALE_VARIABLES: tuple[str, ...] = ( "capital_gains", + "long_term_capital_gains_before_response", "long_term_capital_gains", "short_term_capital_gains", "non_sch_d_capital_gains", @@ -5812,7 +5814,8 @@ def add(variable: str) -> bool: add(variable) if not add("capital_gains"): - add("long_term_capital_gains") + if not add("long_term_capital_gains_before_response"): + add("long_term_capital_gains") add("short_term_capital_gains") add("non_sch_d_capital_gains") @@ -5872,10 +5875,19 @@ def _apply_puf_support_clone_top_tail_guard( return clone, summary integrated_set = set(integrated_variables) + + def is_integrated_or_export_alias(variable: str) -> bool: + if variable in integrated_set: + return True + return ( + variable == "long_term_capital_gains_before_response" + and "long_term_capital_gains" in integrated_set + ) + scale_variables = [ variable for variable in self.config.puf_support_clone_top_tail_scale_variables - if variable in clone.columns and variable in integrated_set + if variable in clone.columns and is_integrated_or_export_alias(variable) ] if not scale_variables: summary["max_rough_agi_after"] = summary["max_rough_agi_before"] diff --git a/tests/pipelines/test_us.py b/tests/pipelines/test_us.py index 3d5caea..96e32fa 100644 --- a/tests/pipelines/test_us.py +++ b/tests/pipelines/test_us.py @@ -4100,6 +4100,50 @@ def test_puf_support_clone_top_tail_guard_avoids_redundant_income_totals(self): pd.testing.assert_frame_equal(guarded, clone) assert summary["affected_rows"] == 0 + def test_puf_support_clone_top_tail_guard_scales_exported_ltcg_alias(self): + pipeline = USMicroplexPipeline( + USMicroplexBuildConfig( + synthesis_backend="seed", + puf_support_clone_enabled=True, + puf_support_clone_top_tail_rough_agi_cap=78_999_999.0, + ) + ) + clone = pd.DataFrame( + { + "employment_income": [100_000.0], + "long_term_capital_gains": [70_000_000.0], + "long_term_capital_gains_before_response": [95_000_000.0], + } + ) + + guarded, summary = pipeline._apply_puf_support_clone_top_tail_guard( + clone, + integrated_variables=["long_term_capital_gains"], + ) + rough_agi, rough_agi_variables = pipeline._puf_support_clone_top_tail_rough_agi( + guarded + ) + + assert rough_agi.iloc[0] == pytest.approx(78_999_999.0) + assert rough_agi_variables == [ + "employment_income", + "long_term_capital_gains_before_response", + ] + assert guarded["employment_income"].iloc[0] == pytest.approx(100_000.0) + assert ( + guarded["long_term_capital_gains_before_response"].iloc[0] + < clone["long_term_capital_gains_before_response"].iloc[0] + ) + assert ( + guarded["long_term_capital_gains"].iloc[0] + < clone["long_term_capital_gains"].iloc[0] + ) + assert summary["affected_rows"] == 1 + assert summary["scale_basis_variables"] == [ + "long_term_capital_gains_before_response" + ] + assert "long_term_capital_gains_before_response" in summary["scaled_variables"] + def test_puf_support_clone_top_tail_guard_can_be_disabled(self): pipeline = USMicroplexPipeline( USMicroplexBuildConfig(