PolicyEngine · daphnehanse11 · May 18, 2026 · May 18, 2026 · May 18, 2026 · May 18, 2026
diff --git a/app/package.json b/app/package.json
@@ -8,7 +8,8 @@
     "dev": "next dev --turbopack",
     "build": "next build",
     "start": "next start",
-    "lint": "eslint . --max-warnings=0"
+    "lint": "eslint . --max-warnings=0",
+    "test": "bun test tests"
   },
   "devDependencies": {
     "@tailwindcss/postcss": "^4.2.4",

diff --git a/app/public/metric-options.html b/app/public/metric-options.html
@@ -525,7 +525,7 @@ <h1>How should we score a model across many outputs?</h1>
             <section id="setup" class="step">
               <h2><span class="step-num">1</span>The setup</h2>
               <p class="sub">
-                Three households, five outputs (four dollar amounts plus one
+                Three households, four outputs (three dollar amounts plus one
                 eligibility flag), two models. Every step on this page uses
                 the same numbers.
               </p>
@@ -656,9 +656,9 @@ <h2><span class="step-num">3</span>Equal weights</h2>
 
               <div class="card">
                 <p>
-                  Average the five row scores into a household score, then
+                  Average the four row scores into a household score, then
                   average the household scores into a model score. Each output
-                  gets a weight of 1/5 in every household.
+                  gets a weight of 1/4 in every household.
                 </p>
                 <div class="table-wrap" id="equal-table"></div>
                 <div class="verdict">
@@ -695,7 +695,7 @@ <h2><span class="step-num">4</span>Per-household impact weights</h2>
                 <p>
                   Now an output gets weight in proportion to its dollar size
                   for that household. A $5,000 income tax line carries more
-                  weight than a $500 ACA premium tax credit.
+                  weight than a $500 benefit line.
                 </p>
                 <div class="table-wrap" id="impact-weights-table"></div>
                 <div class="table-wrap" id="impact-table"></div>
@@ -877,9 +877,8 @@ <h3>Worked examples on realistic households</h3>
                   The "share of net income that flowed through this program"
                   reading is intuitive. Medicaid eligibility carries a
                   meaningful weight because it shows up with sizeable paired
-                  values in two of the four households; ACA PTC carries
-                  smaller weight because only one household uses it. None
-                  of these numbers required a tunable parameter.
+                  values in two of the four households. None of these numbers
+                  required a tunable parameter.
                 </p>
               </div>
             </section>
@@ -1309,7 +1308,6 @@ <h2><span class="step-num">12</span>Side by side</h2>
         { id: "income_tax", label: "Income tax", kind: "amount" },
         { id: "payroll_tax", label: "Payroll tax", kind: "amount" },
         { id: "snap", label: "SNAP", kind: "amount" },
-        { id: "aca_ptc", label: "ACA PTC", kind: "amount" },
         {
           id: "medicaid_eligible",
           label: "Medicaid eligibility",
@@ -1329,7 +1327,6 @@ <h2><span class="step-num">12</span>Side by side</h2>
             income_tax: -4000,
             payroll_tax: -5000,
             snap: 0,
-            aca_ptc: 0,
             medicaid_eligible: false,
             medicaid_value: 0,
           },
@@ -1342,7 +1339,6 @@ <h2><span class="step-num">12</span>Side by side</h2>
             income_tax: -1000,
             payroll_tax: -1500,
             snap: 6000,
-            aca_ptc: 8000,
             medicaid_eligible: true,
             medicaid_value: 6000,
           },
@@ -1355,7 +1351,6 @@ <h2><span class="step-num">12</span>Side by side</h2>
             income_tax: -2000,
             payroll_tax: 0,
             snap: 2400,
-            aca_ptc: 0,
             medicaid_eligible: true,
             medicaid_value: 9000,
           },
@@ -1374,7 +1369,6 @@ <h2><span class="step-num">12</span>Side by side</h2>
             income_tax: 3000,
             payroll_tax: -1530,
             snap: 5000,
-            aca_ptc: 3000,
             medicaid_eligible: true,
             medicaid_value: 8000,
           },
@@ -1387,7 +1381,6 @@ <h2><span class="step-num">12</span>Side by side</h2>
             income_tax: -4500,
             payroll_tax: -4590,
             snap: 0,
-            aca_ptc: 0,
             medicaid_eligible: false,
             medicaid_value: 0,
           },
@@ -1400,7 +1393,6 @@ <h2><span class="step-num">12</span>Side by side</h2>
             income_tax: 0,
             payroll_tax: 0,
             snap: 2400,
-            aca_ptc: 0,
             medicaid_eligible: true,
             medicaid_value: 9000,
           },
@@ -1413,7 +1405,6 @@ <h2><span class="step-num">12</span>Side by side</h2>
             income_tax: -40000,
             payroll_tax: -9900,
             snap: 0,
-            aca_ptc: 0,
             medicaid_eligible: false,
             medicaid_value: 0,
           },
@@ -1425,18 +1416,18 @@ <h2><span class="step-num">12</span>Side by side</h2>
           id: "tax",
           label: "Tax-focused",
           predictions: {
-            H1: { income_tax: -4000, payroll_tax: -5000, snap: 0, aca_ptc: 0, medicaid_eligible: false },
-            H2: { income_tax: -1000, payroll_tax: -1500, snap: 0, aca_ptc: 0, medicaid_eligible: false },
-            H3: { income_tax: -2000, payroll_tax: 0, snap: 0, aca_ptc: 0, medicaid_eligible: false },
+            H1: { income_tax: -4000, payroll_tax: -5000, snap: 0, medicaid_eligible: false },
+            H2: { income_tax: -1000, payroll_tax: -1500, snap: 0, medicaid_eligible: false },
+            H3: { income_tax: -2000, payroll_tax: 0, snap: 0, medicaid_eligible: false },
           },
         },
         {
           id: "benefit",
           label: "Benefit-focused",
           predictions: {
-            H1: { income_tax: -2000, payroll_tax: -4000, snap: 0, aca_ptc: 0, medicaid_eligible: false },
-            H2: { income_tax: 0, payroll_tax: -1000, snap: 6000, aca_ptc: 8000, medicaid_eligible: true },
-            H3: { income_tax: -1000, payroll_tax: 0, snap: 2400, aca_ptc: 0, medicaid_eligible: true },
+            H1: { income_tax: -2000, payroll_tax: -4000, snap: 0, medicaid_eligible: false },
+            H2: { income_tax: 0, payroll_tax: -1000, snap: 6000, medicaid_eligible: true },
+            H3: { income_tax: -1000, payroll_tax: 0, snap: 2400, medicaid_eligible: true },
           },
         },
       ];

diff --git a/app/public/paper/web/index.html b/app/public/paper/web/index.html
@@ -362,7 +362,7 @@ <h3 id="frozen-snapshot-and-open-set-status" class="anchored">Frozen snapshot an
 <tr class="odd">
 <th data-quarto-table-cell-role="th">30</th>
 <td>Output groups</td>
-<td>19 US and 7 UK</td>
+<td>18 US and 7 UK</td>
 </tr>
 <tr class="even">
 <th data-quarto-table-cell-role="th">31</th>
@@ -539,7 +539,7 @@ <h3 id="frozen-snapshot-and-open-set-status" class="anchored">Frozen snapshot an
 <h2 id="data-and-scenario-construction" class="anchored">Data and scenario construction</h2>
 <h3 id="united-states" class="anchored">United States</h3>
 <p>The US benchmark is built from Enhanced Current Population Survey (CPS)-derived households using PolicyEngine US. The sampled households are filtered to keep a single-tax-unit, single-family, single-Supplemental Poverty Measure (SPM)-unit structure with at least one adult and a supported filing status. The 2024 Enhanced CPS source contains 41,314 households; 30,173 (73.0%) pass the filter and form the eligible draw. The 27.0% excluded by the filter include multi-tax-unit households (e.g., adult roommates), multi-family households, multi-SPM-unit households, and households whose head reports a filing status outside the supported set. These excluded compositions are exactly the kind of cases where federal/state credit allocations and benefit-unit rules become hardest, so the eligible draw is a tractable subset rather than the full distribution of US households. Prompts include nonzero promptable raw inputs across relevant entities rather than a hand-curated summary, so the models see many of the same facts the simulator receives. Filing status is not stated in the prompt; the reference computation infers it from tax-unit role flags. Models therefore see the same household facts that drive the reference filing-status assignment, but they do not receive that assignment as a label.</p>
-<p>The current US release evaluates 19 output groups spanning federal income tax, refundable credits, payroll and self-employment tax, state and local income tax, Supplemental Nutrition Assistance Program (SNAP), Supplemental Security Income (SSI), Temporary Assistance for Needy Families (TANF), Affordable Care Act (ACA) premium tax credits, school-meal eligibility, and person-level coverage eligibility for the Special Supplemental Nutrition Program for Women, Infants, and Children (WIC), Medicaid, the Children’s Health Insurance Program (CHIP), Medicare, Head Start, and Early Head Start.</p>
+<p>The current US release evaluates 18 output groups spanning federal income tax, refundable credits, payroll and self-employment tax, state and local income tax, Supplemental Nutrition Assistance Program (SNAP), Supplemental Security Income (SSI), Temporary Assistance for Needy Families (TANF), school-meal eligibility, and person-level coverage eligibility for the Special Supplemental Nutrition Program for Women, Infants, and Children (WIC), Medicaid, the Children’s Health Insurance Program (CHIP), Medicare, Head Start, and Early Head Start.</p>
 <p>The output scope is intentionally narrower than the full PolicyEngine model. <a href="#tbl-scope-rationale" class="quarto-xref">Table&nbsp;3</a> summarizes the inclusion rule. The benchmark asks for WIC eligibility rather than a WIC dollar amount; WIC dollar values are used only as impact-weight proxies for coverage flags, not as requested model outputs.</p>
 <div class="cell" data-execution_count="4">
 <div id="tbl-scope-rationale" class="cell quarto-float quarto-figure quarto-figure-center anchored" data-execution_count="4">
@@ -583,18 +583,13 @@ <h3 id="united-states" class="anchored">United States</h3>
 <td>Excluded</td>
 <td>Intermediate tax bases, payroll subcomponents, and outputs that mainly require unavailable history, restricted local market data, restricted program-administration data, or take-up assignment rather than rule calculation.</td>
 </tr>
-<tr class="odd">
-<th data-quarto-table-cell-role="th">2</th>
-<td>ACA Premium Tax Credit</td>
-<td>Retained as a deliberate health-support output; when local benchmark premiums are not listed, the model must estimate them from the household facts.</td>
-</tr>
-<tr class="even">
-<th data-quarto-table-cell-role="th">3</th>
+<tr class="odd">
+<th data-quarto-table-cell-role="th">2</th>
 <td>Binary coverage outputs</td>
 <td>Requested as 0/1 eligibility flags and scored as classification tasks; their dollar values are used only as impact-weight proxies, not as requested model outputs.</td>
 </tr>
-<tr class="odd">
-<th data-quarto-table-cell-role="th">4</th>
+<tr class="even">
+<th data-quarto-table-cell-role="th">3</th>
 <td>WIC</td>
 <td>The benchmark asks for person-level WIC eligibility. It does not ask models to estimate a WIC dollar amount.</td>
 </tr>
@@ -1999,7 +1994,7 @@ <h2 id="appendix-a-structured-output-audit" class="anchored">Appendix A: Structu
 <tr class="odd">
 <th data-quarto-table-cell-role="th">4</th>
 <td>Final parse coverage</td>
-<td>The repaired manuscript snapshot has zero missing parsed numeric values and zero missing explanations across all 34,656 model-output rows.</td>
+<td>The repaired manuscript snapshot has zero missing parsed numeric values and zero missing explanations across all 33,456 model-output rows.</td>
 </tr>
 <tr class="even">
 <th data-quarto-table-cell-role="th">5</th>
@@ -2690,4 +2685,4 @@ <h2 id="competing-interests" class="anchored">Competing interests</h2>
 
 
 
-</body></html>
+</body></html>