From 1c963d1542928b4219b05fd3073825ba52b18753 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Wed, 27 May 2026 08:18:14 -0400
Subject: [PATCH 1/2] Add PolicyBench pre-release banner

---
 app/src/components/Hero.tsx | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
diff --git a/app/src/components/Hero.tsx b/app/src/components/Hero.tsx
index 9fac047..364492a 100644
--- a/app/src/components/Hero.tsx
+++ b/app/src/components/Hero.tsx
@@ -3,6 +3,22 @@ import SiteHeader, { type HeaderNavItem } from "./SiteHeader";
 
 const SNAPSHOT_DATE_LABEL = "Snapshot 2026-05-20";
 
+// Banner flagging the displayed results as provisional; Hero renders it
+// directly above its hero-title section.
+const PreReleaseBanner = () => (
+  <div className="border-y border-warning/30 bg-warning-soft/90">
+    <div className="mx-auto flex max-w-7xl flex-col gap-1 px-4 py-3 text-sm text-text sm:flex-row sm:items-center sm:px-6">
+      <span className="font-[family-name:var(--font-mono)] text-[11px] font-semibold uppercase tracking-[0.12em] text-warning-text">
+        Pre-release
+      </span>
+      <span className="text-text-secondary">
+        These results are provisional. We plan to rerun PolicyBench with updated
+        data and improved prompts before the final release.
+      </span>
+    </div>
+  </div>
+);
+
 export default function Hero({
   selectedView,
   onSelectView,
@@ -44,6 +60,7 @@ export default function Hero({
         availableViews={availableViews}
         actionLink={{ label: "Paper", href: "/paper", type: "internal" }}
       />
+      <PreReleaseBanner />
 
       <section
         aria-labelledby="hero-title"

From c56ce03bb654d87b22e09c73c48a3a35a114486a Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Wed, 27 May 2026 08:21:15 -0400
Subject: [PATCH 2/2] Format Python files for CI

---
 policybench/analysis.py          | 21 +++++++++++----------
 tests/test_analysis.py           |  4 +---
 tests/test_snapshot_artifacts.py |  9 ++++-----
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/policybench/analysis.py b/policybench/analysis.py
index fbf49ad..f6a439b 100644
--- a/policybench/analysis.py
+++ b/policybench/analysis.py
@@ -801,9 +801,7 @@ def _match(row):
             pred_flag = binary_flag(pred)
             ref_flag = binary_flag(ref)
             return (
-                pred_flag is not None
-                and ref_flag is not None
-                and pred_flag == ref_flag
+                pred_flag is not None and ref_flag is not None and pred_flag == ref_flag
             )
         return (float(ref) == 0.0) == (float(pred) == 0.0)
 
@@ -1245,14 +1243,17 @@ def compute_metrics(
             row["threshold_score"] = float(
                 np.mean([exact, within_1pct, within_5pct, within_10pct])
             )
-            row["score"] = float(
-                np.mean(
-                    [
-                        bounded_row_score(variable, truth, pred)
-                        for truth, pred in zip(y_true, y_pred, strict=True)
-                    ]
+            row["score"] = (
+                float(
+                    np.mean(
+                        [
+                            bounded_row_score(variable, truth, pred)
+                            for truth, pred in zip(y_true, y_pred, strict=True)
+                        ]
+                    )
                 )
-            ) * coverage
+                * coverage
+            )
 
         rows.append(row)
 
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 620594c..82c53ce 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -764,9 +764,7 @@ def test_build_dashboard_payload_matches_frontend_shape(self):
         assert "within10pctRunMean" not in payload["modelStats"][0]
         assert payload["heatmap"][0]["condition"] == "no_tools"
         income_program = next(
-            row
-            for row in payload["programStats"]
-            if row["variable"] == "income_tax"
+            row for row in payload["programStats"] if row["variable"] == "income_tax"
         )
         assert income_program["score"] == pytest.approx(92.5)
         assert income_program["thresholdScore"] == pytest.approx(37.5)
diff --git a/tests/test_snapshot_artifacts.py b/tests/test_snapshot_artifacts.py
index a5a7bc4..478b1ca 100644
--- a/tests/test_snapshot_artifacts.py
+++ b/tests/test_snapshot_artifacts.py
@@ -149,16 +149,15 @@ def _aggregate_scenario_metric(country_payload: dict, metric: str) -> dict[str,
             household_score = 0.0
             for variable, model_map in variables:
                 row = model_map[model]
-                household_score += (
-                    raw_row_weights[variable] / denominator
-                ) * row[metric]
+                household_score += (raw_row_weights[variable] / denominator) * row[
+                    metric
+                ]
             entry = totals.setdefault(model, {"score": 0.0, "households": 0.0})
             entry["score"] += household_score
             entry["households"] += 1
 
     return {
-        model: entry["score"] / entry["households"]
-        for model, entry in totals.items()
+        model: entry["score"] / entry["households"] for model, entry in totals.items()
     }