diff --git a/backend/agent_tools.py b/backend/agent_tools.py
index fa902ff..cd4e067 100644
--- a/backend/agent_tools.py
+++ b/backend/agent_tools.py
@@ -728,6 +728,9 @@ def execute_tool(
"run_python": run_python,
"compute": compute,
"generate_chart": generate_chart,
+ "calculate_household": calculate_household,
+ "run_economy_simulation": run_economy_simulation,
+ "analyse_microdata": analyse_microdata,
}
if tool_name not in tools:
return {"error": f"Unknown tool: {tool_name}"}
@@ -747,12 +750,156 @@ def execute_tool(
return {"error": str(e)}
+# Shared reform-input schema for the typed tools; each of their definitions uses
+# this same dict object. Top-level keys are the UK programme names accepted by
+# _build_compiled_policy(); values within each programme match the corresponding
+# *Params constructor (e.g. IncomeTaxParams). _build_compiled_policy enforces the
+# real validation and raises a clean error listing valid fields.
+REFORM_SCHEMA = {
+ "type": "object",
+ "description": (
+ "Parametric reform. Top-level keys are programmes; values are the "
+ "parameter changes for that programme. Valid programmes: income_tax, "
+ "national_insurance, universal_credit, child_benefit, state_pension, "
+ "pension_credit, benefit_cap, housing_benefit, tax_credits, "
+ "scottish_child_payment, stamp_duty, capital_gains_tax, wealth_tax. "
+ "Field names within each programme match the corresponding *Params "
+ "constructor (e.g. income_tax.personal_allowance, "
+ "national_insurance.main_rate). For structural reforms (new policies "
+ "or replacing existing ones), use run_python instead."
+ ),
+ "additionalProperties": True,  # deliberately permissive: no "properties" are declared
+}
+
+
def get_tool_definitions(backend_id: str = "uk_compiled") -> List[Dict[str, Any]]:
backend = get_backend(backend_id)
return [
+ {
+ "name": "calculate_household",
+ "description": (
+ "Compute taxes, benefits, and net income for a SPECIFIC household "
+ "you describe (person/benunit/household records). Prefer this over "
+ "run_python for any household-level question with a defined "
+ "household composition. Returns full per-person, per-benunit, and "
+ "per-household results including baseline and reform columns."
+ ),
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "person": {
+ "type": "array",
+ "items": {"type": "object"},
+ "description": (
+ "List of person records. Each must include person_id, "
+ "benunit_id, household_id, age. Common optional fields: "
+ "employment_income, self_employment_income, pension_income."
+ ),
+ },
+ "benunit": {
+ "type": "array",
+ "items": {"type": "object"},
+ "description": (
+ "List of benefit-unit records, each with benunit_id "
+ "and household_id."
+ ),
+ },
+ "household": {
+ "type": "array",
+ "items": {"type": "object"},
+ "description": (
+ "List of household records, each with household_id "
+ "and region (e.g. 'ENGLAND', 'SCOTLAND', 'WALES', "
+ "'NORTHERN_IRELAND')."
+ ),
+ },
+ "year": {"type": "integer", "default": 2025},
+ "reform": REFORM_SCHEMA,
+ },
+ "required": ["person", "benunit", "household"],
+ },
+ },
+ {
+ "name": "run_economy_simulation",
+ "description": (
+ "Run a UK economy-wide microsimulation comparing baseline "
+ "(current law) to a parametric reform. Returns budgetary impact, "
+ "programme breakdown, decile impacts, winners/losers, caseloads, "
+ "HBAI incomes, and poverty metrics — all methodology-pinned to "
+ "PolicyEngine canonical definitions (BHC poverty, OECD-modified "
+ "equivalisation, survey-weighted). Prefer this over run_python "
+ "for any society-wide reform analysis. Does NOT support structural "
+ "reforms — for those, use run_python."
+ ),
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "year": {"type": "integer", "default": 2025},
+ "reform": REFORM_SCHEMA,
+ "dataset": {
+ "type": "string",
+ "enum": ["frs", "efrs", "spi", "lcfs", "was"],
+ "default": "frs",
+ "description": (
+ "Microdata source. frs = Family Resources Survey "
+ "(default), efrs = Enhanced FRS (matches PE-API)."
+ ),
+ },
+ },
+ "required": [],
+ },
+ },
+ {
+ "name": "analyse_microdata",
+ "description": (
+ "Slice, filter, sample, or aggregate the cached microdata for a "
+ "given year + reform. Use this for 'show me N households like X', "
+ "'break the result down by Y', 'mean income for subset Z'. "
+ "Operates on cached microdata, so it's fast for follow-up "
+ "questions on the same reform."
+ ),
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "entity": {
+ "type": "string",
+ "enum": ["persons", "benunits", "households"],
+ },
+ "operation": {
+ "type": "string",
+ "enum": ["sample", "mean", "sum", "count", "group_by", "describe"],
+ },
+ "year": {"type": "integer", "default": 2025},
+ "reform": REFORM_SCHEMA,
+ "filters": {
+ "type": "object",
+ "description": (
+ "Column → predicate. Predicate can be a scalar (=), "
+ "a list (in), or a dict with keys "
+ "min/max/gt/lt/gte/lte/ne."
+ ),
+ },
+ "columns": {"type": "array", "items": {"type": "string"}},
+ "group_by": {"type": "array", "items": {"type": "string"}},
+ "n": {
+ "type": "integer",
+ "default": 5,
+ "description": "Sample size when operation=sample.",
+ },
+ "dataset": {"type": "string", "default": "frs"},
+ },
+ "required": ["entity", "operation"],
+ },
+ },
{
"name": "run_python",
- "description": backend.tool_description(),
+ "description": backend.tool_description() + (
+ "\n\nFALLBACK: prefer the typed tools (calculate_household, "
+ "run_economy_simulation, analyse_microdata) when the question fits "
+ "their shape. Use run_python for questions those tools can't "
+ "express — structural reforms, novel aggregations, parameter "
+ "introspection, historical lookups, etc."
+ ),
"input_schema": {
"type": "object",
"properties": {
diff --git a/backend/routes/chatbot.py b/backend/routes/chatbot.py
index d091d6f..8edfcfc 100644
--- a/backend/routes/chatbot.py
+++ b/backend/routes/chatbot.py
@@ -29,16 +29,19 @@
# ---------------------------------------------------------------------------
SYSTEM_PROMPT_TEMPLATE = """You are an expert policy analysis assistant for a microsimulation platform. You help users understand and analyse tax and benefit policy using reproducible Python code.
-CRITICAL - ALWAYS COMPUTE WITH PYTHON:
+CRITICAL - ALWAYS COMPUTE; NEVER ANSWER FROM MEMORY:
- Never answer quantitative policy questions from memory.
-- You have one execution tool: `run_python`.
-- Use `run_python` for every tax, benefit, reform, schedule, poverty, decile, and distributional question.
-- Every number in your answer must come directly from the Python result you just computed.
-
-CRITICAL - START BY READING THE MODEL INSTRUCTIONS:
-- At the start of a new line of analysis, use Python to inspect `capabilities()`.
-- Use that to ground yourself in the available datasets, years, programmes, and caveats before you simulate.
-- If the user asks about something outside the modelled scope, say so clearly instead of guessing.
+- Every number in your answer must come directly from a tool call you just made.
+
+You have four execution tools, ordered from most specific to most general:
+- `calculate_household` — for any question about a specific household you can describe (person/benunit/household composition with incomes/ages/region).
+- `run_economy_simulation` — for society-wide reform analysis. Methodology is pinned (BHC poverty, OECD-modified equivalisation, FRS dataset by default). Parametric reforms only.
+- `analyse_microdata` — for slicing, filtering, sampling, or aggregating across the population for a given reform.
+- `run_python` — fallback for anything the typed tools can't express (structural reforms, novel aggregations, parameter history lookups, etc.).
+
+Prefer the typed tools first; reach for run_python only when no typed tool fits. For the typed tools, the JSON schema tells you what's allowed — call them directly. For run_python, inspect `capabilities()` first to ground yourself in available datasets, years, programmes, and caveats.
+
+If the user asks about something outside the modelled scope, say so clearly instead of guessing.
{backend_prompt_context}
diff --git a/frontend/src/app/ChatPage.tsx b/frontend/src/app/ChatPage.tsx
index aa4a9ba..a79372f 100644
--- a/frontend/src/app/ChatPage.tsx
+++ b/frontend/src/app/ChatPage.tsx
@@ -719,7 +719,12 @@ export default function ChatPage() {
>
{t.status === "pending" && }
{hasDetails && }
- {t.tool_name === "run_python" ? "python" : t.tool_name}
+ {({
+ run_python: "python",
+ calculate_household: "household sim",
+ run_economy_simulation: "economy sim",
+ analyse_microdata: "microdata analysis",
+ } as Record<string, string>)[t.tool_name] ?? t.tool_name}
{t.status !== "pending" && ✓}
{isExpanded && hasDetails && renderToolDetails(t)}