Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 148 additions & 1 deletion backend/agent_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,9 @@ def execute_tool(
"run_python": run_python,
"compute": compute,
"generate_chart": generate_chart,
"calculate_household": calculate_household,
"run_economy_simulation": run_economy_simulation,
"analyse_microdata": analyse_microdata,
}
if tool_name not in tools:
return {"error": f"Unknown tool: {tool_name}"}
Expand All @@ -747,12 +750,156 @@ def execute_tool(
return {"error": str(e)}


# Reform-input schema reused by every typed tool that accepts a `reform`.
#
# Shape: a JSON object whose top-level keys are UK programme names (the ones
# _build_compiled_policy() accepts) and whose values carry the parameter
# changes for that programme, using the field names of the matching *Params
# constructor (IncomeTaxParams, for instance).
#
# The schema is intentionally loose (`additionalProperties: True`): the real
# validation lives in _build_compiled_policy(), which raises a clean error
# listing the valid fields.
REFORM_SCHEMA = {
    "type": "object",
    # One tuple entry per sentence; joined with single spaces so the text the
    # model sees is a single paragraph.
    "description": " ".join(
        (
            "Parametric reform.",
            "Top-level keys are programmes; values are the parameter changes "
            "for that programme.",
            "Valid programmes: income_tax, national_insurance, universal_credit, "
            "child_benefit, state_pension, pension_credit, benefit_cap, "
            "housing_benefit, tax_credits, scottish_child_payment, stamp_duty, "
            "capital_gains_tax, wealth_tax.",
            "Field names within each programme match the corresponding *Params "
            "constructor (e.g. income_tax.personal_allowance, "
            "national_insurance.main_rate).",
            "For structural reforms (new policies or replacing existing ones), "
            "use run_python instead.",
        )
    ),
    "additionalProperties": True,
}


def get_tool_definitions(backend_id: str = "uk_compiled") -> List[Dict[str, Any]]:
backend = get_backend(backend_id)
return [
{
"name": "calculate_household",
"description": (
"Compute taxes, benefits, and net income for a SPECIFIC household "
"you describe (person/benunit/household records). Prefer this over "
"run_python for any household-level question with a defined "
"household composition. Returns full per-person, per-benunit, and "
"per-household results including baseline and reform columns."
),
"input_schema": {
"type": "object",
"properties": {
"person": {
"type": "array",
"items": {"type": "object"},
"description": (
"List of person records. Each must include person_id, "
"benunit_id, household_id, age. Common optional fields: "
"employment_income, self_employment_income, pension_income."
),
},
"benunit": {
"type": "array",
"items": {"type": "object"},
"description": (
"List of benefit-unit records, each with benunit_id "
"and household_id."
),
},
"household": {
"type": "array",
"items": {"type": "object"},
"description": (
"List of household records, each with household_id "
"and region (e.g. 'ENGLAND', 'SCOTLAND', 'WALES', "
"'NORTHERN_IRELAND')."
),
},
"year": {"type": "integer", "default": 2025},
"reform": REFORM_SCHEMA,
},
"required": ["person", "benunit", "household"],
},
},
{
"name": "run_economy_simulation",
"description": (
"Run a UK economy-wide microsimulation comparing baseline "
"(current law) to a parametric reform. Returns budgetary impact, "
"programme breakdown, decile impacts, winners/losers, caseloads, "
"HBAI incomes, and poverty metrics — all methodology-pinned to "
"PolicyEngine canonical definitions (BHC poverty, OECD-modified "
"equivalisation, survey-weighted). Prefer this over run_python "
"for any society-wide reform analysis. Does NOT support structural "
"reforms — for those, use run_python."
),
"input_schema": {
"type": "object",
"properties": {
"year": {"type": "integer", "default": 2025},
"reform": REFORM_SCHEMA,
"dataset": {
"type": "string",
"enum": ["frs", "efrs", "spi", "lcfs", "was"],
"default": "frs",
"description": (
"Microdata source. frs = Family Resources Survey "
"(default), efrs = Enhanced FRS (matches PE-API)."
),
},
},
"required": [],
},
},
{
"name": "analyse_microdata",
"description": (
"Slice, filter, sample, or aggregate the cached microdata for a "
"given year + reform. Use this for 'show me N households like X', "
"'break the result down by Y', 'mean income for subset Z'. "
"Operates on cached microdata, so it's fast for follow-up "
"questions on the same reform."
),
"input_schema": {
"type": "object",
"properties": {
"entity": {
"type": "string",
"enum": ["persons", "benunits", "households"],
},
"operation": {
"type": "string",
"enum": ["sample", "mean", "sum", "count", "group_by", "describe"],
},
"year": {"type": "integer", "default": 2025},
"reform": REFORM_SCHEMA,
"filters": {
"type": "object",
"description": (
"Column → predicate. Predicate can be a scalar (=), "
"a list (in), or a dict with keys "
"min/max/gt/lt/gte/lte/ne."
),
},
"columns": {"type": "array", "items": {"type": "string"}},
"group_by": {"type": "array", "items": {"type": "string"}},
"n": {
"type": "integer",
"default": 5,
"description": "Sample size when operation=sample.",
},
"dataset": {"type": "string", "default": "frs"},
},
"required": ["entity", "operation"],
},
},
{
"name": "run_python",
"description": backend.tool_description(),
"description": backend.tool_description() + (
"\n\nFALLBACK: prefer the typed tools (calculate_household, "
"run_economy_simulation, analyse_microdata) when the question fits "
"their shape. Use run_python for questions those tools can't "
"express — structural reforms, novel aggregations, parameter "
"introspection, historical lookups, etc."
),
"input_schema": {
"type": "object",
"properties": {
Expand Down
21 changes: 12 additions & 9 deletions backend/routes/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,19 @@
# ---------------------------------------------------------------------------
SYSTEM_PROMPT_TEMPLATE = """You are an expert policy analysis assistant for a microsimulation platform. You help users understand and analyse tax and benefit policy using reproducible Python code.

CRITICAL - ALWAYS COMPUTE WITH PYTHON:
CRITICAL - ALWAYS COMPUTE; NEVER ANSWER FROM MEMORY:
- Never answer quantitative policy questions from memory.
- You have one execution tool: `run_python`.
- Use `run_python` for every tax, benefit, reform, schedule, poverty, decile, and distributional question.
- Every number in your answer must come directly from the Python result you just computed.

CRITICAL - START BY READING THE MODEL INSTRUCTIONS:
- At the start of a new line of analysis, use Python to inspect `capabilities()`.
- Use that to ground yourself in the available datasets, years, programmes, and caveats before you simulate.
- If the user asks about something outside the modelled scope, say so clearly instead of guessing.
- Every number in your answer must come directly from a tool call you just made.

You have four execution tools, ordered from most specific to most general:
- `calculate_household` — for any question about a specific household you can describe (person/benunit/household composition with incomes/ages/region).
- `run_economy_simulation` — for society-wide reform analysis. Methodology is pinned (BHC poverty, OECD-modified equivalisation, FRS dataset by default). Parametric reforms only.
- `analyse_microdata` — for slicing, filtering, sampling, or aggregating across the population for a given reform.
- `run_python` — fallback for anything the typed tools can't express (structural reforms, novel aggregations, parameter history lookups, etc.).

Prefer the typed tools first; reach for run_python only when no typed tool fits. For the typed tools, the JSON schema tells you what's allowed — call them directly. For run_python, inspect `capabilities()` first to ground yourself in available datasets, years, programmes, and caveats.

If the user asks about something outside the modelled scope, say so clearly instead of guessing.

{backend_prompt_context}

Expand Down
7 changes: 6 additions & 1 deletion frontend/src/app/ChatPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,12 @@ export default function ChatPage() {
>
{t.status === "pending" && <Loader size={10} color={THEME.primary} />}
{hasDetails && <IconChevronDown size={10} style={{ opacity: 0.4, transform: isExpanded ? "none" : "rotate(-90deg)", transition: "transform 0.15s" }} />}
<span style={{ color: THEME.text3 }}>{t.tool_name === "run_python" ? "python" : t.tool_name}</span>
<span style={{ color: THEME.text3 }}>{({
run_python: "python",
calculate_household: "household sim",
run_economy_simulation: "economy sim",
analyse_microdata: "microdata analysis",
} as Record<string, string>)[t.tool_name] ?? t.tool_name}</span>
{t.status !== "pending" && <span style={{ color: THEME.muted }}>✓</span>}
</div>
{isExpanded && hasDetails && renderToolDetails(t)}
Expand Down