From c9517dc0ad7ceb78dc0387fb797dc41f8a7db6f7 Mon Sep 17 00:00:00 2001 From: "Vincent (Wen Yu) Ge" Date: Fri, 26 Jun 2026 21:07:26 -0400 Subject: [PATCH] feat(runner): Task/todo + controlled subagents on pi (#698) pi gets the Task store tools (TaskCreate/Update/Get/List) surfaced in the TUI, and a controlled dispatch_agent that spawns a read-only nested session inheriting the same security fence. Adds extractText + tool-I/O logging parity. Co-Authored-By: Claude Opus 4.8 --- src/lib/agent/runner/backends/pi-subagent.ts | 134 ++++++++++++++++++ src/lib/agent/runner/backends/pi-tasks.ts | 137 +++++++++++++++++++ src/lib/agent/runner/backends/pi.ts | 59 ++++++-- 3 files changed, 322 insertions(+), 8 deletions(-) create mode 100644 src/lib/agent/runner/backends/pi-subagent.ts create mode 100644 src/lib/agent/runner/backends/pi-tasks.ts diff --git a/src/lib/agent/runner/backends/pi-subagent.ts b/src/lib/agent/runner/backends/pi-subagent.ts new file mode 100644 index 00000000..1f5e7f7d --- /dev/null +++ b/src/lib/agent/runner/backends/pi-subagent.ts @@ -0,0 +1,134 @@ +/** + * Controlled subagent dispatch for pi (#526). pi has no native subagent + * mechanism, so a subagent is a nested `createAgentSession` we construct — which + * means WE decide its powers, closing the leak the claude-agent-sdk path warns + * about (it can't propagate the parent's disallowedTools into subagents). + * + * Controls on every child: + * - the SAME security extension (canUseTool + YARA, fail-closed) — shared state, + * so the child shares the parent's tool-call cap and violation latch; + * - read-only built-ins (read/grep/find/ls) plus the parent's env-scrubbed, allowlisted bash — no + * write/edit, so a subagent can research but never mutate the project; + * - no other custom tools — no .env writes, and crucially no `dispatch_agent`, so a + * child cannot recurse (depth is hard-capped at 1). 
+ */ + +import { Type } from 'typebox'; +import { defineTool } from '@earendil-works/pi-coding-agent'; +import type { ToolDefinition } from '@earendil-works/pi-coding-agent'; +import { logToFile } from '@utils/debug'; + +/** + * Read-only built-ins a subagent may use. bash is supplied separately as the + * parent's env-scrubbed tool (below), not the built-in, so a subagent's + * subprocesses are locked down too. + */ +const SUBAGENT_TOOLS = ['read', 'grep', 'find', 'ls']; + +const SUBAGENT_SYSTEM_PROMPT = [ + 'You are a read-only research subagent for the PostHog wizard.', + 'You can read and search files and run safe build/inspect shell commands.', + 'You cannot edit files, modify .env, or dispatch further subagents.', + 'Investigate the task you are given and report concise findings as your final message.', +].join('\n'); + +function text(s: string): { + content: [{ type: 'text'; text: string }]; + details: unknown; +} { + return { content: [{ type: 'text', text: s }], details: {} }; +} + +function extractText(message: unknown): string { + const content = (message as { content?: unknown })?.content; + if (typeof content === 'string') return content; + if (Array.isArray(content)) { + return content + .filter((c): c is { type: string; text: string } => { + const b = c as { type?: string; text?: unknown }; + return b?.type === 'text' && typeof b.text === 'string'; + }) + .map((c) => c.text) + .join(''); + } + return ''; +} + +export interface SubagentContext { + /** Resolved gateway model (same as the parent). */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + model: import('@earendil-works/pi-ai').Model; + /** Registry holding the gateway provider. */ + modelRegistry: import('@earendil-works/pi-coding-agent').ModelRegistry; + cwd: string; + agentDir: string; + /** The parent's security extension factory — reused so the fence is inherited. 
*/ + securityFactory: (pi: unknown) => void; + /** The parent's env-scrubbed bash, so a subagent's subprocesses are locked down too. */ + bashTool: ToolDefinition; + /** pi SDK entrypoints, already imported by the backend. */ + sdk: { + createAgentSession: typeof import('@earendil-works/pi-coding-agent')['createAgentSession']; + DefaultResourceLoader: typeof import('@earendil-works/pi-coding-agent')['DefaultResourceLoader']; + SessionManager: typeof import('@earendil-works/pi-coding-agent')['SessionManager']; + }; +} + +export function createDispatchAgentTool(ctx: SubagentContext): ToolDefinition { + return defineTool({ + name: 'dispatch_agent', + label: 'Dispatch subagent', + description: + 'Delegate a focused, read-only research subtask to a subagent (e.g. "find where events are captured"). The subagent can read/search files and run safe shell, but CANNOT edit files, change .env, or dispatch further subagents. Returns its findings.', + promptSnippet: + 'dispatch_agent(description, prompt) — delegate a read-only research subtask', + parameters: Type.Object({ + description: Type.String({ description: 'Short label for the subtask' }), + prompt: Type.String({ description: 'Full instruction for the subagent' }), + }), + // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise + async execute(_id, args) { + const { createAgentSession, DefaultResourceLoader, SessionManager } = + ctx.sdk; + + const loader = new DefaultResourceLoader({ + cwd: ctx.cwd, + agentDir: ctx.agentDir, + systemPrompt: SUBAGENT_SYSTEM_PROMPT, + noExtensions: true, + noSkills: true, + noContextFiles: true, + noPromptTemplates: true, + noThemes: true, + extensionFactories: [ctx.securityFactory], + }); + await loader.reload(); + + const { session: child } = await createAgentSession({ + model: ctx.model, + modelRegistry: ctx.modelRegistry, + cwd: ctx.cwd, + sessionManager: SessionManager.inMemory(ctx.cwd), + resourceLoader: loader, + tools: SUBAGENT_TOOLS, // 
read-only built-ins; no write/edit, no dispatch_agent + customTools: [ctx.bashTool], // env-scrubbed bash only (still allowlist-fenced) + }); + + let result = ''; + const unsub = child.subscribe((e) => { + if (e.type === 'message_end') { + const t = extractText(e.message).trim(); + if (t) result = t; + } + }); + logToFile(`[pi] subagent dispatch: ${args.description}`); + try { + await child.prompt(args.prompt); + } finally { + unsub(); + } + logToFile(`[pi] subagent "${args.description}" → ${result.length} chars`); + return text(result || 'Subagent completed with no textual result.'); + }, + }); +} diff --git a/src/lib/agent/runner/backends/pi-tasks.ts b/src/lib/agent/runner/backends/pi-tasks.ts new file mode 100644 index 00000000..e12f66e1 --- /dev/null +++ b/src/lib/agent/runner/backends/pi-tasks.ts @@ -0,0 +1,137 @@ +/** + * Task/todo parity for pi (#526). The same four Task tools the anthropic path + * exposes (TaskCreate/Update/Get/List), as pi `defineTool` tools backed by a + * shared in-memory store. Every mutation pushes the list to the TUI via + * `getUI().syncTodos`, so the todo panel updates live under pi exactly like the + * anthropic path — the thing that was missing before. 
+ */ + +import { Type } from 'typebox'; +import { defineTool } from '@earendil-works/pi-coding-agent'; +import type { ToolDefinition } from '@earendil-works/pi-coding-agent'; +import { getUI } from '@ui'; + +export type TaskStatus = 'pending' | 'in_progress' | 'completed'; +export interface TaskEntry { + content: string; + status: TaskStatus; + activeForm?: string; +} +export type TaskStore = Map; + +function text(s: string): { + content: [{ type: 'text'; text: string }]; + details: unknown; +} { + return { content: [{ type: 'text', text: s }], details: {} }; +} + +function syncToTui(store: TaskStore): void { + getUI().syncTodos( + Array.from(store.values()).map((t) => ({ + content: t.content, + status: t.status, + activeForm: t.activeForm, + })), + ); +} + +/** Build the four Task tools over a fresh store. */ +export function createWizardPiTaskTools(): { + tools: ToolDefinition[]; + store: TaskStore; +} { + const store: TaskStore = new Map(); + + const taskCreate = defineTool({ + name: 'TaskCreate', + label: 'Create task', + description: + 'Create a task in the shared todo list. 
Returns its assigned id.', + promptSnippet: + 'TaskCreate(content) — add a todo (surfaces progress in the UI)', + parameters: Type.Object({ + content: Type.String({ description: 'Imperative task description' }), + activeForm: Type.Optional( + Type.String({ description: 'Present-continuous form for the spinner' }), + ), + }), + // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise + async execute(_id, args) { + const id = `task-${store.size + 1}`; + store.set(id, { + content: args.content, + status: 'pending', + activeForm: args.activeForm, + }); + syncToTui(store); + return text(`Created ${id}`); + }, + }); + + const taskUpdate = defineTool({ + name: 'TaskUpdate', + label: 'Update task', + description: + 'Update an existing task by id (status, content, or activeForm).', + promptSnippet: + 'TaskUpdate(taskId, status) — mark a todo in_progress/completed', + parameters: Type.Object({ + taskId: Type.String(), + status: Type.Optional( + Type.Union([ + Type.Literal('pending'), + Type.Literal('in_progress'), + Type.Literal('completed'), + ]), + ), + content: Type.Optional(Type.String()), + activeForm: Type.Optional(Type.String()), + }), + // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise + async execute(_id, args) { + const existing = store.get(args.taskId); + if (!existing) return text(`No such task: ${args.taskId}`); + store.set(args.taskId, { + content: args.content ?? existing.content, + status: (args.status as TaskStatus) ?? existing.status, + activeForm: args.activeForm ?? 
existing.activeForm, + }); + syncToTui(store); + return text(`Updated ${args.taskId}`); + }, + }); + + const taskGet = defineTool({ + name: 'TaskGet', + label: 'Get task', + description: 'Fetch a single task by id.', + parameters: Type.Object({ taskId: Type.String() }), + // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise + async execute(_id, args) { + const t = store.get(args.taskId); + return text( + t + ? JSON.stringify({ id: args.taskId, ...t }) + : `No such task: ${args.taskId}`, + ); + }, + }); + + const taskList = defineTool({ + name: 'TaskList', + label: 'List tasks', + description: 'List all tasks in the shared todo list.', + parameters: Type.Object({}), + // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise + async execute() { + return text( + JSON.stringify( + Array.from(store.entries()).map(([id, t]) => ({ id, ...t })), + ), + ); + }, + }); + + return { tools: [taskCreate, taskUpdate, taskGet, taskList], store }; +} diff --git a/src/lib/agent/runner/backends/pi.ts b/src/lib/agent/runner/backends/pi.ts index f2a1c8dc..5c2f4d24 100644 --- a/src/lib/agent/runner/backends/pi.ts +++ b/src/lib/agent/runner/backends/pi.ts @@ -66,6 +66,22 @@ function buildGatewayHeaders( return headers; } +/** Pull plain text out of a pi AgentMessage (content is text/image blocks). 
*/ +function extractText(message: unknown): string { + const content = (message as { content?: unknown })?.content; + if (typeof content === 'string') return content; + if (Array.isArray(content)) { + return content + .filter((c): c is { type: string; text: string } => { + const block = c as { type?: string; text?: unknown }; + return block?.type === 'text' && typeof block.text === 'string'; + }) + .map((c) => c.text) + .join(''); + } + return ''; +} + export const piBackend: AgentRunner = { name: 'pi', @@ -159,10 +175,28 @@ export const piBackend: AgentRunner = { // stay out of the static module graph so CommonJS unit tests can load the // backend seam without parsing it. const { createWizardPiTools } = await import('./pi-tools'); - const customTools = createWizardPiTools({ - workingDirectory: session.installDir, - skillsBaseUrl: boot.skillsBaseUrl, - }); + const { createWizardPiTaskTools } = await import('./pi-tasks'); + const { createDispatchAgentTool } = await import('./pi-subagent'); + const customTools = [ + ...createWizardPiTools({ + workingDirectory: session.installDir, + skillsBaseUrl: boot.skillsBaseUrl, + }), + // Task/todo tools (#526): render the todo list live in the TUI, parity + // with the anthropic path. + ...createWizardPiTaskTools().tools, + // Controlled subagent dispatch (#526): a nested fenced session with a + // read-only toolset and no dispatch_agent of its own, so it can't + // escape the fence or recurse. + createDispatchAgentTool({ + model, + modelRegistry: registry, + cwd: session.installDir, + agentDir: getAgentDir(), + securityFactory: security.factory as (pi: unknown) => void, + sdk: { createAgentSession, DefaultResourceLoader, SessionManager }, + }), + ]; const { session: agentSession } = await createAgentSession({ model, @@ -173,15 +207,24 @@ export const piBackend: AgentRunner = { customTools, }); - // Map pi events onto the run spinner + the log file. 
Markers + todos are - // a follow-up (the shared stream→TUI bridge); v1 keeps the spinner alive - // and records tool I/O to the log. + // Map pi events onto the run spinner + the log file, mirroring the + // anthropic path's log shape (assistant turns + tool I/O) and driving the + // single run spinner with one stable status at a time (no overlap). const unsubscribe = agentSession.subscribe((event) => { switch (event.type) { + case 'message_end': { + const assistant = extractText(event.message).trim(); + if (assistant) { + logToFile(`[pi] assistant: ${assistant.slice(0, 1000)}`); + } + break; + } case 'tool_execution_start': { const args = JSON.stringify(event.args ?? {}).slice(0, 200); logToFile(`[pi] → ${event.toolName} ${args}`); - spinner.message(`Running ${event.toolName}…`); + // Don't surface raw tool names in the spinner — the anthropic path + // doesn't, and it reads as noise. The Task panel (syncTodos) is the + // visible progress, matching the anthropic presentation. break; } case 'tool_execution_end': {