From c9517dc0ad7ceb78dc0387fb797dc41f8a7db6f7 Mon Sep 17 00:00:00 2001
From: "Vincent (Wen Yu) Ge" <gewenyu99@gmail.com>
Date: Fri, 26 Jun 2026 21:07:26 -0400
Subject: [PATCH] feat(runner): Task/todo + controlled subagents on pi (#698)

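pi gets the Task store tools (TaskCreate/Update/Get/List), whose list is synced
to the TUI todo panel, and a controlled dispatch_agent that spawns a read-only
nested session inheriting the same security fence. Adds extractText so assistant
turns are logged alongside tool I/O (parity with the anthropic path); the run
spinner no longer shows raw tool names.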

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/lib/agent/runner/backends/pi-subagent.ts | 134 ++++++++++++++++++
 src/lib/agent/runner/backends/pi-tasks.ts    | 137 +++++++++++++++++++
 src/lib/agent/runner/backends/pi.ts          |  59 ++++++--
 3 files changed, 322 insertions(+), 8 deletions(-)
 create mode 100644 src/lib/agent/runner/backends/pi-subagent.ts
 create mode 100644 src/lib/agent/runner/backends/pi-tasks.ts
diff --git a/src/lib/agent/runner/backends/pi-subagent.ts b/src/lib/agent/runner/backends/pi-subagent.ts
new file mode 100644
index 00000000..1f5e7f7d
--- /dev/null
+++ b/src/lib/agent/runner/backends/pi-subagent.ts
@@ -0,0 +1,134 @@
+/**
+ * Controlled subagent dispatch for pi (#526). pi has no native subagent
+ * mechanism, so a subagent is a nested `createAgentSession` we construct — which
+ * means WE decide its powers, closing the leak the claude-agent-sdk path warns
+ * about (it can't propagate the parent's disallowedTools into subagents).
+ *
+ * Controls on every child:
+ *  - the SAME security extension (canUseTool + YARA, fail-closed) — shared state,
+ *    so the child shares the parent's tool-call cap and violation latch;
+ *  - a read-only toolset (read/grep/find/ls built-ins + the parent's allowlisted,
+ *    env-scrubbed bash) — no write/edit, so it can research but never mutate;
+ *  - no other custom tools — no .env writes, and crucially no `dispatch_agent`,
+ *    so a child cannot recurse (depth is hard-capped at 1).
+ */
+
+import { Type } from 'typebox';
+import { defineTool } from '@earendil-works/pi-coding-agent';
+import type { ToolDefinition } from '@earendil-works/pi-coding-agent';
+import { logToFile } from '@utils/debug';
+
+/**
+ * Built-in tool names enabled for a subagent — all read-only. bash is not in
+ * this list: the child gets the parent's env-scrubbed bash via `customTools`
+ * (see `SubagentContext.bashTool`), so its subprocesses are locked down too.
+ */
+const SUBAGENT_TOOLS = ['read', 'grep', 'find', 'ls'];
+
+// System prompt given to every child session: four sentences, one per line.
+const SUBAGENT_SYSTEM_PROMPT =
+  'You are a read-only research subagent for the PostHog wizard.\n' +
+  'You can read and search files and run safe build/inspect shell commands.\n' +
+  'You cannot edit files, modify .env, or dispatch further subagents.\n' +
+  'Investigate the task you are given and report concise findings as your final message.';
+
+function text(s: string): {
+  content: [{ type: 'text'; text: string }]; // always exactly one text block
+  details: unknown; // this helper always sets an empty object
+} {
+  return { content: [{ type: 'text', text: s }], details: {} };
+}
+
+/** Concatenate the text blocks of a message; '' when it carries no text. */
+function extractText(message: unknown): string {
+  const body = (message as { content?: unknown })?.content;
+  if (typeof body === 'string') return body;
+  if (!Array.isArray(body)) return '';
+  let joined = '';
+  for (const part of body) {
+    const block = part as { type?: string; text?: unknown };
+    if (block?.type === 'text' && typeof block.text === 'string') {
+      joined += block.text;
+    }
+  }
+  return joined;
+}
+
+export interface SubagentContext {
+  /** Resolved gateway model (same as the parent). */
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  model: import('@earendil-works/pi-ai').Model<any>;
+  /** Registry holding the gateway provider. */
+  modelRegistry: import('@earendil-works/pi-coding-agent').ModelRegistry;
+  cwd: string; // handed to the child's resource loader, session and session manager
+  agentDir: string; // handed to the child's DefaultResourceLoader
+  /** The parent's security extension factory — reused so the fence is inherited. */
+  securityFactory: (pi: unknown) => void;
+  /** Parent's env-scrubbed bash (required). NOTE(review): the pi.ts call site omits it. */
+  bashTool: ToolDefinition;
+  /** pi SDK entrypoints, already imported by the backend. */
+  sdk: {
+    createAgentSession: typeof import('@earendil-works/pi-coding-agent')['createAgentSession'];
+    DefaultResourceLoader: typeof import('@earendil-works/pi-coding-agent')['DefaultResourceLoader'];
+    SessionManager: typeof import('@earendil-works/pi-coding-agent')['SessionManager'];
+  };
+}
+
+/**
+ * Build `dispatch_agent`: each call runs `prompt` in a fresh in-memory child
+ * session (read-only built-ins + `ctx.bashTool`) and returns its last text.
+ */
+export function createDispatchAgentTool(ctx: SubagentContext): ToolDefinition {
+  return defineTool({
+    name: 'dispatch_agent',
+    label: 'Dispatch subagent',
+    description:
+      'Delegate a focused, read-only research subtask to a subagent (e.g. "find where events are captured"). The subagent can read/search files and run safe shell, but CANNOT edit files, change .env, or dispatch further subagents. Returns its findings.',
+    promptSnippet:
+      'dispatch_agent(description, prompt) — delegate a read-only research subtask',
+    parameters: Type.Object({
+      description: Type.String({ description: 'Short label for the subtask' }),
+      prompt: Type.String({ description: 'Full instruction for the subagent' }),
+    }),
+    async execute(_id, args) {
+      const loader = new ctx.sdk.DefaultResourceLoader({
+        cwd: ctx.cwd,
+        agentDir: ctx.agentDir,
+        systemPrompt: SUBAGENT_SYSTEM_PROMPT,
+        noExtensions: true,
+        noSkills: true,
+        noContextFiles: true,
+        noPromptTemplates: true,
+        noThemes: true,
+        extensionFactories: [ctx.securityFactory],
+      });
+      await loader.reload();
+
+      const { session: child } = await ctx.sdk.createAgentSession({
+        model: ctx.model,
+        modelRegistry: ctx.modelRegistry,
+        cwd: ctx.cwd,
+        sessionManager: ctx.sdk.SessionManager.inMemory(ctx.cwd),
+        resourceLoader: loader,
+        tools: SUBAGENT_TOOLS, // read-only built-ins; no write/edit, no dispatch_agent
+        customTools: [ctx.bashTool], // env-scrubbed bash only (still allowlist-fenced)
+      });
+
+      // NOTE(review): does message_end fire for non-assistant messages? Confirm.
+      let result = '';
+      const unsub = child.subscribe((e) => {
+        if (e.type !== 'message_end') return;
+        const t = extractText(e.message).trim();
+        if (t) result = t; // the most recent non-empty text wins
+      });
+      logToFile(`[pi] subagent dispatch: ${args.description}`);
+      try {
+        await child.prompt(args.prompt);
+      } finally {
+        unsub(); // always detach the listener, even when the prompt throws
+      }
+      logToFile(`[pi] subagent "${args.description}" → ${result.length} chars`);
+      return text(result || 'Subagent completed with no textual result.');
+    },
+  });
+}
diff --git a/src/lib/agent/runner/backends/pi-tasks.ts b/src/lib/agent/runner/backends/pi-tasks.ts
new file mode 100644
index 00000000..e12f66e1
--- /dev/null
+++ b/src/lib/agent/runner/backends/pi-tasks.ts
@@ -0,0 +1,137 @@
+/**
+ * Task/todo parity for pi (#526). The same four Task tools the anthropic path
+ * exposes (TaskCreate/Update/Get/List), as pi `defineTool` tools backed by a
+ * shared in-memory store. Every mutation pushes the list to the TUI via
+ * `getUI().syncTodos`, so the todo panel updates live under pi exactly like the
+ * anthropic path — the thing that was missing before.
+ */
+
+import { Type } from 'typebox';
+import { defineTool } from '@earendil-works/pi-coding-agent';
+import type { ToolDefinition } from '@earendil-works/pi-coding-agent';
+import { getUI } from '@ui';
+
+export type TaskStatus = 'pending' | 'in_progress' | 'completed';
+export interface TaskEntry {
+  content: string; // imperative task description
+  status: TaskStatus;
+  activeForm?: string; // present-continuous wording for the spinner
+}
+export type TaskStore = Map<string, TaskEntry>; // keyed by task id (`task-N`)
+
+function text(s: string): {
+  content: [{ type: 'text'; text: string }]; // always exactly one text block
+  details: unknown; // this helper always sets an empty object
+} {
+  return { content: [{ type: 'text', text: s }], details: {} };
+}
+
+/** Push a fresh snapshot of every task in `store` to the TUI todo list. */
+function syncToTui(store: TaskStore): void {
+  const ui = getUI();
+  const snapshot = Array.from(store.values(), (task) => {
+    const { content, status, activeForm } = task;
+    return { content, status, activeForm };
+  });
+  ui.syncTodos(snapshot);
+}
+
+/**
+ * Create the TaskCreate/TaskUpdate/TaskGet/TaskList tools over one new store.
+ * Returns the four tool definitions (in that order) plus the backing `store`;
+ * TaskCreate and TaskUpdate push the whole list to the TUI after each change.
+ */
+export function createWizardPiTaskTools(): {
+  tools: ToolDefinition[];
+  store: TaskStore;
+} {
+  const store: TaskStore = new Map();
+
+  const taskCreate = defineTool({
+    name: 'TaskCreate',
+    label: 'Create task',
+    description:
+      'Create a task in the shared todo list. Returns its assigned id.',
+    promptSnippet:
+      'TaskCreate(content) — add a todo (surfaces progress in the UI)',
+    parameters: Type.Object({
+      content: Type.String({ description: 'Imperative task description' }),
+      activeForm: Type.Optional(
+        Type.String({ description: 'Present-continuous form for the spinner' }),
+      ),
+    }),
+    // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise
+    async execute(_id, args) {
+      // Sequential id; unique because none of these tools deletes an entry.
+      const id = `task-${store.size + 1}`;
+      const { content, activeForm } = args;
+      store.set(id, { content, status: 'pending', activeForm });
+      syncToTui(store);
+      return text(`Created ${id}`);
+    },
+  });
+
+  const statusSchema = Type.Union([
+    Type.Literal('pending'),
+    Type.Literal('in_progress'),
+    Type.Literal('completed'),
+  ]);
+
+  const taskUpdate = defineTool({
+    name: 'TaskUpdate',
+    label: 'Update task',
+    description:
+      'Update an existing task by id (status, content, or activeForm).',
+    promptSnippet:
+      'TaskUpdate(taskId, status) — mark a todo in_progress/completed',
+    parameters: Type.Object({
+      taskId: Type.String(),
+      status: Type.Optional(statusSchema),
+      content: Type.Optional(Type.String()),
+      activeForm: Type.Optional(Type.String()),
+    }),
+    // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise
+    async execute(_id, args) {
+      const { taskId } = args;
+      const previous = store.get(taskId);
+      if (!previous) return text(`No such task: ${taskId}`);
+      // Omitted fields keep their previous value; the entry is replaced whole.
+      const next: TaskEntry = {
+        content: args.content ?? previous.content,
+        status: (args.status as TaskStatus) ?? previous.status,
+        activeForm: args.activeForm ?? previous.activeForm,
+      };
+      store.set(taskId, next);
+      syncToTui(store);
+      return text(`Updated ${taskId}`);
+    },
+  });
+
+  const taskGet = defineTool({
+    name: 'TaskGet',
+    label: 'Get task',
+    description: 'Fetch a single task by id.',
+    parameters: Type.Object({ taskId: Type.String() }),
+    // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise
+    async execute(_id, args) {
+      const { taskId } = args;
+      const found = store.get(taskId);
+      if (!found) return text(`No such task: ${taskId}`);
+      return text(JSON.stringify({ id: taskId, ...found }));
+    },
+  });
+
+  const taskList = defineTool({
+    name: 'TaskList',
+    label: 'List tasks',
+    description: 'List all tasks in the shared todo list.',
+    parameters: Type.Object({}),
+    // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise
+    async execute() {
+      const rows = Array.from(store, ([id, entry]) => ({ id, ...entry }));
+      return text(JSON.stringify(rows));
+    },
+  });
+
+  return { tools: [taskCreate, taskUpdate, taskGet, taskList], store };
+}
diff --git a/src/lib/agent/runner/backends/pi.ts b/src/lib/agent/runner/backends/pi.ts
index f2a1c8dc..5c2f4d24 100644
--- a/src/lib/agent/runner/backends/pi.ts
+++ b/src/lib/agent/runner/backends/pi.ts
@@ -66,6 +66,22 @@ function buildGatewayHeaders(
   return headers;
 }
 
+/** Collect the plain text of a pi AgentMessage (string or block-array content). */
+function extractText(message: unknown): string {
+  const content = (message as { content?: unknown })?.content;
+  if (typeof content === 'string') return content;
+  if (!Array.isArray(content)) return '';
+  // Only blocks shaped { type: 'text', text: string } contribute; others are skipped.
+  const pieces: string[] = [];
+  for (const item of content) {
+    const block = item as { type?: string; text?: unknown };
+    if (block?.type === 'text' && typeof block.text === 'string') {
+      pieces.push(block.text);
+    }
+  }
+  return pieces.join('');
+}
+
 export const piBackend: AgentRunner = {
   name: 'pi',
 
@@ -159,10 +175,28 @@ export const piBackend: AgentRunner = {
       // stay out of the static module graph so CommonJS unit tests can load the
       // backend seam without parsing it.
       const { createWizardPiTools } = await import('./pi-tools');
-      const customTools = createWizardPiTools({
-        workingDirectory: session.installDir,
-        skillsBaseUrl: boot.skillsBaseUrl,
-      });
+      const { createWizardPiTaskTools } = await import('./pi-tasks');
+      const { createDispatchAgentTool } = await import('./pi-subagent');
+      const customTools = [
+        ...createWizardPiTools({
+          workingDirectory: session.installDir,
+          skillsBaseUrl: boot.skillsBaseUrl,
+        }),
+        // Task/todo tools (#526): render the todo list live in the TUI, parity
+        // with the anthropic path.
+        ...createWizardPiTaskTools().tools,
+        // Controlled subagent dispatch (#526): a nested fenced session with a
+        // read-only toolset and no dispatch_agent of its own, so it can't
+        // escape the fence or recurse.
+        createDispatchAgentTool({
+          model,
+          modelRegistry: registry,
+          cwd: session.installDir,
+          agentDir: getAgentDir(),
+          securityFactory: security.factory as (pi: unknown) => void,
+          sdk: { createAgentSession, DefaultResourceLoader, SessionManager },
+        }),
+      ];
 
       const { session: agentSession } = await createAgentSession({
         model,
@@ -173,15 +207,24 @@ export const piBackend: AgentRunner = {
         customTools,
       });
 
-      // Map pi events onto the run spinner + the log file. Markers + todos are
-      // a follow-up (the shared stream→TUI bridge); v1 keeps the spinner alive
-      // and records tool I/O to the log.
+      // Map pi events onto the run spinner + the log file, mirroring the
+      // anthropic path's log shape (assistant turns + tool I/O) and driving the
+      // single run spinner with one stable status at a time (no overlap).
       const unsubscribe = agentSession.subscribe((event) => {
         switch (event.type) {
+          case 'message_end': {
+            const assistant = extractText(event.message).trim();
+            if (assistant) {
+              logToFile(`[pi] assistant: ${assistant.slice(0, 1000)}`);
+            }
+            break;
+          }
           case 'tool_execution_start': {
             const args = JSON.stringify(event.args ?? {}).slice(0, 200);
             logToFile(`[pi] → ${event.toolName} ${args}`);
-            spinner.message(`Running ${event.toolName}…`);
+            // Don't surface raw tool names in the spinner — the anthropic path
+            // doesn't, and it reads as noise. The Task panel (syncTodos) is the
+            // visible progress, matching the anthropic presentation.
             break;
           }
           case 'tool_execution_end': {