Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions src/lib/agent/runner/backends/pi-subagent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/**
* Controlled subagent dispatch for pi (#526). pi has no native subagent
* mechanism, so a subagent is a nested `createAgentSession` we construct — which
* means WE decide its powers, closing the leak the claude-agent-sdk path warns
* about (it can't propagate the parent's disallowedTools into subagents).
*
* Controls on every child:
* - the SAME security extension (canUseTool + YARA, fail-closed) — shared state,
* so the child shares the parent's tool-call cap and violation latch;
* - a read-only built-in toolset (read/grep/find/ls + allowlisted bash) — no
* write/edit, so a subagent can research but never mutate the project;
* - no custom tools — no .env writes, and crucially no `dispatch_agent`, so a
* child cannot recurse (depth is hard-capped at 1).
*/

import { Type } from 'typebox';
import { defineTool } from '@earendil-works/pi-coding-agent';
import type { ToolDefinition } from '@earendil-works/pi-coding-agent';
import { logToFile } from '@utils/debug';

/**
 * Names of the built-in tools handed to every subagent session. All four are
 * read-only; bash is deliberately absent here because the subagent receives the
 * parent's env-scrubbed bash tool as a custom tool instead of the built-in one.
 */
const SUBAGENT_TOOLS: string[] = ['read', 'grep', 'find', 'ls'];

/** System prompt installed on every subagent session (four newline-separated lines). */
const SUBAGENT_SYSTEM_PROMPT =
  'You are a read-only research subagent for the PostHog wizard.' +
  '\n' +
  'You can read and search files and run safe build/inspect shell commands.' +
  '\n' +
  'You cannot edit files, modify .env, or dispatch further subagents.' +
  '\n' +
  'Investigate the task you are given and report concise findings as your final message.';

/**
 * Wrap a plain string in the tool-result shape used by this module: a single
 * text content block plus an empty `details` object.
 */
function text(s: string): {
  content: [{ type: 'text'; text: string }];
  details: unknown;
} {
  const onlyBlock: { type: 'text'; text: string } = { type: 'text', text: s };
  return {
    content: [onlyBlock],
    details: {},
  };
}

/**
 * Pull the plain text out of a message-like value.
 *
 * - String `content` is returned unchanged.
 * - Array `content` yields the concatenation (no separator) of every entry that
 *   has `type === 'text'` and a string `text`; all other entries are ignored.
 * - Anything else (including a null/undefined message) yields `''`.
 */
function extractText(message: unknown): string {
  const payload = (message as { content?: unknown })?.content;
  if (typeof payload === 'string') {
    return payload;
  }
  if (!Array.isArray(payload)) {
    return '';
  }

  let joined = '';
  for (const entry of payload) {
    const block = entry as { type?: string; text?: unknown };
    if (block?.type === 'text' && typeof block.text === 'string') {
      joined += block.text;
    }
  }
  return joined;
}

/**
 * Everything `createDispatchAgentTool` needs from the parent backend to build a
 * nested subagent session: the model and registry, working directories, the
 * parent's security extension factory, the parent's bash tool, and the pi SDK
 * entrypoints used to construct the session.
 */
export interface SubagentContext {
/** Resolved gateway model (same as the parent). */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
model: import('@earendil-works/pi-ai').Model<any>;
/** Registry holding the gateway provider. */
modelRegistry: import('@earendil-works/pi-coding-agent').ModelRegistry;
/** Working directory passed to the resource loader, the child session, and its in-memory session manager. */
cwd: string;
/** Agent directory passed to the child's resource loader. */
agentDir: string;
/** The parent's security extension factory — reused so the fence is inherited. */
securityFactory: (pi: unknown) => void;
/** The parent's env-scrubbed bash, so a subagent's subprocesses are locked down too. */
bashTool: ToolDefinition;
/** pi SDK entrypoints, already imported by the backend. */
sdk: {
createAgentSession: typeof import('@earendil-works/pi-coding-agent')['createAgentSession'];
DefaultResourceLoader: typeof import('@earendil-works/pi-coding-agent')['DefaultResourceLoader'];
SessionManager: typeof import('@earendil-works/pi-coding-agent')['SessionManager'];
};
}

/**
 * Build the `dispatch_agent` tool.
 *
 * Each invocation constructs a fresh nested agent session that:
 * - loads only the parent's security extension factory (every other resource
 *   kind — extensions, skills, context files, prompt templates, themes — is
 *   switched off on the loader);
 * - is limited to the `SUBAGENT_TOOLS` built-ins plus the parent's bash tool as
 *   its sole custom tool, so it has no `dispatch_agent` of its own;
 * - keeps its session state in memory.
 *
 * @param ctx Model, directories, security factory, bash tool and SDK entrypoints
 *   supplied by the parent backend.
 * @returns A tool whose result is the trimmed text of the last non-empty
 *   message the subagent finished, or a fixed fallback sentence when there was
 *   none. Errors thrown by the child's `prompt` propagate to the caller after
 *   the event subscription is released.
 */
export function createDispatchAgentTool(ctx: SubagentContext): ToolDefinition {
  const dispatchParameters = Type.Object({
    description: Type.String({ description: 'Short label for the subtask' }),
    prompt: Type.String({ description: 'Full instruction for the subagent' }),
  });

  const dispatchTool = defineTool({
    name: 'dispatch_agent',
    label: 'Dispatch subagent',
    description:
      'Delegate a focused, read-only research subtask to a subagent (e.g. "find where events are captured"). The subagent can read/search files and run safe shell, but CANNOT edit files, change .env, or dispatch further subagents. Returns its findings.',
    promptSnippet:
      'dispatch_agent(description, prompt) — delegate a read-only research subtask',
    parameters: dispatchParameters,
    async execute(_id, args) {
      const sdk = ctx.sdk;

      // Resource loader with everything disabled except the inherited fence.
      const resourceLoader = new sdk.DefaultResourceLoader({
        cwd: ctx.cwd,
        agentDir: ctx.agentDir,
        systemPrompt: SUBAGENT_SYSTEM_PROMPT,
        noExtensions: true,
        noSkills: true,
        noContextFiles: true,
        noPromptTemplates: true,
        noThemes: true,
        extensionFactories: [ctx.securityFactory],
      });
      await resourceLoader.reload();

      const created = await sdk.createAgentSession({
        model: ctx.model,
        modelRegistry: ctx.modelRegistry,
        cwd: ctx.cwd,
        sessionManager: sdk.SessionManager.inMemory(ctx.cwd),
        resourceLoader,
        // Read-only built-ins only: no write/edit and no dispatch_agent.
        tools: SUBAGENT_TOOLS,
        // The parent's env-scrubbed bash is the one custom tool the child gets.
        customTools: [ctx.bashTool],
      });
      const subagent = created.session;

      // Remember the most recent non-empty message text; the last one wins.
      // NOTE(review): this accepts every `message_end`, whatever the message's
      // author — confirm against the pi event contract whether non-assistant
      // messages can arrive here.
      let findings = '';
      const stopListening = subagent.subscribe((event) => {
        if (event.type !== 'message_end') {
          return;
        }
        const finished = extractText(event.message).trim();
        if (finished) {
          findings = finished;
        }
      });

      logToFile(`[pi] subagent dispatch: ${args.description}`);
      try {
        await subagent.prompt(args.prompt);
      } finally {
        // Always detach the listener, even when the child run throws.
        stopListening();
      }
      logToFile(
        `[pi] subagent "${args.description}" → ${findings.length} chars`,
      );

      return text(findings || 'Subagent completed with no textual result.');
    },
  });

  return dispatchTool;
}
137 changes: 137 additions & 0 deletions src/lib/agent/runner/backends/pi-tasks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/**
* Task/todo parity for pi (#526). The same four Task tools the anthropic path
* exposes (TaskCreate/Update/Get/List), as pi `defineTool` tools backed by a
* shared in-memory store. Every mutation pushes the list to the TUI via
* `getUI().syncTodos`, so the todo panel updates live under pi exactly like the
* anthropic path — the thing that was missing before.
*/

import { Type } from 'typebox';
import { defineTool } from '@earendil-works/pi-coding-agent';
import type { ToolDefinition } from '@earendil-works/pi-coding-agent';
import { getUI } from '@ui';

/** The three states a task can be in. */
export type TaskStatus = 'pending' | 'in_progress' | 'completed';
/** One entry of the todo list as held in the store. */
export interface TaskEntry {
/** Task description text. */
content: string;
/** Current state of the task. */
status: TaskStatus;
/** Optional alternate wording of the task (described to the model as the present-continuous form for the spinner). */
activeForm?: string;
}
/** Task entries keyed by task id. */
export type TaskStore = Map<string, TaskEntry>;

/**
 * Build a tool result carrying exactly one text block with the given string
 * and an empty `details` object.
 */
function text(s: string): {
  content: [{ type: 'text'; text: string }];
  details: unknown;
} {
  const details: unknown = {};
  const content: [{ type: 'text'; text: string }] = [{ type: 'text', text: s }];
  return { content, details };
}

/**
 * Send the store's current tasks, in the Map's iteration order, to the UI's
 * `syncTodos`. Each task is copied into a fresh object holding only `content`,
 * `status` and `activeForm`; task ids are not sent.
 */
function syncToTui(store: TaskStore): void {
  const ui = getUI();
  const todos = Array.from(store.values(), (entry) => {
    const { content, status, activeForm } = entry;
    return { content, status, activeForm };
  });
  ui.syncTodos(todos);
}

/**
 * Build the four Task tools (TaskCreate / TaskUpdate / TaskGet / TaskList) over
 * a fresh in-memory store.
 *
 * TaskCreate and TaskUpdate push the whole list to the UI after every
 * successful mutation; TaskGet and TaskList only read.
 *
 * @returns The tool definitions plus the backing store. The store is handed
 *   back by reference, so callers may mutate it; id allocation below is written
 *   to stay collision-free even if they delete or add entries.
 */
export function createWizardPiTaskTools(): {
  tools: ToolDefinition[];
  store: TaskStore;
} {
  const store: TaskStore = new Map();

  // Ids come from a counter that only moves forward, not from `store.size`:
  // a size-derived id is reused as soon as any entry is removed from the
  // (externally visible) store, which would silently overwrite a live task.
  // Skipping ids already present also covers entries added from outside.
  // For an append-only store this yields task-1, task-2, … exactly as before.
  let lastIssued = 0;
  const allocateId = (): string => {
    let candidate: string;
    do {
      lastIssued += 1;
      candidate = `task-${lastIssued}`;
    } while (store.has(candidate));
    return candidate;
  };

  const taskCreate = defineTool({
    name: 'TaskCreate',
    label: 'Create task',
    description:
      'Create a task in the shared todo list. Returns its assigned id.',
    promptSnippet:
      'TaskCreate(content) — add a todo (surfaces progress in the UI)',
    parameters: Type.Object({
      content: Type.String({ description: 'Imperative task description' }),
      activeForm: Type.Optional(
        Type.String({ description: 'Present-continuous form for the spinner' }),
      ),
    }),
    // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise
    async execute(_id, args) {
      const id = allocateId();
      // New tasks always start out pending.
      store.set(id, {
        content: args.content,
        status: 'pending',
        activeForm: args.activeForm,
      });
      syncToTui(store);
      return text(`Created ${id}`);
    },
  });

  const taskUpdate = defineTool({
    name: 'TaskUpdate',
    label: 'Update task',
    description:
      'Update an existing task by id (status, content, or activeForm).',
    promptSnippet:
      'TaskUpdate(taskId, status) — mark a todo in_progress/completed',
    parameters: Type.Object({
      taskId: Type.String(),
      status: Type.Optional(
        Type.Union([
          Type.Literal('pending'),
          Type.Literal('in_progress'),
          Type.Literal('completed'),
        ]),
      ),
      content: Type.Optional(Type.String()),
      activeForm: Type.Optional(Type.String()),
    }),
    // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise
    async execute(_id, args) {
      const existing = store.get(args.taskId);
      // Unknown ids are reported in the result text rather than thrown, and
      // do not trigger a UI sync.
      if (!existing) return text(`No such task: ${args.taskId}`);
      // Omitted fields keep their current values.
      store.set(args.taskId, {
        content: args.content ?? existing.content,
        status: (args.status as TaskStatus) ?? existing.status,
        activeForm: args.activeForm ?? existing.activeForm,
      });
      syncToTui(store);
      return text(`Updated ${args.taskId}`);
    },
  });

  const taskGet = defineTool({
    name: 'TaskGet',
    label: 'Get task',
    description: 'Fetch a single task by id.',
    parameters: Type.Object({ taskId: Type.String() }),
    // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise
    async execute(_id, args) {
      const found = store.get(args.taskId);
      // A hit is returned as JSON with the id merged in; a miss as plain text.
      return text(
        found
          ? JSON.stringify({ id: args.taskId, ...found })
          : `No such task: ${args.taskId}`,
      );
    },
  });

  const taskList = defineTool({
    name: 'TaskList',
    label: 'List tasks',
    description: 'List all tasks in the shared todo list.',
    parameters: Type.Object({}),
    // eslint-disable-next-line @typescript-eslint/require-await -- pi tool contract returns a Promise
    async execute() {
      // JSON array of every task with its id, in the Map's insertion order.
      return text(
        JSON.stringify(
          Array.from(store.entries()).map(([id, t]) => ({ id, ...t })),
        ),
      );
    },
  });

  return { tools: [taskCreate, taskUpdate, taskGet, taskList], store };
}
59 changes: 51 additions & 8 deletions src/lib/agent/runner/backends/pi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,22 @@ function buildGatewayHeaders(
return headers;
}

/**
 * Pull plain text out of a pi AgentMessage (content is text/image blocks).
 *
 * String content is returned as-is; array content is reduced to the
 * concatenation of its `type: 'text'` blocks that carry a string `text`;
 * any other shape — including a null/undefined message — gives `''`.
 */
function extractText(message: unknown): string {
  const body = (message as { content?: unknown })?.content;

  if (Array.isArray(body)) {
    return body.reduce<string>((acc, part) => {
      const candidate = part as { type?: string; text?: unknown };
      const isTextBlock =
        candidate?.type === 'text' && typeof candidate.text === 'string';
      return isTextBlock ? acc + (candidate.text as string) : acc;
    }, '');
  }

  return typeof body === 'string' ? body : '';
}

export const piBackend: AgentRunner = {
name: 'pi',

Expand Down Expand Up @@ -159,10 +175,28 @@ export const piBackend: AgentRunner = {
// stay out of the static module graph so CommonJS unit tests can load the
// backend seam without parsing it.
const { createWizardPiTools } = await import('./pi-tools');
const customTools = createWizardPiTools({
workingDirectory: session.installDir,
skillsBaseUrl: boot.skillsBaseUrl,
});
const { createWizardPiTaskTools } = await import('./pi-tasks');
const { createDispatchAgentTool } = await import('./pi-subagent');
const customTools = [
...createWizardPiTools({
workingDirectory: session.installDir,
skillsBaseUrl: boot.skillsBaseUrl,
}),
// Task/todo tools (#526): render the todo list live in the TUI, parity
// with the anthropic path.
...createWizardPiTaskTools().tools,
// Controlled subagent dispatch (#526): a nested fenced session with a
// read-only toolset and no dispatch_agent of its own, so it can't
// escape the fence or recurse.
createDispatchAgentTool({
model,
modelRegistry: registry,
cwd: session.installDir,
agentDir: getAgentDir(),
securityFactory: security.factory as (pi: unknown) => void,
sdk: { createAgentSession, DefaultResourceLoader, SessionManager },
}),
];

const { session: agentSession } = await createAgentSession({
model,
Expand All @@ -173,15 +207,24 @@ export const piBackend: AgentRunner = {
customTools,
});

// Map pi events onto the run spinner + the log file. Markers + todos are
// a follow-up (the shared stream→TUI bridge); v1 keeps the spinner alive
// and records tool I/O to the log.
// Map pi events onto the run spinner + the log file, mirroring the
// anthropic path's log shape (assistant turns + tool I/O) and driving the
// single run spinner with one stable status at a time (no overlap).
const unsubscribe = agentSession.subscribe((event) => {
switch (event.type) {
case 'message_end': {
const assistant = extractText(event.message).trim();
if (assistant) {
logToFile(`[pi] assistant: ${assistant.slice(0, 1000)}`);
}
break;
}
case 'tool_execution_start': {
const args = JSON.stringify(event.args ?? {}).slice(0, 200);
logToFile(`[pi] → ${event.toolName} ${args}`);
spinner.message(`Running ${event.toolName}…`);
// Don't surface raw tool names in the spinner — the anthropic path
// doesn't, and it reads as noise. The Task panel (syncTodos) is the
// visible progress, matching the anthropic presentation.
break;
}
case 'tool_execution_end': {
Expand Down
Loading