diff --git a/src/lib/agent/agent-prompt-loader.ts b/src/lib/agent/agent-prompt-loader.ts index f40276ea..5139bdc2 100644 --- a/src/lib/agent/agent-prompt-loader.ts +++ b/src/lib/agent/agent-prompt-loader.ts @@ -17,6 +17,7 @@ */ import type { QueueStore, QueuedTask } from './runner/orchestrator/queue'; import type { ResolvedTask } from './runner/orchestrator/executor'; +import { DEFAULT_AGENT_MODEL } from '@lib/constants'; /** * The basics the client injects around every agent-prompt body. The `/agents/` @@ -97,7 +98,7 @@ export function assembleSeedPrompt( } /** Used when neither the enqueue call nor the prompt frontmatter names a model. */ -const DEFAULT_TASK_MODEL = 'claude-sonnet-4-6'; +const DEFAULT_TASK_MODEL = DEFAULT_AGENT_MODEL; /** Orchestrator tools are MCP tools under the `posthog-wizard` server. Frontmatter * names them short (e.g. `enqueue_task`); the SDK gates on the full name. */ diff --git a/src/lib/agent/mcp-prompt-streaming.ts b/src/lib/agent/mcp-prompt-streaming.ts index 3252a427..98d8b274 100644 --- a/src/lib/agent/mcp-prompt-streaming.ts +++ b/src/lib/agent/mcp-prompt-streaming.ts @@ -14,7 +14,7 @@ import type { AgentChunk } from '@ui/tui/services/mcp-suggested-prompts-services'; import type { Credentials } from '@lib/wizard-session'; -import { WIZARD_USER_AGENT } from '@lib/constants'; +import { DEFAULT_AGENT_MODEL, WIZARD_USER_AGENT } from '@lib/constants'; import { getLlmGatewayUrlFromHost } from '@utils/urls'; import { runtimeEnv } from '@env'; import { logToFile } from '@utils/debug'; @@ -33,7 +33,7 @@ async function loadSdk(): Promise { return _sdkModule; } -const MODEL = 'claude-sonnet-4-6'; +const MODEL = DEFAULT_AGENT_MODEL; // Bounded turn count so a single prompt can't loop forever on the // user's nickel. 
20 gives the agent room for non-trivial multi-step diff --git a/src/lib/agent/runner/__tests__/runner-plan.test.ts b/src/lib/agent/runner/__tests__/runner-plan.test.ts new file mode 100644 index 00000000..0dd75734 --- /dev/null +++ b/src/lib/agent/runner/__tests__/runner-plan.test.ts @@ -0,0 +1,44 @@ +import { describe, it, expect } from 'vitest'; +import { PROGRAM_REGISTRY } from '@lib/programs/program-registry'; +import { ROUTES, MODELS, resolvePair } from '@lib/agent/runner/runner-plan'; + +const PROGRAM_IDS = PROGRAM_REGISTRY.map((c) => c.id); + +describe('runner-plan ROUTES', () => { + // `ProgramId` widens to `string`, so the type can't force coverage. This is + // the real guard: add a program without a route and this fails. + it('declares a route for every registered program', () => { + const missing = PROGRAM_IDS.filter((id) => !(id in ROUTES)); + expect(missing).toEqual([]); + }); + + it('maps no route to an unregistered program', () => { + const stale = Object.keys(ROUTES).filter((id) => !PROGRAM_IDS.includes(id)); + expect(stale).toEqual([]); + }); + + it('resolves every program to a registered runner and a known model', () => { + for (const program of PROGRAM_IDS) { + const pair = resolvePair({ program, flags: {} }); + expect(['anthropic', 'pi']).toContain(pair.runner); + expect(MODELS[pair.model]).toBeTruthy(); + } + }); + + // Pins today's behavior: the seam changes nothing until a route is moved. 
+ it('defaults every program to anthropic / sonnet', () => { + for (const program of PROGRAM_IDS) { + expect(resolvePair({ program, flags: {} })).toEqual({ + runner: 'anthropic', + model: 'sonnet', + }); + } + }); + + it('falls back to DEFAULT_ROUTE for an unmapped program', () => { + expect(resolvePair({ program: 'not-a-program', flags: {} })).toEqual({ + runner: 'anthropic', + model: 'sonnet', + }); + }); +}); diff --git a/src/lib/agent/runner/backends/anthropic.ts b/src/lib/agent/runner/backends/anthropic.ts new file mode 100644 index 00000000..0e83c35f --- /dev/null +++ b/src/lib/agent/runner/backends/anthropic.ts @@ -0,0 +1,87 @@ +/** + * The `anthropic` runner — the control. Wraps the claude-agent-sdk path + * (`initializeAgent` + `runAgent`) that was inline in `linear.ts` before the + * runner seam. Owns only the agent loop + model transport; the shared pipeline + * (skill install, prompt, ask bridge, error routing, outro) stays in `linear.ts`. + */ + +import { getUI } from '@ui'; +import { + initializeAgent, + runAgent as executeAgent, +} from '@lib/agent/agent-interface'; +import { getLogFilePath, logToFile } from '@utils/debug'; +import { detectNodePackageManagers } from '@lib/detection/package-manager'; +import { sessionToOptions } from '@lib/agent/runner/shared/bootstrap'; +import type { AgentResult, AgentRunner, BackendRunInputs } from './types'; + +export const anthropicBackend: AgentRunner = { + name: 'anthropic', + + async run(inputs: BackendRunInputs): Promise<AgentResult> { + const { + session, + config, + programConfig, + boot, + prompt, + spinner, + askBridge, + middleware, + model, + } = inputs; + const { + skillsBaseUrl, + accessToken, + host, + mcpUrl, + wizardFlags, + wizardMetadata, + } = boot; + + getUI().log.step('Initializing Claude agent...'); + const agent = await initializeAgent( + { + workingDirectory: session.installDir, + posthogMcpUrl: mcpUrl, + posthogApiKey: accessToken, + posthogApiHost: host, + additionalMcpServers: 
config.additionalMcpServers, + detectPackageManager: + config.detectPackageManager ?? detectNodePackageManagers, + skillsBaseUrl, + wizardFlags, + wizardMetadata, + integrationLabel: config.integrationLabel, + askBridge, + askMaxQuestions: config.maxQuestions, + allowedTools: programConfig.allowedTools, + disallowedTools: programConfig.disallowedTools, + getPendingQuestion: () => session.pendingQuestion, + modelOverride: model, + }, + sessionToOptions(session), + ); + getUI().log.step(`Verbose logs: ${getLogFilePath()}`); + getUI().log.success("Agent initialized. Let's get cooking!"); + logToFile('[agent-runner] agent initialized'); + + return executeAgent( + agent, + prompt, + sessionToOptions(session), + spinner, + { + estimatedDurationMinutes: config.estimatedDurationMinutes, + spinnerMessage: config.spinnerMessage, + successMessage: config.successMessage, + errorMessage: + config.errorMessage ?? `${config.integrationLabel} failed`, + additionalFeatureQueue: config.additionalFeatureQueue ?? [], + abortCases: config.abortCases, + emitStepEvents: config.trackStepProgress ?? false, + }, + middleware, + ); + }, +}; diff --git a/src/lib/agent/runner/backends/types.ts b/src/lib/agent/runner/backends/types.ts new file mode 100644 index 00000000..bf82d90c --- /dev/null +++ b/src/lib/agent/runner/backends/types.ts @@ -0,0 +1,60 @@ +/** + * The agent-runner seam. The linear pipeline assembles a run (skill install, + * prompt, ask bridge) and then hands off to a runner to actually drive the + * coding agent. A runner owns the agent loop and the model transport; it does + * NOT own bootstrap, prompt assembly, error routing, or the outro — those stay + * in `linear.ts` so every runner shares them. + * + * `anthropic` (claude-agent-sdk) is the control. `pi` (pi.dev) is the + * challenger. The runner is chosen by `resolvePair` in `runner-plan.ts`. 
+ */ + +import type { WizardSession } from '@lib/wizard-session'; +import type { ProgramConfig } from '@lib/programs/program-step'; +import type { SpinnerHandle } from '@ui'; +import type { WizardAskBridge } from '@lib/wizard-ask-bridge'; +import type { AgentErrorType } from '@lib/agent/agent-interface'; +import type { + ProgramRun, + BootstrapResult, +} from '@lib/agent/runner/shared/types'; + +/** The benchmark/telemetry hook threaded through a run, if enabled. */ +export interface RunMiddleware { + onMessage(message: unknown): void; + finalize(resultMessage: unknown, totalDurationMs: number): unknown; +} + +/** + * Everything a runner needs to run one program. Assembled by `linear.ts` from + * the bootstrap result and the program config; the runner consumes it and never + * re-derives run context. + */ +export interface BackendRunInputs { + session: WizardSession; + config: ProgramRun; + programConfig: ProgramConfig; + boot: BootstrapResult; + /** The fully assembled prompt. */ + prompt: string; + /** Installed framework-skill path, when the program installs one. */ + skillPath?: string; + /** The run spinner (the runner drives start/stop). */ + spinner: SpinnerHandle; + /** Interactive question bridge; undefined in CI/headless (ask disabled). */ + askBridge?: WizardAskBridge; + /** Benchmark middleware, when `session.benchmark` is set. */ + middleware?: RunMiddleware; + /** Gateway model id resolved from the (runner, model) pair. */ + model: string; +} + +/** What a runner reports back: an error classification, or nothing on success. */ +export type AgentResult = { error?: AgentErrorType; message?: string }; + +/** A drop-in agent runner: consumes a fully-assembled run, returns a result. */ +export interface AgentRunner { + /** Stable name used for logs + telemetry (matches the flag variant). 
*/ + readonly name: 'anthropic' | 'pi'; + run(inputs: BackendRunInputs): Promise<AgentResult>; +} diff --git a/src/lib/agent/runner/linear.ts b/src/lib/agent/runner/linear.ts index 659dcbcd..b70368cc 100644 --- a/src/lib/agent/runner/linear.ts +++ b/src/lib/agent/runner/linear.ts @@ -8,15 +8,10 @@ import type { WizardSession } from '../../wizard-session'; import { OutroKind } from '../../wizard-session'; import { getUI } from '../../../ui'; -import { - initializeAgent, - runAgent as executeAgent, - AgentErrorType, - AgentSignals, -} from '../agent-interface'; +import { AgentErrorType, AgentSignals } from '../agent-interface'; import { restoreClaudeSettings } from '../claude-settings'; import { getCloudUrlFromRegion } from '../../../utils/urls'; -import { logToFile, getLogFilePath } from '../../../utils/debug'; +import { logToFile } from '../../../utils/debug'; import { createBenchmarkPipeline } from '../../middleware/benchmark'; import { wizardAbort, @@ -25,7 +20,6 @@ import { } from '../../../utils/wizard-abort'; import { analytics } from '../../../utils/analytics'; import { formatScanReport, writeScanReport } from '../../yara-hooks'; -import { detectNodePackageManagers } from '../../detection/package-manager'; import { installSkillById } from '../../wizard-tools'; import { createWizardAskBridge } from '../../wizard-ask-bridge'; import type { ProgramConfig } from '../../programs/program-step'; @@ -33,6 +27,7 @@ import { assemblePrompt } from '../agent-prompt'; import type { ProgramRun, BootstrapResult } from './shared/types'; import { abortOnInstallFailure } from './shared/errors'; import { shouldDisableAsk, sessionToOptions } from './shared/bootstrap'; +import { resolvePair, getRunner, MODELS } from './runner-plan'; export async function runLinearProgram( session: WizardSession, @@ -47,9 +42,7 @@ export async function runLinearProgram( accessToken, projectId, cloudRegion, - mcpUrl, wizardFlags, - wizardMetadata, project, } = boot; @@ -101,33 +94,6 @@ export async function 
runLinearProgram( timeoutMs: config.askTimeoutMs, }); - getUI().log.step('Initializing Claude agent...'); - const agent = await initializeAgent( - { - workingDirectory: session.installDir, - posthogMcpUrl: mcpUrl, - posthogApiKey: accessToken, - posthogApiHost: host, - additionalMcpServers: config.additionalMcpServers, - detectPackageManager: - config.detectPackageManager ?? detectNodePackageManagers, - skillsBaseUrl, - wizardFlags, - wizardMetadata, - integrationLabel: config.integrationLabel, - askBridge, - askMaxQuestions: config.maxQuestions, - allowedTools: programConfig.allowedTools, - disallowedTools: programConfig.disallowedTools, - getPendingQuestion: () => session.pendingQuestion, - }, - sessionToOptions(session), - ); - getUI().log.step(`Verbose logs: ${getLogFilePath()}`); - getUI().log.success("Agent initialized. Let's get cooking!"); - - logToFile('[agent-runner] agent initialized'); - const middleware = session.benchmark ? createBenchmarkPipeline(spinner, sessionToOptions(session)) : undefined; @@ -150,23 +116,23 @@ export async function runLinearProgram( }); logToFile(`[agent-runner] prompt assembled (${prompt.length} chars)`); - // 8. Run agent - const agentResult = await executeAgent( - agent, + // 8. Resolve the (runner, model) pair from the central plan and run the agent + // through the selected runner. The runner owns the agent loop + model + // transport; everything around it (skill install, prompt, ask bridge, error + // routing, outro) stays here so every runner shares it. + const pair = resolvePair({ program: programConfig.id, flags: wizardFlags }); + const agentResult = await getRunner(pair.runner).run({ + session, + config, + programConfig, + boot, prompt, - sessionToOptions(session), + skillPath, spinner, - { - estimatedDurationMinutes: config.estimatedDurationMinutes, - spinnerMessage: config.spinnerMessage, - successMessage: config.successMessage, - errorMessage: config.errorMessage ?? 
`${config.integrationLabel} failed`, - additionalFeatureQueue: config.additionalFeatureQueue ?? [], - abortCases: config.abortCases, - emitStepEvents: config.trackStepProgress ?? false, - }, + askBridge, middleware, - ); + model: MODELS[pair.model], + }); // 9. Error handling (full set from both runners) if (agentResult.error === AgentErrorType.ABORT) { diff --git a/src/lib/agent/runner/runner-plan.ts b/src/lib/agent/runner/runner-plan.ts new file mode 100644 index 00000000..60ff46e2 --- /dev/null +++ b/src/lib/agent/runner/runner-plan.ts @@ -0,0 +1,130 @@ +/** + * The agent-runner plan — the one central place that decides how a program runs. + * + * A program maps (via the `ROUTES` config map) to a **router** (control-flow + * shape: `linear` | `orchestrator`) and a **(runner, model) pair**. The base + * decision is just the map read; control is then asserted at named insertion + * points (`resolvePair` here; `resolveRouter` arrives with the flag middleware) + * — each an ordered middleware chain whose terminal is the map. Existing flags + * plug in as middleware, one per flag (see #692b); the core never reads a flag. + * + * Two registries bound by pairs: + * RUNNERS leaf engines (`anthropic` now; `pi` registers later) + * MODELS model alias → gateway id (retires the hardcoded model literals) + */ + +import { DEFAULT_AGENT_MODEL } from '@lib/constants'; +import { logToFile } from '@utils/debug'; +import type { ProgramId } from '@lib/programs/program-registry'; +import type { AgentRunner } from './backends/types'; +import { anthropicBackend } from './backends/anthropic'; + +export type RunnerName = 'anthropic' | 'pi'; +export type RouterName = 'linear' | 'orchestrator'; +export type ModelAlias = 'sonnet' | 'opus'; + +/** What a leaf of agent work resolves to. */ +export interface Pair { + runner: RunnerName; + model: ModelAlias; +} + +/** Model alias → gateway model id. Replaces the hardcoded model literals. 
*/ +export const MODELS: Record<ModelAlias, string> = { + sonnet: DEFAULT_AGENT_MODEL, + opus: 'claude-opus-4-8', +}; + +/** Leaf engines. `pi` registers in a later PR. */ +export const RUNNERS: Partial<Record<RunnerName, AgentRunner>> = { + anthropic: anthropicBackend, +}; + +/** Look up a registered runner, or fail loudly if a route names an absent one. */ +export function getRunner(name: RunnerName): AgentRunner { + const runner = RUNNERS[name]; + if (!runner) { + throw new Error(`No agent runner registered for '${name}'.`); + } + return runner; +} + +/** + * A program's default plan. `roles` overlays the pair per orchestrator sub-task + * role; the linear router always resolves `role = 'default'`. + */ +export interface Route { + router: RouterName; + runner: RunnerName; + model: ModelAlias; + roles?: Record<string, Partial<Pair>>; +} + +/** The shared default plan. Every program points here until it overrides. */ +export const DEFAULT_ROUTE: Route = { + router: 'linear', + runner: 'anthropic', + model: 'sonnet', +}; + +/** + * Per-program routing — every registered program is listed. `Partial`, not + * `Record`: `ProgramId` widens to `string`, so the type can't force coverage — + * the `runner-plan` test keeps this in lockstep with `PROGRAM_REGISTRY`. Today + * every program runs `DEFAULT_ROUTE` (linear / anthropic / sonnet); moving one + * is a single value, e.g. `'self-driving': { ...DEFAULT_ROUTE, runner: 'pi' }`. + * Anything absent falls back to `DEFAULT_ROUTE` in `resolvePair`. 
+ */ +export const ROUTES: Partial<Record<ProgramId, Route>> = { + 'posthog-integration': DEFAULT_ROUTE, + 'revenue-analytics-setup': DEFAULT_ROUTE, + 'warehouse-source': DEFAULT_ROUTE, + 'error-tracking-upload-source-maps': DEFAULT_ROUTE, + audit: DEFAULT_ROUTE, + 'events-audit': DEFAULT_ROUTE, + 'posthog-doctor': DEFAULT_ROUTE, + 'web-analytics-doctor': DEFAULT_ROUTE, + migration: DEFAULT_ROUTE, + 'self-driving': DEFAULT_ROUTE, + 'agent-skill': DEFAULT_ROUTE, + 'mcp-add': DEFAULT_ROUTE, + 'mcp-remove': DEFAULT_ROUTE, + 'mcp-tutorial': DEFAULT_ROUTE, + 'mcp-analytics': DEFAULT_ROUTE, + slack: DEFAULT_ROUTE, +}; + +/** Everything a resolver middleware may branch on. Built once per run. */ +export interface ResolveCtx { + program: ProgramId; + flags: Record; +} + +/** A resolver middleware: defer via `next()`, or assert by returning a value. */ +export type Mw<D> = (ctx: ResolveCtx, next: () => D) => D; + +/** Run a middleware chain over `ctx`, terminating in `base` (the map read). */ +export function runChain<D>(chain: Mw<D>[], ctx: ResolveCtx, base: () => D): D { + const dispatch = (i: number): D => + i < chain.length ? chain[i](ctx, () => dispatch(i + 1)) : base(); + return dispatch(0); +} + +/** + * The pair insertion point. The chain is empty until the flag middleware lands; + * the terminal is the config map read. Called per leaf with a role. + */ +const PAIR_MIDDLEWARE: Mw<Pair>[] = []; + +export function resolvePair(ctx: ResolveCtx, role = 'default'): Pair { + const pair = runChain(PAIR_MIDDLEWARE, ctx, () => { + const route = ROUTES[ctx.program] ?? 
DEFAULT_ROUTE; + return { runner: route.runner, model: route.model, ...route.roles?.[role] }; + }); + logToFile( + `[runner] resolved: program=${ctx.program} runner=${pair.runner} model=${ + MODELS[pair.model] + }`, + ); + return pair; +} diff --git a/src/lib/constants.ts b/src/lib/constants.ts index dd9fd75c..27371a74 100644 --- a/src/lib/constants.ts +++ b/src/lib/constants.ts @@ -190,6 +190,13 @@ export const WIZARD_INTERACTION_EVENT_NAME = 'wizard interaction'; export const WIZARD_REMARK_EVENT_NAME = 'wizard remark'; /** Boolean feature flag that routes a run to the experimental orchestrator runner. */ export const WIZARD_ORCHESTRATOR_FLAG_KEY = 'wizard-orchestrator'; +/** + * Multivariate feature flag that selects the agent runner: `anthropic` (control, + * claude-agent-sdk) or `pi` (pi.dev coding agent). Read by the `wizardRunner` + * resolver middleware. Multivariate over boolean so telemetry reads the runner + * name directly. Unknown/missing resolves to `anthropic`. + */ +export const WIZARD_RUNNER_FLAG_KEY = 'wizard-runner'; /** Feature flag key that gates the intro-screen "Tools" menu. */ export const WIZARD_TOOLS_MENU_FLAG_KEY = 'wizard-tools-menu'; /**