PostHog · gewenyu99 · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026
diff --git a/src/lib/agent/agent-prompt-loader.ts b/src/lib/agent/agent-prompt-loader.ts
@@ -17,6 +17,7 @@
  */
 import type { QueueStore, QueuedTask } from './runner/orchestrator/queue';
 import type { ResolvedTask } from './runner/orchestrator/executor';
+import { DEFAULT_AGENT_MODEL } from '@lib/constants';
 
 /**
  * The basics the client injects around every agent-prompt body. The `/agents/`
@@ -97,7 +98,7 @@ export function assembleSeedPrompt(
 }
 
 /** Used when neither the enqueue call nor the prompt frontmatter names a model. */
-const DEFAULT_TASK_MODEL = 'claude-sonnet-4-6';
+const DEFAULT_TASK_MODEL = DEFAULT_AGENT_MODEL; // from '@lib/constants'
 
 /** Orchestrator tools are MCP tools under the `posthog-wizard` server. Frontmatter
  *  names them short (e.g. `enqueue_task`); the SDK gates on the full name. */

diff --git a/src/lib/agent/mcp-prompt-streaming.ts b/src/lib/agent/mcp-prompt-streaming.ts
@@ -14,7 +14,7 @@
 
 import type { AgentChunk } from '@ui/tui/services/mcp-suggested-prompts-services';
 import type { Credentials } from '@lib/wizard-session';
-import { WIZARD_USER_AGENT } from '@lib/constants';
+import { DEFAULT_AGENT_MODEL, WIZARD_USER_AGENT } from '@lib/constants';
 import { getLlmGatewayUrlFromHost } from '@utils/urls';
 import { runtimeEnv } from '@env';
 import { logToFile } from '@utils/debug';
@@ -33,7 +33,7 @@ async function loadSdk(): Promise<any> {
   return _sdkModule;
 }
 
-const MODEL = 'claude-sonnet-4-6';
+const MODEL = DEFAULT_AGENT_MODEL; // shared default from '@lib/constants'
 
 // Bounded turn count so a single prompt can't loop forever on the
 // user's nickel. 20 gives the agent room for non-trivial multi-step

diff --git a/src/lib/agent/runner/__tests__/runner-plan.test.ts b/src/lib/agent/runner/__tests__/runner-plan.test.ts
@@ -0,0 +1,44 @@
+import { describe, it, expect } from 'vitest';
+import { PROGRAM_REGISTRY } from '@lib/programs/program-registry';
+import { ROUTES, MODELS, resolvePair } from '@lib/agent/runner/runner-plan';
+
+const PROGRAM_IDS = PROGRAM_REGISTRY.map((c) => c.id);
+const DEFAULT_PAIR = { runner: 'anthropic', model: 'sonnet' };
+
+describe('runner-plan ROUTES', () => {
+  // `ProgramId` widens to `string`, so the type can't force coverage. This is
+  // the real guard: add a program without a route and this fails. Own-property
+  // check on purpose: `in` would also accept inherited Object.prototype keys.
+  it('declares a route for every registered program', () => {
+    const missing = PROGRAM_IDS.filter(
+      (id) => !Object.prototype.hasOwnProperty.call(ROUTES, id),
+    );
+    expect(missing).toEqual([]);
+  });
+
+  // Reverse direction: no route may outlive its program's registry entry.
+  it('maps no route to an unregistered program', () => {
+    const stale = Object.keys(ROUTES).filter((id) => !PROGRAM_IDS.includes(id));
+    expect(stale).toEqual([]);
+  });
+
+  it('resolves every program to a registered runner and a known model', () => {
+    for (const program of PROGRAM_IDS) {
+      const pair = resolvePair({ program, flags: {} });
+      expect(['anthropic', 'pi']).toContain(pair.runner);
+      expect(MODELS[pair.model]).toBeTruthy();
+    }
+  });
+
+  // Pins today's behavior: the seam changes nothing until a route is moved.
+  it('defaults every program to anthropic / sonnet', () => {
+    for (const program of PROGRAM_IDS) {
+      expect(resolvePair({ program, flags: {} })).toEqual(DEFAULT_PAIR);
+    }
+  });
+
+  it('falls back to DEFAULT_ROUTE for an unmapped program', () => {
+    const pair = resolvePair({ program: 'not-a-program', flags: {} });
+    expect(pair).toEqual(DEFAULT_PAIR);
+  });
+});
diff --git a/src/lib/agent/runner/backends/anthropic.ts b/src/lib/agent/runner/backends/anthropic.ts
@@ -0,0 +1,87 @@
+/**
+ * The `anthropic` runner — the control. Wraps the claude-agent-sdk path
+ * (`initializeAgent` + `runAgent`) that was inline in `linear.ts` before the
+ * runner seam. Owns only the agent loop + model transport; the shared pipeline
+ * (skill install, prompt, ask bridge, error routing, outro) stays in `linear.ts`.
+ */
+
+import { getUI } from '@ui';
+import {
+  initializeAgent,
+  runAgent as executeAgent,
+} from '@lib/agent/agent-interface';
+import { getLogFilePath, logToFile } from '@utils/debug';
+import { detectNodePackageManagers } from '@lib/detection/package-manager';
+import { sessionToOptions } from '@lib/agent/runner/shared/bootstrap';
+import type { AgentResult, AgentRunner, BackendRunInputs } from './types';
+
+/** Control runner: drives the claude-agent-sdk path (initialize, then run). */
+export const anthropicBackend: AgentRunner = {
+  name: 'anthropic',
+
+  /**
+   * Builds the agent from the assembled run inputs, then executes the prompt
+   * on it. Resolves to whatever `runAgent` returns for the run.
+   */
+  async run(inputs: BackendRunInputs): Promise<AgentResult> {
+    const {
+      session,
+      config,
+      programConfig,
+      prompt,
+      spinner,
+      askBridge,
+      middleware,
+      model: modelOverride,
+      boot: {
+        skillsBaseUrl,
+        accessToken: posthogApiKey,
+        host: posthogApiHost,
+        mcpUrl: posthogMcpUrl,
+        wizardFlags,
+        wizardMetadata,
+      },
+    } = inputs;
+
+    getUI().log.step('Initializing Claude agent...');
+    const agent = await initializeAgent(
+      {
+        workingDirectory: session.installDir,
+        posthogMcpUrl,
+        posthogApiKey,
+        posthogApiHost,
+        additionalMcpServers: config.additionalMcpServers,
+        detectPackageManager:
+          config.detectPackageManager ?? detectNodePackageManagers,
+        skillsBaseUrl,
+        wizardFlags,
+        wizardMetadata,
+        integrationLabel: config.integrationLabel,
+        askBridge,
+        askMaxQuestions: config.maxQuestions,
+        allowedTools: programConfig.allowedTools,
+        disallowedTools: programConfig.disallowedTools,
+        getPendingQuestion: () => session.pendingQuestion,
+        modelOverride,
+      },
+      sessionToOptions(session),
+    );
+    getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
+    getUI().log.success("Agent initialized. Let's get cooking!");
+    logToFile('[agent-runner] agent initialized');
+
+    // Session options are re-derived here, after initialization has finished.
+    const options = sessionToOptions(session);
+    const runConfig = {
+      estimatedDurationMinutes: config.estimatedDurationMinutes,
+      spinnerMessage: config.spinnerMessage,
+      successMessage: config.successMessage,
+      errorMessage: config.errorMessage ?? `${config.integrationLabel} failed`,
+      additionalFeatureQueue: config.additionalFeatureQueue ?? [],
+      abortCases: config.abortCases,
+      emitStepEvents: config.trackStepProgress ?? false,
+    };
+
+    return executeAgent(agent, prompt, options, spinner, runConfig, middleware);
+  },
+};
diff --git a/src/lib/agent/runner/backends/types.ts b/src/lib/agent/runner/backends/types.ts
@@ -0,0 +1,60 @@
+/**
+ * The agent-runner seam. The linear pipeline assembles a run (skill install,
+ * prompt, ask bridge) and then hands off to a runner to actually drive the
+ * coding agent. A runner owns the agent loop and the model transport; it does
+ * NOT own bootstrap, prompt assembly, error routing, or the outro — those stay
+ * in `linear.ts` so every runner shares them.
+ *
+ * `anthropic` (claude-agent-sdk) is the control. `pi` (pi.dev) is the
+ * challenger. The runner is chosen by `resolvePair` in `runner-plan.ts`.
+ */
+
+import type { WizardSession } from '@lib/wizard-session';
+import type { ProgramConfig } from '@lib/programs/program-step';
+import type { SpinnerHandle } from '@ui';
+import type { WizardAskBridge } from '@lib/wizard-ask-bridge';
+import type { AgentErrorType } from '@lib/agent/agent-interface';
+import type {
+  ProgramRun,
+  BootstrapResult,
+} from '@lib/agent/runner/shared/types';
+
+/** The benchmark/telemetry hook a run carries as `middleware`, if enabled. */
+export interface RunMiddleware {
+  onMessage(message: unknown): void;
+  finalize(resultMessage: unknown, totalDurationMs: number): unknown;
+}
+
+/**
+ * Everything a runner needs to run one program. Assembled by `linear.ts` from
+ * the bootstrap result (`boot`) and the program config; the runner consumes it
+ * and never re-derives run context.
+ */
+export interface BackendRunInputs {
+  session: WizardSession;
+  config: ProgramRun;
+  programConfig: ProgramConfig;
+  boot: BootstrapResult;
+  /** The fully assembled prompt. */
+  prompt: string;
+  /** Installed framework-skill path, when the program installs one. */
+  skillPath?: string;
+  /** The run spinner (the runner drives start/stop). */
+  spinner: SpinnerHandle;
+  /** Interactive question bridge; undefined in CI/headless (ask disabled). */
+  askBridge?: WizardAskBridge;
+  /** Benchmark middleware, when `session.benchmark` is set. */
+  middleware?: RunMiddleware;
+  /** Gateway model id resolved from the (runner, model) pair. */
+  model: string;
+}
+
+/** A runner's report: optional `error` class and `message`; nothing on success. */
+export type AgentResult = { error?: AgentErrorType; message?: string };
+
+/** A drop-in agent runner: takes one fully-assembled run, resolves to its result. */
+export interface AgentRunner {
+  /** Stable name used for logs + telemetry (matches the flag variant). */
+  readonly name: 'anthropic' | 'pi';
+  run(inputs: BackendRunInputs): Promise<AgentResult>;
+}
diff --git a/src/lib/agent/runner/linear.ts b/src/lib/agent/runner/linear.ts
@@ -8,15 +8,10 @@
 import type { WizardSession } from '../../wizard-session';
 import { OutroKind } from '../../wizard-session';
 import { getUI } from '../../../ui';
-import {
-  initializeAgent,
-  runAgent as executeAgent,
-  AgentErrorType,
-  AgentSignals,
-} from '../agent-interface';
+import { AgentErrorType, AgentSignals } from '../agent-interface';
 import { restoreClaudeSettings } from '../claude-settings';
 import { getCloudUrlFromRegion } from '../../../utils/urls';
-import { logToFile, getLogFilePath } from '../../../utils/debug';
+import { logToFile } from '../../../utils/debug';
 import { createBenchmarkPipeline } from '../../middleware/benchmark';
 import {
   wizardAbort,
@@ -25,14 +20,14 @@ import {
 } from '../../../utils/wizard-abort';
 import { analytics } from '../../../utils/analytics';
 import { formatScanReport, writeScanReport } from '../../yara-hooks';
-import { detectNodePackageManagers } from '../../detection/package-manager';
 import { installSkillById } from '../../wizard-tools';
 import { createWizardAskBridge } from '../../wizard-ask-bridge';
 import type { ProgramConfig } from '../../programs/program-step';
 import { assemblePrompt } from '../agent-prompt';
 import type { ProgramRun, BootstrapResult } from './shared/types';
 import { abortOnInstallFailure } from './shared/errors';
 import { shouldDisableAsk, sessionToOptions } from './shared/bootstrap';
+import { resolvePair, getRunner, MODELS } from './runner-plan';
 
 export async function runLinearProgram(
   session: WizardSession,
@@ -47,9 +42,7 @@ export async function runLinearProgram(
     accessToken,
     projectId,
     cloudRegion,
-    mcpUrl,
     wizardFlags,
-    wizardMetadata,
     project,
   } = boot;
 
@@ -101,33 +94,6 @@ export async function runLinearProgram(
         timeoutMs: config.askTimeoutMs,
       });
 
-  getUI().log.step('Initializing Claude agent...');
-  const agent = await initializeAgent(
-    {
-      workingDirectory: session.installDir,
-      posthogMcpUrl: mcpUrl,
-      posthogApiKey: accessToken,
-      posthogApiHost: host,
-      additionalMcpServers: config.additionalMcpServers,
-      detectPackageManager:
-        config.detectPackageManager ?? detectNodePackageManagers,
-      skillsBaseUrl,
-      wizardFlags,
-      wizardMetadata,
-      integrationLabel: config.integrationLabel,
-      askBridge,
-      askMaxQuestions: config.maxQuestions,
-      allowedTools: programConfig.allowedTools,
-      disallowedTools: programConfig.disallowedTools,
-      getPendingQuestion: () => session.pendingQuestion,
-    },
-    sessionToOptions(session),
-  );
-  getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
-  getUI().log.success("Agent initialized. Let's get cooking!");
-
-  logToFile('[agent-runner] agent initialized');
-
   const middleware = session.benchmark
     ? createBenchmarkPipeline(spinner, sessionToOptions(session))
     : undefined;
@@ -150,23 +116,23 @@ export async function runLinearProgram(
   });
   logToFile(`[agent-runner] prompt assembled (${prompt.length} chars)`);
 
-  // 8. Run agent
-  const agentResult = await executeAgent(
-    agent,
+  // 8. Resolve the (runner, model) pair from the central plan and run the agent
+  // through the selected runner. The runner owns the agent loop + model
+  // transport; everything around it (skill install, prompt, ask bridge, error
+  // routing, outro) stays here so every runner shares it.
+  const pair = resolvePair({ program: programConfig.id, flags: wizardFlags });
+  const agentResult = await getRunner(pair.runner).run({
+    session,
+    config,
+    programConfig,
+    boot,
     prompt,
-    sessionToOptions(session),
+    skillPath,
     spinner,
-    {
-      estimatedDurationMinutes: config.estimatedDurationMinutes,
-      spinnerMessage: config.spinnerMessage,
-      successMessage: config.successMessage,
-      errorMessage: config.errorMessage ?? `${config.integrationLabel} failed`,
-      additionalFeatureQueue: config.additionalFeatureQueue ?? [],
-      abortCases: config.abortCases,
-      emitStepEvents: config.trackStepProgress ?? false,
-    },
+    askBridge,
     middleware,
-  );
+    model: MODELS[pair.model],
+  });
 
   // 9. Error handling (full set from both runners)
   if (agentResult.error === AgentErrorType.ABORT) {