Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/lib/agent/agent-prompt-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
import type { QueueStore, QueuedTask } from './runner/orchestrator/queue';
import type { ResolvedTask } from './runner/orchestrator/executor';
import { DEFAULT_AGENT_MODEL } from '@lib/constants';

/**
* The basics the client injects around every agent-prompt body. The `/agents/`
Expand Down Expand Up @@ -97,7 +98,7 @@ export function assembleSeedPrompt(
}

/** Used when neither the enqueue call nor the prompt frontmatter names a model. */
const DEFAULT_TASK_MODEL = 'claude-sonnet-4-6';
const DEFAULT_TASK_MODEL = DEFAULT_AGENT_MODEL;

/** Orchestrator tools are MCP tools under the `posthog-wizard` server. Frontmatter
* names them short (e.g. `enqueue_task`); the SDK gates on the full name. */
Expand Down
4 changes: 2 additions & 2 deletions src/lib/agent/mcp-prompt-streaming.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import type { AgentChunk } from '@ui/tui/services/mcp-suggested-prompts-services';
import type { Credentials } from '@lib/wizard-session';
import { WIZARD_USER_AGENT } from '@lib/constants';
import { DEFAULT_AGENT_MODEL, WIZARD_USER_AGENT } from '@lib/constants';
import { getLlmGatewayUrlFromHost } from '@utils/urls';
import { runtimeEnv } from '@env';
import { logToFile } from '@utils/debug';
Expand All @@ -33,7 +33,7 @@ async function loadSdk(): Promise<any> {
return _sdkModule;
}

const MODEL = 'claude-sonnet-4-6';
const MODEL = DEFAULT_AGENT_MODEL;

// Bounded turn count so a single prompt can't loop forever on the
// user's nickel. 20 gives the agent room for non-trivial multi-step
Expand Down
44 changes: 44 additions & 0 deletions src/lib/agent/runner/__tests__/runner-plan.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { describe, it, expect } from 'vitest';
import { PROGRAM_REGISTRY } from '@lib/programs/program-registry';
import { ROUTES, MODELS, resolvePair } from '@lib/agent/runner/runner-plan';

/** Ids of every registered program — the reference set the route table is checked against. */
const PROGRAM_IDS = PROGRAM_REGISTRY.map((entry) => entry.id);

describe('runner-plan ROUTES', () => {
  // The `ProgramId` type widens to `string`, so the compiler cannot enforce
  // that every program has a route. This runtime check is the actual guard:
  // registering a program without adding a route makes it fail.
  it('declares a route for every registered program', () => {
    const unrouted = PROGRAM_IDS.filter(
      (programId) => !Reflect.has(ROUTES, programId),
    );

    expect(unrouted).toEqual([]);
  });

  // The reverse direction: a route key that no longer names a registered
  // program is stale and should be removed.
  it('maps no route to an unregistered program', () => {
    const registered = new Set<string>(PROGRAM_IDS);
    const orphaned = Object.keys(ROUTES).filter(
      (routeKey) => !registered.has(routeKey),
    );

    expect(orphaned).toEqual([]);
  });

  it('resolves every program to a registered runner and a known model', () => {
    const knownRunners = ['anthropic', 'pi'];

    PROGRAM_IDS.forEach((program) => {
      const resolved = resolvePair({ program, flags: {} });

      expect(knownRunners).toContain(resolved.runner);
      expect(MODELS[resolved.model]).toBeTruthy();
    });
  });

  // Pins current behavior: introducing the seam must not change which
  // runner/model a program gets until a route is deliberately moved.
  it('defaults every program to anthropic / sonnet', () => {
    const expectedPair = { runner: 'anthropic', model: 'sonnet' };

    PROGRAM_IDS.forEach((program) => {
      const resolved = resolvePair({ program, flags: {} });

      expect(resolved).toEqual(expectedPair);
    });
  });

  it('falls back to DEFAULT_ROUTE for an unmapped program', () => {
    const resolved = resolvePair({ program: 'not-a-program', flags: {} });

    expect(resolved).toEqual({
      runner: 'anthropic',
      model: 'sonnet',
    });
  });
});
87 changes: 87 additions & 0 deletions src/lib/agent/runner/backends/anthropic.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/**
* The `anthropic` runner — the control. Wraps the claude-agent-sdk path
* (`initializeAgent` + `runAgent`) that was inline in `linear.ts` before the
* runner seam. Owns only the agent loop + model transport; the shared pipeline
* (skill install, prompt, ask bridge, error routing, outro) stays in `linear.ts`.
*/

import { getUI } from '@ui';
import {
initializeAgent,
runAgent as executeAgent,
} from '@lib/agent/agent-interface';
import { getLogFilePath, logToFile } from '@utils/debug';
import { detectNodePackageManagers } from '@lib/detection/package-manager';
import { sessionToOptions } from '@lib/agent/runner/shared/bootstrap';
import type { AgentResult, AgentRunner, BackendRunInputs } from './types';

/**
 * Runner that drives a program through the claude-agent-sdk path: it
 * initializes the agent from the bootstrap result and program config, then
 * executes the already-assembled prompt and returns the agent loop's result.
 */
export const anthropicBackend: AgentRunner = {
  name: 'anthropic',

  /**
   * Initializes the Claude agent and runs it against `inputs.prompt`.
   *
   * @param inputs The fully assembled run handed over by the linear pipeline.
   * @returns Whatever `runAgent` resolves to for this run.
   */
  async run(inputs: BackendRunInputs): Promise<AgentResult> {
    // Snapshot the run inputs once, before any await, so later reads are
    // unaffected by anything that happens while the agent initializes.
    const {
      session,
      config: runConfig,
      programConfig: program,
      boot: bootstrap,
      prompt: assembledPrompt,
      spinner: runSpinner,
      askBridge: questionBridge,
      middleware: runMiddleware,
      model: gatewayModel,
    } = inputs;
    const {
      skillsBaseUrl,
      accessToken: apiKey,
      host: apiHost,
      mcpUrl: posthogMcpUrl,
      wizardFlags,
      wizardMetadata,
    } = bootstrap;

    getUI().log.step('Initializing Claude agent...');

    const agentSettings = {
      workingDirectory: session.installDir,
      posthogMcpUrl,
      posthogApiKey: apiKey,
      posthogApiHost: apiHost,
      additionalMcpServers: runConfig.additionalMcpServers,
      // Programs may bring their own detector; Node detection is the fallback.
      detectPackageManager:
        runConfig.detectPackageManager ?? detectNodePackageManagers,
      skillsBaseUrl,
      wizardFlags,
      wizardMetadata,
      integrationLabel: runConfig.integrationLabel,
      askBridge: questionBridge,
      askMaxQuestions: runConfig.maxQuestions,
      allowedTools: program.allowedTools,
      disallowedTools: program.disallowedTools,
      // Read lazily so the agent always sees the session's current question.
      getPendingQuestion: () => session.pendingQuestion,
      modelOverride: gatewayModel,
    };
    const agent = await initializeAgent(
      agentSettings,
      sessionToOptions(session),
    );

    getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
    getUI().log.success("Agent initialized. Let's get cooking!");
    logToFile('[agent-runner] agent initialized');

    // Options are derived again here rather than reused from initialization,
    // so they reflect the session as it is when the run starts.
    return executeAgent(
      agent,
      assembledPrompt,
      sessionToOptions(session),
      runSpinner,
      {
        estimatedDurationMinutes: runConfig.estimatedDurationMinutes,
        spinnerMessage: runConfig.spinnerMessage,
        successMessage: runConfig.successMessage,
        errorMessage:
          runConfig.errorMessage ?? `${runConfig.integrationLabel} failed`,
        additionalFeatureQueue: runConfig.additionalFeatureQueue ?? [],
        abortCases: runConfig.abortCases,
        emitStepEvents: runConfig.trackStepProgress ?? false,
      },
      runMiddleware,
    );
  },
};
60 changes: 60 additions & 0 deletions src/lib/agent/runner/backends/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/**
* The agent-runner seam. The linear pipeline assembles a run (skill install,
* prompt, ask bridge) and then hands off to a runner to actually drive the
* coding agent. A runner owns the agent loop and the model transport; it does
* NOT own bootstrap, prompt assembly, error routing, or the outro — those stay
* in `linear.ts` so every runner shares them.
*
* `anthropic` (claude-agent-sdk) is the control. `pi` (pi.dev) is the
* challenger. The runner is chosen by `resolvePair` in `runner-plan.ts`.
*/

import type { WizardSession } from '@lib/wizard-session';
import type { ProgramConfig } from '@lib/programs/program-step';
import type { SpinnerHandle } from '@ui';
import type { WizardAskBridge } from '@lib/wizard-ask-bridge';
import type { AgentErrorType } from '@lib/agent/agent-interface';
import type {
ProgramRun,
BootstrapResult,
} from '@lib/agent/runner/shared/types';

/**
* The benchmark/telemetry hook threaded through a run, if enabled. A runner
* receives it as `BackendRunInputs.middleware`; the `anthropic` runner passes
* it through unchanged to the agent loop.
*/
export interface RunMiddleware {
/**
* Receives a single message of unknown shape.
* NOTE(review): presumably invoked once per message the agent emits —
* confirm against the agent loop that calls it.
*/
onMessage(message: unknown): void;
/**
* Closes out the run given the final result message and the total run
* duration in milliseconds. The return value is opaque at this seam.
*/
finalize(resultMessage: unknown, totalDurationMs: number): unknown;
}

/**
* Everything a runner needs to run one program. Assembled by `linear.ts` from
* the bootstrap result and the program config; the runner consumes it and never
* re-derives run context.
*/
export interface BackendRunInputs {
/** The wizard session for this run (e.g. the install directory and any pending question). */
session: WizardSession;
/** Per-run settings: labels, spinner/success/error messages, ask limits, extra MCP servers. */
config: ProgramRun;
/** The program definition; supplies the allowed/disallowed tool lists. */
programConfig: ProgramConfig;
/** Bootstrap output: access token, host, MCP URL, skills base URL, wizard flags and metadata. */
boot: BootstrapResult;
/** The fully assembled prompt. */
prompt: string;
/** Installed framework-skill path, when the program installs one. */
skillPath?: string;
/** The run spinner (the runner drives start/stop). */
spinner: SpinnerHandle;
/** Interactive question bridge; undefined in CI/headless (ask disabled). */
askBridge?: WizardAskBridge;
/** Benchmark middleware, when `session.benchmark` is set. */
middleware?: RunMiddleware;
/**
* Gateway model id resolved from the (runner, model) pair. The `anthropic`
* runner forwards it to agent initialization as `modelOverride`.
*/
model: string;
}

/**
* What a runner reports back: an error classification, or nothing on success.
* Both fields are optional; `error` is what the pipeline inspects to route
* failures (e.g. comparing against `AgentErrorType.ABORT`), and `message` is
* optional accompanying text.
*/
export type AgentResult = { error?: AgentErrorType; message?: string };

/**
* A drop-in agent runner: consumes a fully-assembled run, returns a result.
* The pipeline picks a runner by name and calls `run` once per program.
*/
export interface AgentRunner {
/** Stable name used for logs + telemetry (matches the flag variant). */
readonly name: 'anthropic' | 'pi';
/**
* Drives the agent for one program using the supplied inputs and resolves
* with the outcome; see `AgentResult` for how success and failure are encoded.
*/
run(inputs: BackendRunInputs): Promise<AgentResult>;
}
68 changes: 17 additions & 51 deletions src/lib/agent/runner/linear.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,10 @@
import type { WizardSession } from '../../wizard-session';
import { OutroKind } from '../../wizard-session';
import { getUI } from '../../../ui';
import {
initializeAgent,
runAgent as executeAgent,
AgentErrorType,
AgentSignals,
} from '../agent-interface';
import { AgentErrorType, AgentSignals } from '../agent-interface';
import { restoreClaudeSettings } from '../claude-settings';
import { getCloudUrlFromRegion } from '../../../utils/urls';
import { logToFile, getLogFilePath } from '../../../utils/debug';
import { logToFile } from '../../../utils/debug';
import { createBenchmarkPipeline } from '../../middleware/benchmark';
import {
wizardAbort,
Expand All @@ -25,14 +20,14 @@ import {
} from '../../../utils/wizard-abort';
import { analytics } from '../../../utils/analytics';
import { formatScanReport, writeScanReport } from '../../yara-hooks';
import { detectNodePackageManagers } from '../../detection/package-manager';
import { installSkillById } from '../../wizard-tools';
import { createWizardAskBridge } from '../../wizard-ask-bridge';
import type { ProgramConfig } from '../../programs/program-step';
import { assemblePrompt } from '../agent-prompt';
import type { ProgramRun, BootstrapResult } from './shared/types';
import { abortOnInstallFailure } from './shared/errors';
import { shouldDisableAsk, sessionToOptions } from './shared/bootstrap';
import { resolvePair, getRunner, MODELS } from './runner-plan';

export async function runLinearProgram(
session: WizardSession,
Expand All @@ -47,9 +42,7 @@ export async function runLinearProgram(
accessToken,
projectId,
cloudRegion,
mcpUrl,
wizardFlags,
wizardMetadata,
project,
} = boot;

Expand Down Expand Up @@ -101,33 +94,6 @@ export async function runLinearProgram(
timeoutMs: config.askTimeoutMs,
});

getUI().log.step('Initializing Claude agent...');
const agent = await initializeAgent(
{
workingDirectory: session.installDir,
posthogMcpUrl: mcpUrl,
posthogApiKey: accessToken,
posthogApiHost: host,
additionalMcpServers: config.additionalMcpServers,
detectPackageManager:
config.detectPackageManager ?? detectNodePackageManagers,
skillsBaseUrl,
wizardFlags,
wizardMetadata,
integrationLabel: config.integrationLabel,
askBridge,
askMaxQuestions: config.maxQuestions,
allowedTools: programConfig.allowedTools,
disallowedTools: programConfig.disallowedTools,
getPendingQuestion: () => session.pendingQuestion,
},
sessionToOptions(session),
);
getUI().log.step(`Verbose logs: ${getLogFilePath()}`);
getUI().log.success("Agent initialized. Let's get cooking!");

logToFile('[agent-runner] agent initialized');

const middleware = session.benchmark
? createBenchmarkPipeline(spinner, sessionToOptions(session))
: undefined;
Expand All @@ -150,23 +116,23 @@ export async function runLinearProgram(
});
logToFile(`[agent-runner] prompt assembled (${prompt.length} chars)`);

// 8. Run agent
const agentResult = await executeAgent(
agent,
// 8. Resolve the (runner, model) pair from the central plan and run the agent
// through the selected runner. The runner owns the agent loop + model
// transport; everything around it (skill install, prompt, ask bridge, error
// routing, outro) stays here so every runner shares it.
const pair = resolvePair({ program: programConfig.id, flags: wizardFlags });
const agentResult = await getRunner(pair.runner).run({
session,
config,
programConfig,
boot,
prompt,
sessionToOptions(session),
skillPath,
spinner,
{
estimatedDurationMinutes: config.estimatedDurationMinutes,
spinnerMessage: config.spinnerMessage,
successMessage: config.successMessage,
errorMessage: config.errorMessage ?? `${config.integrationLabel} failed`,
additionalFeatureQueue: config.additionalFeatureQueue ?? [],
abortCases: config.abortCases,
emitStepEvents: config.trackStepProgress ?? false,
},
askBridge,
middleware,
);
model: MODELS[pair.model],
});

// 9. Error handling (full set from both runners)
if (agentResult.error === AgentErrorType.ABORT) {
Expand Down
Loading