Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion scripts/smoke-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,42 @@ node --input-type=module -e "import '$DIST_BIN'" 2>&1 | head -5 | grep -q 'PostH
exit 1
}

# ── 2. CI flag overrides physically absent from production builds ───────────
# The override path (src/utils/ci-flag-overrides.ts) is dead code in published
# builds and tsdown strips it; its env var name appearing in dist/*.js means
# dead-code elimination regressed and a prod surface leaked. Sourcemaps keep
# the original source, so only .js output counts.
OVERRIDE_MARKERS='WIZARD_CI_FLAG_OVERRIDES WIZARD_CI_EXCLUDE_TASKS'

# Fail closed when there is no bundle output to scan. With an unmatched glob
# grep exits 2, which the production branch below would otherwise read as
# "marker absent" and pass without having inspected anything.
dist_js_found=0
for dist_js in ./dist/*.js; do
  if [ -f "$dist_js" ]; then
    dist_js_found=1
    break
  fi
done
if [ "$dist_js_found" -ne 1 ]; then
  echo 'Smoke test failed: no ./dist/*.js output to scan for CI override markers' >&2
  exit 1
fi

if [ "${WIZARD_BUILD_NODE_ENV:-production}" = "ci" ]; then
  # CI builds must keep the paths — their absence means the overrides silently
  # stopped working and CI is back to testing live behavior.
  for marker in $OVERRIDE_MARKERS; do
    # -F: the markers are literal env var names, not patterns.
    if ! grep -qF -- "$marker" ./dist/*.js; then
      echo "Smoke test failed: CI build is missing the $marker path" >&2
      exit 1
    fi
  done
  # And a real invocation must accept the env var. yargs claims every
  # POSTHOG_WIZARD_-prefixed env var as a CLI option and strict-rejects
  # unknown ones during command parse (--version/--help short-circuit and
  # prove nothing). The run exits fast on the missing api key — all this
  # asserts is that yargs did not reject the environment.
  ci_probe=$(WIZARD_CI_FLAG_OVERRIDES='{"wizard-orchestrator":true}' node "$DIST_BIN" --ci --install-dir /tmp/wizard-smoke-probe 2>&1) || true
  # printf instead of echo: the probe output is arbitrary text and some
  # shells' echo interprets backslash sequences in its argument.
  if printf '%s\n' "$ci_probe" | grep -q 'Unknown argument'; then
    echo 'Smoke test failed: CI binary rejects WIZARD_CI_FLAG_OVERRIDES in the environment' >&2
    printf '%s\n' "$ci_probe" | head -3 >&2
    exit 1
  fi
else
  # Production builds: any occurrence of a marker in the emitted .js is a leak.
  for marker in $OVERRIDE_MARKERS; do
    if grep -qF -- "$marker" ./dist/*.js; then
      echo "Smoke test failed: $marker code leaked into a production build" >&2
      exit 1
    fi
  done
fi

# ── 3. --ci rejected in production builds ────────────────────────────────────
# build:ci sets WIZARD_BUILD_NODE_ENV=ci → --ci stays enabled → skip the check.
if [ "${WIZARD_BUILD_NODE_ENV:-production}" = "ci" ]; then
exit 0
Expand Down
5 changes: 5 additions & 0 deletions src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ export const IS_PRODUCTION_BUILD = process.env.NODE_ENV === 'production';
* Add new keys here when a new runtime dependency is needed.
*/
type RuntimeEnvKey =
// CI-build-only flag overrides (see utils/ci-flag-overrides.ts).
// Deliberately NOT POSTHOG_WIZARD_-prefixed: yargs .env('POSTHOG_WIZARD')
// would claim it as an unknown CLI option and strict-reject the run.
| 'WIZARD_CI_FLAG_OVERRIDES'
| 'WIZARD_CI_EXCLUDE_TASKS'
// Wizard CLI configuration (yargs POSTHOG_WIZARD_ prefix)
| 'POSTHOG_WIZARD_BENCHMARK_CONFIG'
| 'POSTHOG_WIZARD_BENCHMARK_FILE'
Expand Down
299 changes: 299 additions & 0 deletions src/lib/agent/__tests__/agent-prompt-loader.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,299 @@
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import {
agentRunTools,
assembleTaskPrompt,
buildRegistry,
parseAgentPrompt,
resolveTask,
taskModel,
type AgentPrompt,
type AgentRegistry,
type OrchestratorPromptContext,
} from '../agent-prompt-loader';
import { QueueStore } from '../../programs/orchestrator/queue';

/** Creates a fresh, uniquely suffixed temp directory and returns its path. */
function tmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'agent-loader-test-');
  return fs.mkdtempSync(prefix);
}

/**
 * Builds a registry for the fixed 'test-flow' flow, stamping that flow onto
 * every given prompt first so each one is tagged with the flow being built.
 */
function registryOf(prompts: AgentPrompt[]): AgentRegistry {
  const flow = 'test-flow';
  const scoped: AgentPrompt[] = prompts.map((entry) => ({ ...entry, flow }));
  return buildRegistry(scoped, flow);
}

// Unit tests for parseAgentPrompt: frontmatter scalars and inline arrays,
// inline-comment stripping, the fallback for `type`, and the defaults used
// when fields (or the whole frontmatter) are missing.
describe('parseAgentPrompt', () => {
// Fixture carrying every frontmatter field asserted below, including a
// trailing `#` comment on the model line. The literal is whitespace-sensitive.
const sample = `---
type: instrument-events
model: claude-sonnet-4-6 # cheapest model that succeeds
skills: [instrument-events]
allowedTools: [Read, Edit, Grep, Glob, Bash]
disallowedTools: [enqueue_task]
dependsOn: [init]
---

## Goal
Add at least one capture call.
`;

it('parses frontmatter scalars and inline arrays', () => {
const p = parseAgentPrompt(sample, 'fallback');
expect(p.type).toBe('instrument-events');
expect(p.model).toBe('claude-sonnet-4-6');
expect(p.skills).toEqual(['instrument-events']);
expect(p.allowedTools).toEqual(['Read', 'Edit', 'Grep', 'Glob', 'Bash']);
expect(p.disallowedTools).toEqual(['enqueue_task']);
expect(p.dependsOn).toEqual(['init']);
});

it('strips inline comments and keeps the body', () => {
const p = parseAgentPrompt(sample, 'fallback');
// The `# ...` tail of the model line must not end up in the value.
expect(p.model).not.toContain('#');
expect(p.body).toContain('## Goal');
// The frontmatter fences must not be part of the body.
expect(p.body).not.toContain('---');
});

it('falls back to the menu id when type is omitted', () => {
// No `type:` key here, so the second argument is expected as the type.
const p = parseAgentPrompt('---\nmodel: x\n---\nbody', 'install');
expect(p.type).toBe('install');
});

it('parses the flow from frontmatter', () => {
const p = parseAgentPrompt('---\nflow: audit\n---\nx', 'fix-events');
expect(p.flow).toBe('audit');
});

it('marks the seed from frontmatter; everything else is a task', () => {
// `seed: true` in frontmatter yields seed === true ...
expect(parseAgentPrompt('---\nseed: true\n---\nplan', 'planner').seed).toBe(
true,
);
// ... and a prompt without a `seed` key yields seed === false.
expect(parseAgentPrompt('---\nmodel: x\n---\nbody', 'install').seed).toBe(
false,
);
});

it('defaults missing array fields to empty and model to undefined', () => {
// Input with no frontmatter fences at all: the whole text is the body.
const p = parseAgentPrompt('no frontmatter at all', 'stub');
expect(p.model).toBeUndefined();
expect(p.skills).toEqual([]);
expect(p.dependsOn).toEqual([]);
expect(p.body).toBe('no frontmatter at all');
});
});

// agentRunTools: orchestrator tool names gain the MCP server prefix, while
// native tool names are returned untouched.
describe('agentRunTools', () => {
  it('MCP-qualifies orchestrator tools and passes native tools through', () => {
    const source =
      '---\nallowedTools: [Read, read_handoffs]\ndisallowedTools: [enqueue_task, complete_task, Bash]\n---\nx';
    const tools = agentRunTools(parseAgentPrompt(source, 't'));

    const expectedAllowed = ['Read', 'mcp__posthog-wizard__read_handoffs'];
    const expectedDisallowed = [
      'mcp__posthog-wizard__enqueue_task',
      'mcp__posthog-wizard__complete_task',
      'Bash',
    ];

    expect(tools.allowedTools).toEqual(expectedAllowed);
    expect(tools.disallowedTools).toEqual(expectedDisallowed);
  });
});

// buildRegistry: flow scoping, seed separation, and harness exclusions.
describe('buildRegistry', () => {
  /** Minimal AgentPrompt fixture; any field in `over` replaces the default. */
  function prompt(over: Partial<AgentPrompt>): AgentPrompt {
    const defaults: AgentPrompt = {
      type: 'x',
      seed: false,
      skills: [],
      allowedTools: [],
      disallowedTools: [],
      dependsOn: [],
      body: 'b',
    };
    return { ...defaults, ...over };
  }

  it('scopes to one flow and keeps the seed out of the task types', () => {
    const mixedFlows = [
      prompt({ type: 'plan-audit', flow: 'audit', seed: true }),
      prompt({ type: 'fix-events', flow: 'audit' }),
      prompt({ type: 'install', flow: 'posthog-integration' }),
      prompt({ type: 'example' }),
    ];
    const auditRegistry = buildRegistry(mixedFlows, 'audit');

    expect(auditRegistry.types).toEqual(['fix-events']);
    expect(auditRegistry.seed?.type).toBe('plan-audit');
    expect(auditRegistry.get('install')).toBeUndefined();
    // A flowless prompt (e.g. the documentation example) joins no registry.
    expect(auditRegistry.get('example')).toBeUndefined();
  });

  it('drops harness-excluded types; unrestricted runs keep them', () => {
    const flowPrompts = [
      prompt({ type: 'plan', flow: 'f', seed: true }),
      prompt({ type: 'build', flow: 'f' }),
      prompt({ type: 'dashboard', flow: 'f' }),
    ];

    const restricted = buildRegistry(flowPrompts, 'f', {
      exclude: ['dashboard'],
    });
    expect(restricted.types).toEqual(['build']);

    const unrestricted = buildRegistry(flowPrompts, 'f');
    expect(unrestricted.types).toEqual(['build', 'dashboard']);
  });
});

// Tests for resolveTask, run against a real QueueStore that lives in a fresh
// temp directory per test. Covers model/tool/skill resolution and how the
// handoffs of upstream tasks are folded into the resolved prompt.
describe('resolveTask', () => {
let dir: string;
let store: QueueStore;

// Each test gets its own directory and store, so queue state never leaks
// between tests.
beforeEach(() => {
dir = tmpDir();
store = new QueueStore(dir, 'run-1');
});

// Remove the per-test directory; `force` keeps cleanup from throwing if it
// is already gone.
afterEach(() => {
fs.rmSync(dir, { recursive: true, force: true });
});

// The only prompt registered by these tests (type 'capture').
const prompt: AgentPrompt = {
type: 'capture',
seed: false,
model: 'claude-haiku-4-5-20251001',
skills: ['instrument-events'],
allowedTools: ['Read', 'Edit'],
disallowedTools: ['enqueue_task'],
dependsOn: ['plan-capture'],
body: '## Goal\nInstrument the planned events.',
};

it('throws when no prompt is registered for the type', () => {
const registry = registryOf([]);
// Hand-built task object rather than an enqueued one; the `as never` cast
// sidesteps type checking of its shape.
const task = { type: 'capture', dependsOn: [] } as never;
// The error is expected to mention the unresolved type.
expect(() => resolveTask(registry, task, store)).toThrow(/capture/);
});

it('resolves model, tools, and skills from the prompt', () => {
const registry = registryOf([prompt]);
const task = store.enqueue({ type: 'capture' });
const resolved = resolveTask(registry, task, store);
expect(resolved.model).toBe('claude-haiku-4-5-20251001');
expect(resolved.skills).toEqual(['instrument-events']);
// The prompt's bare `enqueue_task` is expected back in MCP-qualified form.
expect(resolved.disallowedTools).toEqual([
'mcp__posthog-wizard__enqueue_task',
]);
});

it('prefers the enqueue model override over the prompt model', () => {
const registry = registryOf([prompt]);
const task = store.enqueue({ type: 'capture', model: 'override-x' });
expect(resolveTask(registry, task, store).model).toBe('override-x');
});

it("appends upstream dependencies' handoffs as context", () => {
const registry = registryOf([prompt]);
// Complete an upstream task with a handoff, then enqueue a task that
// depends on it.
const dep = store.enqueue({ type: 'plan-capture' });
store.complete(dep.id, {
goals: 'decide events',
did: 'picked signup and purchase',
forNextAgent: 'instrument those two',
});
const task = store.enqueue({
type: 'capture',
dependsOn: [dep.id],
});
const resolved = resolveTask(registry, task, store);
expect(resolved.prompt).toContain('Context from previous steps');
expect(resolved.prompt).toContain('picked signup and purchase');
expect(resolved.prompt).toContain('instrument those two');
});

it('omits the context section when there are no handoffs', () => {
const registry = registryOf([prompt]);
const task = store.enqueue({ type: 'capture' });
expect(resolveTask(registry, task, store).prompt).not.toContain(
'Context from previous steps',
);
});

it('includes transitive ancestors, not just direct dependencies', () => {
const registry = registryOf([prompt]);
// install -> capture -> (this task). The task depends only on capture, but
// install's context must still reach it so nothing is silently lost.
const install = store.enqueue({ type: 'install' });
store.complete(install.id, {
goals: 'declare the SDK',
did: 'added posthog to the manifest',
forNextAgent: 'SDK is declared, not yet installed',
});
const capture = store.enqueue({ type: 'capture', dependsOn: [install.id] });
store.complete(capture.id, {
goals: 'instrument events',
did: 'added capture calls',
forNextAgent: 'events are in',
});
const task = store.enqueue({ type: 'capture', dependsOn: [capture.id] });
const { prompt: out } = resolveTask(registry, task, store);
expect(out).toContain('added posthog to the manifest'); // transitive
expect(out).toContain('added capture calls'); // direct
});

it('lists each ancestor once for diamond dependencies', () => {
const registry = registryOf([prompt]);
// Diamond: install is an ancestor of the final task through both a and b.
const install = store.enqueue({ type: 'install' });
store.complete(install.id, {
goals: 'g',
did: 'manifest entry added',
forNextAgent: 'n',
});
const a = store.enqueue({ type: 'identify', dependsOn: [install.id] });
store.complete(a.id, { goals: 'g', did: 'a-did', forNextAgent: 'n' });
const b = store.enqueue({ type: 'identify', dependsOn: [install.id] });
store.complete(b.id, { goals: 'g', did: 'b-did', forNextAgent: 'n' });
// Resolved task must be a registered type (capture); its ancestors need not be.
const task = store.enqueue({ type: 'capture', dependsOn: [a.id, b.id] });
const { prompt: out } = resolveTask(registry, task, store);
// install's handoff text must appear exactly once despite the two paths.
expect(out.match(/manifest entry added/g)).toHaveLength(1);
});
});

// taskModel precedence: enqueue-time override, then the prompt's model, then
// the built-in default.
describe('taskModel', () => {
  const capturePrompt = parseAgentPrompt(
    '---\nmodel: prompt-model\n---\nx',
    'capture',
  );

  it('prefers the enqueue override, then the prompt, then the default', () => {
    const withPrompt = registryOf([capturePrompt]);
    const bareTask = { type: 'capture' };
    const overriddenTask = { ...bareTask, model: 'override' };

    // 1. An explicit model on the task wins.
    expect(taskModel(withPrompt, overriddenTask as never)).toBe('override');
    // 2. Otherwise the registered prompt's model applies.
    expect(taskModel(withPrompt, bareTask as never)).toBe('prompt-model');
    // 3. With nothing registered, the default model is returned.
    expect(taskModel(registryOf([]), bareTask as never)).toBe(
      'claude-sonnet-4-6',
    );
  });
});

// assembleTaskPrompt: the skill-instructions section appears only when skill
// paths are supplied.
describe('assembleTaskPrompt', () => {
  const promptContext: OrchestratorPromptContext = {
    projectId: 1,
    projectApiKey: 'phc_x',
    host: 'https://us.posthog.com',
  };

  it('points the agent at its installed task instructions', () => {
    const skillPath = '.posthog-wizard/skills/capture/SKILL.md';
    const assembled = assembleTaskPrompt(promptContext, 'do the task', [
      skillPath,
    ]);

    expect(assembled).toContain(skillPath);
    expect(assembled).toContain('do the task');
  });

  it('omits the instructions section when no skills are installed', () => {
    const assembled = assembleTaskPrompt(promptContext, 'do the task');
    expect(assembled).not.toContain('task instructions');
  });
});
36 changes: 36 additions & 0 deletions src/lib/agent/__tests__/variant-gating.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import {
buildWizardMetadata,
isOrchestratorEnabled,
} from '@lib/agent/agent-interface';

// isOrchestratorEnabled: only the `wizard-orchestrator` flag set to 'true'
// turns the orchestrator on.
describe('isOrchestratorEnabled', () => {
  it('is true only when the wizard-orchestrator flag is true', () => {
    const enabled = isOrchestratorEnabled({ 'wizard-orchestrator': 'true' });
    expect(enabled).toBe(true);
  });

  it('is false when the flag is false, another flag, or absent', () => {
    const flagOff = isOrchestratorEnabled({ 'wizard-orchestrator': 'false' });
    expect(flagOff).toBe(false);

    const unrelatedFlag = isOrchestratorEnabled({
      'wizard-variant': 'orchestrator',
    });
    expect(unrelatedFlag).toBe(false);

    const emptyFlags = isOrchestratorEnabled({});
    expect(emptyFlags).toBe(false);

    const noFlags = isOrchestratorEnabled();
    expect(noFlags).toBe(false);
  });
});

// buildWizardMetadata: maps the `wizard-variant` flag to the VARIANT header,
// defaulting to 'base' for anything it does not recognise.
describe('buildWizardMetadata', () => {
  it('selects a known variant header from the flag', () => {
    const metadata = buildWizardMetadata({ 'wizard-variant': 'subagents' });
    expect(metadata).toEqual({ VARIANT: 'subagents' });
  });

  it('falls back to the base variant for unknown or missing flags', () => {
    const unknownVariant = buildWizardMetadata({ 'wizard-variant': 'nope' });
    expect(unknownVariant).toEqual({ VARIANT: 'base' });

    const missingVariant = buildWizardMetadata({});
    expect(missingVariant).toEqual({ VARIANT: 'base' });
  });
});
Loading
Loading