PostHog · gewenyu99 · Jun 18, 2026 · Jun 10, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/scripts/smoke-test.sh b/scripts/smoke-test.sh
@@ -24,14 +24,16 @@ node --input-type=module -e "import '$DIST_BIN'" 2>&1 | head -5 | grep -q 'PostH
 # builds and tsdown strips it; its env var name appearing in dist/*.js means
 # dead-code elimination regressed and a prod surface leaked. Sourcemaps keep
 # the original source, so only .js output counts.
-OVERRIDE_MARKER='WIZARD_CI_FLAG_OVERRIDES'
+OVERRIDE_MARKERS='WIZARD_CI_FLAG_OVERRIDES WIZARD_CI_EXCLUDE_TASKS'
 if [ "${WIZARD_BUILD_NODE_ENV:-production}" = "ci" ]; then
-  # CI builds must keep the path — its absence means the override silently
-  # stopped working and CI is back to testing live flags.
-  if ! grep -q "$OVERRIDE_MARKER" ./dist/*.js; then
-    echo 'Smoke test failed: CI build is missing the CI flag-override path' >&2
-    exit 1
-  fi
+  # CI builds must keep the paths — their absence means the overrides silently
+  # stopped working and CI is back to testing live behavior.
+  for marker in $OVERRIDE_MARKERS; do
+    if ! grep -q "$marker" ./dist/*.js; then
+      echo "Smoke test failed: CI build is missing the $marker path" >&2
+      exit 1
+    fi
+  done
   # And a real invocation must accept the env var. yargs claims every
   # POSTHOG_WIZARD_-prefixed env var as a CLI option and strict-rejects
   # unknown ones during command parse (--version/--help short-circuit and
@@ -44,10 +46,12 @@ if [ "${WIZARD_BUILD_NODE_ENV:-production}" = "ci" ]; then
     exit 1
   fi
 else
-  if grep -q "$OVERRIDE_MARKER" ./dist/*.js; then
-    echo 'Smoke test failed: CI flag-override code leaked into a production build' >&2
-    exit 1
-  fi
+  for marker in $OVERRIDE_MARKERS; do
+    if grep -q "$marker" ./dist/*.js; then
+      echo "Smoke test failed: $marker code leaked into a production build" >&2
+      exit 1
+    fi
+  done
 fi
 
 # ── 3. --ci rejected in production builds ────────────────────────────────────

diff --git a/src/env.ts b/src/env.ts
@@ -43,6 +43,7 @@ type RuntimeEnvKey =
   // Deliberately NOT POSTHOG_WIZARD_-prefixed: yargs .env('POSTHOG_WIZARD')
   // would claim it as an unknown CLI option and strict-reject the run.
   | 'WIZARD_CI_FLAG_OVERRIDES'
+  | 'WIZARD_CI_EXCLUDE_TASKS'
   // Wizard CLI configuration (yargs POSTHOG_WIZARD_ prefix)
   | 'POSTHOG_WIZARD_BENCHMARK_CONFIG'
   | 'POSTHOG_WIZARD_BENCHMARK_FILE'

diff --git a/...tor/__tests__/agent-prompt-loader.test.ts → ...ent/__tests__/agent-prompt-loader.test.ts b/...tor/__tests__/agent-prompt-loader.test.ts → ...ent/__tests__/agent-prompt-loader.test.ts
@@ -12,7 +12,7 @@ import {
   type AgentRegistry,
   type OrchestratorPromptContext,
 } from '../agent-prompt-loader';
-import { QueueStore } from '../queue';
+import { QueueStore } from '../../programs/orchestrator/queue';
 
 function tmpDir(): string {
   return fs.mkdtempSync(path.join(os.tmpdir(), 'agent-loader-test-'));
@@ -131,6 +131,18 @@ describe('buildRegistry', () => {
     // A flowless prompt (e.g. the documentation example) joins no registry.
     expect(registry.get('example')).toBeUndefined();
   });
+
+  it('drops harness-excluded types; unrestricted runs keep them', () => {
+    const prompts = [
+      prompt({ type: 'plan', flow: 'f', seed: true }),
+      prompt({ type: 'build', flow: 'f' }),
+      prompt({ type: 'dashboard', flow: 'f' }),
+    ];
+    expect(
+      buildRegistry(prompts, 'f', { exclude: ['dashboard'] }).types,
+    ).toEqual(['build']);
+    expect(buildRegistry(prompts, 'f').types).toEqual(['build', 'dashboard']);
+  });
 });
 
 describe('resolveTask', () => {

diff --git a/...grams/orchestrator/agent-prompt-loader.ts → src/lib/agent/agent-prompt-loader.ts b/...grams/orchestrator/agent-prompt-loader.ts → src/lib/agent/agent-prompt-loader.ts
@@ -15,8 +15,8 @@
  * network latency. The registry's type list also drives `enqueue_task`
  * validation.
  */
-import type { QueueStore, QueuedTask } from './queue';
-import type { ResolvedTask } from './executor';
+import type { QueueStore, QueuedTask } from '../programs/orchestrator/queue';
+import type { ResolvedTask } from '../programs/orchestrator/executor';
 
 /**
  * The basics the client injects around every agent-prompt body. The `/agents/`
@@ -137,8 +137,15 @@ export interface AgentRegistry {
 export function buildRegistry(
   prompts: readonly AgentPrompt[],
   flow: string,
+  opts?: { exclude?: readonly string[] },
 ): AgentRegistry {
-  const inFlow = prompts.filter((p) => p.flow === flow);
+  // The harness can exclude task types (CI excludes dashboards). An excluded
+  // type does not exist for the run: the seed cannot enqueue it and no agent
+  // is ever spun up for it.
+  const excluded = new Set(opts?.exclude ?? []);
+  const inFlow = prompts.filter(
+    (p) => p.flow === flow && !excluded.has(p.type),
+  );
   const byType = new Map(inFlow.map((p) => [p.type, p]));
   return {
     types: inFlow.filter((p) => !p.seed).map((p) => p.type),
@@ -238,6 +245,7 @@ async function fetchText(url: string): Promise<string> {
 export async function loadAgentRegistry(
   skillsBaseUrl: string,
   flow: string,
+  opts?: { exclude?: readonly string[] },
 ): Promise<AgentRegistry> {
   const menuRaw = await fetchText(`${skillsBaseUrl}/agent-menu.json`);
   const menu = JSON.parse(menuRaw) as AgentMenu;
@@ -249,7 +257,7 @@ export async function loadAgentRegistry(
     }),
   );
 
-  return buildRegistry(prompts, flow);
+  return buildRegistry(prompts, flow, opts);
 }
 
 /**

diff --git a/src/lib/agent/agent-runner.ts b/src/lib/agent/agent-runner.ts
@@ -372,12 +372,12 @@ async function bootstrapProgram(
   // orchestrator arm overwrites this with its own variant when it forks.
   analytics.setTag('variant', wizardMetadata.VARIANT);
 
+  // One MCP url for every region: the server resolves the user's region from
+  // the bearer token, so the EU subdomain (a Claude Code OAuth workaround) is
+  // not needed here.
   const mcpUrl = session.localMcp
     ? 'http://localhost:8787/mcp'
-    : runtimeEnv('MCP_URL') ||
-      (cloudRegion === 'eu'
-        ? 'https://mcp-eu.posthog.com/mcp'
-        : 'https://mcp.posthog.com/mcp');
+    : runtimeEnv('MCP_URL') || 'https://mcp.posthog.com/mcp';
 
   return {
     skillsBaseUrl,

diff --git a/src/lib/agent/mcp-prompt-streaming.ts b/src/lib/agent/mcp-prompt-streaming.ts
@@ -42,38 +42,11 @@ const MODEL = 'claude-sonnet-4-6';
 // telemetry on average turn counts per prompt.
 const MAX_TURNS = 30;
 
-function resolveMcpUrl(host: string): string {
-  const override = runtimeEnv('MCP_URL');
-  if (override) return override;
-  // Parse the actual hostname rather than substring-matching the raw
-  // input. `host.includes('eu.posthog.com')` would let arbitrary URLs
-  // like `https://evil.eu.posthog.com.attacker.com` or
-  // `https://useu.posthog.commerce` route to the EU MCP endpoint
-  // (CodeQL: incomplete-url-substring-sanitization). Parsing into a
-  // hostname and checking exact match / trusted subdomain blocks both.
-  const hostname = parseHostname(host);
-  const isEu =
-    hostname === 'eu.posthog.com' || hostname.endsWith('.eu.posthog.com');
-  return isEu
-    ? 'https://mcp-eu.posthog.com/mcp'
-    : 'https://mcp.posthog.com/mcp';
-}
-
-/**
- * Normalize a host string into a hostname suitable for trust checks.
- * Accepts either a full URL (`https://us.posthog.com`) or a bare host
- * (`us.posthog.com`). Returns the hostname lowercased, or the trimmed
- * input lowercased if parsing fails (defensive fallback so a malformed
- * value still resolves to the safer-default US endpoint).
- */
-function parseHostname(raw: string): string {
-  const trimmed = raw.trim().toLowerCase();
-  try {
-    const withScheme = trimmed.includes('://') ? trimmed : `https://${trimmed}`;
-    return new URL(withScheme).hostname.toLowerCase();
-  } catch {
-    return trimmed;
-  }
+// One MCP url for every region: the server resolves the user's region from
+// the bearer token, so the EU subdomain (a Claude Code OAuth workaround) is
+// not needed here.
+function resolveMcpUrl(): string {
+  return runtimeEnv('MCP_URL') || 'https://mcp.posthog.com/mcp';
 }
 
 /**
@@ -245,7 +218,7 @@ export async function* runMcpPromptViaSdk(args: {
       once: true,
     });
 
-  const mcpUrl = resolveMcpUrl(credentials.host);
+  const mcpUrl = resolveMcpUrl();
   logToFile(
     `[runMcpPromptViaSdk] mcpUrl=${mcpUrl} model=${MODEL} resume=${
       resumeSessionId ?? '(none)'

diff --git a/src/lib/programs/orchestrator/__tests__/queue-tools.test.ts b/src/lib/programs/orchestrator/__tests__/queue-tools.test.ts
@@ -57,6 +57,18 @@ describe('checkEnqueueGuards', () => {
     const r = checkEnqueueGuards(ctx, { type: 'init', reason: 'x' });
     expect(r).toEqual({ ok: true });
   });
+
+  it('refuses to grow the queue past the runaway cap', () => {
+    for (let i = 0; i < 30; i++) {
+      store.enqueue({ type: 'capture', inputs: { i } });
+    }
+    const r = checkEnqueueGuards(ctx, {
+      type: 'init',
+      inputs: { i: 30 },
+      reason: 'x',
+    });
+    expect(r).toMatchObject({ ok: false, guard: 'queue-full' });
+  });
 });
 
 describe('apply functions', () => {

diff --git a/src/lib/programs/orchestrator/executor.ts b/src/lib/programs/orchestrator/executor.ts
@@ -101,7 +101,7 @@ export async function drainQueue(
   for (;;) {
     for (const task of store.nextRunnable()) {
       if (++starts > opts.maxStarts) break;
-      // runOne marks the task in_progress synchronously, so the next
+      // runOne marks the task running synchronously, so the next
       // nextRunnable() call no longer offers it.
       const p = runOne(store, runTask, task).finally(() =>
         running.delete(task.id),

diff --git a/src/lib/programs/orchestrator/orchestrator-runner.ts b/src/lib/programs/orchestrator/orchestrator-runner.ts
@@ -23,6 +23,7 @@ import { detectNodePackageManagers } from '../../detection/package-manager';
 import { installSkillById } from '../../wizard-tools';
 import { getUI } from '../../../ui';
 import { analytics } from '../../../utils/analytics';
+import { ciExcludedTaskTypes } from '../../../utils/ci-flag-overrides';
 import { logToFile } from '../../../utils/debug';
 import type { ProgramConfig } from '../program-step';
 import type { BootstrapResult } from '../../agent/agent-runner';
@@ -42,7 +43,7 @@ import {
   resolveTask,
   taskModel,
   type OrchestratorPromptContext,
-} from './agent-prompt-loader';
+} from '../../agent/agent-prompt-loader';
 
 function toTodoStatus(status: TaskStatus): string {
   switch (status) {
@@ -88,6 +89,7 @@ export async function runOrchestrator(
   const registry = await loadAgentRegistry(
     boot.skillsBaseUrl,
     programConfig.id,
+    { exclude: ciExcludedTaskTypes() },
   );
   const seedPrompt = registry.seed;
   if (!seedPrompt) {

diff --git a/src/lib/programs/orchestrator/queue-tools.ts b/src/lib/programs/orchestrator/queue-tools.ts
@@ -55,17 +55,33 @@ function dedupKey(type: string, inputs: Record<string, unknown>): string {
   return `${type}::${stableStringify(inputs)}`;
 }
 
+/**
+ * A backstop on total queue size. Tasks can enqueue tasks, so a misbehaving
+ * type could grow the queue without bound. Keeping the graph small is the job
+ * of good agent and skill design, not this number — it only stops a runaway.
+ * The real flow is ~9 tasks, so this sits well clear of it.
+ */
+const MAX_QUEUE_TASKS = 30;
+
 /**
  * Validate an enqueue. Structural checks only — a real type, real dependencies,
- * and not a literal duplicate. How much runs, and in what shape, is the task
- * graph's business, not a knob's.
+ * not a literal duplicate, and not past the runaway backstop. How much runs,
+ * and in what shape, is the task graph's business, not a knob's.
  */
 export function checkEnqueueGuards(
   ctx: OrchestratorToolsContext,
   args: EnqueueArgs,
 ): GuardResult {
   const tasks = ctx.store.list();
 
+  if (tasks.length >= MAX_QUEUE_TASKS) {
+    return {
+      ok: false,
+      guard: 'queue-full',
+      message: `The queue already holds ${tasks.length} tasks (cap ${MAX_QUEUE_TASKS}). Refine the existing tasks rather than adding more.`,
+    };
+  }
+
   if (!ctx.validTypes.includes(args.type)) {
     return {
       ok: false,

diff --git a/src/lib/programs/orchestrator/queue.ts b/src/lib/programs/orchestrator/queue.ts
@@ -32,6 +32,12 @@ export interface QueuedTask {
   /** Human-readable label for the TUI, set by the enqueuing agent. */
   label?: string;
   status: TaskStatus;
+  /**
+   * Ids of tasks that must finish before this one runs. Ids are generated at
+   * enqueue and dependsOn is never mutated, so a task can only depend on tasks
+   * created before it — the graph is a DAG by construction, cycles cannot
+   * form. Unknown ids are rejected by the enqueue_task guard.
+   */
   dependsOn: string[];
   inputs: Record<string, unknown>;
   model?: string;
@@ -76,6 +82,10 @@ export interface EnqueueInput {
 export const QUEUE_DIR_NAME = '.posthog-wizard';
 const DEFAULT_MAX_ATTEMPTS = 2;
 
+function nowIso(): string {
+  return new Date().toISOString();
+}
+
 /** Every queue transition, in the order it is reflected. */
 export type TransitionEvent =
   | 'enqueue'
@@ -94,10 +104,6 @@ export interface QueueStoreOptions {
   onTransition?: (event: TransitionEvent, task: QueuedTask) => void;
 }
 
-function nowIso(): string {
-  return new Date().toISOString();
-}
-
 export class QueueStore {
   private tasks: QueuedTask[] = [];
   private readonly onTransition?: (
@@ -147,7 +153,7 @@ export class QueueStore {
   }
 
   /**
-   * True when no task is in progress and none can be started. Either everything
+   * True when no task is running and none can be started. Either everything
    * is terminal, or the only pending tasks are blocked by a failed dependency.
    */
   isDrained(): boolean {
@@ -229,7 +235,7 @@ export class QueueStore {
     return this.finish(id, TaskStatus.Failed, handoff);
   }
 
-  /** Put a failed/in-progress task back to pending for a retry within the run. */
+  /** Put a failed/running task back to pending for a retry within the run. */
   requeue(id: string): QueuedTask {
     const t = this.require(id);
     t.status = TaskStatus.Pending;

diff --git a/src/utils/__tests__/ci-flag-overrides.test.ts b/src/utils/__tests__/ci-flag-overrides.test.ts
@@ -1,4 +1,7 @@
-import { applyCiFlagOverrides } from '@utils/ci-flag-overrides';
+import {
+  applyCiFlagOverrides,
+  ciExcludedTaskTypes,
+} from '@utils/ci-flag-overrides';
 
 jest.mock('@utils/debug', () => ({
   logToFile: jest.fn(),
@@ -61,3 +64,34 @@ describe('applyCiFlagOverrides', () => {
     });
   });
 });
+
+describe('ciExcludedTaskTypes', () => {
+  afterEach(() => {
+    delete process.env.WIZARD_CI_EXCLUDE_TASKS;
+  });
+
+  it('is empty when nothing is excluded', () => {
+    expect(ciExcludedTaskTypes()).toEqual([]);
+  });
+
+  it('parses the comma-separated list, ignoring stray whitespace', () => {
+    process.env.WIZARD_CI_EXCLUDE_TASKS = 'dashboard, report ,';
+    expect(ciExcludedTaskTypes()).toEqual(['dashboard', 'report']);
+  });
+
+  it('is inert in production builds', () => {
+    const prevNodeEnv = process.env.NODE_ENV;
+    process.env.NODE_ENV = 'production';
+    process.env.WIZARD_CI_EXCLUDE_TASKS = 'dashboard';
+    let result: readonly string[] | undefined;
+    jest.isolateModules(() => {
+      // eslint-disable-next-line @typescript-eslint/no-var-requires
+      const prod = require('@utils/ci-flag-overrides') as {
+        ciExcludedTaskTypes: typeof ciExcludedTaskTypes;
+      };
+      result = prod.ciExcludedTaskTypes();
+    });
+    process.env.NODE_ENV = prevNodeEnv;
+    expect(result).toEqual([]);
+  });
+});