From 75e9dcf60a0743d0dc2c6cb6d08969931e971127 Mon Sep 17 00:00:00 2001 From: "Vincent (Wen Yu) Ge" Date: Sun, 21 Jun 2026 11:07:02 -0400 Subject: [PATCH 01/21] feat(wizard-ci): full e2e via control plane (--e2e) + replay (--replay) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two modes to the existing wizard-ci, as an alternative to classic --ci (LoggingUI: agent-only, stdout-grep). --e2e drives the WHOLE interactive flow headlessly through the wizard-ci-tools control plane and asserts on structured state; --replay plays a recorded run back in the terminal. Core files: - services/wizard-ci/e2e.ts — runE2e(): /tmp app-copy isolation, env hygiene (strips host CLAUDE*/ANTHROPIC* so the spawned agent auths with the phx key instead of deferring to the host), scoped --project-id, the happy-path policy (skip mcp+slack, delete skills, continue past health issues), spawns the wizard repo's headless harness, then asserts the structured result (runPhase=completed, posthog dep/.env, reached keep-skills, skillsComplete). replayRecording(): shells to the wizard repo's terminal replayer. - services/wizard-ci/index.ts — wires --e2e (positional app, --project-id, --keep-skills) and --replay (--step/--delay) into the CLI + --help. Engine lives in the wizard repo (store + driver must run in-process); point WIZARD_PATH at it. See PostHog/wizard PR for src/lib/ci-driver + harness. Co-Authored-By: Claude Opus 4.8 --- services/wizard-ci/e2e.ts | 169 ++++++++++++++++++++++++++++++++++++ services/wizard-ci/index.ts | 53 ++++++++++- 2 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 services/wizard-ci/e2e.ts diff --git a/services/wizard-ci/e2e.ts b/services/wizard-ci/e2e.ts new file mode 100644 index 000000000..a9e5f2da1 --- /dev/null +++ b/services/wizard-ci/e2e.ts @@ -0,0 +1,169 @@ +/** + * `wizard-ci --e2e` — full end-to-end run driven through the wizard-ci-tools + * control plane, against prod cloud. + * + * Classic `wizard-ci` runs the agent under LoggingUI and exits — it skips the + * interactive screens and gives you only ANSI stdout to grep. `--e2e` runs the + * WHOLE flow: a `WizardCiDriver` makes each human-side decision through the same + * store setters the Ink UI uses, taking the happy path everywhere — skip MCP, + * skip Slack, DELETE skills, and continue past any health-check issue — then + * asserts on STRUCTURED state plus the real file changes the agent made. + * + * The store + driver must run in-process with the wizard, so the engine is the + * wizard repo's headless harness (`scripts/e2e-full-run.no-jest.ts`); this is + * the orchestration + assertion layer. + * + * pnpm wizard-ci basic-integration/javascript-node/express-todo --e2e + * pnpm wizard-ci basic-integration/next-js/15-app-router-todo --e2e --project-id 228144 + */ +import { join, basename } from "path"; +import { existsSync, mkdirSync, rmSync, readFileSync, writeFileSync } from "fs"; +import { spawnSync } from "child_process"; + +const WORKBENCH = join(import.meta.dirname, "..", ".."); +const APPS_DIR = join(WORKBENCH, "apps"); + +// Host Claude-Code / Anthropic auth vars: when the wizard's agent subprocess is +// spawned from inside a Claude Code session it defers auth to the host +// (apiKeySource=none → 401). Strip them so it auths with the phx key, exactly +// like a plain CI shell (where these are simply unset, so the strip is a no-op). +const STRIP_ENV = [ + "ANTHROPIC_API_KEY", "ANTHROPIC_BASE_URL", "ANTHROPIC_AUTH_TOKEN", + "CLAUDECODE", "CLAUDE_CODE_ENTRYPOINT", "CLAUDE_CODE_SESSION_ID", + "CLAUDE_CODE_CHILD_SESSION", "CLAUDE_CODE_OAUTH_SCOPES", "CLAUDE_CODE_OAUTH_TOKEN", + "CLAUDE_CODE_SDK_HAS_OAUTH_REFRESH", "CLAUDE_CODE_SDK_HAS_HOST_AUTH_REFRESH", + "CLAUDE_CODE_EXECPATH", "CLAUDE_CODE_EMIT_TOOL_USE_SUMMARIES", + "CLAUDE_AGENT_SDK_VERSION", "CLAUDE_CODE_ENABLE_ASK_USER_QUESTION_TOOL", "AI_AGENT", +]; + +export interface E2eOptions { + app?: string; + region?: string; + projectId?: string; + /** true → keep installed skills; default deletes them. */ + keepSkills?: boolean; +} + +function wizardRepo(): string { + const p = process.env.WIZARD_PATH?.replace(/^~/, process.env.HOME || ""); + return p || `${process.env.HOME}/development/wizard`; +} + +/** Run a single app through the control-plane e2e and assert. Returns exit code. */ +export function runE2e(opts: E2eOptions): number { + const app = opts.app; + const region = opts.region || process.env.POSTHOG_REGION || "us"; + const projectId = opts.projectId || process.env.POSTHOG_WIZARD_PROJECT_ID || ""; + const apiKey = process.env.POSTHOG_PERSONAL_API_KEY; + + if (!app) { + console.error("✖ --e2e requires an app: pnpm wizard-ci --e2e"); + return 2; + } + if (!apiKey) { + console.error("✖ POSTHOG_PERSONAL_API_KEY is required (the phx key)."); + return 2; + } + if (!projectId) { + console.error("✖ project id required: --project-id or POSTHOG_WIZARD_PROJECT_ID."); + return 2; + } + + const appSrc = join(APPS_DIR, app); + if (!existsSync(appSrc)) { + console.error(`✖ app not found: apps/${app}`); + return 2; + } + + const name = basename(app); + const appDir = `/tmp/wizard-e2e-${name}`; + const resultJson = `/tmp/wizard-e2e-${name}.json`; + const recordingJson = `/tmp/wizard-e2e-${name}.recording.json`; + + // Always a /tmp copy — never the real fixture. + rmSync(appDir, { recursive: true, force: true }); + mkdirSync(appDir, { recursive: true }); + spawnSync("rsync", ["-a", "--exclude", "node_modules", "--exclude", ".git", `${appSrc}/`, `${appDir}/`], { + stdio: "inherit", + }); + + const harness = join(wizardRepo(), "scripts", "e2e-full-run.no-jest.ts"); + if (!existsSync(harness)) { + console.error(`✖ wizard e2e harness not found: ${harness}\n Set WIZARD_PATH to the wizard repo.`); + return 2; + } + + console.log(`\n=== wizard-ci --e2e: ${app} (project ${projectId}, ${region}) ===`); + console.log(` policy: skip mcp · skip slack · ${opts.keepSkills ? "keep" : "delete"} skills · continue past health issues\n`); + + const childEnv: NodeJS.ProcessEnv = { ...process.env }; + for (const k of STRIP_ENV) delete childEnv[k]; + childEnv.POSTHOG_PERSONAL_API_KEY = apiKey; + childEnv.APP_DIR = appDir; + childEnv.PROJECT_ID = projectId; + childEnv.E2E_RESULT_JSON = resultJson; + childEnv.E2E_RECORDING_JSON = recordingJson; + childEnv.E2E_KEEP_SKILLS = opts.keepSkills ? "true" : "false"; + + const run = spawnSync("npx", ["tsx", harness], { + cwd: wizardRepo(), + stdio: "inherit", + env: childEnv, + }); + + // Structured assertions — the control plane's payoff over stdout-grepping. + let result: { runPhase?: string; hasPosthogDep?: boolean; envFile?: string | null; + screenPath?: string[]; skillsComplete?: boolean; newDeps?: string[] } | null = null; + try { + result = JSON.parse(readFileSync(resultJson, "utf8")); + } catch { + /* harness crashed before writing */ + } + + const checks: Array<[string, boolean]> = result + ? [ + ["agent run completed", result.runPhase === "completed"], + ["posthog dependency added or .env written", !!result.hasPosthogDep || !!result.envFile], + ["full interactive flow reached keep-skills", !!result.screenPath?.includes("keep-skills")], + ["skillsComplete", result.skillsComplete === true], + ] + : [["harness produced a structured result", false]]; + + console.log("\n--- assertions ---"); + for (const [label, ok] of checks) console.log(` ${ok ? "✔" : "✖"} ${label}`); + const passed = run.status === 0 && checks.every(([, ok]) => ok); + + if (result) { + writeFileSync(resultJson, JSON.stringify({ ...result, app, passed }, null, 2)); + console.log(`\nscreen path: ${result.screenPath?.join(" → ")}`); + console.log(`new deps : ${(result.newDeps || []).join(", ") || "(none)"}`); + console.log(`result json: ${resultJson}`); + } + + if (existsSync(recordingJson)) { + console.log(`recording : ${recordingJson}`); + console.log(`replay it : pnpm wizard-ci --replay ${recordingJson} (Enter ▸ step)`); + console.log(` pnpm wizard-ci --replay ${recordingJson} --delay 1200 (auto)`); + } + + console.log(`\n${passed ? "✓ E2E PASS" : "✗ E2E FAIL"} — ${app}\n`); + return passed ? 0 : 1; +} + +/** Replay a recorded run in the terminal via the wizard repo's replayer. */ +export function replayRecording(file: string, passthrough: string[]): number { + if (!existsSync(file)) { + console.error(`✖ recording not found: ${file}`); + return 2; + } + const script = join(wizardRepo(), "scripts", "replay-e2e.no-jest.ts"); + if (!existsSync(script)) { + console.error(`✖ replayer not found: ${script}\n Set WIZARD_PATH to the wizard repo.`); + return 2; + } + const run = spawnSync("npx", ["tsx", script, file, ...passthrough], { + cwd: wizardRepo(), + stdio: "inherit", + }); + return run.status ?? 1; +} diff --git a/services/wizard-ci/index.ts b/services/wizard-ci/index.ts index cefc8e4a6..d62e1c6af 100644 --- a/services/wizard-ci/index.ts +++ b/services/wizard-ci/index.ts @@ -54,6 +54,7 @@ import { selectCommand, selectApp, } from "../wizard-run/picker.js"; +import { runE2e, replayRecording } from "./e2e.js"; // ============================================================================ // Config @@ -76,6 +77,16 @@ interface Options { pushOnly: boolean; branch?: string; evaluate: boolean; + /** Run via the wizard-ci-tools control plane (full flow + structured asserts). */ + e2e: boolean; + /** Scoped project id for the personal API key (e2e mode). */ + projectId?: string; + /** e2e: keep installed skills instead of deleting them. */ + keepSkills: boolean; + /** Replay a recorded run (path to recording.json). */ + replay?: string; + /** Flags forwarded to the replayer (--step / --delay ). */ + replayPassthrough: string[]; } // ============================================================================ @@ -251,11 +262,21 @@ function parseArgs(): Options { clean: false, pushOnly: false, evaluate: false, + e2e: false, + keepSkills: false, + replayPassthrough: [], }; for (let i = 0; i < args.length; i++) { const arg = args[i]; - if (arg === "--app" || arg === "-a") opts.app = args[++i]; + if (arg === "--e2e") opts.e2e = true; + else if (arg === "--replay") opts.replay = args[++i]; + else if (arg === "--delay") { + opts.replayPassthrough.push("--delay", args[++i]); + } else if (arg === "--step") opts.replayPassthrough.push("--step"); + else if (arg === "--project-id") opts.projectId = args[++i]; + else if (arg === "--keep-skills") opts.keepSkills = true; + else if (arg === "--app" || arg === "-a") opts.app = args[++i]; else if (arg === "--command" || arg === "-c") opts.command = args[++i]; else if (arg === "--product") opts.product = args[++i]; else if (arg === "--trigger-id" || arg === "-t") opts.triggerId = args[++i]; @@ -305,8 +326,20 @@ Evaluation: pnpm wizard-ci --evaluate, -e Run pr-evaluator after PR creation With --local: runs evaluation on local branch (creates branch, commits, runs test-run mode) + +Control-plane e2e (full interactive flow, structured assertions): + pnpm wizard-ci --e2e Run via wizard-ci-tools instead of LoggingUI + happy path · skip mcp+slack · delete skills + · continue past health issues + pnpm wizard-ci ... --e2e --project-id Scoped-key project (or env POSTHOG_WIZARD_PROJECT_ID) + pnpm wizard-ci ... --e2e --keep-skills Keep installed skills instead of deleting + pnpm wizard-ci --replay Replay a recorded run in the terminal + --step (Enter to advance, default) | --delay (auto) `); process.exit(0); + } else if (!arg.startsWith("-") && !opts.app) { + // Positional app path, e.g. `wizard-ci basic-integration/ --e2e`. + opts.app = arg; } } return opts; @@ -744,6 +777,24 @@ async function runCI( async function main(): Promise { const opts = parseArgs(); + // Replay a recorded e2e run in the terminal. + if (opts.replay) { + process.exit(replayRecording(opts.replay, opts.replayPassthrough)); + } + + // Control-plane e2e: run the full interactive flow via wizard-ci-tools and + // assert on structured state, instead of the classic LoggingUI spawn + PR. + if (opts.e2e) { + process.exit( + runE2e({ + app: opts.app, + region: process.env.POSTHOG_REGION, + projectId: opts.projectId, + keepSkills: opts.keepSkills, + }), + ); + } + // Handle --clean command if (opts.clean) { await cleanBranches(); From af0a0d200d5ccf414df64c215c73805bb2c19978 Mon Sep 17 00:00:00 2001 From: "Vincent (Wen Yu) Ge" Date: Mon, 22 Jun 2026 15:57:30 -0400 Subject: [PATCH 02/21] feat(wizard-ci): TUI visual-regression snapshots for CI-e2e test definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run each CI-e2e test definition (for now: integration on express-todo) as a real --e2e agent run, render every key-moment frame of the recording to a real-Ink ANSI snapshot, and diff against a committed baseline. Surfaces run-to-run differences (e.g. the agent enqueuing tasks differently) side-by-side for a human to review — same screens every run, deltas flagged. No mocks: real agent, real recording, real render. - services/wizard-ci/snapshots.ts — the flow (run → render → diff → report) - services/wizard-ci/ansi-html.ts — dependency-free ANSI→HTML for the side-by-side - services/wizard-ci/snapshots/express-todo/ — committed baseline (47 frames) - pnpm wizard-ci-snapshots (+ mprocs entry); --update to accept a new baseline Co-Authored-By: Claude Opus 4.8 --- mprocs.yaml | 8 + package.json | 1 + services/wizard-ci/ansi-html.ts | 109 +++++++++ services/wizard-ci/snapshots.ts | 217 ++++++++++++++++++ .../snapshots/express-todo/00-intro.ans | 8 + .../express-todo/01-health-check.ans | 1 + .../snapshots/express-todo/02-auth.ans | 9 + .../snapshots/express-todo/03-auth.ans | 9 + .../snapshots/express-todo/04-run.ans | 7 + .../snapshots/express-todo/05-run.ans | 7 + .../snapshots/express-todo/06-run.ans | 8 + .../snapshots/express-todo/07-run.ans | 8 + .../snapshots/express-todo/08-run.ans | 8 + .../snapshots/express-todo/09-run.ans | 8 + .../snapshots/express-todo/10-run.ans | 8 + .../snapshots/express-todo/11-run.ans | 10 + .../snapshots/express-todo/12-run.ans | 10 + .../snapshots/express-todo/13-run.ans | 11 + .../snapshots/express-todo/14-run.ans | 11 + .../snapshots/express-todo/15-run.ans | 12 + .../snapshots/express-todo/16-run.ans | 12 + .../snapshots/express-todo/17-run.ans | 13 ++ .../snapshots/express-todo/18-run.ans | 13 ++ .../snapshots/express-todo/19-run.ans | 14 ++ .../snapshots/express-todo/20-run.ans | 14 ++ .../snapshots/express-todo/21-run.ans | 15 ++ .../snapshots/express-todo/22-run.ans | 15 ++ .../snapshots/express-todo/23-run.ans | 15 ++ .../snapshots/express-todo/24-run.ans | 15 ++ .../snapshots/express-todo/25-run.ans | 15 ++ .../snapshots/express-todo/26-run.ans | 15 ++ .../snapshots/express-todo/27-run.ans | 15 ++ .../snapshots/express-todo/28-run.ans | 15 ++ .../snapshots/express-todo/29-run.ans | 15 ++ .../snapshots/express-todo/30-run.ans | 15 ++ .../snapshots/express-todo/31-run.ans | 15 ++ .../snapshots/express-todo/32-run.ans | 17 ++ .../snapshots/express-todo/33-run.ans | 17 ++ .../snapshots/express-todo/34-run.ans | 17 ++ .../snapshots/express-todo/35-run.ans | 17 ++ .../snapshots/express-todo/36-run.ans | 17 ++ .../snapshots/express-todo/37-run.ans | 15 ++ .../snapshots/express-todo/38-run.ans | 15 ++ .../snapshots/express-todo/39-run.ans | 15 ++ .../snapshots/express-todo/40-run.ans | 15 ++ .../snapshots/express-todo/41-run.ans | 15 ++ .../snapshots/express-todo/42-run.ans | 15 ++ .../snapshots/express-todo/43-outro.ans | 23 ++ .../snapshots/express-todo/44-mcp.ans | 3 + .../express-todo/45-slack-connect.ans | 2 + .../snapshots/express-todo/46-keep-skills.ans | 3 + 51 files changed, 907 insertions(+) create mode 100644 services/wizard-ci/ansi-html.ts create mode 100644 services/wizard-ci/snapshots.ts create mode 100644 services/wizard-ci/snapshots/express-todo/00-intro.ans create mode 100644 services/wizard-ci/snapshots/express-todo/01-health-check.ans create mode 100644 services/wizard-ci/snapshots/express-todo/02-auth.ans create mode 100644 services/wizard-ci/snapshots/express-todo/03-auth.ans create mode 100644 services/wizard-ci/snapshots/express-todo/04-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/05-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/06-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/07-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/08-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/09-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/10-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/11-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/12-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/13-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/14-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/15-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/16-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/17-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/18-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/19-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/20-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/21-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/22-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/23-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/24-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/25-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/26-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/27-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/28-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/29-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/30-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/31-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/32-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/33-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/34-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/35-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/36-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/37-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/38-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/39-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/40-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/41-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/42-run.ans create mode 100644 services/wizard-ci/snapshots/express-todo/43-outro.ans create mode 100644 services/wizard-ci/snapshots/express-todo/44-mcp.ans create mode 100644 services/wizard-ci/snapshots/express-todo/45-slack-connect.ans create mode 100644 services/wizard-ci/snapshots/express-todo/46-keep-skills.ans diff --git a/mprocs.yaml b/mprocs.yaml index 791a38f1e..bad0ab389 100644 --- a/mprocs.yaml +++ b/mprocs.yaml @@ -113,6 +113,14 @@ procs: autostart: false env_file: .env + wizard-ci-snapshots: + # Run the CI-e2e test definitions, render TUI snapshots of each real run, + # and diff against the committed baseline. Prints a per-frame summary and + # writes a side-by-side visual report (report.html). --update to accept. + shell: "pnpm wizard-ci-snapshots" + autostart: false + env_file: .env + # ═══════════════════════════════════════════════════════════════════════════ # PR/BRANCH EVALUATION # ═══════════════════════════════════════════════════════════════════════════ diff --git a/package.json b/package.json index 836fe7933..5ba102c60 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "scripts": { "evaluate": "tsx services/pr-evaluator/index.ts", "wizard-ci": "tsx services/wizard-ci/index.ts", + "wizard-ci-snapshots": "tsx services/wizard-ci/snapshots.ts", "benchmark": "tsx services/wizard-benchmark/index.ts", "framework-detect": "tsx services/framework-detect/index.ts", "yara-scan": "tsx services/yara-scan/index.ts", diff --git a/services/wizard-ci/ansi-html.ts b/services/wizard-ci/ansi-html.ts new file mode 100644 index 000000000..4307b1b6a --- /dev/null +++ b/services/wizard-ci/ansi-html.ts @@ -0,0 +1,109 @@ +/** + * Minimal ANSI (SGR) → HTML converter — no dependency, runs offline in CI. + * Handles the subset Ink emits: reset, bold/dim/italic/underline (+ resets), + * 16-color fg/bg, bright fg/bg, 256-color (38;5;n / 48;5;n), and truecolor + * (38;2;r;g;b / 48;2;r;g;b). Unknown codes are ignored. Output is HTML-escaped. + */ + +const BASE16 = [ + "#000000", "#cd3131", "#0dbc79", "#e5e510", "#2472c8", "#bc3fbc", "#11a8cd", "#e5e5e5", + "#666666", "#f14c4c", "#23d18b", "#f5f543", "#3b8eea", "#d670d6", "#29b8db", "#ffffff", +]; + +/** xterm 256-color index → #rrggbb. */ +function xterm256(n: number): string { + if (n < 16) return BASE16[n]; + if (n < 232) { + const i = n - 16; + const r = Math.floor(i / 36); + const g = Math.floor((i % 36) / 6); + const b = i % 6; + const c = (v: number) => (v === 0 ? 0 : 55 + v * 40); + return `#${[c(r), c(g), c(b)].map((v) => v.toString(16).padStart(2, "0")).join("")}`; + } + const v = (n - 232) * 10 + 8; + return `#${[v, v, v].map((x) => x.toString(16).padStart(2, "0")).join("")}`; +} + +interface Style { + fg?: string; + bg?: string; + bold?: boolean; + dim?: boolean; + italic?: boolean; + underline?: boolean; +} + +function styleToCss(s: Style): string { + const parts: string[] = []; + if (s.fg) parts.push(`color:${s.fg}`); + if (s.bg) parts.push(`background:${s.bg}`); + if (s.bold) parts.push("font-weight:bold"); + if (s.dim) parts.push("opacity:.6"); + if (s.italic) parts.push("font-style:italic"); + if (s.underline) parts.push("text-decoration:underline"); + return parts.join(";"); +} + +function applyCodes(style: Style, codes: number[]): Style { + const s = { ...style }; + for (let i = 0; i < codes.length; i++) { + const c = codes[i]; + if (c === 0) { + for (const k of Object.keys(s)) delete (s as Record)[k]; + } else if (c === 1) s.bold = true; + else if (c === 2) s.dim = true; + else if (c === 3) s.italic = true; + else if (c === 4) s.underline = true; + else if (c === 22) (s.bold = false), (s.dim = false); + else if (c === 23) s.italic = false; + else if (c === 24) s.underline = false; + else if (c === 39) delete s.fg; + else if (c === 49) delete s.bg; + else if (c >= 30 && c <= 37) s.fg = BASE16[c - 30]; + else if (c >= 90 && c <= 97) s.fg = BASE16[c - 90 + 8]; + else if (c >= 40 && c <= 47) s.bg = BASE16[c - 40]; + else if (c >= 100 && c <= 107) s.bg = BASE16[c - 100 + 8]; + else if (c === 38 || c === 48) { + const target = c === 38 ? "fg" : "bg"; + if (codes[i + 1] === 5) { + s[target] = xterm256(codes[i + 2]); + i += 2; + } else if (codes[i + 1] === 2) { + const [r, g, b] = [codes[i + 2], codes[i + 3], codes[i + 4]]; + s[target] = `#${[r, g, b].map((v) => (v || 0).toString(16).padStart(2, "0")).join("")}`; + i += 4; + } + } + } + return s; +} + +const escapeHtml = (s: string) => + s.replace(/&/g, "&").replace(//g, ">"); + +export function ansiToHtml(input: string): string { + let style: Style = {}; + let out = ""; + let buf = ""; + const flush = () => { + if (!buf) return; + const css = styleToCss(style); + out += css ? `${escapeHtml(buf)}` : escapeHtml(buf); + buf = ""; + }; + // eslint-disable-next-line no-control-regex + const re = /\x1b\[([0-9;]*)m/g; + let last = 0; + let m: RegExpExecArray | null; + while ((m = re.exec(input))) { + buf += input.slice(last, m.index); + flush(); + const codes = m[1] === "" ? [0] : m[1].split(";").map(Number); + style = applyCodes(style, codes); + last = re.lastIndex; + } + buf += input.slice(last); + flush(); + return out; +} diff --git a/services/wizard-ci/snapshots.ts b/services/wizard-ci/snapshots.ts new file mode 100644 index 000000000..4520f1113 --- /dev/null +++ b/services/wizard-ci/snapshots.ts @@ -0,0 +1,217 @@ +/** + * wizard-ci snapshots: TUI visual-regression for the CI-e2e test definitions. + * + * For each test definition (for now: the integration flow on express-todo) this + * runs a REAL `--e2e` agent run, renders every key-moment frame of the run's + * recording to a real-Ink ANSI snapshot, and diffs it against a committed + * baseline. The point is NOT pixel-determinism — a real agent enqueues tasks + * differently run to run — it's to surface those differences to a human in a + * side-by-side: same screens every time, minor run-to-run changes flagged for + * review. No mocks anywhere: real agent, real recording, real render. + * + * pnpm wizard-ci-snapshots # run + compare, write HTML report + * pnpm wizard-ci-snapshots --update # accept current output as baseline + * pnpm wizard-ci-snapshots --recording # skip the run, render an existing + * # real recording (still no mock) + * + * CI surfaces report.html in visual-comparison mode (and fails on drift until a + * human updates the baseline). Locally it's surfaced through mprocs. + */ +import "dotenv/config"; +import { join, basename } from "path"; +import { + existsSync, + mkdirSync, + rmSync, + readFileSync, + writeFileSync, + readdirSync, + cpSync, +} from "fs"; +import { spawnSync } from "child_process"; +import { runE2e } from "./e2e.js"; +import { ansiToHtml } from "./ansi-html.js"; + +const WORKBENCH = join(import.meta.dirname, "..", ".."); +const BASELINE_ROOT = join(import.meta.dirname, "snapshots"); +const OUT_ROOT = "/tmp/wizard-snapshots"; + +/** A CI-e2e test definition: which flow runs against which app. */ +interface TestDef { + /** Stable key for baseline + report dirs. */ + name: string; + /** apps/ path the e2e harness copies and runs against. */ + app: string; +} + +const TEST_DEFS: TestDef[] = [ + { + name: "express-todo", + app: "basic-integration/javascript-node/express-todo", + }, +]; + +function wizardRepo(): string { + const p = process.env.WIZARD_PATH?.replace(/^~/, process.env.HOME || ""); + if (!p) throw new Error("WIZARD_PATH is not set (path to the wizard repo)."); + return p; +} + +type FrameStatus = "same" | "changed" | "added" | "removed"; +interface FrameDiff { + file: string; + status: FrameStatus; + baseline: string | null; + current: string | null; +} + +/** Render a recording's frames to /-.ans via the wizard. */ +function renderSnapshots(recording: string, outDir: string): void { + const script = join(wizardRepo(), "scripts", "render-snapshots.no-jest.ts"); + if (!existsSync(script)) + throw new Error(`wizard render-snapshots not found: ${script}`); + const r = spawnSync("npx", ["tsx", script, recording, outDir], { + cwd: wizardRepo(), + stdio: "inherit", + // Force truecolor so Ink/chalk emit ANSI even though this isn't a TTY — + // the snapshots capture the real colored TUI, not stripped text. + env: { ...process.env, FORCE_COLOR: "3" }, + }); + if (r.status !== 0) throw new Error("render-snapshots failed"); +} + +/** Union the two dirs by filename and classify each frame. */ +function diffDirs(baselineDir: string, currentDir: string): FrameDiff[] { + const ls = (d: string) => + existsSync(d) ? readdirSync(d).filter((f) => f.endsWith(".ans")) : []; + const files = [...new Set([...ls(baselineDir), ...ls(currentDir)])].sort(); + return files.map((file) => { + const b = existsSync(join(baselineDir, file)) + ? readFileSync(join(baselineDir, file), "utf8") + : null; + const c = existsSync(join(currentDir, file)) + ? readFileSync(join(currentDir, file), "utf8") + : null; + const status: FrameStatus = + b === null ? "added" : c === null ? "removed" : b === c ? "same" : "changed"; + return { file, status, baseline: b, current: c }; + }); +} + +const BADGE: Record = { + same: "#3fb950", + changed: "#d29922", + added: "#58a6ff", + removed: "#f85149", +}; + +function reportHtml(name: string, diffs: FrameDiff[]): string { + const cell = (s: string | null) => + s === null + ? `
— absent —
` + : `
${ansiToHtml(s)}
`; + const rows = diffs + .map( + (d) => ` +
+

${d.status} ${d.file}

+
+
baseline
${cell(d.baseline)}
+
current
${cell(d.current)}
+
+
`, + ) + .join(""); + const changed = diffs.filter((d) => d.status !== "same").length; + return `wizard-ci snapshots — ${name} + +

wizard-ci TUI snapshots — ${name}

+
${diffs.length} key-moment frames · ${changed} changed/added/removed · review the side-by-side below
+${rows} +`; +} + +function stripAnsi(s: string): string { + // eslint-disable-next-line no-control-regex + return s.replace(/\x1b\[[0-9;]*m/g, ""); +} + +function main(): number { + const args = process.argv.slice(2); + const update = args.includes("--update"); + const recordingArg = args[args.indexOf("--recording") + 1]; + const onlyRecording = args.includes("--recording") ? recordingArg : null; + const projectId = + process.env.POSTHOG_WIZARD_PROJECT_ID || + args[args.indexOf("--project-id") + 1] || + ""; + + let drift = 0; + for (const def of TEST_DEFS) { + console.log(`\n=== snapshots: ${def.name} (${def.app}) ===`); + + const recording = onlyRecording || `/tmp/wizard-e2e-${basename(def.app)}.recording.json`; + if (!onlyRecording) { + const code = runE2e({ app: def.app, projectId }); + if (code !== 0) { + console.error(`✖ e2e run failed for ${def.name} (exit ${code})`); + return code; + } + } + if (!existsSync(recording)) { + console.error(`✖ no recording at ${recording}`); + return 1; + } + + const currentDir = join(OUT_ROOT, def.name, "current"); + const baselineDir = join(BASELINE_ROOT, def.name); + renderSnapshots(recording, currentDir); + + if (update) { + rmSync(baselineDir, { recursive: true, force: true }); + mkdirSync(baselineDir, { recursive: true }); + cpSync(currentDir, baselineDir, { recursive: true }); + console.log(`✓ baseline updated → ${baselineDir}`); + continue; + } + + const diffs = diffDirs(baselineDir, currentDir); + const changed = diffs.filter((d) => d.status !== "same"); + const reportDir = join(OUT_ROOT, def.name); + mkdirSync(reportDir, { recursive: true }); + const report = join(reportDir, "report.html"); + writeFileSync(report, reportHtml(def.name, diffs)); + + console.log(`\n--- ${def.name}: ${diffs.length} frames, ${changed.length} differ ---`); + for (const d of diffs) { + const mark = d.status === "same" ? "·" : d.status === "changed" ? "~" : d.status === "added" ? "+" : "-"; + console.log(` ${mark} ${d.file}`); + // mprocs: show the changed frame's current render inline for quick eyeball. + if (d.status === "changed" && d.current) { + console.log(stripAnsi(d.current).split("\n").map((l) => ` ${l}`).join("\n")); + } + } + console.log(`\nvisual report: ${report}`); + if (!existsSync(baselineDir)) + console.log(`(no baseline yet — run with --update to seed it)`); + if (changed.length) drift = 1; + } + + if (drift) + console.log(`\n✖ snapshots drifted. Review the report; accept with --update.`); + else console.log(`\n✓ snapshots match baseline.`); + return drift; +} + +process.exit(main()); diff --git a/services/wizard-ci/snapshots/express-todo/00-intro.ans b/services/wizard-ci/snapshots/express-todo/00-intro.ans new file mode 100644 index 000000000..c68fd24a4 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/00-intro.ans @@ -0,0 +1,8 @@ + ███ PostHog Wizard starting up + + We'll use AI to analyze your project and complete work. + .env* file contents will not leave your machine. + + + ⠋ Detecting project framework... + diff --git a/services/wizard-ci/snapshots/express-todo/01-health-check.ans b/services/wizard-ci/snapshots/express-todo/01-health-check.ans new file mode 100644 index 000000000..fd9ac9d77 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/01-health-check.ans @@ -0,0 +1 @@ + ⠋ Checking service status... \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/02-auth.ans b/services/wizard-ci/snapshots/express-todo/02-auth.ans new file mode 100644 index 000000000..890d69096 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/02-auth.ans @@ -0,0 +1,9 @@ +PostHog Setup Wizard +✔ Framework: Node.js + +How does the wizard use your data? +• Source files are read by Claude for AI context +• .env* and secrets stay on your machine +• Press [I] for full privacy & usage info + +⠋ Waiting for authentication... \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/03-auth.ans b/services/wizard-ci/snapshots/express-todo/03-auth.ans new file mode 100644 index 000000000..890d69096 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/03-auth.ans @@ -0,0 +1,9 @@ +PostHog Setup Wizard +✔ Framework: Node.js + +How does the wizard use your data? +• Source files are read by Claude for AI context +• .env* and secrets stay on your machine +• Press [I] for full privacy & usage info + +⠋ Waiting for authentication... \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/04-run.ans b/services/wizard-ci/snapshots/express-todo/04-run.ans new file mode 100644 index 000000000..82d05447c --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/04-run.ans @@ -0,0 +1,7 @@ + Learn Tasks + + ⠋ Analyzing project... +──────────────────────────────────────────────────────────────────────────────────────────────────── + ◆ Using provided API key (CI mode - OAuth bypassed) + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/05-run.ans b/services/wizard-ci/snapshots/express-todo/05-run.ans new file mode 100644 index 000000000..82d05447c --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/05-run.ans @@ -0,0 +1,7 @@ + Learn Tasks + + ⠋ Analyzing project... +──────────────────────────────────────────────────────────────────────────────────────────────────── + ◆ Using provided API key (CI mode - OAuth bypassed) + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/06-run.ans b/services/wizard-ci/snapshots/express-todo/06-run.ans new file mode 100644 index 000000000..964aeb865 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/06-run.ans @@ -0,0 +1,8 @@ + Learn Tasks + + ⠋ Analyzing project... +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Using provided API key (CI mode - OAuth bypassed) + ◆ Initializing Claude agent... + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/07-run.ans b/services/wizard-ci/snapshots/express-todo/07-run.ans new file mode 100644 index 000000000..fc6e815c0 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/07-run.ans @@ -0,0 +1,8 @@ + Learn Tasks + + ⠋ Analyzing project... +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Initializing Claude agent... + ◆ Verbose logs: /tmp/posthog-wizard.log + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/08-run.ans b/services/wizard-ci/snapshots/express-todo/08-run.ans new file mode 100644 index 000000000..edb02fbd1 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/08-run.ans @@ -0,0 +1,8 @@ + Learn Tasks + + ⠋ Analyzing project... +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Verbose logs: /tmp/posthog-wizard.log + ◆ Agent initialized. Let's get cooking! + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/09-run.ans b/services/wizard-ci/snapshots/express-todo/09-run.ans new file mode 100644 index 000000000..90efe6a02 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/09-run.ans @@ -0,0 +1,8 @@ + Learn Tasks + + ⠋ Analyzing project... +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Agent initialized. Let's get cooking! + ◆ Writing your PostHog setup with events, error capture and more... + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/10-run.ans b/services/wizard-ci/snapshots/express-todo/10-run.ans new file mode 100644 index 000000000..af2e559fa --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/10-run.ans @@ -0,0 +1,8 @@ + Learn Tasks + + ⠋ Analyzing project... +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/11-run.ans b/services/wizard-ci/snapshots/express-todo/11-run.ans new file mode 100644 index 000000000..a343d062e --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/11-run.ans @@ -0,0 +1,10 @@ + Learn Tasks + + ◻ Plan event tracking + + ⠋ Progress: 0/1 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/12-run.ans b/services/wizard-ci/snapshots/express-todo/12-run.ans new file mode 100644 index 000000000..a343d062e --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/12-run.ans @@ -0,0 +1,10 @@ + Learn Tasks + + ◻ Plan event tracking + + ⠋ Progress: 0/1 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/13-run.ans b/services/wizard-ci/snapshots/express-todo/13-run.ans new file mode 100644 index 000000000..7cfc65c97 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/13-run.ans @@ -0,0 +1,11 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + + ⠋ Progress: 0/2 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/14-run.ans b/services/wizard-ci/snapshots/express-todo/14-run.ans new file mode 100644 index 000000000..7cfc65c97 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/14-run.ans @@ -0,0 +1,11 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + + ⠋ Progress: 0/2 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/15-run.ans b/services/wizard-ci/snapshots/express-todo/15-run.ans new file mode 100644 index 000000000..2fad5e853 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/15-run.ans @@ -0,0 +1,12 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + + ⠋ Progress: 0/3 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/16-run.ans b/services/wizard-ci/snapshots/express-todo/16-run.ans new file mode 100644 index 000000000..2fad5e853 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/16-run.ans @@ -0,0 +1,12 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + + ⠋ Progress: 0/3 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/17-run.ans b/services/wizard-ci/snapshots/express-todo/17-run.ans new file mode 100644 index 000000000..307572ea8 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/17-run.ans @@ -0,0 +1,13 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + + ⠋ Progress: 0/4 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/18-run.ans b/services/wizard-ci/snapshots/express-todo/18-run.ans new file mode 100644 index 000000000..307572ea8 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/18-run.ans @@ -0,0 +1,13 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + + ⠋ Progress: 0/4 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/19-run.ans b/services/wizard-ci/snapshots/express-todo/19-run.ans new file mode 100644 index 000000000..cee0cded9 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/19-run.ans @@ -0,0 +1,14 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + + ⠋ Progress: 0/5 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/20-run.ans b/services/wizard-ci/snapshots/express-todo/20-run.ans new file mode 100644 index 000000000..cee0cded9 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/20-run.ans @@ -0,0 +1,14 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + + ⠋ Progress: 0/5 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/21-run.ans b/services/wizard-ci/snapshots/express-todo/21-run.ans new file mode 100644 index 000000000..be9799eff --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/21-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 0/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/22-run.ans b/services/wizard-ci/snapshots/express-todo/22-run.ans new file mode 100644 index 000000000..be9799eff --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/22-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◻ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 0/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/23-run.ans b/services/wizard-ci/snapshots/express-todo/23-run.ans new file mode 100644 index 000000000..60b20a8b2 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/23-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ▶ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 0/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Writing your PostHog setup with events, error capture and more... + ◆ Checking project structure. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/24-run.ans b/services/wizard-ci/snapshots/express-todo/24-run.ans new file mode 100644 index 000000000..786b5bfff --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/24-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ▶ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 0/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Checking project structure. + ◆ Verifying PostHog dependencies. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/25-run.ans b/services/wizard-ci/snapshots/express-todo/25-run.ans new file mode 100644 index 000000000..e6b7f2588 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/25-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ▶ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 0/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Verifying PostHog dependencies. + ◆ Generating events based on project. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/26-run.ans b/services/wizard-ci/snapshots/express-todo/26-run.ans new file mode 100644 index 000000000..bb7d71e0d --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/26-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◻ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 1/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Verifying PostHog dependencies. + ◆ Generating events based on project. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/27-run.ans b/services/wizard-ci/snapshots/express-todo/27-run.ans new file mode 100644 index 000000000..e1c5f00a3 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/27-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ▶ Install PostHog SDK + ◻ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 1/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Verifying PostHog dependencies. + ◆ Generating events based on project. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/28-run.ans b/services/wizard-ci/snapshots/express-todo/28-run.ans new file mode 100644 index 000000000..0e36463a9 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/28-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ▶ Install PostHog SDK + ▶ Configure environment variables + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 1/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Verifying PostHog dependencies. + ◆ Generating events based on project. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/29-run.ans b/services/wizard-ci/snapshots/express-todo/29-run.ans new file mode 100644 index 000000000..3410c94af --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/29-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ▶ Install PostHog SDK + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 2/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Verifying PostHog dependencies. + ◆ Generating events based on project. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/30-run.ans b/services/wizard-ci/snapshots/express-todo/30-run.ans new file mode 100644 index 000000000..26d17ce0b --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/30-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ▶ Install PostHog SDK + ◻ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 2/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Generating events based on project. + ◆ Inserting PostHog capture code + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/31-run.ans b/services/wizard-ci/snapshots/express-todo/31-run.ans new file mode 100644 index 000000000..18e4df848 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/31-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ▶ Install PostHog SDK + ▶ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 2/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Generating events based on project. + ◆ Inserting PostHog capture code + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/32-run.ans b/services/wizard-ci/snapshots/express-todo/32-run.ans new file mode 100644 index 000000000..96d69a2a3 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/32-run.ans @@ -0,0 +1,17 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ▶ Install PostHog SDK + ▶ Implement PostHog and capture events + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 2/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Inserting PostHog capture code + ◆ Edited `index.js` — added PostHog init with  + `setupExpressRequestContext`/`setupExpressErrorHandler`, and `capture` calls for `todo_created`,  + `todo_updated`, `todo_completed`, and `todo_deleted`. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/33-run.ans b/services/wizard-ci/snapshots/express-todo/33-run.ans new file mode 100644 index 000000000..af0b44815 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/33-run.ans @@ -0,0 +1,17 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ▶ Install PostHog SDK + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 3/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Inserting PostHog capture code + ◆ Edited `index.js` — added PostHog init with  + `setupExpressRequestContext`/`setupExpressErrorHandler`, and `capture` calls for `todo_created`,  + `todo_updated`, `todo_completed`, and `todo_deleted`. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/34-run.ans b/services/wizard-ci/snapshots/express-todo/34-run.ans new file mode 100644 index 000000000..abea1ee85 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/34-run.ans @@ -0,0 +1,17 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ▶ Install PostHog SDK + ◻ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 3/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Edited `index.js` — added PostHog init with  + `setupExpressRequestContext`/`setupExpressErrorHandler`, and `capture` calls for `todo_created`,  + `todo_updated`, `todo_completed`, and `todo_deleted`. + ◆ Finding and correcting errors. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/35-run.ans b/services/wizard-ci/snapshots/express-todo/35-run.ans new file mode 100644 index 000000000..91364697e --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/35-run.ans @@ -0,0 +1,17 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ▶ Install PostHog SDK + ▶ Validate integration + ◻ Create PostHog dashboard + + ⠋ Progress: 3/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Edited `index.js` — added PostHog init with  + `setupExpressRequestContext`/`setupExpressErrorHandler`, and `capture` calls for `todo_created`,  + `todo_updated`, `todo_completed`, and `todo_deleted`. + ◆ Finding and correcting errors. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/36-run.ans b/services/wizard-ci/snapshots/express-todo/36-run.ans new file mode 100644 index 000000000..36b08f547 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/36-run.ans @@ -0,0 +1,17 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ◼ Validate integration + ▶ Install PostHog SDK + ◻ Create PostHog dashboard + + ⠋ Progress: 4/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Edited `index.js` — added PostHog init with  + `setupExpressRequestContext`/`setupExpressErrorHandler`, and `capture` calls for `todo_created`,  + `todo_updated`, `todo_completed`, and `todo_deleted`. + ◆ Finding and correcting errors. + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/37-run.ans b/services/wizard-ci/snapshots/express-todo/37-run.ans new file mode 100644 index 000000000..8ee6f7d43 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/37-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ◼ Validate integration + ▶ Install PostHog SDK + ◻ Create PostHog dashboard + + ⠋ Progress: 4/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Finding and correcting errors. + ◆ Configured dashboard: (creating now) + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/38-run.ans b/services/wizard-ci/snapshots/express-todo/38-run.ans new file mode 100644 index 000000000..665b8bc05 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/38-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ◼ Validate integration + ▶ Install PostHog SDK + ▶ Create PostHog dashboard + + ⠋ Progress: 4/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Finding and correcting errors. + ◆ Configured dashboard: (creating now) + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/39-run.ans b/services/wizard-ci/snapshots/express-todo/39-run.ans new file mode 100644 index 000000000..741293159 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/39-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Install PostHog SDK + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ◼ Validate integration + ▶ Create PostHog dashboard + + ⠋ Progress: 5/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Finding and correcting errors. + ◆ Configured dashboard: (creating now) + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/40-run.ans b/services/wizard-ci/snapshots/express-todo/40-run.ans new file mode 100644 index 000000000..11a95e436 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/40-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Install PostHog SDK + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ◼ Validate integration + ▶ Create PostHog dashboard + + ⠋ Progress: 5/6 completed +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Configured dashboard: (creating now) + ◆ Created setup report: /private/tmp/wizard-e2e-express-todo/posthog-setup-report.md + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/41-run.ans b/services/wizard-ci/snapshots/express-todo/41-run.ans new file mode 100644 index 000000000..d9fefd704 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/41-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Install PostHog SDK + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ◼ Validate integration + ◼ Create PostHog dashboard + + ⠋ Cleaning up... +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Configured dashboard: (creating now) + ◆ Created setup report: /private/tmp/wizard-e2e-express-todo/posthog-setup-report.md + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/42-run.ans b/services/wizard-ci/snapshots/express-todo/42-run.ans new file mode 100644 index 000000000..3084dc61d --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/42-run.ans @@ -0,0 +1,15 @@ + Learn Tasks + + ◼ Plan event tracking + ◼ Install PostHog SDK + ◼ Configure environment variables + ◼ Implement PostHog and capture events + ◼ Validate integration + ◼ Create PostHog dashboard + + ⠋ Cleaning up... +──────────────────────────────────────────────────────────────────────────────────────────────────── + ┊ Created setup report: /private/tmp/wizard-e2e-express-todo/posthog-setup-report.md + ◆ PostHog integration complete + +  Status   Tail logs   Visualizer   HN  \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/43-outro.ans b/services/wizard-ci/snapshots/express-todo/43-outro.ans new file mode 100644 index 000000000..7845ae6cc --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/43-outro.ans @@ -0,0 +1,23 @@ +✔ Successfully installed PostHog! + +Dashboard: https://us.posthog.com/project/228144/dashboard/1746617?utm_source=wizard&utm_medium=cli& +utm_content=outro-dashboard + +Check ./posthog-setup-report.md for details + +What the agent did: +• Analyzed your Node.js project structure +• Installed the posthog-node package +• Created PostHog initialization with proper configuration +• Configured graceful shutdown for event flushing +• Added example code for events, feature flags, and error capture +• Added environment variables to .env file + +Learn more: +https://posthog.com/docs/libraries/node?utm_source=wizard&utm_medium=cli&utm_content=outro-docs + +Note: This wizard uses an LLM agent to analyze and modify your project. Please review the changes  +made. +How did this work for you? Drop us a line: wizard@posthog.com + +Press any key to continue \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/44-mcp.ans b/services/wizard-ci/snapshots/express-todo/44-mcp.ans new file mode 100644 index 000000000..91a69e63a --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/44-mcp.ans @@ -0,0 +1,3 @@ +Install the MCP so you can chat to your data + +Detecting supported editors... \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/45-slack-connect.ans b/services/wizard-ci/snapshots/express-todo/45-slack-connect.ans new file mode 100644 index 000000000..11e7acbb9 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/45-slack-connect.ans @@ -0,0 +1,2 @@ + +⠋ Checking for an existing Slack connection... \ No newline at end of file diff --git a/services/wizard-ci/snapshots/express-todo/46-keep-skills.ans b/services/wizard-ci/snapshots/express-todo/46-keep-skills.ans new file mode 100644 index 000000000..64cbb0cf7 --- /dev/null +++ b/services/wizard-ci/snapshots/express-todo/46-keep-skills.ans @@ -0,0 +1,3 @@ +Keep the skills? + +Checking installed skills... \ No newline at end of file From 5407f6c4f3f2cf089efa03bd7ce9b5a6ab397d18 Mon Sep 17 00:00:00 2001 From: "Vincent (Wen Yu) Ge" Date: Mon, 22 Jun 2026 16:23:48 -0400 Subject: [PATCH 03/21] docs(wizard-ci): document snapshots env prerequisites The snapshots.ts header now lists what the flow needs in .env (POSTHOG_PERSONAL_API_KEY, POSTHOG_WIZARD_PROJECT_ID, POSTHOG_REGION) and that WIZARD_PATH must point at a checkout containing e2e-harness/. Co-Authored-By: Claude Opus 4.8 --- services/wizard-ci/snapshots.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/services/wizard-ci/snapshots.ts b/services/wizard-ci/snapshots.ts index 4520f1113..0eda7f0f7 100644 --- a/services/wizard-ci/snapshots.ts +++ b/services/wizard-ci/snapshots.ts @@ -14,6 +14,13 @@ * pnpm wizard-ci-snapshots --recording # skip the run, render an existing * # real recording (still no mock) * + * Requires (in .env, sourced by the `wizard-ci-snapshots` mprocs proc): + * POSTHOG_PERSONAL_API_KEY the phx key (used as the gateway bearer) + * POSTHOG_WIZARD_PROJECT_ID the project the key is scoped to (else bootstrap 403s) + * POSTHOG_REGION us | eu + * WIZARD_PATH a wizard checkout that has e2e-harness/ (i.e. on the + * e2e-control-plane branch) — that's where the render runs + * * CI surfaces report.html in visual-comparison mode (and fails on drift until a * human updates the baseline). Locally it's surfaced through mprocs. */ From 0bd343916ad59fc97a40a9b8cfc6b948ecf8b687 Mon Sep 17 00:00:00 2001 From: "Vincent (Wen Yu) Ge" Date: Mon, 22 Jun 2026 16:29:25 -0400 Subject: [PATCH 04/21] fix(wizard-ci): never fail on snapshot drift, just surface the diffs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A real agent emits frames a little differently run to run (different number of status updates → shifted indices), so drift is expected. Print the per-frame diffs + report.html and exit 0; only a genuine failure (run died, no recording) exits non-zero. Accept a new baseline with --update. Co-Authored-By: Claude Opus 4.8 --- services/wizard-ci/snapshots.ts | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/services/wizard-ci/snapshots.ts b/services/wizard-ci/snapshots.ts index 0eda7f0f7..a87cb6cd4 100644 --- a/services/wizard-ci/snapshots.ts +++ b/services/wizard-ci/snapshots.ts @@ -21,8 +21,11 @@ * WIZARD_PATH a wizard checkout that has e2e-harness/ (i.e. on the * e2e-control-plane branch) — that's where the render runs * - * CI surfaces report.html in visual-comparison mode (and fails on drift until a - * human updates the baseline). Locally it's surfaced through mprocs. + * Drift never fails the command — a real agent emits frames a little differently + * each run, so the diffs are surfaced (terminal summary + report.html) for a + * human to eyeball, not asserted away. Only a genuine failure (the run dying, no + * recording) exits non-zero. report.html is the side-by-side visual comparison; + * locally it's surfaced through mprocs. */ import "dotenv/config"; import { join, basename } from "path"; @@ -164,7 +167,7 @@ function main(): number { args[args.indexOf("--project-id") + 1] || ""; - let drift = 0; + let totalChanged = 0; for (const def of TEST_DEFS) { console.log(`\n=== snapshots: ${def.name} (${def.app}) ===`); @@ -212,13 +215,20 @@ function main(): number { console.log(`\nvisual report: ${report}`); if (!existsSync(baselineDir)) console.log(`(no baseline yet — run with --update to seed it)`); - if (changed.length) drift = 1; + totalChanged += changed.length; } - if (drift) - console.log(`\n✖ snapshots drifted. Review the report; accept with --update.`); + // Drift is expected — a real agent does the same steps but emits frames a + // little differently run to run. We surface the diffs for a human to eyeball; + // we never fail on them. (A genuine failure — the run dying or no recording — + // returns non-zero earlier.) Accept a new baseline with --update. + if (totalChanged) + console.log( + `\nℹ ${totalChanged} frame(s) changed — review the report above. ` + + `Accept with --update if the new run looks right.`, + ); else console.log(`\n✓ snapshots match baseline.`); - return drift; + return 0; } process.exit(main()); From 266ccec34d0ee389d785bd09128fc4e31117d212 Mon Sep 17 00:00:00 2001 From: "Vincent (Wen Yu) Ge" Date: Mon, 22 Jun 2026 16:33:34 -0400 Subject: [PATCH 05/21] feat(wizard-ci): offer to replay the run's snapshots at the end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the diff, prompt "Replay snapshots in the terminal? [y/N]" and, on yes, launch the replay stepper directly on the run's recording — no copy/paste. TTY-only (auto-declines in CI so nothing hangs); the replayer inherits stdio for its own Enter-to-step loop. Co-Authored-By: Claude Opus 4.8 --- services/wizard-ci/snapshots.ts | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/services/wizard-ci/snapshots.ts b/services/wizard-ci/snapshots.ts index a87cb6cd4..b94c6bf44 100644 --- a/services/wizard-ci/snapshots.ts +++ b/services/wizard-ci/snapshots.ts @@ -39,9 +39,19 @@ import { cpSync, } from "fs"; import { spawnSync } from "child_process"; -import { runE2e } from "./e2e.js"; +import { createInterface } from "readline"; +import { runE2e, replayRecording } from "./e2e.js"; import { ansiToHtml } from "./ansi-html.js"; +/** Yes/no prompt. Auto-no when not a TTY (CI), so nothing ever hangs. */ +async function confirm(question: string): Promise { + if (!process.stdin.isTTY) return false; + const rl = createInterface({ input: process.stdin, output: process.stdout }); + const answer = await new Promise((res) => rl.question(question, res)); + rl.close(); + return /^y(es)?$/i.test(answer.trim()); +} + const WORKBENCH = join(import.meta.dirname, "..", ".."); const BASELINE_ROOT = join(import.meta.dirname, "snapshots"); const OUT_ROOT = "/tmp/wizard-snapshots"; @@ -157,7 +167,7 @@ function stripAnsi(s: string): string { return s.replace(/\x1b\[[0-9;]*m/g, ""); } -function main(): number { +async function main(): Promise { const args = process.argv.slice(2); const update = args.includes("--update"); const recordingArg = args[args.indexOf("--recording") + 1]; @@ -168,6 +178,7 @@ function main(): number { ""; let totalChanged = 0; + const recorded: Array<{ name: string; recording: string }> = []; for (const def of TEST_DEFS) { console.log(`\n=== snapshots: ${def.name} (${def.app}) ===`); @@ -183,6 +194,7 @@ function main(): number { console.error(`✖ no recording at ${recording}`); return 1; } + recorded.push({ name: def.name, recording }); const currentDir = join(OUT_ROOT, def.name, "current"); const baselineDir = join(BASELINE_ROOT, def.name); @@ -228,7 +240,16 @@ function main(): number { `Accept with --update if the new run looks right.`, ); else console.log(`\n✓ snapshots match baseline.`); + + // Offer to replay the run's snapshots right here — no need to copy/paste a + // command. TTY only (confirm() auto-declines in CI), and the replayer takes + // over stdin for its own stepper once this prompt closes. + for (const { name, recording } of recorded) { + if (await confirm(`\nReplay ${name} snapshots in the terminal? [y/N] `)) { + replayRecording(recording, ["--step"]); + } + } return 0; } -process.exit(main()); +main().then((code) => process.exit(code)); From 0c39ee652008086062a58d8c8a91522baceb475c Mon Sep 17 00:00:00 2001 From: "Vincent (Wen Yu) Ge" Date: Mon, 22 Jun 2026 17:37:44 -0400 Subject: [PATCH 06/21] docs: add agentic-exploration section with an example prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document handing the Wizard to an agent to run/drive/explore it headlessly, pointing at the runbook (wizard repo e2e-harness/EXPLORING-AS-AN-AGENT.md) with a copy-paste example prompt that targets wasp-lang/open-saas — the agent works out how to build + run the target itself. Co-Authored-By: Claude Opus 4.8 --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index e290df2f6..ae2624a70 100644 --- a/README.md +++ b/README.md @@ -256,3 +256,15 @@ This generates the CA cert at `~/.mitmproxy/mitmproxy-ca-cert.pem` and adds it t In phrocs, start the `mitmproxy` process first, then start `wizard-run-proxy`. Traffic will appear in the mitmproxy TUI. Alternatively, you can use [Charles Proxy](https://www.charlesproxy.com/) (GUI-based, paid license) on port `8888` instead of mitmproxy. + +--- + +## Agentic exploration + +You can hand the Wizard to an AI agent and have it **run, drive, and explore the Wizard itself** — against any app, headlessly, snapshotting the TUI so it can see what happened. The agent's runbook lives in the Wizard repo at `e2e-harness/EXPLORING-AS-AN-AGENT.md` (set `WIZARD_PATH` to that checkout). It covers driving the flow through the `wizard-ci-tools` control plane (`read_state` / `list_actions` / `perform_action`), capturing snapshots with `renderFrame`, and the env the run needs. + +Point an agent (e.g. Claude Code) at it with a prompt like — here, exploring against [open-saas](https://github.com/wasp-lang/open-saas): + +> Explore the PostHog Wizard against a real app, in the name of agentic exploration. Read `e2e-harness/EXPLORING-AS-AN-AGENT.md` in the wizard repo — it's your runbook for driving the Wizard headlessly, capturing snapshots, and the env you'll need. Ask me for my phx key file path and set up per the runbook. Then clone `https://github.com/wasp-lang/open-saas` into a throwaway `/tmp` copy, work out how to build it, and run the Wizard against it — driving the flow, snapshotting each key moment, and rendering the screens back so I can see them. Then tell me what the Wizard did: which screens it walked, what it changed in the project, and anything that broke. + +The agent figures out how to build and run the target itself — that's the point. It learns the Wizard by driving it. From 58869b36aa2645dce4c769f0c5355f0d85bc1b98 Mon Sep 17 00:00:00 2001 From: "Vincent (Wen Yu) Ge" Date: Mon, 22 Jun 2026 17:50:49 -0400 Subject: [PATCH 07/21] docs: remove agent-exploration section (moved to wizard README); trim comments The agentic-exploration section belongs in the wizard repo's README, not here. Also trim snapshots.ts / index.ts comments to concise current-behavior. Co-Authored-By: Claude Opus 4.8 --- README.md | 12 --------- services/wizard-ci/index.ts | 2 +- services/wizard-ci/snapshots.ts | 44 ++++++++++++--------------------- 3 files changed, 17 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index ae2624a70..e290df2f6 100644 --- a/README.md +++ b/README.md @@ -256,15 +256,3 @@ This generates the CA cert at `~/.mitmproxy/mitmproxy-ca-cert.pem` and adds it t In phrocs, start the `mitmproxy` process first, then start `wizard-run-proxy`. Traffic will appear in the mitmproxy TUI. Alternatively, you can use [Charles Proxy](https://www.charlesproxy.com/) (GUI-based, paid license) on port `8888` instead of mitmproxy. - ---- - -## Agentic exploration - -You can hand the Wizard to an AI agent and have it **run, drive, and explore the Wizard itself** — against any app, headlessly, snapshotting the TUI so it can see what happened. The agent's runbook lives in the Wizard repo at `e2e-harness/EXPLORING-AS-AN-AGENT.md` (set `WIZARD_PATH` to that checkout). It covers driving the flow through the `wizard-ci-tools` control plane (`read_state` / `list_actions` / `perform_action`), capturing snapshots with `renderFrame`, and the env the run needs. - -Point an agent (e.g. Claude Code) at it with a prompt like — here, exploring against [open-saas](https://github.com/wasp-lang/open-saas): - -> Explore the PostHog Wizard against a real app, in the name of agentic exploration. Read `e2e-harness/EXPLORING-AS-AN-AGENT.md` in the wizard repo — it's your runbook for driving the Wizard headlessly, capturing snapshots, and the env you'll need. Ask me for my phx key file path and set up per the runbook. Then clone `https://github.com/wasp-lang/open-saas` into a throwaway `/tmp` copy, work out how to build it, and run the Wizard against it — driving the flow, snapshotting each key moment, and rendering the screens back so I can see them. Then tell me what the Wizard did: which screens it walked, what it changed in the project, and anything that broke. - -The agent figures out how to build and run the target itself — that's the point. It learns the Wizard by driving it. diff --git a/services/wizard-ci/index.ts b/services/wizard-ci/index.ts index d62e1c6af..8e29181bb 100644 --- a/services/wizard-ci/index.ts +++ b/services/wizard-ci/index.ts @@ -783,7 +783,7 @@ async function main(): Promise { } // Control-plane e2e: run the full interactive flow via wizard-ci-tools and - // assert on structured state, instead of the classic LoggingUI spawn + PR. + // assert on structured state. if (opts.e2e) { process.exit( runE2e({ diff --git a/services/wizard-ci/snapshots.ts b/services/wizard-ci/snapshots.ts index b94c6bf44..2815a652e 100644 --- a/services/wizard-ci/snapshots.ts +++ b/services/wizard-ci/snapshots.ts @@ -2,30 +2,23 @@ * wizard-ci snapshots: TUI visual-regression for the CI-e2e test definitions. * * For each test definition (for now: the integration flow on express-todo) this - * runs a REAL `--e2e` agent run, renders every key-moment frame of the run's + * runs a real `--e2e` agent run, renders every key-moment frame of the run's * recording to a real-Ink ANSI snapshot, and diffs it against a committed - * baseline. The point is NOT pixel-determinism — a real agent enqueues tasks - * differently run to run — it's to surface those differences to a human in a - * side-by-side: same screens every time, minor run-to-run changes flagged for - * review. No mocks anywhere: real agent, real recording, real render. + * baseline. Differences are surfaced in a side-by-side for review. * * pnpm wizard-ci-snapshots # run + compare, write HTML report * pnpm wizard-ci-snapshots --update # accept current output as baseline - * pnpm wizard-ci-snapshots --recording # skip the run, render an existing - * # real recording (still no mock) + * pnpm wizard-ci-snapshots --recording # render an existing recording, skip the run * * Requires (in .env, sourced by the `wizard-ci-snapshots` mprocs proc): - * POSTHOG_PERSONAL_API_KEY the phx key (used as the gateway bearer) - * POSTHOG_WIZARD_PROJECT_ID the project the key is scoped to (else bootstrap 403s) + * POSTHOG_PERSONAL_API_KEY the phx key (gateway bearer) + * POSTHOG_WIZARD_PROJECT_ID the project the key is scoped to * POSTHOG_REGION us | eu - * WIZARD_PATH a wizard checkout that has e2e-harness/ (i.e. on the - * e2e-control-plane branch) — that's where the render runs + * WIZARD_PATH a wizard checkout that has e2e-harness/ (where the render runs) * - * Drift never fails the command — a real agent emits frames a little differently - * each run, so the diffs are surfaced (terminal summary + report.html) for a - * human to eyeball, not asserted away. Only a genuine failure (the run dying, no - * recording) exits non-zero. report.html is the side-by-side visual comparison; - * locally it's surfaced through mprocs. + * Drift never fails the command: diffs are surfaced (terminal + report.html), and + * only a genuine failure (run died, no recording) exits non-zero. report.html is + * the side-by-side; locally it's surfaced through mprocs. */ import "dotenv/config"; import { join, basename } from "path"; @@ -93,8 +86,7 @@ function renderSnapshots(recording: string, outDir: string): void { const r = spawnSync("npx", ["tsx", script, recording, outDir], { cwd: wizardRepo(), stdio: "inherit", - // Force truecolor so Ink/chalk emit ANSI even though this isn't a TTY — - // the snapshots capture the real colored TUI, not stripped text. + // Force truecolor so Ink/chalk emit ANSI (not a TTY). env: { ...process.env, FORCE_COLOR: "3" }, }); if (r.status !== 0) throw new Error("render-snapshots failed"); @@ -133,7 +125,7 @@ function reportHtml(name: string, diffs: FrameDiff[]): string { const rows = diffs .map( (d) => ` -
+

${d.status} ${d.file}

baseline
${cell(d.baseline)}
@@ -145,9 +137,9 @@ function reportHtml(name: string, diffs: FrameDiff[]): string { const changed = diffs.filter((d) => d.status !== "same").length; return `wizard-ci snapshots — ${name}