From 4cb2bc04d52f2116ac5437f694373e5739de76f5 Mon Sep 17 00:00:00 2001 From: jeffyxu Date: Wed, 1 Jul 2026 15:55:12 +0800 Subject: [PATCH] test(e2e): configurable real API key for live tool-integration e2e + wire into CI Decouple the live claude/codebuddy hook-integration e2e from ambient interactive login by making the Anthropic API key and base URL configurable via TEAMAI_E2E_ANTHROPIC_API_KEY / TEAMAI_E2E_ANTHROPIC_BASE_URL, broaden the assertion to cover the full hook lifecycle (session_start, prompt_submit, tool_use, stop), and add an e2e-live-tools CI job that runs it on GitHub Actions when the corresponding secret is configured. Co-Authored-By: Claude Sonnet 5 --- .github/workflows/ci.yml | 36 ++++++ .../e2e/tool-integration-e2e.test.ts | 115 ++++++++++++++---- 2 files changed, 128 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e5184b3..50e4a52 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -146,3 +146,39 @@ jobs: git reset --hard HEAD || true git clean -fdx || true fi + + e2e-live-tools: + name: E2E (live AI-tool hook integration) + needs: build + runs-on: ubuntu-latest + # Real CLIs + real (cheap) model calls against a configurable Anthropic- + # compatible endpoint. Skip when the API key secret isn't configured + # (e.g. PRs from forks). 
+ if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }} # job-level 'if' cannot read the secrets context; without a key the Anthropic-CLI tests self-skip + env: + TEAMAI_E2E_LIVE_TOOLS: '1' + TEAMAI_E2E_ANTHROPIC_API_KEY: ${{ secrets.TEAMAI_E2E_ANTHROPIC_API_KEY }} + TEAMAI_E2E_ANTHROPIC_BASE_URL: ${{ vars.TEAMAI_E2E_ANTHROPIC_BASE_URL || 'https://api.model.haihub.cn' }} + TEAMAI_E2E_CLAUDE_MODEL: ${{ vars.TEAMAI_E2E_CLAUDE_MODEL || 'MiniMax-M2.7' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + + - name: Install dependencies + run: npm ci --ignore-scripts + + - name: Build + run: npm run build + + - name: Install claude + codebuddy CLIs + run: | + npm install -g @anthropic-ai/claude-code @tencent-ai/codebuddy-code + + - name: Run live AI-tool hook integration E2E + run: npx vitest run --config vitest.e2e.config.ts src/__tests__/e2e/tool-integration-e2e.test.ts --reporter=verbose diff --git a/src/__tests__/e2e/tool-integration-e2e.test.ts b/src/__tests__/e2e/tool-integration-e2e.test.ts index 1c83e24..e5f0728 100644 --- a/src/__tests__/e2e/tool-integration-e2e.test.ts +++ b/src/__tests__/e2e/tool-integration-e2e.test.ts @@ -24,15 +24,28 @@ import { injectHooks } from '../../hooks.js'; // requiring a global install. // Everything else (event names, matchers, structure) stays byte-real. // 3. Run the CLI non-interactively (`-p`/`--print`) with a cheap model -// on a trivial prompt. The CLI keeps its *real* HOME so its auth -// keeps working — only the spawned hook subprocess sees the sandbox -// HOME. -// 4. Assert that teamai's SessionStart hook fired by checking that a -// `session_start` event was appended to the sandbox dashboard log. +// on a prompt that forces a single shell-tool call (so the whole hook +// lifecycle fires in one run). 
The Anthropic-compatible CLIs (claude, +// codebuddy) are pointed at a *real, configurable* API via +// ANTHROPIC_API_KEY / ANTHROPIC_BASE_URL, so the session starts with +// an explicit key instead of relying on the developer's interactive +// login; we narrowly pre-approve only the `echo` command so the tool +// call is not blocked (no blanket permission bypass). The CLI keeps its +// real HOME; only the spawned hook subprocess sees the sandbox HOME. +// 4. Assert that teamai's hooks fired end-to-end across the full session +// lifecycle by checking the sandbox dashboard log contains every +// expected event: session_start, prompt_submit, tool_use (PostToolUse), +// and stop. // // Gated behind TEAMAI_E2E_LIVE_TOOLS=1 because it needs the real CLIs -// installed + authenticated and makes (cheap) real model calls — neither -// of which is available on shared CI runners. +// installed and makes (cheap) real model calls — neither of which is +// available on shared CI runners by default. +// +// Real API credentials (configurable) — used by claude + codebuddy: +// TEAMAI_E2E_ANTHROPIC_API_KEY (required; the tool is skipped if unset) +// TEAMAI_E2E_ANTHROPIC_BASE_URL (optional; defaults to the CLI's own) +// cursor-agent is not Anthropic-compatible; it stays on its own +// `cursor-agent login` auth and auto-skips until logged in. // // Cheap models can be overridden via env: // TEAMAI_E2E_CLAUDE_MODEL (default: claude-haiku-4-5-20251001) @@ -50,7 +63,37 @@ const LIVE = process.env.TEAMAI_E2E_LIVE_TOOLS === '1'; // Include ~/.local/bin (cursor-agent's default install dir) on PATH. const AUGMENTED_PATH = `${path.join(os.homedir(), '.local', 'bin')}:${process.env.PATH ?? ''}`; -const PROMPT = 'Reply with exactly the single word PONG. Do not use any tools.'; +// A prompt that forces exactly one shell-tool call. 
Running a tool is what +// makes the *whole* hook lifecycle fire in a single session — SessionStart, +// UserPromptSubmit, PostToolUse and Stop — so one run exercises every event +// teamai injects. +const PROMPT = + 'Run this shell command using your command-execution tool and show its output: ' + + 'echo PONG. After the tool result comes back, reply with the single word DONE.'; + +// Extra argv for the Anthropic CLIs so the forced tool call runs without an +// interactive prompt. We narrowly pre-approve only `echo` (NOT all of Bash, +// and NOT --dangerously-skip-permissions) so the spawned agent can run the one +// benign command the prompt asks for and nothing else. Both claude and +// codebuddy accept command-scoped allowlist patterns. +const TOOL_FORCING_ARGS = ['--allowedTools', 'Bash(echo:*)']; + +// Every dashboard event teamai's built-in hooks should produce over one +// tool-using session (see builtin-hooks.ts + dashboard-collector mapEventType). +const EXPECTED_EVENTS = ['session_start', 'prompt_submit', 'tool_use', 'stop'] as const; + +// Real API credentials for the Anthropic-compatible CLIs (claude, codebuddy). +// Both are configurable so the suite can run against a real key + endpoint +// instead of the developer's interactive login. +const API_KEY = process.env.TEAMAI_E2E_ANTHROPIC_API_KEY ?? ''; +const API_BASE_URL = process.env.TEAMAI_E2E_ANTHROPIC_BASE_URL ?? ''; + +/** Env vars that point an Anthropic-compatible CLI at the configured API. */ +function anthropicApiEnv(): Record<string, string> { + const env: Record<string, string> = { ANTHROPIC_API_KEY: API_KEY }; + if (API_BASE_URL) env.ANTHROPIC_BASE_URL = API_BASE_URL; + return env; +} + interface RunPlan { /** argv passed to the CLI. */ @@ -68,6 +111,8 @@ interface ToolSpec { bin: string; /** Cheap model id for this CLI. */ model: string; + /** Whether this CLI speaks the Anthropic API (driven by ANTHROPIC_* env). */ + anthropicApi: boolean; /** * Build the settings/hooks file path + return the argv to run the CLI. 
* `sandbox` is the isolated teamai HOME; `wsDir` is a scratch cwd. @@ -96,10 +141,11 @@ const TOOLS: ToolSpec[] = [ id: 'claude', bin: 'claude', model: CLAUDE_MODEL, + anthropicApi: true, prepare(sandbox, wsDir) { const settings = path.join(sandbox, 'claude-settings.json'); return { - args: ['-p', PROMPT, '--model', this.model, '--settings', settings], + args: ['-p', PROMPT, '--model', this.model, '--settings', settings, ...TOOL_FORCING_ARGS], cwd: wsDir, settingsFile: settings, }; @@ -109,10 +155,11 @@ const TOOLS: ToolSpec[] = [ id: 'codebuddy', bin: 'codebuddy', model: CODEBUDDY_MODEL, + anthropicApi: true, prepare(sandbox, wsDir) { const settings = path.join(sandbox, 'codebuddy-settings.json'); return { - args: ['-p', PROMPT, '--model', this.model, '--settings', settings], + args: ['-p', PROMPT, '--model', this.model, '--settings', settings, ...TOOL_FORCING_ARGS], cwd: wsDir, settingsFile: settings, }; @@ -122,6 +169,7 @@ const TOOLS: ToolSpec[] = [ id: 'cursor', bin: 'cursor-agent', model: CURSOR_MODEL, + anthropicApi: false, prepare(_sandbox, wsDir) { // cursor-agent has no --settings flag; it reads hooks from the // workspace-level .cursor/hooks.json. Run inside an isolated workspace. @@ -133,7 +181,7 @@ const TOOLS: ToolSpec[] = [ }, ]; -/** Detect whether a CLI is installed (and, for cursor, authenticated). */ +/** Detect whether a CLI is installed + has usable credentials. 
*/ function toolStatus(spec: ToolSpec): { ok: boolean; reason: string } { const found = spawnSync('bash', ['-lc', `command -v ${spec.bin}`], { env: { ...process.env, PATH: AUGMENTED_PATH }, @@ -142,6 +190,12 @@ function toolStatus(spec: ToolSpec): { ok: boolean; reason: string } { if (found.status !== 0 || !found.stdout.trim()) { return { ok: false, reason: `${spec.bin} not found on PATH` }; } + if (spec.anthropicApi && !API_KEY) { + return { + ok: false, + reason: `no API key — set TEAMAI_E2E_ANTHROPIC_API_KEY to run ${spec.bin}`, + }; + } if (spec.id === 'cursor') { const st = spawnSync(spec.bin, ['status'], { env: { ...process.env, PATH: AUGMENTED_PATH }, @@ -181,7 +235,7 @@ describe('live AI-tool hook integration', () => { const status = toolStatus(spec); it.skipIf(!status.ok)( - `${spec.bin}: teamai SessionStart hook fires end-to-end (cheap model)`, + `${spec.bin}: teamai hooks fire end-to-end across the session lifecycle (cheap model)`, async () => { const sandbox = fs.mkdtempSync(path.join(os.tmpdir(), `teamai-tool-${spec.id}-`)); const wsDir = path.join(sandbox, 'ws'); @@ -195,27 +249,42 @@ describe('live AI-tool hook integration', () => { // 2. …then redirect its dispatch commands to the sandbox + built dist. redirectHooksFile(plan.settingsFile, sandbox); - // 3. Run the CLI non-interactively with a cheap model. + // 3. Run the CLI non-interactively with a cheap model. Anthropic + // CLIs get the configured real API key + base URL. const res = spawnSync(spec.bin, plan.args, { - env: { ...process.env, PATH: AUGMENTED_PATH, FORCE_COLOR: '0' }, + env: { + ...process.env, + PATH: AUGMENTED_PATH, + FORCE_COLOR: '0', + ...(spec.anthropicApi ? anthropicApiEnv() : {}), + }, cwd: plan.cwd, encoding: 'utf-8', input: '', timeout: 90_000, }); - // 4. Assert teamai's SessionStart hook fired (dashboard event). + // 4. Assert teamai's hooks fired across the whole lifecycle: the + // tool-forcing prompt makes one run emit every expected event. 
+ // The sandbox HOME is fresh per test, so every event in the log + // belongs to this tool's run. const events = readEvents(sandbox); - const sessionStart = events.find( - (e) => e.type === 'session_start' && e.tool === spec.id, - ); + const seen = new Set(events.map((e) => e.type)); + const diag = + `stdout:\n${res.stdout}\nstderr:\n${res.stderr}\n` + + `events:\n${JSON.stringify(events, null, 2)}`; + for (const ev of EXPECTED_EVENTS) { + expect( + seen.has(ev), + `expected a '${ev}' event from ${spec.bin}, got [${[...seen].join(', ')}].\n${diag}`, + ).toBe(true); + } + // session_start must be attributed to the right tool. expect( - sessionStart, - `expected a session_start event from ${spec.bin}.\n` + - `stdout:\n${res.stdout}\nstderr:\n${res.stderr}\n` + - `events:\n${JSON.stringify(events, null, 2)}`, - ).toBeDefined(); + events.find((e) => e.type === 'session_start')?.tool, + `session_start should be attributed to ${spec.id}.\n${diag}`, + ).toBe(spec.id); } finally { fs.rmSync(sandbox, { recursive: true, force: true }); }