Tencent · jeff-r2026 · Jul 1, 2026 · Jul 1, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -146,3 +146,46 @@ jobs:
             git reset --hard HEAD || true
             git clean -fdx || true
           fi
+
+  e2e-live-tools:
+    name: E2E (live AI-tool hook integration)
+    needs: build
+    runs-on: ubuntu-latest
+    # Real CLIs + real (cheap) model calls against a configurable Anthropic-
+    # compatible endpoint. The `secrets` context is not available in a
+    # job-level `if`, so the key is exposed as a job env var and every step
+    # is gated on it; without the secret (e.g. PRs from forks) the job is a
+    # no-op.
+    env:
+      TEAMAI_E2E_LIVE_TOOLS: '1'
+      TEAMAI_E2E_ANTHROPIC_API_KEY: ${{ secrets.TEAMAI_E2E_ANTHROPIC_API_KEY }}
+      TEAMAI_E2E_ANTHROPIC_BASE_URL: ${{ vars.TEAMAI_E2E_ANTHROPIC_BASE_URL || 'https://api.model.haihub.cn' }}
+      TEAMAI_E2E_CLAUDE_MODEL: ${{ vars.TEAMAI_E2E_CLAUDE_MODEL || 'MiniMax-M2.7' }}
+    steps:
+      - name: Checkout
+        if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+          cache: npm
+
+      - name: Install dependencies
+        if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
+        run: npm ci --ignore-scripts
+
+      - name: Build
+        if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
+        run: npm run build
+
+      - name: Install claude + codebuddy CLIs
+        if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
+        run: |
+          npm install -g @anthropic-ai/claude-code @tencent-ai/codebuddy-code
+
+      - name: Run live AI-tool hook integration E2E
+        if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
+        run: npx vitest run --config vitest.e2e.config.ts src/__tests__/e2e/tool-integration-e2e.test.ts --reporter=verbose
diff --git a/src/__tests__/e2e/tool-integration-e2e.test.ts b/src/__tests__/e2e/tool-integration-e2e.test.ts
@@ -24,15 +24,28 @@ import { injectHooks } from '../../hooks.js';
 //          requiring a global install.
 //      Everything else (event names, matchers, structure) stays byte-real.
 //   3. Run the CLI non-interactively (`-p`/`--print`) with a cheap model
-//      on a trivial prompt. The CLI keeps its *real* HOME so its auth
-//      keeps working — only the spawned hook subprocess sees the sandbox
-//      HOME.
-//   4. Assert that teamai's SessionStart hook fired by checking that a
-//      `session_start` event was appended to the sandbox dashboard log.
+//      on a prompt that forces a single shell-tool call (so the whole hook
+//      lifecycle fires in one run). The Anthropic-compatible CLIs (claude,
+//      codebuddy) are pointed at a *real, configurable* API via
+//      ANTHROPIC_API_KEY / ANTHROPIC_BASE_URL, so the session starts with
+//      an explicit key instead of relying on the developer's interactive
+//      login. Only the `echo` command is pre-approved, so the tool call is
+//      not blocked (no blanket permission bypass). The CLI keeps its real
+//      HOME; only the spawned hook subprocess sees the sandbox HOME.
+//   4. Assert that teamai's hooks fired end-to-end across the full session
+//      lifecycle by checking the sandbox dashboard log contains every
+//      expected event: session_start, prompt_submit, tool_use (PostToolUse),
+//      and stop.
 //
 // Gated behind TEAMAI_E2E_LIVE_TOOLS=1 because it needs the real CLIs
-// installed + authenticated and makes (cheap) real model calls — neither
-// of which is available on shared CI runners.
+// installed and makes (cheap) real model calls. CI opts in through the
+// dedicated e2e-live-tools job, which installs the CLIs and sets the API key.
+//
+// Real API credentials (configurable) — used by claude + codebuddy:
+//   TEAMAI_E2E_ANTHROPIC_API_KEY   (required; the tool is skipped if unset)
+//   TEAMAI_E2E_ANTHROPIC_BASE_URL  (optional; defaults to the CLI's own)
+// cursor-agent is not Anthropic-compatible; it stays on its own
+// `cursor-agent login` auth and auto-skips until logged in.
 //
 // Cheap models can be overridden via env:
 //   TEAMAI_E2E_CLAUDE_MODEL    (default: claude-haiku-4-5-20251001)
@@ -50,7 +63,37 @@ const LIVE = process.env.TEAMAI_E2E_LIVE_TOOLS === '1';
 // Include ~/.local/bin (cursor-agent's default install dir) on PATH.
 const AUGMENTED_PATH = `${path.join(os.homedir(), '.local', 'bin')}:${process.env.PATH ?? ''}`;
 
-const PROMPT = 'Reply with exactly the single word PONG. Do not use any tools.';
+// A prompt that forces exactly one shell-tool call. Running a tool is what
+// makes the *whole* hook lifecycle fire in a single session — SessionStart,
+// UserPromptSubmit, PostToolUse and Stop — so one run exercises every event
+// teamai injects.
+const PROMPT =
+  'Run this shell command using your command-execution tool and show its output: ' +
+  'echo PONG. After the tool result comes back, reply with the single word DONE.';
+
+// Extra argv for the Anthropic CLIs so the forced tool call runs without an
+// interactive prompt. We narrowly pre-approve only `echo` (NOT all of Bash,
+// and NOT --dangerously-skip-permissions) so the spawned agent can run the one
+// benign command the prompt asks for and nothing else. Both claude and
+// codebuddy accept command-scoped allowlist patterns.
+const TOOL_FORCING_ARGS = ['--allowedTools', 'Bash(echo:*)'];
+
+// Every dashboard event teamai's built-in hooks should produce over one
+// tool-using session (see builtin-hooks.ts + dashboard-collector mapEventType).
+const EXPECTED_EVENTS = ['session_start', 'prompt_submit', 'tool_use', 'stop'] as const;
+
+// Real API credentials for the Anthropic-compatible CLIs (claude, codebuddy).
+// Both are configurable so the suite can run against a real key + endpoint
+// instead of the developer's interactive login.
+const API_KEY = process.env.TEAMAI_E2E_ANTHROPIC_API_KEY ?? '';
+const API_BASE_URL = process.env.TEAMAI_E2E_ANTHROPIC_BASE_URL ?? '';
+
+/** Build the ANTHROPIC_* env overrides; the base URL is added only when configured. */
+function anthropicApiEnv(): Record<string, string> {
+  return API_BASE_URL
+    ? { ANTHROPIC_API_KEY: API_KEY, ANTHROPIC_BASE_URL: API_BASE_URL }
+    : { ANTHROPIC_API_KEY: API_KEY };
+}
 
 interface RunPlan {
   /** argv passed to the CLI. */
@@ -68,6 +111,8 @@ interface ToolSpec {
   bin: string;
   /** Cheap model id for this CLI. */
   model: string;
+  /** Whether this CLI speaks the Anthropic API (driven by ANTHROPIC_* env). */
+  anthropicApi: boolean;
   /**
    * Build the settings/hooks file path + return the argv to run the CLI.
    * `sandbox` is the isolated teamai HOME; `wsDir` is a scratch cwd.
@@ -96,10 +141,11 @@ const TOOLS: ToolSpec[] = [
     id: 'claude',
     bin: 'claude',
     model: CLAUDE_MODEL,
+    anthropicApi: true,
     prepare(sandbox, wsDir) {
       const settings = path.join(sandbox, 'claude-settings.json');
       return {
-        args: ['-p', PROMPT, '--model', this.model, '--settings', settings],
+        args: ['-p', PROMPT, '--model', this.model, '--settings', settings, ...TOOL_FORCING_ARGS],
         cwd: wsDir,
         settingsFile: settings,
       };
@@ -109,10 +155,11 @@ const TOOLS: ToolSpec[] = [
     id: 'codebuddy',
     bin: 'codebuddy',
     model: CODEBUDDY_MODEL,
+    anthropicApi: true,
     prepare(sandbox, wsDir) {
       const settings = path.join(sandbox, 'codebuddy-settings.json');
       return {
-        args: ['-p', PROMPT, '--model', this.model, '--settings', settings],
+        args: ['-p', PROMPT, '--model', this.model, '--settings', settings, ...TOOL_FORCING_ARGS],
         cwd: wsDir,
         settingsFile: settings,
       };
@@ -122,6 +169,7 @@ const TOOLS: ToolSpec[] = [
     id: 'cursor',
     bin: 'cursor-agent',
     model: CURSOR_MODEL,
+    anthropicApi: false,
     prepare(_sandbox, wsDir) {
       // cursor-agent has no --settings flag; it reads hooks from the
       // workspace-level .cursor/hooks.json. Run inside an isolated workspace.
@@ -133,7 +181,7 @@ const TOOLS: ToolSpec[] = [
   },
 ];
 
-/** Detect whether a CLI is installed (and, for cursor, authenticated). */
+/** Detect whether a CLI is installed + has usable credentials. */
 function toolStatus(spec: ToolSpec): { ok: boolean; reason: string } {
   const found = spawnSync('bash', ['-lc', `command -v ${spec.bin}`], {
     env: { ...process.env, PATH: AUGMENTED_PATH },
@@ -142,6 +190,12 @@ function toolStatus(spec: ToolSpec): { ok: boolean; reason: string } {
   if (found.status !== 0 || !found.stdout.trim()) {
     return { ok: false, reason: `${spec.bin} not found on PATH` };
   }
+  if (spec.anthropicApi && !API_KEY) {
+    return {
+      ok: false,
+      reason: `no API key — set TEAMAI_E2E_ANTHROPIC_API_KEY to run ${spec.bin}`,
+    };
+  }
   if (spec.id === 'cursor') {
     const st = spawnSync(spec.bin, ['status'], {
       env: { ...process.env, PATH: AUGMENTED_PATH },
@@ -181,7 +235,7 @@ describe('live AI-tool hook integration', () => {
     const status = toolStatus(spec);
 
     it.skipIf(!status.ok)(
-      `${spec.bin}: teamai SessionStart hook fires end-to-end (cheap model)`,
+      `${spec.bin}: teamai hooks fire end-to-end across the session lifecycle (cheap model)`,
       async () => {
         const sandbox = fs.mkdtempSync(path.join(os.tmpdir(), `teamai-tool-${spec.id}-`));
         const wsDir = path.join(sandbox, 'ws');
@@ -195,27 +249,42 @@ describe('live AI-tool hook integration', () => {
           // 2. …then redirect its dispatch commands to the sandbox + built dist.
           redirectHooksFile(plan.settingsFile, sandbox);
 
-          // 3. Run the CLI non-interactively with a cheap model.
+          // 3. Run the CLI non-interactively with a cheap model. Anthropic
+          //    CLIs get the configured real API key + base URL.
           const res = spawnSync(spec.bin, plan.args, {
-            env: { ...process.env, PATH: AUGMENTED_PATH, FORCE_COLOR: '0' },
+            env: {
+              ...process.env,
+              PATH: AUGMENTED_PATH,
+              FORCE_COLOR: '0',
+              ...(spec.anthropicApi ? anthropicApiEnv() : {}),
+            },
             cwd: plan.cwd,
             encoding: 'utf-8',
             input: '',
             timeout: 90_000,
           });
 
-          // 4. Assert teamai's SessionStart hook fired (dashboard event).
+          // 4. Assert teamai's hooks fired across the whole lifecycle: the
+          //    tool-forcing prompt makes one run emit every expected event.
+          //    The sandbox HOME is fresh per test, so every event in the log
+          //    belongs to this tool's run.
           const events = readEvents(sandbox);
-          const sessionStart = events.find(
-            (e) => e.type === 'session_start' && e.tool === spec.id,
-          );
+          const seen = new Set(events.map((e) => e.type));
+          const diag =
+            `stdout:\n${res.stdout}\nstderr:\n${res.stderr}\n` +
+            `events:\n${JSON.stringify(events, null, 2)}`;
 
+          for (const ev of EXPECTED_EVENTS) {
+            expect(
+              seen.has(ev),
+              `expected a '${ev}' event from ${spec.bin}, got [${[...seen].join(', ')}].\n${diag}`,
+            ).toBe(true);
+          }
+          // session_start must be attributed to the right tool.
           expect(
-            sessionStart,
-            `expected a session_start event from ${spec.bin}.\n` +
-              `stdout:\n${res.stdout}\nstderr:\n${res.stderr}\n` +
-              `events:\n${JSON.stringify(events, null, 2)}`,
-          ).toBeDefined();
+            events.find((e) => e.type === 'session_start')?.tool,
+            `session_start should be attributed to ${spec.id}.\n${diag}`,
+          ).toBe(spec.id);
         } finally {
           fs.rmSync(sandbox, { recursive: true, force: true });
         }