Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,39 @@ jobs:
git reset --hard HEAD || true
git clean -fdx || true
fi

e2e-live-tools:
  name: E2E (live AI-tool hook integration)
  needs: build
  runs-on: ubuntu-latest
  # Real CLIs + real (cheap) model calls against a configurable Anthropic-
  # compatible endpoint. Skipped when the API key secret isn't configured
  # (e.g. PRs from forks).
  #
  # The `secrets` context is not available in a job-level `if:`, so the
  # secret is mapped into `env` below and every step is gated on that env
  # value instead. Without the key the job still starts, but all of its
  # steps are skipped.
  env:
    TEAMAI_E2E_LIVE_TOOLS: '1'
    TEAMAI_E2E_ANTHROPIC_API_KEY: ${{ secrets.TEAMAI_E2E_ANTHROPIC_API_KEY }}
    TEAMAI_E2E_ANTHROPIC_BASE_URL: ${{ vars.TEAMAI_E2E_ANTHROPIC_BASE_URL || 'https://api.model.haihub.cn' }}
    TEAMAI_E2E_CLAUDE_MODEL: ${{ vars.TEAMAI_E2E_CLAUDE_MODEL || 'MiniMax-M2.7' }}
  steps:
    - name: Checkout
      if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
      uses: actions/checkout@v4

    - name: Setup Node.js
      if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
      uses: actions/setup-node@v4
      with:
        node-version: 20
        cache: npm

    - name: Install dependencies
      if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
      run: npm ci --ignore-scripts

    - name: Build
      if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
      run: npm run build

    - name: Install claude + codebuddy CLIs
      if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
      run: |
        npm install -g @anthropic-ai/claude-code @tencent-ai/codebuddy-code

    - name: Run live AI-tool hook integration E2E
      if: ${{ env.TEAMAI_E2E_ANTHROPIC_API_KEY != '' }}
      run: npx vitest run --config vitest.e2e.config.ts src/__tests__/e2e/tool-integration-e2e.test.ts --reporter=verbose
115 changes: 92 additions & 23 deletions src/__tests__/e2e/tool-integration-e2e.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,28 @@ import { injectHooks } from '../../hooks.js';
// requiring a global install.
// Everything else (event names, matchers, structure) stays byte-real.
// 3. Run the CLI non-interactively (`-p`/`--print`) with a cheap model
// on a trivial prompt. The CLI keeps its *real* HOME so its auth
// keeps working — only the spawned hook subprocess sees the sandbox
// HOME.
// 4. Assert that teamai's SessionStart hook fired by checking that a
// `session_start` event was appended to the sandbox dashboard log.
// on a prompt that forces a single shell-tool call (so the whole hook
// lifecycle fires in one run). The Anthropic-compatible CLIs (claude,
// codebuddy) are pointed at a *real, configurable* API via
// ANTHROPIC_API_KEY / ANTHROPIC_BASE_URL, so the session starts with
// an explicit key instead of relying on the developer's interactive
// login, and narrowly pre-approve only the `echo` command so the tool
// call is not blocked (no blanket permission bypass). The CLI keeps its
// real HOME; only the spawned hook subprocess sees the sandbox HOME.
// 4. Assert that teamai's hooks fired end-to-end across the full session
// lifecycle by checking the sandbox dashboard log contains every
// expected event: session_start, prompt_submit, tool_use (PostToolUse),
// and stop.
//
// Gated behind TEAMAI_E2E_LIVE_TOOLS=1 because it needs the real CLIs
// installed + authenticated and makes (cheap) real model calls — neither
// of which is available on shared CI runners.
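// installed and makes (cheap) real model calls — neither of which is
// available by default on shared CI runners; the dedicated `e2e-live-tools`
// CI job installs the CLIs and supplies the API key secret.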
//
// Real API credentials (configurable) — used by claude + codebuddy:
// TEAMAI_E2E_ANTHROPIC_API_KEY (required; the tool is skipped if unset)
// TEAMAI_E2E_ANTHROPIC_BASE_URL (optional; defaults to the CLI's own)
// cursor-agent is not Anthropic-compatible; it stays on its own
// `cursor-agent login` auth and auto-skips until logged in.
//
// Cheap models can be overridden via env:
// TEAMAI_E2E_CLAUDE_MODEL (default: claude-haiku-4-5-20251001)
Expand All @@ -50,7 +63,37 @@ const LIVE = process.env.TEAMAI_E2E_LIVE_TOOLS === '1';
// PATH with ~/.local/bin prepended, since that is where cursor-agent
// installs itself by default.
const AUGMENTED_PATH = [
  path.join(os.homedir(), '.local', 'bin'),
  process.env.PATH ?? '',
].join(':');

const PROMPT = 'Reply with exactly the single word PONG. Do not use any tools.';
// Prompt designed to trigger exactly one shell-tool invocation. A tool call
// is what lets a single session walk through the full hook lifecycle
// (SessionStart, UserPromptSubmit, PostToolUse, Stop), so one run covers
// every event teamai injects.
const PROMPT = [
  'Run this shell command using your command-execution tool and show its output:',
  'echo PONG. After the tool result comes back, reply with the single word DONE.',
].join(' ');

// Additional argv for the Anthropic CLIs so the forced tool call proceeds
// without an interactive approval prompt. Only `echo` is pre-approved — not
// all of Bash, and not --dangerously-skip-permissions — so the spawned agent
// can run the single benign command the prompt requests and nothing more.
// Both claude and codebuddy accept command-scoped allowlist patterns.
const TOOL_FORCING_ARGS: string[] = ['--allowedTools', 'Bash(echo:*)'];

// The dashboard events teamai's built-in hooks are expected to emit during
// one tool-using session (see builtin-hooks.ts + dashboard-collector
// mapEventType).
const EXPECTED_EVENTS = [
  'session_start',
  'prompt_submit',
  'tool_use',
  'stop',
] as const;

// Real API credentials for the Anthropic-compatible CLIs (claude, codebuddy).
// Both come from the environment so the suite can target a real key +
// endpoint rather than the developer's interactive login; an unset variable
// becomes the empty string.
const {
  TEAMAI_E2E_ANTHROPIC_API_KEY: API_KEY = '',
  TEAMAI_E2E_ANTHROPIC_BASE_URL: API_BASE_URL = '',
} = process.env;

/**
 * Build the environment overrides that aim an Anthropic-compatible CLI at
 * the configured API.
 *
 * @returns An object that always carries `ANTHROPIC_API_KEY` and adds
 *   `ANTHROPIC_BASE_URL` only when a base URL has been configured.
 */
function anthropicApiEnv(): Record<string, string> {
  return {
    ANTHROPIC_API_KEY: API_KEY,
    // Omit the base URL entirely when unset so the CLI keeps its own default.
    ...(API_BASE_URL ? { ANTHROPIC_BASE_URL: API_BASE_URL } : {}),
  };
}

interface RunPlan {
/** argv passed to the CLI. */
Expand All @@ -68,6 +111,8 @@ interface ToolSpec {
bin: string;
/** Cheap model id for this CLI. */
model: string;
/** Whether this CLI speaks the Anthropic API (driven by ANTHROPIC_* env). */
anthropicApi: boolean;
/**
* Build the settings/hooks file path + return the argv to run the CLI.
* `sandbox` is the isolated teamai HOME; `wsDir` is a scratch cwd.
Expand Down Expand Up @@ -96,10 +141,11 @@ const TOOLS: ToolSpec[] = [
id: 'claude',
bin: 'claude',
model: CLAUDE_MODEL,
anthropicApi: true,
prepare(sandbox, wsDir) {
const settings = path.join(sandbox, 'claude-settings.json');
return {
args: ['-p', PROMPT, '--model', this.model, '--settings', settings],
args: ['-p', PROMPT, '--model', this.model, '--settings', settings, ...TOOL_FORCING_ARGS],
cwd: wsDir,
settingsFile: settings,
};
Expand All @@ -109,10 +155,11 @@ const TOOLS: ToolSpec[] = [
id: 'codebuddy',
bin: 'codebuddy',
model: CODEBUDDY_MODEL,
anthropicApi: true,
prepare(sandbox, wsDir) {
const settings = path.join(sandbox, 'codebuddy-settings.json');
return {
args: ['-p', PROMPT, '--model', this.model, '--settings', settings],
args: ['-p', PROMPT, '--model', this.model, '--settings', settings, ...TOOL_FORCING_ARGS],
cwd: wsDir,
settingsFile: settings,
};
Expand All @@ -122,6 +169,7 @@ const TOOLS: ToolSpec[] = [
id: 'cursor',
bin: 'cursor-agent',
model: CURSOR_MODEL,
anthropicApi: false,
prepare(_sandbox, wsDir) {
// cursor-agent has no --settings flag; it reads hooks from the
// workspace-level .cursor/hooks.json. Run inside an isolated workspace.
Expand All @@ -133,7 +181,7 @@ const TOOLS: ToolSpec[] = [
},
];

/** Detect whether a CLI is installed (and, for cursor, authenticated). */
/** Detect whether a CLI is installed + has usable credentials. */
function toolStatus(spec: ToolSpec): { ok: boolean; reason: string } {
const found = spawnSync('bash', ['-lc', `command -v ${spec.bin}`], {
env: { ...process.env, PATH: AUGMENTED_PATH },
Expand All @@ -142,6 +190,12 @@ function toolStatus(spec: ToolSpec): { ok: boolean; reason: string } {
if (found.status !== 0 || !found.stdout.trim()) {
return { ok: false, reason: `${spec.bin} not found on PATH` };
}
if (spec.anthropicApi && !API_KEY) {
return {
ok: false,
reason: `no API key — set TEAMAI_E2E_ANTHROPIC_API_KEY to run ${spec.bin}`,
};
}
if (spec.id === 'cursor') {
const st = spawnSync(spec.bin, ['status'], {
env: { ...process.env, PATH: AUGMENTED_PATH },
Expand Down Expand Up @@ -181,7 +235,7 @@ describe('live AI-tool hook integration', () => {
const status = toolStatus(spec);

it.skipIf(!status.ok)(
`${spec.bin}: teamai SessionStart hook fires end-to-end (cheap model)`,
`${spec.bin}: teamai hooks fire end-to-end across the session lifecycle (cheap model)`,
async () => {
const sandbox = fs.mkdtempSync(path.join(os.tmpdir(), `teamai-tool-${spec.id}-`));
const wsDir = path.join(sandbox, 'ws');
Expand All @@ -195,27 +249,42 @@ describe('live AI-tool hook integration', () => {
// 2. …then redirect its dispatch commands to the sandbox + built dist.
redirectHooksFile(plan.settingsFile, sandbox);

// 3. Run the CLI non-interactively with a cheap model.
// 3. Run the CLI non-interactively with a cheap model. Anthropic
// CLIs get the configured real API key + base URL.
const res = spawnSync(spec.bin, plan.args, {
env: { ...process.env, PATH: AUGMENTED_PATH, FORCE_COLOR: '0' },
env: {
...process.env,
PATH: AUGMENTED_PATH,
FORCE_COLOR: '0',
...(spec.anthropicApi ? anthropicApiEnv() : {}),
},
cwd: plan.cwd,
encoding: 'utf-8',
input: '',
timeout: 90_000,
});

// 4. Assert teamai's SessionStart hook fired (dashboard event).
// 4. Assert teamai's hooks fired across the whole lifecycle: the
// tool-forcing prompt makes one run emit every expected event.
// The sandbox HOME is fresh per test, so every event in the log
// belongs to this tool's run.
const events = readEvents(sandbox);
const sessionStart = events.find(
(e) => e.type === 'session_start' && e.tool === spec.id,
);
const seen = new Set(events.map((e) => e.type));
const diag =
`stdout:\n${res.stdout}\nstderr:\n${res.stderr}\n` +
`events:\n${JSON.stringify(events, null, 2)}`;

for (const ev of EXPECTED_EVENTS) {
expect(
seen.has(ev),
`expected a '${ev}' event from ${spec.bin}, got [${[...seen].join(', ')}].\n${diag}`,
).toBe(true);
}
// session_start must be attributed to the right tool.
expect(
sessionStart,
`expected a session_start event from ${spec.bin}.\n` +
`stdout:\n${res.stdout}\nstderr:\n${res.stderr}\n` +
`events:\n${JSON.stringify(events, null, 2)}`,
).toBeDefined();
events.find((e) => e.type === 'session_start')?.tool,
`session_start should be attributed to ${spec.id}.\n${diag}`,
).toBe(spec.id);
} finally {
fs.rmSync(sandbox, { recursive: true, force: true });
}
Expand Down