From 893f43562531f70f56c589bd11c68319ef567076 Mon Sep 17 00:00:00 2001 From: jeffyxu Date: Tue, 30 Jun 2026 20:25:55 +0800 Subject: [PATCH 1/2] feat(dashboard): track conversation turns + token usage per session (#75) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two session metrics alongside the existing Human Intervention metric: - conversation turns: number of UserPromptSubmit events per session - token usage: input/output/cache tokens summed from the Claude Code transcript, deduplicated by message.id (one turn repeats the same usage across content-block lines, so naive summing over-counts badly) Surfaced on dashboard cards (💬 / ⛁ badges with hover breakdown), aggregated idempotently into stats/.yaml (prompts/tokens) during `teamai pull` via a separate reported-prompt-tokens.json snapshot, and summarized in `teamai digest`. Tools without a transcript (e.g. Cursor) degrade gracefully to prompt-count only. Privacy: counts only, no prompt or transcript text is persisted. Tests: unit (conversation-token-metrics) + e2e (conversation-token-e2e, incl. Cursor degradation). Verified end-to-end against a real claude-haiku-4-5 session. Co-authored-by: Cursor --- docs/usage-guide.md | 13 + src/__tests__/conversation-token-e2e.test.ts | 143 +++++++++++ .../conversation-token-metrics.test.ts | 227 ++++++++++++++++++ src/dashboard-collector.ts | 147 +++++++++--- src/dashboard-html.ts | 46 ++++ src/digest.ts | 67 +++++- src/team-push.ts | 140 ++++++++++- src/types.ts | 72 ++++++ 8 files changed, 809 insertions(+), 46 deletions(-) create mode 100644 src/__tests__/conversation-token-e2e.test.ts create mode 100644 src/__tests__/conversation-token-metrics.test.ts diff --git a/docs/usage-guide.md b/docs/usage-guide.md index b9107a5..5aa4b98 100644 --- a/docs/usage-guide.md +++ b/docs/usage-guide.md @@ -528,6 +528,19 @@ teamai dashboard --port 8080 干预数据会随 `teamai pull` 自动聚合上报到团队 `stats/.yaml`,并在 `teamai digest` 的「会话自主性」榜单中给出团队均值与人均干预率排行,可用于验证某个 skill / rule 上线后干预率是否下降。无 transcript 的工具(如 Cursor)会优雅降级,只统计 `correction`。 +#### 对话量与 Token 用量 + +每个会话卡片还会显示两个徽标: + +| 徽标 | 含义 | 数据来源 | +|------|------|----------| +| `💬 N` | 该会话里**人类对话的轮数**(发了几次 prompt) | `UserPromptSubmit` 事件数 | +| `⛁ X` | 该会话累计 **token 用量**(鼠标悬停看 输入 / 输出 / 缓存读 / 缓存写 明细) | Claude Code transcript 的 `message.usage`(按 `message.id` 去重,避免重复计数) | + +> 隐私:只统计**轮数与 token 数量**,不落地任何 prompt 或 transcript 原文。 + +这两项同样随 `teamai pull` 聚合到 `stats/.yaml`(`prompts` 与 `tokens` 字段),并在 `teamai digest` 的「对话量与 Token 用量」板块给出团队对话总轮数、token 总量(分桶)与人均 token 用量排行。拿不到 transcript 的工具(如 Cursor)会优雅降级:仍统计对话轮数,token 显示为 0 / N/A。 + ### Hooks `teamai init` 自动注入的 Hooks: diff --git a/src/__tests__/conversation-token-e2e.test.ts b/src/__tests__/conversation-token-e2e.test.ts new file mode 100644 index 0000000..b680391 --- /dev/null +++ b/src/__tests__/conversation-token-e2e.test.ts @@ -0,0 +1,143 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import fs from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; +import { Readable } from 'node:stream'; + +vi.mock('../utils/logger.js', () => ({ + log: { info: vi.fn(), success: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, +})); + +import { + dashboardReport, + readEvents, + rebuildSessions, + aggregateSessionMetrics, +} from '../dashboard-collector.js'; +import { computePromptTokenDelta, mergePromptTokenStats } from '../team-push.js'; +import { summarizeConversation } from '../digest.js'; +import type { UserStats } from '../types.js'; + +// ─── End-to-end: conversation-turn count + token usage ─── +// +// hook STDIN ──dashboardReport()──▶ events.jsonl ──rebuildSessions──▶ session cards +// │ +// ▼ +// aggregate → delta → stats/.yaml → digest +// + +let tmpDir: string; +let originalHome: string; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'teamai-ct-e2e-')); + originalHome = process.env.HOME ?? ''; + process.env.HOME = tmpDir; +}); + +afterEach(() => { + process.env.HOME = originalHome; + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +async function runHook(payload: Record, tool = 'claude'): Promise { + const raw = JSON.stringify(payload); + const fake = Readable.from([Buffer.from(raw, 'utf-8')]) as Readable & { isTTY?: boolean }; + fake.isTTY = false; + const orig = Object.getOwnPropertyDescriptor(process, 'stdin')!; + Object.defineProperty(process, 'stdin', { value: fake, configurable: true }); + try { + await dashboardReport(tool); + } finally { + Object.defineProperty(process, 'stdin', orig); + } +} + +/** + * Write a transcript that mirrors the real Claude Code schema, including the + * quirk that a single assistant turn (one message.id) is split across multiple + * content-block lines that each repeat the SAME usage object. + */ +function writeTranscript(): string { + const p = path.join(tmpDir, 'transcript.jsonl'); + const usage1 = { input_tokens: 100, output_tokens: 50, cache_read_input_tokens: 1000, cache_creation_input_tokens: 200 }; + const usage2 = { input_tokens: 20, output_tokens: 80, cache_read_input_tokens: 1500, cache_creation_input_tokens: 0 }; + const lines = [ + JSON.stringify({ type: 'user', message: { content: [{ type: 'text', text: 'create hello.txt' }] } }), + // Turn 1: one message id, two content-block lines (text + tool_use), same usage repeated. + JSON.stringify({ type: 'assistant', message: { id: 'msg_A', usage: usage1, content: [{ type: 'text', text: 'sure' }] } }), + JSON.stringify({ type: 'assistant', message: { id: 'msg_A', usage: usage1, content: [{ type: 'tool_use', id: 'toolu_1', name: 'Write' }] } }), + JSON.stringify({ type: 'user', message: { content: [{ type: 'tool_result', tool_use_id: 'toolu_1', content: 'ok' }] } }), + // Turn 2: different message id. + JSON.stringify({ type: 'assistant', message: { id: 'msg_B', usage: usage2, content: [{ type: 'text', text: 'done' }] } }), + ]; + fs.writeFileSync(p, lines.join('\n') + '\n'); + return p; +} + +describe('conversation + token metric — end to end', () => { + it('captures prompt count + deduped token usage through the full pipeline', async () => { + const sessionId = 'ct-e2e-1'; + const cwd = '/home/jeff/project'; + const transcriptPath = writeTranscript(); + + // A realistic hook sequence: two human turns, one tool use, then stop. + await runHook({ hook_event_name: 'SessionStart', session_id: sessionId, cwd }); + await runHook({ hook_event_name: 'UserPromptSubmit', session_id: sessionId, cwd, prompt: 'create hello.txt' }); + await runHook({ hook_event_name: 'PostToolUse', session_id: sessionId, cwd, tool_name: 'Write' }); + await runHook({ hook_event_name: 'UserPromptSubmit', session_id: sessionId, cwd, prompt: 'now add a comment' }); + await runHook({ hook_event_name: 'Stop', session_id: sessionId, cwd, transcript_path: transcriptPath }); + + // Events persisted; Stop carries the deduped token snapshot. + const events = await readEvents(); + const stopEvent = events.find((e) => e.type === 'stop')!; + // msg_A counted once (not twice) + msg_B: + expect(stopEvent.tokens).toEqual({ input: 120, output: 130, cacheRead: 2500, cacheCreation: 200 }); + + // Dashboard rebuild surfaces prompt count + tokens on the card. + const sessions = rebuildSessions(events); + expect(sessions).toHaveLength(1); + const s = sessions[0]; + expect(s.promptCount).toBe(2); + expect(s.tokens).toEqual({ input: 120, output: 130, cacheRead: 2500, cacheCreation: 200 }); + + // Team reporting: delta → stats → digest. + const metrics = aggregateSessionMetrics(events); + const { delta, nextReported } = computePromptTokenDelta(metrics, {}); + expect(delta.prompts).toBe(2); + expect(delta.tokens).toEqual({ input: 120, output: 130, cacheRead: 2500, cacheCreation: 200 }); + + const merged = mergePromptTokenStats(undefined, undefined, delta); + const userStats: UserStats = { + username: 'jeff', + updatedAt: new Date().toISOString(), + skills: {}, + prompts: merged.prompts, + tokens: merged.tokens, + }; + const summary = summarizeConversation([userStats])!; + expect(summary.totalPrompts).toBe(2); + expect(summary.totalTokens).toBe(120 + 130 + 2500 + 200); + expect(summary.ranked[0].username).toBe('jeff'); + + // Idempotent: re-reporting the same events yields no new delta. + const second = computePromptTokenDelta(metrics, nextReported); + expect(second.delta.prompts).toBe(0); + expect(second.delta.tokens).toEqual({ input: 0, output: 0, cacheRead: 0, cacheCreation: 0 }); + }); + + it('degrades gracefully for tools with no transcript (Cursor): prompts only, zero tokens', async () => { + const sessionId = 'ct-e2e-cursor'; + const cwd = '/home/jeff/project'; + + await runHook({ hook_event_name: 'sessionStart', session_id: sessionId, cwd }, 'cursor'); + await runHook({ hook_event_name: 'beforeSubmitPrompt', session_id: sessionId, cwd, prompt: 'hi' }, 'cursor'); + await runHook({ hook_event_name: 'stop', session_id: sessionId, cwd }, 'cursor'); + + const events = await readEvents(); + const sessions = rebuildSessions(events); + const s = sessions[0]; + expect(s.promptCount).toBe(1); + expect(s.tokens).toEqual({ input: 0, output: 0, cacheRead: 0, cacheCreation: 0 }); + }); +}); diff --git a/src/__tests__/conversation-token-metrics.test.ts b/src/__tests__/conversation-token-metrics.test.ts new file mode 100644 index 0000000..b3bdfbd --- /dev/null +++ b/src/__tests__/conversation-token-metrics.test.ts @@ -0,0 +1,227 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import fs from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; +import { + scanTranscriptStop, + aggregateSessionMetrics, + rebuildSessions, +} from '../dashboard-collector.js'; +import { computePromptTokenDelta, mergePromptTokenStats } from '../team-push.js'; +import { summarizeConversation, formatTokenCount } from '../digest.js'; +import type { DashboardEvent, SessionMetrics, TokenUsage, UserStats } from '../types.js'; + +let tmpDir: string; +let originalHome: string; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'teamai-convtok-')); + originalHome = process.env.HOME ?? ''; + process.env.HOME = tmpDir; +}); + +afterEach(() => { + process.env.HOME = originalHome; + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +// An assistant transcript line carrying token usage for one message id. +function assistantLine(id: string, usage: Partial>, blockType = 'text'): string { + return JSON.stringify({ + type: 'assistant', + message: { + id, + usage: { + input_tokens: usage.input ?? 0, + output_tokens: usage.output ?? 0, + cache_read_input_tokens: usage.cacheRead ?? 0, + cache_creation_input_tokens: usage.cacheCreation ?? 0, + }, + content: [{ type: blockType, text: 'x' }], + }, + }); +} + +function writeTranscript(lines: string[]): string { + const p = path.join(tmpDir, 'transcript.jsonl'); + fs.writeFileSync(p, lines.join('\n') + '\n'); + return p; +} + +// ─── scanTranscriptStop: token summing + dedup ────────── + +describe('scanTranscriptStop — token usage', () => { + it('sums input/output/cache tokens across assistant messages', async () => { + const p = writeTranscript([ + assistantLine('msg_1', { input: 100, output: 20, cacheRead: 50, cacheCreation: 10 }), + assistantLine('msg_2', { input: 40, output: 5, cacheRead: 0, cacheCreation: 3 }), + ]); + const { tokens } = await scanTranscriptStop(p); + expect(tokens).toEqual({ input: 140, output: 25, cacheRead: 50, cacheCreation: 13 }); + }); + + it('deduplicates usage by message.id (multi-line turns count once)', async () => { + // Claude Code repeats the same usage on every content-block line of one turn. + const dup = assistantLine('msg_dup', { input: 1000, output: 200, cacheRead: 300, cacheCreation: 50 }); + const dupToolBlock = assistantLine('msg_dup', { input: 1000, output: 200, cacheRead: 300, cacheCreation: 50 }, 'tool_use'); + const p = writeTranscript([dup, dupToolBlock, dup]); + const { tokens } = await scanTranscriptStop(p); + expect(tokens).toEqual({ input: 1000, output: 200, cacheRead: 300, cacheCreation: 50 }); + }); + + it('returns zero tokens for a transcript with no usage', async () => { + const p = writeTranscript([JSON.stringify({ type: 'user', message: { content: [{ type: 'text', text: 'hi' }] } })]); + const { tokens } = await scanTranscriptStop(p); + expect(tokens).toEqual({ input: 0, output: 0, cacheRead: 0, cacheCreation: 0 }); + }); + + it('ignores non-numeric / missing usage fields gracefully', async () => { + const p = writeTranscript([ + JSON.stringify({ type: 'assistant', message: { id: 'm', usage: { input_tokens: 'oops', output_tokens: 7 }, content: [] } }), + ]); + const { tokens } = await scanTranscriptStop(p); + expect(tokens).toEqual({ input: 0, output: 7, cacheRead: 0, cacheCreation: 0 }); + }); +}); + +// ─── aggregateSessionMetrics: prompts + tokens ────────── + +describe('aggregateSessionMetrics', () => { + it('counts prompt_submit events as conversation turns', () => { + const ts = new Date().toISOString(); + const events: DashboardEvent[] = [ + { type: 'session_start', timestamp: ts, sessionId: 's1', tool: 'claude' }, + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'do a' }, + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'do b' }, + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'do c' }, + ]; + const m = aggregateSessionMetrics(events).get('s1')!; + expect(m.prompts).toBe(3); + }); + + it('takes the latest Stop token snapshot (idempotent)', () => { + const ts = new Date().toISOString(); + const events: DashboardEvent[] = [ + { type: 'stop', timestamp: ts, sessionId: 's1', tool: 'claude', tokens: { input: 10, output: 1, cacheRead: 0, cacheCreation: 0 } }, + { type: 'stop', timestamp: ts, sessionId: 's1', tool: 'claude', tokens: { input: 30, output: 4, cacheRead: 2, cacheCreation: 1 } }, + ]; + const m = aggregateSessionMetrics(events).get('s1')!; + expect(m.tokens).toEqual({ input: 30, output: 4, cacheRead: 2, cacheCreation: 1 }); + }); +}); + +// ─── rebuildSessions: promptCount + tokens on the card ── + +describe('rebuildSessions — conversation + token fields', () => { + it('exposes promptCount and tokens on the session', () => { + const ts = new Date().toISOString(); + const events: DashboardEvent[] = [ + { type: 'session_start', timestamp: ts, sessionId: 's1', tool: 'claude', cwd: '/p' }, + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'a' }, + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'b' }, + { type: 'stop', timestamp: ts, sessionId: 's1', tool: 'claude', tokens: { input: 200, output: 30, cacheRead: 10, cacheCreation: 5 } }, + ]; + const session = rebuildSessions(events).find((s) => s.sessionId === 's1')!; + expect(session.promptCount).toBe(2); + expect(session.tokens).toEqual({ input: 200, output: 30, cacheRead: 10, cacheCreation: 5 }); + }); + + it('defaults to zero tokens for tools without a transcript', () => { + const ts = new Date().toISOString(); + const events: DashboardEvent[] = [ + { type: 'session_start', timestamp: ts, sessionId: 's1', tool: 'cursor', cwd: '/p' }, + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'cursor', promptSummary: 'a' }, + ]; + const session = rebuildSessions(events).find((s) => s.sessionId === 's1')!; + expect(session.promptCount).toBe(1); + expect(session.tokens).toEqual({ input: 0, output: 0, cacheRead: 0, cacheCreation: 0 }); + }); +}); + +// ─── computePromptTokenDelta / mergePromptTokenStats ──── + +function metrics(prompts: number, tokens: TokenUsage): SessionMetrics { + return { interrupt: 0, toolReject: 0, correction: 0, prompts, tokens }; +} + +describe('computePromptTokenDelta', () => { + it('counts a brand-new session fully', () => { + const current = new Map([['s1', metrics(3, { input: 100, output: 20, cacheRead: 5, cacheCreation: 2 })]]); + const { delta, nextReported } = computePromptTokenDelta(current, {}); + expect(delta.prompts).toBe(3); + expect(delta.tokens).toEqual({ input: 100, output: 20, cacheRead: 5, cacheCreation: 2 }); + expect(nextReported.s1).toEqual({ prompts: 3, tokens: { input: 100, output: 20, cacheRead: 5, cacheCreation: 2 } }); + }); + + it('is idempotent — re-reporting the same snapshot yields zero delta', () => { + const cur = metrics(3, { input: 100, output: 20, cacheRead: 5, cacheCreation: 2 }); + const reported = { s1: { prompts: cur.prompts, tokens: cur.tokens } }; + const { delta } = computePromptTokenDelta(new Map([['s1', cur]]), reported); + expect(delta.prompts).toBe(0); + expect(delta.tokens).toEqual({ input: 0, output: 0, cacheRead: 0, cacheCreation: 0 }); + }); + + it('reports only the positive change since last report', () => { + const current = new Map([['s1', metrics(5, { input: 300, output: 40, cacheRead: 10, cacheCreation: 4 })]]); + const reported = { s1: { prompts: 2, tokens: { input: 100, output: 40, cacheRead: 10, cacheCreation: 1 } } }; + const { delta } = computePromptTokenDelta(current, reported); + expect(delta.prompts).toBe(3); + expect(delta.tokens).toEqual({ input: 200, output: 0, cacheRead: 0, cacheCreation: 3 }); + }); + + it('never produces negative deltas if a snapshot shrinks', () => { + const current = new Map([['s1', metrics(1, { input: 10, output: 0, cacheRead: 0, cacheCreation: 0 })]]); + const reported = { s1: { prompts: 9, tokens: { input: 999, output: 0, cacheRead: 0, cacheCreation: 0 } } }; + const { delta } = computePromptTokenDelta(current, reported); + expect(delta.prompts).toBe(0); + expect(delta.tokens.input).toBe(0); + }); +}); + +describe('mergePromptTokenStats', () => { + it('initializes from undefined existing', () => { + const out = mergePromptTokenStats(undefined, undefined, { prompts: 3, tokens: { input: 10, output: 2, cacheRead: 1, cacheCreation: 0 } }); + expect(out).toEqual({ prompts: 3, tokens: { input: 10, output: 2, cacheRead: 1, cacheCreation: 0 } }); + }); + + it('accumulates onto existing totals', () => { + const out = mergePromptTokenStats( + 5, + { input: 100, output: 10, cacheRead: 5, cacheCreation: 2 }, + { prompts: 2, tokens: { input: 50, output: 5, cacheRead: 0, cacheCreation: 1 } }, + ); + expect(out).toEqual({ prompts: 7, tokens: { input: 150, output: 15, cacheRead: 5, cacheCreation: 3 } }); + }); +}); + +// ─── digest: summarizeConversation + formatTokenCount ─── + +describe('formatTokenCount', () => { + it('formats small / K / M ranges', () => { + expect(formatTokenCount(999)).toBe('999'); + expect(formatTokenCount(1500)).toBe('1.5K'); + expect(formatTokenCount(2_000_000)).toBe('2.0M'); + }); +}); + +describe('summarizeConversation', () => { + function user(username: string, prompts: number, tokens: TokenUsage): UserStats { + return { username, updatedAt: '', skills: {}, prompts, tokens }; + } + + it('returns null when no user reported prompts or tokens', () => { + expect(summarizeConversation([{ username: 'a', updatedAt: '', skills: {} }])).toBeNull(); + }); + + it('totals prompts and tokens and ranks users by token usage', () => { + const stats = [ + user('alice', 10, { input: 1000, output: 100, cacheRead: 0, cacheCreation: 0 }), + user('bob', 4, { input: 5000, output: 500, cacheRead: 0, cacheCreation: 0 }), + ]; + const s = summarizeConversation(stats)!; + expect(s.totalPrompts).toBe(14); + expect(s.tokens).toEqual({ input: 6000, output: 600, cacheRead: 0, cacheCreation: 0 }); + expect(s.totalTokens).toBe(6600); + expect(s.ranked[0].username).toBe('bob'); // higher token usage first + }); +}); diff --git a/src/dashboard-collector.ts b/src/dashboard-collector.ts index 0b40e6f..c138343 100644 --- a/src/dashboard-collector.ts +++ b/src/dashboard-collector.ts @@ -18,10 +18,14 @@ import { INTERVENTION_SCAN_MAX_BYTES, TRANSCRIPT_INTERRUPT_PREFIX, TRANSCRIPT_REJECT_MARKERS, + emptyTokenUsage, + addTokenUsage, type DashboardEvent, type DashboardEventType, type DashboardSession, type DashboardSessionStatus, + type TokenUsage, + type SessionMetrics, } from './types.js'; // ─── Event collection data flow ───────────────────────── @@ -108,26 +112,41 @@ export async function readLastAssistantOutput(transcriptPath: string): Promise { +): Promise { let interrupt = 0; let toolReject = 0; + const tokens = emptyTokenUsage(); + // Dedup assistant usage by message id (one turn spans many JSONL lines). + const countedMessageIds = new Set(); try { const stat = await fs.promises.stat(transcriptPath); - if (stat.size === 0) return { interrupt, toolReject }; + if (stat.size === 0) return { interrupt, toolReject, tokens }; if (stat.size > INTERVENTION_SCAN_MAX_BYTES) { - log.warn(`dashboard: transcript too large to scan for interventions (${stat.size} bytes)`); - return { interrupt, toolReject }; + log.warn(`dashboard: transcript too large to scan (${stat.size} bytes)`); + return { interrupt, toolReject, tokens }; } const rl = readline.createInterface({ @@ -137,16 +156,33 @@ export async function countInterventions( for await (const line of rl) { const trimmed = line.trim(); - // Cheap pre-filter: intervention signals only ever live in `user` entries, - // so skip JSON.parse on the (vast majority of) other lines. - if (!trimmed || !trimmed.includes('"user"')) continue; + // Cheap pre-filter: we only care about `user` entries (interventions) and + // `assistant` entries (token usage); skip JSON.parse on anything else. + if (!trimmed || (!trimmed.includes('"user"') && !trimmed.includes('"assistant"'))) continue; - let entry: { type?: string; message?: { content?: unknown } }; + let entry: { + type?: string; + message?: { content?: unknown; id?: unknown; usage?: Record }; + }; try { entry = JSON.parse(trimmed); } catch { continue; } + + if (entry.type === 'assistant') { + const usage = entry.message?.usage; + const id = entry.message?.id; + if (usage && typeof id === 'string' && !countedMessageIds.has(id)) { + countedMessageIds.add(id); + tokens.input += toNum(usage.input_tokens); + tokens.output += toNum(usage.output_tokens); + tokens.cacheRead += toNum(usage.cache_read_input_tokens); + tokens.cacheCreation += toNum(usage.cache_creation_input_tokens); + } + continue; + } + if (entry.type !== 'user' || !Array.isArray(entry.message?.content)) continue; for (const item of entry.message.content as Array>) { @@ -167,9 +203,24 @@ export async function countInterventions( } } } catch (e) { - log.warn(`dashboard: failed to scan interventions: ${(e as Error).message}`); + log.warn(`dashboard: failed to scan transcript: ${(e as Error).message}`); } + return { interrupt, toolReject, tokens }; +} + +/** Coerce an unknown usage field to a non-negative finite number (0 otherwise). */ +function toNum(v: unknown): number { + return typeof v === 'number' && Number.isFinite(v) && v > 0 ? v : 0; +} + +/** + * Backward-compatible intervention-only scan. Delegates to {@link scanTranscriptStop}. + */ +export async function countInterventions( + transcriptPath: string, +): Promise<{ interrupt: number; toolReject: number }> { + const { interrupt, toolReject } = await scanTranscriptStop(transcriptPath); return { interrupt, toolReject }; } @@ -275,10 +326,14 @@ export async function parseHookEvent( if (output) { event.stoppedOutput = output; } - // Full-transcript snapshot of interrupt/tool_reject counts (idempotent). - const iv = await countInterventions(hookData.transcript_path); - if (iv.interrupt > 0 || iv.toolReject > 0) { - event.interventions = iv; + // Full-transcript snapshot of interrupt/tool_reject counts + token usage (idempotent). + const scan = await scanTranscriptStop(hookData.transcript_path); + if (scan.interrupt > 0 || scan.toolReject > 0) { + event.interventions = { interrupt: scan.interrupt, toolReject: scan.toolReject }; + } + if (scan.tokens.input > 0 || scan.tokens.output > 0 + || scan.tokens.cacheRead > 0 || scan.tokens.cacheCreation > 0) { + event.tokens = scan.tokens; } } @@ -384,6 +439,8 @@ export function rebuildSessions(events: DashboardEvent[]): DashboardSession[] { stoppedAt: '', interventions: { interrupt: 0, toolReject: 0, correction: 0 }, interventionCount: 0, + promptCount: 0, + tokens: emptyTokenUsage(), }; sessions.set(event.sessionId, session); } @@ -431,13 +488,15 @@ export function rebuildSessions(events: DashboardEvent[]): DashboardSession[] { } } - // Fill per-session intervention counts (single source of truth: aggregate fold) - const interventionMap = aggregateSessionInterventions(events); + // Fill per-session metrics (single source of truth: aggregate fold) + const metricsMap = aggregateSessionMetrics(events); for (const session of sessions.values()) { - const iv = interventionMap.get(session.sessionId); - if (iv) { - session.interventions = iv; - session.interventionCount = iv.interrupt + iv.toolReject + iv.correction; + const m = metricsMap.get(session.sessionId); + if (m) { + session.interventions = { interrupt: m.interrupt, toolReject: m.toolReject, correction: m.correction }; + session.interventionCount = m.interrupt + m.toolReject + m.correction; + session.promptCount = m.prompts; + session.tokens = m.tokens; } } @@ -485,40 +544,47 @@ export function rebuildSessions(events: DashboardEvent[]): DashboardSession[] { } /** - * Aggregate per-session intervention counts from raw events (no timeout filtering). + * Aggregate per-session metrics from raw events (no timeout filtering). * - * - interrupt / toolReject: taken from the latest Stop event's snapshot (idempotent — - * a later Stop overrides an earlier one, so re-scanning never double-counts). + * - interrupt / toolReject / tokens: taken from the latest Stop event's snapshot + * (idempotent — a later Stop overrides an earlier one, so re-scanning never + * double-counts). * - correction: a prompt_submit arriving within CORRECTION_WINDOW_MS of a Stop AND * matching a correction keyword. Each Stop is consumed by the next prompt only once. + * - prompts: total number of prompt_submit events (human conversation turns). * * Used both by rebuildSessions (live dashboard) and by the team-stats reporter. */ -export function aggregateSessionInterventions( +export function aggregateSessionMetrics( events: DashboardEvent[], -): Map { - const map = new Map(); +): Map { + const map = new Map(); const lastStopAt = new Map(); for (const event of events) { - let iv = map.get(event.sessionId); - if (!iv) { - iv = { interrupt: 0, toolReject: 0, correction: 0 }; - map.set(event.sessionId, iv); + let m = map.get(event.sessionId); + if (!m) { + m = { interrupt: 0, toolReject: 0, correction: 0, prompts: 0, tokens: emptyTokenUsage() }; + map.set(event.sessionId, m); } if (event.type === 'stop') { if (event.interventions) { - iv.interrupt = event.interventions.interrupt; - iv.toolReject = event.interventions.toolReject; + m.interrupt = event.interventions.interrupt; + m.toolReject = event.interventions.toolReject; + } + // Token snapshot is a full cumulative total for the session — latest wins. + if (event.tokens) { + m.tokens = { ...event.tokens }; } lastStopAt.set(event.sessionId, new Date(event.timestamp).getTime()); } else if (event.type === 'prompt_submit') { + m.prompts++; const stopAt = lastStopAt.get(event.sessionId); if (stopAt !== undefined) { const gap = new Date(event.timestamp).getTime() - stopAt; if (gap >= 0 && gap <= CORRECTION_WINDOW_MS && isCorrectionPrompt(event.promptSummary)) { - iv.correction++; + m.correction++; } // Each stop is consumed once — a later prompt is a new task, not a correction. lastStopAt.delete(event.sessionId); @@ -529,6 +595,19 @@ export function aggregateSessionInterventions( return map; } +/** + * Backward-compatible intervention-only view. Delegates to {@link aggregateSessionMetrics}. + */ +export function aggregateSessionInterventions( + events: DashboardEvent[], +): Map { + const out = new Map(); + for (const [sid, m] of aggregateSessionMetrics(events)) { + out.set(sid, { interrupt: m.interrupt, toolReject: m.toolReject, correction: m.correction }); + } + return out; +} + // ─── JSONL compaction ─────────────────────────────────── /** diff --git a/src/dashboard-html.ts b/src/dashboard-html.ts index 0f9880a..94d7bcc 100644 --- a/src/dashboard-html.ts +++ b/src/dashboard-html.ts @@ -181,6 +181,24 @@ export function getDashboardHtml(port: number): string { font-weight: 600; cursor: help; } + .convo-badge { + font-size: 11px; + padding: 2px 8px; + border-radius: 12px; + background: rgba(59, 130, 246, 0.15); + color: #3b82f6; + font-weight: 600; + cursor: help; + } + .token-badge { + font-size: 11px; + padding: 2px 8px; + border-radius: 12px; + background: rgba(16, 185, 129, 0.15); + color: #10b981; + font-weight: 600; + cursor: help; + } .status-text { font-size: 12px; color: var(--text-muted); @@ -556,6 +574,25 @@ export function getDashboardHtml(port: number): string { '中断 ' + (iv.interrupt || 0) + ' · 拒绝 ' + (iv.toolReject || 0) + ' · 纠偏 ' + (iv.correction || 0); } + function fmtTokens(n) { + n = n || 0; + if (n >= 1000000) return (n / 1000000).toFixed(1) + 'M'; + if (n >= 1000) return (n / 1000).toFixed(1) + 'K'; + return String(n); + } + + function tokenTotal(t) { + t = t || {}; + return (t.input || 0) + (t.output || 0) + (t.cacheRead || 0) + (t.cacheCreation || 0); + } + + function tokenTitle(t) { + t = t || {}; + return 'Token 总量 ' + fmtTokens(tokenTotal(t)) + ' — ' + + '输入 ' + fmtTokens(t.input) + ' · 输出 ' + fmtTokens(t.output) + + ' · 缓存读 ' + fmtTokens(t.cacheRead) + ' · 缓存写 ' + fmtTokens(t.cacheCreation); + } + function renderCard(s) { const isExpanded = expandedCards.has(s.sessionId); const isStopped = s.status === 'stopped'; @@ -563,6 +600,13 @@ export function getDashboardHtml(port: number): string { const interventionBadge = (s.interventionCount > 0) ? '⚠ ' + s.interventionCount + '' : ''; + const convoBadge = (s.promptCount > 0) + ? '💬 ' + s.promptCount + '' + : ''; + const tokenSum = tokenTotal(s.tokens); + const tokenBadge = (tokenSum > 0) + ? '⛁ ' + fmtTokens(tokenSum) + '' + : ''; // ─── Expanded detail panel ─── let detail = ''; @@ -612,6 +656,8 @@ export function getDashboardHtml(port: number): string { '' + '' + escapeHtml(s.tool) + '' + '' + dur + '' + + convoBadge + + tokenBadge + interventionBadge + '' + statusLabel(s.status) + '' + '' + diff --git a/src/digest.ts b/src/digest.ts index b7cba9b..2dee95d 100644 --- a/src/digest.ts +++ b/src/digest.ts @@ -7,7 +7,8 @@ import { parseLearningDoc, titleFromFilename } from './utils/search-index.js'; import { requireInit, detectProjectConfig } from './config.js'; import { calculateTeamHealth } from './skill-health.js'; import { createGit } from './utils/git.js'; -import type { GlobalOptions, UserStats } from './types.js'; +import type { GlobalOptions, UserStats, TokenUsage } from './types.js'; +import { totalTokens } from './types.js'; interface SkillChange { name: string; @@ -292,6 +293,52 @@ export function summarizeInterventions(teamStats: UserStats[]): InterventionSumm }; } +/** Aggregated team-wide conversation-turn + token-usage summary (Issue #75). */ +export interface ConversationSummary { + /** Total human conversation turns (UserPromptSubmit) across the team. */ + totalPrompts: number; + /** Team-wide cumulative token usage. */ + tokens: TokenUsage; + /** Grand total of all token buckets. */ + totalTokens: number; + /** Per-user ranking by token usage (highest first). */ + ranked: Array<{ username: string; prompts: number; tokens: number }>; +} + +/** Compact a token count into a human-friendly string (e.g. 12.3M, 4.5K). */ +export function formatTokenCount(n: number): string { + if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`; + if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`; + return String(n); +} + +/** + * Summarize the conversation-turn count and token usage across all reported team + * stats. Returns null when no user has reported any prompts or tokens yet. + */ +export function summarizeConversation(teamStats: UserStats[]): ConversationSummary | null { + const users = teamStats.filter( + (u) => (u.prompts && u.prompts > 0) || (u.tokens && totalTokens(u.tokens) > 0), + ); + if (users.length === 0) return null; + + let totalPrompts = 0; + const tokens: TokenUsage = { input: 0, output: 0, cacheRead: 0, cacheCreation: 0 }; + + const ranked = users.map((u) => { + const p = u.prompts ?? 0; + const t = u.tokens ?? { input: 0, output: 0, cacheRead: 0, cacheCreation: 0 }; + totalPrompts += p; + tokens.input += t.input; + tokens.output += t.output; + tokens.cacheRead += t.cacheRead; + tokens.cacheCreation += t.cacheCreation; + return { username: u.username, prompts: p, tokens: totalTokens(t) }; + }).sort((a, b) => b.tokens - a.tokens); + + return { totalPrompts, tokens, totalTokens: totalTokens(tokens), ranked }; +} + /** * Generate and display weekly team digest. */ @@ -404,6 +451,24 @@ export async function generateDigest(options: GlobalOptions): Promise { console.log(''); } + // Conversation turns + token usage (Issue #75) + const conversation = summarizeConversation(teamStats); + if (conversation) { + const t = conversation.tokens; + console.log('💬 对话量与 Token 用量:'); + console.log(` 人工对话总轮数: ${conversation.totalPrompts} 次`); + console.log( + ` Token 总量: ${formatTokenCount(conversation.totalTokens)} ` + + `(输入 ${formatTokenCount(t.input)} · 输出 ${formatTokenCount(t.output)} · ` + + `缓存读 ${formatTokenCount(t.cacheRead)} · 缓存写 ${formatTokenCount(t.cacheCreation)})`, + ); + console.log(' Token 用量排行 (高 → 低):'); + for (const r of conversation.ranked.slice(0, 10)) { + console.log(` • ${r.username}: ${formatTokenCount(r.tokens)} tokens (${r.prompts} 轮对话)`); + } + console.log(''); + } + console.log('─'.repeat(52)); console.log('Generated by teamai digest'); } catch (e) { diff --git a/src/team-push.ts b/src/team-push.ts index e6a7880..e79733d 100644 --- a/src/team-push.ts +++ b/src/team-push.ts @@ -2,16 +2,25 @@ import YAML from 'yaml'; import path from 'node:path'; import { readUsageEvents, truncateUsageAfterReport } from './usage-tracker.js'; import { aggregateUsage } from './stats.js'; -import { readEvents, aggregateSessionInterventions } from './dashboard-collector.js'; +import { readEvents, aggregateSessionMetrics } from './dashboard-collector.js'; import { createGit, pushRepoDirectly, pullRepo, resetToCleanMaster } from './utils/git.js'; import { writeFile, readFileSafe, ensureDir, pathExists, listFiles } from './utils/fs.js'; import { log } from './utils/logger.js'; -import type { UserStats, UserInterventionStats } from './types.js'; -import { VOTES_LOCAL_DIR } from './types.js'; +import type { UserStats, UserInterventionStats, SessionMetrics, TokenUsage } from './types.js'; +import { VOTES_LOCAL_DIR, emptyTokenUsage, addTokenUsage } from './types.js'; /** Snapshot of already-reported per-session intervention counts (idempotency basis). */ type ReportedInterventions = Record; +/** Snapshot of already-reported per-session prompt counts + token usage (idempotency basis). */ +type ReportedPromptTokens = Record; + +/** Cumulative delta for conversation-turn count + token usage (Issue #75). */ +interface PromptTokenDelta { + prompts: number; + tokens: TokenUsage; +} + // ─── Auto-report flow (during teamai pull) ───────────── // // teamai pull @@ -174,6 +183,93 @@ function hasInterventionDelta(d: UserInterventionStats): boolean { return d.sessions > 0 || d.interrupt > 0 || d.toolReject > 0 || d.correction > 0; } +// ─── Conversation-turn + token reporting (Issue #75) ─── +// +// events.jsonl ──aggregateSessionMetrics──▶ current per-session {prompts, tokens} +// │ │ +// ▼ ▼ +// reported-prompt-tokens.json (last reported) ──delta──▶ merge into stats/.yaml +// +// Separate snapshot from interventions so each metric stays independently idempotent. +// + +/** Path to the local prompt/token reported snapshot (evaluated at call time for tests). */ +function getReportedPromptTokensPath(): string { + return path.join(process.env.HOME ?? '', '.teamai', 'dashboard', 'reported-prompt-tokens.json'); +} + +async function readReportedPromptTokens(): Promise { + try { + const content = await readFileSafe(getReportedPromptTokensPath()); + if (!content) return {}; + const parsed = JSON.parse(content); + return parsed && typeof parsed === 'object' ? parsed : {}; + } catch { + return {}; + } +} + +async function writeReportedPromptTokens(data: ReportedPromptTokens): Promise { + try { + const p = getReportedPromptTokensPath(); + await ensureDir(path.dirname(p)); + await writeFile(p, JSON.stringify(data)); + } catch (e) { + log.error(`Failed to persist reported prompt/token snapshot: ${(e as Error).message}`); + } +} + +/** Field-by-field positive token delta (never negative if a snapshot shrinks). */ +function tokenDelta(cur: TokenUsage, prev: TokenUsage | undefined): TokenUsage { + return { + input: Math.max(0, cur.input - (prev?.input ?? 0)), + output: Math.max(0, cur.output - (prev?.output ?? 0)), + cacheRead: Math.max(0, cur.cacheRead - (prev?.cacheRead ?? 0)), + cacheCreation: Math.max(0, cur.cacheCreation - (prev?.cacheCreation ?? 0)), + }; +} + +/** + * Compute the prompt-count + token delta to report: for each current session, the + * positive change since it was last reported. Idempotent (a re-run reports nothing + * new), and never negative if a snapshot shrinks. The next snapshot keeps only + * sessions still present in events.jsonl (compacted sessions stay folded into totals). + */ +export function computePromptTokenDelta( + current: Map, + reported: ReportedPromptTokens, +): { delta: PromptTokenDelta; nextReported: ReportedPromptTokens } { + const delta: PromptTokenDelta = { prompts: 0, tokens: emptyTokenUsage() }; + const nextReported: ReportedPromptTokens = {}; + + for (const [sid, cur] of current) { + const prev = reported[sid]; + delta.prompts += Math.max(0, cur.prompts - (prev?.prompts ?? 0)); + delta.tokens = addTokenUsage(delta.tokens, tokenDelta(cur.tokens, prev?.tokens)); + nextReported[sid] = { prompts: cur.prompts, tokens: cur.tokens }; + } + + return { delta, nextReported }; +} + +/** Accumulate a prompt/token delta onto the user's existing totals. */ +export function mergePromptTokenStats( + existingPrompts: number | undefined, + existingTokens: TokenUsage | undefined, + delta: PromptTokenDelta, +): { prompts: number; tokens: TokenUsage } { + return { + prompts: (existingPrompts ?? 0) + delta.prompts, + tokens: addTokenUsage(existingTokens, delta.tokens), + }; +} + +/** True when a prompt/token delta carries any new data worth pushing. */ +function hasPromptTokenDelta(d: PromptTokenDelta): boolean { + return d.prompts > 0 || d.tokens.input > 0 || d.tokens.output > 0 + || d.tokens.cacheRead > 0 || d.tokens.cacheCreation > 0; +} + /** * Auto-report usage data to team repo during pull. * Merges new events with existing stats to preserve historical data. @@ -188,16 +284,29 @@ export async function reportUsageToTeam( const events = await readUsageEvents(); const filesToPush: string[] = []; - // Compute the Human Intervention delta from the local dashboard event log. + // Fold the local dashboard event log into per-session metrics once, then derive + // both the intervention delta and the prompt-count/token delta from it. const dashboardEvents = await readEvents(); - const currentInterventions = aggregateSessionInterventions(dashboardEvents); + const metrics = aggregateSessionMetrics(dashboardEvents); + + const currentInterventions = new Map( + [...metrics].map(([sid, m]) => [sid, { interrupt: m.interrupt, toolReject: m.toolReject, correction: m.correction }]), + ); const reportedInterventions = await readReportedInterventions(); const { delta: interventionDelta, nextReported } = computeInterventionDelta( currentInterventions, reportedInterventions, ); + + const reportedPromptTokens = await readReportedPromptTokens(); + const { delta: promptTokenDelta, nextReported: nextReportedPromptTokens } = computePromptTokenDelta( + metrics, + reportedPromptTokens, + ); + const hasUsage = events.length > 0; const hasInterventions = hasInterventionDelta(interventionDelta); + const hasPromptTokens = hasPromptTokenDelta(promptTokenDelta); // Reset any dirty/conflicted state and ensure we're on the default branch before pulling. // Same pattern as push.ts — the team repo is a cache, safe to discard local state. @@ -205,8 +314,8 @@ export async function reportUsageToTeam( await resetToCleanMaster(git, repoPath); await pullRepo(repoPath); - // Process usage and/or intervention stats if there is anything new to report. - if (hasUsage || hasInterventions) { + // Process usage and/or intervention/prompt/token stats if anything is new to report. + if (hasUsage || hasInterventions || hasPromptTokens) { const statsDir = path.join(repoPath, 'stats'); await ensureDir(statsDir); const statsPath = path.join(statsDir, `${username}.yaml`); @@ -219,6 +328,11 @@ export async function reportUsageToTeam( if (hasInterventions) { merged.interventions = mergeInterventionStats(existing?.interventions, interventionDelta); } + if (hasPromptTokens) { + const pt = mergePromptTokenStats(existing?.prompts, existing?.tokens, promptTokenDelta); + merged.prompts = pt.prompts; + merged.tokens = pt.tokens; + } await writeFile(statsPath, YAML.stringify(merged)); filesToPush.push(`stats/${username}.yaml`); @@ -253,8 +367,8 @@ export async function reportUsageToTeam( // Commit and push with timeout const commitMsg = hasUsage ? `[teamai] Update usage stats for ${username}` - : hasInterventions - ? `[teamai] Update intervention stats for ${username}` + : (hasInterventions || hasPromptTokens) + ? `[teamai] Update session stats for ${username}` : `[teamai] Update votes for ${username}`; const pushPromise = pushRepoDirectly(repoPath, commitMsg, filesToPush); @@ -269,12 +383,16 @@ export async function reportUsageToTeam( await truncateUsageAfterReport(events.length); log.debug(`Reported ${events.length} usage events to team repo`); } - // Success — advance the reported-interventions snapshot so we don't re-count. + // Success — advance the reported snapshots so we don't re-count. if (hasInterventions) { await writeReportedInterventions(nextReported); log.debug(`Reported intervention delta (${interventionDelta.sessions} new sessions) to team repo`); } - if (!hasUsage && !hasInterventions) { + if (hasPromptTokens) { + await writeReportedPromptTokens(nextReportedPromptTokens); + log.debug(`Reported prompt/token delta (${promptTokenDelta.prompts} prompts) to team repo`); + } + if (!hasUsage && !hasInterventions && !hasPromptTokens) { log.debug('Pushed pending votes to team repo'); } } catch (e) { diff --git a/src/types.ts b/src/types.ts index de50639..bcedc87 100644 --- a/src/types.ts +++ b/src/types.ts @@ -343,6 +343,16 @@ export interface UserStats { * Cumulative across all reported sessions. Privacy: counts only, no prompt text. */ interventions?: UserInterventionStats; + /** + * Cumulative count of human conversation turns (UserPromptSubmit events) across + * all reported sessions. Privacy: count only, no prompt text. + */ + prompts?: number; + /** + * Cumulative token usage across all reported sessions (Claude Code transcripts + * only; tools without transcripts contribute nothing). Privacy: counts only. + */ + tokens?: TokenUsage; } /** Per-user cumulative intervention totals, persisted to stats/.yaml. */ @@ -386,6 +396,57 @@ export interface SessionRecord { // SSE → browser (session cards with status lights) // +/** + * Token usage breakdown for a session/user, summed from Claude Code transcript + * `message.usage` records (deduplicated by message id). All fields are cumulative + * token counts; tools without a transcript (e.g. Cursor) leave these at zero. + */ +export interface TokenUsage { + /** Sum of usage.input_tokens. */ + input: number; + /** Sum of usage.output_tokens. */ + output: number; + /** Sum of usage.cache_read_input_tokens. */ + cacheRead: number; + /** Sum of usage.cache_creation_input_tokens. */ + cacheCreation: number; +} + +/** A fresh zeroed TokenUsage. */ +export function emptyTokenUsage(): TokenUsage { + return { input: 0, output: 0, cacheRead: 0, cacheCreation: 0 }; +} + +/** Grand total of all token buckets (input + output + cache read + cache creation). */ +export function totalTokens(t: TokenUsage | undefined): number { + if (!t) return 0; + return t.input + t.output + t.cacheRead + t.cacheCreation; +} + +/** Add two TokenUsage values field-by-field (does not mutate inputs). */ +export function addTokenUsage(a: TokenUsage | undefined, b: TokenUsage | undefined): TokenUsage { + return { + input: (a?.input ?? 0) + (b?.input ?? 0), + output: (a?.output ?? 0) + (b?.output ?? 0), + cacheRead: (a?.cacheRead ?? 0) + (b?.cacheRead ?? 0), + cacheCreation: (a?.cacheCreation ?? 0) + (b?.cacheCreation ?? 0), + }; +} + +/** + * Per-session rolled-up metrics, derived from the dashboard event log. + * Used by both the live dashboard (rebuildSessions) and the team-stats reporter. + */ +export interface SessionMetrics { + interrupt: number; + toolReject: number; + correction: number; + /** Number of human conversation turns (UserPromptSubmit events). */ + prompts: number; + /** Cumulative token usage (latest Stop snapshot). */ + tokens: TokenUsage; +} + export type DashboardSessionStatus = 'running' | 'waiting_for_input' | 'error' | 'idle' | 'stopped'; export type DashboardEventType = 'session_start' | 'tool_use' | 'prompt_submit' | 'stop' | 'process_exit'; @@ -421,6 +482,13 @@ export interface DashboardEvent { * rebuildSessions from the stop→prompt_submit event pattern. */ interventions?: { interrupt: number; toolReject: number }; + /** + * Cumulative token usage scanned from the transcript at Stop time. Full snapshot + * (idempotent): each Stop carries the running total for the whole session, so a + * later Stop overrides an earlier one in rebuildSessions. Absent for tools with + * no transcript (e.g. Cursor) and for sessions with no recorded usage. + */ + tokens?: TokenUsage; } export interface DashboardSession { @@ -457,6 +525,10 @@ export interface DashboardSession { interventions: { interrupt: number; toolReject: number; correction: number }; /** Total intervention count (interrupt + toolReject + correction), for sorting/badges */ interventionCount: number; + /** Number of human conversation turns (UserPromptSubmit events) in this session. */ + promptCount: number; + /** Cumulative token usage for this session (zero when no transcript usage). */ + tokens: TokenUsage; } export const DASHBOARD_EVENTS_DIR = `${TEAMAI_HOME}/dashboard`; From 3b3caf5a8fc6e2319c32c38e0eef98720a596c45 Mon Sep 17 00:00:00 2001 From: jeffyxu Date: Tue, 30 Jun 2026 20:53:50 +0800 Subject: [PATCH 2/2] fix(dashboard): source prompt count from the transcript snapshot (#75) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR review: `prompts` was accumulated from prompt_submit events, which compactEvents removes once a session goes stale. After a same-session resume the live count drops below the reported baseline in reported-prompt-tokens.json, so the delta clamps to 0 and post-resume turns are never reported. Make prompts compaction/resume-proof the same way tokens already are: count genuine human turns from the full transcript at Stop time and carry that cumulative snapshot on the Stop event. aggregateSessionMetrics now takes max(live submit count, latest Stop snapshot) — live count covers the pre-Stop window, the snapshot is the durable baseline. Also: fall back to top-level requestId when message.id is missing for token dedup, so a turn without a message id is no longer dropped entirely. Tests: prompt counting (excludes tool_results/interrupts/meta/sidechain), requestId dedup fallback, and a compaction+resume regression proving the delta still reports new turns. Co-authored-by: Cursor --- .../conversation-token-metrics.test.ts | 81 ++++++++++++++++ src/dashboard-collector.ts | 96 +++++++++++++++---- src/types.ts | 7 ++ 3 files changed, 163 insertions(+), 21 deletions(-) diff --git a/src/__tests__/conversation-token-metrics.test.ts b/src/__tests__/conversation-token-metrics.test.ts index b3bdfbd..f1578c4 100644 --- a/src/__tests__/conversation-token-metrics.test.ts +++ b/src/__tests__/conversation-token-metrics.test.ts @@ -82,6 +82,44 @@ describe('scanTranscriptStop — token usage', () => { const { tokens } = await scanTranscriptStop(p); expect(tokens).toEqual({ input: 0, output: 7, cacheRead: 0, cacheCreation: 0 }); }); + + it('falls back to requestId for dedup when message.id is missing', async () => { + const line = (extra: object) => JSON.stringify({ + type: 'assistant', + requestId: 'req_1', + message: { usage: { input_tokens: 100, output_tokens: 10 }, content: [extra] }, + }); + // Same requestId across two content-block lines → counted once. + const p = writeTranscript([line({ type: 'text', text: 'a' }), line({ type: 'tool_use', id: 't' })]); + const { tokens } = await scanTranscriptStop(p); + expect(tokens).toEqual({ input: 100, output: 10, cacheRead: 0, cacheCreation: 0 }); + }); +}); + +// ─── scanTranscriptStop: human prompt counting ────────── + +describe('scanTranscriptStop — prompt counting', () => { + const userText = (text: string) => JSON.stringify({ type: 'user', message: { content: [{ type: 'text', text }] } }); + const userString = (text: string) => JSON.stringify({ type: 'user', message: { content: text } }); + const toolResult = () => JSON.stringify({ type: 'user', message: { content: [{ type: 'tool_result', tool_use_id: 't', content: 'ok' }] } }); + const interruptLine = () => JSON.stringify({ type: 'user', message: { content: [{ type: 'text', text: '[Request interrupted by user]' }] } }); + const metaLine = (text: string) => JSON.stringify({ type: 'user', isMeta: true, message: { content: [{ type: 'text', text }] } }); + const sidechainLine = (text: string) => JSON.stringify({ type: 'user', isSidechain: true, message: { content: [{ type: 'text', text }] } }); + + it('counts genuine human turns and excludes tool_results, interrupts, meta/sidechain', async () => { + const p = writeTranscript([ + userText('first real prompt'), + toolResult(), // not a human turn + userString('second prompt'), // plain string content counts + interruptLine(), // interrupt, not a prompt + metaLine('injected reminder'), // meta, excluded + sidechainLine('sub-agent'), // sidechain, excluded + userText('third prompt'), + ]); + const { prompts, interrupt } = await scanTranscriptStop(p); + expect(prompts).toBe(3); + expect(interrupt).toBe(1); + }); }); // ─── aggregateSessionMetrics: prompts + tokens ────────── @@ -108,6 +146,49 @@ describe('aggregateSessionMetrics', () => { const m = aggregateSessionMetrics(events).get('s1')!; expect(m.tokens).toEqual({ input: 30, output: 4, cacheRead: 2, cacheCreation: 1 }); }); + + it('prefers the Stop prompt snapshot over the live submit count (max)', () => { + const ts = new Date().toISOString(); + // Only 2 prompt_submit events survive in the log, but the transcript snapshot + // says 12 — the durable snapshot wins. + const events: DashboardEvent[] = [ + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'a' }, + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'b' }, + { type: 'stop', timestamp: ts, sessionId: 's1', tool: 'claude', prompts: 12 }, + ]; + expect(aggregateSessionMetrics(events).get('s1')!.prompts).toBe(12); + }); + + it('uses live submit count before any Stop snapshot exists', () => { + const ts = new Date().toISOString(); + const events: DashboardEvent[] = [ + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'a' }, + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'b' }, + ]; + expect(aggregateSessionMetrics(events).get('s1')!.prompts).toBe(2); + }); + + // Regression for the PR #78 review: a session compacted out of events.jsonl and + // resumed under the same id must keep reporting new prompts. The Stop transcript + // snapshot (compaction-proof) keeps cur.prompts above the reported baseline so the + // delta stays positive — unlike the old compactable prompt_submit count. + it('keeps prompts reportable after compaction + same-session resume', () => { + const ts = new Date().toISOString(); + // Post-compaction + resume: only the resumed events remain in the log, but the + // Stop snapshot reflects the full transcript (10 old + 2 new = 12 prompts). + const resumedEvents: DashboardEvent[] = [ + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'new-1' }, + { type: 'prompt_submit', timestamp: ts, sessionId: 's1', tool: 'claude', promptSummary: 'new-2' }, + { type: 'stop', timestamp: ts, sessionId: 's1', tool: 'claude', prompts: 12 }, + ]; + const metrics = aggregateSessionMetrics(resumedEvents); + expect(metrics.get('s1')!.prompts).toBe(12); + + // Baseline already reported 10 prompts before compaction. + const reported = { s1: { prompts: 10, tokens: { input: 0, output: 0, cacheRead: 0, cacheCreation: 0 } } }; + const { delta } = computePromptTokenDelta(metrics, reported); + expect(delta.prompts).toBe(2); // the 2 post-resume turns are still reported + }); }); // ─── rebuildSessions: promptCount + tokens on the card ── diff --git a/src/dashboard-collector.ts b/src/dashboard-collector.ts index c138343..c5d6c33 100644 --- a/src/dashboard-collector.ts +++ b/src/dashboard-collector.ts @@ -117,6 +117,12 @@ export interface TranscriptScanResult { interrupt: number; toolReject: number; tokens: TokenUsage; + /** + * Cumulative count of genuine human prompt turns in the transcript. Sourced here + * (not from compactable prompt_submit events) so the reported baseline stays + * monotonic across compaction + same-session resume — same guarantee as `tokens`. + */ + prompts: number; } /** @@ -125,9 +131,11 @@ export interface TranscriptScanResult { * - interrupt: user message whose text starts with "[Request interrupted by user" * - toolReject: tool_result with is_error=true marked as a user rejection * - tokens: usage.{input,output,cache_*}_tokens summed across assistant messages, - * deduplicated by `message.id` (Claude Code repeats the same usage on - * every content-block line of a single turn, so naive summing would - * massively over-count). + * deduplicated by `message.id` (falling back to top-level `requestId`) + * because Claude Code repeats the same usage on every content-block + * line of a single turn, so naive summing would massively over-count. + * - prompts: genuine human prompt turns (user entries with real text, excluding + * interrupts, tool_results, and meta/sidechain entries). * * Uses a streaming line reader so large transcripts don't load fully into memory. * Returns zero counts on any error (file missing, too large, permission denied). @@ -137,16 +145,18 @@ export async function scanTranscriptStop( ): Promise { let interrupt = 0; let toolReject = 0; + let prompts = 0; const tokens = emptyTokenUsage(); - // Dedup assistant usage by message id (one turn spans many JSONL lines). - const countedMessageIds = new Set(); + // Dedup assistant usage per message (one turn spans many JSONL lines that repeat + // the same usage). Prefer message.id; fall back to the top-level requestId. + const countedUsageKeys = new Set(); try { const stat = await fs.promises.stat(transcriptPath); - if (stat.size === 0) return { interrupt, toolReject, tokens }; + if (stat.size === 0) return { interrupt, toolReject, tokens, prompts }; if (stat.size > INTERVENTION_SCAN_MAX_BYTES) { log.warn(`dashboard: transcript too large to scan (${stat.size} bytes)`); - return { interrupt, toolReject, tokens }; + return { interrupt, toolReject, tokens, prompts }; } const rl = readline.createInterface({ @@ -156,12 +166,15 @@ export async function scanTranscriptStop( for await (const line of rl) { const trimmed = line.trim(); - // Cheap pre-filter: we only care about `user` entries (interventions) and - // `assistant` entries (token usage); skip JSON.parse on anything else. + // Cheap pre-filter: we only care about `user` entries (interventions/prompts) + // and `assistant` entries (token usage); skip JSON.parse on anything else. if (!trimmed || (!trimmed.includes('"user"') && !trimmed.includes('"assistant"'))) continue; let entry: { type?: string; + isMeta?: unknown; + isSidechain?: unknown; + requestId?: unknown; message?: { content?: unknown; id?: unknown; usage?: Record }; }; try { @@ -172,9 +185,13 @@ export async function scanTranscriptStop( if (entry.type === 'assistant') { const usage = entry.message?.usage; - const id = entry.message?.id; - if (usage && typeof id === 'string' && !countedMessageIds.has(id)) { - countedMessageIds.add(id); + const dedupKey = typeof entry.message?.id === 'string' + ? entry.message.id + : typeof entry.requestId === 'string' + ? entry.requestId + : undefined; + if (usage && dedupKey && !countedUsageKeys.has(dedupKey)) { + countedUsageKeys.add(dedupKey); tokens.input += toNum(usage.input_tokens); tokens.output += toNum(usage.output_tokens); tokens.cacheRead += toNum(usage.cache_read_input_tokens); @@ -183,12 +200,28 @@ export async function scanTranscriptStop( continue; } - if (entry.type !== 'user' || !Array.isArray(entry.message?.content)) continue; + if (entry.type !== 'user') continue; - for (const item of entry.message.content as Array>) { - if (item?.type === 'text' && typeof item.text === 'string' - && item.text.startsWith(TRANSCRIPT_INTERRUPT_PREFIX)) { - interrupt++; + const isMeta = entry.isMeta === true || entry.isSidechain === true; + const content = entry.message?.content; + + // Plain-string user content = a genuine human prompt (older transcript shape). + if (typeof content === 'string') { + if (!isMeta && content.trim() && !content.startsWith(TRANSCRIPT_INTERRUPT_PREFIX)) { + prompts++; + } + continue; + } + if (!Array.isArray(content)) continue; + + let hasHumanText = false; + for (const item of content as Array>) { + if (item?.type === 'text' && typeof item.text === 'string') { + if (item.text.startsWith(TRANSCRIPT_INTERRUPT_PREFIX)) { + interrupt++; + } else if (item.text.trim()) { + hasHumanText = true; + } } else if (item?.type === 'tool_result' && item.is_error === true) { const text = typeof item.content === 'string' ? item.content @@ -201,12 +234,14 @@ export async function scanTranscriptStop( } } } + // One human turn per user entry (tool_result-only entries have no human text). + if (hasHumanText && !isMeta) prompts++; } } catch (e) { log.warn(`dashboard: failed to scan transcript: ${(e as Error).message}`); } - return { interrupt, toolReject, tokens }; + return { interrupt, toolReject, tokens, prompts }; } /** Coerce an unknown usage field to a non-negative finite number (0 otherwise). */ @@ -326,7 +361,8 @@ export async function parseHookEvent( if (output) { event.stoppedOutput = output; } - // Full-transcript snapshot of interrupt/tool_reject counts + token usage (idempotent). + // Full-transcript snapshot of interrupt/tool_reject counts + token usage + + // human prompt count (all idempotent, sourced from the non-compactable transcript). const scan = await scanTranscriptStop(hookData.transcript_path); if (scan.interrupt > 0 || scan.toolReject > 0) { event.interventions = { interrupt: scan.interrupt, toolReject: scan.toolReject }; @@ -335,6 +371,9 @@ export async function parseHookEvent( || scan.tokens.cacheRead > 0 || scan.tokens.cacheCreation > 0) { event.tokens = scan.tokens; } + if (scan.prompts > 0) { + event.prompts = scan.prompts; + } } return event; @@ -560,6 +599,11 @@ export function aggregateSessionMetrics( ): Map { const map = new Map(); const lastStopAt = new Map(); + // Two prompt-count sources, kept separate then reconciled with max(): + // - submitCount: live prompt_submit events (real-time, but compactable). + // - stopPrompts: latest Stop transcript snapshot (compaction/resume-proof). + const submitCount = new Map(); + const stopPrompts = new Map(); for (const event of events) { let m = map.get(event.sessionId); @@ -573,13 +617,16 @@ export function aggregateSessionMetrics( m.interrupt = event.interventions.interrupt; m.toolReject = event.interventions.toolReject; } - // Token snapshot is a full cumulative total for the session — latest wins. + // Token + prompt snapshots are full cumulative totals — latest wins. if (event.tokens) { m.tokens = { ...event.tokens }; } + if (typeof event.prompts === 'number') { + stopPrompts.set(event.sessionId, event.prompts); + } lastStopAt.set(event.sessionId, new Date(event.timestamp).getTime()); } else if (event.type === 'prompt_submit') { - m.prompts++; + submitCount.set(event.sessionId, (submitCount.get(event.sessionId) ?? 0) + 1); const stopAt = lastStopAt.get(event.sessionId); if (stopAt !== undefined) { const gap = new Date(event.timestamp).getTime() - stopAt; @@ -592,6 +639,13 @@ export function aggregateSessionMetrics( } } + // Reconcile prompt count: the Stop transcript snapshot is the durable baseline + // (survives compaction + resume); live submit events cover the period before the + // first Stop. max() keeps the count monotonic across both. + for (const [sid, m] of map) { + m.prompts = Math.max(submitCount.get(sid) ?? 0, stopPrompts.get(sid) ?? 0); + } + return map; } diff --git a/src/types.ts b/src/types.ts index bcedc87..1eac282 100644 --- a/src/types.ts +++ b/src/types.ts @@ -489,6 +489,13 @@ export interface DashboardEvent { * no transcript (e.g. Cursor) and for sessions with no recorded usage. */ tokens?: TokenUsage; + /** + * Cumulative count of human prompt turns scanned from the transcript at Stop time. + * Full snapshot (idempotent), sourced from the non-compactable transcript so the + * reported baseline survives compaction + same-session resume. Absent for tools + * with no transcript (e.g. Cursor); for those, prompt_submit events are counted. + */ + prompts?: number; } export interface DashboardSession {