diff --git a/.github/workflows/extension-ci.yml b/.github/workflows/extension-ci.yml
new file mode 100644
index 0000000000..f19fd075b7
--- /dev/null
+++ b/.github/workflows/extension-ci.yml
@@ -0,0 +1,120 @@
+name: extension CI
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'apps/extension/**'
+      - 'pnpm-lock.yaml'
+      - 'pnpm-workspace.yaml'
+      - '.nvmrc'
+      - '.github/workflows/extension-ci.yml'
+  pull_request:
+    branches: [main]
+    paths:
+      - 'apps/extension/**'
+      - 'pnpm-lock.yaml'
+      - 'pnpm-workspace.yaml'
+      - '.nvmrc'
+      - '.github/workflows/extension-ci.yml'
+
+concurrency: # One active run per PR (or per ref on push); a newer run cancels the in-flight one.
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+permissions: # Read-only token scopes.
+  contents: read
+  pull-requests: read
+
+jobs:
+  # Compiler, linter, formatting and unit tests, via the extension's own `verify`
+  # script so the extension's stricter (type-aware) oxlint config is what runs.
+  verify:
+    runs-on: ${{ vars.RUNNER_DEFAULT_LABEL || 'ubuntu-latest' }}
+    timeout-minutes: 15
+    steps:
+      - uses: useblacksmith/checkout@41cdeedae8edb2e684ba22896a5fd2a3cb85db6b # v1
+        with:
+          lfs: true
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v4.4.0
+
+      - name: Setup Node
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
+        with:
+          node-version-file: '.nvmrc'
+          cache: 'pnpm'
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Verify (typecheck, lint, format, unit tests)
+        run: pnpm --filter kilo-extension verify
+
+  # Prove the Firefox MV3 target still builds (the Chrome build runs inside e2e).
+  build-firefox:
+    runs-on: ${{ vars.RUNNER_DEFAULT_LABEL || 'ubuntu-latest' }}
+    timeout-minutes: 15
+    steps:
+      - uses: useblacksmith/checkout@41cdeedae8edb2e684ba22896a5fd2a3cb85db6b # v1
+        with:
+          lfs: true
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v4.4.0
+
+      - name: Setup Node
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
+        with:
+          node-version-file: '.nvmrc'
+          cache: 'pnpm'
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Build (Firefox MV3)
+        run: pnpm --filter kilo-extension build:firefox
+
+  # Chrome end-to-end tests. `e2e:chrome` builds the Chrome MV3 bundle first, so this
+  # also covers the Chrome build. Firefox Selenium e2e (`e2e:firefox`) is intentionally
+  # not run here: it relies on geckodriver tab detection that is unreliable headless.
+  e2e-chrome:
+    needs: verify # Starts only after the verify job has succeeded.
+    runs-on: ${{ vars.RUNNER_LARGE_LABEL || 'ubuntu-24.04-8core' }}
+    timeout-minutes: 20
+    env:
+      CI: 'true'
+    steps:
+      - uses: useblacksmith/checkout@41cdeedae8edb2e684ba22896a5fd2a3cb85db6b # v1
+        with:
+          lfs: true
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v4.4.0
+
+      - name: Setup Node
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
+        with:
+          node-version-file: '.nvmrc'
+          cache: 'pnpm'
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Install Playwright Chromium
+        run: pnpm --filter kilo-extension exec playwright install --with-deps chromium
+
+      - name: Run Chrome e2e (builds the Chrome MV3 bundle first)
+        run: pnpm --filter kilo-extension e2e:chrome
+
+      - name: Upload Playwright report on failure
+        if: failure() # Only when an earlier step of this job failed.
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        with:
+          name: extension-playwright-report
+          path: |
+            apps/extension/playwright-report
+            apps/extension/test-results
+          retention-days: 7
+          if-no-files-found: ignore # Don't fail when neither report directory exists.
diff
--git a/apps/extension/entrypoints/background.ts b/apps/extension/entrypoints/background.ts
index ec405a4f97..551ec57281 100644
--- a/apps/extension/entrypoints/background.ts
+++ b/apps/extension/entrypoints/background.ts
@@ -36,16 +36,34 @@ interface ChromeRuntimeApi {
 /*
  * Trust boundary for the eval/debugger message path. Today only the extension's own pages (the
  * side panel) can reach this listener — there is no externally_connectable and no content script.
- * Accept only same-extension, non-tab senders so adding either later can't silently widen access
- * to the dangerous eval path. Content scripts carry a `tab`; external pages carry a different `id`.
+ * Accept only same-extension senders whose origin is this extension, so adding a content script
+ * later can't silently widen access to the dangerous eval path: a content script shares the
+ * extension `id` but reports the host page's origin, while an extension page reports
+ * `chrome-extension://`.
+ *
+ * When the browser supplies `origin` it is authoritative: `url` is consulted only if `origin` is
+ * absent, so a sender reporting any other origin is never admitted through the URL prefix.
  */
 const isTrustedExtensionSender = (sender: unknown, runtimeId: string | undefined): boolean => {
   if (runtimeId === undefined || typeof sender !== 'object' || sender === null) {
     return false;
   }
 
-  const { id, tab } = sender as { id?: unknown; tab?: unknown };
-  return id === runtimeId && tab === undefined;
+  const { id, origin, url } = sender as { id?: unknown; origin?: unknown; url?: unknown };
+
+  // A different (or missing) id means the sender is not this extension.
+  if (id !== runtimeId) {
+    return false;
+  }
+
+  const extensionOrigin = `chrome-extension://${runtimeId}`;
+
+  if (origin !== undefined) {
+    return origin === extensionOrigin;
+  }
+
+  // No origin reported: fall back to the sender URL, anchored with `/` so a longer id can't match.
+  return typeof url === 'string' && url.startsWith(`${extensionOrigin}/`);
 };
 
 const handleTabDebuggerRequest = async ({
diff --git a/apps/extension/entrypoints/sidepanel/agent-chat-panel.tsx b/apps/extension/entrypoints/sidepanel/agent-chat-panel.tsx
index 2fefda5370..fdffc4660f 100644
--- a/apps/extension/entrypoints/sidepanel/agent-chat-panel.tsx
+++ b/apps/extension/entrypoints/sidepanel/agent-chat-panel.tsx
@@ -7,6 +7,12 @@ import {
   groupConversationEvents,
 }
from '@/src/shared/agent-conversation'; import type { AgentConversationEvent } from '@/src/shared/agent-conversation'; +import { + KEEP_RECENT_EXCHANGES, + KEEP_RECENT_EXCHANGES_MANUAL, + compactConversationEvents, + hasCompactableHistory, +} from '@/src/shared/agent-context-compaction'; import { defaultMode } from '@/src/shared/agent-chat-placeholder'; import { getKiloApiBaseUrl } from '@/src/shared/auth'; import type { StoredAuth } from '@/src/shared/auth'; @@ -28,7 +34,10 @@ import { } from './agent-conversation-storage'; import type { StoredAgentConversation } from './agent-conversation-storage'; import { AgentFooterControls } from './agent-footer-controls'; +import { ContextDonut } from './context-donut'; import { runDangerousLlmTurn, runSafeLlmTurn } from './agent-turn-runners'; +import type { ContextUsage } from '@/src/shared/context-usage'; +import { AUTO_COMPACT_RATIO, getContextRatio } from '@/src/shared/context-usage'; import { useTabDebugger } from './use-tab-debugger'; import { ConversationList } from './conversation-list'; import { ConversationHistoryButton } from './conversation-history-button'; @@ -85,20 +94,20 @@ export const formatSelectedTabSystemEnvironment = ({ const ConversationTabs = ({ activeConversationId, + busyConversationIds, conversations, isDisabled, onCloseConversation, onCreateConversation, onSelectConversation, - runningConversationIds, }: { activeConversationId: string; + busyConversationIds: readonly string[]; conversations: StoredAgentConversation[]; isDisabled: boolean; onCloseConversation: (conversationId: string) => void; onCreateConversation: () => void; onSelectConversation: (conversationId: string) => void; - runningConversationIds: readonly string[]; }): JSX.Element => (
{ const title = getStoredConversationTitle(conversation); const isActive = conversation.id === activeConversationId; - const isRunning = runningConversationIds.includes(conversation.id); + const isRunning = busyConversationIds.includes(conversation.id); return (
void; organizationId: string | undefined; }): JSX.Element => { - const [draft, setDraft] = useState(''); + const [draftsByConversation, setDraftsByConversation] = useState>({}); + // Ponytail: in-memory only, recomputed from the next gateway turn after reload. + const [contextUsageByConversation, setContextUsageByConversation] = useState< + Record + >({}); const [conversationStore, setConversationStore, isConversationStoreLoaded] = useStoredAgentConversations(createDefaultConversationEvents); const [runningConversationIds, setRunningConversationIds] = useState([]); + const [compactingConversationIds, setCompactingConversationIds] = useState([]); const conversationStoreRef = useRef(conversationStore); const runStatesRef = useRef(new Map()); const runTokenRef = useRef(0); + // Ponytail: ref mirror exists only because auto-compact reads usage synchronously in the run's finally. + const latestUsageRef = useRef>({}); const { inspectableTabs, isLoadingTabs, tabDebuggerError } = useTabDebugger(); const { modelLoadError, modelOptions, refetchModels } = useGatewayModels({ auth, @@ -193,6 +209,11 @@ export const AgentChatPanel = ({ }); const activeConversation = getActiveStoredConversation(conversationStore); const { events, id: activeConversationId, mode = defaultMode } = activeConversation; + // Ponytail: drafts are in-memory only; they reset on reload like the rest of transient UI state. + const draft = draftsByConversation[activeConversationId] ?? ''; + const setActiveDraft = (value: string): void => { + setDraftsByConversation(current => ({ ...current, [activeConversationId]: value })); + }; const selectedTabId = getSelectedInspectableTabId({ inspectableTabs, selectedTabId: activeConversation.selectedTabId, @@ -217,6 +238,115 @@ export const AgentChatPanel = ({ ); const thinkingEffort = activeConversation.thinkingEffort ?? thinkingOptions[0] ?? 
''; const isRunning = runningConversationIds.includes(activeConversationId); + const isCompacting = compactingConversationIds.includes(activeConversationId); + const activeUsage = contextUsageByConversation[activeConversationId]; + const activePromptTokens = activeUsage?.promptTokens ?? 0; + const contextLength = selectedModel?.contextLength; + + const compactConversation = useCallback( + async ( + conversationId: string, + keepRecentExchanges: number = KEEP_RECENT_EXCHANGES + ): Promise => { + if ( + !isConversationStoreLoaded || + runningConversationIds.includes(conversationId) || + compactingConversationIds.includes(conversationId) + ) { + return; + } + + const conversation = conversationStoreRef.current.conversations.find( + item => item.id === conversationId + ); + const runModel = conversation?.model ?? modelOptions[0]?.id ?? ''; + + if (conversation === undefined || runModel === '') { + return; + } + + setCompactingConversationIds(current => [...current, conversationId]); + + try { + const compacted = await compactConversationEvents({ + apiBaseUrl, + events: conversation.events, + fetch: fetchFromWindow, + keepRecentExchanges, + model: runModel, + organizationId, + token: auth.token, + }); + + if (compacted !== undefined) { + // Ponytail: wholesale replace is safe only because the conversation can't receive new events while compacting (guarded above + send disabled). Reconcile against currentEvents if that ever changes. 
+ setConversationStore(store => + updateStoredConversationEvents(store, conversationId, () => compacted) + ); + const nextRef = { ...latestUsageRef.current }; + delete nextRef[conversationId]; + latestUsageRef.current = nextRef; + setContextUsageByConversation(current => { + const next = { ...current }; + delete next[conversationId]; + return next; + }); + } + } finally { + setCompactingConversationIds(current => current.filter(id => id !== conversationId)); + } + }, + // Ponytail: compaction is a single short gateway call; no abort wiring until it proves slow. + [ + auth.token, + compactingConversationIds, + isConversationStoreLoaded, + modelOptions, + organizationId, + runningConversationIds, + setConversationStore, + ] + ); + + const compactActiveConversation = useCallback( + (): Promise => compactConversation(activeConversationId, KEEP_RECENT_EXCHANGES_MANUAL), + [activeConversationId, compactConversation] + ); + + /* + * Gate on summarizable history (not measured usage) so the button is never enabled-but-inert and + * still works after a reload, when in-memory usage has reset to zero. + */ + const canCompactActive = useMemo( + () => hasCompactableHistory(events, KEEP_RECENT_EXCHANGES_MANUAL), + [events] + ); + + const contextDonut = useMemo( + () => ( + { + void compactActiveConversation(); + }} + promptTokens={activePromptTokens} + /> + ), + [ + activePromptTokens, + canCompactActive, + compactActiveConversation, + contextLength, + isCompacting, + isRunning, + ] + ); + + const busyConversationIds = useMemo( + () => [...runningConversationIds, ...compactingConversationIds], + [compactingConversationIds, runningConversationIds] + ); const isModelSelectDisabled = modelOptions.length === 0; const isThinkingSelectDisabled = thinkingOptions.length === 0; const modelControlValue = modelOptions.length === 0 ? 
'' : model; @@ -224,7 +354,8 @@ export const AgentChatPanel = ({ !isConversationStoreLoaded || draft.trim() === '' || modelControlValue === '' || - selectedTabId === undefined; + selectedTabId === undefined || + isCompacting; conversationStoreRef.current = conversationStore; @@ -395,6 +526,18 @@ export const AgentChatPanel = ({ updateThinkingBlock(conversationId, eventId, thinkingText); } }; + const updateRunUsage = (usage: { promptTokens: number }): void => { + if (isCurrentRun()) { + latestUsageRef.current = { + ...latestUsageRef.current, + [conversationId]: { promptTokens: usage.promptTokens }, + }; + setContextUsageByConversation(current => ({ + ...current, + [conversationId]: { promptTokens: usage.promptTokens }, + })); + } + }; runStatesRef.current.set(conversationId, { abort, @@ -415,6 +558,7 @@ export const AgentChatPanel = ({ conversationEvents: conversationWithUserMessage, fetch: fetchFromWindow, model: runModel, + onUsage: updateRunUsage, organizationId, selectedTabId: runSelectedTabId, signal: abort.signal, @@ -430,6 +574,16 @@ export const AgentChatPanel = ({ setRunningConversationIds(currentIds => currentIds.filter(currentId => currentId !== conversationId) ); + + const latest = latestUsageRef.current[conversationId]?.promptTokens ?? 
0; + const runContextLength = modelOptions.find( + option => option.id === runModel + )?.contextLength; + const ratio = getContextRatio(latest, runContextLength); + + if (ratio !== undefined && ratio >= AUTO_COMPACT_RATIO) { + void compactConversation(conversationId); + } } } })(); @@ -444,18 +598,20 @@ export const AgentChatPanel = ({ selectedTabId: conversation.selectedTabId, }); const isConversationRunning = runningConversationIds.includes(conversation.id); + const isConversationCompacting = compactingConversationIds.includes(conversation.id); if ( !isConversationStoreLoaded || text === '' || isConversationRunning || + isConversationCompacting || conversationModel === '' || conversationSelectedTabId === undefined ) { return; } - setDraft(''); + setDraftsByConversation(current => ({ ...current, [activeConversationId]: '' })); submitMessage(text); }; @@ -475,7 +631,6 @@ export const AgentChatPanel = ({ thinkingEffort, }; - setDraft(''); conversationStoreRef.current = createNextStoredConversation( conversationStoreRef.current, createDefaultConversationEvents(), @@ -510,10 +665,6 @@ export const AgentChatPanel = ({ return; } - if (!globalThis.confirm('Close this conversation tab? It will stay in History.')) { - return; - } - abortConversationRun(conversationId); setConversationStore(store => closeStoredConversationTab(store, conversationId, createDefaultConversationEvents()) @@ -549,7 +700,6 @@ export const AgentChatPanel = ({ return; } - setDraft(''); setConversationStore(store => openStoredConversation({ conversationId, @@ -599,12 +749,12 @@ export const AgentChatPanel = ({
@@ -622,7 +772,7 @@ export const AgentChatPanel = ({ className="min-h-20 w-full resize-none rounded-lg border border-zinc-800 bg-zinc-950 px-3 py-2 text-sm leading-5 text-zinc-100 outline-none transition placeholder:text-zinc-600 focus:border-[#EDFF00] focus:ring-2 focus:ring-[#EDFF00]/30" id="agent-message" onChange={(event: ChangeEvent) => { - setDraft(event.currentTarget.value); + setActiveDraft(event.currentTarget.value); }} onKeyDown={(event: KeyboardEvent) => { if (event.key === 'Enter' && !event.shiftKey) { @@ -647,6 +797,7 @@ export const AgentChatPanel = ({
+ {contextDonut}
{modelLoadError === undefined ? null : (
diff --git a/apps/extension/entrypoints/sidepanel/agent-llm-turn-runner.ts b/apps/extension/entrypoints/sidepanel/agent-llm-turn-runner.ts index 6e40870bfc..1f10382615 100644 --- a/apps/extension/entrypoints/sidepanel/agent-llm-turn-runner.ts +++ b/apps/extension/entrypoints/sidepanel/agent-llm-turn-runner.ts @@ -4,6 +4,7 @@ import { createSafeToolDefinitions, } from '@/src/shared/agent-llm-harness'; import { runLlmTurn } from '@/src/shared/agent-llm-turn-runner-core'; +import type { OnTurnUsage } from '@/src/shared/agent-llm-turn-runner-core'; import { maxAgentToolRounds } from '@/src/shared/agent-tool-round-limit'; import type { FetchLike } from '@/src/shared/auth'; import { executeEvalToolCall } from './agent-eval-runtime'; @@ -18,6 +19,7 @@ interface RunDangerousLlmTurnOptions { readonly model: string; readonly organizationId?: string | undefined; readonly selectedTabId: number; + readonly onUsage?: OnTurnUsage | undefined; readonly signal?: AbortSignal | undefined; readonly supportsImages?: boolean; readonly thinkingEffort?: string | undefined; diff --git a/apps/extension/entrypoints/sidepanel/agent-safe-llm-turn-runner.ts b/apps/extension/entrypoints/sidepanel/agent-safe-llm-turn-runner.ts index b4de7ab9d4..bb4efabef7 100644 --- a/apps/extension/entrypoints/sidepanel/agent-safe-llm-turn-runner.ts +++ b/apps/extension/entrypoints/sidepanel/agent-safe-llm-turn-runner.ts @@ -1,6 +1,7 @@ import type { AgentConversationEvent } from '@/src/shared/agent-conversation'; import { createSafeToolDefinitions } from '@/src/shared/agent-llm-harness'; import { runLlmTurn } from '@/src/shared/agent-llm-turn-runner-core'; +import type { OnTurnUsage } from '@/src/shared/agent-llm-turn-runner-core'; import { maxAgentToolRounds } from '@/src/shared/agent-tool-round-limit'; import type { FetchLike } from '@/src/shared/auth'; import { executeSafeToolCall } from './agent-safe-tool-runtime'; @@ -14,6 +15,7 @@ interface RunSafeLlmTurnOptions { readonly model: string; readonly 
organizationId?: string | undefined; readonly selectedTabId: number; + readonly onUsage?: OnTurnUsage | undefined; readonly signal?: AbortSignal | undefined; readonly supportsImages?: boolean; readonly thinkingEffort?: string | undefined; diff --git a/apps/extension/entrypoints/sidepanel/app.tsx b/apps/extension/entrypoints/sidepanel/app.tsx index 343d81d4e1..caea24bba0 100644 --- a/apps/extension/entrypoints/sidepanel/app.tsx +++ b/apps/extension/entrypoints/sidepanel/app.tsx @@ -50,8 +50,10 @@ export const App = (): JSX.Element => { isSuccess: isStoredAuthSuccess, refetch: refetchStoredAuth, } = useQuery({ - // React Query forbids a queryFn resolving to undefined, but "no stored auth" is the - // common signed-out state; return null from the fn and map it back to undefined for the UI. + /* + * React Query forbids a queryFn resolving to undefined. Return null for the + * signed-out state and map it back to undefined for the UI via select. + */ queryFn: async () => (await loadStoredAuth(storage)) ?? null, queryKey: storedAuthQueryKey, select: data => data ?? 
undefined, diff --git a/apps/extension/entrypoints/sidepanel/context-donut.tsx b/apps/extension/entrypoints/sidepanel/context-donut.tsx new file mode 100644 index 0000000000..75df7d4aab --- /dev/null +++ b/apps/extension/entrypoints/sidepanel/context-donut.tsx @@ -0,0 +1,66 @@ +import type { JSX } from 'react'; +import { formatContextSummary, getContextRatio, getContextTone } from '@/src/shared/context-usage'; + +const toneStroke: Record<'danger' | 'safe' | 'warn', string> = { + danger: '#f87171', + safe: '#EDFF00', + warn: '#fbbf24', +}; + +const RADIUS = 6; +const CIRCUMFERENCE = 2 * Math.PI * RADIUS; + +export const ContextDonut = ({ + canCompact, + contextLength, + onCompact, + promptTokens, +}: { + canCompact: boolean; + contextLength: number | undefined; + onCompact: () => void; + promptTokens: number; +}): JSX.Element => { + const ratio = getContextRatio(promptTokens, contextLength); + const stroke = ratio === undefined ? '#52525b' : toneStroke[getContextTone(ratio)]; + const dash = ratio === undefined ? 0 : ratio * CIRCUMFERENCE; + const summary = formatContextSummary(promptTokens, contextLength); + const label = `Context usage: ${summary}`; + + return ( +
+ + + +
+

Context

+

{summary}

+ +
+
+ ); +}; diff --git a/apps/extension/src/shared/agent-context-compaction.test.ts b/apps/extension/src/shared/agent-context-compaction.test.ts new file mode 100644 index 0000000000..eb05195d11 --- /dev/null +++ b/apps/extension/src/shared/agent-context-compaction.test.ts @@ -0,0 +1,102 @@ +import { describe, expect, it } from 'vitest'; +import { + createAssistantMessage, + createEvalToolCall, + createToolResult, + createUserMessage, +} from './agent-conversation'; +import { + KEEP_RECENT_EXCHANGES, + KEEP_RECENT_EXCHANGES_MANUAL, + SUMMARY_PREFIX, + hasCompactableHistory, + renderEventsAsTranscript, + splitEventsForCompaction, +} from './agent-context-compaction'; + +describe('split events for compaction', () => { + it('keeps the last N exchanges and summarizes the rest', () => { + const events = [ + createAssistantMessage('greeting'), + createUserMessage('one'), + createAssistantMessage('a1'), + createUserMessage('two'), + createAssistantMessage('a2'), + createUserMessage('three'), + createAssistantMessage('a3'), + ]; + + const { toKeep, toSummarize } = splitEventsForCompaction(events); + + // KEEP_RECENT_EXCHANGES = 2 β†’ keep from the 2nd-to-last user message ('two') + expect(toKeep[0]).toMatchObject({ role: 'user', text: 'two' }); + expect(toKeep.at(-1)).toMatchObject({ text: 'a3' }); + expect(toSummarize).toMatchObject([{ text: 'greeting' }, { text: 'one' }, { text: 'a1' }]); + }); + + it('summarizes nothing when there are too few user messages', () => { + const events = [createAssistantMessage('greeting'), createUserMessage('one')]; + const { toKeep, toSummarize } = splitEventsForCompaction(events); + expect(toSummarize).toStrictEqual([]); + expect(toKeep).toStrictEqual(events); + }); + + it('summarizes the whole conversation at the manual threshold (keep 0)', () => { + const events = [ + createAssistantMessage('greeting'), + createUserMessage('one'), + createAssistantMessage('a1'), + ]; + + // A single exchange has nothing to summarize at the auto threshold, 
but manual compacts it all. + expect(hasCompactableHistory(events)).toBe(false); + expect(hasCompactableHistory(events, KEEP_RECENT_EXCHANGES_MANUAL)).toBe(true); + + const { toKeep, toSummarize } = splitEventsForCompaction(events, KEEP_RECENT_EXCHANGES_MANUAL); + expect(toKeep).toStrictEqual([]); + expect(toSummarize).toStrictEqual(events); + }); + + it('has nothing to compact without a user message', () => { + const events = [createAssistantMessage('greeting')]; + expect(hasCompactableHistory(events, KEEP_RECENT_EXCHANGES_MANUAL)).toBe(false); + }); +}); + +describe('render events as transcript', () => { + it('renders user and assistant lines', () => { + const text = renderEventsAsTranscript([ + createUserMessage('hello'), + createAssistantMessage('hi there'), + ]); + expect(text).toContain('User: hello'); + expect(text).toContain('Assistant: hi there'); + }); + + it('preserves tool inputs and result payloads', () => { + const text = renderEventsAsTranscript([ + createEvalToolCall({ code: 'return document.title;', tabId: 1 }), + createToolResult({ ok: true, toolCallId: 'call-1', value: 'Example Domain' }), + createToolResult({ error: 'boom', ok: false, toolCallId: 'call-2' }), + ]); + expect(text).toContain('Tool call (eval): return document.title;'); + expect(text).toContain('Tool result (ok): Example Domain'); + expect(text).toContain('Tool result (error): boom'); + }); + + it('truncates oversized tool result payloads', () => { + const text = renderEventsAsTranscript([ + createToolResult({ ok: true, toolCallId: 'call-1', value: 'x'.repeat(5000) }), + ]); + expect(text).toContain('[truncated 3000 chars]'); + expect(text.length).toBeLessThan(3000); + }); +}); + +describe('tuning constants', () => { + it('exposes tuning constants', () => { + expect(KEEP_RECENT_EXCHANGES).toBe(2); + expect(KEEP_RECENT_EXCHANGES_MANUAL).toBe(0); + expect(SUMMARY_PREFIX.length).toBeGreaterThan(0); + }); +}); diff --git a/apps/extension/src/shared/agent-context-compaction.ts 
b/apps/extension/src/shared/agent-context-compaction.ts
new file mode 100644
index 0000000000..e6ccccb3cb
--- /dev/null
+++ b/apps/extension/src/shared/agent-context-compaction.ts
@@ -0,0 +1,174 @@
+import { createAssistantMessage } from './agent-conversation';
+import type { AgentConversationEvent } from './agent-conversation';
+import type { FetchLike } from './auth';
+import { fetchKiloGatewayChatCompletionStream } from './kilo-api-client';
+import type { KiloGatewayChatMessage } from './kilo-gateway-chat-client';
+
+export const KEEP_RECENT_EXCHANGES = 2;
+/*
+ * Manual "Compact now" is explicit: it summarizes the whole conversation (keeps no recent exchange),
+ * so the user can compact whenever there is anything to compact. Auto-compaction keeps
+ * KEEP_RECENT_EXCHANGES for safer continuity near the context limit.
+ */
+export const KEEP_RECENT_EXCHANGES_MANUAL = 0;
+export const SUMMARY_PREFIX = '🗜️ Compacted earlier context\n\n';
+
+const SUMMARY_SYSTEM_PROMPT =
+  'You compress a browser-agent conversation. Produce a concise but complete summary that preserves: the user’s goals and open requests, key findings about the inspected page(s), decisions made, tool actions taken and their results, and anything needed to continue the task. Use compact prose or bullet points. Do not add new actions or speculation.';
+
+const isUserMessage = (event: AgentConversationEvent): boolean =>
+  event.type === 'message' && event.role === 'user';
+
+// Keep complete exchanges only: cut just before the Nth-from-last user message so kept
+// Events always begin at a user turn and no tool-call/tool-result pair is split.
+export const splitEventsForCompaction = (
+  events: AgentConversationEvent[],
+  keepRecentExchanges: number = KEEP_RECENT_EXCHANGES
+): { toKeep: AgentConversationEvent[]; toSummarize: AgentConversationEvent[] } => {
+  const userIndexes = events
+    .map((event, index) => (isUserMessage(event) ? index : -1))
+    .filter(index => index !== -1);
+
+  if (userIndexes.length <= keepRecentExchanges) {
+    return { toKeep: events, toSummarize: [] };
+  }
+
+  // A keep count of 0 keeps nothing: the cut falls past the last user message (whole transcript).
+  const boundary = userIndexes[userIndexes.length - keepRecentExchanges] ?? events.length;
+
+  return {
+    toKeep: events.slice(boundary),
+    toSummarize: events.slice(0, boundary),
+  };
+};
+
+/*
+ * Whether compacting would actually summarize anything. Gates the "Compact now" button so it is
+ * never enabled-but-inert.
+ */
+export const hasCompactableHistory = (
+  events: AgentConversationEvent[],
+  keepRecentExchanges: number = KEEP_RECENT_EXCHANGES
+): boolean => splitEventsForCompaction(events, keepRecentExchanges).toSummarize.length > 0;
+
+// Cap each tool input/output so a big snapshot or screenshot can't blow up the summarization prompt.
+const MAX_TOOL_TEXT_CHARS = 2000;
+const truncateToolText = (text: string): string =>
+  text.length <= MAX_TOOL_TEXT_CHARS
+    ? text
+    : `${text.slice(0, MAX_TOOL_TEXT_CHARS)}… [truncated ${text.length - MAX_TOOL_TEXT_CHARS} chars]`;
+
+// Strings pass through untouched; other values become JSON, falling back to String().
+const stringifyToolValue = (value: unknown): string => {
+  if (typeof value === 'string') {
+    return value;
+  }
+
+  try {
+    // JSON.stringify returns undefined, not a string, for functions and symbols.
+    const serialized: string | undefined = JSON.stringify(value);
+
+    return serialized ?? String(value);
+  } catch {
+    return String(value);
+  }
+};
+
+// One transcript line per event; thinking blocks are omitted (undefined).
+const renderEvent = (event: AgentConversationEvent): string | undefined => {
+  switch (event.type) {
+    case 'message': {
+      return `${event.role === 'user' ? 'User' : 'Assistant'}: ${event.text}`;
+    }
+    case 'thinking': {
+      return undefined;
+    }
+    case 'tool-call': {
+      // The tool input carries the facts the next turn needs (the eval code, the query/element).
+      const detail =
+        event.name === 'eval' ? event.code : (event.query ?? event.elementId ?? event.snapshotId);
+
+      return detail === undefined || detail === ''
+        ? `Tool call (${event.name})`
+        : `Tool call (${event.name}): ${truncateToolText(detail)}`;
+    }
+    case 'tool-result': {
+      if (!event.ok) {
+        return `Tool result (error): ${event.error ?? 'unknown error'}`;
+      }
+
+      // The result payload (snapshot text, eval return, element details) is often the only record.
+      return event.value === undefined
+        ? 'Tool result (ok)'
+        : `Tool result (ok): ${truncateToolText(stringifyToolValue(event.value))}`;
+    }
+  }
+};
+
+// Newline-joined transcript of every event that renders to a line.
+export const renderEventsAsTranscript = (events: AgentConversationEvent[]): string =>
+  events
+    .map(event => renderEvent(event))
+    .filter((line): line is string => line !== undefined)
+    .join('\n');
+
+// System prompt plus a single user message carrying the rendered transcript.
+export const buildSummarizationMessages = (
+  events: AgentConversationEvent[]
+): KiloGatewayChatMessage[] => [
+  { content: SUMMARY_SYSTEM_PROMPT, role: 'system' },
+  {
+    content: `Summarize the following conversation so it can continue with less context.\n\n${renderEventsAsTranscript(events)}`,
+    role: 'user',
+  },
+];
+
+interface CompactConversationOptions {
+  readonly apiBaseUrl: string;
+  readonly events: AgentConversationEvent[];
+  readonly fetch: FetchLike;
+  readonly keepRecentExchanges?: number;
+  readonly model: string;
+  readonly organizationId?: string | undefined;
+  readonly token: string;
+}
+
+/*
+ * Summarizes everything older than the last `keepRecentExchanges` exchanges via the gateway and
+ * returns the summary as an assistant message followed by the kept events. Resolves to undefined
+ * when there is nothing to summarize or the model returns an empty summary.
+ */
+export const compactConversationEvents = async ({
+  apiBaseUrl,
+  events,
+  fetch,
+  keepRecentExchanges = KEEP_RECENT_EXCHANGES,
+  model,
+  organizationId,
+  token,
+}: CompactConversationOptions): Promise<AgentConversationEvent[] | undefined> => {
+  const { toKeep, toSummarize } = splitEventsForCompaction(events, keepRecentExchanges);
+
+  if (toSummarize.length === 0) {
+    return undefined;
+  }
+
+  const completion = await fetchKiloGatewayChatCompletionStream({
+    apiBaseUrl,
+    fetch,
+    messages: buildSummarizationMessages(toSummarize),
+    model,
+    onContentDelta: () => {},
+    organizationId,
+    token,
+    tools: [],
+  });
+
+  const summary = completion.content ?? '';
+
+  if (summary.trim() === '') {
+    return undefined;
+  }
+
+  return [createAssistantMessage(`${SUMMARY_PREFIX}${summary}`), ...toKeep];
+};
diff --git a/apps/extension/src/shared/agent-llm-turn-runner-core.test.ts b/apps/extension/src/shared/agent-llm-turn-runner-core.test.ts
index d09da6e5d4..8395e84b02 100644
--- a/apps/extension/src/shared/agent-llm-turn-runner-core.test.ts
+++ b/apps/extension/src/shared/agent-llm-turn-runner-core.test.ts
@@ -57,6 +57,42 @@ const streamResponse = (chunks: string[]): Response => {
 };
 
 describe('agent LLM turn runner core', () => {
+  it('forwards completion usage to onUsage', async () => {
+    const usageCalls: unknown[] = [];
+    const fetch: FetchLike = () =>
+      streamResponse([
+        'data: {"choices":[{"delta":{"content":"Done."}}]}\n\n',
+        'data: {"choices":[],"usage":{"completion_tokens":5,"prompt_tokens":999,"total_tokens":1004}}\n\n',
+        'data: [DONE]\n\n',
+      ]);
+
+    await runLlmTurn({
+      apiBaseUrl: 'https://app.kilo.ai',
+      appendEvents: () => {},
+      conversationEvents: [createUserMessage('Hello')],
+      executeToolCall: () => Promise.resolve({ ok: true, value: { text: '' } }),
+      failureMessage: String,
+      fetch,
+      maxToolRounds: 4,
+      model: 'anthropic/claude-sonnet-4',
+      noResponseMessage: 'No response.',
+      onUsage: usage => usageCalls.push(usage),
+      signal: undefined,
+      toToolCallEvents: () => [],
+      token: 'token-1',
+      tooManyToolRoundsMessage: 'Too many rounds.',
+      tools: [],
+      updateAssistantMessage: () => {},
+      updateThinkingBlock: () => {},
+    });
+
+    expect(usageCalls).toContainEqual({
+      completionTokens: 5,
+      promptTokens: 999,
+      totalTokens: 1004,
+    });
+  });
+
   it('streams, runs tools, and continues with tool results', async () => {
     const appendedEvents: AgentConversationEvent[] = [];
     const updatedMessages: string[] = [];
diff --git a/apps/extension/src/shared/agent-llm-turn-runner-core.ts b/apps/extension/src/shared/agent-llm-turn-runner-core.ts
index b49c722b21..87895eef39 100644
---
a/apps/extension/src/shared/agent-llm-turn-runner-core.ts +++ b/apps/extension/src/shared/agent-llm-turn-runner-core.ts @@ -9,6 +9,14 @@ import type { EvalTabResult } from './tab-debugger'; type ToolCallEvent = Extract; +export interface TurnUsage { + readonly completionTokens: number; + readonly promptTokens: number; + readonly totalTokens: number; +} + +export type OnTurnUsage = (usage: TurnUsage) => void; + interface RunLlmTurnOptions { readonly apiBaseUrl: string; readonly appendEvents: (events: AgentConversationEvent[]) => void; @@ -19,6 +27,7 @@ interface RunLlmTurnOptions { readonly maxToolRounds: number; readonly model: string; readonly noResponseMessage: string; + readonly onUsage?: OnTurnUsage | undefined; readonly organizationId?: string | undefined; readonly signal?: AbortSignal | undefined; readonly supportsImages?: boolean | undefined; @@ -60,6 +69,7 @@ export const runLlmTurn = async ({ maxToolRounds, model, noResponseMessage, + onUsage, organizationId, signal, supportsImages = false, @@ -133,6 +143,10 @@ export const runLlmTurn = async ({ } ); + if (completion.usage !== undefined) { + onUsage?.(completion.usage); + } + if (streamedThinkingEventId !== undefined) { const finalStreamedThinkingText = completion.reasoning ?? 
streamedThinkingText; const streamedThinkingEventIndex = completionEvents.findIndex( diff --git a/apps/extension/src/shared/context-usage.test.ts b/apps/extension/src/shared/context-usage.test.ts new file mode 100644 index 0000000000..2e92434134 --- /dev/null +++ b/apps/extension/src/shared/context-usage.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from 'vitest'; +import { formatContextSummary, getContextRatio, getContextTone } from './context-usage'; + +describe('context ratio', () => { + it('returns undefined without a context length', () => { + expect(getContextRatio(100, undefined as number | undefined)).toBeUndefined(); + expect(getContextRatio(100, 0)).toBeUndefined(); + }); + + it('returns a clamped ratio', () => { + expect(getContextRatio(50, 200)).toBeCloseTo(0.25); + expect(getContextRatio(500, 200)).toBe(1); + }); +}); + +describe('context tone', () => { + it('maps ratio to tone', () => { + expect(getContextTone(0.5)).toBe('safe'); + expect(getContextTone(0.75)).toBe('warn'); + expect(getContextTone(0.95)).toBe('danger'); + }); +}); + +describe('context summary formatting', () => { + it('formats tokens and percent', () => { + expect(formatContextSummary(1200, 200_000)).toBe('1,200 / 200,000 tokens (1%)'); + }); + + it('omits percent without a context length', () => { + expect(formatContextSummary(1200, undefined as number | undefined)).toBe('1,200 tokens'); + }); +}); diff --git a/apps/extension/src/shared/context-usage.ts b/apps/extension/src/shared/context-usage.ts new file mode 100644 index 0000000000..5daa198f79 --- /dev/null +++ b/apps/extension/src/shared/context-usage.ts @@ -0,0 +1,45 @@ +export interface ContextUsage { + readonly promptTokens: number; +} + +export const AUTO_COMPACT_RATIO = 0.85; + +const clamp01 = (value: number): number => Math.min(1, Math.max(0, value)); + +export const getContextRatio = ( + promptTokens: number, + contextLength: number | undefined +): number | undefined => { + if (contextLength === undefined || 
contextLength <= 0) { + return undefined; + } + + return clamp01(promptTokens / contextLength); +}; + +export const getContextTone = (ratio: number): 'danger' | 'safe' | 'warn' => { + if (ratio >= 0.9) { + return 'danger'; + } + + if (ratio >= 0.7) { + return 'warn'; + } + + return 'safe'; +}; + +const formatCount = (value: number): string => value.toLocaleString('en-US'); + +export const formatContextSummary = ( + promptTokens: number, + contextLength: number | undefined +): string => { + if (contextLength === undefined || contextLength <= 0) { + return `${formatCount(promptTokens)} tokens`; + } + + const percent = Math.round((promptTokens / contextLength) * 100); + + return `${formatCount(promptTokens)} / ${formatCount(contextLength)} tokens (${percent}%)`; +}; diff --git a/apps/extension/src/shared/kilo-api-client.test.ts b/apps/extension/src/shared/kilo-api-client.test.ts index 72931596bd..d0fda0e58c 100644 --- a/apps/extension/src/shared/kilo-api-client.test.ts +++ b/apps/extension/src/shared/kilo-api-client.test.ts @@ -177,6 +177,14 @@ describe('kilo API client', () => { ); }); + it('parses context_length into contextLength', () => { + const options = parseKiloGatewayModelsResponse({ + data: [{ context_length: 200_000, id: 'a/b', name: 'A: B', opencode: { variants: {} } }], + }); + + expect(options[0]?.contextLength).toBe(200_000); + }); + it('labels thinking efforts compactly', () => { expect(thinkingEffortLabel('medium')).toBe('Med'); expect(thinkingEffortLabel('xhigh')).toBe('XHigh'); diff --git a/apps/extension/src/shared/kilo-api-client.ts b/apps/extension/src/shared/kilo-api-client.ts index 04d4d6b830..fbc53c8d9c 100644 --- a/apps/extension/src/shared/kilo-api-client.ts +++ b/apps/extension/src/shared/kilo-api-client.ts @@ -14,6 +14,7 @@ export type { } from './kilo-gateway-chat-client'; export interface KiloGatewayModelOption { + readonly contextLength?: number; readonly hasUserByokAvailable?: boolean; readonly id: string; readonly isFree?: boolean; @@ 
-45,6 +46,7 @@ interface FetchKiloOrganizationsOptions { } interface ParsedGatewayModelOption { + contextLength?: number; hasUserByokAvailable?: boolean; id: string; isFree?: boolean; @@ -64,6 +66,7 @@ const modelSchema = z.object({ input_modalities: z.array(z.string()).optional(), }) .optional(), + context_length: z.number().nullable().optional(), hasUserByokAvailable: z.boolean().optional(), id: nonEmptyStringSchema, isFree: z.boolean().optional(), @@ -129,6 +132,7 @@ const compareModelOptions = ( const toGatewayModelOption = (model: ParsedGatewayModelOption): KiloGatewayModelOption => { const option: { + contextLength?: number; hasUserByokAvailable?: boolean; id: string; isFree?: boolean; @@ -144,6 +148,10 @@ const toGatewayModelOption = (model: ParsedGatewayModelOption): KiloGatewayModel variants: model.variants, }; + if (model.contextLength !== undefined) { + option.contextLength = model.contextLength; + } + if (model.hasUserByokAvailable !== undefined) { option.hasUserByokAvailable = model.hasUserByokAvailable; } @@ -183,6 +191,11 @@ export const parseKiloGatewayModelsResponse = (value: unknown): KiloGatewayModel isPreferred: model.data.preferredIndex !== undefined, name: formatShortModelName(model.data.name), variants: getModelVariants(model.data), + ...(model.data.context_length !== undefined && + model.data.context_length !== null && + model.data.context_length > 0 + ? { contextLength: model.data.context_length } + : {}), ...(model.data.hasUserByokAvailable === undefined ? 
{} : { hasUserByokAvailable: model.data.hasUserByokAvailable }), diff --git a/apps/extension/src/shared/kilo-gateway-chat-client.ts b/apps/extension/src/shared/kilo-gateway-chat-client.ts index 618b8ee9e3..128307569c 100644 --- a/apps/extension/src/shared/kilo-gateway-chat-client.ts +++ b/apps/extension/src/shared/kilo-gateway-chat-client.ts @@ -54,4 +54,9 @@ export interface KiloGatewayChatCompletion { readonly reasoning?: string; readonly reasoningDetails?: readonly unknown[]; readonly toolCalls: KiloGatewayToolCallRequest[]; + readonly usage?: { + readonly completionTokens: number; + readonly promptTokens: number; + readonly totalTokens: number; + }; } diff --git a/apps/extension/src/shared/kilo-gateway-chat-stream-client.test.ts b/apps/extension/src/shared/kilo-gateway-chat-stream-client.test.ts index 0790e3cfff..0bbf61024b 100644 --- a/apps/extension/src/shared/kilo-gateway-chat-stream-client.test.ts +++ b/apps/extension/src/shared/kilo-gateway-chat-stream-client.test.ts @@ -1,7 +1,10 @@ /* eslint-disable max-lines */ import { describe, expect, it } from 'vitest'; import { z } from 'zod'; -import { fetchKiloGatewayChatCompletionStream } from './kilo-api-client'; +import { + fetchKiloGatewayChatCompletionStream, + parseKiloGatewayChatCompletionStream, +} from './kilo-api-client'; import type { FetchLike } from './auth'; const jsonRequestBodySchema = z.record(z.string(), z.unknown()); @@ -462,4 +465,20 @@ describe('kilo gateway chat stream client', () => { expect(seenBody).not.toHaveProperty('reasoning'); }); + + it('extracts usage from the trailing usage chunk', () => { + const sse = [ + 'data: {"choices":[{"delta":{"content":"hi"}}]}\n\n', + 'data: {"choices":[],"usage":{"prompt_tokens":1200,"completion_tokens":34,"total_tokens":1234}}\n\n', + 'data: [DONE]\n\n', + ].join(''); + + const completion = parseKiloGatewayChatCompletionStream(sse, () => {}); + + expect(completion.usage).toStrictEqual({ + completionTokens: 34, + promptTokens: 1200, + totalTokens: 
1234, + }); + }); }); diff --git a/apps/extension/src/shared/kilo-gateway-chat-stream-client.ts b/apps/extension/src/shared/kilo-gateway-chat-stream-client.ts index 27f43fd8a3..b4ce88f9c9 100644 --- a/apps/extension/src/shared/kilo-gateway-chat-stream-client.ts +++ b/apps/extension/src/shared/kilo-gateway-chat-stream-client.ts @@ -36,6 +36,7 @@ interface StreamingAccumulator { reasoning: string; reasoningDetailsByIndex: Map>; toolCallsByIndex: Map; + usage: KiloGatewayChatCompletion['usage']; } interface StreamingDeltaHandlers { @@ -81,6 +82,11 @@ const streamingToolCallDeltaSchema = z.object({ id: z.string().optional(), index: z.number(), }); +const usageSchema = z.object({ + completion_tokens: z.number(), + prompt_tokens: z.number(), + total_tokens: z.number(), +}); const streamDataSchema = z.object({ choices: z.array( z.object({ @@ -92,6 +98,7 @@ const streamDataSchema = z.object({ }), }) ), + usage: usageSchema.nullable().optional(), }); // Reasoning blocks stream incrementally like content: text accumulates while structural fields (type/signature/data/index) carry their final value. Providers may require these signed/encrypted blocks replayed verbatim on the assistant tool-call message or they reject the continuation. const appendableReasoningKeys = new Set(['data', 'summary', 'text']); @@ -247,6 +254,14 @@ const applyStreamingData = ( return; } + if (parsed.data.usage !== undefined && parsed.data.usage !== null) { + accumulator.usage = { + completionTokens: parsed.data.usage.completion_tokens, + promptTokens: parsed.data.usage.prompt_tokens, + totalTokens: parsed.data.usage.total_tokens, + }; + } + const choice = parsed.data.choices.at(0); if (choice === undefined) { @@ -287,6 +302,7 @@ const toCompletion = (accumulator: StreamingAccumulator): KiloGatewayChatComplet ...(accumulator.content === '' ? {} : { content: accumulator.content }), ...(accumulator.reasoning === '' ? {} : { reasoning: accumulator.reasoning }), ...(reasoningDetails.length === 0 ? 
{} : { reasoningDetails }), + ...(accumulator.usage === undefined ? {} : { usage: accumulator.usage }), toolCalls: [...accumulator.toolCallsByIndex.values()].map(toolCall => parseToolCallBuffer(toolCall) ), @@ -305,6 +321,7 @@ export const parseKiloGatewayChatCompletionStream = ( reasoning: '', reasoningDetailsByIndex: new Map(), toolCallsByIndex: new Map(), + usage: undefined, }; const handlers = { onContentDelta, onReasoningDelta }; @@ -362,6 +379,7 @@ const consumeKiloGatewayChatCompletionStream = async ( reasoning: '', reasoningDetailsByIndex: new Map(), toolCallsByIndex: new Map(), + usage: undefined, }; const reader = body.getReader(); const decoder = new TextDecoder(); @@ -393,6 +411,7 @@ export const fetchKiloGatewayChatCompletionStream = async ({ messages, model, stream: true, + stream_options: { include_usage: true }, temperature: 0, tool_choice: tools.length === 0 ? 'none' : 'auto', tools, diff --git a/apps/extension/tests/e2e/context-usage.test.ts b/apps/extension/tests/e2e/context-usage.test.ts new file mode 100644 index 0000000000..9df71f69e6 --- /dev/null +++ b/apps/extension/tests/e2e/context-usage.test.ts @@ -0,0 +1,226 @@ +/* eslint-disable import/no-nodejs-modules, max-lines */ +import { expect, test } from '@playwright/test'; +import { rm } from 'node:fs/promises'; +import { mockKiloApi } from './kilo-api-fixture'; +import type { Page } from '@playwright/test'; +import { + launchExtensionContext, + seedExtensionAuth, + setExtensionStorage, + startFixtureServer, + waitForStoredConversationText, +} from './extension-context-fixture'; + +const safeToolNames = ['get_page_snapshot', 'get_element_details', 'find_in_page']; + +/* + * Read the persisted conversation store as a JSON string. Storage is the source of truth and is + * immune to the virtualized conversation list's render/scroll timing, so assertions against it are + * not racy while auto-compaction rewrites events. 
+ */ +const readStoredConversationsJson = (page: Page): Promise => + page.evaluate(async () => { + const storage = ( + globalThis as typeof globalThis & { + chrome?: { + storage?: { + local?: { get: (keys: string[]) => Promise> }; + }; + }; + } + ).chrome?.storage?.local; + + if (storage === undefined) { + throw new Error('Extension runtime storage is unavailable.'); + } + + const items = await storage.get(['kiloAgentConversations']); + + return JSON.stringify(items['kiloAgentConversations'] ?? null); + }); + +const modelWithContextLength = [ + { + contextLength: 1000, + id: 'anthropic/claude-sonnet-4', + name: 'Anthropic: Claude Sonnet 4', + variants: { high: {}, low: {}, medium: {} }, + }, +]; + +// Three-user-message conversation for compaction (splitEventsForCompaction needs >KEEP_RECENT_EXCHANGES=2 user messages) +const seededConversationStore = { + activeConversationId: 'conv-1', + conversations: [ + { + events: [ + { id: 'e1', role: 'user', text: 'First message', type: 'message' }, + { id: 'e2', role: 'assistant', text: 'First reply', type: 'message' }, + { id: 'e3', role: 'user', text: 'Second message', type: 'message' }, + { id: 'e4', role: 'assistant', text: 'Second reply', type: 'message' }, + { id: 'e5', role: 'user', text: 'Third message', type: 'message' }, + { id: 'e6', role: 'assistant', text: 'Third reply', type: 'message' }, + ], + id: 'conv-1', + title: 'Seeded conversation', + updatedAt: '2026-06-26T10:00:00.000Z', + }, + ], + openConversationIds: ['conv-1'], +}; + +test('context donut shows usage after a reply', async () => { + const fixture = await startFixtureServer(); + const { context, extensionId, userDataDir } = await launchExtensionContext(); + + try { + await mockKiloApi(context, { + firstCompletionEvents: [ + { choices: [{ delta: { content: 'Donut reply.' 
} }] }, + { choices: [], usage: { completion_tokens: 10, prompt_tokens: 850, total_tokens: 860 } }, + ], + models: modelWithContextLength, + toolNames: safeToolNames, + }); + + const page = await context.newPage(); + await page.goto(fixture.url); + + const sidePanel = await context.newPage(); + await sidePanel.goto(`chrome-extension://${extensionId}/sidepanel.html`); + await seedExtensionAuth(sidePanel); + await sidePanel.reload(); + + await sidePanel.getByLabel('Message agent').fill('Show me usage'); + // Wait for send to be enabled (model + target tab ready) + await expect(sidePanel.getByRole('button', { name: 'Send message' })).toBeEnabled(); + await sidePanel.getByLabel('Message agent').press('Enter'); + await expect(sidePanel.getByText('Donut reply.')).toBeVisible(); + + // The donut aria-label is "Context usage: " + const donut = sidePanel.getByLabel(/^Context usage:/u); + await expect(donut).toBeVisible(); + await expect(donut).toHaveAttribute('aria-label', 'Context usage: 850 / 1,000 tokens (85%)'); + } finally { + await context.close(); + await fixture.close(); + await rm(userDataDir, { force: true, recursive: true }); + } +}); + +test('auto-compaction fires when usage exceeds 85% threshold', async () => { + const fixture = await startFixtureServer(); + const seenChatBodies: unknown[] = []; + const { context, extensionId, userDataDir } = await launchExtensionContext(); + + try { + await mockKiloApi(context, { + // First call: the user's turn β€” returns usage β‰₯85% which triggers auto-compact + firstCompletionEvents: [ + { choices: [{ delta: { content: 'Threshold reply.' } }] }, + { choices: [], usage: { completion_tokens: 10, prompt_tokens: 900, total_tokens: 910 } }, + ], + models: modelWithContextLength, + // Second call: the summarization request (tool_choice: 'none') + secondCompletionEvents: [ + { choices: [{ delta: { content: 'SUMMARY: user inspected the page.' 
} }] }, + ], + seenChatBodies, + toolNames: safeToolNames, + }); + + const page = await context.newPage(); + await page.goto(fixture.url); + + const sidePanel = await context.newPage(); + await sidePanel.goto(`chrome-extension://${extensionId}/sidepanel.html`); + await seedExtensionAuth(sidePanel); + // Seed a conversation with 3 user messages so splitEventsForCompaction has something to compact + await setExtensionStorage(sidePanel, { kiloAgentConversations: seededConversationStore }); + await sidePanel.reload(); + + await sidePanel.getByLabel('Message agent').fill('Trigger compact'); + await expect(sidePanel.getByRole('button', { name: 'Send message' })).toBeEnabled(); + await sidePanel.getByLabel('Message agent').press('Enter'); + + /* + * Auto-compaction fires in the run's finally. Assert against persisted storage rather than the + * virtualized list, which can momentarily unmount rows while compaction rewrites events. + */ + await waitForStoredConversationText(sidePanel, 'Compacted earlier context'); + + /* + * The summary replaced the earliest seeded messages in the same atomic write, so once the + * prefix is stored the old messages are already gone. 
+ */ + const conversationsJson = await readStoredConversationsJson(sidePanel); + expect(conversationsJson).toContain('SUMMARY: user inspected the page.'); + expect(conversationsJson).not.toContain('First message'); + expect(conversationsJson).not.toContain('Second message'); + + // The summarization call must have tool_choice: 'none' (sent with tools: []) + const [, summarizationBody] = seenChatBodies; + expect(summarizationBody).toMatchObject({ tool_choice: 'none' }); + } finally { + await context.close(); + await fixture.close(); + await rm(userDataDir, { force: true, recursive: true }); + } +}); + +test('manual "Compact now" compacts the conversation', async () => { + const fixture = await startFixtureServer(); + const { context, extensionId, userDataDir } = await launchExtensionContext(); + + try { + await mockKiloApi(context, { + /* + * First call: normal user turn. Sub-threshold usage (300/1000 = 30%) leaves auto-compaction + * untriggered and gives the donut a non-zero token count. "Compact now" is enabled by having + * summarizable history (the seeded conversation), not by this usage value. + */ + firstCompletionEvents: [ + { choices: [{ delta: { content: 'Normal reply.' } }] }, + { choices: [], usage: { completion_tokens: 10, prompt_tokens: 300, total_tokens: 310 } }, + ], + models: modelWithContextLength, + // Second call: summarization triggered by "Compact now" + secondCompletionEvents: [ + { choices: [{ delta: { content: 'SUMMARY: manually compacted.' 
} }] }, + ], + toolNames: safeToolNames, + }); + + const page = await context.newPage(); + await page.goto(fixture.url); + + const sidePanel = await context.newPage(); + await sidePanel.goto(`chrome-extension://${extensionId}/sidepanel.html`); + await seedExtensionAuth(sidePanel); + await setExtensionStorage(sidePanel, { kiloAgentConversations: seededConversationStore }); + await sidePanel.reload(); + + await sidePanel.getByLabel('Message agent').fill('Manual compact trigger'); + await expect(sidePanel.getByRole('button', { name: 'Send message' })).toBeEnabled(); + await sidePanel.getByLabel('Message agent').press('Enter'); + await expect(sidePanel.getByText('Normal reply.')).toBeVisible(); + + // Open the donut popover and click Compact now + await sidePanel.getByLabel(/^Context usage:/u).click(); + await sidePanel.getByRole('button', { name: 'Compact now' }).click(); + + await expect(sidePanel.getByText(/Compacted earlier context/u)).toBeVisible({ + timeout: 10_000, + }); + /* + * Compaction released the input lock: with a fresh draft, Send is enabled again. It stays + * disabled on an empty draft, which is unrelated to compaction. 
+ */ + await sidePanel.getByLabel('Message agent').fill('After compaction'); + await expect(sidePanel.getByRole('button', { name: 'Send message' })).toBeEnabled(); + } finally { + await context.close(); + await fixture.close(); + await rm(userDataDir, { force: true, recursive: true }); + } +}); diff --git a/apps/extension/tests/e2e/conversation-drafts.test.ts b/apps/extension/tests/e2e/conversation-drafts.test.ts new file mode 100644 index 0000000000..dfcc31d471 --- /dev/null +++ b/apps/extension/tests/e2e/conversation-drafts.test.ts @@ -0,0 +1,55 @@ +/* eslint-disable import/no-nodejs-modules */ +import { expect, test } from '@playwright/test'; +import { rm } from 'node:fs/promises'; +import { mockKiloApi } from './kilo-api-fixture'; +import { + launchExtensionContext, + seedExtensionAuth, + startFixtureServer, +} from './extension-context-fixture'; + +test('per-conversation drafts are preserved when switching tabs', async () => { + const fixture = await startFixtureServer(); + const { context, extensionId, userDataDir } = await launchExtensionContext(); + + try { + await mockKiloApi(context); + + const page = await context.newPage(); + await page.goto(fixture.url); + + const sidePanel = await context.newPage(); + await sidePanel.goto(`chrome-extension://${extensionId}/sidepanel.html`); + await seedExtensionAuth(sidePanel); + await sidePanel.reload(); + + const input = sidePanel.getByLabel('Message agent'); + + // Wait for the panel to be ready (model loaded) + await expect(sidePanel.getByLabel('Model')).not.toContainText('Loading'); + + // Type a draft in conversation 1 + await input.fill('draft A'); + await expect(input).toHaveValue('draft A'); + + // Open a new conversation β€” input should be empty + await sidePanel.getByLabel('New conversation').click(); + await expect(input).toHaveValue(''); + + // Type a draft in conversation 2 + await input.fill('draft B'); + await expect(input).toHaveValue('draft B'); + + // Switch back to conversation 1 β€” draft A 
restored + await sidePanel.getByRole('tab', { name: /Conversation 1/u }).click(); + await expect(input).toHaveValue('draft A'); + + // Switch to conversation 2 β€” draft B restored + await sidePanel.getByRole('tab', { name: /Conversation 2/u }).click(); + await expect(input).toHaveValue('draft B'); + } finally { + await context.close(); + await fixture.close(); + await rm(userDataDir, { force: true, recursive: true }); + } +}); diff --git a/apps/extension/tests/e2e/conversation-tabs.test.ts b/apps/extension/tests/e2e/conversation-tabs.test.ts index ed732a57a1..32306c7384 100644 --- a/apps/extension/tests/e2e/conversation-tabs.test.ts +++ b/apps/extension/tests/e2e/conversation-tabs.test.ts @@ -534,9 +534,11 @@ test('closing a conversation removes only that tab', async () => { await sidePanel.getByLabel('Message agent').press('Enter'); await expect(sidePanel.getByText('Close this reply.')).toBeVisible(); + // Closing a tab must NOT raise a dialog β€” it closes immediately + let dialogFired = false; sidePanel.once('dialog', async dialog => { - expect(dialog.message()).toContain('Close this conversation tab?'); - await dialog.accept(); + dialogFired = true; + await dialog.dismiss(); }); await sidePanel.getByLabel('Close Close this').click(); @@ -544,6 +546,7 @@ test('closing a conversation removes only that tab', async () => { await expect(sidePanel.getByText('Close this reply.')).toBeHidden(); await expect(sidePanel.getByRole('tab', { name: /Keep this/u })).toBeVisible(); await expect(sidePanel.getByText('Keep this reply.')).toBeVisible(); + expect(dialogFired).toBe(false); await sidePanel.getByLabel('History').click(); await sidePanel.getByLabel('Open Close this').click(); @@ -585,9 +588,6 @@ test('history can delete closed conversations without confirmation', async () => await sidePanel.getByLabel('Message agent').press('Enter'); await expect(sidePanel.getByText('Keep open reply.')).toBeVisible(); - sidePanel.once('dialog', async dialog => { - await 
dialog.accept(); - }); await sidePanel.getByLabel('Close Delete later').click(); await expect(sidePanel.getByRole('tab', { name: /Delete later/u })).toBeHidden(); @@ -632,9 +632,6 @@ test('history reuses an empty inactive tab when opening a closed conversation', await expect(sidePanel.getByText('Restore me reply.')).toBeVisible(); await sidePanel.getByLabel('New conversation').click(); - sidePanel.once('dialog', async dialog => { - await dialog.accept(); - }); await sidePanel.getByLabel('Close Restore me').click(); await sidePanel.getByLabel('History').click(); diff --git a/apps/extension/tests/e2e/firefox-selenium-e2e.ts b/apps/extension/tests/e2e/firefox-selenium-e2e.ts index a9773ffca0..6c22b91ed6 100644 --- a/apps/extension/tests/e2e/firefox-selenium-e2e.ts +++ b/apps/extension/tests/e2e/firefox-selenium-e2e.ts @@ -470,28 +470,6 @@ const waitForTextGone = async (driver: WebDriver, text: string): Promise = ); }; -const acceptAlertWithText = async (driver: WebDriver, text: string): Promise => { - await driver.wait( - async () => { - try { - const alert = await driver.switchTo().alert(); - const alertText = await alert.getText(); - - if (!alertText.includes(text)) { - return false; - } - - await alert.accept(); - return true; - } catch { - return false; - } - }, - waitMs, - `Timed out waiting for alert text: ${text}` - ); -}; - const findManifestUrl = async (driver: WebDriver): Promise => { await driver.get('about:debugging#/runtime/this-firefox'); await waitForText(driver, 'Kilo Extension'); @@ -1067,8 +1045,8 @@ const scenarios: FirefoxScenario[] = [ await sendMessage(session.driver, 'Close this'); await waitForText(session.driver, 'Close this reply.'); + // Close-without-confirm: no dialog fires β€” tab closes immediately await clickButtonByLabel(session.driver, 'Close Close this'); - await acceptAlertWithText(session.driver, 'Close this conversation tab?'); await waitForTextGone(session.driver, 'Close this reply.'); await waitForText(session.driver, 'Keep this 
reply.'); diff --git a/apps/extension/tests/e2e/kilo-api-fixture.ts b/apps/extension/tests/e2e/kilo-api-fixture.ts index 74865d5010..661baab266 100644 --- a/apps/extension/tests/e2e/kilo-api-fixture.ts +++ b/apps/extension/tests/e2e/kilo-api-fixture.ts @@ -51,6 +51,7 @@ const evalFixtureCode = `const ${longEvalIdentifier} = document.documentElement. const chatCompletionsPath = '/api/gateway/v1/chat/completions'; const dangerousToolNames = ['get_page_snapshot', 'get_element_details', 'find_in_page', 'eval']; interface MockGatewayModel { + readonly contextLength?: number; readonly id: string; readonly name: string; readonly variants?: Record; @@ -127,6 +128,7 @@ export const mockKiloApi = async ( id: model.id, name: model.name, opencode: { variants: model.variants ?? { high: {}, low: {}, medium: {} } }, + ...(model.contextLength === undefined ? {} : { context_length: model.contextLength }), }; return Object.assign( @@ -163,26 +165,36 @@ export const mockKiloApi = async ( const toolNames = options.toolNamesByCall?.[chatCompletionCalls - 1] ?? options.toolNames ?? dangerousToolNames; - expect(body).toMatchObject({ stream: true, tool_choice: 'auto' }); - expect(parsedBody.success ? expectedModelIds.includes(parsedBody.data.model) : false).toBe( - true - ); - expect( - parsedBody.success && parsedBody.data.tools !== undefined - ? parsedBody.data.tools.map(tool => { - const parsedTool = toolDefinitionSchema.safeParse(tool); - - return parsedTool.success ? 
parsedTool.data.function.name : undefined; - }) - : [] - ).toStrictEqual(toolNames); - const userMessages = messages - .map(message => userMessageSchema.safeParse(message)) - .filter(message => message.success) - .map(message => message.data); - expect(userMessages.at(-1)?.content).toEqual(expect.stringContaining('')); - expect(userMessages.at(-1)?.content).toEqual(expect.stringContaining('Current time:')); - expect(userMessages.at(-1)?.content).toEqual(expect.stringContaining('Timezone:')); + // Summarization calls use tool_choice: 'none' (tools: []); skip normal-turn assertions for them. + const isSummarizationCall = + parsedBody.success && + Array.isArray(parsedBody.data.tools) && + parsedBody.data.tools.length === 0; + + if (isSummarizationCall) { + // Summarization calls skip normal-turn assertions (tool_choice: 'none', tools: []) + } else { + expect(body).toMatchObject({ stream: true, tool_choice: 'auto' }); + expect(parsedBody.success ? expectedModelIds.includes(parsedBody.data.model) : false).toBe( + true + ); + expect( + parsedBody.success && parsedBody.data.tools !== undefined + ? parsedBody.data.tools.map(tool => { + const parsedTool = toolDefinitionSchema.safeParse(tool); + + return parsedTool.success ? 
parsedTool.data.function.name : undefined; + }) + : [] + ).toStrictEqual(toolNames); + const userMessages = messages + .map(message => userMessageSchema.safeParse(message)) + .filter(message => message.success) + .map(message => message.data); + expect(userMessages.at(-1)?.content).toEqual(expect.stringContaining('')); + expect(userMessages.at(-1)?.content).toEqual(expect.stringContaining('Current time:')); + expect(userMessages.at(-1)?.content).toEqual(expect.stringContaining('Timezone:')); + } if (chatCompletionCalls === 1) { if (options.beforeFirstCompletion !== undefined) { diff --git a/apps/extension/tests/e2e/local-backend-live.test.ts b/apps/extension/tests/e2e/local-backend-live.test.ts index b3a7af2023..534fb9d679 100644 --- a/apps/extension/tests/e2e/local-backend-live.test.ts +++ b/apps/extension/tests/e2e/local-backend-live.test.ts @@ -911,3 +911,59 @@ test('live local backend dangerous mode eval can update the selected page', asyn await rm(userDataDir, { force: true, recursive: true }); } }); + +test('live local backend manual Compact now compacts a frontier conversation', async () => { + const fixture = await startFixtureServer({ title: 'Kilo live compaction target' }); + const requests: ChatRequestSummary[] = []; + const { context, extensionId, userDataDir } = await launchExtensionContext(); + + recordChatRequests(context, requests); + + try { + const targetPage = await context.newPage(); + await targetPage.goto(fixture.url); + + const sidePanel = await context.newPage(); + await signInWithLocalDeviceAuth({ context, extensionId, sidePanel }); + await expect(sidePanel.getByLabel('Target tab')).toContainText('Kilo live compaction target'); + await selectFrontierModel(sidePanel); + + const messageInput = sidePanel.getByLabel('Message agent'); + + /* + * A short two-exchange conversation: manual "Compact now" summarizes the whole conversation, so + * it still compacts. (With the auto threshold this little history would be inert.) 
+ */ + for (const text of [ + 'COMPACT_ONE: reply with one short sentence.', + 'COMPACT_TWO: reply with one short sentence.', + ]) { + await messageInput.fill(text); + await messageInput.press('Enter'); + await expect(sidePanel.getByRole('button', { name: 'Send message' })).toBeVisible({ + timeout: 120_000, + }); + } + + const donut = sidePanel.getByLabel(/^Context usage:/u); + await expect(donut).toBeVisible(); + await donut.click(); + const compactButton = sidePanel.getByRole('button', { name: 'Compact now' }); + + await expect(compactButton).toBeEnabled(); + await compactButton.click(); + + await waitForStoredConversationSnapshot(sidePanel, snapshot => + snapshot.includes('Compacted earlier context') + ); + + // The summarization request goes out with no tools (tool_choice: 'none'). + expect( + requests.some(request => request.model === frontierModel && request.toolNames.length === 0) + ).toBe(true); + } finally { + await context.close(); + await fixture.close(); + await rm(userDataDir, { force: true, recursive: true }); + } +});