diff --git a/.gitignore b/.gitignore index 31a993e..8775bf5 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,10 @@ web-ext.config.ts # E2E test certificates (auto-generated by e2e/certs/generate.sh at test setup) e2e/certs/*.pem -# Local documentation and audit reports (not shipped in the repo) +# Local documentation and audit reports (not shipped in the repo). +# Long-lived feature specs follow the *-spec.md naming and ARE tracked. docs/*.md +!docs/*-spec.md # Test artifacts (generated by vitest/bun test) coverage/ diff --git a/docs/attachment-cost-spec.md b/docs/attachment-cost-spec.md new file mode 100644 index 0000000..be43a13 --- /dev/null +++ b/docs/attachment-cost-spec.md @@ -0,0 +1,212 @@ +# Attachment Cost Spec + +Token cost math for image and PDF attachments in the pre-submit estimate. This file +is the source of truth for `lib/attachment-cost.ts` and the drift tests in +`tests/unit/attachment-cost.test.ts`. If Anthropic publishes a different formula or +caps, update this file in the same PR that updates the code. + +Last verified against Anthropic docs: 2026-04-26. + +Sources: +- https://platform.claude.com/docs/en/build-with-claude/vision +- https://platform.claude.com/docs/en/build-with-claude/pdf-support + +## Image cost + +### Formula (verbatim) + +> An image uses approximately `width * height / 750` tokens, where the width and +> height are expressed in pixels. + +### Per-model resolution caps (verbatim) + +> The maximal native image resolution is: +> - For Claude Opus 4.7: 4784 tokens, and at most 2576 pixels on the long edge. +> - For other models: 1568 tokens, and at most 1568 pixels on the long edge. + +When the long edge exceeds the per-model cap, Anthropic resizes the image +preserving aspect ratio, then computes the formula. The result is also clamped +to the per-model max-tokens cap. 
+ +### Algorithm + +``` +maxLongPx, maxTokens = + Opus 4.7 -> (2576, 4784) + others -> (1568, 1568) + +if max(w, h) > maxLongPx: + scale = maxLongPx / max(w, h) + w' = round(w * scale) + h' = round(h * scale) +else: + w', h' = w, h + +tokens = min(round(w' * h' / 750), maxTokens) +``` + +### Verification table (Sonnet 4.6, max 1568 px / 1568 tokens) + +| Input pixels | Anthropic published | Our formula | +|---|---|---| +| 200 x 200 | ~54 | 53 | +| 1000 x 1000 | ~1334 | 1333 | +| 1092 x 1092 | ~1568 | 1590 capped to 1568 | +| 1920 x 1080 | ~1568 (downscaled) | resized to 1568 x 882, 1844 capped to 1568 | +| 2000 x 1500 | ~1568 (downscaled) | resized to 1568 x 1176, 2459 capped to 1568 | + +### Verification table (Opus 4.7, max 2576 px / 4784 tokens) + +| Input pixels | Anthropic published | Our formula | +|---|---|---| +| 200 x 200 | ~54 | 53 | +| 1000 x 1000 | ~1334 | 1333 | +| 1092 x 1092 | ~1590 | 1590 | +| 1920 x 1080 | ~2765 | 2765 | +| 2000 x 1500 | ~4000 | 4000 | + +Every row in both tables is asserted by `tests/unit/attachment-cost.test.ts`. If +Anthropic changes the formula or the caps, those tests fail and we re-derive. + +### Expected error vs real API + +Sub 5 percent. The formula is deterministic. The only fuzz comes from +Anthropic's word "approximately" and any off-by-one differences in their +internal rounding. We have not seen a case where our prediction misses the +published example by more than one token. + +### Models with no published image support + +Returns `null` for image tokens. The caller renders `?` and skips adding to +the total. Today every Claude model in `assets/pricing.json` supports vision, +so this branch is defensive. + +## PDF cost + +### What Anthropic actually publishes + +Two cost components, additive (verbatim): + +> Text token costs: Each page typically uses 1,500-3,000 tokens per page +> depending on content density. Standard API pricing applies with no additional +> PDF fees. 
+> +> Image token costs: Since each page is converted into an image, the same +> image-based cost calculations are applied. + +Anthropic does not publish: +- The DPI used when rendering each PDF page to an image. +- A per-page image-token formula independent of DPI. +- A combined per-page total. + +The only published combined-cost data point is from the Bedrock section of the +PDF doc: +- Document Chat (text-only fallback): 1,000 tokens for 3 pages (~333 / page). +- Claude PDF Chat (full visual): 7,000 tokens for 3 pages (~2,333 / page). + +### Our policy + +Surface the published 1,500-3,000 range as a low-high pair. Never collapse to +a midpoint. The overlay shows the range. The drift tests assert the constants +verbatim. + +``` +PDF_TOKENS_PER_PAGE_LOW = 1500 +PDF_TOKENS_PER_PAGE_HIGH = 3000 +``` + +For a PDF with N pages: low = N * 1500, high = N * 3000. + +The image-per-page contribution is real but unquantifiable from public data. +We disclose this once, in the overlay, as: "PDFs with charts may cost more". +Nothing more elaborate. We will not invent a DPI or interpolate from Bedrock. + +### Inherent error band + +Plus or minus 33 percent from Anthropic's own published range, plus an +unmeasurable amount for the per-page image rendering. This is a property of +the document, not a property of our code. We cannot fix it; we can only +report it honestly. + +### Hard limits (verified) + +| Limit | Value | Applies to | +|---|---|---| +| Pages per request | 600 | 1M-context models | +| Pages per request | 100 | 200K-context models | +| Total request size | 32 MB | All | +| Format | Standard PDF, no passwords or encryption | All | + +When attached page count exceeds the per-model cap, the agent emits a hard +warning: "`<N>` pages exceeds the `<cap>`-page limit on this model". + +## Page-count parsing + +We extract the page count locally without a heavy PDF library. The +`lib/pdf-page-count.ts` module scans the PDF binary for the page tree root +and reads its `/Count` entry. 
Falls back to counting individual `/Type /Page` +objects when the root is not findable. + +Returns `null` for: +- Encrypted PDFs (no `/Encrypt` decoder). +- PDFs whose page tree lives entirely inside compressed object streams. +- Malformed files. + +When `null`, the overlay shows `?` for the page count and omits the PDF from +the cost estimate. The user still sees the file is attached. + +### Why not pdfjs-dist + +The official pdf.js library is the canonical parser, but in an MV3 service +worker or content-script bundle it costs ~600 KB gzipped and brings DOM +dependencies that complicate the build. For a one-shot page-count read we +do not need PDF parsing depth; the page-tree regex is good enough for ~95 +percent of standard PDFs and ships in 30 lines with no dependency footprint. + +If accuracy ever matters (encrypted PDFs, fully-compressed page trees), we +swap in pdfjs-dist via an offscreen document. Filed as a Wave-2 follow-up. + +## General hard limits (verified) + +Reused by the cost agent for warnings on both kinds of attachments. + +| Limit | Value | Source | +|---|---|---| +| Image dimensions | 8000 x 8000 px | Vision doc, "General limits" | +| Image dimensions when more than 20 images | 2000 x 2000 px | Same | +| Image file size | 5 MB API, 10 MB claude.ai | Vision FAQ | +| Images per request | 100 (200K models) / 600 (1M models) | Vision doc | +| Image formats | JPEG, PNG, GIF, WebP | Vision FAQ | +| Total request size | 32 MB | PDF doc, "Maximum request size" | + +## Active warning thresholds + +These are the points at which the agent surfaces a hard warning. The numbers +are pinned by tests; tighten only if Anthropic publishes a stricter limit. 
+ +| Warning | Trigger | Source / rationale | +|---|---|---| +| PDF page-cap exceeded | total PDF pages > 600 (1M context) or > 100 (200K context) | Anthropic verbatim | +| Aggregate request size approaching cap | total attachment bytes > 30 MB | 2 MB margin under the 32 MB hard cap for prompt body and JSON overhead | +| Aggregate request size exceeds cap | total attachment bytes > 32 MB | Anthropic hard cap | +| Context-window overrun | projected (history + draft + attachments high) >= 90 % of context window | Anthropic explicit caveat: "Dense PDFs can fill the context window before reaching the page limit" | +| Session projection over 90 % | currentSessionPct + estimatedSessionPct (low) >= 90 % | Existing pre-submit warning | + +Coaching copy mirrors Anthropic's own published advice: "Try splitting the +document into sections; for large files, since each page is processed as an +image, downsampling embedded images can also help." + +## Empirical calibration (Wave-2) + +The honest path to single-percent accuracy is the Anthropic `count_tokens` +endpoint. Sending the actual prompt + attachments returns the real input +token count, no estimation. That requires API-key plumbing, a request +budget, and a privacy review. Not in scope for this issue. Filed separately +when Wave-1 has shipped. + +## Drift policy + +Update this file in lockstep with `lib/attachment-cost.ts`. The unit tests +treat every number in the verification tables above as ground truth. If a +test fails, the assumption is that Anthropic has changed something; refetch +the docs, update this file, update the constants, ship together. 
diff --git a/entrypoints/claude-ai.content.ts b/entrypoints/claude-ai.content.ts index 7335cef..87a22b0 100644 --- a/entrypoints/claude-ai.content.ts +++ b/entrypoints/claude-ai.content.ts @@ -8,7 +8,10 @@ import { isValidBridgeSchema } from '../lib/bridge-validation'; import { INITIAL_STATE, applyTokenBatch, applyStreamComplete, applyStorageResponse, applyHealthBroken, applyHealthRecovered, applyMessageLimit, applyRestoredConversation, applyDraftEstimate, clearDraftEstimate, applyUsageBudget } from '../lib/overlay-state'; import { computeUsageBudget, getTrackedUtilization } from '../lib/usage-budget'; import { parseUsageResponse } from '../lib/usage-limits-parser'; -import { computePreSubmitEstimate } from '../lib/pre-submit'; +import { computePreSubmitEstimate, MIN_DRAFT_CHARS } from '../lib/pre-submit'; +import { computeAttachmentCost } from '../lib/attachment-cost'; +import type { AttachmentDescriptor } from '../lib/attachment-cost'; +import { countPdfPages } from '../lib/pdf-page-count'; import { createOverlay } from '../ui/overlay'; import { showEnableBanner } from '../ui/enable-banner'; import { ClaudeAdapter } from '../lib/adapters/claude'; @@ -202,9 +205,24 @@ async function initializeMonitoring(): Promise { // Compose box observer for pre-submit cost estimation. let composeBoxRef: HTMLElement | null = null; + let composeFormRef: HTMLElement | null = null; let composeObserver: MutationObserver | null = null; let attachmentObserver: MutationObserver | null = null; let draftDebounceTimer: ReturnType<typeof setTimeout> | null = null; + let fileChangeListenerAttached = false; + + /** + * Attachments currently visible in the compose form. Keyed by a stable + * file fingerprint; values carry the filename (used to detect when the + * user removes the attachment via the UI; we match the filename text + * against the rendered form contents and prune entries that disappear). 
+ * + * Bytes never leave the browser: image dimensions come from naturalWidth + * on a blob-URL Image, PDF page counts from a local regex over the file's + * head and tail windows. The map holds only metadata (filename, byte size, + * dimensions, page counts), never file contents. + */ + interface TrackedAttachment { filename: string; descriptor: AttachmentDescriptor; } + const attachmentMap = new Map<string, TrackedAttachment>(); // Restore state from stored conversation record if one exists. // This gives the overlay correct context % and turn count immediately @@ -362,6 +380,10 @@ async function initializeMonitoring(): Promise { if (msg.type === 'TOKEN_BATCH') { // Clear draft estimate: the message has been sent, stream is starting. + // Also drop tracked attachments; claude.ai resets the compose form + // after send, and the next file the user picks will repopulate the + // map via the input change listener. + attachmentMap.clear(); state = clearDraftEstimate(state); browser.runtime.sendMessage({ type: 'STORE_TOKEN_BATCH', @@ -705,26 +727,110 @@ async function initializeMonitoring(): Promise { } }); - // Compose box observer: finds ProseMirror editor, reads text + attachment - // card content from the parent form/fieldset, debounces pre-submit estimates. - - function findFormParent(el: HTMLElement): HTMLElement | null { + // Compose box observer: finds ProseMirror editor, reads text from the + // contenteditable only (the form parent's textContent includes attachment + // card filenames; reading text from the form would inflate the char count + // by the length of every attached filename), tracks attachments via the + // file input's change event, debounces pre-submit estimates. + + /** + * Find the compose region: the smallest reasonable ancestor of the editor + * that wraps the attachment cards and the file input. claude.ai's modern + * React build does not always use a <form>
element, so we accept three + * shapes: an actual FORM/FIELDSET (legacy), any ancestor whose subtree + * contains an <input type=file> (current), or a wide-but-bounded ancestor + * walk if neither matches. Returns null when no plausible parent exists, + * which only happens when the editor is detached from the DOM. + */ + function findComposeRegion(el: HTMLElement): HTMLElement | null { let p: HTMLElement | null = el.parentElement; - for (let i = 0; i < 5 && p; i++) { + let widestSeen: HTMLElement | null = null; + for (let i = 0; i < 8 && p; i++) { const t = p.tagName; if (t === 'FIELDSET' || t === 'FORM') return p; + if (p.querySelector('input[type=file]')) return p; + widestSeen = p; p = p.parentElement; } - return null; + return widestSeen; } - function onComposeInput(): void { + function fileKey(file: File): string { + return `${file.name}|${file.size}|${file.lastModified}`; + } + + /** + * Read an image's pixel dimensions via a transient blob URL. The bytes + * never leave the browser: the URL is local-only and revoked as soon as + * the load or error handler fires. + */ + function readImageDimensions(file: File): Promise<{ width: number; height: number } | null> { + return new Promise(resolve => { + const url = URL.createObjectURL(file); + const img = new Image(); + img.onload = () => { + URL.revokeObjectURL(url); + resolve({ width: img.naturalWidth, height: img.naturalHeight }); + }; + img.onerror = () => { + URL.revokeObjectURL(url); + resolve(null); + }; + img.src = url; + }); + } + + /** + * Read enough of a PDF to locate its page-tree root: the first 1 MB plus + * the last 64 KB. This covers the common cases (catalog near the head, + * trailer at the tail) without slurping a 32 MB file into memory. Bytes + * stay local; nothing crosses the bridge. 
+ */ + async function readPdfPageCount(file: File): Promise<number | null> { + const HEAD = 1024 * 1024; + const TAIL = 64 * 1024; + + try { + if (file.size <= HEAD + TAIL) { + return countPdfPages(new Uint8Array(await file.arrayBuffer())); + } + const headBuf = await file.slice(0, HEAD).arrayBuffer(); + const tailBuf = await file.slice(file.size - TAIL).arrayBuffer(); + const merged = new Uint8Array(HEAD + TAIL); + merged.set(new Uint8Array(headBuf), 0); + merged.set(new Uint8Array(tailBuf), HEAD); + return countPdfPages(merged); + } catch { + return null; + } + } + + function recomputeDraft(): void { if (!composeBoxRef) return; - const container = findFormParent(composeBoxRef); - const text = (container ?? composeBoxRef).textContent ?? ''; - if (text.length < 20) { - if (draftDebounceTimer) { clearTimeout(draftDebounceTimer); draftDebounceTimer = null; } + // Read text only from the contenteditable. Reading from the form + // parent would include attachment-card filenames in the char count. + const text = composeBoxRef.textContent ?? ''; + + // Reconcile the attachment map against what is currently rendered: + // when the user removes an attachment via the UI, claude.ai removes + // its card from the DOM; the filename disappears from the form's + // textContent. Drop tracked entries whose filename is no longer there. + if (composeFormRef) { + const formText = composeFormRef.textContent ?? 
''; + for (const [key, tracked] of attachmentMap) { + if (!formText.includes(tracked.filename)) { + attachmentMap.delete(key); + } + } + } + + const model = convState.model || 'claude-sonnet-4-6'; + const attachments: AttachmentDescriptor[] = []; + for (const tracked of attachmentMap.values()) attachments.push(tracked.descriptor); + const cost = computeAttachmentCost(attachments, model); + + if (text.length < MIN_DRAFT_CHARS && attachmentMap.size === 0) { if (state.draftEstimate !== null) { state = clearDraftEstimate(state); overlay.render(state); @@ -732,16 +838,71 @@ async function initializeMonitoring(): Promise { return; } + state = applyDraftEstimate(state, computePreSubmitEstimate({ + draftCharCount: text.length, + model, + pctPerInputToken: cachedPctPerInputToken, + currentSessionPct: lastKnownUtilization ?? 0, + currentContextPct: state.contextPct ?? 0, + attachmentTokensLow: cost.totalTokensLow, + attachmentTokensHigh: cost.totalTokensHigh, + attachmentBreakdown: cost.breakdown, + attachmentWarnings: cost.warnings, + hasUnknownImage: cost.hasUnknownImage, + hasPdf: cost.hasPdf, + })); + overlay.render(state); + } + + function onComposeInput(): void { if (draftDebounceTimer) clearTimeout(draftDebounceTimer); - draftDebounceTimer = setTimeout(() => { - state = applyDraftEstimate(state, computePreSubmitEstimate({ - draftCharCount: text.length, - model: convState.model || 'claude-sonnet-4-6', - pctPerInputToken: cachedPctPerInputToken, - currentSessionPct: lastKnownUtilization ?? 0, - })); - overlay.render(state); - }, 500); + draftDebounceTimer = setTimeout(() => { recomputeDraft(); }, 500); + } + + function handleFileChange(event: Event): void { + const input = event.target as HTMLInputElement | null; + if (!input || input.tagName !== 'INPUT' || input.type !== 'file' || !input.files) return; + + // Snapshot files now: input.files can mutate before async reads resolve. 
+ const files = Array.from(input.files); + for (const file of files) { + const key = fileKey(file); + if (attachmentMap.has(key)) continue; + + if (file.type.startsWith('image/')) { + readImageDimensions(file).then(dims => { + if (!dims || dims.width <= 0 || dims.height <= 0) return; + attachmentMap.set(key, { + filename: file.name, + descriptor: { + kind: 'image', + width: dims.width, + height: dims.height, + sourceLabel: file.name, + fileSize: file.size, + }, + }); + recomputeDraft(); + }); + } else if (file.type === 'application/pdf') { + readPdfPageCount(file).then(pages => { + // pages can be null when the PDF is encrypted, fully + // compressed, or malformed. Track it anyway so the user + // sees the file is registered; the agent renders an + // unknown-cost row rather than dropping the attachment. + attachmentMap.set(key, { + filename: file.name, + descriptor: { + kind: 'pdf', + pageCount: pages !== null && pages > 0 ? pages : null, + sourceLabel: file.name, + fileSize: file.size, + }, + }); + recomputeDraft(); + }); + } + } } function discoverComposeBox(): void { @@ -750,8 +911,11 @@ async function initializeMonitoring(): Promise { if (box) { composeBoxRef = box; box.addEventListener('input', onComposeInput); - const parent = findFormParent(box); + const parent = findComposeRegion(box); if (parent) { + composeFormRef = parent; + // Attachment-card adds and removes flow through DOM mutations. + // Reuse onComposeInput so the same debounce path covers both. attachmentObserver = new MutationObserver(onComposeInput); attachmentObserver.observe(parent, { childList: true, subtree: true }); } @@ -767,6 +931,18 @@ async function initializeMonitoring(): Promise { discoverComposeBox(); + // Document-level capture for file-input change events. claude.ai's compose + // form is a deeply-nested set of divs (no <form> tag) and the file input + // can live outside the editor's immediate ancestor chain, so per-region + // attachment was missing the event entirely. 
Capture-phase at document + // level catches every change before it bubbles, regardless of where the + // input sits relative to the editor. The listener is attached once and + // never removed; it is harmless when no compose box has been discovered. + if (!fileChangeListenerAttached) { + document.documentElement.addEventListener('change', handleFileChange, true); + fileChangeListenerAttached = true; + } + // Reset overlay, conversation state, and dismissed nudges on SPA navigation (Chrome 102+). // Also finalize the previous conversation and detect the new one. if ('navigation' in window) { @@ -808,9 +984,14 @@ async function initializeMonitoring(): Promise { overlay.hideNudge(); // Re-discover the compose box: SPA navigation replaces the DOM. + // The document-level file-change listener stays attached across + // navigations (document.documentElement is stable), so we do not + // reset fileChangeListenerAttached here. if (composeObserver) { composeObserver.disconnect(); composeObserver = null; } if (attachmentObserver) { attachmentObserver.disconnect(); attachmentObserver = null; } composeBoxRef = null; + composeFormRef = null; + attachmentMap.clear(); if (draftDebounceTimer) { clearTimeout(draftDebounceTimer); draftDebounceTimer = null; } discoverComposeBox(); diff --git a/lib/attachment-cost.ts b/lib/attachment-cost.ts new file mode 100644 index 0000000..c1e9438 --- /dev/null +++ b/lib/attachment-cost.ts @@ -0,0 +1,252 @@ +// lib/attachment-cost.ts +// Attachment Cost Agent: predicts the input token cost of image and PDF +// attachments before send. Pure functions only, no DOM refs, no chrome APIs. +// +// All math is sourced from Anthropic's published vision and PDF docs and +// pinned in docs/attachment-cost-spec.md. The drift tests in +// tests/unit/attachment-cost.test.ts assert every Anthropic example value +// from that spec verbatim; if any test fails, refetch the docs and update +// this file plus the spec in lockstep. 
+// +// ── Role in the multi-agent architecture ───────────────────────────────────── +// +// | Agent | Module | Input | Output | +// |----------------------|----------------------|-----------------------|-------------------------| +// | Pre-Submit Agent | pre-submit.ts | PreSubmitInput | PreSubmitEstimate | +// | **Attachment Cost** | **attachment-cost.ts**| **AttachmentDescriptor[]** | **AttachmentCostResult** | +// +// The orchestrator collects attachments from the compose box (see +// claude-ai.content.ts), runs computeAttachmentCost, then feeds the totals +// and breakdown into computePreSubmitEstimate. Image cost is deterministic +// from pixel dimensions; PDF cost is reported as a low-high range because +// Anthropic itself publishes the per-page cost as a 1500-3000 range. + +import { isKnownModel, getContextWindowSize } from './pricing'; + +// ── Public types ───────────────────────────────────────────────────────────── + +export type AttachmentDescriptor = + | { kind: 'image'; width: number; height: number; sourceLabel: string; fileSize: number } + /** + * pageCount is null when local parsing failed (encrypted, fully-compressed + * page tree, malformed). The agent emits an unknown-cost breakdown row in + * that case so the user still sees the file is tracked. + */ + | { kind: 'pdf'; pageCount: number | null; sourceLabel: string; fileSize: number }; + +/** One row in the per-attachment overlay breakdown. */ +export interface AttachmentBreakdownItem { + kind: 'image' | 'pdf'; + /** Token contribution. For images: exact. For PDFs: low end of the range. 0 when unknown is true. */ + tokens: number; + /** Defined only for PDFs with a known page count (high end of range). Undefined when tokens is exact or unknown. */ + tokensHigh?: number; + /** Human-readable line for the overlay (e.g. "image 1568x1568", "PDF 8 pages"). */ + label: string; + /** True when this attachment's cost cannot be predicted (image on a model with no published cost, or PDF whose page count could not be parsed); UI shows "?". 
*/ + unknown?: boolean; +} + +export interface AttachmentCostResult { + /** Lower bound of total attachment tokens. Images contribute exact; PDFs contribute low. */ + totalTokensLow: number; + /** Upper bound. Images contribute exact; PDFs contribute high. */ + totalTokensHigh: number; + breakdown: AttachmentBreakdownItem[]; + /** Hard warnings: page caps exceeded, etc. Rendered prominently in the overlay. */ + warnings: string[]; + /** True when at least one image is on a model with no published cost; surfaces "?". */ + hasUnknownImage: boolean; + /** True when at least one PDF is included; surfaces the per-page-image disclosure. */ + hasPdf: boolean; +} + +// ── Image constants (verbatim from Anthropic's vision docs) ────────────────── + +interface ImageCaps { maxLongPx: number; maxTokens: number; } + +/** + * Opus 4.7 supports high-resolution images: longer edge up to 2576 px, max + * 4784 tokens per image. Source: vision docs, "High-resolution image support". + */ +const OPUS_4_7_CAPS: ImageCaps = { maxLongPx: 2576, maxTokens: 4784 }; + +/** + * Default caps for every other Claude vision model: longer edge up to 1568 px, + * max 1568 tokens per image. Source: vision docs, "Evaluate image size". + */ +const DEFAULT_CAPS: ImageCaps = { maxLongPx: 1568, maxTokens: 1568 }; + +/** Tokens-per-pixel divisor: tokens = round(w * h / 750). */ +const TOKENS_PER_PIXEL_DIVISOR = 750; + +function imageCaps(model: string): ImageCaps { + if (model.startsWith('claude-opus-4-7')) return OPUS_4_7_CAPS; + return DEFAULT_CAPS; +} + +// ── PDF constants (verbatim from Anthropic's PDF docs) ─────────────────────── + +/** Lower bound of Anthropic's published per-page text-token range. */ +export const PDF_TOKENS_PER_PAGE_LOW = 1500; + +/** Upper bound of Anthropic's published per-page text-token range. */ +export const PDF_TOKENS_PER_PAGE_HIGH = 3000; + +/** PDF page caps per Anthropic's PDF docs ("Maximum pages per request"). 
*/ +const PDF_PAGE_LIMIT_200K = 100; +const PDF_PAGE_LIMIT_1M = 600; + +/** + * Total request size cap from Anthropic's "Maximum request size: 32 MB". + * We warn at REQUEST_SIZE_WARN_BYTES (30 MB) so the user has 2 MB of margin + * for the prompt body and JSON overhead before the request is rejected. + */ +const REQUEST_SIZE_HARD_BYTES = 32 * 1024 * 1024; +const REQUEST_SIZE_WARN_BYTES = 30 * 1024 * 1024; + +/** + * 200K-context models cap at 100 pages, larger-context models at 600. We read + * the actual window size from the pricing table instead of hard-coding model + * prefixes so a new 1M model lands without a code edit. The 500_000 threshold + * sits comfortably between the two known tiers (200K and 1M) and avoids any + * accidental match if Anthropic ever ships a hypothetical 256K or 384K model. + * Unknown models fall back to 200K via getContextWindowSize, getting the + * conservative 100-page cap. + */ +function pdfPageLimit(model: string): number { + return getContextWindowSize(model) >= 500_000 + ? PDF_PAGE_LIMIT_1M + : PDF_PAGE_LIMIT_200K; +} + +// ── Public formulas ────────────────────────────────────────────────────────── + +/** + * Predicted input tokens for an image on the given model. Mirrors Anthropic's + * algorithm exactly: resize the long edge to the per-model cap (preserving + * aspect ratio), apply width * height / 750, clamp to maxTokens. + * + * Returns null when the model is not in the pricing table; the caller renders + * "?" and skips the image's contribution to the total. Returns 0 for + * non-positive dimensions. 
+ */ +export function computeImageTokens(width: number, height: number, model: string): number | null { + if (width <= 0 || height <= 0) return 0; + if (!isKnownModel(model)) return null; + + const caps = imageCaps(model); + const longEdge = Math.max(width, height); + + let w = width; + let h = height; + if (longEdge > caps.maxLongPx) { + const scale = caps.maxLongPx / longEdge; + w = Math.round(width * scale); + h = Math.round(height * scale); + } + + const raw = Math.round((w * h) / TOKENS_PER_PIXEL_DIVISOR); + return Math.min(raw, caps.maxTokens); +} + +/** + * Predicted input tokens for a PDF, returned as Anthropic's published + * low-high range. Not collapsed to a midpoint: Anthropic itself publishes a + * range, not a point, and a single number would imply false precision. + */ +export function computePdfTokenRange(pageCount: number): { low: number; high: number } { + if (pageCount <= 0) return { low: 0, high: 0 }; + return { + low: pageCount * PDF_TOKENS_PER_PAGE_LOW, + high: pageCount * PDF_TOKENS_PER_PAGE_HIGH, + }; +} + +/** + * Combined cost for a list of attachments on a given model. Sums token + * contributions, builds per-attachment breakdown rows, emits hard warnings + * when page caps are exceeded. 
+ */ +export function computeAttachmentCost( + attachments: readonly AttachmentDescriptor[], + model: string, +): AttachmentCostResult { + let totalLow = 0; + let totalHigh = 0; + const breakdown: AttachmentBreakdownItem[] = []; + const warnings: string[] = []; + let hasUnknownImage = false; + let hasPdf = false; + let pdfPageTotal = 0; + + for (const att of attachments) { + if (att.kind === 'image') { + const tokens = computeImageTokens(att.width, att.height, model); + const dims = `${att.width}x${att.height}`; + if (tokens === null) { + hasUnknownImage = true; + breakdown.push({ + kind: 'image', + tokens: 0, + label: `image ${dims}`, + unknown: true, + }); + } else { + totalLow += tokens; + totalHigh += tokens; + breakdown.push({ kind: 'image', tokens, label: `image ${dims}` }); + } + } else { + hasPdf = true; + if (att.pageCount === null) { + // Page-count parsing failed; still surface the attachment so the + // user sees it is tracked. Contributes 0 tokens to the totals. + breakdown.push({ + kind: 'pdf', + tokens: 0, + label: 'PDF (page count unavailable)', + unknown: true, + }); + continue; + } + pdfPageTotal += att.pageCount; + const range = computePdfTokenRange(att.pageCount); + totalLow += range.low; + totalHigh += range.high; + const pageLabel = att.pageCount === 1 ? '1 page' : `${att.pageCount} pages`; + breakdown.push({ + kind: 'pdf', + tokens: range.low, + tokensHigh: range.high, + label: `PDF ${pageLabel}`, + }); + } + } + + if (pdfPageTotal > 0) { + const cap = pdfPageLimit(model); + if (pdfPageTotal > cap) { + warnings.push(`${pdfPageTotal} PDF pages exceeds the ${cap}-page limit on this model. Split into sections.`); + } + } + + // Aggregate file-size warning. Anthropic's request cap is 32 MB; we warn + // at 30 MB so users have margin for the rest of the request body. 
+ let totalBytes = 0; + for (const att of attachments) totalBytes += att.fileSize; + if (totalBytes > REQUEST_SIZE_WARN_BYTES) { + const mb = (totalBytes / (1024 * 1024)).toFixed(1); + const status = totalBytes > REQUEST_SIZE_HARD_BYTES ? 'exceeds' : 'is approaching'; + warnings.push(`Attachments total ${mb} MB; ${status} Anthropic's 32 MB request limit. Send fewer or smaller files.`); + } + + return { + totalTokensLow: totalLow, + totalTokensHigh: totalHigh, + breakdown, + warnings, + hasUnknownImage, + hasPdf, + }; +} diff --git a/lib/pdf-page-count.ts b/lib/pdf-page-count.ts new file mode 100644 index 0000000..f1ccca5 --- /dev/null +++ b/lib/pdf-page-count.ts @@ -0,0 +1,87 @@ +// lib/pdf-page-count.ts +// Pure parser: extracts the page count from a PDF binary by walking the +// page-tree dictionary in the document's textual portion. No DOM refs, no +// chrome APIs, no third-party PDF library. +// +// Why hand-rolled and not pdfjs-dist: a one-shot page-count read does not +// justify the ~600 KB gzipped pdfjs-dist payload or the MV3 service-worker +// integration friction. The page-tree root is part of the PDF's textual +// scaffolding (not the compressed object streams) for ~95 percent of standard +// PDFs, so a focused regex over a head + tail window does the job. Failure +// modes (encrypted, fully compressed) return null; the caller renders "?". +// +// Spec reference: docs/attachment-cost-spec.md. + +// PDF page-tree root looks like: +// << /Type /Pages /Kids [...] /Count 12 >> +// Intermediate page-tree nodes share the same shape with smaller counts; the +// root has the maximum Count, so we collect all matches and pick the max. +// +// Key order inside a dictionary is not specified by the PDF spec, so we run +// two regexes: one for "Type Pages then Count" and one for the reverse. 
// Both page-tree patterns allow up to 8192 characters between "/Type /Pages"
// and "/Count", but the gap is tempered so it can never step over "endobj".
// Without that bound, a "/Count" belonging to a neighbouring object (an
// /Outlines dictionary also carries /Count) would be paired with a page-tree
// node and could win the "largest Count" selection below.
const TYPE_PAGES_THEN_COUNT = /\/Type\s*\/Pages\b(?:(?!endobj)[\s\S]){0,8192}?\/Count\s+(\d+)/g;
const COUNT_THEN_TYPE_PAGES = /\/Count\s+(\d+)(?:(?!endobj)[\s\S]){0,8192}?\/Type\s*\/Pages\b/g;

// Leaf page objects look like:
//   << /Type /Page /Parent 2 0 R ... >>
// The negative lookahead avoids matching the plural /Pages.
const LEAF_PAGE = /\/Type\s*\/Page(?!s)\b/g;

// Sanity ceiling for a page count parsed from the binary. Anthropic caps PDFs
// at 600 pages per request; anything wildly larger is almost certainly a regex
// false positive bleeding into a stream's binary content.
const MAX_PLAUSIBLE_PAGES = 100_000;

const HEAD_WINDOW = 1024 * 1024; // 1 MB
const TAIL_WINDOW = 64 * 1024; // 64 KB

/**
 * Module-scope decoder: TextDecoder is stateless across non-streaming
 * decode() calls and cheap to reuse, so we pay the construction cost once
 * instead of every read.
 */
const LATIN1_DECODER = new TextDecoder('latin1', { fatal: false });

/**
 * Extract the page count from a PDF binary.
 *
 * Scans the first HEAD_WINDOW bytes and the last TAIL_WINDOW bytes of the
 * buffer. When the buffer is small enough that those two windows touch, the
 * whole buffer is scanned as one string; otherwise the windows are scanned
 * separately so that no match can be assembled from bytes that are not
 * adjacent in the file.
 *
 * Strategy 1 takes the largest plausible /Count found on a /Type /Pages
 * dictionary (the root has the maximum). Strategy 2, used only when strategy 1
 * finds nothing, counts leaf /Type /Page objects.
 *
 * @param bytes - Full file bytes, or a head + tail concatenation of them.
 * @returns The page count, or null when no page tree or leaf page is found
 *   (encrypted, fully-compressed object streams, malformed, empty input) or
 *   when every candidate exceeds MAX_PLAUSIBLE_PAGES.
 */
export function countPdfPages(bytes: Uint8Array): number | null {
  if (bytes.length === 0) return null;

  // Same byte coverage as "first 1 MB plus last 64 KB". The contiguous case
  // stays a single string so a dictionary straddling the 1 MB mark still
  // matches; the disjoint case keeps the windows apart.
  const windows =
    bytes.length <= HEAD_WINDOW + TAIL_WINDOW
      ? [LATIN1_DECODER.decode(bytes)]
      : [
          LATIN1_DECODER.decode(bytes.subarray(0, HEAD_WINDOW)),
          LATIN1_DECODER.decode(bytes.subarray(bytes.length - TAIL_WINDOW)),
        ];

  // Strategy 1: locate every page-tree dictionary and take the largest Count.
  // matchAll iterates over a clone of the regex, so the shared module-level
  // patterns never carry lastIndex state between calls.
  let best = 0;
  for (const text of windows) {
    for (const pattern of [TYPE_PAGES_THEN_COUNT, COUNT_THEN_TYPE_PAGES]) {
      for (const match of text.matchAll(pattern)) {
        const n = Number.parseInt(match[1] ?? '', 10);
        if (Number.isFinite(n) && n > best && n <= MAX_PLAUSIBLE_PAGES) {
          best = n;
        }
      }
    }
  }
  if (best > 0) return best;

  // Strategy 2: fall back to counting leaf /Type /Page objects.
  let leaves = 0;
  for (const text of windows) {
    leaves += text.match(LEAF_PAGE)?.length ?? 0;
  }
  return leaves > 0 && leaves <= MAX_PLAUSIBLE_PAGES ? leaves : null;
}

// Remainder of the original collapsed diff line (start of the lib/pre-submit.ts patch), kept verbatim on the next line:
// diff --git a/lib/pre-submit.ts b/lib/pre-submit.ts index 1ce27b1..9d3ea4b 100644 --- a/lib/pre-submit.ts +++ b/lib/pre-submit.ts @@ -47,6 +47,8 @@ // ───────────────────────────────────────────────────────────────────────────── import { classifyModelTier } from './prompt-analysis'; +import { getContextWindowSize } from './pricing'; +import type { AttachmentBreakdownItem } from './attachment-cost'; // ── Types ──────────────────────────────────────────────────────────────────── @@ -60,6 +62,29 @@ export interface PreSubmitInput { pctPerInputToken: Record | null; /** Current 5-hour session utilization (0-100), from Anthropic's usage endpoint. */ currentSessionPct: number; + /** + * Lower-bound token contribution from attachments (image + PDF). Default 0. + * Images contribute exact; PDFs contribute Anthropic's published low end. + * Sourced from computeAttachmentCost in lib/attachment-cost.ts. + */ + attachmentTokensLow?: number; + /** Upper-bound token contribution. PDFs contribute the high end of the range. */ + attachmentTokensHigh?: number; + /** Per-attachment rows for the overlay breakdown. Empty by default. */ + attachmentBreakdown?: readonly AttachmentBreakdownItem[]; + /** Hard warnings (e.g. PDF page caps exceeded). Empty by default. */ + attachmentWarnings?: readonly string[]; + /** True when at least one image is on a model with no published cost; UI shows "?".
*/ + hasUnknownImage?: boolean; + /** True when at least one PDF is included; surfaces the per-page-image disclosure. */ + hasPdf?: boolean; + /** + * Current conversation context window utilization (0-100), as already + * consumed by message history. The pre-submit estimate adds the projected + * tokens for this turn on top to compute the projected context fill. + * Default 0; the orchestrator passes state.contextPct. + */ + currentContextPct?: number; } /** One row in the model comparison table. */ @@ -74,16 +99,54 @@ export interface ModelComparison { /** Output of the Pre-Submit Agent. */ export interface PreSubmitEstimate { - /** Approximate input token count (chars / 4). */ + /** + * Lower-bound total input tokens for this draft (text + attachments). + * Equals upper bound when no PDFs are present. + */ estimatedTokens: number; - /** Estimated session % this draft will cost. Null when token economics missing. */ + /** Upper-bound total. Differs from estimatedTokens only when a PDF is attached. */ + estimatedTokensHigh: number; + /** Text-only token estimate (chars / 4). Useful for the breakdown line. */ + textTokens: number; + /** + * Estimated session % this draft will cost (low end). Null when token + * economics missing. Computed from the LOW token total so the displayed + * percentage is conservative; the high end is exposed separately. + */ estimatedSessionPct: number | null; + /** Upper-bound session %. Equals estimatedSessionPct when no PDFs are present. */ + estimatedSessionPctHigh: number | null; /** currentSessionPct + estimatedSessionPct. Null if either is null. */ projectedTotalPct: number | null; /** Populated only when estimatedSessionPct > MODEL_COMPARE_THRESHOLD_PCT. Sorted ascending by cost. */ modelComparisons: ModelComparison[]; /** Warning message when projectedTotalPct >= WARNING_ZONE_PCT. */ warning: string | null; + /** Per-attachment breakdown for the overlay (empty when no attachments). 
*/ + attachmentBreakdown: readonly AttachmentBreakdownItem[]; + /** Hard warnings from the attachment agent (cap exceeded, etc.). */ + attachmentWarnings: readonly string[]; + /** Pass-through: image present on a model with no published cost. */ + hasUnknownImage: boolean; + /** Pass-through: at least one PDF; UI shows "may cost more with charts" disclosure. */ + hasPdf: boolean; + /** + * Projected context-window utilization after sending this turn (low end). + * currentContextPct + (estimatedTokens / contextWindowSize) * 100. + * Null when the model has no known context window. + */ + projectedContextPctLow: number | null; + /** Projected context utilization at the upper bound (PDF range high end). */ + projectedContextPctHigh: number | null; + /** Context window size in tokens for the model. */ + contextWindowSize: number; + /** + * Hard warning when the projection exceeds OVERRUN_ZONE_PCT of the context + * window. Anthropic's own guidance: dense PDFs can fill the context window + * before the page limit; this surfaces that risk before the user hits send. + * Null when the projection is comfortable. + */ + contextOverrunWarning: string | null; } // ── Constants ──────────────────────────────────────────────────────────────── @@ -106,33 +169,63 @@ export const MODEL_COMPARE_THRESHOLD_PCT = 5; */ export const WARNING_ZONE_PCT = 90; +/** + * Context-window overrun threshold. Anthropic's PDF docs state: "Dense PDFs + * can fill the context window before reaching the page limit." We warn when + * the projected total context fill (history + this turn) exceeds 90 percent + * so the user can split, downsample, or switch to a larger-context model + * before the request gets truncated. + */ +export const CONTEXT_OVERRUN_ZONE_PCT = 90; + // ── Main export ────────────────────────────────────────────────────────────── /** * Predict the session cost of a draft message. * - * Returns null when the draft is below MIN_DRAFT_CHARS. 
When token economics - * data is missing for the current model, returns an estimate with token count - * but null session % (honest: we show what we know, omit what we don't). + * Returns null when the draft is below MIN_DRAFT_CHARS AND no attachments are + * present. When token economics data is missing for the current model, returns + * an estimate with token count but null session % (honest: we show what we + * know, omit what we don't). + * + * Attachment tokens are summed into the estimate; the upper-bound fields + * differ from the lower-bound only when a PDF is attached, since Anthropic + * publishes PDF cost as a 1,500-3,000 per page range. * * @param input - PreSubmitInput assembled by the content script orchestrator. - * @returns PreSubmitEstimate or null if draft is too short. + * @returns PreSubmitEstimate or null if draft has neither text nor attachments. */ export function computePreSubmitEstimate(input: PreSubmitInput): PreSubmitEstimate | null { const { draftCharCount, model, pctPerInputToken, currentSessionPct } = input; + const attachmentTokensLow = input.attachmentTokensLow ?? 0; + const attachmentTokensHigh = input.attachmentTokensHigh ?? 0; + const attachmentBreakdown = input.attachmentBreakdown ?? []; + const attachmentWarnings = input.attachmentWarnings ?? []; + const hasUnknownImage = input.hasUnknownImage ?? false; + const hasPdf = input.hasPdf ?? false; - if (draftCharCount < MIN_DRAFT_CHARS) return null; + // Gate: either the text is long enough to estimate, or the user has + // attached something we can describe. The DRAFT_ESTIMATE pre-send + // fallback in inject.ts only sends draftCharCount, so the original gate + // (text >= MIN_DRAFT_CHARS) is preserved when no attachments are passed. 
+ const hasAttachments = attachmentBreakdown.length > 0; + if (draftCharCount < MIN_DRAFT_CHARS && !hasAttachments) return null; - const estimatedTokens = Math.round(draftCharCount / 4); + const textTokens = Math.round(draftCharCount / 4); + const estimatedTokens = textTokens + attachmentTokensLow; + const estimatedTokensHigh = textTokens + attachmentTokensHigh; // Session % prediction: use real historical data, not a guessed multiplier. // medianPctPerInputToken is derived from actual delta records and implicitly - // accounts for the typical response size. + // accounts for the typical response size. Both bounds use the same rate; + // they only differ in the token totals being multiplied. let estimatedSessionPct: number | null = null; + let estimatedSessionPctHigh: number | null = null; if (pctPerInputToken !== null) { const rate = pctPerInputToken[model]; if (rate !== undefined && rate > 0) { estimatedSessionPct = estimatedTokens * rate; + estimatedSessionPctHigh = estimatedTokensHigh * rate; } } @@ -141,8 +234,11 @@ export function computePreSubmitEstimate(input: PreSubmitInput): PreSubmitEstima : null; // Model comparison: only when the cost is high enough to make switching worthwhile. + // Use the upper-bound token total for the comparison so a PDF-heavy draft + // shows comparisons even if the low end falls under the threshold. 
const modelComparisons: ModelComparison[] = []; - if (estimatedSessionPct !== null && estimatedSessionPct > MODEL_COMPARE_THRESHOLD_PCT && pctPerInputToken !== null) { + const compareTokens = Math.max(estimatedTokens, estimatedTokensHigh); + if (estimatedSessionPctHigh !== null && estimatedSessionPctHigh > MODEL_COMPARE_THRESHOLD_PCT && pctPerInputToken !== null) { for (const [m, rate] of Object.entries(pctPerInputToken)) { if (rate <= 0) continue; const tier = classifyModelTier(m); @@ -150,23 +246,62 @@ export function computePreSubmitEstimate(input: PreSubmitInput): PreSubmitEstima modelComparisons.push({ model: m, label: tier.label, - estimatedPct: estimatedTokens * rate, + estimatedPct: compareTokens * rate, }); } modelComparisons.sort((a, b) => a.estimatedPct - b.estimatedPct); } // Warning when sending this message would push into the critical zone. + // Uses the LOW projection so the warning fires only when the floor of the + // estimate already crosses 90 percent; otherwise the user gets a false + // alarm whenever the high end of a PDF range happens to spike. let warning: string | null = null; if (projectedTotalPct !== null && projectedTotalPct >= WARNING_ZONE_PCT) { warning = `Sending this will push your session to ~${Math.round(projectedTotalPct)}%. Consider starting fresh or switching models.`; } + // Context-window projection. The conversation history already consumes + // currentContextPct of the model's context window; the new turn adds + // textTokens + attachmentTokens on top. We expose both bounds so the UI + // can show a range when a PDF is attached. + const currentContextPct = input.currentContextPct ?? 
0; + const contextWindowSize = getContextWindowSize(model); + let projectedContextPctLow: number | null = null; + let projectedContextPctHigh: number | null = null; + let contextOverrunWarning: string | null = null; + if (contextWindowSize > 0) { + projectedContextPctLow = currentContextPct + (estimatedTokens / contextWindowSize) * 100; + projectedContextPctHigh = currentContextPct + (estimatedTokensHigh / contextWindowSize) * 100; + + // Use the HIGH projection so dense PDFs trigger the warning even when + // the LOW range fits. This mirrors Anthropic's own caveat: "Dense PDFs + // can fill the context window before reaching the page limit." + if (projectedContextPctHigh >= CONTEXT_OVERRUN_ZONE_PCT) { + const ctxK = Math.round(contextWindowSize / 1000); + const pctRounded = Math.round(projectedContextPctHigh); + contextOverrunWarning = projectedContextPctHigh >= 100 + ? `This turn likely exceeds the ${ctxK}k context window (~${pctRounded}%). Split the document or use a larger-context model.` + : `This turn would fill ~${pctRounded}% of the ${ctxK}k context window. 
Consider splitting the document.`; + } + } + return { estimatedTokens, + estimatedTokensHigh, + textTokens, estimatedSessionPct, + estimatedSessionPctHigh, projectedTotalPct, modelComparisons, warning, + attachmentBreakdown, + attachmentWarnings, + hasUnknownImage, + hasPdf, + projectedContextPctLow, + projectedContextPctHigh, + contextWindowSize, + contextOverrunWarning, }; } diff --git a/tests/audit/overlay-state-audit.test.ts b/tests/audit/overlay-state-audit.test.ts index 08f97a2..9fcfd0e 100644 --- a/tests/audit/overlay-state-audit.test.ts +++ b/tests/audit/overlay-state-audit.test.ts @@ -155,22 +155,34 @@ describe('applyRestoredConversation', () => { // ── Draft estimate ───────────────────────────────────────────────────────── describe('draft estimate', () => { + const baseEstimate = { + estimatedTokens: 100, + estimatedTokensHigh: 100, + textTokens: 100, + estimatedSessionPct: 2.5, + estimatedSessionPctHigh: 2.5, + projectedTotalPct: 12.5, + modelComparisons: [], + warning: null, + attachmentBreakdown: [], + attachmentWarnings: [], + hasUnknownImage: false, + hasPdf: false, + projectedContextPctLow: 0.01, + projectedContextPctHigh: 0.01, + contextWindowSize: 1_000_000, + contextOverrunWarning: null, + }; + test('applyDraftEstimate sets estimate', () => { - const estimate = { - estimatedTokens: 100, - estimatedSessionPct: 2.5, - projectedTotalPct: 12.5, - modelComparisons: [], - warning: null, - }; - const next = applyDraftEstimate(INITIAL_STATE, estimate); - expect(next.draftEstimate).toEqual(estimate); + const next = applyDraftEstimate(INITIAL_STATE, baseEstimate); + expect(next.draftEstimate).toEqual(baseEstimate); }); test('applyDraftEstimate with null clears estimate', () => { const state: OverlayState = { ...INITIAL_STATE, - draftEstimate: { estimatedTokens: 100, estimatedSessionPct: null, projectedTotalPct: null, modelComparisons: [], warning: null }, + draftEstimate: { ...baseEstimate, estimatedSessionPct: null, estimatedSessionPctHigh: null, 
projectedTotalPct: null }, }; const next = applyDraftEstimate(state, null); expect(next.draftEstimate).toBeNull(); @@ -179,7 +191,7 @@ describe('draft estimate', () => { test('clearDraftEstimate sets draftEstimate to null', () => { const state: OverlayState = { ...INITIAL_STATE, - draftEstimate: { estimatedTokens: 100, estimatedSessionPct: null, projectedTotalPct: null, modelComparisons: [], warning: null }, + draftEstimate: { ...baseEstimate, estimatedSessionPct: null, estimatedSessionPctHigh: null, projectedTotalPct: null }, }; const next = clearDraftEstimate(state); expect(next.draftEstimate).toBeNull(); diff --git a/tests/unit/attachment-cost.test.ts b/tests/unit/attachment-cost.test.ts new file mode 100644 index 0000000..aefc3a5 --- /dev/null +++ b/tests/unit/attachment-cost.test.ts @@ -0,0 +1,285 @@ +import { describe, it, expect } from 'vitest'; +import { + computeImageTokens, + computePdfTokenRange, + computeAttachmentCost, + PDF_TOKENS_PER_PAGE_LOW, + PDF_TOKENS_PER_PAGE_HIGH, + type AttachmentDescriptor, +} from '../../lib/attachment-cost'; + +// ── Drift detection: Anthropic's published image table ────────────────────── +// +// Source: https://platform.claude.com/docs/en/build-with-claude/vision +// Pinned in: docs/attachment-cost-spec.md +// +// If any of these assertions fail, Anthropic likely changed the formula or +// the per-model caps. Refetch the docs, update the spec, update the constants +// in lib/attachment-cost.ts, and adjust these expectations together. 
+ +describe('Anthropic image table - Sonnet 4.6 (drift detection)', () => { + const MODEL = 'claude-sonnet-4-6'; + it.each([ + // [width, height, expected tokens, Anthropic's "approximately" value] + [200, 200, 53], // doc says ~54; w*h/750 = 53.33 -> 53 (rounded) + [1000, 1000, 1333], // doc says ~1334; w*h/750 = 1333.33 -> 1333 + [1092, 1092, 1568], // doc says ~1568; raw = 1590, capped to 1568 + [1920, 1080, 1568], // doc says ~1568, downscaled + [2000, 1500, 1568], // doc says ~1568, downscaled + ])('%dx%d -> %d tokens', (w, h, expected) => { + expect(computeImageTokens(w, h, MODEL)).toBe(expected); + }); +}); + +describe('Anthropic image table - Opus 4.7 (drift detection)', () => { + const MODEL = 'claude-opus-4-7'; + it.each([ + [200, 200, 53], // doc says ~54 + [1000, 1000, 1333], // doc says ~1334 + [1092, 1092, 1590], // doc says ~1590; no resize since 1092 < 2576, raw = 1590 + [1920, 1080, 2765], // doc says ~2765; 1920*1080/750 = 2764.8 -> 2765 + [2000, 1500, 4000], // doc says ~4000; 2000*1500/750 = 4000 + ])('%dx%d -> %d tokens', (w, h, expected) => { + expect(computeImageTokens(w, h, MODEL)).toBe(expected); + }); +}); + +// ── Image edge cases ──────────────────────────────────────────────────────── + +describe('computeImageTokens edge cases', () => { + it('zero dimensions return 0', () => { + expect(computeImageTokens(0, 0, 'claude-sonnet-4-6')).toBe(0); + expect(computeImageTokens(1000, 0, 'claude-sonnet-4-6')).toBe(0); + expect(computeImageTokens(0, 1000, 'claude-sonnet-4-6')).toBe(0); + }); + + it('negative dimensions return 0', () => { + expect(computeImageTokens(-5, 1000, 'claude-sonnet-4-6')).toBe(0); + }); + + it('unknown model returns null', () => { + expect(computeImageTokens(500, 500, 'gpt-4-turbo')).toBeNull(); + expect(computeImageTokens(500, 500, '')).toBeNull(); + }); + + it('caps at 1568 tokens for very large images on Sonnet', () => { + expect(computeImageTokens(8000, 8000, 'claude-sonnet-4-6')).toBe(1568); + }); + + it('caps at 4784 
tokens for very large images on Opus 4.7', () => { + expect(computeImageTokens(8000, 8000, 'claude-opus-4-7')).toBe(4784); + }); + + it('handles wide aspect ratio with long-edge resize on Sonnet', () => { + // 3000x100 on Sonnet: long edge 3000 > 1568, scale = 1568/3000 = 0.5227. + // Resized to 1568x52 (rounded). Tokens = 1568*52/750 = 108.7 -> 109. + const t = computeImageTokens(3000, 100, 'claude-sonnet-4-6'); + expect(t).toBe(109); + }); + + it('handles tall aspect ratio with long-edge resize on Opus 4.7', () => { + // 100x3000 on Opus 4.7: long edge 3000 > 2576, scale = 2576/3000 = 0.8587. + // Resized to 86x2576. Tokens = 86*2576/750 = 295.4 -> 295. + const t = computeImageTokens(100, 3000, 'claude-opus-4-7'); + expect(t).toBe(295); + }); + + it('Haiku 4.5 (200K context) uses default 1568 caps', () => { + expect(computeImageTokens(2000, 1500, 'claude-haiku-4-5')).toBe(1568); + }); + + it('Sonnet long-form model ID also resolves to default caps', () => { + expect(computeImageTokens(1000, 1000, 'claude-sonnet-4-6-20250514')).toBe(1333); + }); + + it('Opus 4.6 (1M context, no high-res) uses default caps', () => { + // Opus 4.6 has 1M context but does NOT have high-res image support; + // only Opus 4.7 does. This is the trap a multiplier-based model would + // fall into; the per-model caps table catches it correctly. 
+ expect(computeImageTokens(2000, 1500, 'claude-opus-4-6')).toBe(1568); + }); +}); + +// ── PDF token range ───────────────────────────────────────────────────────── + +describe('computePdfTokenRange', () => { + it.each([ + [1, 1500, 3000], + [10, 15000, 30000], + [100, 150000, 300000], + [600, 900000, 1800000], + ])('%d pages -> [%d, %d] tokens', (pages, low, high) => { + expect(computePdfTokenRange(pages)).toEqual({ low, high }); + }); + + it('zero or negative pages returns zero range', () => { + expect(computePdfTokenRange(0)).toEqual({ low: 0, high: 0 }); + expect(computePdfTokenRange(-1)).toEqual({ low: 0, high: 0 }); + }); + + it('Anthropic published constants are stable', () => { + // Drift sentinel: if Anthropic publishes a new range, these break first + // and force a docs review. Source: build-with-claude/pdf-support. + expect(PDF_TOKENS_PER_PAGE_LOW).toBe(1500); + expect(PDF_TOKENS_PER_PAGE_HIGH).toBe(3000); + }); +}); + +// ── computeAttachmentCost: combined behavior ───────────────────────────────── + +describe('computeAttachmentCost', () => { + const img = (w: number, h: number, name = 'img.png', fileSize = 50_000): AttachmentDescriptor => + ({ kind: 'image', width: w, height: h, sourceLabel: name, fileSize }); + const pdf = (pages: number, name = 'doc.pdf', fileSize = 200_000): AttachmentDescriptor => + ({ kind: 'pdf', pageCount: pages, sourceLabel: name, fileSize }); + + it('empty list returns zero totals and empty breakdown', () => { + const r = computeAttachmentCost([], 'claude-sonnet-4-6'); + expect(r.totalTokensLow).toBe(0); + expect(r.totalTokensHigh).toBe(0); + expect(r.breakdown).toHaveLength(0); + expect(r.warnings).toHaveLength(0); + expect(r.hasUnknownImage).toBe(false); + expect(r.hasPdf).toBe(false); + }); + + it('image-only sums correctly on Sonnet', () => { + const r = computeAttachmentCost([img(1000, 1000)], 'claude-sonnet-4-6'); + expect(r.totalTokensLow).toBe(1333); + expect(r.totalTokensHigh).toBe(1333); + 
expect(r.hasPdf).toBe(false); + expect(r.breakdown[0].kind).toBe('image'); + expect(r.breakdown[0].tokensHigh).toBeUndefined(); + }); + + it('PDF-only returns proper low/high range', () => { + const r = computeAttachmentCost([pdf(8)], 'claude-sonnet-4-6'); + expect(r.totalTokensLow).toBe(8 * 1500); + expect(r.totalTokensHigh).toBe(8 * 3000); + expect(r.hasPdf).toBe(true); + expect(r.breakdown[0].kind).toBe('pdf'); + expect(r.breakdown[0].tokens).toBe(8 * 1500); + expect(r.breakdown[0].tokensHigh).toBe(8 * 3000); + expect(r.breakdown[0].label).toContain('8 pages'); + }); + + it('singular page label for 1-page PDF', () => { + const r = computeAttachmentCost([pdf(1)], 'claude-sonnet-4-6'); + expect(r.breakdown[0].label).toContain('1 page'); + expect(r.breakdown[0].label).not.toContain('pages'); + }); + + it('mixed image and PDF sum each component independently', () => { + const r = computeAttachmentCost([img(1000, 1000), pdf(5)], 'claude-sonnet-4-6'); + expect(r.totalTokensLow).toBe(1333 + 5 * 1500); + expect(r.totalTokensHigh).toBe(1333 + 5 * 3000); + expect(r.breakdown).toHaveLength(2); + }); + + it('image on unknown model marks unknown without breaking the total', () => { + const r = computeAttachmentCost( + [img(500, 500, 'a.png'), pdf(2, 'b.pdf')], + 'gpt-4-turbo', + ); + expect(r.hasUnknownImage).toBe(true); + expect(r.totalTokensLow).toBe(2 * 1500); + expect(r.totalTokensHigh).toBe(2 * 3000); + expect(r.breakdown[0].unknown).toBe(true); + expect(r.breakdown[0].tokens).toBe(0); + }); + + it('PDF with null page count surfaces an unknown breakdown', () => { + const r = computeAttachmentCost( + [{ kind: 'pdf', pageCount: null, sourceLabel: 'encrypted.pdf', fileSize: 200_000 }], + 'claude-sonnet-4-6', + ); + expect(r.hasPdf).toBe(true); + expect(r.totalTokensLow).toBe(0); + expect(r.totalTokensHigh).toBe(0); + expect(r.breakdown).toHaveLength(1); + expect(r.breakdown[0].unknown).toBe(true); + expect(r.breakdown[0].label).toContain('unavailable'); + }); + + 
it('null-page PDF mixed with parseable PDF only counts the parseable one', () => { + const r = computeAttachmentCost([ + { kind: 'pdf', pageCount: null, sourceLabel: 'a.pdf', fileSize: 200_000 }, + { kind: 'pdf', pageCount: 3, sourceLabel: 'b.pdf', fileSize: 200_000 }, + ], 'claude-sonnet-4-6'); + expect(r.totalTokensLow).toBe(3 * 1500); + expect(r.totalTokensHigh).toBe(3 * 3000); + expect(r.breakdown).toHaveLength(2); + }); + + it('warns when PDF pages exceed 100-page cap on a 200K model (Haiku)', () => { + const r = computeAttachmentCost([pdf(150)], 'claude-haiku-4-5'); + expect(r.warnings.length).toBeGreaterThan(0); + expect(r.warnings[0]).toContain('150'); + expect(r.warnings[0]).toContain('100'); + }); + + it('does not warn at exactly the 100-page cap on a 200K model', () => { + const r = computeAttachmentCost([pdf(100)], 'claude-haiku-4-5'); + expect(r.warnings).toHaveLength(0); + }); + + it('warns above 600 pages on a 1M-context model (Sonnet)', () => { + const r = computeAttachmentCost([pdf(700)], 'claude-sonnet-4-6'); + expect(r.warnings.length).toBeGreaterThan(0); + expect(r.warnings[0]).toContain('700'); + expect(r.warnings[0]).toContain('600'); + }); + + it('aggregates page count across multiple PDFs for cap warning', () => { + const r = computeAttachmentCost([pdf(60), pdf(60)], 'claude-haiku-4-5'); + expect(r.warnings.length).toBeGreaterThan(0); + expect(r.warnings[0]).toContain('120'); + }); + + it('breakdown labels include exact dimensions for images', () => { + const r = computeAttachmentCost([img(1568, 1568)], 'claude-sonnet-4-6'); + expect(r.breakdown[0].label).toContain('1568x1568'); + }); + + // ── Aggregate request-size warning (Anthropic 32 MB hard cap) ────────── + + it('warns when aggregate file size approaches the 32 MB request cap', () => { + // 31 MB total: above the 30 MB warn threshold, below the 32 MB hard cap. 
+ const r = computeAttachmentCost([ + { kind: 'pdf', pageCount: 50, sourceLabel: 'big.pdf', fileSize: 31 * 1024 * 1024 }, + ], 'claude-sonnet-4-6'); + expect(r.warnings.length).toBeGreaterThan(0); + const w = r.warnings.find(s => s.includes('32 MB')); + expect(w).toBeDefined(); + expect(w).toContain('approaching'); + }); + + it('warns more strongly when aggregate file size exceeds the 32 MB hard cap', () => { + const r = computeAttachmentCost([ + { kind: 'pdf', pageCount: 50, sourceLabel: 'huge.pdf', fileSize: 35 * 1024 * 1024 }, + ], 'claude-sonnet-4-6'); + const w = r.warnings.find(s => s.includes('32 MB')); + expect(w).toBeDefined(); + expect(w).toContain('exceeds'); + }); + + it('aggregates file size across multiple attachments', () => { + // Two 16 MB images add to 32 MB which is above the warn threshold. + const r = computeAttachmentCost([ + { kind: 'image', width: 4000, height: 4000, sourceLabel: 'a.png', fileSize: 16 * 1024 * 1024 }, + { kind: 'image', width: 4000, height: 4000, sourceLabel: 'b.png', fileSize: 16 * 1024 * 1024 }, + ], 'claude-sonnet-4-6'); + const w = r.warnings.find(s => s.includes('32 MB')); + expect(w).toBeDefined(); + expect(w).toContain('32.0'); + }); + + it('does not warn at small total file sizes', () => { + const r = computeAttachmentCost([ + img(500, 500, 'a.png', 100_000), + pdf(5, 'b.pdf', 200_000), + ], 'claude-sonnet-4-6'); + const w = r.warnings.find(s => s.includes('32 MB')); + expect(w).toBeUndefined(); + }); +}); diff --git a/tests/unit/pdf-page-count.test.ts b/tests/unit/pdf-page-count.test.ts new file mode 100644 index 0000000..9019b52 --- /dev/null +++ b/tests/unit/pdf-page-count.test.ts @@ -0,0 +1,115 @@ +import { describe, it, expect } from 'vitest'; +import { countPdfPages } from '../../lib/pdf-page-count'; + +/** + * PDFs are byte-oriented but the textual scaffolding we parse is ASCII. 
Map + * each char to its byte to build a fixture without depending on TextEncoder + * (which is utf-8-only and would corrupt non-ASCII test inputs). + */ +function pdfBytes(text: string): Uint8Array { + const out = new Uint8Array(text.length); + for (let i = 0; i < text.length; i++) out[i] = text.charCodeAt(i) & 0xff; + return out; +} + +const minimalPdf = (count: number): string => `%PDF-1.4 +1 0 obj +<< /Type /Catalog /Pages 2 0 R >> +endobj +2 0 obj +<< /Type /Pages /Kids [3 0 R] /Count ${count} >> +endobj +3 0 obj +<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> +endobj +xref +0 4 +0000000000 65535 f +0000000009 00000 n +0000000045 00000 n +0000000095 00000 n +trailer +<< /Size 4 /Root 1 0 R >> +startxref +0 +%%EOF +`; + +describe('countPdfPages: page-tree root', () => { + it.each([1, 8, 100, 600])('returns Count from /Type /Pages root (%d pages)', (n) => { + expect(countPdfPages(pdfBytes(minimalPdf(n)))).toBe(n); + }); + + it('handles reverse key ordering (Count before Type Pages)', () => { + const text = `%PDF-1.4 +2 0 obj +<< /Count 12 /Type /Pages /Kids [3 0 R] >> +endobj +%%EOF +`; + expect(countPdfPages(pdfBytes(text))).toBe(12); + }); + + it('picks the maximum Count across intermediate page-tree nodes', () => { + const text = `%PDF-1.4 +1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj +2 0 obj << /Type /Pages /Kids [3 0 R 4 0 R] /Count 5 >> endobj +3 0 obj << /Type /Pages /Kids [5 0 R 6 0 R] /Count 2 >> endobj +4 0 obj << /Type /Page /Parent 2 0 R >> endobj +5 0 obj << /Type /Page /Parent 3 0 R >> endobj +6 0 obj << /Type /Page /Parent 3 0 R >> endobj +trailer << /Size 7 /Root 1 0 R >> +%%EOF +`; + expect(countPdfPages(pdfBytes(text))).toBe(5); + }); +}); + +describe('countPdfPages: leaf-page fallback', () => { + it('counts /Type /Page leaves when no /Pages root is present', () => { + const text = `%PDF-1.4 +1 0 obj << /Type /Page >> endobj +2 0 obj << /Type /Page >> endobj +3 0 obj << /Type /Page >> endobj +%%EOF +`; + 
expect(countPdfPages(pdfBytes(text))).toBe(3); + }); + + it('does not match /Type /Pages as a leaf page', () => { + // Only a Pages root, no leaves. Strategy 1 finds Count via the root. + const text = `%PDF-1.4 +2 0 obj << /Type /Pages /Count 7 >> endobj +%%EOF +`; + expect(countPdfPages(pdfBytes(text))).toBe(7); + }); +}); + +describe('countPdfPages: failure modes', () => { + it('returns null on empty input', () => { + expect(countPdfPages(new Uint8Array(0))).toBeNull(); + }); + + it('returns null on non-PDF garbage input', () => { + expect(countPdfPages(pdfBytes('not a pdf at all'))).toBeNull(); + }); + + it('returns null when no page tree or leaves can be found', () => { + const text = `%PDF-1.4 +2 0 obj << /Type /Catalog >> endobj +%%EOF +`; + expect(countPdfPages(pdfBytes(text))).toBeNull(); + }); + + it('rejects implausibly large Count values (regex false positives)', () => { + // A binary stream might happen to contain "/Count 999999999" by chance. + // The MAX_PLAUSIBLE_PAGES sanity gate filters it out. 
+ const text = `%PDF-1.4 +2 0 obj << /Type /Pages /Count 999999999 >> endobj +%%EOF +`; + expect(countPdfPages(pdfBytes(text))).toBeNull(); + }); +}); diff --git a/tests/unit/pre-submit.test.ts b/tests/unit/pre-submit.test.ts index 3bfcdb2..65163b3 100644 --- a/tests/unit/pre-submit.test.ts +++ b/tests/unit/pre-submit.test.ts @@ -6,6 +6,7 @@ import { WARNING_ZONE_PCT, type PreSubmitInput, } from '../../lib/pre-submit'; +import type { AttachmentBreakdownItem } from '../../lib/attachment-cost'; function makeInput(overrides: Partial = {}): PreSubmitInput { return { @@ -177,3 +178,254 @@ describe('warning', () => { expect(result!.warning).toBeNull(); }); }); + +// ── Attachments: tokens, range, gates, pass-through ───────────────────────── + +describe('attachments', () => { + const breakdownImg = (tokens: number): AttachmentBreakdownItem => + ({ kind: 'image', tokens, label: `image (${tokens})` }); + const breakdownPdf = (low: number, high: number): AttachmentBreakdownItem => + ({ kind: 'pdf', tokens: low, tokensHigh: high, label: 'PDF range' }); + + it('sums attachment tokens (low) into estimatedTokens', () => { + // 200 chars -> 50 text tokens; image adds 1334 -> 1384 total. + const result = computePreSubmitEstimate(makeInput({ + attachmentTokensLow: 1334, + attachmentTokensHigh: 1334, + attachmentBreakdown: [breakdownImg(1334)], + })); + expect(result!.textTokens).toBe(50); + expect(result!.estimatedTokens).toBe(1384); + expect(result!.estimatedTokensHigh).toBe(1384); + }); + + it('reflects PDF low/high range in estimatedTokensHigh', () => { + // 200 chars text + PDF low=15000 high=30000 (10 pages). 
+ const result = computePreSubmitEstimate(makeInput({ + attachmentTokensLow: 15000, + attachmentTokensHigh: 30000, + attachmentBreakdown: [breakdownPdf(15000, 30000)], + hasPdf: true, + })); + expect(result!.estimatedTokens).toBe(50 + 15000); + expect(result!.estimatedTokensHigh).toBe(50 + 30000); + expect(result!.hasPdf).toBe(true); + }); + + it('attachments-only draft (text below threshold) still produces an estimate', () => { + const result = computePreSubmitEstimate(makeInput({ + draftCharCount: 0, + attachmentTokensLow: 1334, + attachmentTokensHigh: 1334, + attachmentBreakdown: [breakdownImg(1334)], + })); + expect(result).not.toBeNull(); + expect(result!.textTokens).toBe(0); + expect(result!.estimatedTokens).toBe(1334); + }); + + it('text below threshold AND no attachments still returns null', () => { + const result = computePreSubmitEstimate(makeInput({ + draftCharCount: MIN_DRAFT_CHARS - 1, + attachmentTokensLow: 0, + attachmentTokensHigh: 0, + attachmentBreakdown: [], + })); + expect(result).toBeNull(); + }); + + it('session % uses combined token total', () => { + // 200 chars (50 tokens) + 1000-token attachment, rate 0.01 -> 10.5%. + const result = computePreSubmitEstimate(makeInput({ + attachmentTokensLow: 1000, + attachmentTokensHigh: 1000, + attachmentBreakdown: [breakdownImg(1000)], + })); + expect(result!.estimatedSessionPct).toBeCloseTo(10.5, 1); + expect(result!.estimatedSessionPctHigh).toBeCloseTo(10.5, 1); + }); + + it('session % high differs from low when PDF range applies', () => { + // 200 chars (50 tokens) + PDF low=1500 high=3000, rate 0.01. 
+ const result = computePreSubmitEstimate(makeInput({ + attachmentTokensLow: 1500, + attachmentTokensHigh: 3000, + attachmentBreakdown: [breakdownPdf(1500, 3000)], + hasPdf: true, + })); + expect(result!.estimatedSessionPct).toBeCloseTo(15.5, 1); + expect(result!.estimatedSessionPctHigh).toBeCloseTo(30.5, 1); + }); + + it('breakdown and warnings pass through unchanged', () => { + const breakdown = [breakdownImg(500), breakdownPdf(7500, 15000)]; + const warnings = ['150 PDF pages exceeds the 100-page limit on this model.']; + const result = computePreSubmitEstimate(makeInput({ + attachmentTokensLow: 8000, + attachmentTokensHigh: 15500, + attachmentBreakdown: breakdown, + attachmentWarnings: warnings, + hasPdf: true, + })); + expect(result!.attachmentBreakdown).toEqual(breakdown); + expect(result!.attachmentWarnings).toEqual(warnings); + }); + + it('hasUnknownImage flag passes through', () => { + const result = computePreSubmitEstimate(makeInput({ + attachmentTokensLow: 0, + attachmentTokensHigh: 0, + attachmentBreakdown: [{ kind: 'image', tokens: 0, label: 'image (cost unknown)', unknown: true }], + hasUnknownImage: true, + })); + expect(result!.hasUnknownImage).toBe(true); + }); + + it('warning fires only on LOW projection, not on HIGH spike from PDF', () => { + // Current session 80%, text 50 tokens (rate 0.01 -> 0.5%), PDF range + // adds low=1000 (10%) high=2000 (20%). Low projection: 80 + 10.5 = 90.5% + // (warning expected). High projection: 80 + 20.5 = 100.5%. + const result = computePreSubmitEstimate(makeInput({ + currentSessionPct: 80, + attachmentTokensLow: 1000, + attachmentTokensHigh: 2000, + attachmentBreakdown: [breakdownPdf(1000, 2000)], + hasPdf: true, + })); + expect(result!.warning).not.toBeNull(); + }); + + it('no warning when only HIGH would cross zone, LOW does not', () => { + // Current session 80%, low=400 (4%) high=2000 (20%). Low projection + // 80 + 4.5 = 84.5% (no warning). High would be 100.5% but we ignore it. 
+ const result = computePreSubmitEstimate(makeInput({ + currentSessionPct: 80, + attachmentTokensLow: 400, + attachmentTokensHigh: 2000, + attachmentBreakdown: [breakdownPdf(400, 2000)], + hasPdf: true, + })); + expect(result!.warning).toBeNull(); + }); +}); + +// ── Context-window projection (Anthropic "dense PDFs fill context") ───────── + +describe('context-window projection', () => { + it('exposes contextWindowSize from the model', () => { + const r = computePreSubmitEstimate(makeInput({ model: 'claude-sonnet-4-6' })); + expect(r!.contextWindowSize).toBe(1_000_000); + }); + + it('200K models reflect the smaller window', () => { + const r = computePreSubmitEstimate(makeInput({ + model: 'claude-haiku-4-5', + pctPerInputToken: { 'claude-haiku-4-5': 0.01 }, + })); + expect(r!.contextWindowSize).toBe(200_000); + }); + + it('projects context % from history + this turn (low and high)', () => { + // currentContext 40%, text adds 50 tokens (~0.005% of 1M). + // PDF range 1500-3000 tokens (0.15%-0.3% of 1M). + // Expected: low ~40.155% (40 + 0.005 + 0.15), high ~40.305% (40 + 0.005 + 0.3). + const r = computePreSubmitEstimate(makeInput({ + currentContextPct: 40, + attachmentTokensLow: 1500, + attachmentTokensHigh: 3000, + attachmentBreakdown: [{ kind: 'pdf', tokens: 1500, tokensHigh: 3000, label: 'PDF 1 page' }], + hasPdf: true, + })); + expect(r!.projectedContextPctLow!).toBeCloseTo(40 + 0.155, 2); + expect(r!.projectedContextPctHigh!).toBeCloseTo(40 + 0.305, 2); + }); + + it('emits contextOverrunWarning when high projection >= 90% of context', () => { + // 491-page PDF on Sonnet (1M ctx) at 0% context: 491*1500=736.5k low, + // 491*3000=1.473M high. High projection: 147% of context. 
+ const r = computePreSubmitEstimate(makeInput({ + currentContextPct: 0, + draftCharCount: 0, + attachmentTokensLow: 491 * 1500, + attachmentTokensHigh: 491 * 3000, + attachmentBreakdown: [{ + kind: 'pdf', + tokens: 491 * 1500, + tokensHigh: 491 * 3000, + label: 'PDF 491 pages', + }], + hasPdf: true, + })); + expect(r!.contextOverrunWarning).not.toBeNull(); + expect(r!.contextOverrunWarning).toContain('exceeds'); + expect(r!.contextOverrunWarning).toContain('1000k'); + }); + + it('emits soft "would fill" wording when projection is between 90% and 100%', () => { + // currentContext 80%, draft adds tokens that push high projection + // into the 90-100% band on a 1M model. + const r = computePreSubmitEstimate(makeInput({ + currentContextPct: 80, + attachmentTokensLow: 100_000, + attachmentTokensHigh: 150_000, + attachmentBreakdown: [{ + kind: 'pdf', tokens: 100_000, tokensHigh: 150_000, label: 'PDF', + }], + hasPdf: true, + })); + expect(r!.contextOverrunWarning).not.toBeNull(); + expect(r!.contextOverrunWarning).toContain('would fill'); + expect(r!.contextOverrunWarning).toContain('splitting'); + }); + + it('no warning when projection is comfortably under 90%', () => { + const r = computePreSubmitEstimate(makeInput({ + currentContextPct: 10, + attachmentTokensLow: 50_000, + attachmentTokensHigh: 50_000, + attachmentBreakdown: [{ kind: 'image', tokens: 50_000, label: 'image' }], + })); + expect(r!.contextOverrunWarning).toBeNull(); + }); + + it('200K-context model fires earlier with the same PDF', () => { + // 50-page PDF: 75k low, 150k high. On Haiku (200k), high = 75% of ctx. + // Below 90% threshold, no warning. But add some history. 
+ const dense = computePreSubmitEstimate(makeInput({ + model: 'claude-haiku-4-5', + pctPerInputToken: { 'claude-haiku-4-5': 0.01 }, + currentContextPct: 20, + attachmentTokensLow: 50 * 1500, + attachmentTokensHigh: 50 * 3000, + attachmentBreakdown: [{ + kind: 'pdf', tokens: 75_000, tokensHigh: 150_000, label: 'PDF 50 pages', + }], + hasPdf: true, + })); + // 20 + 150_000/200_000*100 = 20 + 75 = 95%, above threshold. + expect(dense!.contextOverrunWarning).not.toBeNull(); + expect(dense!.contextOverrunWarning).toContain('200k'); + }); +}); + +// ── Backwards compatibility (DRAFT_ESTIMATE pre-send fallback) ─────────────── + +describe('backwards compatibility', () => { + it('input without attachment fields behaves like before', () => { + // The inject.ts pre-send fallback only sets draftCharCount, model, + // pctPerInputToken, currentSessionPct. Make sure that path still works. + const result = computePreSubmitEstimate({ + draftCharCount: 200, + model: 'claude-sonnet-4-6', + pctPerInputToken: { 'claude-sonnet-4-6': 0.01 }, + currentSessionPct: 40, + }); + expect(result).not.toBeNull(); + expect(result!.estimatedTokens).toBe(50); + expect(result!.estimatedTokensHigh).toBe(50); + expect(result!.attachmentBreakdown).toEqual([]); + expect(result!.attachmentWarnings).toEqual([]); + expect(result!.hasPdf).toBe(false); + expect(result!.hasUnknownImage).toBe(false); + }); +}); diff --git a/ui/overlay-styles.ts b/ui/overlay-styles.ts index f9aa35d..ce2f177 100644 --- a/ui/overlay-styles.ts +++ b/ui/overlay-styles.ts @@ -218,12 +218,47 @@ export const OVERLAY_CSS = ` text-overflow: ellipsis; } +.lco-draft-context { + font-size: 10px; + line-height: 1.4; + color: var(--lco-text); + font-variant-numeric: tabular-nums; + margin-top: 1px; + opacity: 0.85; +} + +.lco-draft-breakdown { + font-size: 9px; + line-height: 1.4; + color: var(--lco-muted); + white-space: normal; + word-break: break-word; + margin-top: 1px; +} + +.lco-draft-disclosure { + font-size: 9px; + line-height: 1.3; 
+ color: var(--lco-muted); + font-style: italic; + opacity: 0.85; + margin-top: 1px; +} + .lco-draft-warning { font-size: 9px; line-height: 1.3; color: var(--lco-warn-fill); } +.lco-draft-hard-warning { + font-size: 10px; + line-height: 1.35; + color: var(--lco-accent); + font-weight: 600; + margin-top: 2px; +} + /* ── Health indicator ── */ .lco-health-row { diff --git a/ui/overlay.ts b/ui/overlay.ts index f84c6d0..b70d966 100644 --- a/ui/overlay.ts +++ b/ui/overlay.ts @@ -8,6 +8,8 @@ import { OVERLAY_CSS } from './overlay-styles'; import type { OverlayState } from '../lib/overlay-state'; import type { ContextSignal } from '../lib/context-intelligence'; import { classifyZone } from '../lib/usage-budget'; +import type { PreSubmitEstimate } from '../lib/pre-submit'; +import type { AttachmentBreakdownItem } from '../lib/attachment-cost'; export interface OverlayHandle { mount(shadow: ShadowRoot): void; @@ -28,6 +30,74 @@ function fmtCost(c: number | null): string { return `$${c.toFixed(4)}`; } +/** + * Compact token formatter for the draft row: 1234 -> "1.2k", 1234567 -> "1.2M". + * Same shape as lib/format formatTokens but local so this file stays UI-only. NOTE(review): 999_999 prints "1000.0k", not "1.0M". + */ +function fmtTokensCompact(n: number): string { + if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`; + if (n >= 1_000) return `${(n / 1_000).toFixed(1)}k`; + return String(n); +} + +/** + * Main draft-row text. When low equals high (no PDF, or zero attachments), + * shows a single token figure; when they differ (PDF range), shows "low to + * high". Session % is appended unless it is null; it is rendered to one + * decimal and shown as a range only when the bounds differ by >= 0.05. + */ +function formatDraftValue(draft: PreSubmitEstimate): string { + const low = draft.estimatedTokens; + const high = draft.estimatedTokensHigh; + const tokensText = low === high + ? 
`~${fmtTokensCompact(low)} tokens` + : `~${fmtTokensCompact(low)} to ${fmtTokensCompact(high)} tokens`; + + if (draft.estimatedSessionPct === null) return tokensText; + + const pctLow = draft.estimatedSessionPct; + const pctHigh = draft.estimatedSessionPctHigh ?? pctLow; + const pctText = Math.abs(pctHigh - pctLow) < 0.05 + ? `~${pctLow.toFixed(1)}% of session` + : `~${pctLow.toFixed(1)}% to ${pctHigh.toFixed(1)}% of session`; + + return `${tokensText} ${pctText}`; +} + +/** + * One-line breakdown for an attachment. Images render as "+Nk from image WxH"; + * PDFs render as "+lowK to highK from PDF N pages". Unknown-cost images + * render with "?" instead of a token figure. + */ +function formatBreakdownLine(item: AttachmentBreakdownItem): string { + if (item.unknown) return `${item.label} (?)`; + if (item.tokensHigh !== undefined && item.tokensHigh !== item.tokens) { + return `+${fmtTokensCompact(item.tokens)} to ${fmtTokensCompact(item.tokensHigh)} from ${item.label}`; + } + return `+${fmtTokensCompact(item.tokens)} from ${item.label}`; +} + +/** + * Context-window projection line. Shows what share of the model's window the + * upcoming turn would occupy on top of the conversation history. Renders as + * a range when the rounded low and high differ, otherwise as a single number. + * Returns an empty string when either bound is null. + */ +function formatContextProjection( + low: number | null, + high: number | null, + windowSize: number, +): string { + if (low === null || high === null) return ''; + const ctxK = windowSize >= 1_000_000 + ? `${(windowSize / 1_000_000).toFixed(0)}M` + : `${Math.round(windowSize / 1000)}k`; + const lowR = Math.round(low); + const highR = Math.round(high); + if (lowR === highR) return `~${lowR}% of ${ctxK} context`; + return `~${lowR}% to ${highR}% of ${ctxK} context`; +} + export function createOverlay(): OverlayHandle { // DOM refs: null until mount() is called. render() is a no-op until then. 
let overlayWidget: HTMLDivElement | null = null; @@ -56,8 +126,12 @@ export function createOverlay(): OverlayHandle { let nudgeHideTimer: ReturnType | null = null; let elDraftRow: HTMLElement | null = null; let elDraftValue: HTMLElement | null = null; + let elDraftContext: HTMLElement | null = null; + let elDraftBreakdown: HTMLElement | null = null; + let elDraftDisclosure: HTMLElement | null = null; let elDraftCompare: HTMLElement | null = null; let elDraftWarning: HTMLElement | null = null; + let elDraftHardWarning: HTMLElement | null = null; let elWeeklyRow: HTMLElement | null = null; let elWeeklyFill: HTMLElement | null = null; let elWeeklyLabel: HTMLElement | null = null; @@ -115,6 +189,32 @@ export function createOverlay(): OverlayHandle { draftRow.appendChild(valDraft); body.appendChild(draftRow); + // Context-window projection row: shows what fraction of the model's + // context window THIS turn would consume (history + draft + attachments). + // Hidden until the projection is non-trivial (>=1% of context). + const draftContext = document.createElement('div'); + draftContext.className = 'lco-draft-context'; + draftContext.style.display = 'none'; + elDraftContext = draftContext; + body.appendChild(draftContext); + + // Draft per-attachment breakdown (hidden when no attachments). + // One line per image or PDF, e.g. "+1.6k from image (1568x1568)". + const draftBreakdown = document.createElement('div'); + draftBreakdown.className = 'lco-draft-breakdown'; + draftBreakdown.style.display = 'none'; + elDraftBreakdown = draftBreakdown; + body.appendChild(draftBreakdown); + + // PDF disclosure (hidden unless a PDF is attached). Surfaces the + // unpublished per-page image overhead Anthropic does not quantify. 
+ const draftDisclosure = document.createElement('div'); + draftDisclosure.className = 'lco-draft-disclosure'; + draftDisclosure.style.display = 'none'; + draftDisclosure.textContent = 'PDFs with charts or images may cost more.'; + elDraftDisclosure = draftDisclosure; + body.appendChild(draftDisclosure); + // Draft model comparison (hidden unless cost > 5%) const draftCompare = document.createElement('div'); draftCompare.className = 'lco-draft-compare'; @@ -129,6 +229,15 @@ export function createOverlay(): OverlayHandle { elDraftWarning = draftWarning; body.appendChild(draftWarning); + // Hard warnings from the attachment agent (e.g. PDF page cap exceeded). + // More urgent than the projection warning above; rendered in the rust + // accent so the user notices before send. + const draftHardWarning = document.createElement('div'); + draftHardWarning.className = 'lco-draft-hard-warning'; + draftHardWarning.style.display = 'none'; + elDraftHardWarning = draftHardWarning; + body.appendChild(draftHardWarning); + // Last request row const rowLast = document.createElement('div'); rowLast.className = 'lco-row'; @@ -314,17 +423,26 @@ export function createOverlay(): OverlayHandle { const draft = state.draftEstimate; if (draft) { elDraftRow.style.display = ''; - if (draft.estimatedSessionPct !== null) { - elDraftValue.textContent = - `~${fmt(draft.estimatedTokens)} tokens ~${draft.estimatedSessionPct.toFixed(1)}% of session`; - } else { - elDraftValue.textContent = `~${fmt(draft.estimatedTokens)} tokens`; - } + elDraftValue.textContent = formatDraftValue(draft); } else { elDraftRow.style.display = 'none'; elDraftValue.textContent = ''; } } + if (elDraftBreakdown) { + const items = state.draftEstimate?.attachmentBreakdown ?? 
[]; + if (items.length === 0) { + elDraftBreakdown.style.display = 'none'; + elDraftBreakdown.textContent = ''; + } else { + elDraftBreakdown.style.display = ''; + elDraftBreakdown.textContent = items.map(formatBreakdownLine).join(' '); + } + } + if (elDraftDisclosure) { + const showDisclosure = state.draftEstimate?.hasPdf ?? false; + elDraftDisclosure.style.display = showDisclosure ? '' : 'none'; + } if (elDraftCompare) { const comparisons = state.draftEstimate?.modelComparisons ?? []; if (comparisons.length > 0) { @@ -345,6 +463,34 @@ export function createOverlay(): OverlayHandle { elDraftWarning.style.display = 'none'; } } + if (elDraftHardWarning) { + // Fold the context-overrun warning into the same hard-warning row + // as PDF page-cap and request-size violations. All three are + // "this send may fail or truncate" issues; one prominent row keeps + // the user from missing any of them. + const draft = state.draftEstimate; + const hardParts: string[] = []; + if (draft?.contextOverrunWarning) hardParts.push(draft.contextOverrunWarning); + if (draft?.attachmentWarnings) hardParts.push(...draft.attachmentWarnings); + if (hardParts.length > 0) { + elDraftHardWarning.textContent = hardParts.join(' '); + elDraftHardWarning.style.display = ''; + } else { + elDraftHardWarning.style.display = 'none'; + } + } + if (elDraftContext) { + const draft = state.draftEstimate; + const low = draft?.projectedContextPctLow ?? null; + const high = draft?.projectedContextPctHigh ?? null; + // Show the row when there is anything meaningful to display + // (projection >= 1% so we do not flash for one-character drafts). + const visible = high !== null && high >= 1; + elDraftContext.style.display = visible ? '' : 'none'; + if (visible) { + elDraftContext.textContent = formatContextProjection(low, high, draft!.contextWindowSize); + } + } if (elCurrentRequest && state.lastRequest) { const { inputTokens, outputTokens, cost } = state.lastRequest;