diff --git a/.pebbles/.gitignore b/.pebbles/.gitignore new file mode 100644 index 000000000..0a168c659 --- /dev/null +++ b/.pebbles/.gitignore @@ -0,0 +1 @@ +pebbles.db diff --git a/.pebbles/config.json b/.pebbles/config.json new file mode 100644 index 000000000..5174be7f4 --- /dev/null +++ b/.pebbles/config.json @@ -0,0 +1,3 @@ +{ + "prefix": "volt" +} \ No newline at end of file diff --git a/.pebbles/events.jsonl b/.pebbles/events.jsonl new file mode 100644 index 000000000..c0320e390 --- /dev/null +++ b/.pebbles/events.jsonl @@ -0,0 +1,46 @@ +{"type":"create","timestamp":"2026-02-25T23:16:16.169294Z","issue_id":"volt-156","payload":{"description":"Fixes identified during code review (self-review + independent Codex review) of the\nfix/lcm-inline-content-path-resolution branch before merge to dev.\n\nThe branch introduces lcm_read (a tool that lets models retrieve large content stored\nin LCM by file ID) and fixes the storage model for inline payloads. The core feature\nworks and was verified E2E, but review surfaced 9 discrete issues ranging from type\nsafety holes to a high-severity multi-tenant migration gap.\n\nThese subtasks are ordered by dependency. The upgrade refactor revert (C6) is\nindependent and will be handled by the coordinator. All others are delegated to\nCodex workers.","priority":"2","title":"PR review fixes for lcm-inline-content-path-resolution","type":"epic"}} +{"type":"create","timestamp":"2026-02-25T23:16:29.282808Z","issue_id":"volt-4cf","payload":{"description":"## Problem\n\nThe array [\"lcm_expand\", \"lcm_grep\", \"lcm_read\"] is defined as a const inside TWO\nseparate component functions in packages/voltcode/src/cli/cmd/tui/routes/session/index.tsx:\n\n1. Inside AssistantMessage (line ~1559)\n2. Inside ToolPart (line ~1837)\n\nIf a new LCM tool is added, both must be updated or the UI silently breaks for one\nrendering path. This is a DRY violation.\n\n## Required change\n\n1. 
Extract `LCM_INTERNAL_TOOLS` to a single module-level const, defined once near the\n top of session/index.tsx (or in a shared constants file if one exists nearby).\n2. Replace both inline definitions with references to the shared const.\n3. At each usage site, add a brief comment: // See LCM_INTERNAL_TOOLS definition above\n (or the import path if extracted to a separate file).\n\n## Scope boundaries\n\n- Do NOT refactor anything else in session/index.tsx.\n- Do NOT change the contents of the array — only where it's defined.\n- Do NOT move the component functions or change their signatures.\n\n## Acceptance criteria\n\n- grep -c \"LCM_INTERNAL_TOOLS\" on session/index.tsx shows exactly 1 definition\n and 2+ references (not 2 definitions).\n- The TUI still renders correctly (typecheck passes, no runtime change).","priority":"2","title":"DRY out LCM_INTERNAL_TOOLS constant in TUI","type":"task"}} +{"type":"create","timestamp":"2026-02-25T23:16:37.914032Z","issue_id":"volt-59c","payload":{"description":"## Problem\n\npackages/voltcode/src/tool/lcm-read.ts defines:\n\n interface LcmReadMetadata {\n fileId: string\n found: boolean\n truncated: boolean\n totalSize: number\n storageKind?: string // \u003c-- THIS\n }\n\nThe successful return path (lines 129-140) never populates storageKind. It is always\nundefined at runtime. This is dead surface area that confuses future readers into\nthinking the field carries meaningful data.\n\nThe storageKind is not surfaced to the model (metadata goes to the processor, not\nthe model's context), so there is no value in returning it.\n\n## Required change\n\nRemove the `storageKind?: string` line from the LcmReadMetadata interface.\n\n## Scope boundaries\n\n- Do NOT add storageKind population logic. 
We're removing the field, not fixing it.\n- Do NOT change any other interface or type in the file.\n\n## Acceptance criteria\n\n- LcmReadMetadata has exactly 4 fields: fileId, found, truncated, totalSize.\n- Typecheck passes.","priority":"3","title":"Remove dead storageKind field from LcmReadMetadata","type":"task"}} +{"type":"create","timestamp":"2026-02-25T23:16:55.936937Z","issue_id":"volt-52b","payload":{"description":"## Problem\n\nlcm_read signals the processor to skip re-storage by setting metadata.lcm = { storedInLcm: true, fileId }.\nThe processor reads this at processor.ts:534 via toolResult.metadata?.lcm?.storedInLcm.\n\nBut:\n- LcmReadMetadata (lcm-read.ts:23-29) does NOT include an lcm field.\n- The processor accesses it through TypeScript's structural typing / any escape.\n- If Tool.define's metadata generic is ever tightened, this breaks silently at runtime\n with no compile-time error.\n\n## Context: how Tool.define metadata works\n\nLook at packages/voltcode/src/tool/tool.ts to understand the Tool.define\u003cP, M\u003e generic.\nThe second type parameter M is the metadata type. The processor receives toolResult.metadata\ntyped as whatever M is. Right now it's loose enough that extra fields sneak through, but\nthat's an accident of the current type system, not a contract.\n\n## Required change\n\n1. Define a shared type in a sensible location (e.g. packages/voltcode/src/session/lcm/types.ts\n or alongside the existing LCM types — use your judgment):\n\n export interface LcmToolMetadata {\n storedInLcm: boolean\n fileId: string\n }\n\n2. Update LcmReadMetadata in lcm-read.ts to include:\n\n lcm?: LcmToolMetadata\n\n3. Update processor.ts to import and use LcmToolMetadata for the type of lcmMetadata,\n rather than relying on any/structural typing. The access pattern\n toolResult.metadata?.lcm?.storedInLcm should be type-safe after this change.\n\n4. If other LCM tools (lcm_expand, lcm_describe, lcm_grep) also set metadata.lcm,\n update them too. 
If they don't, leave them alone.\n\n## Scope boundaries\n\n- Do NOT change the runtime behavior. The same values flow through the same paths.\n This is purely a type-safety improvement.\n- Do NOT refactor the processor's large-output handling logic.\n- Do NOT add new metadata fields beyond what already exists.\n\n## Acceptance criteria\n\n- toolResult.metadata?.lcm?.storedInLcm access in processor.ts is fully typed\n (no any, no structural escape).\n- LcmReadMetadata includes lcm?: LcmToolMetadata.\n- Typecheck passes with no new suppressions or casts.\n- The shared type is importable from a single canonical location.","priority":"1","title":"Type the metadata.lcm escape hatch between lcm_read and processor","type":"task"}} +{"type":"create","timestamp":"2026-02-25T23:17:55.936468Z","issue_id":"volt-d7e","payload":{"description":"## Problem\n\npackages/voltcode/src/session/lcm/db.ts lines 2019-2031 contain a backward-compatibility\nfallback in getLargeFileContent:\n\n // Backward-compatibility fallback for rows that predate storage_kind enforcement.\n if (row.original_path) {\n const file = Bun.file(row.original_path)\n const exists = await file.exists()\n if (!exists) {\n log.warn(\"legacy large file path not found on disk\", { fileId, path: row.original_path })\n return null\n }\n const content = await file.text()\n return { content, truncated: false, totalSize: content.length }\n }\n\nThis code:\n1. Ignores the maxBytes parameter entirely (reads full file into memory).\n2. Always returns truncated: false regardless of actual size.\n3. Is unreachable after the migration runs (the migration backfills storage_kind\n on ALL existing rows, so the code above this block always matches).\n\nWe want a clean cutover. No backward-compat shims.\n\n## Required change\n\nDelete the entire backward-compatibility fallback block (the if (row.original_path)\nblock that comes AFTER the storage_kind === \"path\" branch). 
The function should fall\nthrough to `return null` after the path and inline_text branches.\n\nAlso delete the comment \"Backward-compatibility fallback for rows that predate\nstorage_kind enforcement.\"\n\n## Scope boundaries\n\n- Do NOT modify the storage_kind === \"path\" branch above it (that one is correct\n and respects maxBytes).\n- Do NOT modify the inline_text branch.\n- Do NOT add any new fallback logic. If storage_kind is somehow unrecognized,\n returning null is correct.\n\n## Acceptance criteria\n\n- The backward-compat block is gone.\n- getLargeFileContent has exactly three code paths: inline_text → return content,\n path → read from disk with maxBytes, everything else → return null.\n- Typecheck passes.","priority":"1","title":"Delete backward-compat fallback in getLargeFileContent","type":"task"}} +{"type":"create","timestamp":"2026-02-25T23:18:18.213979Z","issue_id":"volt-66c","payload":{"description":"## Problem\n\npackages/voltcode/src/session/lcm/db.ts lines ~461-478 contain two separate DO blocks\nfor the large_files_storage_shape_check constraint:\n\nBlock 1 (DROP): Checks if constraint exists, drops it.\nBlock 2 (ADD): Adds constraint, catches duplicate_object.\n\nIf execution crashes between the DROP and ADD, the table is left without a constraint.\nAdditionally, every app startup unconditionally drops and recreates the constraint even\nwhen nothing changed — that's a gratuitous write on every boot.\n\n## Required change\n\nCombine both operations into a single DO block so the DROP and ADD are atomic within\none PL/pgSQL execution. 
Also add a guard: only drop-and-recreate if the constraint\ndefinition has actually changed.\n\nHere's the pattern:\n\n DO \\$\\$ \n DECLARE\n current_def text;\n desired_def text := '((storage_kind = ...))'; -- the full CHECK body\n BEGIN\n -- Get current constraint definition if it exists\n SELECT pg_get_constraintdef(oid) INTO current_def\n FROM pg_constraint\n WHERE conname = 'large_files_storage_shape_check'\n AND conrelid = 'large_files'::regclass;\n\n -- Only recreate if missing or changed\n IF current_def IS NULL OR current_def != desired_def THEN\n IF current_def IS NOT NULL THEN\n EXECUTE 'ALTER TABLE large_files DROP CONSTRAINT large_files_storage_shape_check';\n END IF;\n ALTER TABLE large_files\n ADD CONSTRAINT large_files_storage_shape_check CHECK (\n (storage_kind = 'path' AND original_path IS NOT NULL AND content IS NULL AND binary_content IS NULL) OR\n (storage_kind = 'inline_text' AND content IS NOT NULL AND binary_content IS NULL) OR\n (storage_kind = 'inline_binary' AND binary_content IS NOT NULL AND content IS NULL)\n );\n END IF;\n END \\$\\$;\n\nNOTE: The desired_def string must match what pg_get_constraintdef returns for the\nCHECK expression. You'll need to check the exact format PostgreSQL uses (it may\nnormalize parentheses, spacing, etc.). Test by running the migration, then querying\npg_get_constraintdef to see the canonical form, and use THAT as your comparison string.\n\nIf getting the exact string match is fiddly, an acceptable alternative is: just wrap\nthe DROP + ADD in a single DO block (so they're atomic) and skip the \"has it changed\"\noptimization. 
Atomicity is the must-have; skip-if-unchanged is nice-to-have.\n\n## Scope boundaries\n\n- Only touch the CHECK constraint migration block.\n- Do NOT modify the constraint definition itself.\n- Do NOT change other migrations in the same file.\n\n## Acceptance criteria\n\n- The DROP and ADD are in a single DO block (atomic execution).\n- Typecheck passes.\n- If you can verify: running the migration twice in a row does not error.","priority":"2","title":"Make CHECK constraint migration atomic and idempotent","type":"task"}} +{"type":"create","timestamp":"2026-02-25T23:18:31.864765Z","issue_id":"volt-44e","payload":{"description":"## Problem\n\nCommit 70a5534e0 (\"refactor: replace upstream upgrade sources with voltropy install\nscript\") is bundled into the fix/lcm-inline-content-path-resolution branch but has\nnothing to do with the LCM bug fix. It removes ~170 lines of package-manager upgrade\npaths (npm, brew, choco, scoop) and replaces them with a single voltropy.com endpoint.\n\nThis inflates the diff, complicates review, and mixes concerns. Additionally, Voltropy\nis handing the product off to Martian Engineering, so the upgrade path needs separate\nconsideration.\n\n## Required change\n\nRevert commit 70a5534e0 from this branch. 
The upgrade refactor can be re-applied\nas a separate PR after the LCM fix lands.\n\n## Owner\n\nCoordinator (not delegated to a worker).\n\n## Acceptance criteria\n\n- git log dev..HEAD no longer contains 70a5534e0.\n- installation/index.ts, cli/cmd/upgrade.ts, and cli/upgrade.ts match their state on dev.\n- All other commits on the branch are preserved.\n- Typecheck passes.","priority":"2","title":"Revert upgrade refactor from LCM PR branch","type":"task"}} +{"type":"create","timestamp":"2026-02-25T23:18:45.899017Z","issue_id":"volt-986","payload":{"description":"## Problem\n\npackages/voltcode/src/tool/lcm-read.ts lines 16-20 define:\n\n max_bytes: z\n .number()\n .min(1)\n .optional()\n .describe(\"Optional byte limit for very large payloads (default: 100000)\")\n\nThere is no .max() guard. A model could pass max_bytes: 999_999_999. The path-backed\nbranch in getLargeFileContent (db.ts) already has a 100MB safety cap:\n\n const safeMax = Math.min(maxBytes ?? 100 * 1024 * 1024, 100 * 1024 * 1024)\n\nBut the inline_text branch does not — it trusts maxBytes directly. 
Capping at the\nZod level makes the schema the single source of truth for the limit.\n\n## Required change\n\nAdd .max(100_000_000) to the max_bytes Zod chain:\n\n max_bytes: z\n .number()\n .min(1)\n .max(100_000_000)\n .optional()\n .describe(\"Optional byte limit for very large payloads (default: 100000)\")\n\n100_000_000 (100MB) matches the existing safety cap in the path-backed branch.\n\n## Scope boundaries\n\n- Only change the Zod schema definition.\n- Do NOT change the DEFAULT_MAX_BYTES constant (100_000) — that's the default,\n not the maximum.\n- Do NOT modify getLargeFileContent.\n\n## Acceptance criteria\n\n- max_bytes has both .min(1) and .max(100_000_000).\n- Typecheck passes.","priority":"2","title":"Cap max_bytes at 100MB in lcm_read Zod schema","type":"task"}} +{"type":"create","timestamp":"2026-02-25T23:19:02.850017Z","issue_id":"volt-727","payload":{"description":"## Problem (HIGH SEVERITY — identified by independent Codex review)\n\npackages/voltcode/src/session/lcm/user-context.ts manages per-user PostgreSQL schemas\nfor multi-tenant cloud deployments. When a user schema is created, it runs\nCREATE TABLE IF NOT EXISTS for all LCM tables.\n\nThe storage_kind column was added to the public schema via ALTER TABLE migrations in\ndb.ts (lines ~433-478), including:\n- ALTER TABLE large_files ADD COLUMN storage_kind text\n- Backfill from existing data\n- SET DEFAULT 'path'\n- SET NOT NULL\n- DROP NOT NULL on original_path\n- ADD CHECK constraint\n\nBut user-context.ts has NONE of these migrations. Any existing user schema created\nbefore this change will crash with \"column storage_kind does not exist\" when\nlcm_describe or lcm_read queries hit SELECT ... storage_kind ... FROM large_files.\n\n## Required change\n\nIn the ensureUserSchema function in user-context.ts, after the CREATE TABLE IF NOT EXISTS\nblock for large_files, add the same migration steps that db.ts has:\n\n1. 
ALTER TABLE large_files ADD COLUMN storage_kind text (wrapped in exception handler\n for duplicate_column)\n2. Backfill: UPDATE large_files SET storage_kind = CASE WHEN content IS NOT NULL\n THEN 'inline_text' WHEN binary_content IS NOT NULL THEN 'inline_binary'\n ELSE 'path' END WHERE storage_kind IS NULL\n3. ALTER COLUMN storage_kind SET DEFAULT 'path'\n4. ALTER COLUMN storage_kind SET NOT NULL\n5. ALTER COLUMN original_path DROP NOT NULL\n6. The CHECK constraint (using the same atomic pattern from volt-66c if that's\n done first, otherwise the existing DROP+ADD pattern)\n\nAlso update the CREATE TABLE IF NOT EXISTS statement for large_files in user-context.ts\nto include storage_kind in the column list for NEW schemas, matching the definition\nin db.ts.\n\n## How to find the code\n\nLook at ensureUserSchema in user-context.ts. It has a large SQL template string that\ncreates all LCM tables. Find the large_files CREATE TABLE and add migrations after it.\n\nThen look at db.ts for the exact migration SQL to replicate.\n\n## Scope boundaries\n\n- Only modify user-context.ts.\n- Mirror the migration logic from db.ts — do not invent new migration patterns.\n- Do NOT modify db.ts itself.\n\n## Acceptance criteria\n\n- user-context.ts CREATE TABLE for large_files includes storage_kind column.\n- ALTER TABLE migrations for storage_kind exist in ensureUserSchema.\n- The backfill logic matches db.ts exactly.\n- Typecheck passes.","priority":"0","title":"Add storage_kind migration to multi-tenant user schemas","type":"task"}} +{"type":"create","timestamp":"2026-02-25T23:22:26.72798Z","issue_id":"volt-640","payload":{"description":"## Problem (identified by independent Codex review)\n\ngetLargeFileContent in db.ts handles two storage_kind values explicitly:\n- inline_text: returns content from the DB column\n- path: reads from disk with maxBytes slicing\n\nBut inline_binary is never handled. 
The code falls through to the backward-compat\nfallback (which we're deleting in volt-d7e), and then to return null.\n\nWhen getLargeFileContent returns null for an inline_binary record, lcm_read.ts\nchecks largeFileExists (which returns true — the row exists), and then tells the\nmodel: \"File record exists but content could not be read — the backing file may\nhave been moved or deleted.\"\n\nThis is wrong. The file wasn't moved or deleted — it's a binary blob stored in the\nDB. The error message is misleading.\n\n## Required change\n\n### In db.ts getLargeFileContent:\n\nAfter the inline_text branch and before the path branch, add an inline_binary branch:\n\n if (row.storage_kind === \"inline_binary\") {\n // Binary content cannot be returned as text. Return null so the caller\n // can give an appropriate error message.\n return null\n }\n\nThis is a no-op for retrieval (we can't usefully return binary as text), but it\nmakes the control flow explicit.\n\n### In lcm-read.ts:\n\nBefore the generic \"backing file may have been moved\" error, add a check for\nbinary content. You'll need to either:\n\n(a) Have getLargeFileContent return a discriminated result that says WHY it's null\n (not found vs binary vs disk missing), OR\n\n(b) Query getLargeFile (which returns the full row including storage_kind) and\n check storage_kind before giving the error message.\n\nOption (b) is simpler. The code already calls largeFileExists when result is null.\nInstead, call getLargeFile and check storage_kind:\n\n if (!result) {\n const file = await LcmDb.getLargeFile(fileId, conversationId ?? 
undefined)\n if (file) {\n if (file.storage_kind === \"inline_binary\") {\n return {\n title: `LCM read: ${fileId}`,\n metadata: { fileId, found: true, truncated: false, totalSize: 0 },\n output: `File \"${fileId}\" contains binary content (${file.mime_type}) which cannot be displayed as text.\\n\\nUse lcm_describe with \"${fileId}\" for metadata about this file.`,\n }\n }\n // Path-backed file whose disk content is missing\n return {\n title: `LCM read: ${fileId}`,\n metadata: { fileId, found: true, truncated: false, totalSize: 0 },\n output: `File record exists but content could not be read — the backing file may have been moved or deleted.\\n\\nUse lcm_describe with \"${fileId}\" for metadata and exploration summary.`,\n }\n }\n // Truly not found\n return { ... not found response ... }\n }\n\n## Dependencies\n\nThis task depends on volt-d7e (delete backward-compat fallback), since that task\nremoves the code path that inline_binary currently falls through to.\n\n## Scope boundaries\n\n- Do NOT attempt to decode or display binary content.\n- Do NOT change the inline_text or path branches in getLargeFileContent.\n- Do NOT change lcm_describe behavior.\n\n## Acceptance criteria\n\n- An inline_binary record does NOT produce \"backing file moved or deleted\" error.\n- An inline_binary record produces a clear message saying it's binary content.\n- A path-backed record with missing disk file still produces the \"moved or deleted\" message.\n- Typecheck passes.","priority":"2","title":"Handle inline_binary explicitly in getLargeFileContent and lcm_read","type":"task"}} +{"type":"rename","timestamp":"2026-02-25T23:22:35.834246Z","issue_id":"volt-4cf","payload":{"new_id":"volt-156.1"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:35.834246Z","issue_id":"volt-156.1","payload":{"dep_type":"parent-child","depends_on":"volt-156"}} +{"type":"rename","timestamp":"2026-02-25T23:22:35.85787Z","issue_id":"volt-59c","payload":{"new_id":"volt-156.2"}} 
+{"type":"dep_add","timestamp":"2026-02-25T23:22:35.85787Z","issue_id":"volt-156.2","payload":{"dep_type":"parent-child","depends_on":"volt-156"}} +{"type":"rename","timestamp":"2026-02-25T23:22:35.882562Z","issue_id":"volt-52b","payload":{"new_id":"volt-156.3"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:35.882562Z","issue_id":"volt-156.3","payload":{"dep_type":"parent-child","depends_on":"volt-156"}} +{"type":"rename","timestamp":"2026-02-25T23:22:35.90705Z","issue_id":"volt-d7e","payload":{"new_id":"volt-156.4"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:35.90705Z","issue_id":"volt-156.4","payload":{"dep_type":"parent-child","depends_on":"volt-156"}} +{"type":"rename","timestamp":"2026-02-25T23:22:35.934744Z","issue_id":"volt-66c","payload":{"new_id":"volt-156.5"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:35.934744Z","issue_id":"volt-156.5","payload":{"dep_type":"parent-child","depends_on":"volt-156"}} +{"type":"rename","timestamp":"2026-02-25T23:22:35.961712Z","issue_id":"volt-44e","payload":{"new_id":"volt-156.6"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:35.961712Z","issue_id":"volt-156.6","payload":{"dep_type":"parent-child","depends_on":"volt-156"}} +{"type":"rename","timestamp":"2026-02-25T23:22:35.989286Z","issue_id":"volt-986","payload":{"new_id":"volt-156.7"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:35.989286Z","issue_id":"volt-156.7","payload":{"dep_type":"parent-child","depends_on":"volt-156"}} +{"type":"rename","timestamp":"2026-02-25T23:22:36.015665Z","issue_id":"volt-727","payload":{"new_id":"volt-156.8"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:36.015665Z","issue_id":"volt-156.8","payload":{"dep_type":"parent-child","depends_on":"volt-156"}} +{"type":"rename","timestamp":"2026-02-25T23:22:36.0467Z","issue_id":"volt-640","payload":{"new_id":"volt-156.9"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:36.0467Z","issue_id":"volt-156.9","payload":{"dep_type":"parent-child","depends_on":"volt-156"}} 
+{"type":"dep_add","timestamp":"2026-02-25T23:22:42.517318Z","issue_id":"volt-156.9","payload":{"dep_type":"blocks","depends_on":"volt-156.4"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:42.546838Z","issue_id":"volt-156","payload":{"dep_type":"blocks","depends_on":"volt-156.1"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:42.577167Z","issue_id":"volt-156","payload":{"dep_type":"blocks","depends_on":"volt-156.2"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:42.607045Z","issue_id":"volt-156","payload":{"dep_type":"blocks","depends_on":"volt-156.3"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:42.636317Z","issue_id":"volt-156","payload":{"dep_type":"blocks","depends_on":"volt-156.4"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:42.668198Z","issue_id":"volt-156","payload":{"dep_type":"blocks","depends_on":"volt-156.5"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:42.697979Z","issue_id":"volt-156","payload":{"dep_type":"blocks","depends_on":"volt-156.6"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:42.728294Z","issue_id":"volt-156","payload":{"dep_type":"blocks","depends_on":"volt-156.7"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:42.75862Z","issue_id":"volt-156","payload":{"dep_type":"blocks","depends_on":"volt-156.8"}} +{"type":"dep_add","timestamp":"2026-02-25T23:22:42.792294Z","issue_id":"volt-156","payload":{"dep_type":"blocks","depends_on":"volt-156.9"}} +{"type":"status_update","timestamp":"2026-02-25T23:31:48.24282Z","issue_id":"volt-156.6","payload":{"status":"in_progress"}} +{"type":"status_update","timestamp":"2026-02-25T23:31:53.700631Z","issue_id":"volt-156.1","payload":{"status":"in_progress"}} +{"type":"close","timestamp":"2026-02-25T23:32:31.153851Z","issue_id":"volt-156.6","payload":{}} +{"type":"close","timestamp":"2026-02-25T23:33:02.263939Z","issue_id":"volt-156.1","payload":{}} +{"type":"create","timestamp":"2026-02-25T23:35:15.248281Z","issue_id":"volt-da4","payload":{"description":"## Background\n\nCommit 
70a5534e0 (\"refactor: replace upstream upgrade sources with voltropy install\nscript\") was reverted from the fix/lcm-inline-content-path-resolution branch because\nit was unrelated to the LCM bug fix. The commit is preserved in git history.\n\nThat commit did the right thing architecturally — it replaced ~170 lines of\npackage-manager sniffing (npm, brew, choco, scoop, pnpm, bun, yarn) and their\nrespective version-check endpoints (GitHub releases, npm registry, Homebrew formulae,\nChocolatey API, Scoop manifests) with a single install script and version endpoint.\n\nBut it pointed at Voltropy infrastructure (voltropy.com/install, api.voltropy.com).\nVoltropy is handing the product off to Martian Engineering, so the endpoints need\nto be updated before this lands.\n\n## Required change\n\n1. Cherry-pick 70a5534e0 onto a new branch (off dev, after the LCM fix merges).\n2. Replace all Voltropy references with Martian Engineering equivalents:\n - `https://www.voltropy.com/install` → whatever the Martian Engineering install\n script URL will be\n - `https://api.voltropy.com/v1/bootstrap/download-url` → whatever the Martian\n Engineering version API will be\n - Any other voltropy.com references in the cherry-picked code\n3. 
Update the env var from VOLT_VERSION to whatever is appropriate (check if the\n new install script expects the same var name).\n\n## Open questions (must be answered before implementation)\n\n- What are the Martian Engineering URLs for the install script and version API?\n- Is the install script API-compatible with the Voltropy one, or does it need\n adaptation?\n- Do we want to keep the \"curl | sh\" pattern or use something else?\n\n## Reference\n\n- Original commit: 70a5534e0\n- View it: git show 70a5534e0\n- Revert commit: b9e12ed5f (on fix/lcm-inline-content-path-resolution)\n\n## Acceptance criteria\n\n- Separate PR on its own branch, not bundled with anything else.\n- All voltropy.com references replaced with Martian Engineering endpoints.\n- The upgrade command works end-to-end (or is clearly marked as needing\n infrastructure that doesn't exist yet).\n- Typecheck passes.","priority":"3","title":"Restore upgrade refactor as separate PR with Martian Engineering branding","type":"task"}} +{"type":"status_update","timestamp":"2026-02-25T23:47:00.068803Z","issue_id":"volt-156.4","payload":{"status":"in_progress"}} +{"type":"close","timestamp":"2026-02-25T23:47:47.03211Z","issue_id":"volt-156.4","payload":{}} +{"type":"create","timestamp":"2026-02-25T23:52:14.79731Z","issue_id":"volt-5eb","payload":{"description":"## Context\n\nLongMemEval (https://github.com/xiaowu0162/LongMemEval) is a benchmark for\nevaluating chat assistants on long-term interactive memory. Published at ICLR 2025.\nPaper: \"Benchmarking Chat Assistants on Long-Term Interactive Memory.\"\n\nVolt's LCM (Lossless Context Management) is specifically designed to preserve and\nretrieve context across long conversations — summaries, large file storage,\nconversation ancestry. This benchmark is a natural fit for measuring how well\nLCM actually performs at long-term recall.\n\n## Required work\n\n1. Read the LongMemEval repo and understand the evaluation protocol, task categories,\n and metrics.\n2. 
Determine what adapter/harness is needed to run Volt against the benchmark.\n3. Implement the adapter.\n4. Run the eval and report results.\n\n## Open questions\n\n- What conversation lengths does the benchmark test? Does it exceed Volt's\n compaction thresholds?\n- Does the benchmark require multi-session memory or single-session only?\n- What are baseline scores for comparable systems?\n\n## Reference\n\n- Repo: https://github.com/xiaowu0162/LongMemEval\n- Paper venue: ICLR 2025","priority":"4","title":"Run LongMemEval benchmark against Volt's LCM","type":"task"}} diff --git a/.pebbles/pebbles.db b/.pebbles/pebbles.db new file mode 100644 index 000000000..0266cde91 Binary files /dev/null and b/.pebbles/pebbles.db differ diff --git a/packages/voltcode/src/agent/agent.ts b/packages/voltcode/src/agent/agent.ts index cbd47d4c8..a7b81837d 100644 --- a/packages/voltcode/src/agent/agent.ts +++ b/packages/voltcode/src/agent/agent.ts @@ -156,6 +156,7 @@ export namespace Agent { lcm_describe: "allow", lcm_expand: "allow", lcm_grep: "allow", + lcm_read: "allow", external_directory: { [Truncate.DIR]: "allow", [Truncate.GLOB]: "allow", diff --git a/packages/voltcode/src/cli/cmd/tui/routes/session/index.tsx b/packages/voltcode/src/cli/cmd/tui/routes/session/index.tsx index e2b65bee1..c8859c5e5 100644 --- a/packages/voltcode/src/cli/cmd/tui/routes/session/index.tsx +++ b/packages/voltcode/src/cli/cmd/tui/routes/session/index.tsx @@ -48,6 +48,7 @@ import type { TasksTool } from "@/tool/tasks" import type { QuestionTool } from "@/tool/question" import type { LcmExpandTool } from "@/tool/lcm-expand" import type { LcmGrepTool } from "@/tool/lcm-grep" +import type { LcmReadTool } from "@/tool/lcm-read" import { useKeyboard, useRenderer, useTerminalDimensions, type JSX } from "@opentui/solid" import { useSDK } from "@tui/context/sdk" import { useCommandDialog } from "@tui/component/dialog-command" @@ -87,6 +88,8 @@ import "opentui-spinner/solid" addDefaultParsers(parsers.parsers) 
+const LCM_INTERNAL_TOOLS = ["lcm_expand", "lcm_grep", "lcm_read"] + class CustomSpeedScroll implements ScrollAcceleration { constructor(private speed: number) {} @@ -1557,9 +1560,6 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las const { theme } = useTheme() const ctx = use() - // Internal LCM tools that are always hidden (not in dev mode) - const LCM_INTERNAL_TOOLS = ["lcm_expand", "lcm_grep"] - // Check if there are hidden tools with no visible content const hasHiddenToolsOnly = createMemo(() => { // Check if there are any tool parts @@ -1582,6 +1582,7 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las .filter((p) => p.type === "tool") .every((p) => { const toolPart = p as ToolPart + // See LCM_INTERNAL_TOOLS definition above // Internal LCM tools are always hidden when not in dev mode if (!ctx.devMode() && LCM_INTERNAL_TOOLS.includes(toolPart.tool)) return true // Other tools are hidden when showDetails=false and completed @@ -1835,12 +1836,10 @@ function ToolPart(props: { last: boolean; part: ToolPart; message: AssistantMess const ctx = use() const sync = useSync() - // Internal LCM tools that should only be visible in dev mode - const LCM_INTERNAL_TOOLS = ["lcm_expand", "lcm_grep"] - // Hide tool if showDetails is false and tool completed successfully // Hide internal LCM tools (lcm_expand, lcm_grep) when not in dev mode const shouldHide = createMemo(() => { + // See LCM_INTERNAL_TOOLS definition above // Hide internal LCM tools when not in dev mode if (!ctx.devMode() && LCM_INTERNAL_TOOLS.includes(props.part.tool)) return true if (ctx.showDetails()) return false @@ -1889,6 +1888,9 @@ function ToolPart(props: { last: boolean; part: ToolPart; message: AssistantMess + + + @@ -2238,6 +2240,14 @@ function LcmGrep(props: ToolProps) { ) } +function LcmRead(props: ToolProps) { + return ( + + LCM read {props.input.file_id} + + ) +} + function Grep(props: ToolProps) { return ( diff --git 
a/packages/voltcode/src/cli/cmd/upgrade.ts b/packages/voltcode/src/cli/cmd/upgrade.ts index 0822b522d..b8938c4af 100644 --- a/packages/voltcode/src/cli/cmd/upgrade.ts +++ b/packages/voltcode/src/cli/cmd/upgrade.ts @@ -7,16 +7,41 @@ export const UpgradeCommand = { command: "upgrade [target]", describe: "upgrade volt to the latest or a specific version", builder: (yargs: Argv) => { - return yargs.positional("target", { - describe: "version to upgrade to, for ex '0.1.48' or 'v0.1.48'", - type: "string", - }) + return yargs + .positional("target", { + describe: "version to upgrade to, for ex '0.1.48' or 'v0.1.48'", + type: "string", + }) + .option("method", { + alias: "m", + describe: "installation method to use", + type: "string", + choices: ["curl", "npm", "pnpm", "bun", "brew", "choco", "scoop"], + }) }, - handler: async (args: { target?: string }) => { + handler: async (args: { target?: string; method?: string }) => { UI.empty() UI.println(UI.logo(" ")) UI.empty() prompts.intro("Upgrade") + const detectedMethod = await Installation.method() + const method = (args.method as Installation.Method) ?? detectedMethod + if (method === "unknown") { + prompts.log.error(`volt is installed to ${process.execPath} and may be managed by a package manager`) + const install = await prompts.select({ + message: "Install anyways?", + options: [ + { label: "Yes", value: true }, + { label: "No", value: false }, + ], + initialValue: false, + }) + if (!install) { + prompts.outro("Done") + return + } + } + prompts.log.info("Using method: " + method) const target = args.target ? 
args.target.replace(/^v/, "") : await Installation.latest() if (Installation.VERSION === target) { @@ -28,11 +53,16 @@ export const UpgradeCommand = { prompts.log.info(`From ${Installation.VERSION} → ${target}`) const spinner = prompts.spinner() spinner.start("Upgrading...") - const err = await Installation.upgrade("curl", target).catch((err) => err) + const err = await Installation.upgrade(method, target).catch((err) => err) if (err) { spinner.stop("Upgrade failed", 1) if (err instanceof Installation.UpgradeFailedError) { - prompts.log.error(err.data.stderr) + // necessary because choco only allows install/upgrade in elevated terminals + if (method === "choco" && err.data.stderr.includes("not running from an elevated command shell")) { + prompts.log.error("Please run the terminal as Administrator and try again") + } else { + prompts.log.error(err.data.stderr) + } } else if (err instanceof Error) prompts.log.error(err.message) prompts.outro("Done") return diff --git a/packages/voltcode/src/cli/upgrade.ts b/packages/voltcode/src/cli/upgrade.ts index 1696e1a9a..c0c2327f0 100644 --- a/packages/voltcode/src/cli/upgrade.ts +++ b/packages/voltcode/src/cli/upgrade.ts @@ -5,7 +5,8 @@ import { Installation } from "@/installation" export async function upgrade() { const config = await Config.global() - const latest = await Installation.latest().catch(() => {}) + const method = await Installation.method() + const latest = await Installation.latest(method).catch(() => {}) if (!latest) return if (Installation.VERSION === latest) return @@ -17,7 +18,8 @@ export async function upgrade() { return } - await Installation.upgrade("curl", latest) + if (method === "unknown") return + await Installation.upgrade(method, latest) .then(() => Bus.publish(Installation.Event.Updated, { version: latest })) .catch(() => {}) } diff --git a/packages/voltcode/src/installation/index.ts b/packages/voltcode/src/installation/index.ts index 78e2c8ecb..70f6c0415 100644 --- 
a/packages/voltcode/src/installation/index.ts +++ b/packages/voltcode/src/installation/index.ts @@ -1,8 +1,10 @@ import { BusEvent } from "@/bus/bus-event" +import path from "path" import { $ } from "bun" import z from "zod" import { NamedError } from "@opencode-ai/util/error" import { Log } from "../util/log" +import { iife } from "@/util/iife" import { Flag } from "../flag/flag" declare global { @@ -56,7 +58,59 @@ export namespace Installation { } export async function method() { - return "curl" as const + if (process.execPath.includes(path.join(".voltcode", "bin"))) return "curl" + if (process.execPath.includes(path.join(".local", "bin"))) return "curl" + const exec = process.execPath.toLowerCase() + + const checks = [ + { + name: "npm" as const, + command: () => $`npm list -g --depth=0`.throws(false).quiet().text(), + }, + { + name: "yarn" as const, + command: () => $`yarn global list`.throws(false).quiet().text(), + }, + { + name: "pnpm" as const, + command: () => $`pnpm list -g --depth=0`.throws(false).quiet().text(), + }, + { + name: "bun" as const, + command: () => $`bun pm ls -g`.throws(false).quiet().text(), + }, + { + name: "brew" as const, + command: () => $`brew list --formula voltcode`.throws(false).quiet().text(), + }, + { + name: "scoop" as const, + command: () => $`scoop list voltcode`.throws(false).quiet().text(), + }, + { + name: "choco" as const, + command: () => $`choco list --limit-output voltcode`.throws(false).quiet().text(), + }, + ] + + checks.sort((a, b) => { + const aMatches = exec.includes(a.name) + const bMatches = exec.includes(b.name) + if (aMatches && !bMatches) return -1 + if (!aMatches && bMatches) return 1 + return 0 + }) + + for (const check of checks) { + const output = await check.command() + const installedName = + check.name === "brew" || check.name === "choco" || check.name === "scoop" ? 
"voltcode" : "voltcode-ai" + if (output.includes(installedName)) { + return check.name + } + } + + return "unknown" } export const UpgradeFailedError = NamedError.create( @@ -66,17 +120,58 @@ export namespace Installation { }), ) - export async function upgrade(_method: Method, target: string) { - const result = await $`curl -fsSL https://www.voltropy.com/install | sh`.env({ - ...process.env, - VOLT_VERSION: target, - }).quiet().throws(false) + async function getBrewFormula() { + const tapFormula = await $`brew list --formula anomalyco/tap/voltcode`.throws(false).quiet().text() + if (tapFormula.includes("voltcode")) return "anomalyco/tap/voltcode" + const coreFormula = await $`brew list --formula voltcode`.throws(false).quiet().text() + if (coreFormula.includes("voltcode")) return "voltcode" + return "voltcode" + } + + export async function upgrade(method: Method, target: string) { + let cmd + switch (method) { + case "curl": + cmd = $`curl -fsSL https://opencode.ai/install | bash`.env({ + ...process.env, + VERSION: target, + }) + break + case "npm": + cmd = $`npm install -g voltcode-ai@${target}` + break + case "pnpm": + cmd = $`pnpm install -g voltcode-ai@${target}` + break + case "bun": + cmd = $`bun install -g voltcode-ai@${target}` + break + case "brew": { + const formula = await getBrewFormula() + cmd = $`brew upgrade ${formula}`.env({ + HOMEBREW_NO_AUTO_UPDATE: "1", + ...process.env, + }) + break + } + case "choco": + cmd = $`echo Y | choco upgrade voltcode --version=${target}` + break + case "scoop": + cmd = $`scoop install voltcode@${target}` + break + default: + throw new Error(`Unknown method: ${method}`) + } + const result = await cmd.quiet().throws(false) if (result.exitCode !== 0) { + const stderr = method === "choco" ? 
"not running from an elevated command shell" : result.stderr.toString("utf8") throw new UpgradeFailedError({ - stderr: result.stderr.toString("utf8"), + stderr: stderr, }) } log.info("upgraded", { + method, target, stdout: result.stdout.toString(), stderr: result.stderr.toString(), @@ -88,16 +183,64 @@ export namespace Installation { export const CHANNEL = typeof VOLTCODE_CHANNEL === "string" ? VOLTCODE_CHANNEL : "local" export const USER_AGENT = `voltcode/${CHANNEL}/${VERSION}/${Flag.VOLTCODE_CLIENT}` - export async function latest(_installMethod?: Method) { - const platform = process.platform === "darwin" ? "darwin" : "linux" - const arch = process.arch === "arm64" ? "arm64" : "amd64" - const res = await fetch("https://api.voltropy.com/v1/bootstrap/download-url", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ platform, arch, version: "latest" }), - }) - if (!res.ok) throw new Error(res.statusText) - const data: any = await res.json() - return (data.version as string).replace(/^v/, "") + export async function latest(installMethod?: Method) { + const detectedMethod = installMethod || (await method()) + + if (detectedMethod === "brew") { + const formula = await getBrewFormula() + if (formula === "voltcode") { + return fetch("https://formulae.brew.sh/api/formula/opencode.json") + .then((res) => { + if (!res.ok) throw new Error(res.statusText) + return res.json() + }) + .then((data: any) => data.versions.stable) + } + } + + if (detectedMethod === "npm" || detectedMethod === "bun" || detectedMethod === "pnpm") { + const registry = await iife(async () => { + const r = (await $`npm config get registry`.quiet().nothrow().text()).trim() + const reg = r || "https://registry.npmjs.org" + return reg.endsWith("/") ? 
reg.slice(0, -1) : reg + }) + const channel = CHANNEL + return fetch(`${registry}/voltcode-ai/${channel}`) + .then((res) => { + if (!res.ok) throw new Error(res.statusText) + return res.json() + }) + .then((data: any) => data.version) + } + + if (detectedMethod === "choco") { + return fetch( + "https://community.chocolatey.org/api/v2/Packages?$filter=Id%20eq%20%27opencode%27%20and%20IsLatestVersion&$select=Version", + { headers: { Accept: "application/json;odata=verbose" } }, + ) + .then((res) => { + if (!res.ok) throw new Error(res.statusText) + return res.json() + }) + .then((data: any) => data.d.results[0].Version) + } + + if (detectedMethod === "scoop") { + return fetch("https://raw.githubusercontent.com/ScoopInstaller/Main/master/bucket/opencode.json", { + headers: { Accept: "application/json" }, + }) + .then((res) => { + if (!res.ok) throw new Error(res.statusText) + return res.json() + }) + .then((data: any) => data.version) + } + + return fetch("https://api.github.com/repos/anomalyco/opencode/releases/latest") + .then((res) => { + if (!res.ok) throw new Error(res.statusText) + return res.json() + }) + .then((data: any) => data.tag_name.replace(/^v/, "")) } } diff --git a/packages/voltcode/src/session/large-tool-output.ts b/packages/voltcode/src/session/large-tool-output.ts index 7d31b95ed..19c19c565 100644 --- a/packages/voltcode/src/session/large-tool-output.ts +++ b/packages/voltcode/src/session/large-tool-output.ts @@ -111,7 +111,7 @@ export async function handleLargeToolOutput(input: { preview, hasMore ? `\n...[${tokenCount - Token.estimate(preview)} more tokens]` : "", ``, - `To access the full output, use the Read tool with the original file path, or use lcm_describe with file_id "${fileId}" to see metadata about this stored output.`, + `The full output is stored in LCM. To retrieve it, spawn an explore sub-agent: Task(subagent_type="explore", prompt="Use lcm_read on ${fileId} to find "). Use lcm_describe for metadata only. 
Do NOT attempt to read this content with the Read tool — it is stored in the LCM database, not as a file on disk.`, ].join("\n") return { diff --git a/packages/voltcode/src/session/lcm/db.ts b/packages/voltcode/src/session/lcm/db.ts index 4e770c536..dc79a0099 100644 --- a/packages/voltcode/src/session/lcm/db.ts +++ b/packages/voltcode/src/session/lcm/db.ts @@ -1,3 +1,4 @@ +import { createHash } from "crypto" import postgres from "postgres" import z from "zod" import { Log } from "@/util/log" @@ -108,6 +109,7 @@ export namespace LcmDb { export const LargeFile = z.object({ file_id: z.string(), conversation_id: z.number(), + storage_kind: z.enum(["path", "inline_text", "inline_binary"]), original_path: z.string().nullable(), mime_type: z.string(), content: z.string().nullable(), @@ -473,17 +475,17 @@ export namespace LcmDb { CREATE INDEX IF NOT EXISTS ctx_items_message_idx ON context_items(message_id); -- 7) Large files (for files too big to fit in context) - -- Stores path references to files on disk; content is read on demand + -- Stores path-backed files and inline payloads (text/binary) under one ID space. 
CREATE TABLE IF NOT EXISTS large_files ( file_id text PRIMARY KEY, conversation_id bigint NOT NULL REFERENCES conversations(conversation_id) ON DELETE CASCADE, - original_path text NOT NULL, -- Path to the file on disk (required for path-based storage) + storage_kind text NOT NULL DEFAULT 'path', -- path | inline_text | inline_binary + original_path text, -- Path to the file on disk (for storage_kind='path') mime_type text NOT NULL, - content text, -- Legacy: kept for backwards compatibility, not used for new files - binary_content bytea, -- Legacy: kept for backwards compatibility, not used for new files + content text, -- Inline text payload + binary_content bytea, -- Inline binary payload token_count bigint NOT NULL, -- BIGINT to support files with billions of tokens created_at timestamptz NOT NULL DEFAULT now() - -- Removed content check constraint to allow path-only storage ); -- Migration: change token_count from integer to bigint if needed @@ -503,8 +505,80 @@ export namespace LcmDb { END IF; END $$; - -- Migration: make original_path NOT NULL for new records (existing NULL paths grandfathered) - -- Note: Can't add NOT NULL constraint if existing rows have NULL, so we skip this + -- Migration: add storage_kind for explicit payload mode (path/inline_text/inline_binary) + DO $$ BEGIN + ALTER TABLE large_files ADD COLUMN storage_kind text; + EXCEPTION WHEN duplicate_column THEN NULL; END $$; + + -- Migration: backfill storage_kind from existing data + UPDATE large_files + SET storage_kind = CASE + WHEN content IS NOT NULL THEN 'inline_text' + WHEN binary_content IS NOT NULL THEN 'inline_binary' + ELSE 'path' + END + WHERE storage_kind IS NULL; + + -- Migration: default + NOT NULL for storage_kind + DO $$ BEGIN + ALTER TABLE large_files ALTER COLUMN storage_kind SET DEFAULT 'path'; + EXCEPTION WHEN others THEN NULL; END $$; + DO $$ BEGIN + ALTER TABLE large_files ALTER COLUMN storage_kind SET NOT NULL; + EXCEPTION WHEN others THEN NULL; END $$; + + -- Migration: 
original_path is optional for inline payloads + DO $$ BEGIN + ALTER TABLE large_files ALTER COLUMN original_path DROP NOT NULL; + EXCEPTION WHEN others THEN NULL; END $$; + + -- Migration: enforce coherent large_files row shape by storage_kind + DO $$ DECLARE + current_def text; + normalized_current_def text; + normalized_desired_def text := regexp_replace( + lower( + 'CHECK ( + (storage_kind = ''path'' AND original_path IS NOT NULL AND content IS NULL AND binary_content IS NULL) OR + (storage_kind = ''inline_text'' AND content IS NOT NULL AND binary_content IS NULL) OR + (storage_kind = ''inline_binary'' AND binary_content IS NOT NULL AND content IS NULL) + )' + ), + '[[:space:]()]', + '', + 'g' + ); + BEGIN + SELECT pg_get_constraintdef(oid) + INTO current_def + FROM pg_constraint + WHERE conname = 'large_files_storage_shape_check' + AND conrelid = 'large_files'::regclass; + + IF current_def IS NOT NULL THEN + normalized_current_def := regexp_replace( + lower(replace(current_def, '::text', '')), + '[[:space:]()]', + '', + 'g' + ); + END IF; + + IF current_def IS NULL OR normalized_current_def != normalized_desired_def THEN + IF current_def IS NOT NULL THEN + EXECUTE 'ALTER TABLE large_files DROP CONSTRAINT large_files_storage_shape_check'; + END IF; + + BEGIN + ALTER TABLE large_files + ADD CONSTRAINT large_files_storage_shape_check CHECK ( + (storage_kind = 'path' AND original_path IS NOT NULL AND content IS NULL AND binary_content IS NULL) OR + (storage_kind = 'inline_text' AND content IS NOT NULL AND binary_content IS NULL) OR + (storage_kind = 'inline_binary' AND binary_content IS NOT NULL AND content IS NULL) + ); + EXCEPTION WHEN duplicate_object THEN NULL; END; + END IF; + END $$; CREATE INDEX IF NOT EXISTS large_files_conv_idx ON large_files(conversation_id); CREATE INDEX IF NOT EXISTS large_files_path_idx ON large_files(original_path); @@ -1650,8 +1724,7 @@ export namespace LcmDb { * Generate a deterministic file ID based on content hash and conversation 
ID. */ export function generateFileId(conversationId: number, content: string): string { - const hash = require("crypto") - .createHash("sha256") + const hash = createHash("sha256") .update(`${conversationId}:${content}`) .digest("hex") .slice(0, 16) @@ -1663,8 +1736,7 @@ export namespace LcmDb { * Generate a deterministic file ID for binary content scoped to a conversation. */ export function generateBinaryFileId(conversationId: number, content: Uint8Array): string { - const hash = require("crypto") - .createHash("sha256") + const hash = createHash("sha256") .update(`${conversationId}:`) .update(content) .digest("hex") @@ -1693,8 +1765,8 @@ export namespace LcmDb { const fileId = generateFileId(input.conversationId, input.content) await conn` - INSERT INTO large_files (file_id, conversation_id, original_path, mime_type, content, token_count) - VALUES (${fileId}, ${input.conversationId}, ${input.originalPath ?? null}, ${input.mimeType}, ${escNull(input.content)}, ${input.tokenCount}) + INSERT INTO large_files (file_id, conversation_id, storage_kind, original_path, mime_type, content, token_count) + VALUES (${fileId}, ${input.conversationId}, 'inline_text', ${input.originalPath ?? null}, ${input.mimeType}, ${escNull(input.content)}, ${input.tokenCount}) ON CONFLICT (file_id) DO NOTHING ` log.debug("inserted large file", { fileId, conversationId: input.conversationId, originalPath: input.originalPath }) @@ -1724,18 +1796,19 @@ export namespace LcmDb { const tokenCount = LargeFileThreshold.estimateTokenCount(input.content) const fileId = generateFileId(input.conversationId, input.content) - // Use the label as original_path for identification (e.g., "user_prompt_12345") - const originalPath = input.label ?? `inline_content_${Date.now()}` + // Don't store labels as original_path — that field is for actual file paths on disk. + // Inline content is stored in the content column and read from there. 
+ const originalPath = null await conn` - INSERT INTO large_files (file_id, conversation_id, original_path, mime_type, content, binary_content, token_count) - VALUES (${fileId}, ${input.conversationId}, ${originalPath}, ${input.mimeType ?? "text/plain"}, ${escNull(input.content)}, NULL, ${tokenCount}) + INSERT INTO large_files (file_id, conversation_id, storage_kind, original_path, mime_type, content, binary_content, token_count) + VALUES (${fileId}, ${input.conversationId}, 'inline_text', ${originalPath}, ${input.mimeType ?? "text/plain"}, ${escNull(input.content)}, NULL, ${tokenCount}) ON CONFLICT (file_id) DO NOTHING ` log.debug("inserted large text content", { fileId, conversationId: input.conversationId, - label: originalPath, + label: input.label, tokenCount, contentLength: input.content.length, }) @@ -1779,8 +1852,8 @@ export namespace LcmDb { // Store only the path reference, never the content // Convert bigint to string for postgres since it handles numeric types correctly await conn` - INSERT INTO large_files (file_id, conversation_id, original_path, mime_type, content, binary_content, token_count) - VALUES (${fileId}, ${input.conversationId}, ${input.filePath}, ${input.mimeType}, NULL, NULL, ${tokenCount.toString()}) + INSERT INTO large_files (file_id, conversation_id, storage_kind, original_path, mime_type, content, binary_content, token_count) + VALUES (${fileId}, ${input.conversationId}, 'path', ${input.filePath}, ${input.mimeType}, NULL, NULL, ${tokenCount.toString()}) ON CONFLICT (file_id) DO NOTHING ` log.debug("inserted large file path reference", { @@ -1829,8 +1902,8 @@ export namespace LcmDb { const fileId = generateBinaryFileId(input.conversationId, input.binaryContent) await conn` - INSERT INTO large_files (file_id, conversation_id, original_path, mime_type, binary_content, token_count) - VALUES (${fileId}, ${input.conversationId}, ${input.originalPath ?? 
null}, ${input.mimeType}, ${input.binaryContent}, ${input.tokenCount}) + INSERT INTO large_files (file_id, conversation_id, storage_kind, original_path, mime_type, binary_content, token_count) + VALUES (${fileId}, ${input.conversationId}, 'inline_binary', ${input.originalPath ?? null}, ${input.mimeType}, ${input.binaryContent}, ${input.tokenCount}) ON CONFLICT (file_id) DO NOTHING ` log.debug("inserted large binary file", { @@ -1857,7 +1930,7 @@ export namespace LcmDb { // If no conversationId provided, just do a simple lookup (backwards compatible) if (conversationId === undefined) { const rows = await conn` - SELECT file_id, conversation_id, original_path, mime_type, content, binary_content, token_count, created_at, exploration_summary, explorer_used + SELECT file_id, conversation_id, storage_kind, original_path, mime_type, content, binary_content, token_count, created_at, exploration_summary, explorer_used FROM large_files WHERE file_id = ${fileId} ` @@ -1875,7 +1948,7 @@ export namespace LcmDb { FROM conversations c JOIN ancestors a ON c.conversation_id = a.parent_conversation_id ) - SELECT lf.file_id, lf.conversation_id, lf.original_path, lf.mime_type, lf.content, lf.binary_content, lf.token_count, lf.created_at + SELECT lf.file_id, lf.conversation_id, lf.storage_kind, lf.original_path, lf.mime_type, lf.content, lf.binary_content, lf.token_count, lf.created_at, lf.exploration_summary, lf.explorer_used FROM large_files lf JOIN ancestors a ON lf.conversation_id = a.conversation_id WHERE lf.file_id = ${fileId} @@ -1904,12 +1977,12 @@ export namespace LcmDb { // If no conversationId provided, just do a simple lookup (backwards compatible) const rows = conversationId === undefined - ? await conn<{ content: string | null; original_path: string | null }[]>` - SELECT content, original_path + ? 
await conn<{ storage_kind: string; content: string | null; original_path: string | null }[]>` + SELECT storage_kind, content, original_path FROM large_files WHERE file_id = ${fileId} ` - : await conn<{ content: string | null; original_path: string | null }[]>` + : await conn<{ storage_kind: string; content: string | null; original_path: string | null }[]>` WITH RECURSIVE ancestors AS ( SELECT conversation_id, parent_conversation_id FROM conversations @@ -1919,7 +1992,7 @@ export namespace LcmDb { FROM conversations c JOIN ancestors a ON c.conversation_id = a.parent_conversation_id ) - SELECT lf.content, lf.original_path + SELECT lf.storage_kind, lf.content, lf.original_path FROM large_files lf JOIN ancestors a ON lf.conversation_id = a.conversation_id WHERE lf.file_id = ${fileId} @@ -1927,8 +2000,11 @@ export namespace LcmDb { const row = rows[0] if (!row) return null - // If content is stored inline (legacy), return it - if (row.content !== null) { + // Inline text payloads are returned directly from the DB. + if (row.storage_kind === "inline_text" || (row.storage_kind !== "path" && row.content !== null)) { + if (row.content === null) { + return null + } const limit = maxBytes ?? row.content.length const truncated = row.content.length > limit return { @@ -1938,8 +2014,13 @@ export namespace LcmDb { } } - // Read content from disk using the stored path - if (row.original_path) { + if (row.storage_kind === "inline_binary") { + // Binary content cannot be returned as text. + return null + } + + // Path-backed payloads are loaded from disk on demand. 
+ if (row.storage_kind === "path" && row.original_path) { const file = Bun.file(row.original_path) const exists = await file.exists() if (!exists) { @@ -1979,7 +2060,7 @@ export namespace LcmDb { export async function getLargeFilesByConversation(conversationId: number): Promise { const conn = sql() const rows = await conn` - SELECT file_id, conversation_id, original_path, mime_type, content, binary_content, token_count, created_at, exploration_summary, explorer_used + SELECT file_id, conversation_id, storage_kind, original_path, mime_type, content, binary_content, token_count, created_at, exploration_summary, explorer_used FROM large_files WHERE conversation_id = ${conversationId} ORDER BY created_at diff --git a/packages/voltcode/src/session/lcm/index.ts b/packages/voltcode/src/session/lcm/index.ts index 419e5ed1b..218f5a8aa 100644 --- a/packages/voltcode/src/session/lcm/index.ts +++ b/packages/voltcode/src/session/lcm/index.ts @@ -17,5 +17,6 @@ export * from "./condense" export * from "./context" export * from "./db" export * from "./large-file-threshold" +export * from "./types" export * from "./explore" export * from "./migration" diff --git a/packages/voltcode/src/session/lcm/large-file.ts b/packages/voltcode/src/session/lcm/large-file.ts index a22752d0e..31853e620 100644 --- a/packages/voltcode/src/session/lcm/large-file.ts +++ b/packages/voltcode/src/session/lcm/large-file.ts @@ -23,6 +23,8 @@ export namespace LargeFile { conversationId: z.string(), /** Original file path (if available) */ originalPath: z.string().nullable(), + /** Storage mode for payload retrieval. */ + storageKind: z.enum(["path", "inline_text", "inline_binary"]).default("path"), /** MIME type of the file */ mimeType: z.string(), /** Estimated token count for the file content */ @@ -104,6 +106,7 @@ export namespace LargeFile { fileId: generateId(input.content), conversationId: input.conversationId, originalPath: input.originalPath ?? 
null, + storageKind: "inline_text", mimeType: input.mimeType, tokenCount: input.tokenCount, isBinary: false, @@ -124,6 +127,7 @@ export namespace LargeFile { fileId: generateId(input.binaryContent), conversationId: input.conversationId, originalPath: input.originalPath ?? null, + storageKind: "inline_binary", mimeType: input.mimeType, tokenCount: input.tokenCount, isBinary: true, @@ -150,7 +154,8 @@ export namespace LargeFile { export function formatForContext(file: Info): string { const lines: string[] = [] lines.push(`[Large File ID: ${file.fileId}]`) - if (file.originalPath) { + lines.push(`[Storage: ${file.storageKind}]`) + if (file.storageKind === "path" && file.originalPath) { lines.push(`[Path: ${file.originalPath}]`) } lines.push(`[Type: ${file.mimeType}]`) diff --git a/packages/voltcode/src/session/lcm/types.ts b/packages/voltcode/src/session/lcm/types.ts new file mode 100644 index 000000000..60f5461f3 --- /dev/null +++ b/packages/voltcode/src/session/lcm/types.ts @@ -0,0 +1,9 @@ +/** + * Escape-hatch metadata used by tools that return content already stored in LCM. + * The processor checks this to avoid re-storing tool output back into LCM. 
+ */ +export interface LcmToolMetadata { + storedInLcm: boolean + fileId: string + originalTokenCount?: number +} diff --git a/packages/voltcode/src/session/lcm/user-context.ts b/packages/voltcode/src/session/lcm/user-context.ts index 1f5c17a1d..317078cf9 100644 --- a/packages/voltcode/src/session/lcm/user-context.ts +++ b/packages/voltcode/src/session/lcm/user-context.ts @@ -221,16 +221,109 @@ export async function ensureUserSchema(conn: postgres.Sql, userId: string): Prom CREATE TABLE IF NOT EXISTS large_files ( file_id text PRIMARY KEY, conversation_id bigint NOT NULL REFERENCES conversations(conversation_id) ON DELETE CASCADE, - original_path text NOT NULL, + storage_kind text NOT NULL DEFAULT 'path', + original_path text, mime_type text NOT NULL, content text, binary_content bytea, token_count bigint NOT NULL, exploration_summary text, explorer_used text, - created_at timestamptz NOT NULL DEFAULT now() + created_at timestamptz NOT NULL DEFAULT now(), + CONSTRAINT large_files_storage_shape_check CHECK ( + (storage_kind = 'path' AND original_path IS NOT NULL AND content IS NULL AND binary_content IS NULL) OR + (storage_kind = 'inline_text' AND content IS NOT NULL AND binary_content IS NULL) OR + (storage_kind = 'inline_binary' AND binary_content IS NOT NULL AND content IS NULL) + ) ); + -- Migration: drop the old content check constraint to allow path-only storage (avoid NOTICE spam) + DO $$ BEGIN + IF EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conname = 'large_file_content_check' + AND conrelid = 'large_files'::regclass + ) THEN + EXECUTE 'ALTER TABLE large_files DROP CONSTRAINT large_file_content_check'; + END IF; + END $$; + + -- Migration: add storage_kind for explicit payload mode (path/inline_text/inline_binary) + DO $$ BEGIN + ALTER TABLE large_files ADD COLUMN storage_kind text; + EXCEPTION WHEN duplicate_column THEN NULL; END $$; + + -- Migration: backfill storage_kind from existing data + UPDATE large_files + SET storage_kind = CASE + WHEN 
content IS NOT NULL THEN 'inline_text' + WHEN binary_content IS NOT NULL THEN 'inline_binary' + ELSE 'path' + END + WHERE storage_kind IS NULL; + + -- Migration: default + NOT NULL for storage_kind + DO $$ BEGIN + ALTER TABLE large_files ALTER COLUMN storage_kind SET DEFAULT 'path'; + EXCEPTION WHEN others THEN NULL; END $$; + DO $$ BEGIN + ALTER TABLE large_files ALTER COLUMN storage_kind SET NOT NULL; + EXCEPTION WHEN others THEN NULL; END $$; + + -- Migration: original_path is optional for inline payloads + DO $$ BEGIN + ALTER TABLE large_files ALTER COLUMN original_path DROP NOT NULL; + EXCEPTION WHEN others THEN NULL; END $$; + + -- Migration: enforce coherent large_files row shape by storage_kind + DO $$ DECLARE + current_def text; + normalized_current_def text; + normalized_desired_def text := regexp_replace( + lower( + 'CHECK ( + (storage_kind = ''path'' AND original_path IS NOT NULL AND content IS NULL AND binary_content IS NULL) OR + (storage_kind = ''inline_text'' AND content IS NOT NULL AND binary_content IS NULL) OR + (storage_kind = ''inline_binary'' AND binary_content IS NOT NULL AND content IS NULL) + )' + ), + '[[:space:]()]', + '', + 'g' + ); + BEGIN + SELECT pg_get_constraintdef(oid) + INTO current_def + FROM pg_constraint + WHERE conname = 'large_files_storage_shape_check' + AND conrelid = 'large_files'::regclass; + + IF current_def IS NOT NULL THEN + normalized_current_def := regexp_replace( + lower(replace(current_def, '::text', '')), + '[[:space:]()]', + '', + 'g' + ); + END IF; + + IF current_def IS NULL OR normalized_current_def != normalized_desired_def THEN + IF current_def IS NOT NULL THEN + EXECUTE 'ALTER TABLE large_files DROP CONSTRAINT large_files_storage_shape_check'; + END IF; + + BEGIN + ALTER TABLE large_files + ADD CONSTRAINT large_files_storage_shape_check CHECK ( + (storage_kind = 'path' AND original_path IS NOT NULL AND content IS NULL AND binary_content IS NULL) OR + (storage_kind = 'inline_text' AND content IS NOT NULL AND 
binary_content IS NULL) OR + (storage_kind = 'inline_binary' AND binary_content IS NOT NULL AND content IS NULL) + ); + EXCEPTION WHEN duplicate_object THEN NULL; END; + END IF; + END $$; + CREATE INDEX IF NOT EXISTS large_files_conv_idx ON large_files(conversation_id); CREATE INDEX IF NOT EXISTS large_files_path_idx ON large_files(original_path); diff --git a/packages/voltcode/src/session/processor.ts b/packages/voltcode/src/session/processor.ts index bb1b19cbb..2f231c88d 100644 --- a/packages/voltcode/src/session/processor.ts +++ b/packages/voltcode/src/session/processor.ts @@ -18,6 +18,7 @@ import { Question } from "@/question" import { ReadCoordinator } from "@/tool/read" import { Flag } from "@/flag/flag" import { handleLargeToolOutput, LARGE_TOOL_OUTPUT_THRESHOLD } from "./large-tool-output" +import type { LcmToolMetadata } from "./lcm/types" import { Token } from "@/util/token" export namespace SessionProcessor { @@ -531,12 +532,13 @@ export namespace SessionProcessor { ) // Handle large tool outputs by storing in LCM let finalOutput = toolResult.output - let lcmMetadata = toolResult.metadata?.lcm + let lcmMetadata: LcmToolMetadata | undefined = toolResult.metadata?.lcm if (typeof toolResult.output === "string") { const outputTokens = Token.estimate(toolResult.output) - if (outputTokens > LARGE_TOOL_OUTPUT_THRESHOLD) { + // Skip LCM re-storage if the output is already from LCM (e.g. 
lcm_read) + if (outputTokens > LARGE_TOOL_OUTPUT_THRESHOLD && !lcmMetadata?.storedInLcm) { // Large output: store in LCM and replace with reference const lcmResult = await handleLargeToolOutput({ sessionID: input.sessionID, diff --git a/packages/voltcode/src/tool/lcm-describe.ts b/packages/voltcode/src/tool/lcm-describe.ts index 20ed456cc..244393f27 100644 --- a/packages/voltcode/src/tool/lcm-describe.ts +++ b/packages/voltcode/src/tool/lcm-describe.ts @@ -42,6 +42,12 @@ export const LcmDescribeTool = Tool.define("lcm_read", { + description: DESCRIPTION, + parameters, + async execute(params, ctx) { + // Sub-agent gate — same pattern as lcm_expand. + const session = await Session.get(ctx.sessionID) + if (!session.parentID) { + return { + title: `Read LCM file: ${params.file_id}`, + metadata: { + fileId: params.file_id, + found: false, + truncated: false, + totalSize: 0, + }, + output: `ERROR: Only sub-agents can read full LCM file content. + +The lcm_read tool can only be called by sub-agents spawned via the Task tool. +This restriction protects the main context from uncontrolled expansion. + +To retrieve the content of "${params.file_id}", spawn an explore sub-agent: + Task(subagent_type="explore", prompt="Use lcm_read on ${params.file_id} to find ") + +The explore sub-agent will call lcm_read and return a focused answer.`, + } + } + + const fileId = params.file_id.trim() + + if (!fileId.startsWith("file_")) { + return { + title: `LCM read: ${fileId}`, + metadata: { + fileId, + found: false, + truncated: false, + totalSize: 0, + }, + output: `Invalid ID format: "${fileId}". lcm_read accepts file IDs (file_xxx). For summary IDs (sum_xxx), use lcm_expand instead.`, + } + } + + // Scope lookup to this conversation's ancestry chain. + const conversationId = await SessionPrompt.getLcmConversationId(ctx.sessionID) + + const maxBytes = params.max_bytes ?? 
DEFAULT_MAX_BYTES + + log.info("reading LCM file content", { fileId, maxBytes, sessionId: ctx.sessionID }) + + const result = await LcmDb.getLargeFileContent(fileId, maxBytes, conversationId ?? undefined) + + if (!result) { + // Distinguish "ID not found", "binary content", and "file on disk missing". + const file = await LcmDb.getLargeFile(fileId, conversationId ?? undefined) + if (file) { + if (file.storage_kind === "inline_binary") { + return { + title: `LCM read: ${fileId}`, + metadata: { + fileId, + found: true, + truncated: false, + totalSize: 0, + }, + output: `File "${fileId}" contains binary content (${file.mime_type}) which cannot be displayed as text.\n\nUse lcm_describe with "${fileId}" for metadata about this file.`, + } + } + + return { + title: `LCM read: ${fileId}`, + metadata: { + fileId, + found: true, + truncated: false, + totalSize: 0, + }, + output: `File record exists but content could not be read — the backing file may have been moved or deleted.\n\nUse lcm_describe with "${fileId}" for metadata and exploration summary.`, + } + } + + return { + title: `LCM read: ${fileId}`, + metadata: { + fileId, + found: false, + truncated: false, + totalSize: 0, + }, + output: `File not found: ${fileId}\n\nThis file ID does not exist in the current conversation or its ancestors.`, + } + } + + log.info("read LCM file content", { + fileId, + totalSize: result.totalSize, + truncated: result.truncated, + }) + + const lines: string[] = [] + lines.push(`## LCM File Content: ${fileId}`) + lines.push("") + + if (result.truncated) { + lines.push( + `**Note:** Content truncated to ${maxBytes.toLocaleString()} bytes (full size: ${result.totalSize.toLocaleString()} bytes). 
Call again with a larger max_bytes to see more.`, + ) + lines.push("") + } + + lines.push(result.content) + + return { + title: `Read LCM: ${fileId} (${result.totalSize.toLocaleString()} bytes)`, + metadata: { + fileId, + found: true, + truncated: result.truncated, + totalSize: result.totalSize, + // Signal to the processor that this is already LCM content — do not re-store. + lcm: { storedInLcm: true, fileId }, + }, + output: lines.join("\n"), + } + }, +}) diff --git a/packages/voltcode/src/tool/lcm-read.txt b/packages/voltcode/src/tool/lcm-read.txt new file mode 100644 index 000000000..1d1179e85 --- /dev/null +++ b/packages/voltcode/src/tool/lcm-read.txt @@ -0,0 +1,10 @@ +Retrieve the full stored content of an LCM file by its file ID. + +Use this tool when you need the actual content of something stored in LCM — large tool outputs, pasted user text, or any payload that was too large for inline context. + +- Accepts file IDs (file_xxx format). Does NOT work with summary IDs (sum_xxx) — use lcm_expand for those. +- Returns the stored content directly (text payloads from DB, or file content from disk). +- Supports optional byte limit to avoid overwhelming context with very large payloads. +- IMPORTANT: Only callable by sub-agents spawned via the Task tool. Main agent gets an error with instructions to spawn a sub-agent. 
+- To retrieve stored content from the main context, spawn an explore sub-agent: + Task(subagent_type="explore", prompt="Use lcm_read on file_xxx to find <what you need>") diff --git a/packages/voltcode/src/tool/registry.ts b/packages/voltcode/src/tool/registry.ts index 7b558d586..0572de2db 100644 --- a/packages/voltcode/src/tool/registry.ts +++ b/packages/voltcode/src/tool/registry.ts @@ -33,6 +33,7 @@ import { ApplyPatchTool } from "./apply_patch" import { LcmGrepTool } from "./lcm-grep" import { LcmExpandTool } from "./lcm-expand" import { LcmDescribeTool } from "./lcm-describe" +import { LcmReadTool } from "./lcm-read" import { AgenticMapTool } from "./agentic-map" import { LlmMapTool } from "./llm-map" @@ -142,6 +143,7 @@ export namespace ToolRegistry { LcmGrepTool, LcmExpandTool, LcmDescribeTool, + LcmReadTool, AgenticMapTool, LlmMapTool, ...custom, diff --git a/packages/voltcode/test/session/lcm/large-user-text.test.ts b/packages/voltcode/test/session/lcm/large-user-text.test.ts index b778563ca..e9e95a36e 100644 --- a/packages/voltcode/test/session/lcm/large-user-text.test.ts +++ b/packages/voltcode/test/session/lcm/large-user-text.test.ts @@ -181,8 +181,9 @@ describe("session.lcm.large-user-text", () => { expect(file).not.toBeNull() expect(file!.file_id).toBe(fileId) expect(file!.conversation_id).toBe(testConversationId) + expect(file!.storage_kind).toBe("inline_text") expect(file!.mime_type).toBe("text/plain") - expect(file!.original_path).toBe("test_metadata") + expect(file!.original_path).toBeNull() expect(file!.content).toBe(testContent) expect(Number(file!.token_count)).toBe(tokenCount) })