From 72f58df836ddd737d4a507d48c54d22c100ad17b Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 6 May 2026 14:12:23 -0700 Subject: [PATCH 1/9] browser_execute: defer workflow guidance to vendored SKILL.md The tool description was hard-wrapped at ~80 columns mid-paragraph, showing up in traces with broken sentences, and duplicated content from the vendored harness's SKILL.md. Replace with four concise paragraphs that say only what the agent needs to decide whether to use the tool, and require reading SKILL.md before the first call. helpers.py is optional reference for exact signatures. --- .../opencode/src/tool/browser-execute.txt | 33 +++---------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/packages/opencode/src/tool/browser-execute.txt b/packages/opencode/src/tool/browser-execute.txt index 08edbddf3..ceb324b73 100644 --- a/packages/opencode/src/tool/browser-execute.txt +++ b/packages/opencode/src/tool/browser-execute.txt @@ -1,32 +1,7 @@ -Execute Python code against a connected web browser via the BrowserCode harness. +Execute Python against a connected web browser via the BrowserCode harness. -This is the single tool for all browser interaction. The agent writes Python that -imperatively drives the browser using helpers preloaded into the script's namespace -(`goto_url`, `click_at_xy`, `type_text`, `capture_screenshot`, `js`, `cdp`, -`new_tab`, `switch_tab`, `ensure_real_tab`, `wait_for_load`, `page_info`, -`http_get`, etc.). +Use this tool whenever the task requires driving a real browser — automation, scraping, end-to-end testing, or interactive exploration. The harness attaches to the user's running Chrome (local) or a Browser Use cloud browser (remote). A session-scoped daemon holds the CDP connection so consecutive calls share the same browser and tabs. -Read `packages/bcode-browser/harness/SKILL.md` for the full helper surface and -recommended workflow. 
Read `packages/bcode-browser/harness/src/browser_harness/helpers.py` -for exact signatures. +Before the first `browser_execute` call of a session, you MUST read `packages/bcode-browser/harness/SKILL.md`. It defines the helper surface, the screenshot-driven workflow, remote-browser setup, and gotchas — none of that is repeated here. -State (CDP session, tab attachments, event buffer) is held by a long-lived daemon -keyed to your session id, so consecutive `browser_execute` calls share the same -browser. Add task-specific helpers to -`packages/bcode-browser/harness/agent-workspace/agent_helpers.py` between calls; -they take effect on the very next call. - -Coordinate-based interaction is the default — `click_at_xy(x, y)` rather than -selector indices. `Input.dispatchMouseEvent` passes through iframes, shadow DOM, -and cross-origin at the compositor level. - -For first navigation use `new_tab(url)` (or `ensure_real_tab(); goto_url(url)`), -not bare `goto_url` — the latter clobbers whatever tab the user is on. - -Output is whatever the script writes to stdout/stderr. Wrap multi-step flows in -one call when possible — that's the design. - -Example: - new_tab("https://example.com") - wait_for_load() - print(page_info()) +Optional: read `packages/bcode-browser/harness/src/browser_harness/helpers.py` when you need an exact signature that SKILL.md does not show. From 531cced2b71c30c46f53d133c6bbe6ae31089223 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 6 May 2026 15:30:07 -0700 Subject: [PATCH 2/9] harness: relocate to data dir, add build-hash extraction sentinel The vendored harness contains agent-edited files (agent_helpers.py and, later, domain-skills) which is data, not cache. Move the extraction target from ~/.cache/bcode/harness/ to /harness/ (~/.local/share/bcode/harness/ on Linux/Mac) so a ~/.cache wipe no longer destroys agent self-improvements. Add a content-hash sentinel at /.bcode-build that records the embed bundle that produced the on-disk tree. 
Warm launches stat the sentinel and skip extraction; binary upgrades trigger a fresh extract that overwrites every embed file except anything under agent-workspace/ (the Green-zone subtree: agent_helpers.py and any agent-authored files like domain-skills/<domain>/*.md persist across upgrades).
+// virtual module exports `{ "": "" }` for every harness file +// plus a content-hash `buildHash` used as the on-disk extraction sentinel. +// `harness.ts` reads it in compiled mode and extracts the files to +// `/harness/` on session start, skipping when the sentinel matches. // // The walk is glob-driven (not hand-enumerated): when skill files leave the // repo for the cloud-fetch architecture (decisions.md §4.7) the embed shrinks @@ -13,6 +14,8 @@ // so local artifacts (`.venv/`, `__pycache__/`, `*.egg-info/`, etc.) never // land in the binary. +import crypto from "crypto" +import fs from "fs/promises" import path from "path" import { fileURLToPath } from "url" @@ -29,6 +32,18 @@ const ignored = [ new Bun.Glob("**/uv.lock"), ] +// SHA-256 over (rel + NUL + content) for each file in sorted order. Stable +// across builds when content is identical, so warm launches skip extraction. +const computeBuildHash = async (files: string[]) => { + const hash = crypto.createHash("sha256") + for (const rel of files) { + hash.update(rel) + hash.update("\0") + hash.update(await fs.readFile(path.join(HARNESS_DIR, rel))) + } + return hash.digest("hex") +} + export const createEmbeddedHarnessBundle = async (buildCwd: string) => { console.log("Embedding harness files into the binary") const files = (await Array.fromAsync(new Bun.Glob("**/*").scan({ cwd: HARNESS_DIR, dot: true }))) @@ -37,6 +52,7 @@ export const createEmbeddedHarnessBundle = async (buildCwd: string) => { .sort() console.log(`Embedding ${files.length} harness files`) + const buildHash = await computeBuildHash(files) const imports = files.map((file, i) => { const spec = path.relative(buildCwd, path.join(HARNESS_DIR, file)).replaceAll("\\", "/") @@ -47,6 +63,7 @@ export const createEmbeddedHarnessBundle = async (buildCwd: string) => { `// Auto-generated by packages/bcode-browser/script/embed-harness.ts`, `// Maps "" -> bunfs path for every embedded harness file.`, ...imports, + `export const buildHash = 
${JSON.stringify(buildHash)}`, `export default {`, ...entries, `} as Record`, diff --git a/packages/bcode-browser/src/browser-execute.ts b/packages/bcode-browser/src/browser-execute.ts index 5ac3aa32a..210459b39 100644 --- a/packages/bcode-browser/src/browser-execute.ts +++ b/packages/bcode-browser/src/browser-execute.ts @@ -83,13 +83,16 @@ const isUvMissing = (err: unknown): boolean => { return false } -export const make = Effect.fn("BrowserExecute.make")(function* () { +// dataDir is opencode's XDG_DATA_HOME for bcode (~/.local/share/bcode/). The +// harness lives at /harness/. We resolve eagerly at make-time so the +// extraction (compiled mode) happens before the agent reads SKILL.md. +export const make = Effect.fn("BrowserExecute.make")(function* (dataDir: string) { const spawner = yield* ChildProcessSpawner.ChildProcessSpawner const locate = yield* uvLocate + const harnessDir = yield* Effect.promise(() => resolveHarnessDir(dataDir)) const execute = (args: Parameters, ctx: ExecuteContext) => Effect.gen(function* () { - const harnessDir = yield* Effect.promise(() => resolveHarnessDir()) // Pre-flight check on harnessDir: spawn ENOENT on a missing cwd surfaces // with `path: "uv"` on Bun/Windows, which is indistinguishable from a // truly-missing uv. Catch it here so the user gets the real cause diff --git a/packages/bcode-browser/src/harness.ts b/packages/bcode-browser/src/harness.ts index 8d3b03e6d..db5a732e8 100644 --- a/packages/bcode-browser/src/harness.ts +++ b/packages/bcode-browser/src/harness.ts @@ -11,25 +11,28 @@ // `import.meta.url` lives under `/$bunfs/` (or `B:/~BUN/` on Windows), a // read-only virtual filesystem. uv cannot write `.venv/` there. We extract // the embedded harness (built into the binary by `script/embed-harness.ts`) -// to a single un-versioned directory at `~/.cache/bcode/harness/`. +// to `/harness/`, where dataDir is opencode's XDG_DATA_HOME for +// bcode (~/.local/share/bcode/ on Linux/Mac). 
The harness is data, not +// cache: it accumulates agent edits to `agent-workspace/agent_helpers.py` +// that must outlive a `~/.cache` wipe. // -// Per decisions §4.8, the cache is **un-versioned** so agent edits to -// `agent-workspace/agent_helpers.py` survive binary upgrades. Extraction -// policy on every launch: walk the embed map and write each file out, with -// one exception — `agent-workspace/agent_helpers.py` is preserved if -// already present. Everything else (`src/browser_harness/*.py`, -// `pyproject.toml`, skills, etc.) is overwritten unconditionally; the -// binary is the source of truth for those, and we want curated skill / -// daemon / setup updates to land on upgrade. -// `agent-workspace/agent_helpers.py` is the one Green-zone file (decisions -// §3.7, §4.5) where agent learnings accumulate and must outlive upgrades. -// Upstream moved the agent-editable surface from root `helpers.py` to -// `agent-workspace/agent_helpers.py` in PR #229; the core `helpers.py` -// inside `src/browser_harness/` is now baseline-overwrite. +// A content-hash sentinel at `/.bcode-build` records the embed +// bundle that produced the on-disk tree. On session start we compare it to +// the bundle hash and skip extraction when they match — warm launches cost +// one stat. Mismatch (binary upgrade) re-extracts every embed file except +// anything under `agent-workspace/` (the Green-zone subtree — decisions +// §3.7, §4.5: agent_helpers.py and any agent-authored files like +// domain-skills//*.md persist across upgrades). The core +// `src/browser_harness/` package and shipped skill files are +// baseline-overwrite. // // Concurrent first-callers are deduplicated via an in-process promise. // Bun.write is atomic per file; cross-process races just result in the // same bytes being written, which is fine. 
+// +// On first launch after the relocation, any pre-existing harness at the +// legacy `~/.cache/bcode/harness/` is moved to the new location so agent +// edits under `agent-workspace/` survive the upgrade. import fs from "fs/promises" import os from "os" @@ -47,40 +50,70 @@ const isCompiled = (() => { return d.startsWith("/$bunfs/") || d.startsWith("B:/~BUN/") })() const DEV_HARNESS_DIR = path.resolve(__dirname, "..", "harness") -const cachedHarnessDir = path.join(os.homedir(), ".cache", "bcode", "harness") +const LEGACY_CACHE_DIR = path.join(os.homedir(), ".cache", "bcode", "harness") +const SENTINEL_NAME = ".bcode-build" + +// Embed paths that are agent-editable and must be preserved across binary +// upgrades. Per decisions §3.7 / §4.5 the entire `agent-workspace/` subtree +// is the Green zone (agent_helpers.py plus any agent-authored files such as +// domain-skills//*.md). The core `src/browser_harness/` package and +// shipped skill files are baseline-overwrite. +const PRESERVED_PREFIX = "agent-workspace/" -// Files that are agent-editable and must be preserved across binary upgrades. -// Everything in the embed map that isn't in this set is baseline-overwrite. -// Per decisions §3.7 / §4.5: only `agent-workspace/agent_helpers.py` is -// Green-zone editable inside the harness. The core `src/browser_harness/` -// package (daemon, admin, helpers, run, _ipc) is baseline-only. -const PRESERVED_PATHS = new Set(["agent-workspace/agent_helpers.py"]) +// Compute the harness directory for a given dataDir without touching the +// filesystem. The agent permission whitelist uses this; runtime extraction +// uses `resolveHarnessDir`. 
+export const harnessDir = (dataDir: string) => path.join(dataDir, "harness") const exists = (p: string) => fs.access(p).then(() => true, () => false) -const extractEmbeddedHarness = async (): Promise => { +const readSentinel = async (dir: string) => { + try { return await fs.readFile(path.join(dir, SENTINEL_NAME), "utf8") } + catch { return null } +} + +const migrateLegacyIfPresent = async (target: string) => { + if (!(await exists(LEGACY_CACHE_DIR))) return + if (await exists(target)) return + await fs.mkdir(path.dirname(target), { recursive: true }) + try { await fs.rename(LEGACY_CACHE_DIR, target) } + catch (err) { + if ((err as { code?: string }).code !== "EXDEV") throw err + await fs.cp(LEGACY_CACHE_DIR, target, { recursive: true }) + await fs.rm(LEGACY_CACHE_DIR, { recursive: true, force: true }) + } +} + +const extractEmbeddedHarness = async (dataDir: string): Promise => { + const target = harnessDir(dataDir) + await migrateLegacyIfPresent(target) + // @ts-expect-error generated at build time const mod = await import("bcode-harness.gen.ts").catch(() => null) if (!mod) throw new Error("bcode-harness.gen.ts not found in compiled binary — was the build script updated?") const fileMap = mod.default as Record + const buildHash = mod.buildHash as string + + if ((await readSentinel(target)) === buildHash) return target - await fs.mkdir(cachedHarnessDir, { recursive: true }) + await fs.mkdir(target, { recursive: true }) await Promise.all( Object.entries(fileMap).map(async ([rel, bunfsPath]) => { - const dest = path.join(cachedHarnessDir, rel) - if (PRESERVED_PATHS.has(rel) && (await exists(dest))) return + const dest = path.join(target, rel) + if (rel.startsWith(PRESERVED_PREFIX) && (await exists(dest))) return await fs.mkdir(path.dirname(dest), { recursive: true }) await Bun.write(dest, Bun.file(bunfsPath)) }), ) - return cachedHarnessDir + await fs.writeFile(path.join(target, SENTINEL_NAME), buildHash, "utf8") + return target } let extractPromise: Promise | null 
= null -export const resolveHarnessDir = (): Promise => { +export const resolveHarnessDir = (dataDir: string): Promise => { if (!isCompiled) return Promise.resolve(DEV_HARNESS_DIR) - if (!extractPromise) extractPromise = extractEmbeddedHarness() + if (!extractPromise) extractPromise = extractEmbeddedHarness(dataDir) return extractPromise } diff --git a/packages/opencode/src/agent/agent.ts b/packages/opencode/src/agent/agent.ts index 76855d2be..2248c1e25 100644 --- a/packages/opencode/src/agent/agent.ts +++ b/packages/opencode/src/agent/agent.ts @@ -15,6 +15,7 @@ import PROMPT_TITLE from "./prompt/title.txt" import { Permission } from "@/permission" import { mergeDeep, pipe, sortBy, values } from "remeda" import { Global } from "@opencode-ai/core/global" +import { Harness } from "@browser-use/bcode-browser/harness" import path from "path" import { Plugin } from "@/plugin" import { Skill } from "../skill" @@ -85,9 +86,18 @@ export const layer = Layer.effect( // /sessions/. Whitelist the parent so // the agent can read its own screenshots back without permission prompts. const browserSessionsGlob = path.join(Global.Path.data, "sessions", "*") + // Vendored browser-harness in compiled-binary mode is extracted to + // /harness/ (see packages/bcode-browser/src/harness.ts). + // The agent is meant to read SKILL.md, helpers.py, interaction-skills/, + // and edit agent-workspace/agent_helpers.py + domain-skills/ as part of + // normal browser work. Whitelist the whole tree so none of that prompts. + // In dev mode the harness lives inside the worktree, so this glob is a + // no-op there. 
+ const harnessGlob = path.join(Harness.harnessDir(Global.Path.data), "*") const whitelistedDirs = [ Truncate.GLOB, browserSessionsGlob, + harnessGlob, path.join(Global.Path.tmp, "*"), ...skillDirs.map((dir) => path.join(dir, "*")), ] diff --git a/packages/opencode/src/tool/browser-execute.ts b/packages/opencode/src/tool/browser-execute.ts index 8182aca8f..4f634bd80 100644 --- a/packages/opencode/src/tool/browser-execute.ts +++ b/packages/opencode/src/tool/browser-execute.ts @@ -15,7 +15,7 @@ const preview = (text: string) => export const BrowserExecuteTool = Tool.define( "browser_execute", Effect.gen(function* () { - const impl = yield* BrowserExecute.make() + const impl = yield* BrowserExecute.make(Global.Path.data) return { description: DESCRIPTION, parameters: impl.parameters, From c706f37f10c3430101c6cac53cd22e55d2009318 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 6 May 2026 15:30:21 -0700 Subject: [PATCH 3/9] harness: split BH_RUNTIME_DIR (sock) from BH_TMP_DIR (screenshots) Adopts the upstream BH_RUNTIME_DIR/BH_TMP_DIR split (browser-harness PR #318): the harness now keeps sock/port/pid in BH_RUNTIME_DIR and log/screenshots/debug overlays in BH_TMP_DIR. Wire that through bcode: bhScratchDir /sessions// persistent, deep path OK bhRuntimeDir /tmp/bcode// (POSIX) or os.tmpdir()/bcode// volatile, AF_UNIX-budget short Splits ExecuteContext.bhTmpDir into bhScratchDir + bhRuntimeDir; adds sessionRuntimeDir() alongside sessionScratchDir(). The opencode adapter passes Global.Path.data for scratch and the platform tmpdir for runtime. 
--- packages/bcode-browser/src/browser-execute.ts | 48 ++++++++++++------- packages/opencode/src/tool/browser-execute.ts | 13 ++--- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/packages/bcode-browser/src/browser-execute.ts b/packages/bcode-browser/src/browser-execute.ts index 210459b39..3e4279984 100644 --- a/packages/bcode-browser/src/browser-execute.ts +++ b/packages/bcode-browser/src/browser-execute.ts @@ -13,29 +13,39 @@ // pipe stdout+stderr back. BU_NAME is namespaced by sessionID so parallel // sub-agents (each with their own session) get isolated daemons + browsers. // -// BH_TMP_DIR points at a per-session scratch dir so sock/port/pid/log + screenshot -// output land somewhere predictable per session, instead of all sessions sharing -// /tmp. The Level-2 wrapper supplies the cache root; we own the layout convention. +// Two per-session dirs, separated by lifetime + path-length sensitivity: +// BH_TMP_DIR — screenshots, debug overlays, daemon log. Persistent under +// /sessions//. Long path is fine; the cloud +// UI / read tool finds artifacts here. +// BH_RUNTIME_DIR — sock, port, pid. Volatile under /bcode//. +// Path-length budgeted on macOS (AF_UNIX sun_path = 104). // // Level 1 per decisions.md §1c — substantial implementation lives here. The // Level-2 hook in packages/opencode is a one-line wrapper. import fs from "fs/promises" +import os from "os" import path from "path" import { Effect, Schema, Stream } from "effect" import { ChildProcess, ChildProcessSpawner } from "effect/unstable/process" import { resolveHarnessDir } from "./harness" import { uvLocate } from "./uv-locate" -// Canonical per-session scratch dir layout. Caller supplies dataDir -// (e.g. opencode's Global.Path.data); we own the `sessions/` shape. -// AF_UNIX sun_path is 104 bytes on macOS — `/sessions//bu-.sock` -// must fit. SessionID is `ses_` + 26 chars (30 chars). 
The literal suffix is -// `/sessions/` (10) + 30 + `/bu-` (4) + 30 + `.sock` (5) = 79 chars, leaving -// 25 chars of headroom for dataDir. Typical XDG dataDir is well under that. +// Per-session persistent scratch under /sessions//. Holds +// screenshots, debug overlays, daemon log. Caller supplies dataDir +// (e.g. opencode's Global.Path.data). export const sessionScratchDir = (dataDir: string, sessionID: string) => path.join(dataDir, "sessions", sessionID) +// Per-session volatile runtime dir under /bcode//. Holds +// AF_UNIX sock + port file + pid. macOS sun_path is 104 bytes: +// `/tmp/bcode/ses_<26ch>/bu.sock` is 50 chars — well within budget. +// On Windows the daemon listens on TCP so the path doesn't need to be short, +// but using os.tmpdir() keeps the layout consistent. +const RUNTIME_ROOT = process.platform === "win32" ? os.tmpdir() : "/tmp" +export const sessionRuntimeDir = (sessionID: string) => + path.join(RUNTIME_ROOT, "bcode", sessionID) + const DEFAULT_TIMEOUT_MS = 60 * 1000 const MAX_TIMEOUT_MS = 10 * 60 * 1000 @@ -50,11 +60,12 @@ export type Parameters = Schema.Schema.Type export interface ExecuteContext { readonly sessionID: string - // Per-session scratch dir, passed to the harness as BH_TMP_DIR. The harness - // mkdirs it on import, but we mkdir-p here too so failures surface as a - // direct effect error rather than a child-process exit. Pre-compute via - // sessionScratchDir(dataDir, sessionID). - readonly bhTmpDir: string + // BH_TMP_DIR. Persistent per-session dir for screenshots/log. Pre-compute + // via sessionScratchDir(dataDir, sessionID). + readonly bhScratchDir: string + // BH_RUNTIME_DIR. Volatile short-path per-session dir for sock/port/pid. + // Pre-compute via sessionRuntimeDir(sessionID). + readonly bhRuntimeDir: string // Optional progress callback invoked per output chunk (combined stdout+stderr). // Level-2 supplies this to drive TUI streaming via opencode's `ctx.metadata`. 
// The callback receives the fully accumulated output so far, not just the @@ -100,7 +111,8 @@ export const make = Effect.fn("BrowserExecute.make")(function* (dataDir: string) if (!(yield* Effect.promise(() => fs.access(harnessDir).then(() => true, () => false)))) { return yield* Effect.fail(new Error(`harness directory not found at ${harnessDir} — bcode build is broken; please reinstall`)) } - yield* Effect.promise(() => fs.mkdir(ctx.bhTmpDir, { recursive: true })) + yield* Effect.promise(() => fs.mkdir(ctx.bhScratchDir, { recursive: true })) + yield* Effect.promise(() => fs.mkdir(ctx.bhRuntimeDir, { recursive: true })) const uv = yield* locate const proc = ChildProcess.make( uv, @@ -108,7 +120,11 @@ export const make = Effect.fn("BrowserExecute.make")(function* (dataDir: string) { cwd: harnessDir, extendEnv: true, - env: { BU_NAME: ctx.sessionID, BH_TMP_DIR: ctx.bhTmpDir }, + env: { + BU_NAME: ctx.sessionID, + BH_TMP_DIR: ctx.bhScratchDir, + BH_RUNTIME_DIR: ctx.bhRuntimeDir, + }, stdin: "ignore", }, ) diff --git a/packages/opencode/src/tool/browser-execute.ts b/packages/opencode/src/tool/browser-execute.ts index 4f634bd80..db0bb293f 100644 --- a/packages/opencode/src/tool/browser-execute.ts +++ b/packages/opencode/src/tool/browser-execute.ts @@ -33,12 +33,13 @@ export const BrowserExecuteTool = Tool.define( const result = yield* impl.execute(args, { sessionID: ctx.sessionID, - // Per-session scratch under Global.Path.data (persistent state, - // not cache). Harness writes sock/port/pid/log + screenshots here. - // Agent reads screenshots back via the read tool; the agent - // permission ruleset (agent.ts) allows /sessions/* - // so that read doesn't prompt. - bhTmpDir: BrowserExecute.sessionScratchDir(Global.Path.data, ctx.sessionID), + // Persistent per-session dir for screenshots/log. Agent reads + // screenshots back via the read tool; the agent permission ruleset + // (agent.ts) allows /sessions/* without prompts. 
+ bhScratchDir: BrowserExecute.sessionScratchDir(Global.Path.data, ctx.sessionID), + // Volatile short-path per-session dir for sock/port/pid. macOS + // AF_UNIX sun_path is 104 bytes — kept under /tmp/bcode//. + bhRuntimeDir: BrowserExecute.sessionRuntimeDir(ctx.sessionID), // Stream chunks to the TUI as they arrive — same pattern as bash. onChunk: (output) => ctx.metadata({ From a22a4c58d632594e6cdaecdaf22bfab755e951bb Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 6 May 2026 14:21:07 -0700 Subject: [PATCH 4/9] harness: snapshot to harness-archive/ on binary upgrade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the build-hash sentinel mismatches (= bcode binary upgrade), copy the active /harness/ tree to /harness-archive// before re-extracting. The agent uses this read-only history when migrating its own helpers across upgrades — e.g. checking how a helper signature changed between versions, or recovering an interaction-skill it had locally edited. Excludes .venv/ and __pycache__/ from the snapshot (regenerable + bulky). Idempotent: if the archive subdir already exists, skip the copy (handles concurrent first-callers). Whitelist /harness-archive/* in the agent permission ruleset so reads/edits don't prompt — symmetric with the active harness whitelist. --- packages/bcode-browser/src/harness.ts | 34 +++++++++++++++++++++++---- packages/opencode/src/agent/agent.ts | 4 ++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/packages/bcode-browser/src/harness.ts b/packages/bcode-browser/src/harness.ts index db5a732e8..b4e43afc2 100644 --- a/packages/bcode-browser/src/harness.ts +++ b/packages/bcode-browser/src/harness.ts @@ -19,10 +19,13 @@ // A content-hash sentinel at `/.bcode-build` records the embed // bundle that produced the on-disk tree. On session start we compare it to // the bundle hash and skip extraction when they match — warm launches cost -// one stat. 
Mismatch (binary upgrade) re-extracts every embed file except -// anything under `agent-workspace/` (the Green-zone subtree — decisions -// §3.7, §4.5: agent_helpers.py and any agent-authored files like -// domain-skills//*.md persist across upgrades). The core +// one stat. Mismatch (binary upgrade) snapshots the active tree to +// `/harness-archive//` (excluding `.venv/` and +// `__pycache__/`) so the agent can read the old skills + helpers when +// migrating its own customizations, then re-extracts every embed file +// except anything under `agent-workspace/` (the Green-zone subtree — +// decisions §3.7, §4.5: agent_helpers.py and any agent-authored files +// like domain-skills//*.md persist across upgrades). The core // `src/browser_harness/` package and shipped skill files are // baseline-overwrite. // @@ -65,6 +68,15 @@ const PRESERVED_PREFIX = "agent-workspace/" // uses `resolveHarnessDir`. export const harnessDir = (dataDir: string) => path.join(dataDir, "harness") +// Where past-version snapshots live. Each subdir is named for the buildHash +// of the harness it was extracted from. Read-only after creation. +export const harnessArchiveDir = (dataDir: string) => path.join(dataDir, "harness-archive") + +// Skipped during archive copies — regenerable (.venv) or junk (__pycache__). +// Match by basename at any depth so nested __pycache__/ inside src/ is also +// excluded. 
+const ARCHIVE_EXCLUDE = new Set([".venv", "__pycache__"]) + const exists = (p: string) => fs.access(p).then(() => true, () => false) const readSentinel = async (dir: string) => { @@ -84,6 +96,16 @@ const migrateLegacyIfPresent = async (target: string) => { } } +const archiveExistingHarness = async (dataDir: string, target: string, oldHash: string) => { + const archiveTarget = path.join(harnessArchiveDir(dataDir), oldHash) + if (await exists(archiveTarget)) return // already archived (re-entry); nothing to do + await fs.mkdir(harnessArchiveDir(dataDir), { recursive: true }) + await fs.cp(target, archiveTarget, { + recursive: true, + filter: (src) => !ARCHIVE_EXCLUDE.has(path.basename(src)), + }) +} + const extractEmbeddedHarness = async (dataDir: string): Promise => { const target = harnessDir(dataDir) await migrateLegacyIfPresent(target) @@ -94,7 +116,9 @@ const extractEmbeddedHarness = async (dataDir: string): Promise => { const fileMap = mod.default as Record const buildHash = mod.buildHash as string - if ((await readSentinel(target)) === buildHash) return target + const existing = await readSentinel(target) + if (existing === buildHash) return target + if (existing) await archiveExistingHarness(dataDir, target, existing) await fs.mkdir(target, { recursive: true }) await Promise.all( diff --git a/packages/opencode/src/agent/agent.ts b/packages/opencode/src/agent/agent.ts index 2248c1e25..88affa7cf 100644 --- a/packages/opencode/src/agent/agent.ts +++ b/packages/opencode/src/agent/agent.ts @@ -94,10 +94,14 @@ export const layer = Layer.effect( // In dev mode the harness lives inside the worktree, so this glob is a // no-op there. const harnessGlob = path.join(Harness.harnessDir(Global.Path.data), "*") + // Past-version snapshots taken at upgrade time. Read-only browsing for + // the agent when migrating its own helpers across upgrades. 
+ const harnessArchiveGlob = path.join(Harness.harnessArchiveDir(Global.Path.data), "*") const whitelistedDirs = [ Truncate.GLOB, browserSessionsGlob, harnessGlob, + harnessArchiveGlob, path.join(Global.Path.tmp, "*"), ...skillDirs.map((dir) => path.join(dir, "*")), ] From ec490cadeaba7ba1675f4246d147f07929f38cc1 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 6 May 2026 15:35:19 -0700 Subject: [PATCH 5/9] browser_execute: resolve harness path in tool description The description references SKILL.md and helpers.py by source-tree path, which only resolves correctly in dev mode. In compiled binaries the harness extracts to /harness/ and those source paths don't exist on disk, so the agent's first read fails. Template the path: replace packages/bcode-browser/harness/ with {{HARNESS_DIR}} in the description, and substitute the resolved path (dev or compiled) in the opencode adapter at make-time. Eager harness extraction (already in the data-dir relocation commit) guarantees the files exist before the agent reads the description. 
--- packages/bcode-browser/src/browser-execute.ts | 2 +- packages/opencode/src/tool/browser-execute.ts | 5 ++++- packages/opencode/src/tool/browser-execute.txt | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/bcode-browser/src/browser-execute.ts b/packages/bcode-browser/src/browser-execute.ts index 3e4279984..dd115c472 100644 --- a/packages/bcode-browser/src/browser-execute.ts +++ b/packages/bcode-browser/src/browser-execute.ts @@ -157,7 +157,7 @@ export const make = Effect.fn("BrowserExecute.make")(function* (dataDir: string) }), ) - return { parameters, execute } + return { parameters, execute, harnessDir } }) export * as BrowserExecute from "./browser-execute" diff --git a/packages/opencode/src/tool/browser-execute.ts b/packages/opencode/src/tool/browser-execute.ts index db0bb293f..c52f4f997 100644 --- a/packages/opencode/src/tool/browser-execute.ts +++ b/packages/opencode/src/tool/browser-execute.ts @@ -17,7 +17,10 @@ export const BrowserExecuteTool = Tool.define( Effect.gen(function* () { const impl = yield* BrowserExecute.make(Global.Path.data) return { - description: DESCRIPTION, + // Substitute the resolved harness path (dev: repo path; compiled: + // /harness/) so the SKILL.md / helpers.py references in the + // description point at files that actually exist on disk. + description: DESCRIPTION.replaceAll("{{HARNESS_DIR}}", impl.harnessDir), parameters: impl.parameters, execute: (args: Schema.Schema.Type, ctx: Tool.Context) => Effect.gen(function* () { diff --git a/packages/opencode/src/tool/browser-execute.txt b/packages/opencode/src/tool/browser-execute.txt index ceb324b73..2dea001f5 100644 --- a/packages/opencode/src/tool/browser-execute.txt +++ b/packages/opencode/src/tool/browser-execute.txt @@ -2,6 +2,6 @@ Execute Python against a connected web browser via the BrowserCode harness. Use this tool whenever the task requires driving a real browser — automation, scraping, end-to-end testing, or interactive exploration. 
The harness attaches to the user's running Chrome (local) or a Browser Use cloud browser (remote). A session-scoped daemon holds the CDP connection so consecutive calls share the same browser and tabs. -Before the first `browser_execute` call of a session, you MUST read `packages/bcode-browser/harness/SKILL.md`. It defines the helper surface, the screenshot-driven workflow, remote-browser setup, and gotchas — none of that is repeated here. +Before the first `browser_execute` call of a session, you MUST read `{{HARNESS_DIR}}/SKILL.md`. It defines the helper surface, the screenshot-driven workflow, remote-browser setup, and gotchas — none of that is repeated here. -Optional: read `packages/bcode-browser/harness/src/browser_harness/helpers.py` when you need an exact signature that SKILL.md does not show. +Optional: read `{{HARNESS_DIR}}/src/browser_harness/helpers.py` when you need an exact signature that SKILL.md does not show. From 95cbc63179f0696aed11836d2074674528cfb348 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 6 May 2026 15:47:57 -0700 Subject: [PATCH 6/9] browser_execute: add harness zone map to tool description Tells the agent up front: edit only under agent-workspace/ (those edits persist), everything else gets overwritten on upgrade, and previous trees are kept at <harness-archive-dir>/<version>/ for reference. One short paragraph; the description still defers all workflow guidance to SKILL.md. Templates {{HARNESS_ARCHIVE_DIR}} alongside {{HARNESS_DIR}}; both substituted in the opencode adapter from BrowserExecute.make(). 
--- packages/bcode-browser/src/browser-execute.ts | 4 ++-- packages/opencode/src/tool/browser-execute.ts | 8 +++++--- packages/opencode/src/tool/browser-execute.txt | 2 ++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/packages/bcode-browser/src/browser-execute.ts b/packages/bcode-browser/src/browser-execute.ts index dd115c472..6aa59d058 100644 --- a/packages/bcode-browser/src/browser-execute.ts +++ b/packages/bcode-browser/src/browser-execute.ts @@ -28,7 +28,7 @@ import os from "os" import path from "path" import { Effect, Schema, Stream } from "effect" import { ChildProcess, ChildProcessSpawner } from "effect/unstable/process" -import { resolveHarnessDir } from "./harness" +import { harnessArchiveDir, resolveHarnessDir } from "./harness" import { uvLocate } from "./uv-locate" // Per-session persistent scratch under /sessions//. Holds @@ -157,7 +157,7 @@ export const make = Effect.fn("BrowserExecute.make")(function* (dataDir: string) }), ) - return { parameters, execute, harnessDir } + return { parameters, execute, harnessDir, harnessArchiveDir: harnessArchiveDir(dataDir) } }) export * as BrowserExecute from "./browser-execute" diff --git a/packages/opencode/src/tool/browser-execute.ts b/packages/opencode/src/tool/browser-execute.ts index c52f4f997..00509bc96 100644 --- a/packages/opencode/src/tool/browser-execute.ts +++ b/packages/opencode/src/tool/browser-execute.ts @@ -18,9 +18,11 @@ export const BrowserExecuteTool = Tool.define( const impl = yield* BrowserExecute.make(Global.Path.data) return { // Substitute the resolved harness path (dev: repo path; compiled: - // /harness/) so the SKILL.md / helpers.py references in the - // description point at files that actually exist on disk. - description: DESCRIPTION.replaceAll("{{HARNESS_DIR}}", impl.harnessDir), + // /harness/) and the archive path so the SKILL.md / helpers.py + // / archive references in the description point at concrete locations. 
+ description: DESCRIPTION + .replaceAll("{{HARNESS_DIR}}", impl.harnessDir) + .replaceAll("{{HARNESS_ARCHIVE_DIR}}", impl.harnessArchiveDir), parameters: impl.parameters, execute: (args: Schema.Schema.Type, ctx: Tool.Context) => Effect.gen(function* () { diff --git a/packages/opencode/src/tool/browser-execute.txt b/packages/opencode/src/tool/browser-execute.txt index 2dea001f5..cb7615b15 100644 --- a/packages/opencode/src/tool/browser-execute.txt +++ b/packages/opencode/src/tool/browser-execute.txt @@ -4,4 +4,6 @@ Use this tool whenever the task requires driving a real browser — automation, Before the first `browser_execute` call of a session, you MUST read `{{HARNESS_DIR}}/SKILL.md`. It defines the helper surface, the screenshot-driven workflow, remote-browser setup, and gotchas — none of that is repeated here. +Edits persist only under `{{HARNESS_DIR}}/agent-workspace/`. Everything else is overwritten on bcode upgrade; previous trees are kept at `{{HARNESS_ARCHIVE_DIR}}/<version>/` for migration reference. + Optional: read `{{HARNESS_DIR}}/src/browser_harness/helpers.py` when you need an exact signature that SKILL.md does not show. From a2de105fb8447de0d2f1b49b048fb3d449de2a97 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 6 May 2026 15:53:41 -0700 Subject: [PATCH 7/9] harness: address PR review (per-dataDir cache, archive edit deny) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolveHarnessDir cached extraction with a single module-level promise keyed by nothing, so a second call with a different dataDir returned the first call's path. In production opencode passes a singleton Global.Path.data so this never bit, but tests and any future multi-instance scenario would silently get cross-dataDir contamination. Switch to a Map<string, Promise<string>> — same dataDir still deduplicates, distinct dataDirs each get their own extraction. 
harness-archive/ was whitelisted in external_directory:allow, which let edit/write/apply_patch silently mutate snapshots that are intended to be read-only history. Keep the dir-level whitelist (so reads stay silent — the agent is supposed to browse the archive when migrating helpers across upgrades), but add an edit:deny rule keyed on '*/harness-archive/*'. The leading * absorbs the worktree-relative prefix that edit/write/apply_patch produce; the dir name is the anchor. All three edit-class tools route through permission='edit' so one rule covers them. Bash-level mutations (rm -rf) are still possible, but the agent has no prompt-driven path to them and the user can deny bash explicitly via config if desired. --- packages/bcode-browser/src/harness.ts | 13 ++++++++++--- packages/opencode/src/agent/agent.ts | 18 ++++++++++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/packages/bcode-browser/src/harness.ts b/packages/bcode-browser/src/harness.ts index b4e43afc2..5852b32cd 100644 --- a/packages/bcode-browser/src/harness.ts +++ b/packages/bcode-browser/src/harness.ts @@ -133,12 +133,19 @@ const extractEmbeddedHarness = async (dataDir: string): Promise<string> => { return target } -let extractPromise: Promise<string> | null = null +// Per-dataDir cache. In production opencode passes the same Global.Path.data +// every call, so this is effectively a singleton; tests and any future +// multi-instance setup that resolves against multiple dataDirs each get their +// own deduplicated extraction without cross-directory contamination. 
+const extractCache = new Map<string, Promise<string>>() export const resolveHarnessDir = (dataDir: string): Promise<string> => { if (!isCompiled) return Promise.resolve(DEV_HARNESS_DIR) - if (!extractPromise) extractPromise = extractEmbeddedHarness(dataDir) - return extractPromise + const cached = extractCache.get(dataDir) + if (cached) return cached + const fresh = extractEmbeddedHarness(dataDir) + extractCache.set(dataDir, fresh) + return fresh } export * as Harness from "./harness" diff --git a/packages/opencode/src/agent/agent.ts b/packages/opencode/src/agent/agent.ts index 88affa7cf..1728864be 100644 --- a/packages/opencode/src/agent/agent.ts +++ b/packages/opencode/src/agent/agent.ts @@ -94,9 +94,20 @@ export const layer = Layer.effect( // In dev mode the harness lives inside the worktree, so this glob is a // no-op there. const harnessGlob = path.join(Harness.harnessDir(Global.Path.data), "*") - // Past-version snapshots taken at upgrade time. Read-only browsing for - // the agent when migrating its own helpers across upgrades. + // Past-version snapshots taken at upgrade time. Read-only history for + // the agent when migrating its own helpers across upgrades — silent + // reads via the external_directory whitelist, but edits/writes/ + // apply_patch are denied below to keep snapshots immutable. Bash-level + // mutations are still possible but the agent has no prompt-driven + // reason to delete the dir. const harnessArchiveGlob = path.join(Harness.harnessArchiveDir(Global.Path.data), "*") + // edit/write/apply_patch all `ctx.ask({ permission: "edit", ... })` + // with a path that's `path.relative(worktree, filepath)` — which for + // an out-of-worktree archive file looks like + // `../../.local/share/bcode/harness-archive/<version>/foo.py`. A leading + // `*` (greedy `.*`) absorbs that prefix; the dir name itself is the + // anchor. 
+ const harnessArchiveEditDeny = "*/harness-archive/*" const whitelistedDirs = [ Truncate.GLOB, browserSessionsGlob, @@ -113,6 +124,9 @@ export const layer = Layer.effect( "*": "ask", ...Object.fromEntries(whitelistedDirs.map((dir) => [dir, "allow"])), }, + // Covers `edit`, `write`, `apply_patch` — all three tools route + // through the `edit` permission key (see EDIT_TOOLS in permission/). + edit: { [harnessArchiveEditDeny]: "deny" }, question: "deny", plan_enter: "deny", plan_exit: "deny", From 82575b4c973e59ccd1e2d19515207be779b96125 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 6 May 2026 16:01:29 -0700 Subject: [PATCH 8/9] harness: evict failed extraction from cache so transient errors retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-dataDir cache from previous commit retained rejected promises forever, so a transient failure (disk full mid-extract, ephemeral file lock, network blip in a sub-call) would poison resolveHarnessDir for the rest of the process — only a restart could recover. Attach a sibling .catch that evicts the cache entry on rejection. The returned promise still rejects to the original caller and any concurrent waiters; only the cache slot is freed so the next call retries fresh. --- packages/bcode-browser/src/harness.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/bcode-browser/src/harness.ts b/packages/bcode-browser/src/harness.ts index 5852b32cd..9e93281fa 100644 --- a/packages/bcode-browser/src/harness.ts +++ b/packages/bcode-browser/src/harness.ts @@ -145,6 +145,11 @@ export const resolveHarnessDir = (dataDir: string): Promise => { if (cached) return cached const fresh = extractEmbeddedHarness(dataDir) extractCache.set(dataDir, fresh) + // Evict on rejection so a transient failure (disk full, file lock, etc.) + // doesn't poison the cache for the rest of the process. 
The .catch is a + // sibling consumer, not a transformation — `fresh` itself still rejects + // for the original caller and any concurrent waiters. + fresh.catch(() => extractCache.delete(dataDir)) return fresh } From 28e0c44057f9f05e060d3d96ef655435a79f94d6 Mon Sep 17 00:00:00 2001 From: Alezander9 Date: Wed, 6 May 2026 16:02:27 -0700 Subject: [PATCH 9/9] harness: evict failed extraction from per-dataDir cache Caching a rejected promise meant a single transient extraction failure (FS hiccup, partial write, race during install) bricked every later resolveHarnessDir call until process restart. Attach a .catch that deletes the entry, gated by '=== fresh' so a retry that started between failure and handler doesn't get evicted. --- packages/bcode-browser/src/harness.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/bcode-browser/src/harness.ts b/packages/bcode-browser/src/harness.ts index 9e93281fa..3e3783676 100644 --- a/packages/bcode-browser/src/harness.ts +++ b/packages/bcode-browser/src/harness.ts @@ -145,11 +145,12 @@ export const resolveHarnessDir = (dataDir: string): Promise => { if (cached) return cached const fresh = extractEmbeddedHarness(dataDir) extractCache.set(dataDir, fresh) - // Evict on rejection so a transient failure (disk full, file lock, etc.) - // doesn't poison the cache for the rest of the process. The .catch is a - // sibling consumer, not a transformation — `fresh` itself still rejects - // for the original caller and any concurrent waiters. - fresh.catch(() => extractCache.delete(dataDir)) + // Evict on rejection so a transient failure (FS hiccup, partial write) doesn't + // permanently brick subsequent calls. The `===` guard avoids clobbering a + // retry that started after the failure but before this handler fired. + fresh.catch(() => { + if (extractCache.get(dataDir) === fresh) extractCache.delete(dataDir) + }) return fresh }