diff --git a/app/domain-skills/harness-test/self-edit.md b/app/domain-skills/harness-test/self-edit.md index 3b5711aa..8e587066 100644 --- a/app/domain-skills/harness-test/self-edit.md +++ b/app/domain-skills/harness-test/self-edit.md @@ -1,31 +1,27 @@ -# Harness self-edit test +# Legacy Harness Self-Edit Test -Notes from a run where the agent was asked to modify its own tools. +Historical notes from the pre-`browser-harness-js` helper/`TOOLS.json` harness. +This is not runtime guidance for app-spawned agents. -## Files that matter - -- `helpers.js` — tool implementations. Every helper is dispatched from the `module.exports.dispatch.` object at the bottom. Adding a key there is enough to register a new tool. -- `TOOLS.json` — tool schemas exposed to the LLM. Must be valid JSON; validate with `node -e 'JSON.parse(require("fs").readFileSync(path))'` before trusting it. +Browser Harness JS should cover normal browser automation. Treat harness edits +as an escape hatch only when the user explicitly asks for them, or when a +confirmed harness/runtime defect blocks the task. -## Adding a new tool - -1. Write/append the handler in `helpers.js`. For trivial pure-JS tools you can inline the body directly in the dispatch map, e.g. - ```js - reverse_string: (_ctx, a) => ({ input: str(a, 'text'), reversed: str(a, 'text').split('').reverse().join('') }), - ``` -2. Add a matching entry to `TOOLS.json` (insert *before* the `done` entry — that's the conventional last tool). -3. Both files hot-reload on the next tool call; no restart needed. +## Files that matter -## Arg validation helpers already in scope +- `helpers.js` — now a small compatibility bridge to `browser-harness-js`, not + a normal extension surface. +- `AGENTS.md` — the app-specific browser harness manual. +- `browser-harness-js/` — bundled runtime; app launches may replace it. 
-- `str(a, 'key')` — required string -- `num(a, 'key')` — required finite number -- `optNum(a, 'key', default)` — optional number +## Legacy notes -Use them so bad LLM args produce clean errors instead of silent `undefined`. +The old model used `helpers.js` implementations plus `TOOLS.json` schemas. +That model has been removed from the desktop runtime. Do not revive it for +ordinary browser tasks. -## Gotchas +## If an edit is unavoidable -- Edits take effect on the *very next* tool call, so if you call the new tool in the same batch as the file write it will 404. Sequence the calls. -- `shell` output is sometimes truncated in the middle of long lines — prefer `awk 'NR>=A && NR<=B'` over `sed -n` and avoid piping through `cat -n` when inspecting. -- `patch_file` only replaces the first occurrence; make `old_str` unique. +- Keep the patch minimal and task-scoped. +- Prefer fixing app source stock files over editing generated userData copies. +- Mention the edited file and reason in the final answer. 
diff --git a/app/eslint.config.js b/app/eslint.config.js index ed4602cd..d8afb13d 100644 --- a/app/eslint.config.js +++ b/app/eslint.config.js @@ -17,6 +17,7 @@ module.exports = [ 'dist/**', 'node_modules/**', 'docker/agent/dist/**', + 'src/main/hl/stock/browser-harness-js/sdk/**', // JS files were never linted under the old --ext .ts,.tsx flag '**/*.js', '**/*.mjs', diff --git a/app/src/main/hl/engines/browserHarnessEnv.ts b/app/src/main/hl/engines/browserHarnessEnv.ts new file mode 100644 index 00000000..46b25629 --- /dev/null +++ b/app/src/main/hl/engines/browserHarnessEnv.ts @@ -0,0 +1,16 @@ +import { createHash } from 'node:crypto'; +import path from 'node:path'; +import type { SpawnContext } from './types'; + +function replPort(sessionId: string): string { + const n = createHash('sha256').update(sessionId).digest().readUInt16BE(0); + return String(18_000 + (n % 20_000)); +} + +export function applyBrowserHarnessEnv(ctx: SpawnContext, env: NodeJS.ProcessEnv): NodeJS.ProcessEnv { + const sdkDir = path.join(ctx.harnessDir, 'browser-harness-js', 'sdk'); + env.PATH = env.PATH ? `${sdkDir}${path.delimiter}${env.PATH}` : sdkDir; + env.CDP_REPL_PORT = env.CDP_REPL_PORT ?? replPort(ctx.sessionId); + env.CDP_REPL_LOG = env.CDP_REPL_LOG ?? path.join(ctx.harnessDir, `browser-harness-js-${ctx.sessionId}.log`); + return env; +} diff --git a/app/src/main/hl/engines/browsercode/adapter.ts b/app/src/main/hl/engines/browsercode/adapter.ts index 6161a7a3..ae7aae9b 100644 --- a/app/src/main/hl/engines/browsercode/adapter.ts +++ b/app/src/main/hl/engines/browsercode/adapter.ts @@ -3,10 +3,11 @@ * * BrowserCode inherits opencode's provider/model registry. This adapter uses * BrowserCode only as the headless model runtime; native `browser_execute` is - * disabled so agents keep using this app's Electron-scoped helpers.js harness. + * disabled so agents use this app's Electron-scoped browser-harness-js runtime. 
*/ import { register } from '../registry'; +import { applyBrowserHarnessEnv } from '../browserHarnessEnv'; import { enrichedEnv } from '../pathEnrich'; import { runCliCapture } from '../cliSpawn'; import type { @@ -154,7 +155,7 @@ const browserCodeAdapter: EngineAdapter = { }, } : {}), }); - return env; + return applyBrowserHarnessEnv(ctx, env); }, wrapPrompt(ctx: SpawnContext): string { @@ -169,8 +170,10 @@ const browserCodeAdapter: EngineAdapter = { 'You are running inside Browser Use Desktop through BrowserCode.', 'You are driving a specific Chromium browser view on this machine.', `Your target is CDP target_id=${ctx.targetId} on port ${ctx.cdpPort} (env BU_TARGET_ID / BU_CDP_PORT).`, - 'Do not use BrowserCode browser_execute. Read `./AGENTS.md` and use `./helpers.js` from this working directory for browser actions.', - 'Always read `./helpers.js` before writing scripts. Edit it only if a helper is missing.', + 'Do not use BrowserCode browser_execute. Read `./AGENTS.md` and use Browser Harness JS from this working directory for browser actions.', + "Use the `browser-harness-js` CLI for browser actions. 
Start with `browser-harness-js 'await connectToAssignedTarget()'`.", + 'Do not use old helpers.js convenience APIs for browser control.', + 'Do not edit harness files unless the user asks or a confirmed Browser Harness JS defect blocks the task.', 'For terminal commands, use BrowserCode/OpenCode\'s Bash tool and write commands for the current OS/shell it reports.', 'When producing files, save them to `./outputs/' + ctx.sessionId + '/` and mention the filename in the final answer.', ...attachmentLines, diff --git a/app/src/main/hl/engines/claude-code/adapter.ts b/app/src/main/hl/engines/claude-code/adapter.ts index 8bfaebfb..d0d4658c 100644 --- a/app/src/main/hl/engines/claude-code/adapter.ts +++ b/app/src/main/hl/engines/claude-code/adapter.ts @@ -12,6 +12,7 @@ import { mainLogger } from '../../../logger'; import { register } from '../registry'; +import { applyBrowserHarnessEnv } from '../browserHarnessEnv'; import { enrichedEnv } from '../pathEnrich'; import { runCliCapture, spawnCli } from '../cliSpawn'; import type { @@ -118,8 +119,10 @@ const claudeCodeAdapter: EngineAdapter = { const lines: string[] = [ 'You are driving a specific Chromium browser view on this machine.', `Your target is CDP target_id=${ctx.targetId} on port ${ctx.cdpPort} (env BU_TARGET_ID / BU_CDP_PORT).`, - 'Read `./AGENTS.md` for how to drive the browser in this harness.', - 'Always read `./helpers.js` before writing scripts — that is where the functions live. Edit it if a helper is missing.', + 'Read `./AGENTS.md` for how to drive the browser with Browser Harness JS.', + "Use the `browser-harness-js` CLI for browser actions. Start with `browser-harness-js 'await connectToAssignedTarget()'`.", + 'Do not use old helpers.js convenience APIs for browser control.', + 'Do not edit harness files unless the user asks or a confirmed Browser Harness JS defect blocks the task.', ]; if (ctx.attachmentRefs.length > 0) { lines.push('', 'The user attached these files for this task. 
Read each with your Read tool before acting:'); @@ -160,7 +163,7 @@ const claudeCodeAdapter: EngineAdapter = { if (ctx.savedApiKey) env.ANTHROPIC_API_KEY = ctx.savedApiKey; env.BU_TARGET_ID = ctx.targetId; env.BU_CDP_PORT = String(ctx.cdpPort); - return env; + return applyBrowserHarnessEnv(ctx, env); }, parseLine(line: string, ctx: ParseContext): ParseResult { diff --git a/app/src/main/hl/engines/codex/adapter.ts b/app/src/main/hl/engines/codex/adapter.ts index a1e2bf6c..edc40dd0 100644 --- a/app/src/main/hl/engines/codex/adapter.ts +++ b/app/src/main/hl/engines/codex/adapter.ts @@ -20,6 +20,7 @@ import os from 'node:os'; import path from 'node:path'; import { mainLogger } from '../../../logger'; import { register } from '../registry'; +import { applyBrowserHarnessEnv } from '../browserHarnessEnv'; import { enrichedEnv } from '../pathEnrich'; import { runCliCapture } from '../cliSpawn'; import { runCodexDeviceLogin } from '../../../identity/codexLogin'; @@ -109,8 +110,10 @@ const codexAdapter: EngineAdapter = { const lines: string[] = [ 'You are driving a specific Chromium browser view on this machine.', `Your target is CDP target_id=${ctx.targetId} on port ${ctx.cdpPort} (env BU_TARGET_ID / BU_CDP_PORT).`, - 'Read `./AGENTS.md` for how to drive the browser in this harness.', - 'Always read `./helpers.js` before writing scripts — that is where the functions live. Edit it if a helper is missing.', + 'Read `./AGENTS.md` for how to drive the browser with Browser Harness JS.', + "Use the `browser-harness-js` CLI for browser actions. Start with `browser-harness-js 'await connectToAssignedTarget()'`.", + 'Do not use old helpers.js convenience APIs for browser control.', + 'Do not edit harness files unless the user asks or a confirmed Browser Harness JS defect blocks the task.', ]; if (ctx.attachmentRefs.length > 0) { lines.push('', 'The user attached these files for this task. 
Read each one before acting:'); @@ -160,7 +163,7 @@ const codexAdapter: EngineAdapter = { } env.BU_TARGET_ID = ctx.targetId; env.BU_CDP_PORT = String(ctx.cdpPort); - return env; + return applyBrowserHarnessEnv(ctx, env); }, parseLine(line: string, ctx: ParseContext): ParseResult { diff --git a/app/src/main/hl/engines/runEngine.ts b/app/src/main/hl/engines/runEngine.ts index 4acf6678..69b36ce3 100644 --- a/app/src/main/hl/engines/runEngine.ts +++ b/app/src/main/hl/engines/runEngine.ts @@ -13,7 +13,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { engineLogger } from '../../logger'; import { resolveAuth, loadOpenAIKey, loadClaudeSubscriptionType, loadBrowserCodeConfig } from '../../identity/authStore'; -import { helpersPath, toolsPath, skillPath } from '../harness'; +import { helpersPath, skillPath } from '../harness'; import { get as getAdapter } from './registry'; import { spawnCli } from './cliSpawn'; import type { @@ -260,12 +260,10 @@ export async function runEngine(opts: RunEngineOptions): Promise { const stdinMode: 'pipe' | 'ignore' = stdinPayload != null ? 'pipe' : 'ignore'; const harnessHelpersAbs = path.resolve(helpersPath()); - const harnessToolsAbs = path.resolve(toolsPath()); const harnessSkillAbs = path.resolve(skillPath()); const watchedHarnessFiles: HarnessFileWatch[] = [ { path: harnessHelpersAbs, basename: path.basename(harnessHelpersAbs), target: 'helpers', hash: hashFile(harnessHelpersAbs) ?? null }, - { path: harnessToolsAbs, basename: path.basename(harnessToolsAbs), target: 'tools', hash: hashFile(harnessToolsAbs) ?? null }, { path: harnessSkillAbs, basename: path.basename(harnessSkillAbs), target: 'tools', hash: hashFile(harnessSkillAbs) ?? null }, ]; @@ -418,7 +416,7 @@ export async function runEngine(opts: RunEngineOptions): Promise { const extra: HlEvent[] = []; if (isWrite) { const action = /edit|patch/i.test(e.name) ? 
'patch' : 'write'; - if (resolved !== harnessHelpersAbs && resolved !== harnessToolsAbs && resolved !== harnessSkillAbs) { + if (resolved !== harnessHelpersAbs && resolved !== harnessSkillAbs) { const m = resolved.match(skillPathRe); if (m) extra.push({ type: 'skill_written', path: resolved, domain: m[1], topic: m[2], bytes: 0, action }); } @@ -433,7 +431,7 @@ export async function runEngine(opts: RunEngineOptions): Promise { iter: 0, pendingTools: new Map(), harnessHelpersPath: harnessHelpersAbs, - harnessToolsPath: harnessToolsAbs, + harnessToolsPath: '', harnessSkillPath: harnessSkillAbs, }; diff --git a/app/src/main/hl/engines/types.ts b/app/src/main/hl/engines/types.ts index 1159eb48..22bff6f4 100644 --- a/app/src/main/hl/engines/types.ts +++ b/app/src/main/hl/engines/types.ts @@ -12,7 +12,7 @@ import type { HlEvent } from '../../../shared/session-schemas'; export interface SpawnContext { /** User prompt to feed to the CLI. Adapters may wrap with seed/system text. */ prompt: string; - /** Absolute path to /harness/ (AGENTS.md + helpers.js live here). */ + /** Absolute path to /harness/ (AGENTS.md + browser-harness-js live here). */ harnessDir: string; /** App session id (used for naming uploads/outputs dirs + env injection). */ sessionId: string; diff --git a/app/src/main/hl/harness.ts b/app/src/main/hl/harness.ts index 182fc9f9..5e7c3570 100644 --- a/app/src/main/hl/harness.ts +++ b/app/src/main/hl/harness.ts @@ -1,15 +1,15 @@ /** - * Harness directory bootstrap: seeds `/harness/` with the stock - * `helpers.js` + `SKILL.md`. The agent (Claude Code subprocess) reads and - * edits these files freely. No tool schema, no dispatcher — helpers.js is - * a plain Node library that the agent invokes from its own shell tool. + * Harness directory bootstrap: seeds `/harness/` with the Browser + * Harness JS runtime and app-specific AGENTS.md. Agents drive the assigned + * browser target through the vendored `browser-harness-js` CLI. No tool schema, + * no dispatcher. 
* * Stock content is bundled via Vite's `?raw` import modifier. * - * Domain skills (`./stock/domain-skills/`) are a separate, read-only - * reference folder pulled from browser-use/harnessless. Unlike helpers.js, - * they are fully re-materialized on every launch — the agent consults them - * but must not edit them (upgrades will clobber any changes). + * Domain skills (`./stock/domain-skills/`) and interaction skills + * (`./stock/interaction-skills/`) are separate, read-only reference folders. + * They are fully re-materialized on every launch — the agent consults them but + * must not edit them (upgrades will clobber any changes). */ import fs from 'node:fs'; @@ -18,7 +18,6 @@ import { app } from 'electron'; import { mainLogger } from '../logger'; import STOCK_HELPERS_JS from './stock/helpers.js?raw'; -import STOCK_TOOLS_JSON from './stock/TOOLS.json?raw'; import STOCK_SKILL_MD from './stock/AGENTS.md?raw'; // Bundled domain-skills tree. Vite eagerly inlines every file under @@ -30,6 +29,18 @@ const STOCK_DOMAIN_SKILLS = import.meta.glob('./stock/domain-skills/**/*', { import: 'default', eager: true, }) as Record; +const INTERACTION_SKILLS_PREFIX = './stock/interaction-skills/'; +const STOCK_INTERACTION_SKILLS = import.meta.glob('./stock/interaction-skills/**/*', { + query: '?raw', + import: 'default', + eager: true, +}) as Record; +const BROWSER_HARNESS_JS_PREFIX = './stock/browser-harness-js/'; +const STOCK_BROWSER_HARNESS_JS = import.meta.glob('./stock/browser-harness-js/**/*', { + query: '?raw', + import: 'default', + eager: true, +}) as Record; export function harnessDir(): string { return path.join(app.getPath('userData'), 'harness'); @@ -39,16 +50,19 @@ export function helpersPath(): string { return path.join(harnessDir(), 'helpers. 
export function toolsPath(): string { return path.join(harnessDir(), 'TOOLS.json'); } export function skillPath(): string { return path.join(harnessDir(), 'AGENTS.md'); } export function domainSkillsDir(): string { return path.join(harnessDir(), 'domain-skills'); } +export function interactionSkillsDir(): string { return path.join(harnessDir(), 'interaction-skills'); } +export function browserHarnessJsDir(): string { return path.join(harnessDir(), 'browser-harness-js'); } /** * Ensure `/harness/` exists and contains the stock files. - * - Writes helpers.js if missing OR if the on-disk version is the legacy - * dispatcher-style (didn't export `createContext`). - * - Writes SKILL.md if missing. - * - Writes TOOLS.json if missing (retained for the legacy Anthropic-SDK - * agent loop; safe to ignore under the claude-subprocess path). - * - Fully replaces `/harness/domain-skills/` from the bundle. - * User edits to the up-to-date helpers.js / SKILL.md are preserved. + * - Writes helpers.js if missing OR if the on-disk version predates the + * browser-harness-js bridge. + * - Writes AGENTS.md if missing or stale. + * - Removes stale TOOLS.json from the legacy dispatcher path. + * - Fully replaces Browser Harness JS runtime, domain skills, and interaction + * skills from the bundle. + * Manual edits to the up-to-date helpers.js / AGENTS.md are preserved as an + * escape hatch, but app-spawned agents should normally use the bundled CLI. */ export function bootstrapHarness(): void { const dir = harnessDir(); @@ -61,7 +75,7 @@ export function bootstrapHarness(): void { const hp = helpersPath(); const needsHelpers = !fs.existsSync(hp) || (() => { - try { return !fs.readFileSync(hp, 'utf-8').includes('createContext'); } + try { return !fs.readFileSync(hp, 'utf-8').includes('browser-harness-js bridge'); } catch { return true; } })(); if (needsHelpers) { @@ -74,7 +88,7 @@ export function bootstrapHarness(): void { // existing users. 
AGENTS.md is the harness manual, not agent-editable // state — safe to overwrite so new sections (domain-skills, etc.) land // without the user deleting their userData. - const sentinel = 'Local app diagnostics'; + const sentinel = 'Browser Harness JS'; const needsSkill = !fs.existsSync(sp) || (() => { try { return !fs.readFileSync(sp, 'utf-8').includes(sentinel); } catch { return true; } @@ -84,12 +98,10 @@ export function bootstrapHarness(): void { mainLogger.info('harness.bootstrap.wroteSkill', { path: sp, bytes: (STOCK_SKILL_MD as string).length }); } - const tp = toolsPath(); - if (!fs.existsSync(tp)) { - fs.writeFileSync(tp, STOCK_TOOLS_JSON as string, 'utf-8'); - mainLogger.info('harness.bootstrap.wroteTools', { path: tp, bytes: (STOCK_TOOLS_JSON as string).length }); - } + removeLegacyToolsJson(); + materializeBrowserHarnessJs(); + materializeInteractionSkills(); materializeDomainSkills(); } @@ -100,28 +112,75 @@ export function bootstrapHarness(): void { * app version and lets us delete retired skills. 
*/ function materializeDomainSkills(): void { - const target = domainSkillsDir(); - const entries = Object.entries(STOCK_DOMAIN_SKILLS); + materializeRawTree({ + target: domainSkillsDir(), + prefix: DOMAIN_SKILLS_PREFIX, + entries: Object.entries(STOCK_DOMAIN_SKILLS), + logName: 'domainSkills', + emptyHint: 'run `yarn sync-domain-skills` to populate stock/', + }); +} + +function materializeInteractionSkills(): void { + materializeRawTree({ + target: interactionSkillsDir(), + prefix: INTERACTION_SKILLS_PREFIX, + entries: Object.entries(STOCK_INTERACTION_SKILLS), + logName: 'interactionSkills', + }); +} + +function materializeBrowserHarnessJs(): void { + materializeRawTree({ + target: browserHarnessJsDir(), + prefix: BROWSER_HARNESS_JS_PREFIX, + entries: Object.entries(STOCK_BROWSER_HARNESS_JS), + logName: 'browserHarnessJs', + executableBasenames: new Set(['browser-harness-js']), + }); +} + +function removeLegacyToolsJson(): void { + const tp = toolsPath(); + if (!fs.existsSync(tp)) return; + try { + fs.rmSync(tp, { force: true }); + mainLogger.info('harness.bootstrap.removedLegacyTools', { path: tp }); + } catch (err) { + mainLogger.warn('harness.bootstrap.removeLegacyTools.failed', { path: tp, error: (err as Error).message }); + } +} + +function materializeRawTree(opts: { + target: string; + prefix: string; + entries: Array<[string, string]>; + logName: string; + emptyHint?: string; + executableBasenames?: Set; +}): void { + const { target, prefix, entries, logName, emptyHint, executableBasenames } = opts; if (entries.length === 0) { - mainLogger.warn('harness.bootstrap.domainSkills.empty', { hint: 'run `yarn sync-domain-skills` to populate stock/' }); + mainLogger.warn(`harness.bootstrap.${logName}.empty`, emptyHint ? 
{ hint: emptyHint } : {}); return; } try { fs.rmSync(target, { recursive: true, force: true }); } catch (err) { - mainLogger.error('harness.bootstrap.domainSkills.clear.failed', { target, error: (err as Error).message }); + mainLogger.error(`harness.bootstrap.${logName}.clear.failed`, { target, error: (err as Error).message }); throw err; } let bytes = 0; for (const [modulePath, content] of entries) { - const rel = modulePath.slice(DOMAIN_SKILLS_PREFIX.length); + const rel = modulePath.slice(prefix.length); const outPath = path.join(target, rel); fs.mkdirSync(path.dirname(outPath), { recursive: true }); fs.writeFileSync(outPath, content, 'utf-8'); + if (executableBasenames?.has(path.basename(outPath))) fs.chmodSync(outPath, 0o755); bytes += content.length; } - mainLogger.info('harness.bootstrap.domainSkills.wrote', { target, files: entries.length, bytes }); + mainLogger.info(`harness.bootstrap.${logName}.wrote`, { target, files: entries.length, bytes }); } diff --git a/app/src/main/hl/stock/AGENTS.md b/app/src/main/hl/stock/AGENTS.md index 9ddd117d..6215c066 100644 --- a/app/src/main/hl/stock/AGENTS.md +++ b/app/src/main/hl/stock/AGENTS.md @@ -1,151 +1,175 @@ -# Browser harness — SKILL +# Browser Harness JS -You are driving a specific Chromium browser view on the user's machine. -The browser is already open; your job is to drive it with short Node -scripts that use the helpers in `./helpers.js`. +You are driving one specific Chromium browser view on the user's machine. +Use `browser-harness-js` for browser actions. It runs JavaScript snippets +against a persistent CDP session and exposes Chrome DevTools Protocol domains +directly as `session.Page`, `session.DOM`, `session.Runtime`, `session.Input`, +`session.Network`, and so on. -## Your target +Do not use old `helpers.js` convenience APIs for browser control. `helpers.js` +is only a small compatibility bridge that points at the vendored +`browser-harness-js` CLI. 
-Two environment variables tell you which browser view to drive: +## Your Target -- `BU_TARGET_ID` — the CDP target id of your assigned view. **Use only this target.** Do not create new targets, switch to other targets, or navigate away from this one into internal Chrome pages (chrome://, devtools://). -- `BU_CDP_PORT` — the port Chromium's CDP is listening on (usually 9222). +Two environment variables identify the assigned browser view: -## How to act +- `BU_TARGET_ID` - the CDP target id of the view you must drive. +- `BU_CDP_PORT` - the local CDP HTTP port. -Write a Node script, run it with `node`, read the output. That's the loop. +Use only this assigned target. Do not create unrelated browser targets, switch +to other user tabs, or navigate internal Chrome pages unless the user explicitly +asks for app/browser diagnostics. + +## First Call + +Run this once before page-level CDP calls: + +```bash +browser-harness-js 'await connectToAssignedTarget()' +``` + +That connects to `BU_CDP_PORT`, attaches `BU_TARGET_ID` when the browser-level +endpoint is available, enables common Page/DOM/Runtime/Network domains, and +keeps the session alive for later `browser-harness-js` calls. + +## Basic Pattern + +Single-expression snippets print the expression result automatically: + +```bash +browser-harness-js '(await session.Runtime.evaluate({expression:"document.title",returnByValue:true})).result.value' +``` + +Multi-statement snippets must explicitly `return` a value: + +```bash +browser-harness-js <<'EOF' +await connectToAssignedTarget() +await session.Page.navigate({ url: 'https://example.com' }) +await session.waitFor('Page.loadEventFired', undefined, 15000).catch(() => null) +const title = (await session.Runtime.evaluate({ + expression: 'document.title', + returnByValue: true, +})).result.value +return { title } +EOF +``` + +Output is raw result content: strings print as plain text, objects print as +compact JSON, and empty values print nothing. 
Errors go to stderr and exit 1. + +## CDP Is The API + +Call Chrome's protocol methods directly: + +```js +await session.Page.navigate({ url: 'https://example.com' }) +await session.Input.dispatchMouseEvent({ type: 'mousePressed', x: 120, y: 80, button: 'left', clickCount: 1 }) +await session.Input.dispatchMouseEvent({ type: 'mouseReleased', x: 120, y: 80, button: 'left', clickCount: 1 }) +await session.Input.insertText({ text: 'hello' }) +await session.DOM.getDocument({ depth: -1 }) +await session.Page.captureScreenshot({ format: 'png' }) +``` + +The full generated method surface is in +`./browser-harness-js/sdk/generated.ts`. Search it when you need exact params: + +```bash +rg -n "captureScreenshot|dispatchMouseEvent|setFileInputFiles" ./browser-harness-js/sdk/generated.ts +``` + +## Useful Globals + +The `browser-harness-js` REPL preloads: + +- `session` - persistent CDP `Session`. +- `connectToAssignedTarget()` - Browser Use Desktop helper for `BU_TARGET_ID` + and `BU_CDP_PORT`. +- `listPageTargets()` - lists real page targets when connected to a browser + endpoint. +- `detectBrowsers()` and `resolveWsUrl(opts)` - upstream browser discovery. +- `CDP` - generated namespace/type reference. + +Persist ad-hoc data across calls on `globalThis`: ```bash -node -e ' -const H = require("./helpers.js"); -(async () => { - const ctx = await H.createContext(); - await H.goto(ctx, "https://example.com"); - await H.waitForLoad(ctx); - console.log(JSON.stringify(await H.pageInfo(ctx))); - await ctx.close(); -})().catch(e => { console.error(e.message); process.exit(1); }); -' +browser-harness-js 'globalThis.lastTitle = (await session.Runtime.evaluate({expression:"document.title",returnByValue:true})).result.value' +browser-harness-js 'globalThis.lastTitle' ``` -Always: -- Open exactly one context per script; close it before the script exits. -- `console.log` the data you need to reason from in the next step. 
-- Use `screenshot(ctx, '/tmp/shot.png')` when visual inspection helps, then `Read` the file. - -## The helpers - -`./helpers.js` is a plain Node library. Read it whenever you need to -remember what exists. The main exports: - -- `createContext({ targetId?, port? })` — open a CDP session to your target. Defaults read `BU_TARGET_ID` / `BU_CDP_PORT`. -- `goto(ctx, url)` — navigate. Call `waitForLoad` after. -- `waitForLoad(ctx, timeoutSec?)` — poll until `document.readyState === 'complete'`. -- `pageInfo(ctx)` — `{url, title, w, h, sx, sy, pw, ph}`. Also returns `{dialog}` if an alert/confirm is blocking the JS thread. -- `click(ctx, x, y, button?, clicks?)` — **coordinate click**. Default interaction method. Passes through iframes/shadow at the compositor level. -- `typeText(ctx, text)` — insert text at the current caret (no key events). -- `pressKey(ctx, key, modifiers?)` — CDP key event. `modifiers` bitfield: 1=Alt, 2=Ctrl, 4=Cmd, 8=Shift. -- `dispatchKey(ctx, selector, key?, event?)` — dispatch a DOM KeyboardEvent when `pressKey` isn't picked up by a listener. -- `scroll(ctx, x, y, dy?, dx?)` — wheel scroll at a point. `dy=-300` scrolls down. -- `js(ctx, expression)` — run JS in the page; returns the value (must be JSON-serializable). -- `reactSetValue(ctx, selector, value)` — native setter + `input`/`change` dispatch. Use when a React component ignores `type_text`. -- `uploadFile(ctx, selector, paths)` — `` via CDP. -- `captureDialogs(ctx)` / `dialogs(ctx)` — stub `alert/confirm/prompt` so they don't block the page thread. -- `httpGet(ctx, url)` — HTTP fetch, no browser. -- `screenshot(ctx, outPath?, full?)` — PNG. Write to a file with `outPath` so you can `Read` it back. - -## Coordinate clicks before selector gymnastics - -Prefer `click(x, y)` over JS-dispatched clicks. Most framework widgets -(MUI dropdowns, custom selects) respond to coordinate clicks but not to -`el.click()`. 
Use `js(ctx, "document.querySelector(...).getBoundingClientRect()")` -to get precise coords; do not eyeball from screenshots. - -## Verify after every action - -Re-screenshot or re-`pageInfo` after clicking, typing, navigating. Don't -assume an action worked. - -## Self-healing - -If a helper is missing, broken, or you need a new one: - -1. Read `./helpers.js`. -2. Add the function (export it via `module.exports.yourFn = ...`). -3. Use it in the next script. It's live immediately because each `node -e` - invocation re-reads the file. - -Keep helpers short and composable. Every helper takes `ctx` as the first -arg. - -## Known gotchas - -- Chrome 144+ doesn't serve `/json/version` at `chrome://inspect` — use the port directly (`http://localhost:${BU_CDP_PORT}/json/list`). -- `alert()` / `confirm()` block the JS thread — call `captureDialogs(ctx)` **before** triggering them. -- capture_dialogs stubs reset on navigation — re-call after `goto`. -- React-controlled inputs ignore `el.value=...` — use `reactSetValue`. -- CDP `char` event doesn't fire DOM keypress for specials (Enter/Tab) — use `dispatchKey`. -- Same-origin nested iframes don't show up as CDP targets — walk `contentDocument` instead. -- Shadow DOM `querySelector` does **not** pierce — walk `element.shadowRoot` recursively. - -## Domain skills (read-only reference) - -`./domain-skills/` contains per-site playbooks pulled from -[browser-use/harnessless](https://github.com/browser-use/harnessless). -Before acting on a task for a specific site, check for a matching folder -(e.g. `./domain-skills/amazon/`, `./domain-skills/github/`) and read any -`.md` files you find there — they document selectors, flows, and gotchas -that are cheaper to reuse than to rediscover. - -These files are **read-only**. They are fully overwritten from the bundle -on every app launch, so any edits you make will be lost. If you learn -something new about a site, add it to `helpers.js` or a comment there -instead. 
- -## Uploads and outputs - -- **Uploads**: if the user attached files, they appear in the seed prompt with - paths under `./uploads/<session_id>/`. Read each with your `Read` tool - before acting on the task. Images and PDFs are natively supported. -- **Outputs**: when the user asks you to produce a file (a report, CSV, - screenshot, transcript, edited image, etc.), save it to - `./outputs/<session_id>/` with a clear filename. The app watches that - directory and surfaces each new file in the UI with a button to open it. +## Interaction Skills + +`./interaction-skills/` contains focused CDP recipes from +`browser-use/browser-harness-js`: screenshots, scrolling, uploads, dialogs, +iframes, shadow DOM, downloads, network requests, dropdowns, tabs, cookies, +viewport, drag-and-drop, and print-to-PDF. + +Before implementing a non-obvious browser mechanic, read the matching file. +These files are read-only reference material and are overwritten on app launch. + +## Domain Skills + +`./domain-skills/` contains site-specific playbooks pulled from +`browser-use/harnessless`. Before acting on a task for a specific website, +check for a matching folder and read any relevant `.md` files you find there. +They document selectors, flows, rate limits, and gotchas that are cheaper to +reuse than to rediscover. + +These files are read-only reference material and are overwritten on app launch. + +## Harness Files + +Browser Harness JS should cover normal browser work. Do not edit `helpers.js`, +`AGENTS.md`, `browser-harness-js/`, `interaction-skills/`, or `domain-skills/` +as a first resort. + +Only make a small harness edit when the user explicitly asks for it, or when a +confirmed bug or missing capability in the bundled runtime blocks the task. If +you do edit a harness file, say exactly what changed in your final answer. + +## Verification Loop + +Verify after every meaningful browser action: + +- Use `session.Page.captureScreenshot({ format: 'png' })` for visual state. 
+- Use `session.Runtime.evaluate({ expression, returnByValue: true })` for page + state. +- Use `session.waitFor(method, predicate, timeoutMs)` for protocol events. + +For screenshots: + +```bash +browser-harness-js <<'EOF' +await connectToAssignedTarget() +const { data } = await session.Page.captureScreenshot({ format: 'png' }) +await Bun.write('/tmp/browser-use-shot.png', Buffer.from(data, 'base64')) +return '/tmp/browser-use-shot.png' +EOF +``` + +## Uploads And Outputs + +- Uploads from the user appear under `./uploads/<session_id>/`. +- Files you create for the user must go under `./outputs/<session_id>/`. Mention the filename in your final answer. -## Local app diagnostics +## Local App Diagnostics -You run from `<userData>/harness/`. If the user explicitly asks you to debug -this desktop app, your local app state is one directory up. +If the user explicitly asks you to debug Browser Use Desktop, local app state is +one directory up from the harness: -- Runtime root: `..` (the Electron `userData` directory). -- Session database: `../sessions.db`. Useful tables are `sessions`, - `session_events`, and `session_attachments`. Prefer read-only `sqlite3` - queries while the app is running. +- Runtime root: `..` +- Session database: `../sessions.db` - Logs: `../logs/main.log`, `../logs/browser.log`, `../logs/renderer.log`, - and `../logs/engine.log`. These are JSONL files. -- Task transcript: persisted in `session_events.payload` for the session id. -- Account state: `../account.json`. This is onboarding state, not API keys. -- Local task control: `../local-task-server.json`. It contains a loopback URL - and bearer token for submitting new app tasks. Use it only when the user - explicitly asks you to start another Browser Use task, and do not print the - token. -- Agent files: `./uploads/<session_id>/` for attachments and - `./outputs/<session_id>/` for files you create. - -Credentials are sensitive. Do not print raw keys, tokens, or keychain blobs. 
-Use status checks and masked values unless the user explicitly asks to change -auth. - -- App-managed Anthropic/OpenAI API keys are in the OS credential store via - keytar: service `com.browser-use.desktop.credentials`, account `default`. -- Claude Code subscription auth belongs to the Claude CLI. Prefer - `claude auth status --json`; raw OAuth entries, if present, are under the - OS credential service `Claude Code-credentials`. -- Codex subscription auth belongs to Codex. Check for - `${CODEX_HOME:-~/.codex}/auth.json` presence only; do not dump it. + and `../logs/engine.log` +- Account state: `../account.json` +- Local task control: `../local-task-server.json` + +Do not print raw credentials, tokens, keychain values, or the local task bearer +token. Use status checks and masked values. ## Done -Say what you accomplished when the user's task is complete. Short, -user-facing. No narration of every step. +Say what you accomplished when the task is complete. Keep it short and +user-facing. diff --git a/app/src/main/hl/stock/TOOLS.json b/app/src/main/hl/stock/TOOLS.json deleted file mode 100644 index 879f81fa..00000000 --- a/app/src/main/hl/stock/TOOLS.json +++ /dev/null @@ -1,208 +0,0 @@ -[ - { - "name": "goto", - "description": "Navigate the attached tab to the given URL (does not wait for load).", - "input_schema": { "type": "object", "properties": { "url": { "type": "string" } }, "required": ["url"] } - }, - { - "name": "page_info", - "description": "Get {url, title, w, h, sx, sy, pw, ph}: viewport + scroll + page-size.", - "input_schema": { "type": "object", "properties": {} } - }, - { - "name": "click", - "description": "Coordinate click at (x,y) in CSS px relative to viewport. 
Default interaction method — passes through iframes/shadow DOM.", - "input_schema": { - "type": "object", - "properties": { "x": { "type": "number" }, "y": { "type": "number" }, "button": { "type": "string", "enum": ["left", "right", "middle"] }, "clicks": { "type": "number" } }, - "required": ["x", "y"] - } - }, - { - "name": "type_text", - "description": "Insert text at the current caret (no key events). Tab focus first via js() if needed. For React-controlled inputs use react_set_value.", - "input_schema": { "type": "object", "properties": { "text": { "type": "string" } }, "required": ["text"] } - }, - { - "name": "press_key", - "description": "CDP key event: 'Enter', 'Tab', 'ArrowDown', 'Escape', single chars, etc. Modifiers bitfield: 1=Alt 2=Ctrl 4=Cmd 8=Shift.", - "input_schema": { - "type": "object", - "properties": { "key": { "type": "string" }, "modifiers": { "type": "number" } }, - "required": ["key"] - } - }, - { - "name": "dispatch_key", - "description": "Dispatch a DOM KeyboardEvent on a selector. Use when CDP press_key does not trigger the listener (e.g. keypress for Enter on ).", - "input_schema": { - "type": "object", - "properties": { "selector": { "type": "string" }, "key": { "type": "string" }, "event": { "type": "string" } }, - "required": ["selector"] - } - }, - { - "name": "scroll", - "description": "Mouse-wheel scroll at (x,y). dy<0 scrolls down. Used for virtual/scroll-wheel pickers (e.g. TikTok time picker) where dy=32 steps +1 unit.", - "input_schema": { - "type": "object", - "properties": { "x": { "type": "number" }, "y": { "type": "number" }, "dy": { "type": "number" }, "dx": { "type": "number" } }, - "required": ["x", "y"] - } - }, - { - "name": "js", - "description": "Run a JS expression in the attached tab. 
Optional target_id to run inside a cross-origin iframe (from iframe_target).", - "input_schema": { - "type": "object", - "properties": { "expr": { "type": "string" }, "target_id": { "type": "string" } }, - "required": ["expr"] - } - }, - { - "name": "react_set_value", - "description": "Set a React-controlled input value via the native setter + dispatch 'input'+'change'. Use when type_text is overwritten by React.", - "input_schema": { - "type": "object", - "properties": { "selector": { "type": "string" }, "value": { "type": "string" } }, - "required": ["selector", "value"] - } - }, - { - "name": "screenshot", - "description": "Capture a PNG screenshot. full=true passes captureBeyondViewport. Returns byte length + a short preview only (LLM cannot reliably click from the image — use js+getBoundingClientRect for coords).", - "input_schema": { "type": "object", "properties": { "full": { "type": "boolean" } } } - }, - { - "name": "wait", - "description": "Sleep for N seconds. Prefer wait_for_load; use wait only for truly fixed delays.", - "input_schema": { "type": "object", "properties": { "seconds": { "type": "number" } }, "required": ["seconds"] } - }, - { - "name": "wait_for_load", - "description": "Poll document.readyState === 'complete' up to timeout seconds (default 15).", - "input_schema": { "type": "object", "properties": { "timeout": { "type": "number" } } } - }, - { - "name": "http_get", - "description": "HTTP GET (no browser). Use for static pages / APIs — much faster than loading in a tab.", - "input_schema": { "type": "object", "properties": { "url": { "type": "string" } }, "required": ["url"] } - }, - { - "name": "list_tabs", - "description": "List pages currently open. 
include_chrome=true to include chrome://, devtools://, about:blank etc.", - "input_schema": { "type": "object", "properties": { "include_chrome": { "type": "boolean" } } } - }, - { - "name": "current_tab", - "description": "Return {targetId, url, title} for the attached tab.", - "input_schema": { "type": "object", "properties": {} } - }, - { - "name": "switch_tab", - "description": "Attach to another target (via targetId from list_tabs) and make it the current session.", - "input_schema": { "type": "object", "properties": { "target_id": { "type": "string" } }, "required": ["target_id"] } - }, - { - "name": "new_tab", - "description": "Open a new tab and attach. Returns the new targetId.", - "input_schema": { "type": "object", "properties": { "url": { "type": "string" } } } - }, - { - "name": "ensure_real_tab", - "description": "Switch to a real user tab if current is chrome:// / internal / stale. Returns {targetId, url, title} or null.", - "input_schema": { "type": "object", "properties": {} } - }, - { - "name": "iframe_target", - "description": "Find cross-origin iframe target whose URL contains substr. Returns targetId string or null; pass to js(expr, target_id=...).", - "input_schema": { "type": "object", "properties": { "substr": { "type": "string" } }, "required": ["substr"] } - }, - { - "name": "upload_file", - "description": "Set files on via CDP DOM.setFileInputFiles. paths is absolute filepath or list of filepaths.", - "input_schema": { - "type": "object", - "properties": { - "selector": { "type": "string" }, - "paths": { "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }] } - }, - "required": ["selector", "paths"] - } - }, - { - "name": "capture_dialogs", - "description": "JS stub: replace window.alert/confirm/prompt so messages stash in window.__dialogs__. Call BEFORE the triggering action. 
Stubs are lost on navigation — re-call after goto.", - "input_schema": { "type": "object", "properties": {} } - }, - { - "name": "dialogs", - "description": "Read the JS-stub dialog buffer. Returns list of dialog message strings since last capture_dialogs.", - "input_schema": { "type": "object", "properties": {} } - }, - { - "name": "drain_events", - "description": "Flush the CDP event ring-buffer (max 500) and clear. Returns events in FIFO order.", - "input_schema": { "type": "object", "properties": {} } - }, - { - "name": "cdp", - "description": "Escape hatch: raw CDP send. Use for methods not covered by a typed helper (e.g. Page.handleJavaScriptDialog). Returns the CDP result object.", - "input_schema": { - "type": "object", - "properties": { "method": { "type": "string" }, "params": { "type": "object" } }, - "required": ["method"] - } - }, - { - "name": "read_file", - "description": "Read a file from the local filesystem. Returns {path, content, size}. Large files are truncated at 256 KB.", - "input_schema": { "type": "object", "properties": { "path": { "type": "string" } }, "required": ["path"] } - }, - { - "name": "write_file", - "description": "Write content to a file (creates parent dirs if needed). Returns {path, bytes}.", - "input_schema": { - "type": "object", - "properties": { "path": { "type": "string" }, "content": { "type": "string" } }, - "required": ["path", "content"] - } - }, - { - "name": "patch_file", - "description": "Replace the first occurrence of old_str with new_str in a file. Returns {path, replaced: bool}.", - "input_schema": { - "type": "object", - "properties": { "path": { "type": "string" }, "old_str": { "type": "string" }, "new_str": { "type": "string" } }, - "required": ["path", "old_str", "new_str"] - } - }, - { - "name": "list_dir", - "description": "List directory entries. 
Returns {path, entries: [{name, type}]}.", - "input_schema": { "type": "object", "properties": { "path": { "type": "string" } }, "required": ["path"] } - }, - { - "name": "shell", - "description": "Execute a shell command. Returns {exitCode, stdout, stderr}. Timeout: 30s. Optional cwd.", - "input_schema": { - "type": "object", - "properties": { "command": { "type": "string" }, "cwd": { "type": "string" } }, - "required": ["command"] - } - }, - { - "name": "notify", - "description": "Send a notification to the user. level=info for FYI (agent keeps going), level=blocking for things that need user action (auth walls, CAPTCHAs). Blocking notifications halt the agent after sending.", - "input_schema": { - "type": "object", - "properties": { "message": { "type": "string" }, "level": { "type": "string", "enum": ["info", "blocking"] } }, - "required": ["message", "level"] - } - }, - { - "name": "done", - "description": "Call this when the task is complete. Pass a short user-facing summary of the outcome.", - "input_schema": { "type": "object", "properties": { "summary": { "type": "string" } }, "required": ["summary"] } - } -] diff --git a/app/src/main/hl/stock/browser-harness-js/SKILL.md b/app/src/main/hl/stock/browser-harness-js/SKILL.md new file mode 100644 index 00000000..3da37b5c --- /dev/null +++ b/app/src/main/hl/stock/browser-harness-js/SKILL.md @@ -0,0 +1,248 @@ +--- +name: cdp +description: Drive Browser Use Desktop's assigned Chromium target via the DevTools Protocol from JavaScript. Run snippets through the bundled `browser-harness-js` CLI; it auto-spawns a long-lived Bun HTTP server holding a CDP `Session`, and every call executes against the same persistent connection. +--- + +# CDP — `browser-harness-js` skill + +Custom codegen'd CDP SDK (every method from browser_protocol.json + js_protocol.json gets a typed wrapper) plus a tiny HTTP server that holds one persistent CDP `Session`. 
The `browser-harness-js` CLI auto-starts the server on first use and forwards JS snippets to it. + +Browser Use Desktop bundles the runtime under `./browser-harness-js/sdk/` and puts that directory on PATH for you. Do not run `npx skills add` or create global symlinks from inside the desktop harness. + +## First use in Browser Use Desktop + +Connect to the app-assigned target before page-level calls: + +```bash +browser-harness-js 'await connectToAssignedTarget()' +``` + +`connectToAssignedTarget()` reads `BU_TARGET_ID` and `BU_CDP_PORT`, attaches the assigned target when possible, and enables the common Page/DOM/Runtime/Network domains. The CLI auto-installs `bun` on first run if it is missing. Set `BROWSER_HARNESS_SKIP_BUN_INSTALL=1` to opt out. + +## How to use + +Just run `browser-harness-js ''`. The first call spawns the server in the background; subsequent calls hit the same process and so reuse the same `session`, the same WebSocket to Chrome, and any globals you set. + +```bash +browser-harness-js 'await connectToAssignedTarget()' +browser-harness-js 'await session.Page.navigate({url:"https://example.com"})' +browser-harness-js '(await session.Runtime.evaluate({expression:"document.title",returnByValue:true})).result.value' +``` + +Output is the **raw result content** — no `{ok,result}` envelope. + +| Result type | stdout | +|---|---| +| string | bare text, no JSON quotes (e.g. `Example Domain`) | +| number / boolean | `42`, `true` | +| object / array (non-empty) | compact JSON (e.g. `{"frameId":"..."}`, `[1,2,3]`) | +| `undefined` / `null` / `""` / `{}` / `[]` | empty (no output) | + +**Errors** go to **stderr**, exit code `1`. The CDP error message and JS stack are printed verbatim, e.g.: +``` +Error: CDP -32602: invalid params + at _call (.../session.ts:117:33) + ... +``` +Detect failure with `if browser-harness-js '...'; then ...; else handle_error; fi` or by checking `$?`. 
+ +**Multi-line snippets via stdin (heredoc).** Important: a multi-statement snippet does NOT auto-return the last expression — write `return X` explicitly. Single-expression snippets passed as the first argument DO auto-return. + +```bash +browser-harness-js <<'EOF' +const tabs = await listPageTargets(); +globalThis.tid = tabs[0].targetId; +await session.use(globalThis.tid); +return globalThis.tid; +EOF +``` + +## CLI commands + +| Command | Behavior | +|---|---| +| `browser-harness-js ''` | Auto-start server if needed, eval the JS, print result. | +| `browser-harness-js </DevToolsActivePort` directly. | +| `{ wsUrl }` | You already have `ws://…/devtools/browser/` (e.g. piped from elsewhere). | + +```js +await session.connect({ profileDir: '/Users//Library/Application Support/Google/Chrome' }) +await session.connect({ wsUrl: 'ws://127.0.0.1:9222/devtools/browser/' }) +``` + +Profile paths by OS — use these with `{ profileDir }`: +- macOS: `~/Library/Application Support/` (e.g. `Google/Chrome`, `Comet`, `BraveSoftware/Brave-Browser`, `Arc/User Data`) +- Linux: `~/.config/` (e.g. `google-chrome`, `chromium`, `BraveSoftware/Brave-Browser`) +- Windows: `%LOCALAPPDATA%\\User Data` (e.g. `Google\Chrome`, `Microsoft\Edge`, `BraveSoftware\Brave-Browser`) + +Per-candidate WS-open timeout defaults to **5s** — live browsers answer with open/close within ~100ms, so 5s is already generous. The only case where 5s is too short is when Chrome is showing the **Allow** popup and waiting on the user to click. If you expect that, pass `timeoutMs: 30000`: + +```js +await session.connect({ profileDir: '/Users//Library/Application Support/Google/Chrome', timeoutMs: 30_000 }) +``` + +**If you see `No detected browser accepted a connection`** — the browsers have `DevToolsActivePort` files but none are currently serving WS. Most common cause: remote-debugging is enabled but the user hasn't clicked **Allow** on the prompt yet. Tell them to click Allow, then retry (or bump `timeoutMs`). 
+ +### Picking a target (tab) + +After `connect()`, call `session.use(targetId)` once; subsequent page-level calls (Page/DOM/Runtime/Network/etc.) auto-route to that target's sessionId. `Browser.*` and `Target.*` calls always hit the browser endpoint. + +```js +const tabs = await listPageTargets() // no args; uses the connected session +const sid = await session.use(tabs[0].targetId) +await session.Page.enable() +await session.Page.navigate({ url: 'https://example.com' }) +``` + +`listPageTargets()` uses CDP's `Target.getTargets` (not `/json`), so it works on Chrome 144+ too. It already filters out `chrome://` and `devtools://` URLs. Equivalent raw call: + +```js +const { targetInfos } = await session.Target.getTargets({}) +const tabs = targetInfos.filter(t => t.type === 'page' && !t.url.startsWith('chrome://') && !t.url.startsWith('devtools://')) +``` + +To switch tabs: `session.use(otherTargetId)`. To detach: `session.setActiveSession(undefined)`. + +### Events + +```js +// Subscribe (returns an unsubscribe fn) +const off = session.onEvent((method, params, sessionId) => { ... }) + +// Or wait for a single matching event with optional predicate + timeout +await session.Page.enable() +const ev = await session.waitFor( + 'Page.frameNavigated', + (p) => p.frame.url.includes('example.com'), + 10_000 +) +``` + +### Persisting state across calls + +Each snippet runs inside its own async wrapper, so its `let`/`const` declarations vanish when it returns. To carry data forward, attach to `globalThis`: + +```bash +browser-harness-js '(await listPageTargets()).forEach((t,i)=>globalThis["tab"+i]=t.targetId)' +browser-harness-js 'await session.use(globalThis.tab0)' +browser-harness-js 'await session.Page.navigate({url:"https://example.com"})' +``` + +`session` itself, the active sessionId, and event subscribers are already preserved by the server — globals are only needed for ad-hoc data.
+ +## Connecting to a running Chrome (chrome://inspect flow) + +When attaching to the user's already-running browser: + +1. **Try `await session.connect()` first** (no args) — auto-detect handles every Chromium-based browser via `DevToolsActivePort`. If it returns, you're done. +2. **If auto-detect fails** with `No running browser with remote debugging detected`, the user needs to turn it on. Open the inspect page: + ```bash + # macOS — prefer AppleScript over `open -a` (reuses current profile, avoids the profile picker) + osascript -e 'open location "chrome://inspect/#remote-debugging"' + + # Linux + google-chrome 'chrome://inspect/#remote-debugging' # or: chromium, google-chrome-stable + + # Windows (PowerShell) + Start-Process chrome 'chrome://inspect/#remote-debugging' + ``` + Only macOS's AppleScript path avoids the profile picker; Linux/Windows may prompt the user to pick a profile first. +3. **Tick "Discover network targets"** in chrome://inspect, then click **Allow** when Chrome prompts. +4. **If auto-detect picks the wrong browser** (multiple running, you want a specific one): list them with `await detectBrowsers()`, then `await session.connect({ profileDir: })`. +5. **If `session.connect()` returns `No detected browser accepted a connection`**, the user has remote-debugging on but hasn't clicked **Allow** yet. Tell them to click it and retry, or pass `timeoutMs: 30000` to wait for the click. + +## Working with targets (tabs) + +- **Filter Chrome internals.** `listPageTargets()` already drops `chrome://` and `devtools://` URLs. If you call `Target.getTargets()` directly, filter manually. +- **CDP target order ≠ visible tab-strip order.** When the user says "the first tab I can see", use a screenshot or page title to identify it — `Target.activateTarget` only switches to a known targetId. + +## Looking up a method + +The full typed surface is in `/sdk/generated.ts` (~655 KB, only loaded if you read it). 
Each method has its CDP description as a JSDoc comment plus typed `*Params` / `*Return` interfaces in per-domain namespaces. + +```bash +grep -n "navigate" /sdk/generated.ts | head +``` + +## Regenerating the SDK + +This is a maintenance-only workflow, not a normal task step. Browser Use +Desktop already bundles the generated SDK. Do not regenerate or patch it during +ordinary browser tasks unless the user explicitly asks, or a confirmed bundled +runtime defect blocks the task. + +When the upstream protocol JSONs change, replace `sdk/browser_protocol.json` and/or `sdk/js_protocol.json` and re-run: + +```bash +cd /sdk && bun gen.ts +browser-harness-js --restart # pick up the new bindings +``` + +## Files + +All paths are relative to `` (the install path — see top of this doc). + +- `/usr/local/bin/browser-harness-js` → `/sdk/browser-harness-js` (the CLI) +- `sdk/repl.ts` — HTTP server (`Bun.serve` on `127.0.0.1:9876`) +- `sdk/session.ts` — `Session` class (transport, connect, target routing, events) +- `sdk/generated.ts` — codegen output: every CDP method as a typed wrapper +- `sdk/gen.ts` — codegen script +- `sdk/{browser,js}_protocol.json` — upstream protocol (vendored) diff --git a/app/src/main/hl/stock/browser-harness-js/sdk/browser-harness-js b/app/src/main/hl/stock/browser-harness-js/sdk/browser-harness-js new file mode 100755 index 00000000..778dcc51 --- /dev/null +++ b/app/src/main/hl/stock/browser-harness-js/sdk/browser-harness-js @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +# browser-harness-js — eval JS in the persistent CDP REPL. Auto-starts the REPL on first use. +# +# Usage: +# browser-harness-js 'await session.connect({port:9222})' +# browser-harness-js 'await session.Page.navigate({url:"https://example.com"})' +# browser-harness-js <<'EOF' +# const t = await listPageTargets(); +# globalThis.tid = t[0].targetId; +# await session.use(globalThis.tid); +# return globalThis.tid +# EOF +# +# browser-harness-js --status # is the REPL running?
prints health JSON +# browser-harness-js --stop # gracefully shut it down +# browser-harness-js --logs # tail the REPL log +# browser-harness-js --restart # stop + start fresh (drops session state) +# browser-harness-js --start # explicit start (no-op if already running) + +set -euo pipefail + +PORT="${CDP_REPL_PORT:-9876}" +HOST="127.0.0.1" +URL="http://$HOST:$PORT" + +# Resolve repl.ts alongside this script, following symlinks (e.g. /usr/local/bin/browser-harness-js → /sdk/browser-harness-js). +SCRIPT_PATH="${BASH_SOURCE[0]}" +while [ -L "$SCRIPT_PATH" ]; do + SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_PATH")" && pwd)" + SCRIPT_PATH="$(readlink "$SCRIPT_PATH")" + [[ "$SCRIPT_PATH" != /* ]] && SCRIPT_PATH="$SCRIPT_DIR/$SCRIPT_PATH" +done +REPL="$(cd "$(dirname "$SCRIPT_PATH")" && pwd)/repl.ts" + +LOG="${CDP_REPL_LOG:-/tmp/browser-harness-js.log}" + +# Bootstrap bun if missing — the REPL server is Bun-native. +ensure_bun() { + if command -v bun >/dev/null 2>&1; then return 0; fi + # Handle fresh install: bun's install script drops the binary here but + # PATH isn't updated until the next login shell. + if [ -x "$HOME/.bun/bin/bun" ]; then + export PATH="$HOME/.bun/bin:$PATH" + return 0 + fi + if [ -n "${BROWSER_HARNESS_SKIP_BUN_INSTALL:-}" ]; then + echo "browser-harness-js: bun not found and BROWSER_HARNESS_SKIP_BUN_INSTALL is set." >&2 + echo " Install manually: curl -fsSL https://bun.sh/install | bash" >&2 + return 1 + fi + echo "browser-harness-js: installing bun (one-time, from https://bun.sh/install)..." >&2 + if ! curl -fsSL https://bun.sh/install | bash >&2; then + echo "browser-harness-js: bun install failed. Install manually from https://bun.sh, or set BROWSER_HARNESS_SKIP_BUN_INSTALL=1 to suppress this prompt." >&2 + return 1 + fi + export PATH="$HOME/.bun/bin:$PATH" + command -v bun >/dev/null 2>&1 || { + echo "browser-harness-js: bun installed but not found at \$HOME/.bun/bin/bun." 
>&2 + return 1 + } +} + +is_up() { + curl -fsS --max-time 1 "$URL/health" >/dev/null 2>&1 +} + +start_repl() { + is_up && return 0 + ensure_bun || return 1 + CDP_REPL_PORT="$PORT" nohup bun "$REPL" >"$LOG" 2>&1 & + for _ in $(seq 1 100); do + sleep 0.1 + is_up && return 0 + done + echo "browser-harness-js: REPL failed to start on $URL (see $LOG)" >&2 + return 1 +} + +post_eval() { + # Capture body + status separately. Body goes to stdout (only if non-empty) + # on 200; otherwise to stderr with non-zero exit. + local out status body + out=$(curl -sS -w '\n___STATUS___%{http_code}' --data-binary "$1" "$URL/eval") + status="${out##*___STATUS___}" + body="${out%$'\n'___STATUS___*}" + if [ "$status" = "200" ]; then + [ -n "$body" ] && printf '%s\n' "$body" + return 0 + else + [ -n "$body" ] && printf '%s\n' "$body" >&2 + return 1 + fi +} + +case "${1:-}" in + --status) + if is_up; then + curl -sS "$URL/health"; echo + else + echo '{"ok":false,"error":"down"}' + exit 1 + fi + ;; + --start) + start_repl + curl -sS "$URL/health"; echo + ;; + --stop) + if is_up; then + curl -s -X POST "$URL/quit" >/dev/null || true + echo '{"ok":true,"stopped":true}' + else + echo '{"ok":true,"stopped":false,"note":"already down"}' + fi + ;; + --restart) + is_up && curl -s -X POST "$URL/quit" >/dev/null 2>&1 || true + sleep 0.2 + start_repl + curl -sS "$URL/health"; echo + ;; + --logs) + exec tail -f "$LOG" + ;; + --help|-h) + sed -n '2,/^set -euo/p' "$0" | sed 's/^#//; s/^ //; /^set -euo/d' + ;; + "") + start_repl + code="$(cat)" + post_eval "$code" + ;; + *) + start_repl + post_eval "$1" + ;; +esac diff --git a/app/src/main/hl/stock/browser-harness-js/sdk/generated.ts b/app/src/main/hl/stock/browser-harness-js/sdk/generated.ts new file mode 100644 index 00000000..30f31c18 --- /dev/null +++ b/app/src/main/hl/stock/browser-harness-js/sdk/generated.ts @@ -0,0 +1,15160 @@ +/* eslint-disable */ +// AUTO-GENERATED by gen.ts. Do not edit by hand. 
+// Run `bun gen.ts` to regenerate from browser_protocol.json + js_protocol.json. + +export interface Transport { + _call(method: string, params?: unknown): Promise; +} + + +export namespace Accessibility { + + /** Unique accessibility node identifier. */ + export type AXNodeId = string; + + /** Enum of possible property types. */ + export type AXValueType = "boolean" | "tristate" | "booleanOrUndefined" | "idref" | "idrefList" | "integer" | "node" | "nodeList" | "number" | "string" | "computedString" | "token" | "tokenList" | "domRelation" | "role" | "internalRole" | "valueUndefined"; + + /** Enum of possible property sources. */ + export type AXValueSourceType = "attribute" | "implicit" | "style" | "contents" | "placeholder" | "relatedElement"; + + /** Enum of possible native property sources (as a subtype of a particular AXValueSourceType). */ + export type AXValueNativeSourceType = "description" | "figcaption" | "label" | "labelfor" | "labelwrapped" | "legend" | "rubyannotation" | "tablecaption" | "title" | "other"; + + /** A single source for a computed AX property. */ + export interface AXValueSource { + /** What type of source this is. */ + type: Accessibility.AXValueSourceType; + /** The value of this property source. */ + value?: Accessibility.AXValue; + /** The name of the relevant attribute, if any. */ + attribute?: string; + /** The value of the relevant attribute, if any. */ + attributeValue?: Accessibility.AXValue; + /** Whether this source is superseded by a higher priority source. */ + superseded?: boolean; + /** The native markup source for this value, e.g. a `