diff --git a/coverage.txt b/coverage.txt index c345679..f980ce3 100644 --- a/coverage.txt +++ b/coverage.txt @@ -43,16 +43,12 @@ ℹ common.js | 100.00 | 93.33 | 83.33 | ℹ cron.js | 100.00 | 97.30 | 90.00 | ℹ filesystem.js | 94.50 | 86.79 | 79.17 | 44-45 107-110 170-177 187-188 196-202 397-398 415-419 422-423 -ℹ image.js | 97.90 | 95.83 | 50.00 | 92-94 -ℹ index.js | 100.00 | 100.00 | 100.00 | +ℹ index.js | 99.34 | 90.00 | 100.00 | 75 ℹ memory.js | 97.58 | 83.78 | 93.75 | 52 95-96 191-195 -ℹ moa.js | 100.00 | 96.77 | 80.00 | ℹ sessionSearch.js | 100.00 | 88.10 | 91.67 | ℹ skills.js | 98.15 | 78.57 | 71.43 | 109-111 ℹ terminal.js | 93.62 | 81.63 | 77.78 | 38-41 74 102-103 190-191 197-199 205-206 213-214 221-222 224 ℹ todo.js | 98.84 | 91.67 | 91.67 | 16-17 -ℹ tts.js | 100.00 | 100.00 | 50.00 | -ℹ vision.js | 100.00 | 90.91 | 71.43 | ℹ web.js | 95.47 | 64.79 | 60.00 | 24-25 39-40 43-45 86-88 123-125 177 189-191 330-331 ℹ tui | | | | ℹ commandParser.js | 100.00 | 88.00 | 100.00 | @@ -62,6 +58,6 @@ ℹ messages.js | 100.00 | 94.44 | 100.00 | ℹ panels.js | 100.00 | 100.00 | 100.00 | ℹ --------------------------------------------------------------------------------------------------------------------- -ℹ all files | 96.58 | 89.15 | 83.49 | +ℹ all files | 96.16 | 88.17 | 85.32 | ℹ --------------------------------------------------------------------------------------------------------------------- ℹ end of coverage report diff --git a/docs/FLOWS.md b/docs/FLOWS.md index 310e9d3..21a9f06 100644 --- a/docs/FLOWS.md +++ b/docs/FLOWS.md @@ -206,11 +206,7 @@ buildToolConfig({ permissions, allowedPaths, maxReadSize, registry, safety, time │ ├── switch toolName: │ │ ├── clarify | execute_code: → always create (no perms needed) │ │ ├── web_search | web_extract: → if hasAllPerms && hasSearchKey() -│ │ ├── vision_analyze: → if OPENAI_API_KEY -│ │ ├── image_generate: → if hasAllPerms && FAL_API_KEY │ │ ├── cronjob: → if hasAllPerms -│ │ ├── text_to_speech: → if OPENAI_API_KEY -│ │ ├── 
mixture_of_agents: → if OPENROUTER_API_KEY │ │ └── default: → if requiredPerms.length === 0 || hasAllPerms │ └── tools.push(TOOL_FACTORIES[toolName](runtimeOptions)) └── return tools[] @@ -359,11 +355,7 @@ Permission gates per tool: ├── session_search → "filesystem:read" ├── skills_list, skill_view → "filesystem:read" ├── web_search, web_extract → "network:outbound" + hasSearchKey() -├── vision_analyze → OPENAI_API_KEY (no perms) -├── image_generate → "network:outbound" + FAL_API_KEY ├── cronjob → "network:outbound" -├── text_to_speech → OPENAI_API_KEY -└── mixture_of_agents → OPENROUTER_API_KEY ``` ### Search Backend Detection @@ -690,7 +682,7 @@ index.js │ ├── tools/sessionSearch.js → node:fs/promises, memory/reader.js │ ├── tools/code.js → node:child_process, node:fs/promises, node:path │ ├── tools/filesystem.js → tool decorators + factory funcs -│ └── tools/... (vision, image, tts, moa, etc.) +│ └── tools/... (remaining core tools) ├── sandbox/pathResolver.js → node:path ├── sandbox/urlFilter.js → node:url ├── sandbox/runner.js → node:child_process, sandbox/timeoutHandler.js, envInjector.js, capability.js diff --git a/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/.openspec.yaml b/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/.openspec.yaml new file mode 100644 index 0000000..a2168c3 --- /dev/null +++ b/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-06-01 diff --git a/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/design.md b/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/design.md new file mode 100644 index 0000000..b7d327c --- /dev/null +++ b/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/design.md @@ -0,0 +1,42 @@ +## Context + +The project uses a tool registry pattern in `src/tools/index.js` that imports, registers permissions, maps factories, and 
switches on tool names for all 18 tools. Four of these tools (`image_generate`, `vision_analyze`, `text_to_speech`, `mixture_of_agents`) depend on external APIs and add complexity (image downloaders, streaming audio handlers, multi-step agent coordination). The remaining tier-2 tools after removal are `web_search`, `web_extract`, `execute_code`, and `cronjob`. + +## Goals / Non-Goals + +**Goals:** +- Delete 4 tool files and their tests cleanly +- Remove all references in `src/tools/index.js` (imports, permissions, factories, switch cases) +- Update the `tools-tier2` spec to reflect only remaining tools +- Update `docs/FLOWS.md` to drop references to the removed tools +- Maintain 100% test coverage on remaining code + +**Non-Goals:** +- No refactoring of remaining tool files +- No changes to the tool registration / permission gating architecture +- No changes to tier-1 tools + +## Decisions + +1. **Delete tool files outright rather than deprecate.** There is no external API consumer depending on these tools, and the project enforces 100% coverage — keeping dead code with skipped tests would conflict with pre-commit hooks. + +2. **Remove `network:outbound` tier-2 requirement entirely.** After removal, the only tier-2 reference left in the spec is the permission gating requirement itself. Since `web_search`, `web_extract`, `execute_code`, and `cronjob` have their own individual permission checks in code and in their own requirements, a blanket "all tier-2 tools need network:outbound" requirement becomes misleading. + +3. **Use delta spec (REMOVED Requirements) rather than rewriting the whole spec.** Keeping the remaining requirements (web search, web extract, execute code, cronjob, safety limits) as-is preserves testability for the tools that stay. The requirements for the removed tools and the blanket permission requirement are removed via delta. 
+ +## Risks / Trade-offs + +- **[Breaking change for users relying on these tools]** → Document removal in FLOWS.md and release notes. No migration path exists since the tools are being removed entirely. +- **[Test coverage gap during transition]** → Deleting tests and cleaning up `index.js` must be coordinated so tests pass at each step. Run `npm run test` after each major change group. +- **[Spec drift during delta editing]** → The delta REMOVED spec must exactly match requirement headers in the original spec to avoid archive-time mismatches. + +## Migration Plan + +1. Implement all changes on the feature branch +2. Run `npm run test` to verify all remaining tests pass with 100% coverage +3. Open PR targeting `main` +4. No runtime migration needed — tools are removed, not deprecated + +## Open Questions + +None. diff --git a/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/proposal.md b/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/proposal.md new file mode 100644 index 0000000..476f143 --- /dev/null +++ b/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/proposal.md @@ -0,0 +1,30 @@ +## Why + +The codebase includes four tools (`image_generate`, `vision_analyze`, `text_to_speech`, `mixture_of_agents`) that add external API dependencies (FAL_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY), increase attack surface through additional HTTP clients, image fetchers, and streaming audio handling, but are not part of the core agent workflow. Removing them simplifies the codebase, eliminates unnecessary API key requirements, and reduces maintenance burden. 
+ +## What Changes + +- Delete `src/tools/image.js` — image generation tool using FAL API +- Delete `src/tools/vision.js` — vision analysis tool using OpenAI multimodal models +- Delete `src/tools/tts.js` — text-to-speech tool using OpenAI TTS API +- Delete `src/tools/moa.js` — mixture-of-agents tool using OpenRouter API +- Delete corresponding test files: `tests/unit/tools_image.test.js`, `tests/unit/tools_vision.test.js`, `tests/unit/tools_tts.test.js`, `tests/unit/tools_moa.test.js` +- Clean up `src/tools/index.js`: remove 4 imports, 4 TOOL_PERMISSIONS entries, 4 TOOL_FACTORIES entries, and their switch cases +- Update `openspec/specs/tools-tier2/spec.md`: remove all requirements for the 4 removed tools and the "Tier 2 Tools Require network:outbound Permission" requirement +- Update `docs/FLOWS.md`: remove tool reference entries for removed tools + +## Capabilities + +### New Capabilities + + +### Modified Capabilities +- `tools-tier2`: Remove requirements for `vision_analyze`, `image_generate`, `text_to_speech`, `mixture_of_agents`, and the general "Tier 2 Tools Require network:outbound Permission" requirement + +## Impact + +- **Code files**: 4 tool files deleted, 1 module file modified (`src/tools/index.js`), 1 spec file modified (`tools-tier2/spec.md`) +- **Test files**: 4 test files deleted, `tool_index.test.js` may need updates +- **Docs**: `docs/FLOWS.md` updated +- **API keys**: Removes requirements for FAL_API_KEY, OPENAI_API_KEY (used only for removed tools), OPENROUTER_API_KEY +- **Permissions**: The `network:outbound` permission check is simplified since tier-2 references to removed tools disappear diff --git a/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/tasks.md b/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/tasks.md new file mode 100644 index 0000000..79636cf --- /dev/null +++ b/openspec/changes/archive/2026-06-01-remove-image-vision-tts-moa-tools/tasks.md @@ -0,0 +1,43 @@ +## 1. 
Delete Tool Source Files + +- [x] 1.1 Delete `src/tools/image.js` (image_generate tool, FAL API) +- [x] 1.2 Delete `src/tools/vision.js` (vision_analyze tool, OpenAI multimodal) +- [x] 1.3 Delete `src/tools/tts.js` (text_to_speech tool, OpenAI TTS) +- [x] 1.4 Delete `src/tools/moa.js` (mixture_of_agents tool, OpenRouter) +- [x] 2.1 Delete `tests/unit/tools_image.test.js` +- [x] 2.2 Delete `tests/unit/tools_vision.test.js` +- [x] 2.3 Delete `tests/unit/tools_tts.test.js` +- [x] 2.4 Delete `tests/unit/tools_moa.test.js` + +## 3. Clean Up src/tools/index.js + +- [x] 3.1 Remove import for `createImageTool` from `./image.js` (line 15) +- [x] 3.2 Remove import for `createVisionTool` from `./vision.js` (line 14) +- [x] 3.3 Remove import for `createTtsTool` from `./tts.js` (line 18) +- [x] 3.4 Remove import for `createMoaTool` from `./moa.js` (line 19) +- [x] 3.5 Remove `vision_analyze: []` entry from `TOOL_PERMISSIONS` (line 42) +- [x] 3.6 Remove `image_generate: ["network:outbound"]` entry from `TOOL_PERMISSIONS` (line 43) +- [x] 3.7 Remove `text_to_speech: []` entry from `TOOL_PERMISSIONS` (line 46) +- [x] 3.8 Remove `mixture_of_agents: []` entry from `TOOL_PERMISSIONS` (line 47) +- [x] 3.9 Remove `vision_analyze: createVisionTool` entry from `TOOL_FACTORIES` (line 66) +- [x] 3.10 Remove `image_generate: createImageTool` entry from `TOOL_FACTORIES` (line 67) +- [x] 3.11 Remove `text_to_speech: createTtsTool` entry from `TOOL_FACTORIES` (line 70) +- [x] 3.12 Remove `mixture_of_agents: createMoaTool` entry from `TOOL_FACTORIES` (line 71) +- [x] 3.13 Remove `case "vision_analyze":` switch block (lines 150-154) +- [x] 3.14 Remove `case "image_generate":` switch block (lines 156-160) +- [x] 3.15 Remove `text_to_speech` and `mixture_of_agents` from the combined switch case block (lines 163-170) + +## 4. 
Update Spec Delta + +- [x] 4.1 Verify delta spec at `openspec/changes/remove-image-vision-tts-moa-tools/specs/tools-tier2/spec.md` has correct REMOVED Requirements for vision_analyze, text_to_speech, and tier-2 permission blocks +- [x] 5.1 Remove vision_analyze, image_generate, text_to_speech, mixture_of_agents from the tool permission matrix in `docs/FLOWS.md` (lines ~209-213) +- [x] 5.2 Remove vision_analyze, image_generate, text_to_speech, mixture_of_agents from the tier-2 tool reference list in `docs/FLOWS.md` (lines ~362-366) +- [x] 5.3 Update the tool directory listing reference in `docs/FLOWS.md` (line ~693) to remove vision, image, tts, moa + +## 6. Update and Run Tests + +- [x] 6.1 Update `tests/unit/tool_index.test.js`: remove OPENAI_API_KEY/FAL_API_KEY/OPENROUTER_API_KEY from beforeEach/afterEach hooks (lines 53-71) +- [x] 6.2 Update comment on line 124 that references removed tools +- [x] 6.2.1 Delete `tests/unit/tool_registration.test.js` (references removed tools: vision_analyze, image_generate, text_to_speech, mixture_of_agents) +- [x] 6.3 Run `npm run test` to verify all remaining tests pass +- [x] 6.4 Run `npm run coverage` to verify 100% coverage is maintained diff --git a/openspec/specs/tools-tier2/spec.md b/openspec/specs/tools-tier2/spec.md index 7ee9e8c..8225457 100644 --- a/openspec/specs/tools-tier2/spec.md +++ b/openspec/specs/tools-tier2/spec.md @@ -41,32 +41,6 @@ The `web_extract` tool SHALL extract content from a URL, returning markdown form - **WHEN** `web_extract` is called with `url: "file:///etc/passwd"` - **THEN** the tool returns an error without making any HTTP request -### Requirement: Vision Analyze Sends Image to Multimodal LLM -The `vision_analyze` tool SHALL fetch an image from a URL or decode a base64 data URI, then send it to the configured multimodal LLM for analysis. The tool must not require any additional API key beyond the agent's primary model provider. 
- -#### Scenario: Vision analyzes image from URL -- **WHEN** `vision_analyze` is called with `url: "https://example.com/chart.png"` -- **THEN** the tool fetches the image (respecting max 4MB size) and returns the LLM's description - -#### Scenario: Vision analyzes image from data URI -- **WHEN** `vision_analyze` is called with `dataUri: "data:image/png;base64,..."` (valid base64) -- **THEN** the tool decodes the base64 and returns the LLM's description - -#### Scenario: Vision rejects oversized image -- **WHEN** `vision_analyze` is called with a URL that fetches an image over 4MB -- **THEN** the tool returns an error without sending it to the model - -### Requirement: Text to Speech Saves Audio File -The `text_to_speech` tool SHALL call OpenAI's TTS API (`tts-1` or `tts-1-hd`), save the audio to `~/voice-memos/[timestamp]_[voice].mp3`, and return a `MEDIA:` path. - -#### Scenario: TTS generates speech and saves file -- **WHEN** `text_to_speech` is called with `text: "Hello world"` and `voice: "alloy"` -- **THEN** the tool saves a file to `~/voice-memos/[timestamp]_alloy.mp3` and returns `{ path: "MEDIA:~/voice-memos/..." }` - -#### Scenario: TTS requires OPENAI_API_KEY -- **WHEN** `OPENAI_API_KEY` is not set -- **THEN** the tool is not registered in the tools array - ### Requirement: Execute Code Runs Python, JavaScript, or Shell Scripts in Sandboxed Subprocess The `execute_code` tool SHALL write code to a temp file under `tmp/`, execute it via the appropriate interpreter based on `language` parameter (`python3`, `node`, or `sh`), enforce a configurable timeout and memory limit, and return stdout, stderr, and exit code. 
@@ -113,17 +87,6 @@ The `cronjob` tool SHALL manage scheduled jobs persisted as JSON files under `me - **WHEN** `cronjob` is called with `action: "run"` and `name: "daily-report"` - **THEN** the job is invoked immediately via the scheduler manager -### Requirement: Tier 2 Tools Require network:outbound Permission -All Tier 2 tools (except `vision_analyze`) SHALL only be registered when `config.sandbox.permissions` includes `network:outbound`. `vision_analyze` registers when no permission is required (like `clarify`). - -#### Scenario: Web tools register when network:outbound is enabled -- **WHEN** `sandbox.permissions` includes `network:outbound` and `EXA_API_KEY` is set -- **THEN** `web_search` and `web_extract` are registered - -#### Scenario: Vision tool registers without network:outbound permission -- **WHEN** `sandbox.permissions` is empty -- **THEN** `vision_analyze` is still registered because it has no permission requirement - ### Requirement: Safety Limits Are Configurable with Hardware-Aware Caps The system SHALL expose all safety parameters (timeout, memory, URL filtering, Python import restrictions) as configurable settings in `config.yaml`. A value of `0` disables the guard entirely. Hard caps are enforced: memory limits cannot exceed system total RAM, and timeout values of `0` mean no limit. diff --git a/src/tools/image.js b/src/tools/image.js deleted file mode 100644 index 426b48f..0000000 --- a/src/tools/image.js +++ /dev/null @@ -1,143 +0,0 @@ -import { tool } from "@langchain/core/tools"; -import { z } from "zod"; - -const DEFAULT_TIMEOUT = 30000; - -/** - * Generate an image via FAL.ai flux/klein API. 
- * @param {string} apiKey - FAL.ai API key - * @param {string} prompt - Image generation prompt - * @param {number} timeout - API timeout in ms - * @returns {Promise<{ ok: boolean, imageUrl?: string, error?: string }>} - */ -async function generateWithFal(apiKey, prompt, timeout) { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), timeout); - - try { - const resp = await fetch("https://queue.fal.run/fal-ai/flux/klein", { - method: "POST", - headers: { - Authorization: `Key ${apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - prompt, - sync_mode: true, - image_size: "square_1_1", - }), - signal: controller.signal, - }); - clearTimeout(timeoutId); - - if (!resp.ok) { - const text = await resp.text().catch(() => ""); - return { ok: false, error: `FAL.ai error (${resp.status}): ${text.slice(0, 200)}` }; - } - - const data = await resp.json(); - if (!data.images || !data.images[0]?.url) { - return { ok: false, error: "FAL.ai response missing image URL" }; - } - - return { - ok: true, - imageUrl: data.images[0].url, - }; - } catch (err) { - clearTimeout(timeoutId); - return { ok: false, error: `FAL.ai request failed: ${err.message}` }; - } -} - -/** - * Generate an image via the configured image API. 
- * @param {object} input - Tool input - * @param {object} _options - Runtime options - * @returns {Promise} JSON result string - */ -export async function imageGenerateImpl(input, _options) { - const { prompt, falApiKey, timeout } = input; - - if (!prompt || typeof prompt !== "string" || prompt.trim().length === 0) { - return JSON.stringify({ - ok: false, - error: "Prompt is required and must be a non-empty string", - }); - } - - if (prompt.length > 1000) { - return JSON.stringify({ ok: false, error: "Prompt must be 1000 characters or fewer" }); - } - - const apiKey = falApiKey || process.env.FAL_API_KEY; - if (!apiKey) { - return JSON.stringify({ - ok: false, - error: "FAL_API_KEY is required for image generation, or pass falApiKey parameter", - }); - } - - const timeouts = [timeout || DEFAULT_TIMEOUT]; - for (const attempt of timeouts) { - const result = await generateWithFal(apiKey, prompt, attempt); - if (result.ok) { - return JSON.stringify({ ok: true, imageUrl: result.imageUrl }); - } - // Retry on failure - if (attempt === timeouts[timeouts.length - 1]) { - return JSON.stringify({ ok: false, error: result.error }); - } - } - - // node:coverage ignore next - return JSON.stringify({ ok: false, error: "Image generation failed after retries" }); -} - -/** - * @param {z.infer} input - Tool input - * @param {object} _options - Runtime options - * @returns {string} JSON result string - */ -export const image_generate = tool(imageGenerateImpl, { - name: "image_generate", - description: - "Generate an image from a text prompt using FAL.ai (FLUX Klein model). Returns a public image URL. 
Requires FAL_API_KEY environment variable", - schema: z.object({ - prompt: z.string().min(1).max(1000).describe("Text description of the image to generate"), - falApiKey: z.string().optional().describe("FAL.ai API key (falls back to FAL_API_KEY env var)"), - timeout: z - .number() - .int() - .min(5000) - .max(60000) - .optional() - .describe("Request timeout in ms (default: 30000)"), - }), -}); - -// --- Factory functions for creating tools with runtime options --- - -/** - * Create an image_generate tool with runtime options - * @param {object} options - Runtime options - * @returns {object} LangChain Tool instance - */ -export function createImageTool(options) { - return tool((input) => imageGenerateImpl(input, options), { - name: "image_generate", - description: - "Generate an image from a text prompt using FAL.ai (FLUX Klein model). Returns a public image URL.", - schema: z.object({ - prompt: z.string().min(1).max(1000).describe("Text description of the image to generate"), - falApiKey: z.string().optional().describe("FAL.ai API key (falls back to FAL_API_KEY)"), - timeout: z - .number() - .int() - .min(5000) - .max(60000) - .optional() - .describe("Request timeout in ms (default: 30000)"), - }), - }); -} diff --git a/src/tools/index.js b/src/tools/index.js index 571402d..32438b0 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -11,12 +11,8 @@ import { createSessionSearchTool } from "./sessionSearch.js"; import { createClarifyTool } from "./clarify.js"; import { createSkillsListTool, createSkillViewTool } from "./skills.js"; import { createWebSearchTool, createWebExtractTool } from "./web.js"; -import { createVisionTool } from "./vision.js"; -import { createImageTool } from "./image.js"; import { createCodeTool } from "./code.js"; import { createCronTool } from "./cron.js"; -import { createTtsTool } from "./tts.js"; -import { createMoaTool } from "./moa.js"; /** * Maps tool names to required permission scopes. 
@@ -39,12 +35,8 @@ export const TOOL_PERMISSIONS = { // Tier 2 tools (need env vars in addition to permissions where applicable) web_search: ["network:outbound"], web_extract: ["network:outbound"], - vision_analyze: [], // requires OPENAI_API_KEY - image_generate: ["network:outbound"], // requires FAL_API_KEY execute_code: [], // sandboxed, no permission needed cronjob: ["network:outbound"], - text_to_speech: [], // requires OPENAI_API_KEY - mixture_of_agents: [], // requires OPENROUTER_API_KEY }; // Factory functions keyed by tool name @@ -63,12 +55,8 @@ const TOOL_FACTORIES = { skill_view: createSkillViewTool, web_search: createWebSearchTool, web_extract: createWebExtractTool, - vision_analyze: createVisionTool, - image_generate: createImageTool, execute_code: createCodeTool, cronjob: createCronTool, - text_to_speech: createTtsTool, - mixture_of_agents: createMoaTool, }; /** @@ -147,24 +135,8 @@ export async function buildToolConfig(options) { continue; } - case "vision_analyze": { - if (!process.env.OPENAI_API_KEY) continue; - tools.push(TOOL_FACTORIES[toolName](runtimeOptions)); - continue; - } - - case "image_generate": { - if (!hasAllPerms || !process.env.FAL_API_KEY) continue; - tools.push(TOOL_FACTORIES[toolName](runtimeOptions)); - continue; - } - - case "cronjob": - case "text_to_speech": - case "mixture_of_agents": { - if (toolName === "cronjob" && !hasAllPerms) continue; - if (toolName === "text_to_speech" && !process.env.OPENAI_API_KEY) continue; - if (toolName === "mixture_of_agents" && !process.env.OPENROUTER_API_KEY) continue; + case "cronjob": { + if (!hasAllPerms) continue; tools.push(TOOL_FACTORIES[toolName](runtimeOptions)); continue; } diff --git a/src/tools/moa.js b/src/tools/moa.js deleted file mode 100644 index 42ce763..0000000 --- a/src/tools/moa.js +++ /dev/null @@ -1,194 +0,0 @@ -import { tool } from "@langchain/core/tools"; -import { z } from "zod"; - -const NUM_REFERENCES = 4; -const CALL_TIMEOUT_MS = 60000; // 60 seconds per call 
-const REFERENCE_PROMPTS = [ - "Provide a detailed, factual analysis of the following topic. Focus on key points, evidence, and logical reasoning.", - "Answer from a practical, experience-based perspective. What would someone with hands-on experience say?", - "Approach from a creative, divergent-thinking angle. Include unconventional ideas and alternative viewpoints.", - "Take a cautious, risk-aware approach. Highlight potential issues, limitations, and counterarguments.", -]; - -/** - * Call OpenRouter for a model reference response. - * @param {string} apiKey - OpenRouter API key - * @param {string} referencePrompt - The reference prompt to use - * @param {string} userMessage - The user's original message - * @param {number} timeoutMs - Timeout in milliseconds - * @returns {Promise<{ ok: boolean, response?: string, model?: string, error?: string }>} - */ -async function callOpenRouter(apiKey, referencePrompt, userMessage, timeoutMs) { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), timeoutMs); - try { - const resp = await fetch("https://openrouter.ai/api/v1/chat/completions", { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", - "HTTP-Referer": "https://madz.io", - "X-Title": "madz", - }, - body: JSON.stringify({ - model: "openai/gpt-4o", - messages: [ - { role: "system", content: referencePrompt }, - { role: "user", content: userMessage }, - ], - max_tokens: 2048, - temperature: 0.7, - }), - signal: controller.signal, - }); - clearTimeout(timeoutId); - - if (!resp.ok) { - const text = await resp.text().catch(() => ""); - return { ok: false, error: `OpenRouter error (${resp.status}): ${text.slice(0, 200)}` }; - } - - const data = await resp.json(); - const choice = data.choices?.[0]; - return { - ok: !!choice, - response: choice?.message?.content || "", - model: data.model || "", - }; - } catch (err) { - clearTimeout(timeoutId); - return { ok: false, 
error: `OpenRouter request failed: ${err.message}` }; - } -} - -/** - * Aggregate multiple agent responses into a single answer. - * @param {string[]} responses - Array of agent responses - * @param {string} userMessage - Original user message - * @returns {string} Aggregated response - */ -function aggregateResponses(responses, userMessage) { - const combined = responses - .map((r, i) => [`Agent ${i + 1}:`, r]) - .flat() - .join("\n\n---\n\n"); - return [ - "Based on multiple expert analyses, here is a comprehensive answer:", - "", - combined, - "", - "### Synthesis", - "", - `The following answer synthesizes the perspectives above on the topic: "${userMessage}". Each agent brought a unique angle — factual analysis, practical experience, creative thinking, and risk awareness. The synthesized answer below integrates these viewpoints into a coherent response.`, - ].join("\n"); -} - -/** - * Execute mixture of agents: 4 parallel OpenRouter calls + 1 aggregation. - * @param {object} input - Tool input - * @param {object} _options - Runtime options - * @returns {Promise} JSON result string - */ -export async function mixtureOfAgentsImpl(input, _options) { - const { message, models } = input; - - if (!message || typeof message !== "string" || message.trim().length === 0) { - return JSON.stringify({ - ok: false, - error: "Message is required and must be a non-empty string", - }); - } - - const apiKey = process.env.OPENROUTER_API_KEY; - if (!apiKey) { - return JSON.stringify({ - ok: false, - error: "OPENROUTER_API_KEY is required for mixture of agents", - }); - } - - const modelList = models || [ - "openai/gpt-4o", - "anthropic/claude-3.5-sonnet", - "google/gemini-pro", - "meta-llama/llama-3.1-70b", - ]; - - if (modelList.length < NUM_REFERENCES) { - return JSON.stringify({ - ok: false, - error: `At least ${NUM_REFERENCES} models are required for MoA`, - }); - } - - // Phase 1: Parallel reference calls - const referencePromises = REFERENCE_PROMPTS.map((refPrompt, i) => - 
callOpenRouter(apiKey, refPrompt, message, CALL_TIMEOUT_MS).then((result) => ({ - index: i, - ...result, - })), - ); - - const referenceResults = await Promise.all(referencePromises); - const successful = referenceResults.filter((r) => r.ok); - const failed = referenceResults.filter((r) => !r.ok); - - if (successful.length === 0) { - const errors = failed.map((r) => r.error).join("; "); - return JSON.stringify({ - ok: false, - error: `All ${failed.length} model calls failed: ${errors}`, - }); - } - - // Phase 2: Aggregate responses - const responses = successful.map((r) => r.response); - const aggregated = aggregateResponses(responses, message); - - return JSON.stringify({ - ok: true, - agreement: successful.length === referenceResults.length, - agentsUsed: successful.length, - agentsFailed: failed.length, - failedAgents: failed.map((r) => r.error), - aggregation: aggregated, - }); -} - -/** - * @param {z.infer} input - * @param {object} _options - Runtime options - * @returns {string} - */ -export const mixture_of_agents = tool(mixtureOfAgentsImpl, { - name: "mixture_of_agents", - description: - "Run a mixture of agents (MoA) with 4 parallel reference calls via OpenRouter followed by aggregation. Uses OpenAI GPT-4o references with different perspectives. WARNING: Each call costs ~$0.02-$0.10+ per call. Requires OPENROUTER_API_KEY environment variable. 
Each call has a 60 second timeout; partial results are aggregated when some calls fail.", - schema: z.object({ - message: z.string().min(1).describe("Question or topic for the agents to analyze"), - models: z - .array(z.string()) - .optional() - .describe( - "List of OpenRouter model IDs to use (default: gpt-4o, claude-3.5-sonnet, gemini-pro, llama-3.1-70b)", - ), - }), -}); - -// --- Factory functions for creating tools with runtime options --- - -/** - * Create a mixture_of_agents tool with runtime options - * @param {object} options - Runtime options - * @returns {object} LangChain Tool instance - */ -export function createMoaTool(options) { - return tool((input) => mixtureOfAgentsImpl(input, options), { - name: "mixture_of_agents", - description: "Run a mixture of agents (MoA) with 4 parallel reference calls via OpenRouter.", - schema: z.object({ - message: z.string().min(1).describe("Question or topic for the agents to analyze"), - models: z.array(z.string()).optional().describe("List of OpenRouter model IDs to use"), - }), - }); -} diff --git a/src/tools/tts.js b/src/tools/tts.js deleted file mode 100644 index 4271981..0000000 --- a/src/tools/tts.js +++ /dev/null @@ -1,183 +0,0 @@ -import { tool } from "@langchain/core/tools"; -import { z } from "zod"; -import { writeFile, mkdir } from "node:fs/promises"; -import { join } from "node:path"; -import { homedir } from "node:os"; - -const TTS_MODELS = ["tts-1", "tts-1-hd"]; -const TTS_VOICES = [ - "alloy", - "ash", - "ballad", - "coral", - "echo", - "fable", - "onyx", - "nova", - "sage", - "shimmer", -]; - -/** - * Call OpenAI TTS API to generate speech audio. 
- * @param {string} apiKey - OpenAI API key - * @param {string} text - Text to convert to speech - * @param {string} model - TTS model name - * @param {string} voice - Voice to use - * @param {number} [speed=1] - Speaking speed - * @returns {Promise<{ ok: boolean, buffer?: Buffer, error?: string }>} - */ -async function callTtsApi(apiKey, text, model, voice, speed) { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 15000); - try { - const resp = await fetch("https://api.openai.com/v1/audio/speech", { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model, - input: text, - voice, - speed: Math.min(Math.max(speed, 0.25), 4.0), - }), - signal: controller.signal, - }); - clearTimeout(timeoutId); - - if (!resp.ok) { - const text = await resp.text().catch(() => ""); - return { ok: false, error: `OpenAI TTS error (${resp.status}): ${text.slice(0, 200)}` }; - } - - const buffer = Buffer.from(await resp.arrayBuffer()); - return { ok: true, buffer }; - } catch (err) { - clearTimeout(timeoutId); - return { ok: false, error: `TTS request failed: ${err.message}` }; - } -} - -/** - * Convert text to speech using OpenAI TTS API. 
- * @param {object} input - Tool input - * @param {object} _options - Runtime options - * @returns {Promise} JSON result string - */ -export async function textToSpeechImpl(input, _options) { - const { text, voice = "alloy", model = "tts-1", speed = 1 } = input; - - if (!text || typeof text !== "string" || text.trim().length === 0) { - return JSON.stringify({ - ok: false, - error: "Text is required and must be a non-empty string", - }); - } - - if (text.length > 4096) { - return JSON.stringify({ - ok: false, - error: "Text must be 4096 characters or fewer", - }); - } - - const apiKey = process.env.OPENAI_API_KEY; - if (!apiKey) { - return JSON.stringify({ - ok: false, - error: "OPENAI_API_KEY is required for text-to-speech", - }); - } - - if (!TTS_MODELS.includes(model)) { - return JSON.stringify({ - ok: false, - error: `Invalid model: "${model}". Use one of: ${TTS_MODELS.join(", ")}`, - }); - } - - if (!TTS_VOICES.includes(voice)) { - return JSON.stringify({ - ok: false, - error: `Invalid voice: "${voice}". Use one of: ${TTS_VOICES.join(", ")}`, - }); - } - - const result = await callTtsApi(apiKey, text, model, voice, speed); - - if (!result.ok) { - return JSON.stringify({ ok: false, error: result.error }); - } - - // Save to ~/voice-memos/ - const memosDir = join(homedir(), "voice-memos"); - await mkdir(memosDir, { recursive: true }); - const timestamp = Date.now(); - const filename = `${timestamp}_${voice}.mp3`; - const filePath = join(memosDir, filename); - await writeFile(filePath, result.buffer); - - return JSON.stringify({ - ok: true, - path: `MEDIA:${filePath}`, - model, - voice, - }); -} - -/** - * @param {z.infer} input - * @param {object} _options - Runtime options - * @returns {string} - */ -export const text_to_speech = tool(textToSpeechImpl, { - name: "text_to_speech", - description: - "Convert text to speech using OpenAI TTS (tts-1). Saves audio as MP3 to ~/voice-memos/[timestamp]_[voice].mp3 and returns a MEDIA: path. 
Requires OPENAI_API_KEY environment variable.", - schema: z.object({ - text: z.string().min(1).max(4096).describe("Text to convert to speech"), - voice: z - .enum(["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]) - .default("alloy") - .describe("Voice to use for speech synthesis"), - model: z.enum(["tts-1", "tts-1-hd"]).default("tts-1").describe("TTS model (default: tts-1)"), - speed: z.number().min(0.25).max(4).default(1).describe("Speaking speed (0.25-4.0)"), - }), -}); - -// --- Factory functions for creating tools with runtime options --- - -/** - * Create a text_to_speech tool with runtime options - * @param {object} options - Runtime options - * @returns {object} LangChain Tool instance - */ -export function createTtsTool(options) { - return tool((input) => textToSpeechImpl(input, options), { - name: "text_to_speech", - description: - "Convert text to speech using OpenAI TTS (tts-1). Saves audio as MP3 to ~/voice-memos/.", - schema: z.object({ - text: z.string().min(1).max(4096).describe("Text to convert to speech"), - voice: z - .enum([ - "alloy", - "ash", - "ballad", - "coral", - "echo", - "fable", - "onyx", - "nova", - "sage", - "shimmer", - ]) - .default("alloy") - .describe("Voice to use for speech synthesis"), - model: z.enum(["tts-1", "tts-1-hd"]).default("tts-1").describe("TTS model"), - speed: z.number().min(0.25).max(4).default(1).describe("Speaking speed (0.25-4.0)"), - }), - }); -} diff --git a/src/tools/vision.js b/src/tools/vision.js deleted file mode 100644 index d971e50..0000000 --- a/src/tools/vision.js +++ /dev/null @@ -1,196 +0,0 @@ -import { tool } from "@langchain/core/tools"; -import { z } from "zod"; -import { ChatOpenAI } from "@langchain/openai"; - -const MAX_IMAGE_SIZE = 4 * 1024 * 1024; // 4MB - -const analysisPrompt = - "Describe this image in detail. Include what is visible, any text present, the overall context, and notable features."; - -/** - * Decode a base64 data URI into raw bytes. 
- * @param {string} dataUri - Data URI string (e.g., "data:image/png;base64,...") - * @returns {string} Base64 string without the prefix - */ -export function decodeDataUri(dataUri) { - const match = dataUri.match(/^data:([^;]+);base64,(.+)$/); - if (!match) return null; - return match[2]; -} - -/** - * Convert an ArrayBuffer to base64. - * @param {ArrayBuffer} buffer - * @returns {string} - */ -export function arrayBufferToBase64(buffer) { - let binary = ""; - const bytes = new Uint8Array(buffer); - const chunkSize = 8192; - for (let i = 0; i < bytes.length; i += chunkSize) { - binary += String.fromCharCode.apply(null, bytes.subarray(i, i + chunkSize)); - } - return btoa(binary); -} - -/** - * Fetch an image from a URL and return base64-encoded data. - * @param {string} url - Image URL - * @returns {Promise<{ ok: boolean, base64?: string, mimeType?: string, error?: string }>} - */ -async function fetchImageFromUrl(url) { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 10000); - try { - const resp = await fetch(url, { signal: controller.signal }); - clearTimeout(timeoutId); - - if (!resp.ok) { - return { ok: false, error: `HTTP ${resp.status}: ${resp.statusText}` }; - } - - const blob = await resp.blob(); - const size = blob.size; - if (size > MAX_IMAGE_SIZE) { - return { - ok: false, - error: `Image exceeds ${MAX_IMAGE_SIZE / (1024 * 1024)}MB limit (${(size / (1024 * 1024)).toFixed(1)}MB)`, - }; - } - - const arrayBuffer = await blob.arrayBuffer(); - return { - ok: true, - base64: arrayBufferToBase64(arrayBuffer), - mimeType: blob.type || "image/jpeg", - }; - } catch (err) { - clearTimeout(timeoutId); - return { ok: false, error: `Image fetch failed: ${err.message}` }; - } -} - -/** - * Analyze an image by sending it to the configured multimodal LLM. 
- * @param {object} input - Tool input with url or dataUri - * @param {object} _options - Runtime options (unused) - * @returns {Promise} JSON result string - */ -export async function visionAnalyzeImpl(input, _options) { - const { url, dataUri, prompt: _prompt } = input; - - if (!url && !dataUri) { - return JSON.stringify({ ok: false, error: "Either url or dataUri is required" }); - } - - const apiKey = process.env.OPENAI_API_KEY; - if (!apiKey) { - return JSON.stringify({ - ok: false, - error: "OPENAI_API_KEY is required for vision analysis", - }); - } - - let base64, mimeType; - - if (url) { - const result = await fetchImageFromUrl(url); - if (!result.ok) { - return JSON.stringify({ ok: false, error: result.error }); - } - base64 = result.base64; - mimeType = result.mimeType; - } else if (dataUri) { - const decoded = decodeDataUri(dataUri); - if (!decoded) { - return JSON.stringify({ - ok: false, - error: "Invalid data URI format. Expected: data:image/xxx;base64,...", - }); - } - // Estimate size from base64 length - const estimatedBytes = Math.floor(decoded.length * 0.75); - if (estimatedBytes > MAX_IMAGE_SIZE) { - return JSON.stringify({ - ok: false, - error: `Image exceeds ${MAX_IMAGE_SIZE / (1024 * 1024)}MB limit`, - }); - } - base64 = decoded; - mimeType = dataUri.split(";")[0].split(":")[1] || "image/jpeg"; - } - - const model = new ChatOpenAI({ - model: "gpt-4o", - configuration: { apiKey }, - maxTokens: 1024, - temperature: 0.2, - }); - - try { - const response = await model.invoke([ - { - role: "user", - content: [ - { type: "text", text: analysisPrompt }, - { - type: "image_url", - image_url: { url: `data:${mimeType};base64,${base64}` }, - }, - ], - }, - ]); - - // node:coverage ignore next - const text = response.content; - return JSON.stringify({ - ok: true, - analysis: typeof text === "string" ? 
text : JSON.stringify(text), - source: url || "dataUri", - }); - } catch (err) { - return JSON.stringify({ ok: false, error: `LLM analysis failed: ${err.message}` }); - } -} - -/** - * @param {z.infer} input - Tool input with url or dataUri - * @param {object} _options - Runtime options - * @returns {string} JSON result string - */ -export const vision_analyze = tool(visionAnalyzeImpl, { - name: "vision_analyze", - description: - "Analyze an image by sending it to a multimodal LLM. Accepts a URL or base64 data URI. The image is fetched, validated (max 4MB), and sent to GPT-4o for description or answering a specific question about the image.", - schema: z.object({ - url: z.string().url().optional().describe("URL of the image to analyze"), - dataUri: z.string().optional().describe("Base64 data URI (data:image/png;base64,...)"), - prompt: z - .string() - .optional() - .describe("Question or instruction about the image (default: describe the image)"), - }), -}); - -// --- Factory functions for creating tools with runtime options --- - -/** - * Create a vision_analyze tool with runtime options - * @param {object} options - Runtime options - * @returns {object} LangChain Tool instance - */ -export function createVisionTool(options) { - return tool((input) => visionAnalyzeImpl(input, options), { - name: "vision_analyze", - description: - "Analyze an image by sending it to a multimodal LLM. 
Accepts a URL or base64 data URI.", - schema: z.object({ - url: z.string().url().optional().describe("URL of the image to analyze"), - dataUri: z.string().optional().describe("Base64 data URI (data:image/png;base64,...)"), - prompt: z - .string() - .optional() - .describe("Question or instruction about the image (default: describe the image)"), - }), - }); -} diff --git a/tests/unit/tool_index.test.js b/tests/unit/tool_index.test.js index a83eb5a..3aec31c 100644 --- a/tests/unit/tool_index.test.js +++ b/tests/unit/tool_index.test.js @@ -1,4 +1,4 @@ -import { describe, it, beforeEach, afterEach } from "node:test"; +import { describe, it } from "node:test"; import assert from "node:assert"; describe("tools - buildToolConfig", () => { @@ -47,29 +47,6 @@ describe("tools - buildToolConfig", () => { }); describe("tools - buildToolConfig", () => { - let _origEnv; - - beforeEach(() => { - _origEnv = { - OPENAI_API_KEY: process.env.OPENAI_API_KEY, - OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY, - FAL_API_KEY: process.env.FAL_API_KEY, - }; - delete process.env.OPENAI_API_KEY; - delete process.env.OPENROUTER_API_KEY; - delete process.env.FAL_API_KEY; - }); - - afterEach(() => { - if (_origEnv.OPENAI_API_KEY !== undefined) process.env.OPENAI_API_KEY = _origEnv.OPENAI_API_KEY; - else delete process.env.OPENAI_API_KEY; - if (_origEnv.OPENROUTER_API_KEY !== undefined) - process.env.OPENROUTER_API_KEY = _origEnv.OPENROUTER_API_KEY; - else delete process.env.OPENROUTER_API_KEY; - if (_origEnv.FAL_API_KEY !== undefined) process.env.FAL_API_KEY = _origEnv.FAL_API_KEY; - else delete process.env.FAL_API_KEY; - }); - it("returns only clarify and execute_code with empty permissions", async () => { const { buildToolConfig } = await import("../../src/tools/index.js"); const tools = await buildToolConfig({ permissions: [], maxReadSize: "1mb" }); @@ -121,7 +98,7 @@ describe("tools - buildToolConfig", () => { const toolNames = tools.map((t) => t.name); // Tier 1: 12 tools (all register 
with filesystem+process perms) // Tier 2: execute_code (no perms), cronjob (network:outbound) - // No API keys: web_search/web_extract/vision_analyze/image_generate won't register + // No API keys: web_search/web_extract won't register assert.ok(toolNames.length >= 13, "All tier 1 + tier 2 tools should register"); assert.ok(toolNames.includes("terminal"), "terminal should register"); assert.ok(toolNames.includes("process"), "process should register"); diff --git a/tests/unit/tool_registration.test.js b/tests/unit/tool_registration.test.js deleted file mode 100644 index 1d56bdc..0000000 --- a/tests/unit/tool_registration.test.js +++ /dev/null @@ -1,217 +0,0 @@ -import { describe, it, before, after } from "node:test"; -import assert from "node:assert"; -import { buildToolConfig } from "../../src/tools/index.js"; - -describe("tool registration - integration", () => { - let _origPermissions, _origEnvVars; - - before(() => { - _origPermissions = process.env.SANDBOX_PERMISSIONS; - _origEnvVars = { - OPENAI_API_KEY: process.env.OPENAI_API_KEY, - OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY, - EXA_API_KEY: process.env.EXA_API_KEY, - FIRECRAWL_API_KEY: process.env.FIRECRAWL_API_KEY, - TAVILY_API_KEY: process.env.TAVILY_API_KEY, - PARALLEL_API_KEY: process.env.PARALLEL_API_KEY, - SEARXNG_URL: process.env.SEARXNG_URL, - BING_API_KEY: process.env.BING_API_KEY, - CUSTOM_SEARCH_URL: process.env.CUSTOM_SEARCH_URL, - FAL_API_KEY: process.env.FAL_API_KEY, - }; - }); - - after(() => { - process.env.OPENAI_API_KEY = _origEnvVars.OPENAI_API_KEY; - process.env.OPENROUTER_API_KEY = _origEnvVars.OPENROUTER_API_KEY; - process.env.EXA_API_KEY = _origEnvVars.EXA_API_KEY; - process.env.FIRECRAWL_API_KEY = _origEnvVars.FIRECRAWL_API_KEY; - process.env.TAVILY_API_KEY = _origEnvVars.TAVILY_API_KEY; - process.env.PARALLEL_API_KEY = _origEnvVars.PARALLEL_API_KEY; - process.env.SEARXNG_URL = _origEnvVars.SEARXNG_URL; - process.env.BING_API_KEY = _origEnvVars.BING_API_KEY; - 
process.env.CUSTOM_SEARCH_URL = _origEnvVars.CUSTOM_SEARCH_URL; - process.env.FAL_API_KEY = _origEnvVars.FAL_API_KEY; - }); - - it("registers clarifying (no permissions) andTier 1 tools with permissions", async () => { - delete process.env.OPENAI_API_KEY; - const tools = await buildToolConfig({ permissions: ["filesystem:read", "filesystem:write"] }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("clarify")); // Always registered - assert.ok(toolNames.includes("read_file")); - assert.ok(!toolNames.includes("web_search")); // needs network:outbound - assert.ok(!toolNames.includes("vision_analyze")); // needs OPENAI_API_KEY - }); - - it("registers web tools when network:outbound and search key set", async () => { - process.env.EXA_API_KEY = "sk-test-exa"; - const tools = await buildToolConfig({ - permissions: [ - "network:outbound", - "filesystem:read", - "filesystem:write", - "filesystem:exec", - "process:spawn", - ], - }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("web_search")); - assert.ok(toolNames.includes("web_extract")); - delete process.env.EXA_API_KEY; - }); - - it("registers web tools when SEARXNG_URL is set", async () => { - process.env.SEARXNG_URL = "http://searxng.local"; - const tools = await buildToolConfig({ - permissions: ["network:outbound"], - }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("web_search")); - assert.ok(toolNames.includes("web_extract")); - delete process.env.SEARXNG_URL; - }); - - it("registers web tools when BING_API_KEY is set", async () => { - process.env.BING_API_KEY = "sk-bing"; - const tools = await buildToolConfig({ - permissions: ["network:outbound"], - }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("web_search")); - assert.ok(toolNames.includes("web_extract")); - delete process.env.BING_API_KEY; - }); - - it("registers web tools when CUSTOM_SEARCH_URL is set", async () => { - 
process.env.CUSTOM_SEARCH_URL = "http://custom.local/search"; - const tools = await buildToolConfig({ - permissions: ["network:outbound"], - }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("web_search")); - assert.ok(toolNames.includes("web_extract")); - delete process.env.CUSTOM_SEARCH_URL; - }); - - it("does not register web tools without any search key", async () => { - delete process.env.EXA_API_KEY; - delete process.env.FIRECRAWL_API_KEY; - delete process.env.TAVILY_API_KEY; - delete process.env.PARALLEL_API_KEY; - delete process.env.SEARXNG_URL; - delete process.env.BING_API_KEY; - delete process.env.CUSTOM_SEARCH_URL; - const tools = await buildToolConfig({ - permissions: ["network:outbound"], - }); - const toolNames = tools.map((t) => t.name); - assert.ok(!toolNames.includes("web_search")); - assert.ok(!toolNames.includes("web_extract")); - }); - - it("registers execute_code without permissions (sandboxed)", async () => { - const tools = await buildToolConfig({ permissions: [] }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("execute_code")); - }); - - it("registers vision_analyze with OPENAI_API_KEY (no permission needed)", async () => { - process.env.OPENAI_API_KEY = "sk-test-openai"; - delete process.env.EXA_API_KEY; - delete process.env.FIRECRAWL_API_KEY; - delete process.env.TAVILY_API_KEY; - delete process.env.PARALLEL_API_KEY; - delete process.env.SEARXNG_URL; - delete process.env.BING_API_KEY; - delete process.env.CUSTOM_SEARCH_URL; - const tools = await buildToolConfig({ permissions: [] }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("vision_analyze")); - delete process.env.OPENAI_API_KEY; - }); - - it("does not register vision_analyze without OPENAI_API_KEY", async () => { - delete process.env.OPENAI_API_KEY; - const tools = await buildToolConfig({ permissions: [] }); - const toolNames = tools.map((t) => t.name); - 
assert.ok(!toolNames.includes("vision_analyze")); - }); - - it("registers image_generate with network:outbound and FAL_API_KEY", async () => { - process.env.FAL_API_KEY = "sk-fake-fal"; - const tools = await buildToolConfig({ - permissions: ["network:outbound"], - }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("image_generate")); - delete process.env.FAL_API_KEY; - }); - - it("does not register image_generate without FAL_API_KEY", async () => { - delete process.env.FAL_API_KEY; - const tools = await buildToolConfig({ - permissions: ["network:outbound"], - }); - const toolNames = tools.map((t) => t.name); - assert.ok(!toolNames.includes("image_generate")); - }); - - it("registers cronjob with network:outbound", async () => { - const tools = await buildToolConfig({ - permissions: ["network:outbound"], - }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("cronjob")); - }); - - it("does not register cronjob without network:outbound", async () => { - const tools = await buildToolConfig({ permissions: [] }); - const toolNames = tools.map((t) => t.name); - assert.ok(!toolNames.includes("cronjob")); - }); - - it("registers text_to_speech with OPENAI_API_KEY (no permission needed)", async () => { - process.env.OPENAI_API_KEY = "sk-test-openai"; - delete process.env.EXA_API_KEY; - delete process.env.FIRECRAWL_API_KEY; - delete process.env.TAVILY_API_KEY; - delete process.env.PARALLEL_API_KEY; - delete process.env.SEARXNG_URL; - delete process.env.BING_API_KEY; - delete process.env.CUSTOM_SEARCH_URL; - const tools = await buildToolConfig({ permissions: [] }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("text_to_speech")); - delete process.env.OPENAI_API_KEY; - }); - - it("does not register text_to_speech without OPENAI_API_KEY", async () => { - delete process.env.OPENAI_API_KEY; - const tools = await buildToolConfig({ permissions: [] }); - const toolNames = tools.map((t) => 
t.name); - assert.ok(!toolNames.includes("text_to_speech")); - }); - - it("registers mixture_of_agents with OPENROUTER_API_KEY (no permission needed)", async () => { - process.env.OPENROUTER_API_KEY = "sk-test-or"; - delete process.env.EXA_API_KEY; - delete process.env.FIRECRAWL_API_KEY; - delete process.env.TAVILY_API_KEY; - delete process.env.PARALLEL_API_KEY; - delete process.env.SEARXNG_URL; - delete process.env.BING_API_KEY; - delete process.env.CUSTOM_SEARCH_URL; - const tools = await buildToolConfig({ permissions: [] }); - const toolNames = tools.map((t) => t.name); - assert.ok(toolNames.includes("mixture_of_agents")); - delete process.env.OPENROUTER_API_KEY; - }); - - it("does not register mixture_of_agents without OPENROUTER_API_KEY", async () => { - delete process.env.OPENROUTER_API_KEY; - const tools = await buildToolConfig({ permissions: [] }); - const toolNames = tools.map((t) => t.name); - assert.ok(!toolNames.includes("mixture_of_agents")); - }); -}); diff --git a/tests/unit/tools_image.test.js b/tests/unit/tools_image.test.js deleted file mode 100644 index f94106e..0000000 --- a/tests/unit/tools_image.test.js +++ /dev/null @@ -1,110 +0,0 @@ -import { describe, it, before, after } from "node:test"; -import assert from "node:assert"; -import { imageGenerateImpl } from "../../src/tools/image.js"; - -describe("image_generate", () => { - let origFetch; - - before(() => { - origFetch = globalThis.fetch; - }); - - after(() => { - globalThis.fetch = origFetch; - }); - - it("requires prompt", async () => { - const result = await imageGenerateImpl({}, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Prompt is required")); - }); - - it("rejects empty prompt", async () => { - const result = await imageGenerateImpl({ prompt: "" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Prompt is required")); - }); - - it("rejects long 
prompts (>1000 chars)", async () => { - const result = await imageGenerateImpl({ prompt: "a".repeat(1001) }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("1000 characters")); - }); - - it("requires FAL_API_KEY", async () => { - const result = await imageGenerateImpl({ prompt: "a cat" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("FAL_API_KEY")); - }); - - it("accepts falApiKey parameter as override", async () => { - globalThis.fetch = async () => ({ - ok: true, - json: async () => ({ - images: [{ url: "https://fal.ai/generated.png" }], - }), - }); - const result = await imageGenerateImpl( - { prompt: "a sunset", falApiKey: "sk-fake-fal-key" }, - {}, - ); - const parsed = JSON.parse(result); - assert.ok(parsed.ok); - assert.strictEqual(parsed.imageUrl, "https://fal.ai/generated.png"); - }); - - it("calls FAL.ai with correct parameters", async () => { - globalThis.fetch = async (url, opts) => { - assert.ok(url.includes("fal.run")); - assert.ok(url.includes("flux")); - assert.ok(opts.headers.Authorization.startsWith("Key ")); - const body = JSON.parse(opts.body); - assert.strictEqual(body.sync_mode, true); - assert.strictEqual(body.image_size, "square_1_1"); - return { - ok: true, - json: async () => ({ images: [{ url: "https://fal.ai/result.png" }] }), - }; - }; - const result = await imageGenerateImpl({ prompt: "a mountain", falApiKey: "sk-test" }, {}); - const parsed = JSON.parse(result); - assert.ok(parsed.ok); - }); - - it("returns error on FAL.ai failure", async () => { - globalThis.fetch = async () => ({ - ok: false, - status: 429, - text: async () => "Rate limit exceeded", - }); - const result = await imageGenerateImpl({ prompt: "test", falApiKey: "sk-fake-key" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("FAL.ai")); - }); - - it("returns error 
when FAL.ai response has no images", async () => { - globalThis.fetch = async () => ({ - ok: true, - json: async () => ({ messages: [] }), // no images key - }); - const result = await imageGenerateImpl({ prompt: "test", falApiKey: "sk-fake-key" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("missing image")); - }); - - it("returns error when fetch throws (network failure)", async () => { - globalThis.fetch = async () => { - throw new Error("Network error"); - }; - const result = await imageGenerateImpl({ prompt: "test", falApiKey: "sk-fake-key" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Network error")); - }); -}); diff --git a/tests/unit/tools_moa.test.js b/tests/unit/tools_moa.test.js deleted file mode 100644 index 4863087..0000000 --- a/tests/unit/tools_moa.test.js +++ /dev/null @@ -1,160 +0,0 @@ -import { describe, it, before, after } from "node:test"; -import assert from "node:assert"; - -describe("mixture_of_agents", () => { - let origFetch, origOpenRouter; - - before(() => { - origFetch = globalThis.fetch; - origOpenRouter = process.env.OPENROUTER_API_KEY; - }); - - after(() => { - globalThis.fetch = origFetch; - process.env.OPENROUTER_API_KEY = origOpenRouter; - }); - - it("requires message", async () => { - const { mixtureOfAgentsImpl } = await import("../../src/tools/moa.js"); - const result = await mixtureOfAgentsImpl({}, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Message is required")); - }); - - it("rejects empty message", async () => { - const { mixtureOfAgentsImpl } = await import("../../src/tools/moa.js"); - const result = await mixtureOfAgentsImpl({ message: "" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Message is required")); - }); - - it("requires 
OPENROUTER_API_KEY", async () => { - const { mixtureOfAgentsImpl } = await import("../../src/tools/moa.js"); - const result = await mixtureOfAgentsImpl({ message: "What is AI?" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("OPENROUTER_API_KEY")); - }); - - it("calls OpenRouter with default models", async () => { - const { mixtureOfAgentsImpl } = await import("../../src/tools/moa.js"); - process.env.OPENROUTER_API_KEY = "sk-test-or"; - globalThis.fetch = async (url, opts) => { - assert.ok(url.includes("openrouter.ai")); - assert.ok(url.includes("/chat/completions")); - return { - ok: true, - json: async () => ({ - choices: [ - { message: { content: "Response from " + (opts.headers["X-Title"] || "unknown") } }, - ], - model: "openai/gpt-4o", - }), - }; - }; - const responsePromise = mixtureOfAgentsImpl({ message: "What is AI?" }, {}); - const result = JSON.parse(await responsePromise); - assert.ok(result.ok); - assert.strictEqual(result.agentsUsed, 4); - assert.ok(result.aggregation); - assert.ok(result.agreement); - globalThis.fetch = origFetch; - process.env.OPENROUTER_API_KEY = origOpenRouter; - }); - - it("aggregates partial results when some calls fail", async () => { - const { mixtureOfAgentsImpl } = await import("../../src/tools/moa.js"); - process.env.OPENROUTER_API_KEY = "sk-test-or"; - let callCount = 0; - globalThis.fetch = async () => { - callCount++; - if (callCount <= 2) { - return { - ok: true, - json: async () => ({ - choices: [{ message: { content: `Agent ${callCount} response` } }], - }), - }; - } - return { - ok: false, - status: 500, - text: async () => "Internal error", - }; - }; - const result = JSON.parse(await mixtureOfAgentsImpl({ message: "test message" }, {})); - assert.ok(result.ok); - assert.ok(result.agreement === false); - assert.strictEqual(result.agentsUsed, 2); - assert.strictEqual(result.agentsFailed, 2); - assert.ok(result.failedAgents.length > 0); - 
globalThis.fetch = origFetch; - process.env.OPENROUTER_API_KEY = origOpenRouter; - }); - - it("returns error when all agent calls fail", async () => { - const { mixtureOfAgentsImpl } = await import("../../src/tools/moa.js"); - process.env.OPENROUTER_API_KEY = "sk-test-or"; - globalThis.fetch = async () => ({ - ok: false, - status: 503, - text: async () => "Service unavailable", - }); - const result = JSON.parse(await mixtureOfAgentsImpl({ message: "test" }, {})); - assert.strictEqual(result.ok, false); - assert.ok(result.error.includes("All 4 model calls failed")); - globalThis.fetch = origFetch; - process.env.OPENROUTER_API_KEY = origOpenRouter; - }); - - it("rejects too few models", async () => { - const { mixtureOfAgentsImpl } = await import("../../src/tools/moa.js"); - const saved = process.env.OPENROUTER_API_KEY; - process.env.OPENROUTER_API_KEY = "sk-test-or"; - const result = JSON.parse( - await mixtureOfAgentsImpl({ message: "test", models: ["model1", "model2"] }, {}), - ); - assert.strictEqual(result.ok, false); - assert.ok(result.error.includes("At least 4 models")); - process.env.OPENROUTER_API_KEY = saved; - }); - - it("handles OpenRouter network error", async () => { - const { mixtureOfAgentsImpl } = await import("../../src/tools/moa.js"); - process.env.OPENROUTER_API_KEY = "sk-test-or"; - globalThis.fetch = async () => { - throw new Error("Network unreachable"); - }; - const result = JSON.parse(await mixtureOfAgentsImpl({ message: "test" }, {})); - assert.strictEqual(result.ok, false); - assert.ok(result.error.includes("All 4 model calls failed")); - globalThis.fetch = origFetch; - process.env.OPENROUTER_API_KEY = origOpenRouter; - }); - - it("handles partial network errors", async () => { - const { mixtureOfAgentsImpl } = await import("../../src/tools/moa.js"); - process.env.OPENROUTER_API_KEY = "sk-test-or"; - let callCount = 0; - globalThis.fetch = async () => { - callCount++; - if (callCount <= 1) { - return { - ok: true, - json: async () => ({ - 
choices: [{ message: { content: "Agent 1 response" } }], - }), - }; - } - throw new Error("Network error"); - }; - const result = JSON.parse(await mixtureOfAgentsImpl({ message: "test" }, {})); - assert.ok(result.ok); - assert.strictEqual(result.agentsUsed, 1); - assert.strictEqual(result.agentsFailed, 3); - globalThis.fetch = origFetch; - process.env.OPENROUTER_API_KEY = origOpenRouter; - }); -}); diff --git a/tests/unit/tools_tts.test.js b/tests/unit/tools_tts.test.js deleted file mode 100644 index 1031c80..0000000 --- a/tests/unit/tools_tts.test.js +++ /dev/null @@ -1,133 +0,0 @@ -import { describe, it, before, after } from "node:test"; -import assert from "node:assert"; - -describe("text_to_speech", () => { - let origFetch, origOpenAI; - - before(() => { - origFetch = globalThis.fetch; - origOpenAI = process.env.OPENAI_API_KEY; - }); - - after(() => { - globalThis.fetch = origFetch; - process.env.OPENAI_API_KEY = origOpenAI; - }); - - it("requires text", async () => { - const { textToSpeechImpl } = await import("../../src/tools/tts.js"); - const result = await textToSpeechImpl({}, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Text is required")); - }); - - it("rejects empty text", async () => { - const { textToSpeechImpl } = await import("../../src/tools/tts.js"); - const result = await textToSpeechImpl({ text: "" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Text is required")); - }); - - it("rejects long text (>4096 chars)", async () => { - const { textToSpeechImpl } = await import("../../src/tools/tts.js"); - const result = await textToSpeechImpl({ text: "a".repeat(4097) }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("4096 characters")); - }); - - it("requires OPENAI_API_KEY", async () => { - const { textToSpeechImpl } = await 
import("../../src/tools/tts.js"); - const saved = process.env.OPENAI_API_KEY; - delete process.env.OPENAI_API_KEY; - const result = await textToSpeechImpl({ text: "Hello" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("OPENAI_API_KEY")); - process.env.OPENAI_API_KEY = saved; - }); - - it("rejects invalid model", async () => { - const { textToSpeechImpl } = await import("../../src/tools/tts.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test"; - const result = await textToSpeechImpl({ text: "Hello", model: "tts-99" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Invalid model")); - process.env.OPENAI_API_KEY = saved; - }); - - it("rejects invalid voice", async () => { - const { textToSpeechImpl } = await import("../../src/tools/tts.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test"; - const result = await textToSpeechImpl({ text: "Hello", voice: "invalid-voice" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Invalid voice")); - process.env.OPENAI_API_KEY = saved; - }); - - it("calls OpenAI TTS API with correct parameters", async () => { - const { textToSpeechImpl } = await import("../../src/tools/tts.js"); - globalThis.fetch = async (url, opts) => { - assert.ok(url.includes("openai.com")); - assert.ok(url.includes("/audio/speech")); - const body = JSON.parse(opts.body); - assert.strictEqual(body.model, "tts-1"); - assert.strictEqual(body.voice, "nova"); - assert.strictEqual(body.speed, 1); - assert.strictEqual(body.input, "test speech text"); - return { - ok: true, - arrayBuffer: async () => new Uint8Array([0x00]).buffer, - }; - }; - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test"; - const result = await textToSpeechImpl( - { text: "test 
speech text", voice: "nova", model: "tts-1" }, - {}, - ); - const parsed = JSON.parse(result); - assert.ok(parsed.ok); - assert.ok(parsed.path.startsWith("MEDIA:")); - process.env.OPENAI_API_KEY = saved; - globalThis.fetch = origFetch; - }); - - it("returns error on API failure", async () => { - const { textToSpeechImpl } = await import("../../src/tools/tts.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test"; - globalThis.fetch = async () => ({ - ok: false, - status: 429, - text: async () => "Rate limit exceeded", - }); - const result = await textToSpeechImpl({ text: "Hello" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("429") || parsed.error.includes("Rate")); - process.env.OPENAI_API_KEY = saved; - globalThis.fetch = origFetch; - }); - - it("handles fetch network error", async () => { - const { textToSpeechImpl } = await import("../../src/tools/tts.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test"; - globalThis.fetch = async () => { - throw new Error("ENOTFOUND"); - }; - const result = await textToSpeechImpl({ text: "Hello" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("TTS request failed")); - globalThis.fetch = origFetch; - process.env.OPENAI_API_KEY = saved; - }); -}); diff --git a/tests/unit/tools_vision.test.js b/tests/unit/tools_vision.test.js deleted file mode 100644 index a91a261..0000000 --- a/tests/unit/tools_vision.test.js +++ /dev/null @@ -1,214 +0,0 @@ -import { describe, it, before, after, mock } from "node:test"; -import assert from "node:assert"; - -describe("vision_analyze", () => { - let origFetch, origOpenAI; - let ChatOpenAI; - - before(async () => { - origFetch = globalThis.fetch; - origOpenAI = process.env.OPENAI_API_KEY; - const openaiMod = await import("@langchain/openai"); - ChatOpenAI = openaiMod.ChatOpenAI; - }); - 
- after(() => { - globalThis.fetch = origFetch; - process.env.OPENAI_API_KEY = origOpenAI; - }); - - it("requires url or dataUri", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - globalThis.fetch = origFetch; - const result = await visionAnalyzeImpl({}, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("url or dataUri")); - }); - - it("returns error for no OPENAI_API_KEY", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - const saved = process.env.OPENAI_API_KEY; - delete process.env.OPENAI_API_KEY; - globalThis.fetch = origFetch; - const result = await visionAnalyzeImpl({ url: "https://example.com/img.jpg" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("OPENAI_API_KEY")); - process.env.OPENAI_API_KEY = saved; - }); - - it("rejects invalid dataUri", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test"; - globalThis.fetch = origFetch; - const result = await visionAnalyzeImpl({ dataUri: "not-a-valid-uri" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Invalid data URI")); - process.env.OPENAI_API_KEY = saved; - }); - - it("rejects oversized image from URL", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test-key"; - globalThis.fetch = async () => ({ - ok: true, - blob: () => - Promise.resolve({ - size: 5 * 1024 * 1024 + 1, - type: "image/png", - }), - }); - const result = await visionAnalyzeImpl({ url: "https://example.com/large.png" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - 
assert.ok(parsed.error.includes("exceeds") || parsed.error.includes("limit")); - globalThis.fetch = origFetch; - process.env.OPENAI_API_KEY = saved; - }); - - it("estimates size from base64 for dataUri and rejects oversized", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test-key"; - const bigBase64 = "a".repeat(5 * 1024 * 1024 * 2); - const result = await visionAnalyzeImpl({ dataUri: `data:image/png;base64,${bigBase64}` }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - globalThis.fetch = origFetch; - process.env.OPENAI_API_KEY = saved; - }); - - it("returns error when image fetch fails with HTTP error", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test-key"; - globalThis.fetch = async () => ({ - ok: false, - status: 404, - statusText: "Not Found", - }); - const result = await visionAnalyzeImpl({ url: "https://example.com/missing.png" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("HTTP 404")); - globalThis.fetch = origFetch; - process.env.OPENAI_API_KEY = saved; - }); - - it("returns error when image fetch throws network error", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test-key"; - globalThis.fetch = async () => { - throw new Error("Network unreachable"); - }; - const result = await visionAnalyzeImpl({ url: "https://example.com/img.jpg" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("Image fetch failed")); - globalThis.fetch = origFetch; - process.env.OPENAI_API_KEY = saved; - }); - - it("decodes dataUri correctly", async 
() => { - const { decodeDataUri } = await import("../../src/tools/vision.js"); - const result = decodeDataUri("data:image/png;base64,aGVsbG8="); - assert.strictEqual(result, "aGVsbG8="); - }); - - it("returns null for invalid dataUri prefix", async () => { - const { decodeDataUri } = await import("../../src/tools/vision.js"); - const result = decodeDataUri("data:text/plain,hello"); - assert.strictEqual(result, null); - }); - - it("returns null for non-data URI string", async () => { - const { decodeDataUri } = await import("../../src/tools/vision.js"); - const result = decodeDataUri("https://example.com/image.png"); - assert.strictEqual(result, null); - }); - - it("converts arrayBuffer to base64", async () => { - const { arrayBufferToBase64 } = await import("../../src/tools/vision.js"); - const original = "hello world"; - const encoder = new TextEncoder(); - const buffer = encoder.encode(original).buffer; - const result = arrayBufferToBase64(buffer); - const decoded = btoa(String.fromCharCode(...new Uint8Array(buffer))); - assert.strictEqual(result, decoded); - }); - - it("fetches image from URL and returns analysis result", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test-key"; - const imageBytes = new Uint8Array([137, 80, 78, 71, 13, 10, 26, 10]); - globalThis.fetch = async () => ({ - ok: true, - blob: () => - Promise.resolve({ - size: imageBytes.length, - type: "image/png", - arrayBuffer: () => Promise.resolve(imageBytes.buffer), - }), - }); - - // Stub ChatOpenAI.invoke via module mock on the already-imported class - const fakeResponse = { content: "A cat sitting on a mat." 
}; - const originalInvoke = ChatOpenAI.prototype.invoke; - mock.method(ChatOpenAI.prototype, "invoke", () => Promise.resolve(fakeResponse)); - - const result = await visionAnalyzeImpl({ url: "https://example.com/img.png" }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, true); - assert.strictEqual(parsed.source, "https://example.com/img.png"); - - ChatOpenAI.prototype.invoke = originalInvoke; - globalThis.fetch = origFetch; - process.env.OPENAI_API_KEY = saved; - }); - - it("uses dataUri and returns analysis result with mocked LLM", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test-key"; - const smallBase64 = btoa("fake image binary"); - - const fakeResponse = { - content: "A black cat on a windowsill.", - }; - const originalInvoke = ChatOpenAI.prototype.invoke; - mock.method(ChatOpenAI.prototype, "invoke", () => Promise.resolve(fakeResponse)); - - const result = await visionAnalyzeImpl({ dataUri: `data:image/png;base64,${smallBase64}` }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, true); - assert.strictEqual(parsed.analysis, "A black cat on a windowsill."); - assert.strictEqual(parsed.source, "dataUri"); - - ChatOpenAI.prototype.invoke = originalInvoke; - process.env.OPENAI_API_KEY = saved; - }); - - it("returns error when LLM analysis fails", async () => { - const { visionAnalyzeImpl } = await import("../../src/tools/vision.js"); - const saved = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "sk-test-key"; - const smallBase64 = btoa("fake image binary"); - - mock.method(ChatOpenAI.prototype, "invoke", () => - Promise.reject(new Error("API rate limited")), - ); - - const result = await visionAnalyzeImpl({ dataUri: `data:image/png;base64,${smallBase64}` }, {}); - const parsed = JSON.parse(result); - assert.strictEqual(parsed.ok, false); - assert.ok(parsed.error.includes("LLM 
analysis failed")); - - process.env.OPENAI_API_KEY = saved; - }); -});