diff --git a/package.json b/package.json index 7eb34cc..66fe1a9 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "publishConfig": { "access": "public" }, - "version": "1.3.8", + "version": "1.3.9", "description": "Generic bash tool for AI agents, compatible with AI SDK", "type": "module", "main": "dist/index.js", diff --git a/src/tool.integration.test.ts b/src/tool.integration.test.ts index f53ddb2..f39beb3 100644 --- a/src/tool.integration.test.ts +++ b/src/tool.integration.test.ts @@ -211,6 +211,156 @@ describe("createBashTool integration", () => { expect(result.content).toBe('export const hello = "world";'); }); + + it("applies outputFilter to file content", async () => { + const { tools } = await createBashTool({ + files: { + "multiline.txt": "line1\nline2\nline3\nline4\nline5", + }, + }); + + assert(tools.readFile.execute, "readFile.execute should be defined"); + const result = (await tools.readFile.execute( + { path: "multiline.txt", outputFilter: "tail -2" }, + opts, + )) as { content: string }; + + expect(result.content.trim()).toBe("line4\nline5"); + }); + + it("applies grep filter to file content", async () => { + const { tools } = await createBashTool({ + files: { + "log.txt": "INFO: started\nERROR: failed\nINFO: done\nERROR: timeout", + }, + }); + + assert(tools.readFile.execute, "readFile.execute should be defined"); + const result = (await tools.readFile.execute( + { path: "log.txt", outputFilter: "grep ERROR" }, + opts, + )) as { content: string }; + + expect(result.content).toContain("ERROR: failed"); + expect(result.content).toContain("ERROR: timeout"); + expect(result.content).not.toContain("INFO"); + }); + }); + + describe("outputFilter", () => { + it("filters bash command output with tail", async () => { + const { tools } = await createBashTool({ + files: { + "numbers.txt": "1\n2\n3\n4\n5\n6\n7\n8\n9\n10", + }, + }); + + assert(tools.bash.execute, "bash.execute should be defined"); + const result = (await tools.bash.execute( + { command: "cat numbers.txt", outputFilter: "tail -3" }, + opts, + )) as CommandResult; + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toBe("8\n9\n10"); + }); + + it("filters bash command output with grep", async () => { + const { tools } = await createBashTool({ + files: testFiles, + }); + + assert(tools.bash.execute, "bash.execute should be defined"); + const result = (await tools.bash.execute( + { command: "find . -type f", outputFilter: "grep -E '\\.ts$'" }, + opts, + )) as CommandResult; + + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("index.ts"); + expect(result.stdout).toContain("helpers.ts"); + expect(result.stdout).not.toContain("package.json"); + }); + + it("filters bash command output with head", async () => { + const { tools } = await createBashTool({ + files: { + "data.txt": "a\nb\nc\nd\ne", + }, + }); + + assert(tools.bash.execute, "bash.execute should be defined"); + const result = (await tools.bash.execute( + { command: "cat data.txt", outputFilter: "head -2" }, + opts, + )) as CommandResult; + + expect(result.exitCode).toBe(0); + expect(result.stdout.trim()).toBe("a\nb"); + }); + }); + + describe("invocation logging", () => { + it("stores full output and returns filtered output", async () => { + const { tools } = await createBashTool({ + files: { + "numbers.txt": "1\n2\n3\n4\n5", + }, + enableInvocationLog: true, + }); + + assert(tools.bash.execute, "bash.execute should be defined"); + assert(tools.readFile.execute, "readFile.execute should be defined"); + + const result = (await tools.bash.execute( + { command: "cat numbers.txt", outputFilter: "tail -2" }, + opts, + )) as CommandResult & { invocationLogPath: string }; + + // Filtered output returned + expect(result.stdout.trim()).toBe("4\n5"); + expect(result.invocationLogPath).toMatch(/\.invocation$/); + + // Full output available in log + const logResult = (await tools.readFile.execute( + { path: result.invocationLogPath }, + opts, + )) as { content: string }; + + expect(logResult.content).toContain("1\n2\n3\n4\n5"); + }); + + it("allows re-filtering invocation log", async () => { + const { tools } = await createBashTool({ + files: { + "log.txt": + "INFO: start\nERROR: fail1\nINFO: middle\nERROR: fail2\nINFO: end", + }, + enableInvocationLog: true, + }); + + assert(tools.bash.execute, "bash.execute should be defined"); + assert(tools.readFile.execute, "readFile.execute should be defined"); + + // First, get last 2 lines + const result = (await tools.bash.execute( + { command: "cat log.txt", outputFilter: "tail -2" }, + opts, + )) as CommandResult & { invocationLogPath: string }; + + expect(result.stdout).toContain("fail2"); + expect(result.stdout).toContain("end"); + + // Now re-filter the full log for errors only + const errorResult = (await tools.readFile.execute( + { path: result.invocationLogPath, outputFilter: "grep ERROR" }, + opts, + )) as { content: string }; + + expect(errorResult.content).toContain("ERROR: fail1"); + expect(errorResult.content).toContain("ERROR: fail2"); + expect(errorResult.content).not.toContain("INFO"); + }); }); describe("writeFile tool", () => { diff --git a/src/tool.test.ts b/src/tool.test.ts index b5adad2..9dd37c9 100644 --- a/src/tool.test.ts +++ b/src/tool.test.ts @@ -361,6 +361,15 @@ describe("createBashTool", () => { }); }); +// Common description sections for tests +const OUTPUT_FILTERING_SECTION = `OUTPUT FILTERING: +Use the outputFilter parameter to filter stdout before it is returned. +Examples: + outputFilter: "tail -50" # Last 50 lines + outputFilter: "head -100" # First 100 lines + outputFilter: "grep error" # Lines containing "error" + outputFilter: "grep -i warn" # Case-insensitive search`; + describe("createBashTool tool prompt integration", () => { beforeEach(() => { for (const key of Object.keys(mockFiles)) { @@ -389,7 +398,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("includes format-specific hints for JSON files", async () => { @@ -414,7 +425,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("includes format-specific hints for YAML files", async () => { @@ -439,7 +452,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("includes format-specific hints for multiple formats", async () => { @@ -471,7 +486,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("includes yq for CSV when using just-bash sandbox", async () => { @@ -497,7 +514,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("includes extraInstructions after tool prompt", async () => { @@ -524,6 +543,8 @@ Common operations: grep -r 'pattern' . # Search file contents cat # View file contents +${OUTPUT_FILTERING_SECTION} + Always use TypeScript.`); }); @@ -563,7 +584,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("uses empty string toolPrompt to disable tool hints", async () => { @@ -588,7 +611,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("combines custom toolPrompt with extraInstructions", async () => { @@ -618,6 +643,8 @@ Common operations: grep -r 'pattern' . # Search file contents cat # View file contents +${OUTPUT_FILTERING_SECTION} + Always run tests first.`); }); }); diff --git a/src/tool.ts b/src/tool.ts index 655e485..bb8d038 100644 --- a/src/tool.ts +++ b/src/tool.ts @@ -198,6 +198,8 @@ export async function createBashTool( onBeforeBashCall: options.onBeforeBashCall, onAfterBashCall: options.onAfterBashCall, maxOutputLength: options.maxOutputLength, + enableInvocationLog: options.enableInvocationLog, + invocationLogPath: options.invocationLogPath, }); const tools = { diff --git a/src/tools/bash.test.ts b/src/tools/bash.test.ts index 8f65f33..aecd76c 100644 --- a/src/tools/bash.test.ts +++ b/src/tools/bash.test.ts @@ -1,5 +1,10 @@ -import { describe, expect, it, vi } from "vitest"; -import { createBashExecuteTool, DEFAULT_MAX_OUTPUT_LENGTH } from "./bash.js"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { + createBashExecuteTool, + DEFAULT_INVOCATION_LOG_PATH, + DEFAULT_MAX_OUTPUT_LENGTH, + parseInvocationLog, +} from "./bash.js"; // Mock AI SDK vi.mock("ai", () => ({ @@ -10,14 +15,31 @@ vi.mock("ai", () => ({ })), })); -const mockSandbox = { - executeCommand: vi.fn(), - readFile: vi.fn(), - writeFiles: vi.fn(), - stop: vi.fn(), -}; +function createMockSandbox() { + return { + executeCommand: vi.fn(), + readFile: vi.fn(), + writeFiles: vi.fn(), + stop: vi.fn(), + }; +} + +let mockSandbox = createMockSandbox(); + +// Common description sections for tests +const OUTPUT_FILTERING_SECTION = `OUTPUT FILTERING: +Use the outputFilter parameter to filter stdout before it is returned. +Examples: + outputFilter: "tail -50" # Last 50 lines + outputFilter: "head -100" # First 100 lines + outputFilter: "grep error" # Lines containing "error" + outputFilter: "grep -i warn" # Case-insensitive search`; describe("createBashExecuteTool", () => { + beforeEach(() => { + mockSandbox = createMockSandbox(); + }); + it("generates description with cwd only", () => { const tool = createBashExecuteTool({ sandbox: mockSandbox, @@ -35,7 +57,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("generates description with files list", () => { @@ -61,7 +85,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("generates description with truncated files list when more than 8", () => { @@ -105,7 +131,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("generates description with extra instructions", () => { @@ -128,6 +156,8 @@ Common operations: grep -r 'pattern' . # Search file contents cat # View file contents +${OUTPUT_FILTERING_SECTION} + Focus on TypeScript files only.`); }); @@ -156,6 +186,8 @@ Common operations: grep -r 'pattern' . # Search file contents cat # View file contents +${OUTPUT_FILTERING_SECTION} + This is a Python project.`); }); @@ -177,7 +209,9 @@ Common operations: ls -la # List files with details find . -name '*.ts' # Find files by pattern grep -r 'pattern' . # Search file contents - cat # View file contents`); + cat # View file contents + +${OUTPUT_FILTERING_SECTION}`); }); it("truncates stdout when exceeding maxOutputLength", async () => { @@ -388,4 +422,218 @@ Common operations: `modified: ${"x".repeat(100)}\n\n[stdout truncated: 50 characters removed]`, ); }); + + describe("invocation logging", () => { + it("writes invocation log and returns path when enableInvocationLog is true", async () => { + mockSandbox.executeCommand.mockResolvedValue({ + stdout: "hello world", + stderr: "", + exitCode: 0, + }); + + const tool = createBashExecuteTool({ + sandbox: mockSandbox, + cwd: "/workspace", + enableInvocationLog: true, + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { command: "echo hello" }, + {} as never, + )) as { + stdout: string; + invocationLogPath: string; + }; + + // Should create directory and write file + expect(mockSandbox.executeCommand).toHaveBeenCalledWith( + `mkdir -p "/workspace/${DEFAULT_INVOCATION_LOG_PATH}"`, + ); + expect(mockSandbox.writeFiles).toHaveBeenCalled(); + + const writeCall = mockSandbox.writeFiles.mock.calls[0][0][0]; + expect(writeCall.path).toMatch( + /\/workspace\/.bash-tool\/commands\/.*\.invocation$/, + ); + + const logContent = parseInvocationLog(writeCall.content); + expect(logContent.command).toBe("echo hello"); + expect(logContent.stdout).toBe("hello world"); + expect(logContent.exitCode).toBe(0); + + // Response should include the log path + expect(result.invocationLogPath).toMatch( + /\/workspace\/.bash-tool\/commands\/.*\.invocation$/, + ); + expect(result.invocationLogPath).toBe(writeCall.path); + }); + + it("does not write invocation log or return path when enableInvocationLog is false", async () => { + mockSandbox.executeCommand.mockResolvedValue({ + stdout: "hello", + stderr: "", + exitCode: 0, + }); + + const tool = createBashExecuteTool({ + sandbox: mockSandbox, + cwd: "/workspace", + enableInvocationLog: false, + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { command: "echo hello" }, + {} as never, + )) as { + stdout: string; + invocationLogPath?: string; + }; + + expect(mockSandbox.writeFiles).not.toHaveBeenCalled(); + expect(result.invocationLogPath).toBeUndefined(); + }); + + it("uses custom invocationLogPath and returns it", async () => { + mockSandbox.executeCommand.mockResolvedValue({ + stdout: "test", + stderr: "", + exitCode: 0, + }); + + const tool = createBashExecuteTool({ + sandbox: mockSandbox, + cwd: "/workspace", + enableInvocationLog: true, + invocationLogPath: "custom/logs", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { command: "test" }, + {} as never, + )) as { + invocationLogPath: string; + }; + + expect(mockSandbox.executeCommand).toHaveBeenCalledWith( + 'mkdir -p "/workspace/custom/logs"', + ); + + const writeCall = mockSandbox.writeFiles.mock.calls[0][0][0]; + expect(writeCall.path).toMatch( + /\/workspace\/custom\/logs\/.*\.invocation$/, + ); + expect(result.invocationLogPath).toBe(writeCall.path); + }); + + it("includes outputFilter in invocation log and returns path", async () => { + // With outputFilter, a single combined bash script is executed + mockSandbox.executeCommand.mockResolvedValueOnce({ + stdout: "line3", // filtered output + stderr: "", + exitCode: 0, + }); + + const tool = createBashExecuteTool({ + sandbox: mockSandbox, + cwd: "/workspace", + enableInvocationLog: true, + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { command: "echo test", outputFilter: "tail -1" }, + {} as never, + )) as { + stdout: string; + invocationLogPath: string; + }; + + // With outputFilter, invocation log is written via bash script (not writeFiles) + expect(mockSandbox.writeFiles).not.toHaveBeenCalled(); + // The log path should still be returned + expect(result.invocationLogPath).toMatch( + /\/workspace\/.bash-tool\/commands\/.*\.invocation$/, + ); + // Filtered output should be returned + expect(result.stdout).toBe("line3"); + // Single executeCommand call for the combined script + expect(mockSandbox.executeCommand).toHaveBeenCalledTimes(1); + }); + }); + + describe("output filtering", () => { + it("applies outputFilter to stdout", async () => { + // With outputFilter, a single combined bash script is executed + // that returns filtered output directly + mockSandbox.executeCommand.mockResolvedValueOnce({ + stdout: "line3", // filtered output from the combined script + stderr: "", + exitCode: 0, + }); + + const tool = createBashExecuteTool({ + sandbox: mockSandbox, + cwd: "/workspace", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { command: "cat file", outputFilter: "tail -1" }, + {} as never, + )) as { stdout: string }; + + expect(result.stdout).toBe("line3"); + // Single executeCommand call for the combined script + expect(mockSandbox.executeCommand).toHaveBeenCalledTimes(1); + }); + + it("returns filter exit code when filter fails", async () => { + // When filter fails, the combined script exits with filter's exit code + mockSandbox.executeCommand.mockResolvedValueOnce({ + stdout: "", + stderr: "filter failed", + exitCode: 1, + }); + + const tool = createBashExecuteTool({ + sandbox: mockSandbox, + cwd: "/workspace", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { command: "echo test", outputFilter: "invalid-filter" }, + {} as never, + )) as { stdout: string; stderr: string; exitCode: number }; + + expect(result.exitCode).toBe(1); + expect(result.stderr).toBe("filter failed"); + }); + + it("does not apply filter when outputFilter is not provided", async () => { + mockSandbox.executeCommand.mockResolvedValue({ + stdout: "original output", + stderr: "", + exitCode: 0, + }); + + const tool = createBashExecuteTool({ + sandbox: mockSandbox, + cwd: "/workspace", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { command: "echo test" }, + {} as never, + )) as { stdout: string }; + + expect(result.stdout).toBe("original output"); + // Should only have one executeCommand call (the actual command) + expect(mockSandbox.executeCommand).toHaveBeenCalledTimes(1); + }); + }); }); diff --git a/src/tools/bash.ts b/src/tools/bash.ts index 813cb41..30008bc 100644 --- a/src/tools/bash.ts +++ b/src/tools/bash.ts @@ -1,3 +1,4 @@ +import nodePath from "node:path"; import { tool } from "ai"; import { z } from "zod"; import type { @@ -8,8 +9,128 @@ import type { Sandbox, } from "../types.js"; +/** Default path for invocation log files */ +export const DEFAULT_INVOCATION_LOG_PATH = ".bash-tool/commands"; + +/** + * Structure of an invocation log file (parsed form) + */ +export interface InvocationLog { + timestamp: string; + command: string; + exitCode: number; + stdout: string; + stderr: string; + outputFilter?: string; +} + +/** + * Format an invocation log as a grep/tail-friendly text format. + * Format: + * ``` + * # timestamp: 2024-01-15T10:30:45.123Z + * # command: ls -la + * # exitCode: 0 + * # outputFilter: tail -10 + * ---STDOUT--- + * + * ---STDERR--- + * + * ``` + */ +function formatInvocationLog(log: InvocationLog): string { + const lines: string[] = [ + `# timestamp: ${log.timestamp}`, + `# command: ${log.command}`, + `# exitCode: ${log.exitCode}`, + ]; + if (log.outputFilter) { + lines.push(`# outputFilter: ${log.outputFilter}`); + } + lines.push("---STDOUT---"); + lines.push(log.stdout); + lines.push("---STDERR---"); + lines.push(log.stderr); + return lines.join("\n"); +} + +/** + * Parse an invocation log from text format. + * Throws if the format is invalid (missing required sections). + */ +export function parseInvocationLog(content: string): InvocationLog { + const lines = content.split("\n"); + const log: InvocationLog = { + timestamp: "", + command: "", + exitCode: 0, + stdout: "", + stderr: "", + }; + + let section: "header" | "stdout" | "stderr" = "header"; + const stdoutLines: string[] = []; + const stderrLines: string[] = []; + let hasStdoutSection = false; + let hasStderrSection = false; + + for (const line of lines) { + if (line === "---STDOUT---") { + section = "stdout"; + hasStdoutSection = true; + continue; + } + if (line === "---STDERR---") { + section = "stderr"; + hasStderrSection = true; + continue; + } + + if (section === "header" && line.startsWith("# ")) { + const match = line.match(/^# (\w+): (.*)$/); + if (match) { + const [, key, value] = match; + if (key === "timestamp") log.timestamp = value; + else if (key === "command") log.command = value; + else if (key === "exitCode") log.exitCode = Number.parseInt(value, 10); + else if (key === "outputFilter") log.outputFilter = value; + } + } else if (section === "stdout") { + stdoutLines.push(line); + } else if (section === "stderr") { + stderrLines.push(line); + } + } + + // Validate that we found the required sections + if (!hasStdoutSection || !hasStderrSection) { + throw new Error("Invalid invocation log format: missing required sections"); + } + + log.stdout = stdoutLines.join("\n"); + log.stderr = stderrLines.join("\n"); + + return log; +} + +/** + * Generates a filesystem-safe timestamp for invocation log filenames. + * Replaces colons with dashes to avoid filesystem issues. + */ +function generateInvocationFilename(): string { + const timestamp = new Date().toISOString().replace(/:/g, "-"); + return `${timestamp}.invocation`; +} + const bashSchema = z.object({ command: z.string().describe("The bash command to execute"), + outputFilter: z + .string() + .optional() + .describe( + "Optional shell filter to apply to output (e.g., 'tail -20', 'grep error'). " + + "Full output is stored in invocation log, filtered output is returned.", + ), }); /** Default maximum length for stdout/stderr output (30KB) */ @@ -38,6 +159,16 @@ export interface CreateBashToolOptions { * @default 30000 */ maxOutputLength?: number; + /** + * Enable storing full command output in invocation log files. + * @default false + */ + enableInvocationLog?: boolean; + /** + * Path (relative to cwd) where invocation log files are stored. + * @default ".bash-tool/commands" + */ + invocationLogPath?: string; } /** @@ -56,7 +187,14 @@ function truncateOutput( } function generateDescription(options: CreateBashToolOptions): string { - const { cwd, files, extraInstructions, toolPrompt } = options; + const { + cwd, + files, + extraInstructions, + toolPrompt, + enableInvocationLog = false, + invocationLogPath = DEFAULT_INVOCATION_LOG_PATH, + } = options; const lines: string[] = [ "Execute bash commands in the sandbox environment.", @@ -93,6 +231,36 @@ function generateDescription(options: CreateBashToolOptions): string { lines.push(" cat # View file contents"); lines.push(""); + // Add output filtering documentation + lines.push("OUTPUT FILTERING:"); + lines.push( + "Use the outputFilter parameter to filter stdout before it is returned.", + ); + if (enableInvocationLog) { + lines.push( + "Full unfiltered output is saved to the invocation log for later retrieval.", + ); + } + lines.push("Examples:"); + lines.push(' outputFilter: "tail -50" # Last 50 lines'); + lines.push(' outputFilter: "head -100" # First 100 lines'); + lines.push(' outputFilter: "grep error" # Lines containing "error"'); + lines.push(' outputFilter: "grep -i warn" # Case-insensitive search'); + lines.push(""); + + // Add invocation log documentation if enabled + if (enableInvocationLog) { + lines.push("INVOCATION LOG:"); + lines.push(`Log path: ${invocationLogPath}/.invocation`); + lines.push( + "The response includes invocationLogPath with the log file path.", + ); + lines.push( + "Use readFile with outputFilter to re-query logs with different filters.", + ); + lines.push(""); + } + if (extraInstructions) { lines.push(extraInstructions); lines.push(""); @@ -108,12 +276,14 @@ export function createBashExecuteTool(options: CreateBashToolOptions) { onBeforeBashCall, onAfterBashCall, maxOutputLength = DEFAULT_MAX_OUTPUT_LENGTH, + enableInvocationLog = false, + invocationLogPath = DEFAULT_INVOCATION_LOG_PATH, } = options; return tool({ description: generateDescription(options), inputSchema: bashSchema, - execute: async ({ command: originalCommand }) => { + execute: async ({ command: originalCommand, outputFilter }) => { // Allow modification of command before execution let command = originalCommand; if (onBeforeBashCall) { @@ -123,11 +293,55 @@ export function createBashExecuteTool(options: CreateBashToolOptions) { } } - // Prepend cd to ensure commands run in the working directory - const fullCommand = `cd "${cwd}" && ${command}`; + let result: { stdout: string; stderr: string; exitCode: number }; + let logPath: string | undefined; + + // Optimized path: when filter is specified, use temp files to avoid + // transferring full output to Node.js and back + if (outputFilter) { + const execResult = await executeWithFilter( + sandbox, + cwd, + command, + outputFilter, + enableInvocationLog + ? { + logDir: nodePath.posix.join(cwd, invocationLogPath), + logPath: nodePath.posix.join( + cwd, + invocationLogPath, + generateInvocationFilename(), + ), + } + : undefined, + ); + result = execResult.result; + logPath = execResult.logPath; + } else { + // No filter: execute command directly + const fullCommand = `cd "${cwd}" && ${command}`; + result = await sandbox.executeCommand(fullCommand); + + // Store full output in invocation log if enabled + if (enableInvocationLog) { + const invocationLog: InvocationLog = { + timestamp: new Date().toISOString(), + command, + exitCode: result.exitCode, + stdout: result.stdout, + stderr: result.stderr, + }; - // Execute the command - let result = await sandbox.executeCommand(fullCommand); + const filename = generateInvocationFilename(); + const logDir = nodePath.posix.join(cwd, invocationLogPath); + logPath = nodePath.posix.join(logDir, filename); + + await sandbox.executeCommand(`mkdir -p "${logDir}"`); + await sandbox.writeFiles([ + { path: logPath, content: formatInvocationLog(invocationLog) }, + ]); + } + } // Truncate output if needed result = { @@ -144,7 +358,93 @@ export function createBashExecuteTool(options: CreateBashToolOptions) { } } + // Include invocation log path in response if logging is enabled + if (logPath) { + return { ...result, invocationLogPath: logPath }; + } + return result; }, }); } + +/** + * Execute a command with output filter using temp files. + * This keeps full output in the sandbox and only returns filtered output. + * Optionally writes invocation log in the same bash invocation. + */ +async function executeWithFilter( + sandbox: Sandbox, + cwd: string, + command: string, + filter: string, + invocationLog?: { logDir: string; logPath: string }, +): Promise<{ + result: { stdout: string; stderr: string; exitCode: number }; + logPath?: string; +}> { + const timestamp = new Date().toISOString(); + const escapedCommand = command.replace(/'/g, "'\\''"); + const escapedFilter = filter.replace(/'/g, "'\\''"); + + // Use fixed temp file paths based on timestamp to avoid mktemp issues + const tempId = timestamp.replace(/[:.]/g, "-"); + const tmpStdout = `/tmp/bash-tool-stdout-${tempId}`; + const tmpStderr = `/tmp/bash-tool-stderr-${tempId}`; + + // Build a single bash script that: + // 1. Runs the command, capturing output to temp files + // 2. Optionally writes the invocation log + // 3. Filters and outputs the result + // 4. Cleans up temp files + // 5. Exits with the original command's exit code + + let script = ` +# Run command and capture output +cd "${cwd}" && ${command} > "${tmpStdout}" 2> "${tmpStderr}" +cmd_exit=$? +`; + + if (invocationLog) { + // Add invocation log writing to the script + script += ` +# Write invocation log +mkdir -p "${invocationLog.logDir}" +cat > "${invocationLog.logPath}" << 'INVOCATION_HEADER' +# timestamp: ${timestamp} +# command: ${escapedCommand} +INVOCATION_HEADER +echo "# exitCode: $cmd_exit" >> "${invocationLog.logPath}" +echo "# outputFilter: ${escapedFilter}" >> "${invocationLog.logPath}" +echo "---STDOUT---" >> "${invocationLog.logPath}" +cat "${tmpStdout}" >> "${invocationLog.logPath}" +echo "---STDERR---" >> "${invocationLog.logPath}" +cat "${tmpStderr}" >> "${invocationLog.logPath}" +`; + } + + script += ` +# Output filtered stdout +cat "${tmpStdout}" | ${filter} +filter_exit=$? + +# Output original stderr to stderr +cat "${tmpStderr}" >&2 + +# Clean up +rm -f "${tmpStdout}" "${tmpStderr}" + +# Exit with filter exit code if filter failed, otherwise original exit code +if [ $filter_exit -ne 0 ]; then + exit $filter_exit +fi +exit $cmd_exit +`; + + const result = await sandbox.executeCommand(script); + + return { + result, + logPath: invocationLog?.logPath, + }; +} diff --git a/src/tools/read-file.test.ts b/src/tools/read-file.test.ts new file mode 100644 index 0000000..cd883d2 --- /dev/null +++ b/src/tools/read-file.test.ts @@ -0,0 +1,222 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { createReadFileTool } from "./read-file.js"; + +/** + * Helper to format invocation log in the text format used by bash tool. + */ +function formatInvocationLog(log: { + timestamp: string; + command: string; + exitCode: number; + stdout: string; + stderr: string; + outputFilter?: string; +}): string { + const lines: string[] = [ + `# timestamp: ${log.timestamp}`, + `# command: ${log.command}`, + `# exitCode: ${log.exitCode}`, + ]; + if (log.outputFilter) { + lines.push(`# outputFilter: ${log.outputFilter}`); + } + lines.push("---STDOUT---"); + lines.push(log.stdout); + lines.push("---STDERR---"); + lines.push(log.stderr); + return lines.join("\n"); +} + +// Mock AI SDK +vi.mock("ai", () => ({ + tool: vi.fn((config) => ({ + description: config.description, + parameters: config.parameters, + execute: config.execute, + })), +})); + +function createMockSandbox() { + return { + executeCommand: vi.fn(), + readFile: vi.fn(), + writeFiles: vi.fn(), + stop: vi.fn(), + }; +} + +let mockSandbox = createMockSandbox(); + +describe("createReadFileTool", () => { + beforeEach(() => { + mockSandbox = createMockSandbox(); + }); + + it("reads file content", async () => { + mockSandbox.readFile.mockResolvedValue("file content"); + + const tool = createReadFileTool({ + sandbox: mockSandbox, + cwd: "/workspace", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!({ path: "test.txt" }, {} as never)) as { + content: string; + }; + + expect(result.content).toBe("file content"); + expect(mockSandbox.readFile).toHaveBeenCalledWith("/workspace/test.txt"); + }); + + it("resolves relative paths against cwd", async () => { + mockSandbox.readFile.mockResolvedValue("content"); + + const tool = createReadFileTool({ + sandbox: mockSandbox, + cwd: "/app/project", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + await tool.execute!({ path: "src/index.ts" }, {} as never); + + expect(mockSandbox.readFile).toHaveBeenCalledWith( + "/app/project/src/index.ts", + ); + }); + + describe("invocation file handling", () => { + it("extracts stdout from .invocation files using sed", async () => { + // Now uses sed to extract stdout, mock executeCommand + mockSandbox.executeCommand.mockResolvedValue({ + stdout: "file1.txt\nfile2.txt", + stderr: "", + exitCode: 0, + }); + + const tool = createReadFileTool({ + sandbox: mockSandbox, + cwd: "/workspace", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { path: ".bash-tool/commands/2024-01-15T10-30-45.123Z.invocation" }, + {} as never, + )) as { content: string }; + + expect(result.content).toBe("file1.txt\nfile2.txt"); + // Verify sed command was used to extract stdout section + expect(mockSandbox.executeCommand).toHaveBeenCalledWith( + expect.stringContaining("sed -n"), + ); + }); + + it("falls back to readFile if sed fails", async () => { + // Sed fails + mockSandbox.executeCommand.mockResolvedValue({ + stdout: "", + stderr: "error", + exitCode: 1, + }); + // Fallback readFile returns the raw content + const invocationLog = formatInvocationLog({ + timestamp: "2024-01-15T10:30:45.123Z", + command: "ls -la", + exitCode: 0, + stdout: "fallback content", + stderr: "", + }); + mockSandbox.readFile.mockResolvedValue(invocationLog); + + const tool = createReadFileTool({ + sandbox: mockSandbox, + cwd: "/workspace", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { path: "test.invocation" }, + {} as never, + )) as { content: string }; + + expect(result.content).toBe("fallback content"); + }); + }); + + describe("output filtering", () => { + it("applies outputFilter using cat and pipe", async () => { + mockSandbox.executeCommand.mockResolvedValue({ + stdout: "line3", + stderr: "", + exitCode: 0, + }); + + const tool = createReadFileTool({ + sandbox: mockSandbox, + cwd: "/workspace", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { path: "test.txt", outputFilter: "tail -1" }, + {} as never, + )) as { content: string }; + + expect(result.content).toBe("line3"); + expect(mockSandbox.executeCommand).toHaveBeenCalledWith( + 'cd "/workspace" && cat "/workspace/test.txt" | tail -1', + ); + }); + + it("returns error when filter fails", async () => { + mockSandbox.executeCommand.mockResolvedValue({ + stdout: "", + stderr: "grep: invalid pattern", + exitCode: 1, + }); + + const tool = createReadFileTool({ + sandbox: mockSandbox, + cwd: "/workspace", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { path: "test.txt", outputFilter: "grep '['" }, + {} as never, + )) as { content: string; error: string }; + + expect(result.error).toContain("Filter error"); + }); + + it("applies filter to invocation file stdout using sed and pipe", async () => { + // Now uses sed to extract stdout and pipe through filter in one command + mockSandbox.executeCommand.mockResolvedValue({ + stdout: "line3", + stderr: "", + exitCode: 0, + }); + + const tool = createReadFileTool({ + sandbox: mockSandbox, + cwd: "/workspace", + }); + + // biome-ignore lint/style/noNonNullAssertion: test mock + const result = (await tool.execute!( + { path: "test.invocation", outputFilter: "tail -1" }, + {} as never, + )) as { content: string }; + + expect(result.content).toBe("line3"); + // Verify sed + filter command was used + expect(mockSandbox.executeCommand).toHaveBeenCalledWith( + expect.stringContaining("sed -n"), + ); + expect(mockSandbox.executeCommand).toHaveBeenCalledWith( + expect.stringContaining("| tail -1"), + ); + }); + }); +}); diff --git a/src/tools/read-file.ts b/src/tools/read-file.ts index 63f47b9..a070844 100644 --- a/src/tools/read-file.ts +++ b/src/tools/read-file.ts @@ -2,9 +2,16 @@ import nodePath from "node:path"; import { tool } from "ai"; import { z } from "zod"; import type { Sandbox } from "../types.js"; +import { parseInvocationLog } from "./bash.js"; const readFileSchema = z.object({ path: z.string().describe("The path to the file to read"), + outputFilter: z + .string() + .optional() + .describe( + "Optional shell filter to apply to content (e.g., 'tail -20', 'grep -i error')", + ), }); export interface CreateReadFileToolOptions { @@ -13,14 +20,174 @@ export interface CreateReadFileToolOptions { cwd: string; } +/** + * Check if a file is an invocation log file by extension. + */ +function isInvocationFile(filePath: string): boolean { + return filePath.endsWith(".invocation"); +} + +/** + * Parse invocation log content and extract stdout. + */ +function parseInvocationContent(content: string): string { + try { + const log = parseInvocationLog(content); + return log.stdout; + } catch { + // If parsing fails, return original content + return content; + } +} + +/** + * Apply a shell filter to file content using cat. + */ +async function applyFilterWithCat( + sandbox: Sandbox, + cwd: string, + filePath: string, + filter: string, +): Promise<{ content: string; error?: string }> { + const filterCommand = `cd "${cwd}" && cat "${filePath}" | ${filter}`; + + const result = await sandbox.executeCommand(filterCommand); + + if (result.exitCode !== 0) { + return { + content: "", + error: `Filter error: ${result.stderr}`, + }; + } + + return { content: result.stdout }; +} + +/** + * Extract stdout from invocation file and apply filter in a single command. + * Uses sed to extract content between ---STDOUT--- and ---STDERR--- markers. + */ +async function applyFilterToInvocationFile( + sandbox: Sandbox, + cwd: string, + filePath: string, + filter: string, +): Promise<{ content: string; error?: string }> { + // Extract stdout section and pipe through filter + // sed extracts lines between ---STDOUT--- and ---STDERR--- (exclusive) + const filterCommand = `cd "${cwd}" && sed -n '/^---STDOUT---$/,/^---STDERR---$/{ /^---STDOUT---$/d; /^---STDERR---$/d; p }' "${filePath}" | ${filter}`; + + const result = await sandbox.executeCommand(filterCommand); + + if (result.exitCode !== 0) { + return { + content: "", + error: `Filter error: ${result.stderr}`, + }; + } + + return { content: result.stdout }; +} + +/** + * Extract stdout from invocation file using sed. + */ +async function extractInvocationStdout( + sandbox: Sandbox, + cwd: string, + filePath: string, +): Promise { + // Extract stdout section using sed + const command = `cd "${cwd}" && sed -n '/^---STDOUT---$/,/^---STDERR---$/{ /^---STDOUT---$/d; /^---STDERR---$/d; p }' "${filePath}"`; + + const result = await sandbox.executeCommand(command); + + if (result.exitCode !== 0) { + // Fall back to reading and parsing + const content = await sandbox.readFile(filePath); + return parseInvocationContent(content); + } + + return result.stdout; +} + +function generateDescription(): string { + const lines = [ + "Read the contents of a file from the sandbox.", + "", + "OUTPUT FILTERING:", + "Use the outputFilter parameter to filter content before it is returned.", + "Examples:", + ' outputFilter: "tail -50" # Last 50 lines', + ' outputFilter: "head -100" # First 100 lines', + ' outputFilter: "grep error" # Lines containing "error"', + ' outputFilter: "grep -i warn" # Case-insensitive search', + "", + "INVOCATION FILES:", + "For .invocation files (from bash tool logs), automatically extracts stdout.", + "Use outputFilter to re-query stored command output with different filters.", + 'Example: readFile({ path: "...invocation", outputFilter: "grep -i error" })', + ]; + return lines.join("\n"); +} + export function createReadFileTool(options: CreateReadFileToolOptions) { const { sandbox, cwd } = options; return tool({ - description: "Read the contents of a file from the sandbox.", + description: generateDescription(), inputSchema: readFileSchema, - execute: async ({ path }) => { + execute: async ({ path, outputFilter }) => { const resolvedPath = nodePath.posix.resolve(cwd, path); + + // For invocation files, extract stdout section + if (isInvocationFile(path)) { + if (outputFilter) { + // Use sed to extract stdout and pipe through filter in one command + const filterResult = await applyFilterToInvocationFile( + sandbox, + cwd, + resolvedPath, + outputFilter, + ); + if (filterResult.error) { + // Fall back to reading file and parsing + const content = await extractInvocationStdout( + sandbox, + cwd, + resolvedPath, + ); + return { content, error: filterResult.error }; + } + return { content: filterResult.content }; + } + + // No filter, just extract stdout + const content = await extractInvocationStdout( + sandbox, + cwd, + resolvedPath, + ); + return { content }; + } + + // For regular files with a filter, use cat | filter directly + if (outputFilter) { + const filterResult = await applyFilterWithCat( + sandbox, + cwd, + resolvedPath, + outputFilter, + ); + if (filterResult.error) { + // On filter error, fall back to reading the file normally + const content = await sandbox.readFile(resolvedPath); + return { content, error: filterResult.error }; + } + return { content: filterResult.content }; + } + + // No filter, just read the file normally const content = await sandbox.readFile(resolvedPath); return { content }; }, diff --git a/src/types.ts b/src/types.ts index 37a7824..7de895f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -156,6 +156,20 @@ export interface CreateBashToolOptions { * @default 1000 */ maxFiles?: number; + + /** + * Enable storing full command output in invocation log files. + * When enabled, full unfiltered output is stored in files that can be + * re-read and filtered later via the readFile tool. + * @default false + */ + enableInvocationLog?: boolean; + + /** + * Path (relative to destination) where invocation log files are stored. + * @default ".bash-tool/commands" + */ + invocationLogPath?: string; } // Import actual tool creators for proper typing