diff --git a/.changeset/cap-background-command-output.md b/.changeset/cap-background-command-output.md new file mode 100644 index 000000000..09ed4bf24 --- /dev/null +++ b/.changeset/cap-background-command-output.md @@ -0,0 +1,5 @@ +--- +"@moonshot-ai/agent-core": minor +--- + +Apply the 16 MiB output cap to background shell commands too, so a runaway background command can no longer fill the disk or crash the process; it is now terminated with the same guidance to redirect large output to a file. diff --git a/packages/agent-core/src/agent/background/index.ts b/packages/agent-core/src/agent/background/index.ts index 5ef3fac69..37e76e0c1 100644 --- a/packages/agent-core/src/agent/background/index.ts +++ b/packages/agent-core/src/agent/background/index.ts @@ -67,9 +67,8 @@ interface ManagedTask { /** Total UTF-8 bytes observed, including chunks dropped from the live ring buffer. */ outputSizeBytes: number; /** - * True once a foreground command has crossed `MAX_FOREGROUND_OUTPUT_BYTES` - * and termination has been requested. One-shot guard so the ceiling fires - * exactly once. + * True once a command has crossed `MAX_TASK_OUTPUT_BYTES` and termination has + * been requested. One-shot guard so the ceiling fires exactly once. */ outputLimitTripped: boolean; status: BackgroundTaskStatus; @@ -123,20 +122,20 @@ const MAX_OUTPUT_BYTES = 1024 * 1024; // 1 MiB const NOTIFICATION_FALLBACK_PREVIEW_BYTES = 3_000; /** - * Hard ceiling on the combined output a single *foreground* command may stream - * before it is force-terminated (SIGTERM → grace → SIGKILL). It guards the - * live-forward path — which has no memory bound of its own — from a runaway - * command (e.g. `b3sum --length `) whose output would otherwise grow the - * process heap until Node aborts with an out-of-memory crash. - * - * Detached (background) tasks are exempt: their output is ring-buffered and - * spilled to disk, so it never accumulates unbounded in memory. + * Hard ceiling on the combined output a single shell command may stream before + * it is force-terminated (SIGTERM → grace → SIGKILL). It guards both the + * live-forward path and the on-disk `output.log` write chain from a runaway + * command (e.g. `b3sum --length `) whose output would otherwise grow + * without bound — filling the disk, or retaining each pending-write chunk until + * Node aborts with an out-of-memory crash. Scoped to process tasks (foreground + * and background); subagent and user-question results are appended once and must + * always be persisted, so they are intentionally not capped here. */ -const MAX_FOREGROUND_OUTPUT_BYTES = 16 * 1024 * 1024; // 16 MiB +const MAX_TASK_OUTPUT_BYTES = 16 * 1024 * 1024; // 16 MiB -/** Terminal `stopReason` recorded when a foreground command trips the output ceiling. */ -function foregroundOutputLimitReason(): string { - const mib = Math.floor(MAX_FOREGROUND_OUTPUT_BYTES / (1024 * 1024)); +/** Terminal `stopReason` recorded when a command trips the output ceiling. */ +function outputLimitReason(): string { + const mib = Math.floor(MAX_TASK_OUTPUT_BYTES / (1024 * 1024)); return ( `Output limit exceeded: the command produced more than ${mib} MiB and was ` + 'terminated. Redirect large output to a file (e.g. `command > out.txt`) and ' + @@ -642,18 +641,20 @@ export class BackgroundManager { entry.outputRingChars -= removed.length; } - // Foreground output ceiling: a single non-detached command must not grow - // the (unbounded) live-forward buffer until the process runs out of memory. - // Trip once, then request graceful termination through the shared stop path - // (SIGTERM → grace → SIGKILL). Detached tasks are exempt — their output is - // ring-buffered and spilled to disk, so it never accumulates in memory. + // Output ceiling: a single shell command must not grow the (unbounded) + // live-forward buffer or the on-disk write chain until the process runs out + // of memory or fills the disk. Trip once, then request graceful termination + // through the shared stop path (SIGTERM → grace → SIGKILL). Scoped to + // process tasks (foreground and background): subagent and user-question tasks + // append their bounded result in one shot and must always persist it, so they + // are intentionally not capped here. if ( !entry.outputLimitTripped && - !this.isDetached(entry) && - entry.outputSizeBytes > MAX_FOREGROUND_OUTPUT_BYTES + entry.task.kind === 'process' && + entry.outputSizeBytes > MAX_TASK_OUTPUT_BYTES ) { entry.outputLimitTripped = true; - void this.stop(entry.taskId, foregroundOutputLimitReason()); + void this.stop(entry.taskId, outputLimitReason()); } // Once the cap has tripped the task is being terminated: keep only the diff --git a/packages/agent-core/src/agent/background/process-task.ts b/packages/agent-core/src/agent/background/process-task.ts index 22e96a744..f2b3d7328 100644 --- a/packages/agent-core/src/agent/background/process-task.ts +++ b/packages/agent-core/src/agent/background/process-task.ts @@ -140,7 +140,7 @@ function observeProcessStream( if (chunk.length === 0) return; sink.appendOutput(chunk); // Once the manager has begun terminating the task — an output-limit trip - // (see MAX_FOREGROUND_OUTPUT_BYTES), a user interrupt, or a timeout — + // (see MAX_TASK_OUTPUT_BYTES), a user interrupt, or a timeout — // `appendOutput` above may synchronously abort the signal. Stop forwarding // live output from that point so the unbounded forward buffer cannot keep // growing while the process is being killed. diff --git a/packages/agent-core/test/agent/background/output-limit.test.ts b/packages/agent-core/test/agent/background/output-limit.test.ts index 70fcd3fac..b9903c4bb 100644 --- a/packages/agent-core/test/agent/background/output-limit.test.ts +++ b/packages/agent-core/test/agent/background/output-limit.test.ts @@ -1,11 +1,12 @@ /** - * Foreground output ceiling. + * Output ceiling for shell (process) tasks. * - * A single non-detached command that streams more output than the per-command - * limit must be force-terminated instead of growing the (unbounded) - * live-forward buffer until the process runs out of memory. Detached - * (background) tasks are exempt: their output is ring-buffered and spilled to - * disk, so it never accumulates in memory. + * A single shell command that streams more output than the per-command limit + * must be force-terminated instead of growing the (unbounded) live-forward + * buffer or the on-disk write chain until the process runs out of memory or + * fills the disk. The ceiling applies to process tasks, foreground and + * background alike. Subagent and user-question tasks append their bounded result + * in one shot and must always be persisted, so they are not capped. */ import { mkdtempSync, rmSync } from 'node:fs'; @@ -17,7 +18,7 @@ import { join } from 'pathe'; import { describe, expect, it, vi } from 'vitest'; import { ProcessBackgroundTask } from '../../../src/agent/background'; -import { createBackgroundManager, waitForTerminal } from './helpers'; +import { agentTask, createBackgroundManager, waitForTerminal } from './helpers'; const MiB = 1024 * 1024; const LIMIT_BYTES = 16 * MiB; @@ -92,7 +93,7 @@ function sigtermIgnoringProcess(chunks: string[]): { proc: KaosProcess; kill: Re return { proc, kill }; } -describe('BackgroundManager — foreground output ceiling', () => { +describe('BackgroundManager — output ceiling (foreground + background)', () => { it('terminates a foreground command that exceeds the output limit and stops forwarding', async () => { const { manager } = createBackgroundManager(); // 20 MiB total, well past the 16 MiB ceiling. @@ -119,7 +120,7 @@ describe('BackgroundManager — foreground output ceiling', () => { expect(forwardedChars).toBeLessThanOrEqual(LIMIT_BYTES); }); - it('does not terminate a detached (background) task for the same output', async () => { + it('also terminates a detached (background) task that exceeds the output limit', async () => { const { manager } = createBackgroundManager(); const chunks = Array.from({ length: 20 }, () => 'x'.repeat(MiB)); const { proc, kill } = streamingProcess(chunks); @@ -131,8 +132,9 @@ describe('BackgroundManager — foreground output ceiling', () => { const info = await waitForTerminal(manager, taskId); - expect(info?.status).toBe('completed'); - expect(kill).not.toHaveBeenCalledWith('SIGTERM'); + expect(info?.status).toBe('killed'); + expect(info?.stopReason ?? '').toMatch(/output limit/i); + expect(kill).toHaveBeenCalledWith('SIGTERM'); }); it('stops enqueuing output to disk once the foreground cap trips', async () => { @@ -169,4 +171,70 @@ describe('BackgroundManager — foreground output ceiling', () => { rmSync(sessionDir, { recursive: true, force: true }); } }); + + it('stops enqueuing output to disk once the cap trips for a background task', async () => { + const sessionDir = mkdtempSync(join(tmpdir(), 'bpm-limit-bg-')); + try { + const { manager, persistence } = createBackgroundManager({ sessionDir }); + let persistedChars = 0; + const spy = vi + .spyOn(persistence!, 'appendTaskOutput') + .mockImplementation(async (_id: string, chunk: string) => { + persistedChars += chunk.length; + }); + + // 20 MiB, and the producer ignores SIGTERM so it keeps writing through + // the whole grace window. Background tasks now share the same ceiling. + const chunks = Array.from({ length: 20 }, () => 'x'.repeat(MiB)); + const { proc } = sigtermIgnoringProcess(chunks); + + const taskId = manager.registerTask(new ProcessBackgroundTask(proc, 'runaway', 'bg', () => {}), { + detached: true, + timeoutMs: 60_000, + }); + + const info = await waitForTerminal(manager, taskId); + + expect(info?.status).toBe('killed'); + // Same guarantee as the foreground case: once the cap trips, subsequent + // chunks are dropped before they reach the disk write chain. + expect(persistedChars).toBeLessThanOrEqual(17 * MiB); + + spy.mockRestore(); + } finally { + rmSync(sessionDir, { recursive: true, force: true }); + } + }); + + it('does not cap or drop a detached subagent result larger than the limit', async () => { + const sessionDir = mkdtempSync(join(tmpdir(), 'bpm-limit-agent-')); + try { + const { manager, persistence } = createBackgroundManager({ sessionDir }); + let persistedChars = 0; + const spy = vi + .spyOn(persistence!, 'appendTaskOutput') + .mockImplementation(async (_id: string, chunk: string) => { + persistedChars += chunk.length; + }); + + // 20 MiB result — well past the 16 MiB ceiling — delivered in one shot, + // exactly how a subagent appends its completed result. + const bigResult = 'y'.repeat(20 * MiB); + const taskId = manager.registerTask( + agentTask(Promise.resolve({ result: bigResult }), 'big subagent result'), + { detached: true, timeoutMs: 60_000 }, + ); + + const info = await waitForTerminal(manager, taskId); + + // Non-process tasks must complete normally and have their full result + // persisted; the shell-output ceiling must not drop it. + expect(info?.status).toBe('completed'); + expect(persistedChars).toBe(bigResult.length); + + spy.mockRestore(); + } finally { + rmSync(sessionDir, { recursive: true, force: true }); + } + }); });