Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/cap-background-command-output.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/agent-core": minor
---

Apply the 16 MiB output cap to background shell commands too, so a runaway background command can no longer fill the disk or crash the process; it is now terminated with the same guidance to redirect large output to a file.
47 changes: 24 additions & 23 deletions packages/agent-core/src/agent/background/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,8 @@ interface ManagedTask {
/** Total UTF-8 bytes observed, including chunks dropped from the live ring buffer. */
outputSizeBytes: number;
/**
* True once a foreground command has crossed `MAX_FOREGROUND_OUTPUT_BYTES`
* and termination has been requested. One-shot guard so the ceiling fires
* exactly once.
* True once a command has crossed `MAX_TASK_OUTPUT_BYTES` and termination has
* been requested. One-shot guard so the ceiling fires exactly once.
*/
outputLimitTripped: boolean;
status: BackgroundTaskStatus;
Expand Down Expand Up @@ -123,20 +122,20 @@ const MAX_OUTPUT_BYTES = 1024 * 1024; // 1 MiB
const NOTIFICATION_FALLBACK_PREVIEW_BYTES = 3_000;

/**
* Hard ceiling on the combined output a single *foreground* command may stream
* before it is force-terminated (SIGTERM → grace → SIGKILL). It guards the
* live-forward path — which has no memory bound of its own — from a runaway
* command (e.g. `b3sum --length <huge>`) whose output would otherwise grow the
* process heap until Node aborts with an out-of-memory crash.
*
* Detached (background) tasks are exempt: their output is ring-buffered and
* spilled to disk, so it never accumulates unbounded in memory.
* Hard ceiling on the combined output a single shell command may stream before
* it is force-terminated (SIGTERM → grace → SIGKILL). It guards both the
* live-forward path and the on-disk `output.log` write chain from a runaway
* command (e.g. `b3sum --length <huge>`) whose output would otherwise grow
* without bound — filling the disk, or retaining each pending-write chunk until
* Node aborts with an out-of-memory crash. Scoped to process tasks (foreground
* and background); subagent and user-question results are appended once and must
* always be persisted, so they are intentionally not capped here.
*/
const MAX_FOREGROUND_OUTPUT_BYTES = 16 * 1024 * 1024; // 16 MiB
const MAX_TASK_OUTPUT_BYTES = 16 * 1024 * 1024; // 16 MiB

/** Terminal `stopReason` recorded when a foreground command trips the output ceiling. */
function foregroundOutputLimitReason(): string {
const mib = Math.floor(MAX_FOREGROUND_OUTPUT_BYTES / (1024 * 1024));
/** Terminal `stopReason` recorded when a command trips the output ceiling. */
function outputLimitReason(): string {
const mib = Math.floor(MAX_TASK_OUTPUT_BYTES / (1024 * 1024));
return (
`Output limit exceeded: the command produced more than ${mib} MiB and was ` +
'terminated. Redirect large output to a file (e.g. `command > out.txt`) and ' +
Expand Down Expand Up @@ -642,18 +641,20 @@ export class BackgroundManager {
entry.outputRingChars -= removed.length;
}

// Foreground output ceiling: a single non-detached command must not grow
// the (unbounded) live-forward buffer until the process runs out of memory.
// Trip once, then request graceful termination through the shared stop path
// (SIGTERM → grace → SIGKILL). Detached tasks are exempt — their output is
// ring-buffered and spilled to disk, so it never accumulates in memory.
// Output ceiling: a single shell command must not grow the (unbounded)
// live-forward buffer or the on-disk write chain until the process runs out
// of memory or fills the disk. Trip once, then request graceful termination
// through the shared stop path (SIGTERM → grace → SIGKILL). Scoped to
// process tasks (foreground and background): subagent and user-question tasks
// append their bounded result in one shot and must always persist it, so they
// are intentionally not capped here.
if (
!entry.outputLimitTripped &&
!this.isDetached(entry) &&
entry.outputSizeBytes > MAX_FOREGROUND_OUTPUT_BYTES
entry.task.kind === 'process' &&
entry.outputSizeBytes > MAX_TASK_OUTPUT_BYTES
) {
entry.outputLimitTripped = true;
void this.stop(entry.taskId, foregroundOutputLimitReason());
void this.stop(entry.taskId, outputLimitReason());
}

// Once the cap has tripped the task is being terminated: keep only the
Expand Down
2 changes: 1 addition & 1 deletion packages/agent-core/src/agent/background/process-task.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ function observeProcessStream(
if (chunk.length === 0) return;
sink.appendOutput(chunk);
// Once the manager has begun terminating the task — an output-limit trip
// (see MAX_FOREGROUND_OUTPUT_BYTES), a user interrupt, or a timeout —
// (see MAX_TASK_OUTPUT_BYTES), a user interrupt, or a timeout —
// `appendOutput` above may synchronously abort the signal. Stop forwarding
// live output from that point so the unbounded forward buffer cannot keep
// growing while the process is being killed.
Expand Down
90 changes: 79 additions & 11 deletions packages/agent-core/test/agent/background/output-limit.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
/**
* Foreground output ceiling.
* Output ceiling for shell (process) tasks.
*
* A single non-detached command that streams more output than the per-command
* limit must be force-terminated instead of growing the (unbounded)
* live-forward buffer until the process runs out of memory. Detached
* (background) tasks are exempt: their output is ring-buffered and spilled to
* disk, so it never accumulates in memory.
* A single shell command that streams more output than the per-command limit
* must be force-terminated instead of growing the (unbounded) live-forward
* buffer or the on-disk write chain until the process runs out of memory or
* fills the disk. The ceiling applies to process tasks, foreground and
* background alike. Subagent and user-question tasks append their bounded result
* in one shot and must always be persisted, so they are not capped.
*/

import { mkdtempSync, rmSync } from 'node:fs';
Expand All @@ -17,7 +18,7 @@ import { join } from 'pathe';
import { describe, expect, it, vi } from 'vitest';

import { ProcessBackgroundTask } from '../../../src/agent/background';
import { createBackgroundManager, waitForTerminal } from './helpers';
import { agentTask, createBackgroundManager, waitForTerminal } from './helpers';

const MiB = 1024 * 1024;
const LIMIT_BYTES = 16 * MiB;
Expand Down Expand Up @@ -92,7 +93,7 @@ function sigtermIgnoringProcess(chunks: string[]): { proc: KaosProcess; kill: Re
return { proc, kill };
}

describe('BackgroundManager — foreground output ceiling', () => {
describe('BackgroundManager — output ceiling (foreground + background)', () => {
it('terminates a foreground command that exceeds the output limit and stops forwarding', async () => {
const { manager } = createBackgroundManager();
// 20 MiB total, well past the 16 MiB ceiling.
Expand All @@ -119,7 +120,7 @@ describe('BackgroundManager — foreground output ceiling', () => {
expect(forwardedChars).toBeLessThanOrEqual(LIMIT_BYTES);
});

it('does not terminate a detached (background) task for the same output', async () => {
it('also terminates a detached (background) task that exceeds the output limit', async () => {
const { manager } = createBackgroundManager();
const chunks = Array.from({ length: 20 }, () => 'x'.repeat(MiB));
const { proc, kill } = streamingProcess(chunks);
Expand All @@ -131,8 +132,9 @@ describe('BackgroundManager — foreground output ceiling', () => {

const info = await waitForTerminal(manager, taskId);

expect(info?.status).toBe('completed');
expect(kill).not.toHaveBeenCalledWith('SIGTERM');
expect(info?.status).toBe('killed');
expect(info?.stopReason ?? '').toMatch(/output limit/i);
expect(kill).toHaveBeenCalledWith('SIGTERM');
});

it('stops enqueuing output to disk once the foreground cap trips', async () => {
Expand Down Expand Up @@ -169,4 +171,70 @@ describe('BackgroundManager — foreground output ceiling', () => {
rmSync(sessionDir, { recursive: true, force: true });
}
});

it('stops enqueuing output to disk once the cap trips for a background task', async () => {
const sessionDir = mkdtempSync(join(tmpdir(), 'bpm-limit-bg-'));
try {
const { manager, persistence } = createBackgroundManager({ sessionDir });
let persistedChars = 0;
const spy = vi
.spyOn(persistence!, 'appendTaskOutput')
.mockImplementation(async (_id: string, chunk: string) => {
persistedChars += chunk.length;
});

// 20 MiB, and the producer ignores SIGTERM so it keeps writing through
// the whole grace window. Background tasks now share the same ceiling.
const chunks = Array.from({ length: 20 }, () => 'x'.repeat(MiB));
const { proc } = sigtermIgnoringProcess(chunks);

const taskId = manager.registerTask(new ProcessBackgroundTask(proc, 'runaway', 'bg', () => {}), {
detached: true,
timeoutMs: 60_000,
});

const info = await waitForTerminal(manager, taskId);

expect(info?.status).toBe('killed');
// Same guarantee as the foreground case: once the cap trips, subsequent
// chunks are dropped before they reach the disk write chain.
expect(persistedChars).toBeLessThanOrEqual(17 * MiB);

spy.mockRestore();
} finally {
rmSync(sessionDir, { recursive: true, force: true });
}
});

it('does not cap or drop a detached subagent result larger than the limit', async () => {
const sessionDir = mkdtempSync(join(tmpdir(), 'bpm-limit-agent-'));
try {
const { manager, persistence } = createBackgroundManager({ sessionDir });
let persistedChars = 0;
const spy = vi
.spyOn(persistence!, 'appendTaskOutput')
.mockImplementation(async (_id: string, chunk: string) => {
persistedChars += chunk.length;
});

// 20 MiB result — well past the 16 MiB ceiling — delivered in one shot,
// exactly how a subagent appends its completed result.
const bigResult = 'y'.repeat(20 * MiB);
const taskId = manager.registerTask(
agentTask(Promise.resolve({ result: bigResult }), 'big subagent result'),
{ detached: true, timeoutMs: 60_000 },
);

const info = await waitForTerminal(manager, taskId);

// Non-process tasks must complete normally and have their full result
// persisted; the shell-output ceiling must not drop it.
expect(info?.status).toBe('completed');
expect(persistedChars).toBe(bigResult.length);

spy.mockRestore();
} finally {
rmSync(sessionDir, { recursive: true, force: true });
}
});
});
Loading