From ad60ce73dcd84277ecaf348abe8b685fdf785268 Mon Sep 17 00:00:00 2001 From: lewis Date: Thu, 2 Jul 2026 12:11:22 +0800 Subject: [PATCH 1/2] feat(agent-core): detect stalled turns and force text-only recovery Add a ProgressDetector that watches external state (git status, background tasks) and information gain (new non-trivial tool outputs) to detect when a turn is spinning without progress. After 8 consecutive idle steps, the harness injects a system reminder and forces the next model step to run with no tools available, requiring a text-only response. Successful Edit and Write tool results are now counted as progress even when their output is short, so repeated edits to the same already-dirty file are not misclassified as stalled. The stall threshold and minimum information-gain length are configurable via loop_control.progress_stall_threshold and loop_control.progress_min_info_gain_length. This prevents the no-op tool loops seen with commands like Bash(:), Read /dev/null, and echo placeholders, where the model keeps emitting tool calls instead of responding to the user. 
- packages/agent-core/src/agent/turn/progress-detector.ts (new) - packages/agent-core/src/agent/turn/index.ts - packages/agent-core/src/loop/turn-step.ts - packages/agent-core/src/loop/types.ts - packages/agent-core/src/config/schema.ts - packages/agent-core/test/agent/turn/progress-detector.test.ts (new) - packages/agent-core/test/config/configs.test.ts Co-authored-by: Kimi --- packages/agent-core/src/agent/turn/index.ts | 92 +++++++++- .../src/agent/turn/progress-detector.ts | 158 ++++++++++++++++++ packages/agent-core/src/config/schema.ts | 2 + packages/agent-core/src/loop/turn-step.ts | 8 +- packages/agent-core/src/loop/types.ts | 5 + .../test/agent/turn/progress-detector.test.ts | 156 +++++++++++++++++ .../agent-core/test/config/configs.test.ts | 4 + 7 files changed, 419 insertions(+), 6 deletions(-) create mode 100644 packages/agent-core/src/agent/turn/progress-detector.ts create mode 100644 packages/agent-core/test/agent/turn/progress-detector.test.ts diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts index 847794aaa..7061fdfa2 100644 --- a/packages/agent-core/src/agent/turn/index.ts +++ b/packages/agent-core/src/agent/turn/index.ts @@ -1,4 +1,5 @@ import { createHash } from 'node:crypto'; +import type { Readable } from 'node:stream'; import { createControlledPromise, type ControlledPromise } from '@antfu/utils'; import { @@ -39,6 +40,7 @@ import { abortable, isUserCancellation, userCancellationReason } from '../../uti import { USER_PROMPT_ORIGIN, type PromptOrigin } from '../context'; import { renderUserPromptHookBlockResult, renderUserPromptHookResult } from '../../session/hooks'; import { canonicalTelemetryArgs, isPlainRecord } from './canonical-args'; +import { ProgressDetector, type ProgressSnapshot } from './progress-detector'; import { ToolCallDeduplicator } from './tool-dedup'; import { budgetToolResultForModel } from './tool-result-budget'; @@ -79,6 +81,22 @@ const GOAL_MODEL_CONFIG_PAUSE_PREFIX = 
'Paused after model configuration error'; const GOAL_RUNTIME_PAUSE_PREFIX = 'Paused after runtime error'; const GOAL_PROVIDER_FILTERED_PAUSE_REASON = 'Paused after provider safety policy block'; +/** + * Number of consecutive steps without external progress before the harness + * forces the model into text-only mode. This is a safety rail against tool-use + * loops where the model emits placeholder calls (e.g. Bash(:), Read /dev/null) + * instead of responding to the user. + */ +const PROGRESS_STALL_THRESHOLD = 8; + +const PROGRESS_STALL_REMINDER = [ + 'The last several actions did not advance the task: no files were changed,', + 'no new background work started, and no new useful information was gathered.', + 'Stop making tool calls. In your next response, reply with text only:', + 'summarize what you know, what has already been tried, and what decision or', + 'information is needed next.', +].join(' '); + /** * The prompt the goal driver appends to start each continuation turn — the * autonomous stand-in for the user typing "continue". The model decides when to @@ -665,7 +683,14 @@ export class TurnFlow { private async runStepLoop(turnId: number, signal: AbortSignal): Promise { let stopHookContinuationUsed = false; let goalOutcomeMessageContinuationUsed = false; + let forceTextMode = false; const deduper = new ToolCallDeduplicator({ telemetry: this.agent.telemetry }); + const loopControl = this.agent.kimiConfig?.loopControl; + const progressStallThreshold = loopControl?.progressStallThreshold ?? PROGRESS_STALL_THRESHOLD; + const progressDetector = new ProgressDetector({ + takeSnapshot: () => this.takeProgressSnapshot(), + minInfoGainLength: loopControl?.progressMinInfoGainLength, + }); await this.agent.mcp?.waitForInitialLoad(signal); // Surface the active goal at the start of the turn (append-only; no-op when // there is no active goal). 
Each goal continuation is its own turn, so this @@ -674,7 +699,6 @@ export class TurnFlow { while (true) { signal.throwIfAborted(); const model = this.agent.config.model; - const loopControl = this.agent.kimiConfig?.loopControl; let stopForGoalBudget = false; try { const result = await runTurn({ @@ -683,7 +707,7 @@ export class TurnFlow { llm: this.agent.llm, buildMessages: () => this.agent.context.messages, buildMessagesStrict: () => this.agent.context.strictMessages, - dispatchEvent: this.buildDispatchEvent(turnId), + dispatchEvent: this.buildDispatchEvent(turnId, progressDetector), tools: this.agent.tools.loopTools, log: this.agent.log, maxSteps: loopControl?.maxStepsPerTurn, @@ -697,6 +721,7 @@ export class TurnFlow { } }, hooks: { + // oxlint-disable-next-line no-loop-func -- step hook state is scoped to this turn. beforeStep: async ({ signal: stepSignal }) => { this.agent.microCompaction.detect(); await this.agent.fullCompaction.beforeStep(stepSignal); @@ -709,12 +734,29 @@ export class TurnFlow { this.flushSteerBuffer(); await this.agent.injection.inject(); deduper.beginStep(); + if (forceTextMode) { + this.agent.context.appendSystemReminder(PROGRESS_STALL_REMINDER, { + kind: 'system_trigger', + name: 'progress_stall_guard', + }); + return { tools: [] }; + } return; }, - afterStep: async ({ usage }) => { + // oxlint-disable-next-line no-loop-func -- step hook state is scoped to this turn. + afterStep: async ({ stepNumber, usage }) => { this.agent.usage.record(model, usage, 'turn'); await this.agent.fullCompaction.afterStep(); deduper.endStep(); + const progress = await progressDetector.recordStep(stepNumber); + if (!progress && progressDetector.stepsSinceLastProgress(stepNumber) >= progressStallThreshold) { + this.agent.log.warn('turn appears stalled; forcing text-only mode', { + turnId, + stepNumber, + threshold: progressStallThreshold, + }); + forceTextMode = true; + } return stopForGoalBudget ? 
{ stopTurn: true } : undefined; }, // oxlint-disable-next-line no-loop-func -- stop hook continuation state is scoped to this turn. @@ -840,10 +882,11 @@ export class TurnFlow { } } - private buildDispatchEvent(turnId: number) { + private buildDispatchEvent(turnId: number, progressDetector?: ProgressDetector) { return createLoopEventDispatcher({ appendTranscriptRecord: async (event: LoopRecordedEvent) => { this.agent.context.appendLoopEvent(event); + progressDetector?.onLoopEvent(event); }, emitLiveEvent: (event: LoopEvent) => { this.noteFirstRequestEvent(event); @@ -854,6 +897,47 @@ export class TurnFlow { }); } + private async takeProgressSnapshot(): Promise<ProgressSnapshot> { + const cwd = this.agent.config.cwd; + const [gitStatus, backgroundTasks] = await Promise.all([ + this.runGitStatus(cwd), + this.captureBackgroundTasks(), + ]); + return { gitStatus, backgroundTasks }; + } + + private async runGitStatus(cwd: string): Promise<string> { + try { + const proc = await this.agent.kaos.exec('git', '-C', cwd, 'status', '--porcelain'); + const stdout = await this.collectStream(proc.stdout); + const exitCode = await proc.wait(); + if (exitCode !== 0) { + return ''; + } + return stdout.trim(); + } catch { + return ''; + } + } + + private captureBackgroundTasks(): string { + const tasks = this.agent.background.list(true); + return JSON.stringify( + tasks.map((task) => ({ + id: task.taskId, + status: task.status, + })), + ); + } + + private async collectStream(stream: Readable): Promise<string> { + const chunks: Buffer[] = []; + for await (const chunk of stream) { + chunks.push(Buffer.isBuffer(chunk) ? 
chunk : Buffer.from(chunk as string)); + } + return Buffer.concat(chunks).toString('utf-8'); + } + private noteFirstRequestEvent(event: LoopEvent): void { switch (event.type) { case 'step.end': diff --git a/packages/agent-core/src/agent/turn/progress-detector.ts b/packages/agent-core/src/agent/turn/progress-detector.ts new file mode 100644 index 000000000..2a8118709 --- /dev/null +++ b/packages/agent-core/src/agent/turn/progress-detector.ts @@ -0,0 +1,158 @@ +/** + * Detects when a turn is spinning without making real progress. + * + * Progress is measured by looking at external, observable state rather than + * interpreting model outputs: + * + * - Information gain: successful tool outputs that are non-trivial and have + * not been seen before in this turn. + * - External state change: git working tree, background task lifecycle, or + * other host-provided snapshots. + * + * When a configurable number of consecutive steps pass without progress, the + * host treats the turn as stalled (the detector itself only counts idle steps). + * The host can then force the model into text-only mode instead of letting it + * continue emitting placeholder tool calls. + */ + +import { createHash } from 'node:crypto'; + +import type { LoopRecordedEvent, LoopToolCallEvent, LoopToolResultEvent } from '../../loop/events'; + +const PROGRESS_TOOLS = new Set(['Edit', 'Write']); + +export interface ProgressSnapshot { + /** + * `git status --porcelain` output. Empty when git is unavailable or the tree + * is clean. Changes when the set of dirty paths or their status codes changes; further edits to an already-dirty file do not change it. + */ + readonly gitStatus: string; + /** + * Snapshot of active/terminal background tasks. Changes when tasks are + * created, complete, fail, or are stopped. + */ + readonly backgroundTasks: string; +} + +export type TakeProgressSnapshot = () => Promise<ProgressSnapshot> | ProgressSnapshot; + +export interface ProgressDetectorOptions { + /** Called once per step to capture external world state. 
*/ + readonly takeSnapshot: TakeProgressSnapshot; + /** + * Minimum successful output length to count as information gain. + * Outputs shorter than this are treated as trivial/no-op responses. + */ + readonly minInfoGainLength?: number | undefined; +} + +const DEFAULT_MIN_INFO_GAIN_LENGTH = 60; + +/** + * Tracks whether a turn is still advancing. + * + * The detector is intentionally stateful per-turn: it accumulates seen output + * hashes and the last external snapshot, and reports how many consecutive steps + * have passed without any progress signal. + */ +export class ProgressDetector { + private readonly takeSnapshot: TakeProgressSnapshot; + private readonly minInfoGainLength: number; + private readonly seenOutputHashes = new Set<string>(); + private previousSnapshot?: ProgressSnapshot; + private currentStepEvents: LoopRecordedEvent[] = []; + private readonly toolCallNames = new Map<string, string>(); + private lastProgressStep = 0; + + constructor(options: ProgressDetectorOptions) { + this.takeSnapshot = options.takeSnapshot; + this.minInfoGainLength = options.minInfoGainLength ?? DEFAULT_MIN_INFO_GAIN_LENGTH; + } + + /** Called for every recorded loop event so the detector can observe results. */ + onLoopEvent(event: LoopRecordedEvent): void { + this.currentStepEvents.push(event); + if (event.type === 'tool.call') { + const call = event as LoopToolCallEvent; + this.toolCallNames.set(call.toolCallId, call.name); + } + } + + /** + * Evaluates the events collected since the last call and reports whether this + * step made progress. Resets the per-step event buffer. 
+ */ + async recordStep(stepNumber: number): Promise<boolean> { + const snapshot = await this.takeSnapshot(); + const stateChanged = this.hasExternalStateChanged(snapshot); + this.previousSnapshot = snapshot; + + const infoGained = this.hasInformationGain(); + this.currentStepEvents = []; + + const progress = stateChanged || infoGained; + if (progress) { + this.lastProgressStep = stepNumber; + } + return progress; + } + + /** Number of consecutive steps since the last progress signal. */ + stepsSinceLastProgress(currentStep: number): number { + return currentStep - this.lastProgressStep; + } + + private hasExternalStateChanged(current: ProgressSnapshot): boolean { + if (this.previousSnapshot === undefined) { + return false; // First step has no previous snapshot to compare against. + } + return ( + this.previousSnapshot.gitStatus !== current.gitStatus || + this.previousSnapshot.backgroundTasks !== current.backgroundTasks + ); + } + + private hasInformationGain(): boolean { + for (const event of this.currentStepEvents) { + if (event.type !== 'tool.result') { + continue; + } + const resultEvent = event as LoopToolResultEvent; + const result = resultEvent.result; + if (result.isError === true) { + continue; + } + // Successful writes/edits are real progress even when their output is + // short, because they change file contents. git status --porcelain does + // not capture repeated edits to an already-dirty file. 
+ const toolName = this.toolCallNames.get(resultEvent.toolCallId); + if (toolName !== undefined && PROGRESS_TOOLS.has(toolName)) { + return true; + } + const text = extractOutputText(result.output); + if (text.length < this.minInfoGainLength) { + continue; + } + const hash = hashString(text); + if (!this.seenOutputHashes.has(hash)) { + this.seenOutputHashes.add(hash); + return true; + } + } + return false; + } +} + +function extractOutputText(output: string | readonly { readonly type: string; readonly text?: string }[]): string { + if (typeof output === 'string') { + return output; + } + return output + .filter((part): part is { readonly type: string; readonly text: string } => part.type === 'text' && typeof part.text === 'string') + .map((part) => part.text) + .join(''); +} + +function hashString(value: string): string { + return createHash('sha256').update(value, 'utf8').digest('hex'); +} diff --git a/packages/agent-core/src/config/schema.ts b/packages/agent-core/src/config/schema.ts index 9b1ac9d64..39566c907 100644 --- a/packages/agent-core/src/config/schema.ts +++ b/packages/agent-core/src/config/schema.ts @@ -117,6 +117,8 @@ export const LoopControlSchema = z.object({ maxRalphIterations: z.number().int().min(-1).optional(), reservedContextSize: z.number().int().min(0).optional(), compactionTriggerRatio: z.number().min(0.5).max(0.99).optional(), + progressStallThreshold: z.number().int().min(1).optional(), + progressMinInfoGainLength: z.number().int().min(0).optional(), }); export type LoopControl = z.infer; diff --git a/packages/agent-core/src/loop/turn-step.ts b/packages/agent-core/src/loop/turn-step.ts index 8d72cad5d..d6ddd3199 100644 --- a/packages/agent-core/src/loop/turn-step.ts +++ b/packages/agent-core/src/loop/turn-step.ts @@ -64,6 +64,7 @@ export async function executeLoopStep(deps: ExecuteLoopStepDeps): Promise<{ recordUsage, } = deps; + let stepTools = tools; if (hooks?.beforeStep !== undefined) { const beforeStep = await hooks.beforeStep({ 
turnId, @@ -74,6 +75,9 @@ export async function executeLoopStep(deps: ExecuteLoopStepDeps): Promise<{ if (beforeStep?.block === true) { throw new Error(beforeStep.reason ?? `Step ${String(currentStep)} was blocked`); } + if (beforeStep?.tools !== undefined) { + stepTools = beforeStep.tools; + } } signal.throwIfAborted(); @@ -84,7 +88,7 @@ export async function executeLoopStep(deps: ExecuteLoopStepDeps): Promise<{ const stepUuid = randomUUID(); const step: ToolCallStepContext = { - tools, + tools: stepTools, hooks, log, dispatchEvent, @@ -104,7 +108,7 @@ export async function executeLoopStep(deps: ExecuteLoopStepDeps): Promise<{ const chatParams: LLMChatParams = { messages, - tools: tools ?? [], + tools: stepTools ?? [], signal, ...createChatStreamingCallbacks({ dispatchEvent, diff --git a/packages/agent-core/src/loop/types.ts b/packages/agent-core/src/loop/types.ts index 9d290f235..2dd2fc24d 100644 --- a/packages/agent-core/src/loop/types.ts +++ b/packages/agent-core/src/loop/types.ts @@ -198,6 +198,11 @@ export interface LoopStoppedStepContext extends LoopStepHookContext { export interface BeforeStepResult { readonly block?: boolean | undefined; readonly reason?: string | undefined; + /** + * Override the tools available to the model for this step only. + * Returning an empty array forces a text-only response. 
+ */ + readonly tools?: readonly ExecutableTool[] | undefined; } export interface AfterStepResult { diff --git a/packages/agent-core/test/agent/turn/progress-detector.test.ts b/packages/agent-core/test/agent/turn/progress-detector.test.ts new file mode 100644 index 000000000..ac16cbf11 --- /dev/null +++ b/packages/agent-core/test/agent/turn/progress-detector.test.ts @@ -0,0 +1,156 @@ +import { describe, expect, it } from 'vitest'; + +import type { LoopRecordedEvent } from '../../../src/loop/events'; +import { + ProgressDetector, + type ProgressSnapshot, +} from '../../../src/agent/turn/progress-detector'; + +function makeToolResultEvent(output: string, isError = false): LoopRecordedEvent { + return { + type: 'tool.result', + parentUuid: 'parent-1', + toolCallId: 'call-1', + result: { output, isError }, + }; +} + +function makeToolCallEvent(name: string): LoopRecordedEvent { + return { + type: 'tool.call', + uuid: 'call-1', + turnId: '0', + step: 1, + stepUuid: 'step-1', + toolCallId: 'call-1', + name, + args: {}, + }; +} + +function stableSnapshot(): ProgressSnapshot { + return { gitStatus: '', backgroundTasks: '[]' }; +} + +function changingSnapshot(step: number): ProgressSnapshot { + return { gitStatus: `M file-${step}.ts`, backgroundTasks: '[]' }; +} + +async function runStep( + detector: ProgressDetector, + stepNumber: number, + events: LoopRecordedEvent[], +): Promise<boolean> { + for (const event of events) { + detector.onLoopEvent(event); + } + return detector.recordStep(stepNumber); +} + +describe('ProgressDetector', () => { + it('reports progress when git status changes', async () => { + let snapshot = stableSnapshot(); + const detector = new ProgressDetector({ + takeSnapshot: () => snapshot, + }); + + expect(await runStep(detector, 1, [makeToolResultEvent('some output')])).toBe(false); + + snapshot = changingSnapshot(2); + expect(await runStep(detector, 2, [makeToolResultEvent('some output')])).toBe(true); + }); + + it('reports progress when a new non-trivial tool 
output is seen', async () => { + const detector = new ProgressDetector({ + takeSnapshot: () => stableSnapshot(), + }); + + expect( + await runStep(detector, 1, [makeToolResultEvent('this is the first substantial output that is definitely longer than the threshold')]), + ).toBe(true); + + // Same output again should not count as progress. + expect( + await runStep(detector, 2, [makeToolResultEvent('this is the first substantial output that is definitely longer than the threshold')]), + ).toBe(false); + + // A different substantial output counts. + expect( + await runStep(detector, 3, [makeToolResultEvent('this is the second substantial output that is definitely longer than the threshold and different')]), + ).toBe(true); + }); + + it('ignores trivial and error outputs', async () => { + const detector = new ProgressDetector({ + takeSnapshot: () => stableSnapshot(), + }); + + expect(await runStep(detector, 1, [makeToolResultEvent('ok')])).toBe(false); + expect(await runStep(detector, 2, [makeToolResultEvent('')])).toBe(false); + expect(await runStep(detector, 3, [makeToolResultEvent('Command executed successfully.')])).toBe( + false, + ); + expect(await runStep(detector, 4, [makeToolResultEvent('error', true)])).toBe(false); + }); + + it('tracks consecutive steps without progress', async () => { + const detector = new ProgressDetector({ + takeSnapshot: () => stableSnapshot(), + }); + + await runStep(detector, 1, [makeToolResultEvent('this is one substantial output that is definitely longer than the threshold value')]); + await runStep(detector, 2, [makeToolResultEvent('ok')]); + await runStep(detector, 3, [makeToolResultEvent('ok')]); + await runStep(detector, 4, [makeToolResultEvent('ok')]); + + expect(detector.stepsSinceLastProgress(4)).toBe(3); + }); + + it('resets idle counter after progress', async () => { + const detector = new ProgressDetector({ + takeSnapshot: () => stableSnapshot(), + }); + + await runStep(detector, 1, [makeToolResultEvent('this is the first 
substantial output that is definitely longer than the threshold value')]); + await runStep(detector, 2, [makeToolResultEvent('ok')]); + await runStep(detector, 3, [makeToolResultEvent('this is the second substantial output that is definitely longer than the threshold value and different from the first')]); + + expect(detector.stepsSinceLastProgress(3)).toBe(0); + }); + + it('does not count tool.call events alone as progress', async () => { + const detector = new ProgressDetector({ + takeSnapshot: () => stableSnapshot(), + }); + + expect(await runStep(detector, 1, [makeToolCallEvent('Read')])).toBe(false); + }); + + it('counts successful Edit/Write results as progress even with short output', async () => { + const detector = new ProgressDetector({ + takeSnapshot: () => stableSnapshot(), + }); + + expect( + await runStep(detector, 1, [ + makeToolCallEvent('Edit'), + makeToolResultEvent('ok'), + ]), + ).toBe(true); + + expect( + await runStep(detector, 2, [ + makeToolCallEvent('Write'), + makeToolResultEvent('done'), + ]), + ).toBe(true); + + // Failed edits do not count. 
+ expect( + await runStep(detector, 3, [ + makeToolCallEvent('Edit'), + makeToolResultEvent('error', true), + ]), + ).toBe(false); + }); +}); diff --git a/packages/agent-core/test/config/configs.test.ts b/packages/agent-core/test/config/configs.test.ts index 458a5cf8d..951397eac 100644 --- a/packages/agent-core/test/config/configs.test.ts +++ b/packages/agent-core/test/config/configs.test.ts @@ -97,6 +97,8 @@ max_steps_per_run = 42 max_retries_per_step = 3 reserved_context_size = 50000 compaction_trigger_ratio = 0.85 +progress_stall_threshold = 12 +progress_min_info_gain_length = 120 [background] max_running_tasks = 4 @@ -174,6 +176,8 @@ describe('harness config TOML loader', () => { maxRetriesPerStep: 3, reservedContextSize: 50000, compactionTriggerRatio: 0.85, + progressStallThreshold: 12, + progressMinInfoGainLength: 120, }); expect(config.background).toMatchObject({ maxRunningTasks: 4, From 40579ac35b4ad179db1f1d4b5847f4c36c9187ed Mon Sep 17 00:00:00 2001 From: lewis Date: Thu, 2 Jul 2026 19:03:23 +0800 Subject: [PATCH 2/2] chore: add changeset for progress detector --- .changeset/progress-detector-stalled-turns.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/progress-detector-stalled-turns.md diff --git a/.changeset/progress-detector-stalled-turns.md b/.changeset/progress-detector-stalled-turns.md new file mode 100644 index 000000000..bbbac28b7 --- /dev/null +++ b/.changeset/progress-detector-stalled-turns.md @@ -0,0 +1,5 @@ +--- +"@moonshot-ai/kimi-code": minor +--- + +Detect stalled turns and force text-only recovery. When a turn runs 8 consecutive steps (configurable via `loop_control.progress_stall_threshold`) without external progress — no working-tree change, no background-task change, and no new non-trivial tool output (minimum length configurable via `loop_control.progress_min_info_gain_length`) — the harness injects a system reminder, clears the available tool list for the next step, and asks the model to respond in text instead of continuing the loop.