Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/progress-detector-stalled-turns.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": minor
---

Detect stalled turns and force text-only recovery. When the agent emits consecutive tool calls that produce no external progress, the harness clears the available tool list and asks the model to respond in text instead of continuing the loop.
92 changes: 88 additions & 4 deletions packages/agent-core/src/agent/turn/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { createHash } from 'node:crypto';
import type { Readable } from 'node:stream';

import { createControlledPromise, type ControlledPromise } from '@antfu/utils';
import {
Expand Down Expand Up @@ -39,6 +40,7 @@ import { abortable, isUserCancellation, userCancellationReason } from '../../uti
import { USER_PROMPT_ORIGIN, type PromptOrigin } from '../context';
import { renderUserPromptHookBlockResult, renderUserPromptHookResult } from '../../session/hooks';
import { canonicalTelemetryArgs, isPlainRecord } from './canonical-args';
import { ProgressDetector, type ProgressSnapshot } from './progress-detector';
import { ToolCallDeduplicator } from './tool-dedup';
import { budgetToolResultForModel } from './tool-result-budget';

Expand Down Expand Up @@ -79,6 +81,22 @@ const GOAL_MODEL_CONFIG_PAUSE_PREFIX = 'Paused after model configuration error';
const GOAL_RUNTIME_PAUSE_PREFIX = 'Paused after runtime error';
const GOAL_PROVIDER_FILTERED_PAUSE_REASON = 'Paused after provider safety policy block';

/**
* Default number of consecutive steps without progress (as reported by the
* turn's ProgressDetector) before the harness forces the model into text-only
* mode. Used only when `loopControl.progressStallThreshold` is not configured.
* This is a safety rail against tool-use loops where the model emits
* placeholder calls (e.g. Bash(:), Read /dev/null) instead of responding to
* the user.
*/
const PROGRESS_STALL_THRESHOLD = 8;

/**
 * System reminder appended to the context before a step once the turn has been
 * flagged as stalled. It tells the model to stop calling tools and to answer
 * with a plain-text summary instead.
 */
const PROGRESS_STALL_REMINDER =
  'The last several actions did not advance the task: no files were changed, ' +
  'no new background work started, and no new useful information was gathered. ' +
  'Stop making tool calls. In your next response, reply with text only: ' +
  'summarize what you know, what has already been tried, and what decision or ' +
  'information is needed next.';

/**
* The prompt the goal driver appends to start each continuation turn — the
* autonomous stand-in for the user typing "continue". The model decides when to
Expand Down Expand Up @@ -665,7 +683,14 @@ export class TurnFlow {
private async runStepLoop(turnId: number, signal: AbortSignal): Promise<LoopTurnStopReason> {
let stopHookContinuationUsed = false;
let goalOutcomeMessageContinuationUsed = false;
let forceTextMode = false;
const deduper = new ToolCallDeduplicator({ telemetry: this.agent.telemetry });
const loopControl = this.agent.kimiConfig?.loopControl;
const progressStallThreshold = loopControl?.progressStallThreshold ?? PROGRESS_STALL_THRESHOLD;
const progressDetector = new ProgressDetector({
takeSnapshot: () => this.takeProgressSnapshot(),
minInfoGainLength: loopControl?.progressMinInfoGainLength,
});
await this.agent.mcp?.waitForInitialLoad(signal);
// Surface the active goal at the start of the turn (append-only; no-op when
// there is no active goal). Each goal continuation is its own turn, so this
Expand All @@ -674,7 +699,6 @@ export class TurnFlow {
while (true) {
signal.throwIfAborted();
const model = this.agent.config.model;
const loopControl = this.agent.kimiConfig?.loopControl;
let stopForGoalBudget = false;
try {
const result = await runTurn({
Expand All @@ -683,7 +707,7 @@ export class TurnFlow {
llm: this.agent.llm,
buildMessages: () => this.agent.context.messages,
buildMessagesStrict: () => this.agent.context.strictMessages,
dispatchEvent: this.buildDispatchEvent(turnId),
dispatchEvent: this.buildDispatchEvent(turnId, progressDetector),
tools: this.agent.tools.loopTools,
log: this.agent.log,
maxSteps: loopControl?.maxStepsPerTurn,
Expand All @@ -697,6 +721,7 @@ export class TurnFlow {
}
},
hooks: {
// oxlint-disable-next-line no-loop-func -- step hook state is scoped to this turn.
beforeStep: async ({ signal: stepSignal }) => {
this.agent.microCompaction.detect();
await this.agent.fullCompaction.beforeStep(stepSignal);
Expand All @@ -709,12 +734,29 @@ export class TurnFlow {
this.flushSteerBuffer();
await this.agent.injection.inject();
deduper.beginStep();
if (forceTextMode) {
this.agent.context.appendSystemReminder(PROGRESS_STALL_REMINDER, {
kind: 'system_trigger',
name: 'progress_stall_guard',
});
return { tools: [] };
}
return;
},
afterStep: async ({ usage }) => {
// oxlint-disable-next-line no-loop-func -- step hook state is scoped to this turn.
afterStep: async ({ stepNumber, usage }) => {
this.agent.usage.record(model, usage, 'turn');
await this.agent.fullCompaction.afterStep();
deduper.endStep();
const progress = await progressDetector.recordStep(stepNumber);
if (!progress && progressDetector.stepsSinceLastProgress(stepNumber) >= progressStallThreshold) {
this.agent.log.warn('turn appears stalled; forcing text-only mode', {
turnId,
stepNumber,
threshold: progressStallThreshold,
});
forceTextMode = true;
}
return stopForGoalBudget ? { stopTurn: true } : undefined;
},
// oxlint-disable-next-line no-loop-func -- stop hook continuation state is scoped to this turn.
Expand Down Expand Up @@ -840,10 +882,11 @@ export class TurnFlow {
}
}

private buildDispatchEvent(turnId: number) {
private buildDispatchEvent(turnId: number, progressDetector?: ProgressDetector) {
return createLoopEventDispatcher({
appendTranscriptRecord: async (event: LoopRecordedEvent) => {
this.agent.context.appendLoopEvent(event);
progressDetector?.onLoopEvent(event);
},
emitLiveEvent: (event: LoopEvent) => {
this.noteFirstRequestEvent(event);
Expand All @@ -854,6 +897,47 @@ export class TurnFlow {
});
}

/**
 * Captures the external state that the progress detector compares between
 * steps: the git status of the agent's working directory and a serialized
 * list of background tasks.
 */
private async takeProgressSnapshot(): Promise<ProgressSnapshot> {
  // Start the git query first, then gather the synchronous task list while it runs.
  const pendingGitStatus = this.runGitStatus(this.agent.config.cwd);
  const backgroundTasks = this.captureBackgroundTasks();
  const gitStatus = await pendingGitStatus;
  return { gitStatus, backgroundTasks };
}

private async runGitStatus(cwd: string): Promise<string> {
try {
const proc = await this.agent.kaos.exec('git', '-C', cwd, 'status', '--porcelain');

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Detect content changes in already-dirty files

When a turn keeps editing a file that is already modified or untracked, git status --porcelain stays identical (for example, M src/foo.ts) even though the file contents changed; Edit/Write successes also often return short outputs below the 60-character information-gain threshold. In that common single-file refactor case, eight real edits can be classified as stalled and the next step is forced into text-only mode, preventing the agent from making further needed changes. Please include a content-sensitive signal (e.g. diff/hash/mtime for dirty paths) or otherwise count successful write/edit tool results as progress.

Useful? React with 👍 / 👎.

const stdout = await this.collectStream(proc.stdout);
const exitCode = await proc.wait();
if (exitCode !== 0) {
return '';
}
return stdout.trim();
} catch {
return '';
}
}

/**
 * Serializes the id and status of every background task into a JSON string so
 * that two snapshots can be compared with plain string equality.
 */
private captureBackgroundTasks(): string {
  // NOTE(review): the `true` argument presumably asks for finished tasks as well — confirm against `background.list`.
  const summaries = this.agent.background
    .list(true)
    .map(({ taskId, status }) => ({ id: taskId, status }));
  return JSON.stringify(summaries);
}

/**
 * Drains a readable stream to completion and returns everything it produced,
 * decoded as UTF-8.
 *
 * Chunks are concatenated as bytes before decoding, so a multi-byte character
 * split across two chunks is still decoded correctly.
 */
private async collectStream(stream: Readable): Promise<string> {
  const pieces: Buffer[] = [];
  for await (const piece of stream) {
    if (Buffer.isBuffer(piece)) {
      pieces.push(piece);
    } else {
      pieces.push(Buffer.from(piece as string));
    }
  }
  const combined = Buffer.concat(pieces);
  return combined.toString('utf-8');
}

private noteFirstRequestEvent(event: LoopEvent): void {
switch (event.type) {
case 'step.end':
Expand Down
158 changes: 158 additions & 0 deletions packages/agent-core/src/agent/turn/progress-detector.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/**
* Detects when a turn is spinning without making real progress.
*
* Progress is measured by looking at external, observable state rather than
* interpreting model outputs:
*
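* - Information gain: successful tool outputs that are non-trivial and have
* not been seen before in this turn.
* - Successful `Edit`/`Write` results: always counted as progress, because
* repeated edits to an already-dirty file leave `git status --porcelain`
* unchanged.
* - External state change: git working tree, background task lifecycle, or
* other host-provided snapshots.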
*
* When a configurable number of consecutive steps pass without progress, the
* detector reports that the turn has stalled. The host can then force the model
* into text-only mode instead of letting it continue emitting placeholder tool
* calls.
*/

import { createHash } from 'node:crypto';

import type { LoopRecordedEvent, LoopToolCallEvent, LoopToolResultEvent } from '../../loop/events';

/**
* Names of tools whose successful results always count as progress, regardless
* of how short their output is.
*/
const PROGRESS_TOOLS = new Set(['Edit', 'Write']);

/**
* External world state captured once per step. Two snapshots are compared
* field by field with string equality; any difference counts as progress.
*/
export interface ProgressSnapshot {
/**
* `git status --porcelain` output. Empty when git is unavailable or the tree
* is clean. Changes when the set of dirty paths or their status codes
* changes; further edits to a file that is already listed do NOT change it.
*/
readonly gitStatus: string;
/**
* Snapshot of active/terminal background tasks. Changes when tasks are
* created, complete, fail, or are stopped.
*/
readonly backgroundTasks: string;
}

/**
* Host-provided callback that captures the current external state. It may
* return the snapshot directly or as a promise.
*/
export type TakeProgressSnapshot = () => Promise<ProgressSnapshot> | ProgressSnapshot;

/** Construction options for {@link ProgressDetector}. */
export interface ProgressDetectorOptions {
/** Called once per step to capture external world state. */
readonly takeSnapshot: TakeProgressSnapshot;
/**
* Minimum successful output length to count as information gain.
* Outputs shorter than this are treated as trivial/no-op responses.
* Falls back to `DEFAULT_MIN_INFO_GAIN_LENGTH` when omitted or `undefined`.
*/
readonly minInfoGainLength?: number | undefined;
}

/**
* Default minimum length (in string characters) a successful tool output must
* have before it can count as information gain.
*/
const DEFAULT_MIN_INFO_GAIN_LENGTH = 60;

/**
 * Tracks whether a turn is still advancing.
 *
 * The detector is intentionally stateful per-turn: it accumulates the hashes of
 * every non-trivial tool output seen so far and the last external snapshot, and
 * reports how many consecutive steps have passed without any progress signal.
 *
 * Usage: feed every recorded loop event to {@link ProgressDetector.onLoopEvent},
 * call {@link ProgressDetector.recordStep} once at the end of each step, and
 * compare {@link ProgressDetector.stepsSinceLastProgress} against a threshold.
 */
export class ProgressDetector {
  private readonly takeSnapshot: TakeProgressSnapshot;
  private readonly minInfoGainLength: number;
  /** Hashes of every non-trivial successful output observed in this turn. */
  private readonly seenOutputHashes = new Set<string>();
  /** Snapshot taken at the end of the previous step; undefined before the first step. */
  private previousSnapshot?: ProgressSnapshot;
  /** Events recorded since the last `recordStep` call. */
  private currentStepEvents: LoopRecordedEvent[] = [];
  /** Maps a tool call id to its tool name so results can be attributed to a tool. */
  private readonly toolCallNames = new Map<string, string>();
  /** Step number of the most recent step that made progress (0 = none yet). */
  private lastProgressStep = 0;

  /**
   * @param options Snapshot callback plus an optional minimum output length;
   *   the length falls back to `DEFAULT_MIN_INFO_GAIN_LENGTH` when omitted.
   */
  constructor(options: ProgressDetectorOptions) {
    this.takeSnapshot = options.takeSnapshot;
    this.minInfoGainLength = options.minInfoGainLength ?? DEFAULT_MIN_INFO_GAIN_LENGTH;
  }

  /** Called for every recorded loop event so the detector can observe results. */
  onLoopEvent(event: LoopRecordedEvent): void {
    this.currentStepEvents.push(event);
    if (event.type === 'tool.call') {
      // Remember the tool name; result events are matched to it by call id.
      const call = event as LoopToolCallEvent;
      this.toolCallNames.set(call.toolCallId, call.name);
    }
  }

  /**
   * Evaluates the events collected since the last call and reports whether this
   * step made progress. Resets the per-step event buffer.
   *
   * @param stepNumber Number of the step that just finished.
   * @returns `true` when the external snapshot changed or new information was
   *   gained during the step.
   */
  async recordStep(stepNumber: number): Promise<boolean> {
    const snapshot = await this.takeSnapshot();
    const stateChanged = this.hasExternalStateChanged(snapshot);
    this.previousSnapshot = snapshot;

    const infoGained = this.hasInformationGain();
    this.currentStepEvents = [];

    const progress = stateChanged || infoGained;
    if (progress) {
      this.lastProgressStep = stepNumber;
    }
    return progress;
  }

  /**
   * Number of consecutive steps since the last progress signal.
   *
   * Computed as `currentStep - lastProgressStep`; before any progress has been
   * recorded this is simply `currentStep`.
   */
  stepsSinceLastProgress(currentStep: number): number {
    return currentStep - this.lastProgressStep;
  }

  /** Compares the new snapshot with the one stored by the previous step. */
  private hasExternalStateChanged(current: ProgressSnapshot): boolean {
    if (this.previousSnapshot === undefined) {
      return false; // First step has no previous snapshot to compare against.
    }
    return (
      this.previousSnapshot.gitStatus !== current.gitStatus ||
      this.previousSnapshot.backgroundTasks !== current.backgroundTasks
    );
  }

  /**
   * Reports whether any successful tool result in the current step counts as
   * progress.
   *
   * The whole step is always scanned, even after the first progress signal, so
   * that every novel output is recorded as seen. Returning early would leave
   * the remaining outputs of the step unrecorded, and a later step that merely
   * repeats one of them would be misclassified as new information.
   */
  private hasInformationGain(): boolean {
    let gained = false;
    for (const event of this.currentStepEvents) {
      if (event.type !== 'tool.result') {
        continue;
      }
      const resultEvent = event as LoopToolResultEvent;
      const result = resultEvent.result;
      if (result.isError === true) {
        continue;
      }
      // Successful writes/edits are real progress even when their output is
      // short, because they change file contents. git status --porcelain does
      // not capture repeated edits to an already-dirty file.
      const toolName = this.toolCallNames.get(resultEvent.toolCallId);
      if (toolName !== undefined && PROGRESS_TOOLS.has(toolName)) {
        gained = true;
        continue;
      }
      const text = extractOutputText(result.output);
      if (text.length < this.minInfoGainLength) {
        continue;
      }
      const hash = hashString(text);
      if (!this.seenOutputHashes.has(hash)) {
        this.seenOutputHashes.add(hash);
        gained = true;
      }
    }
    return gained;
  }
}

/**
 * Flattens a tool output into plain text.
 *
 * @param output Either a raw string or a list of content parts.
 * @returns The string itself, or the concatenation (no separator) of the
 *   `text` of every part whose `type` is `'text'` and whose `text` is a string.
 *   All other parts are ignored.
 */
function extractOutputText(output: string | readonly { readonly type: string; readonly text?: string }[]): string {
  if (typeof output === 'string') {
    return output;
  }
  let combined = '';
  for (const part of output) {
    if (part.type === 'text' && typeof part.text === 'string') {
      combined += part.text;
    }
  }
  return combined;
}

/**
 * Returns the SHA-256 digest of `value` (encoded as UTF-8) as a lowercase hex
 * string.
 */
function hashString(value: string): string {
  const hasher = createHash('sha256');
  hasher.update(value, 'utf8');
  return hasher.digest('hex');
}
2 changes: 2 additions & 0 deletions packages/agent-core/src/config/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ export const LoopControlSchema = z.object({
maxRalphIterations: z.number().int().min(-1).optional(),
reservedContextSize: z.number().int().min(0).optional(),
compactionTriggerRatio: z.number().min(0.5).max(0.99).optional(),
progressStallThreshold: z.number().int().min(1).optional(),
progressMinInfoGainLength: z.number().int().min(0).optional(),
});

export type LoopControl = z.infer<typeof LoopControlSchema>;
Expand Down
8 changes: 6 additions & 2 deletions packages/agent-core/src/loop/turn-step.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ export async function executeLoopStep(deps: ExecuteLoopStepDeps): Promise<{
recordUsage,
} = deps;

let stepTools = tools;
if (hooks?.beforeStep !== undefined) {
const beforeStep = await hooks.beforeStep({
turnId,
Expand All @@ -74,6 +75,9 @@ export async function executeLoopStep(deps: ExecuteLoopStepDeps): Promise<{
if (beforeStep?.block === true) {
throw new Error(beforeStep.reason ?? `Step ${String(currentStep)} was blocked`);
}
if (beforeStep?.tools !== undefined) {
stepTools = beforeStep.tools;
}
}

signal.throwIfAborted();
Expand All @@ -84,7 +88,7 @@ export async function executeLoopStep(deps: ExecuteLoopStepDeps): Promise<{
const stepUuid = randomUUID();

const step: ToolCallStepContext = {
tools,
tools: stepTools,
hooks,
log,
dispatchEvent,
Expand All @@ -104,7 +108,7 @@ export async function executeLoopStep(deps: ExecuteLoopStepDeps): Promise<{

const chatParams: LLMChatParams = {
messages,
tools: tools ?? [],
tools: stepTools ?? [],
signal,
...createChatStreamingCallbacks({
dispatchEvent,
Expand Down
5 changes: 5 additions & 0 deletions packages/agent-core/src/loop/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@ export interface LoopStoppedStepContext extends LoopStepHookContext {
export interface BeforeStepResult {
/**
* When `true`, the step is not executed: the step executor throws an error
* instead of calling the model.
*/
readonly block?: boolean | undefined;
/**
* Message for the error thrown when `block` is `true`. A generic
* "Step N was blocked" message is used when omitted.
*/
readonly reason?: string | undefined;
/**
* Override the tools available to the model for this step only.
* Returning an empty array forces a text-only response.
* Leaving it `undefined` keeps the turn's regular tool list.
*/
readonly tools?: readonly ExecutableTool[] | undefined;
}

export interface AfterStepResult {
Expand Down
Loading