Skip to content

Commit a8ee48f

Browse files
bloveclaudegithub-actions[bot]
authored
fix(chat): merge consecutive reasoning steps into one pill (#724)
* fix(chat): merge consecutive reasoning steps into one pill A multi-step agent turn (reason → tool → reason → tool → answer) produced a stack of separate "Thought for 1s" pills — one per assistant reasoning message, with the hidden tool messages between them. This collapses a reasoning RUN (a maximal sequence of consecutive assistant reasoning steps separated only by hidden tool messages) into a single pill rendered at the run's first step: "Thought for {total} · {N} steps", expandable to the joined reasoning. Single-step turns keep the normal "Thought for {duration}" pill (label falls back when N == 1). Adds reasoningRunStart()/reasoningRun() helpers on ChatComponent; uses the existing chat-reasoning [label] input + resolvedLabel() fallback. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * chore(docs): regenerate api docs * chore(docs): regenerate api docs * test(chat): unit-cover reasoningRunStart/reasoningRun (merged pill) Addresses the AI review's "missing test coverage" comment on #724. Adds 11 unit tests over the two core methods, constructing a real ChatComponent in an injection context and driving agent.messages() directly (no template compile): - reasoningRunStart: start after a user turn; not a start for a non-reasoning message; not a start mid-run (2nd consecutive step, even across a tool msg). - reasoningRun: single step (no label); two steps split by a tool message merge (joined content, summed duration, "· N steps" label); run terminates at a non-reasoning assistant message; all-undefined durations → durationMs undefined + "<1s" fallback (pins the behavior the reviewer flagged); mixed durations sum only numerics; streaming reflects the loading tail step (single and multi-step), and is false once response text arrives. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(chat): drop misleading "<1s" from merged reasoning label when timing is unknown Addresses the second AI review comment on #724. When no reasoning step reports a duration, durationMs is undefined and the label previously read "Thought for <1s · N steps" — "<1s" implies fast when it really means "no timing data". Now label by step count alone ("N steps") in that case; the "Thought for {total} · N steps" form is used only when at least one step reported timing. Updates the pinned unit test to assert the new behavior. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * chore(docs): regenerate api docs --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent f404906 commit a8ee48f

3 files changed

Lines changed: 225 additions & 5 deletions

File tree

apps/website/content/docs/chat/api/api-docs.json

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,12 @@
489489
"description": "",
490490
"optional": false
491491
},
492+
{
493+
"name": "glyphName",
494+
"type": "Signal<string>",
495+
"description": "The effective name as a Material Symbols ligature (camelCase → snake_case).",
496+
"optional": false
497+
},
492498
{
493499
"name": "icon",
494500
"type": "InputSignal<string>",
@@ -2068,6 +2074,32 @@
20682074
}
20692075
]
20702076
},
2077+
{
2078+
"name": "reasoningRun",
2079+
"signature": "reasoningRun(index: number): object",
2080+
"description": "Aggregate the reasoning RUN starting at `index`: joins each step's\nreasoning, sums durations, counts steps, and computes the streaming flag\nand the merged label when N > 1 (\"Thought for {total} · {N} steps\", or\njust \"{N} steps\" when no step reported timing).",
2081+
"params": [
2082+
{
2083+
"name": "index",
2084+
"type": "number",
2085+
"description": "",
2086+
"optional": false
2087+
}
2088+
]
2089+
},
2090+
{
2091+
"name": "reasoningRunStart",
2092+
"signature": "reasoningRunStart(index: number): boolean",
2093+
"description": "True when message[index] starts a reasoning RUN — a maximal sequence of\nconsecutive assistant reasoning steps separated only by (hidden) tool\nmessages. The merged reasoning pill renders once, here.",
2094+
"params": [
2095+
{
2096+
"name": "index",
2097+
"type": "number",
2098+
"description": "",
2099+
"optional": false
2100+
}
2101+
]
2102+
},
20712103
{
20722104
"name": "submitMessage",
20732105
"signature": "submitMessage(text: string): void",

libs/chat/src/lib/compositions/chat/chat.component.spec.ts

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import { ChatGenerativeUiComponent } from '../../primitives/chat-generative-ui/c
1717
import type { Spec, StateStore } from '@json-render/core';
1818
import type { AgentEvent } from '../../agent/agent-event';
1919
import type { Subagent } from '../../agent/subagent';
20-
import type { ToolCall } from '../../agent';
20+
import type { ToolCall, Message } from '../../agent';
2121

2222
describe('ChatComponent', () => {
2323
it('is defined as a class', () => {
@@ -648,3 +648,123 @@ describe('ChatComponent — subagent cards render once (no duplicate per-message
648648
expect(host.querySelectorAll('chat-subagent-card').length).toBe(1);
649649
});
650650
});
651+
652+
describe('ChatComponent — reasoning runs (merged pill)', () => {
653+
// Unit coverage for reasoningRunStart() / reasoningRun() — the core of the
654+
// merge-consecutive-reasoning-steps feature. These are plain class methods
655+
// over agent.messages(), so we construct a real ChatComponent in an injection
656+
// context and drive its `agent` signal input directly (same pattern as the
657+
// welcome-branch tests above); no template compile needed.
658+
659+
interface ReasoningRun {
660+
content: string;
661+
durationMs: number | undefined;
662+
streaming: boolean;
663+
label: string | undefined;
664+
}
665+
interface ReasoningApi {
666+
reasoningRunStart(index: number): boolean;
667+
reasoningRun(index: number): ReasoningRun;
668+
}
669+
670+
function api(messages: Message[], isLoading = false): ReasoningApi {
671+
TestBed.configureTestingModule({});
672+
const injector = TestBed.inject(Injector);
673+
let comp!: ChatComponent;
674+
runInInjectionContext(injector, () => {
675+
comp = new ChatComponent();
676+
setSignalInput(comp.agent, mockAgent({ messages, isLoading }));
677+
});
678+
return comp as unknown as ReasoningApi;
679+
}
680+
681+
const user = (id: string, content = 'hi'): Message => ({ id, role: 'user', content });
682+
const tool = (id: string): Message => ({ id, role: 'tool', content: 'result', toolCallId: 'c' });
683+
const reasoning = (id: string, reasoning: string, durationMs?: number, content = ''): Message =>
684+
({ id, role: 'assistant', content, reasoning, reasoningDurationMs: durationMs });
685+
const answer = (id: string, content: string): Message => ({ id, role: 'assistant', content });
686+
687+
describe('reasoningRunStart', () => {
688+
it('is true for a reasoning step that follows a user message', () => {
689+
const a = api([user('u1'), reasoning('a1', 'thinking', 1000)]);
690+
expect(a.reasoningRunStart(1)).toBe(true);
691+
});
692+
693+
it('is false for an assistant message with no reasoning', () => {
694+
const a = api([user('u1'), answer('a1', 'done')]);
695+
expect(a.reasoningRunStart(1)).toBe(false);
696+
});
697+
698+
it('is false for the 2nd consecutive reasoning step (mid-run, even across a tool message)', () => {
699+
const a = api([user('u1'), reasoning('a1', 'first', 1000), tool('t1'), reasoning('a2', 'second', 1000)]);
700+
expect(a.reasoningRunStart(1)).toBe(true); // run starts at the first step
701+
expect(a.reasoningRunStart(3)).toBe(false); // the second step is NOT a new start
702+
});
703+
});
704+
705+
describe('reasoningRun', () => {
706+
it('single step: no label, content from the one step, duration from the one step', () => {
707+
const a = api([user('u1'), reasoning('a1', 'just this', 4000)]);
708+
const run = a.reasoningRun(1);
709+
expect(run.label).toBeUndefined(); // single step → no "· N steps" label
710+
expect(run.content).toBe('just this');
711+
expect(run.durationMs).toBe(4000);
712+
expect(run.streaming).toBe(false); // not loading
713+
});
714+
715+
it('two steps separated by a tool message merge: joined content, summed duration, "N steps" label', () => {
716+
const a = api([user('u1'), reasoning('a1', 'first', 3000), tool('t1'), reasoning('a2', 'second', 2000)]);
717+
const run = a.reasoningRun(1);
718+
expect(run.content).toBe('first\n\nsecond');
719+
expect(run.durationMs).toBe(5000); // 3000 + 2000
720+
expect(run.label).toBe('Thought for 5s · 2 steps');
721+
});
722+
723+
it('the run ends when a non-reasoning assistant message follows (boundary excluded)', () => {
724+
const a = api([
725+
user('u1'),
726+
reasoning('a1', 'first', 1000),
727+
reasoning('a2', 'second', 1000),
728+
answer('a3', 'the answer'),
729+
]);
730+
const run = a.reasoningRun(1);
731+
expect(run.content).toBe('first\n\nsecond'); // a3 terminates the run
732+
expect(run.content).not.toContain('the answer');
733+
expect(run.label).toBe('Thought for 2s · 2 steps');
734+
});
735+
736+
it('all durations undefined → durationMs undefined; label drops the duration phrase ("N steps")', () => {
737+
// Per review: "Thought for <1s" reads as "fast" when timing is actually
738+
// unknown. With no step reporting a duration, label by step count alone.
739+
const a = api([user('u1'), reasoning('a1', 'first'), tool('t1'), reasoning('a2', 'second')]);
740+
const run = a.reasoningRun(1);
741+
expect(run.durationMs).toBeUndefined();
742+
expect(run.label).toBe('2 steps');
743+
});
744+
745+
it('mixed defined/undefined durations sum only the numeric ones', () => {
746+
const a = api([user('u1'), reasoning('a1', 'first', 3000), tool('t1'), reasoning('a2', 'second')]);
747+
expect(a.reasoningRun(1).durationMs).toBe(3000);
748+
});
749+
750+
it('streaming is true when the run’s last step is the loading tail with no response text yet', () => {
751+
const a = api([user('u1'), reasoning('a1', 'thinking', undefined, '')], /* isLoading */ true);
752+
const run = a.reasoningRun(1);
753+
expect(run.streaming).toBe(true);
754+
expect(run.label).toBeUndefined(); // still a single step
755+
});
756+
757+
it('streaming reflects the LAST step of a multi-step run', () => {
758+
// Two-step run whose final step is the loading tail (empty content).
759+
const a = api([user('u1'), reasoning('a1', 'first', 2000), tool('t1'), reasoning('a2', 'second', undefined, '')], true);
760+
const run = a.reasoningRun(1);
761+
expect(run.streaming).toBe(true);
762+
expect(run.label).toBe('Thought for 2s · 2 steps');
763+
});
764+
765+
it('streaming is false once response text has arrived on the tail step', () => {
766+
const a = api([user('u1'), reasoning('a1', 'thinking', 2000, 'here is the answer')], true);
767+
expect(a.reasoningRun(1).streaming).toBe(false);
768+
});
769+
});
770+
});

libs/chat/src/lib/compositions/chat/chat.component.ts

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import { createPartialArgsBridge, type PartialArgsBridge } from '../../a2ui/part
3636
import { createA2uiSurfaceStore, type A2uiSurfaceStore } from '../../a2ui/surface-store';
3737
import { a2uiActionLabel } from '../../a2ui/action-label';
3838
import { messageContent } from '../shared/message-utils';
39+
import { formatDuration } from '../../utils/format-duration';
3940
import { CHAT_HOST_TOKENS, ensureChatRootStyles } from '../../styles/chat-tokens';
4041
import type { ChatRenderEvent } from './chat-render-event';
4142
import { CHAT_LIFECYCLE, type ChatLifecycle } from '../../lifecycle';
@@ -197,11 +198,19 @@ export function isPinned(
197198
[streaming]="agent().isLoading() && i === agent().messages().length - 1"
198199
[current]="i === agent().messages().length - 1"
199200
>
200-
@if (message.reasoning) {
201+
<!-- Reasoning is merged across a run of consecutive (tool-
202+
separated) reasoning steps and rendered ONCE at the run's
203+
first step as "Thought for {total} · {N} steps", so a
204+
multi-step agent shows one compact pill instead of a
205+
stack of "Thought for 1s" chips. Single-step turns render
206+
a normal "Thought for {duration}" pill. -->
207+
@if (message.reasoning && reasoningRunStart(i)) {
208+
@let run = reasoningRun(i);
201209
<chat-reasoning
202-
[content]="message.reasoning"
203-
[isStreaming]="isReasoningStreaming(message, i)"
204-
[durationMs]="message.reasoningDurationMs"
210+
[content]="run.content"
211+
[isStreaming]="run.streaming"
212+
[durationMs]="run.durationMs"
213+
[label]="run.label"
205214
/>
206215
}
207216
<chat-tool-calls [agent]="agent()" [message]="message" [excludeToolNames]="excludedToolNames()">
@@ -468,6 +477,65 @@ export class ChatComponent {
468477
return text.length === 0;
469478
}
470479

480+
/** The nearest preceding assistant message (skipping hidden tool messages), or undefined. */
481+
private prevAssistant(msgs: Message[], index: number): Message | undefined {
482+
for (let j = index - 1; j >= 0; j--) {
483+
if (msgs[j].role === 'tool') continue;
484+
return msgs[j].role === 'assistant' ? msgs[j] : undefined;
485+
}
486+
return undefined;
487+
}
488+
489+
/**
490+
* True when message[index] starts a reasoning RUN — a maximal sequence of
491+
* consecutive assistant reasoning steps separated only by (hidden) tool
492+
* messages. The merged reasoning pill renders once, here.
493+
*/
494+
protected reasoningRunStart(index: number): boolean {
495+
const msgs = this.agent().messages();
496+
if (!msgs[index]?.reasoning) return false;
497+
return !this.prevAssistant(msgs, index)?.reasoning;
498+
}
499+
500+
/**
501+
* Aggregate the reasoning RUN starting at `index`: joins each step's
502+
* reasoning, sums durations, counts steps, and computes the streaming flag
503+
* and the merged label when N > 1 ("Thought for {total} · {N} steps", or
504+
* just "{N} steps" when no step reported timing).
505+
*/
506+
protected reasoningRun(index: number): {
507+
content: string;
508+
durationMs: number | undefined;
509+
streaming: boolean;
510+
label: string | undefined;
511+
} {
512+
const msgs = this.agent().messages();
513+
const steps: { msg: Message; idx: number }[] = [];
514+
for (let j = index; j < msgs.length; j++) {
515+
const m = msgs[j];
516+
if (m.role === 'tool') continue; // skip hidden tool messages
517+
if (m.role === 'assistant' && m.reasoning) { steps.push({ msg: m, idx: j }); continue; }
518+
break; // any other message ends the run
519+
}
520+
const content = steps.map((s) => s.msg.reasoning ?? '').filter(Boolean).join('\n\n');
521+
const durations = steps
522+
.map((s) => s.msg.reasoningDurationMs)
523+
.filter((d): d is number => typeof d === 'number');
524+
const durationMs = durations.length ? durations.reduce((a, b) => a + b, 0) : undefined;
525+
const last = steps[steps.length - 1];
526+
const streaming = last ? this.isReasoningStreaming(last.msg, last.idx) : false;
527+
// Only claim a duration when at least one step reported timing. Otherwise
528+
// "Thought for <1s" would read as "fast" when it really means "unknown", so
529+
// drop the duration phrase and label by step count alone.
530+
const label =
531+
steps.length > 1
532+
? durationMs !== undefined
533+
? `Thought for ${formatDuration(durationMs)} · ${steps.length} steps`
534+
: `${steps.length} steps`
535+
: undefined;
536+
return { content, durationMs, streaming, label };
537+
}
538+
471539
private readonly classifiers = new Map<string, ContentClassifier>();
472540
private readonly destroyRef = inject(DestroyRef);
473541
private readonly injector = inject(Injector);

0 commit comments

Comments
 (0)