fix(chat): merge consecutive reasoning steps into one pill (#724)

blove · claude · github-actions[bot] · web-flow · commit a8ee48faebc5 · 2026-06-23T18:46:35.000-07:00
* fix(chat): merge consecutive reasoning steps into one pill A multi-step agent turn (reason → tool → reason → tool → answer) produced a stack of separate "Thought for 1s" pills — one per assistant reasoning message, with the hidden tool messages between them. This collapses a reasoning RUN (a maximal sequence of consecutive assistant reasoning steps separated only by hidden tool messages) into a single pill rendered at the run's first step: "Thought for {total} · {N} steps", expandable to the joined reasoning. Single-step turns keep the normal "Thought for {duration}" pill (label falls back when N == 1). Adds reasoningRunStart()/reasoningRun() helpers on ChatComponent; uses the existing chat-reasoning [label] input + resolvedLabel() fallback. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * chore(docs): regenerate api docs * chore(docs): regenerate api docs * test(chat): unit-cover reasoningRunStart/reasoningRun (merged pill) Addresses the AI review's "missing test coverage" comment on #724. Adds 11 unit tests over the two core methods, constructing a real ChatComponent in an injection context and driving agent.messages() directly (no template compile): - reasoningRunStart: start after a user turn; not a start for a non-reasoning message; not a start mid-run (2nd consecutive step, even across a tool msg). - reasoningRun: single step (no label); two steps split by a tool message merge (joined content, summed duration, "· N steps" label); run terminates at a non-reasoning assistant message; all-undefined durations → durationMs undefined + "<1s" fallback (pins the behavior the reviewer flagged); mixed durations sum only numerics; streaming reflects the loading tail step (single and multi-step), and is false once response text arrives. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(chat): drop misleading "<1s" from merged reasoning label when timing is unknown Addresses the second AI review comment on #724. 
When no reasoning step reports a duration, durationMs is undefined and the label previously read "Thought for <1s · N steps" — "<1s" implies fast when it really means "no timing data". Now label by step count alone ("N steps") in that case; the "Thought for {total} · N steps" form is used only when at least one step reported timing. Updates the pinned unit test to assert the new behavior. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * chore(docs): regenerate api docs --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
diff --git a/apps/website/content/docs/chat/api/api-docs.json b/apps/website/content/docs/chat/api/api-docs.json
@@ -489,6 +489,12 @@
         "description": "",
         "optional": false
       },
+      {
+        "name": "glyphName",
+        "type": "Signal<string>",
+        "description": "The effective name as a Material Symbols ligature (camelCase → snake_case).",
+        "optional": false
+      },
       {
         "name": "icon",
         "type": "InputSignal<string>",
@@ -2068,6 +2074,32 @@
           }
         ]
       },
+      {
+        "name": "reasoningRun",
+        "signature": "reasoningRun(index: number): object",
+        "description": "Aggregate the reasoning RUN starting at `index`: joins each step's\nreasoning, sums durations, counts steps, and computes the streaming flag\nand the merged label when N > 1 (\"Thought for {total} · {N} steps\", or\njust \"{N} steps\" when no step reported timing).",
+        "params": [
+          {
+            "name": "index",
+            "type": "number",
+            "description": "",
+            "optional": false
+          }
+        ]
+      },
+      {
+        "name": "reasoningRunStart",
+        "signature": "reasoningRunStart(index: number): boolean",
+        "description": "True when message[index] starts a reasoning RUN — a maximal sequence of\nconsecutive assistant reasoning steps separated only by (hidden) tool\nmessages. The merged reasoning pill renders once, here.",
+        "params": [
+          {
+            "name": "index",
+            "type": "number",
+            "description": "",
+            "optional": false
+          }
+        ]
+      },
       {
         "name": "submitMessage",
         "signature": "submitMessage(text: string): void",
diff --git a/libs/chat/src/lib/compositions/chat/chat.component.spec.ts b/libs/chat/src/lib/compositions/chat/chat.component.spec.ts
@@ -17,7 +17,7 @@ import { ChatGenerativeUiComponent } from '../../primitives/chat-generative-ui/c
 import type { Spec, StateStore } from '@json-render/core';
 import type { AgentEvent } from '../../agent/agent-event';
 import type { Subagent } from '../../agent/subagent';
-import type { ToolCall } from '../../agent';
+import type { ToolCall, Message } from '../../agent';
 
 describe('ChatComponent', () => {
   it('is defined as a class', () => {
@@ -648,3 +648,123 @@ describe('ChatComponent — subagent cards render once (no duplicate per-message
     expect(host.querySelectorAll('chat-subagent-card').length).toBe(1);
   });
 });
+
+describe('ChatComponent — reasoning runs (merged pill)', () => {
+  // Unit coverage for reasoningRunStart() / reasoningRun() — the core of the
+  // merge-consecutive-reasoning-steps feature. These are plain class methods
+  // over agent.messages(), so we construct a real ChatComponent in an injection
+  // context and drive its `agent` signal input directly (same pattern as the
+  // welcome-branch tests above); no template compile needed.
+
+  interface ReasoningRun { // mirrors the object shape returned by ChatComponent.reasoningRun()
+    content: string; // the steps' reasoning text joined with blank lines
+    durationMs: number | undefined; // sum of the numeric step durations; undefined when no step reported one
+    streaming: boolean; // streaming state of the run's LAST step
+    label: string | undefined; // merged label; undefined for runs of fewer than two steps
+  }
+  interface ReasoningApi { // test-only view of the component's protected reasoning helpers
+    reasoningRunStart(index: number): boolean;
+    reasoningRun(index: number): ReasoningRun;
+  }
+
+  function api(messages: Message[], isLoading = false): ReasoningApi { // builds a ChatComponent over a mock agent and exposes its reasoning helpers
+    TestBed.configureTestingModule({});
+    const injector = TestBed.inject(Injector);
+    let comp!: ChatComponent; // definitely assigned inside the injection-context callback below
+    runInInjectionContext(injector, () => {
+      comp = new ChatComponent(); // constructed directly: no fixture, no template compile
+      setSignalInput(comp.agent, mockAgent({ messages, isLoading }));
+    });
+    return comp as unknown as ReasoningApi; // the double cast reaches the protected methods under test
+  }
+
+  const user = (id: string, content = 'hi'): Message => ({ id, role: 'user', content }); // user turn; ends any reasoning run
+  const tool = (id: string): Message => ({ id, role: 'tool', content: 'result', toolCallId: 'c' }); // tool result; skipped when scanning a run
+  const reasoning = (id: string, reasoning: string, durationMs?: number, content = ''): Message => // assistant reasoning step; duration and response text are optional
+    ({ id, role: 'assistant', content, reasoning, reasoningDurationMs: durationMs });
+  const answer = (id: string, content: string): Message => ({ id, role: 'assistant', content }); // assistant message with NO reasoning
+
+  describe('reasoningRunStart', () => {
+    it('is true for a reasoning step that follows a user message', () => {
+      const a = api([user('u1'), reasoning('a1', 'thinking', 1000)]);
+      expect(a.reasoningRunStart(1)).toBe(true); // index 1 is a1, directly after the user turn
+    });
+
+    it('is false for an assistant message with no reasoning', () => {
+      const a = api([user('u1'), answer('a1', 'done')]);
+      expect(a.reasoningRunStart(1)).toBe(false); // a1 carries no reasoning, so it cannot start a run
+    });
+
+    it('is false for the 2nd consecutive reasoning step (mid-run, even across a tool message)', () => {
+      const a = api([user('u1'), reasoning('a1', 'first', 1000), tool('t1'), reasoning('a2', 'second', 1000)]);
+      expect(a.reasoningRunStart(1)).toBe(true);  // run starts at the first step
+      expect(a.reasoningRunStart(3)).toBe(false); // the second step is NOT a new start
+    });
+  });
+
+  describe('reasoningRun', () => {
+    it('single step: no label, content from the one step, duration from the one step', () => {
+      const a = api([user('u1'), reasoning('a1', 'just this', 4000)]);
+      const run = a.reasoningRun(1);
+      expect(run.label).toBeUndefined();          // single step → no "· N steps" label
+      expect(run.content).toBe('just this');
+      expect(run.durationMs).toBe(4000);
+      expect(run.streaming).toBe(false);          // not loading
+    });
+
+    it('two steps separated by a tool message merge: joined content, summed duration, "N steps" label', () => {
+      const a = api([user('u1'), reasoning('a1', 'first', 3000), tool('t1'), reasoning('a2', 'second', 2000)]);
+      const run = a.reasoningRun(1);
+      expect(run.content).toBe('first\n\nsecond');             // steps are joined with a blank line
+      expect(run.durationMs).toBe(5000);                       // 3000 + 2000
+      expect(run.label).toBe('Thought for 5s · 2 steps');
+    });
+
+    it('the run ends when a non-reasoning assistant message follows (boundary excluded)', () => {
+      const a = api([
+        user('u1'),
+        reasoning('a1', 'first', 1000),
+        reasoning('a2', 'second', 1000),
+        answer('a3', 'the answer'),
+      ]);
+      const run = a.reasoningRun(1);
+      expect(run.content).toBe('first\n\nsecond'); // a3 terminates the run
+      expect(run.content).not.toContain('the answer');
+      expect(run.label).toBe('Thought for 2s · 2 steps'); // 1000 + 1000; a3 is not counted as a step
+    });
+
+    it('all durations undefined → durationMs undefined; label drops the duration phrase ("N steps")', () => {
+      // Per review: "Thought for <1s" reads as "fast" when timing is actually
+      // unknown. With no step reporting a duration, label by step count alone.
+      const a = api([user('u1'), reasoning('a1', 'first'), tool('t1'), reasoning('a2', 'second')]);
+      const run = a.reasoningRun(1);
+      expect(run.durationMs).toBeUndefined();
+      expect(run.label).toBe('2 steps');
+    });
+
+    it('mixed defined/undefined durations sum only the numeric ones', () => {
+      const a = api([user('u1'), reasoning('a1', 'first', 3000), tool('t1'), reasoning('a2', 'second')]);
+      expect(a.reasoningRun(1).durationMs).toBe(3000); // a2 reported no duration, so only a1's 3000 counts
+    });
+
+    it('streaming is true when the run’s last step is the loading tail with no response text yet', () => {
+      const a = api([user('u1'), reasoning('a1', 'thinking', undefined, '')], /* isLoading */ true);
+      const run = a.reasoningRun(1);
+      expect(run.streaming).toBe(true);
+      expect(run.label).toBeUndefined(); // still a single step
+    });
+
+    it('streaming reflects the LAST step of a multi-step run', () => {
+      // Two-step run whose final step is the loading tail (empty content).
+      const a = api([user('u1'), reasoning('a1', 'first', 2000), tool('t1'), reasoning('a2', 'second', undefined, '')], true);
+      const run = a.reasoningRun(1);
+      expect(run.streaming).toBe(true);
+      expect(run.label).toBe('Thought for 2s · 2 steps'); // the total comes from a1 alone; the tail step has no duration
+    });
+
+    it('streaming is false once response text has arrived on the tail step', () => {
+      const a = api([user('u1'), reasoning('a1', 'thinking', 2000, 'here is the answer')], true);
+      expect(a.reasoningRun(1).streaming).toBe(false); // still loading, but the tail step already has response text
+    });
+  });
+});
diff --git a/libs/chat/src/lib/compositions/chat/chat.component.ts b/libs/chat/src/lib/compositions/chat/chat.component.ts
@@ -36,6 +36,7 @@ import { createPartialArgsBridge, type PartialArgsBridge } from '../../a2ui/part
 import { createA2uiSurfaceStore, type A2uiSurfaceStore } from '../../a2ui/surface-store';
 import { a2uiActionLabel } from '../../a2ui/action-label';
 import { messageContent } from '../shared/message-utils';
+import { formatDuration } from '../../utils/format-duration';
 import { CHAT_HOST_TOKENS, ensureChatRootStyles } from '../../styles/chat-tokens';
 import type { ChatRenderEvent } from './chat-render-event';
 import { CHAT_LIFECYCLE, type ChatLifecycle } from '../../lifecycle';
@@ -197,11 +198,19 @@ export function isPinned(
                   [streaming]="agent().isLoading() && i === agent().messages().length - 1"
                   [current]="i === agent().messages().length - 1"
                 >
-                  @if (message.reasoning) {
+                  <!-- Reasoning is merged across a run of consecutive (tool-
+                       separated) reasoning steps and rendered ONCE at the run's
+                       first step as "Thought for {total} · {N} steps" (or just
+                       "{N} steps" when no step reported timing), so a multi-step
+                       agent shows one compact pill instead of a stack of chips.
+                       Single-step turns render a normal "Thought for {duration}" pill. -->
+                  @if (message.reasoning && reasoningRunStart(i)) {
+                    @let run = reasoningRun(i);
                     <chat-reasoning
-                      [content]="message.reasoning"
-                      [isStreaming]="isReasoningStreaming(message, i)"
-                      [durationMs]="message.reasoningDurationMs"
+                      [content]="run.content"
+                      [isStreaming]="run.streaming"
+                      [durationMs]="run.durationMs"
+                      [label]="run.label"
                     />
                   }
                   <chat-tool-calls [agent]="agent()" [message]="message" [excludeToolNames]="excludedToolNames()">
@@ -468,6 +477,65 @@ export class ChatComponent {
     return text.length === 0;
   }
 
+  /** The closest non-tool message before `index` when it is an assistant message; otherwise (other role, or nothing precedes) undefined. */
+  private prevAssistant(msgs: Message[], index: number): Message | undefined {
+    for (let j = index - 1; j >= 0; j--) {
+      if (msgs[j].role === 'tool') continue; // tool messages never break a run; look past them
+      return msgs[j].role === 'assistant' ? msgs[j] : undefined; // the first non-tool message decides; the scan does not continue past it
+    }
+    return undefined; // only tool messages (or nothing at all) precede `index`
+  }
+
+  /**
+   * True when message[index] starts a reasoning RUN — a maximal sequence of
+   * consecutive assistant reasoning steps separated only by (hidden) tool
+   * messages. The template renders the merged pill only where this is true.
+   */
+  protected reasoningRunStart(index: number): boolean {
+    const msgs = this.agent().messages();
+    if (!msgs[index]?.reasoning) return false; // no message at `index`, or its reasoning is missing/empty
+    return !this.prevAssistant(msgs, index)?.reasoning; // a start unless the closest preceding non-tool message is an assistant step with reasoning
+  }
+
+  /**
+   * Aggregate the reasoning RUN beginning at `index`: step reasoning joined by
+   * blank lines, the sum of reported durations (undefined if none), the last
+   * step's isReasoningStreaming() result, and — for 2+ steps only — a merged
+   * label ("Thought for {total} · {N} steps", or "{N} steps" without timing).
+   */
+  protected reasoningRun(index: number): {
+    content: string;
+    durationMs: number | undefined;
+    streaming: boolean;
+    label: string | undefined;
+  } {
+    const msgs = this.agent().messages();
+    const steps: { msg: Message; idx: number }[] = []; // idx is kept so the streaming check can be made against the step's real position
+    for (let j = index; j < msgs.length; j++) {
+      const m = msgs[j];
+      if (m.role === 'tool') continue;            // skip hidden tool messages
+      if (m.role === 'assistant' && m.reasoning) { steps.push({ msg: m, idx: j }); continue; }
+      break;                                      // anything else (incl. an assistant message without reasoning) ends the run
+    }
+    const content = steps.map((s) => s.msg.reasoning ?? '').filter(Boolean).join('\n\n');
+    const durations = steps
+      .map((s) => s.msg.reasoningDurationMs)
+      .filter((d): d is number => typeof d === 'number'); // steps without a numeric duration are left out of the sum
+    const durationMs = durations.length ? durations.reduce((a, b) => a + b, 0) : undefined; // undefined (not 0) when nothing was reported
+    const last = steps[steps.length - 1]; // undefined when the message at `index` is not a reasoning step
+    const streaming = last ? this.isReasoningStreaming(last.msg, last.idx) : false;
+    // Only claim a duration when at least one step reported timing. Otherwise
+    // "Thought for <1s" would read as "fast" when it really means "unknown", so
+    // drop the duration phrase and label by step count alone.
+    const label =
+      steps.length > 1
+        ? durationMs !== undefined
+          ? `Thought for ${formatDuration(durationMs)} · ${steps.length} steps`
+          : `${steps.length} steps`
+        : undefined; // runs of fewer than two steps carry no merged label
+    return { content, durationMs, streaming, label };
+  }
+
   private readonly classifiers = new Map<string, ContentClassifier>();
   private readonly destroyRef = inject(DestroyRef);
   private readonly injector = inject(Injector);