Skip to content

Commit 6d9d77d

Browse files
authored
fix(reasoning): accumulate and strip reasoning tags from autoparser results (mudler#9227)
fix(reasoning): accumulate and strip reasoning tags from autoparser results Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 6f304d1 commit 6d9d77d

4 files changed

Lines changed: 168 additions & 4 deletions

File tree

core/http/endpoints/openai/chat.go

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
8686

8787
// Prefer pre-parsed chat deltas from C++ autoparser when available
8888
if tokenUsage.HasChatDeltaContent() {
89-
reasoningDelta, contentDelta = tokenUsage.ChatDeltaReasoningAndContent()
89+
rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
90+
contentDelta = cd
91+
// Strip reasoning tags (e.g. <|channel>thought / <channel|>) that
92+
// the C++ autoparser includes as part of reasoning content.
93+
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
9094
// Keep extractor state consistent for fallback
9195
extractor.ProcessToken(s)
9296
} else {
@@ -149,7 +153,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
149153

150154
// Prefer pre-parsed chat deltas from C++ autoparser when available
151155
if usage.HasChatDeltaContent() {
152-
reasoningDelta, contentDelta = usage.ChatDeltaReasoningAndContent()
156+
rawReasoning, cd := usage.ChatDeltaReasoningAndContent()
157+
contentDelta = cd
158+
// Strip reasoning tags (e.g. <|channel>thought / <channel|>) that
159+
// the C++ autoparser includes as part of reasoning content.
160+
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
153161
// Keep extractor state consistent for fallback
154162
extractor.ProcessToken(s)
155163
} else {

core/http/endpoints/openresponses/responses.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1823,7 +1823,9 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
18231823
var reasoningDelta, contentDelta string
18241824
// Prefer pre-parsed chat deltas from C++ autoparser when available
18251825
if tokenUsage.HasChatDeltaContent() {
1826-
reasoningDelta, contentDelta = tokenUsage.ChatDeltaReasoningAndContent()
1826+
rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
1827+
contentDelta = cd
1828+
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
18271829
extractor.ProcessToken(token) // keep state consistent
18281830
} else {
18291831
reasoningDelta, contentDelta = extractor.ProcessToken(token)
@@ -2350,7 +2352,9 @@ func handleOpenResponsesStream(c echo.Context, responseID string, createdAt int6
23502352
var reasoningDelta, contentDelta string
23512353
// Prefer pre-parsed chat deltas from C++ autoparser when available
23522354
if tokenUsage.HasChatDeltaContent() {
2353-
reasoningDelta, contentDelta = tokenUsage.ChatDeltaReasoningAndContent()
2355+
rawReasoning, cd := tokenUsage.ChatDeltaReasoningAndContent()
2356+
contentDelta = cd
2357+
reasoningDelta = extractor.ProcessChatDeltaReasoning(rawReasoning)
23542358
extractor.ProcessToken(token) // keep state consistent
23552359
} else {
23562360
reasoningDelta, contentDelta = extractor.ProcessToken(token)

pkg/reasoning/extractor.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ type ReasoningExtractor struct {
2121
lastReasoning string
2222
lastCleaned string
2323
suppressReasoning bool
24+
25+
// ChatDelta reasoning accumulator — used by ProcessChatDeltaReasoning
26+
// to strip reasoning tags (e.g. <|channel>thought, <channel|>) that
27+
// the C++ autoparser includes in reasoning_content deltas.
28+
cdReasoningAccum string
29+
cdLastStrippedReasoning string
2430
}
2531

2632
// NewReasoningExtractor creates a new extractor for the given thinking token and config.
@@ -64,6 +70,61 @@ func (e *ReasoningExtractor) ProcessToken(token string) (reasoningDelta, content
6470
return reasoningDelta, contentDelta
6571
}
6672

73+
// ProcessChatDeltaReasoning accumulates raw reasoning text from C++ autoparser
74+
// ChatDeltas, strips any embedded reasoning tags (e.g. <|channel>thought …
75+
// <channel|> for Gemma 4), and returns only the new stripped delta.
76+
// This prevents tag tokens from leaking into the reasoning field of SSE chunks.
77+
//
78+
// When the C++ autoparser already strips tags (e.g. <think> models), the text
79+
// passes through unchanged — ExtractReasoning finds no tags so we use the raw text.
80+
func (e *ReasoningExtractor) ProcessChatDeltaReasoning(rawDelta string) string {
81+
if rawDelta == "" {
82+
return ""
83+
}
84+
e.cdReasoningAccum += rawDelta
85+
86+
// Try to strip reasoning tags from accumulated ChatDelta reasoning.
87+
stripped, cleaned := ExtractReasoning(e.cdReasoningAccum, &e.config)
88+
89+
if stripped == "" {
90+
// ExtractReasoning found no reasoning content. This happens when:
91+
// a) A complete start tag was found but has no content after it yet
92+
// (cleaned == "" because everything is inside the unclosed tag)
93+
// → keep buffering
94+
// b) We're accumulating a partial multi-token start tag
95+
// (e.g. "<|channel>" before "thought" arrives)
96+
// → keep buffering
97+
// c) No tags at all — C++ already stripped them
98+
// → pass through the raw text as-is
99+
if cleaned == "" && strings.TrimSpace(e.cdReasoningAccum) != "" {
100+
// Case (a): tag found, unclosed, no content yet
101+
stripped = ""
102+
} else if e.thinkingStartToken != "" &&
103+
len(strings.TrimSpace(e.cdReasoningAccum)) < len(e.thinkingStartToken) &&
104+
strings.HasPrefix(e.thinkingStartToken, strings.TrimSpace(e.cdReasoningAccum)) {
105+
// Case (b): partial start tag prefix
106+
stripped = ""
107+
} else {
108+
// Case (c): no tags found — text is already clean from C++
109+
stripped = e.cdReasoningAccum
110+
}
111+
}
112+
113+
// Compute delta from stripped reasoning
114+
var delta string
115+
if len(stripped) > len(e.cdLastStrippedReasoning) && strings.HasPrefix(stripped, e.cdLastStrippedReasoning) {
116+
delta = stripped[len(e.cdLastStrippedReasoning):]
117+
} else if stripped != e.cdLastStrippedReasoning && stripped != "" {
118+
delta = stripped
119+
}
120+
e.cdLastStrippedReasoning = stripped
121+
122+
if e.suppressReasoning {
123+
return ""
124+
}
125+
return delta
126+
}
127+
67128
// Reasoning returns the total accumulated reasoning after streaming.
68129
func (e *ReasoningExtractor) Reasoning() string {
69130
return e.lastReasoning
@@ -84,6 +145,8 @@ func (e *ReasoningExtractor) Reset() {
84145
e.accumulated = ""
85146
e.lastReasoning = ""
86147
e.lastCleaned = ""
148+
e.cdReasoningAccum = ""
149+
e.cdLastStrippedReasoning = ""
87150
}
88151

89152
// ResetAndSuppressReasoning clears state and suppresses future reasoning deltas.
@@ -95,6 +158,8 @@ func (e *ReasoningExtractor) ResetAndSuppressReasoning() {
95158
e.accumulated = ""
96159
e.lastReasoning = ""
97160
e.lastCleaned = ""
161+
e.cdReasoningAccum = ""
162+
e.cdLastStrippedReasoning = ""
98163
e.suppressReasoning = true
99164
}
100165

pkg/reasoning/extractor_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,4 +195,91 @@ var _ = Describe("ReasoningExtractor", func() {
195195
Expect(ext.CleanedContent()).To(Equal("visible content"))
196196
})
197197
})
198+
199+
Context("ProcessChatDeltaReasoning with Gemma 4 tags", func() {
200+
It("should strip <|channel>thought and <channel|> tags from streaming deltas", func() {
201+
ext := NewReasoningExtractor("<|channel>thought", Config{})
202+
203+
// Simulate C++ autoparser sending tag tokens as reasoning
204+
d1 := ext.ProcessChatDeltaReasoning("<|channel>")
205+
Expect(d1).To(BeEmpty(), "start tag prefix should be buffered, not emitted")
206+
207+
d2 := ext.ProcessChatDeltaReasoning("thought")
208+
Expect(d2).To(BeEmpty(), "start tag suffix should be buffered, not emitted")
209+
210+
d3 := ext.ProcessChatDeltaReasoning("\n")
211+
Expect(d3).To(BeEmpty(), "newline after start tag should not emit yet")
212+
213+
d4 := ext.ProcessChatDeltaReasoning("The")
214+
Expect(d4).To(Equal("The"))
215+
216+
d5 := ext.ProcessChatDeltaReasoning(" user")
217+
Expect(d5).To(Equal(" user"))
218+
219+
d6 := ext.ProcessChatDeltaReasoning(" asks")
220+
Expect(d6).To(Equal(" asks"))
221+
222+
// Trailing newline gets TrimSpaced by ExtractReasoning,
223+
// so it appears delayed with the next non-whitespace token
224+
d7 := ext.ProcessChatDeltaReasoning("\n")
225+
Expect(d7).To(BeEmpty(), "trailing newline is buffered by TrimSpace")
226+
227+
d8 := ext.ProcessChatDeltaReasoning("2+2=4")
228+
Expect(d8).To(Equal("\n2+2=4"), "delayed newline emitted with next content")
229+
230+
d9 := ext.ProcessChatDeltaReasoning("<channel|>")
231+
Expect(d9).To(BeEmpty(), "close tag should be consumed, not emitted")
232+
})
233+
234+
It("should handle empty deltas", func() {
235+
ext := NewReasoningExtractor("<|channel>thought", Config{})
236+
d := ext.ProcessChatDeltaReasoning("")
237+
Expect(d).To(BeEmpty())
238+
})
239+
240+
It("should pass through reasoning without tags unchanged", func() {
241+
ext := NewReasoningExtractor("<think>", Config{})
242+
243+
// When C++ autoparser already strips tags (e.g. <think> models),
244+
// reasoning arrives clean — just pass it through.
245+
d1 := ext.ProcessChatDeltaReasoning("I need to")
246+
Expect(d1).To(Equal("I need to"))
247+
248+
d2 := ext.ProcessChatDeltaReasoning(" think carefully")
249+
Expect(d2).To(Equal(" think carefully"))
250+
})
251+
252+
It("should strip <think> tags if C++ autoparser includes them", func() {
253+
ext := NewReasoningExtractor("<think>", Config{})
254+
255+
d1 := ext.ProcessChatDeltaReasoning("<think>")
256+
Expect(d1).To(BeEmpty())
257+
258+
d2 := ext.ProcessChatDeltaReasoning("reasoning")
259+
Expect(d2).To(Equal("reasoning"))
260+
261+
d3 := ext.ProcessChatDeltaReasoning("</think>")
262+
Expect(d3).To(BeEmpty())
263+
})
264+
265+
It("should respect suppressReasoning", func() {
266+
ext := NewReasoningExtractor("<|channel>thought", Config{})
267+
ext.ResetAndSuppressReasoning()
268+
269+
d := ext.ProcessChatDeltaReasoning("some reasoning")
270+
Expect(d).To(BeEmpty())
271+
})
272+
273+
It("should reset ChatDelta state on Reset", func() {
274+
ext := NewReasoningExtractor("<|channel>thought", Config{})
275+
276+
ext.ProcessChatDeltaReasoning("<|channel>thought")
277+
ext.ProcessChatDeltaReasoning("\nfirst reasoning")
278+
ext.Reset()
279+
280+
// After reset, should start fresh
281+
d := ext.ProcessChatDeltaReasoning("clean reasoning")
282+
Expect(d).To(Equal("clean reasoning"))
283+
})
284+
})
198285
})

0 commit comments

Comments
 (0)