Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Improvements

* `.stream()` and `.stream_async()` now emit `<thinking>` / `</thinking>` tag boundaries around thinking content in all stream modes. With `content="text"`, concatenating all chunks produces well-formed output with thinking delimited by a single tag pair. With `content="all"`, tag boundary strings are yielded alongside typed `ContentThinking` objects, so downstream consumers can detect thinking boundaries without type inspection. (#294, #297)
* `.stream()` and `.stream_async()` now handle thinking content differently by mode. With `content="text"`, thinking is suppressed entirely. With `content="all"`, thinking fragments are yielded as `ContentThinkingDelta` objects with a `phase` property (`"start"`, `"body"`, or `"end"`) that communicates block boundaries to downstream consumers without injecting synthetic strings into the stream. (#299, #297, #294)
* Updated default models across all providers to current generation: (#292)
* Anthropic: `claude-sonnet-4-6`
* Bedrock: `us.anthropic.claude-sonnet-4-6`
Expand Down
97 changes: 57 additions & 40 deletions chatlas/_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
ContentJson,
ContentText,
ContentThinking,
ContentThinkingDelta,
ContentToolRequest,
ContentToolResult,
ToolInfo,
Expand Down Expand Up @@ -1157,7 +1158,7 @@ def stream(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional[SubmitInputArgsT] = None,
) -> Generator[
str | ContentThinking | ContentToolRequest | ContentToolResult, None, None
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
]: ...

def stream(
Expand All @@ -1168,7 +1169,7 @@ def stream(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional[SubmitInputArgsT] = None,
) -> Generator[
str | ContentThinking | ContentToolRequest | ContentToolResult, None, None
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
]:
"""
Generate a response from the chat in a streaming fashion.
Expand Down Expand Up @@ -1233,7 +1234,7 @@ class Person(BaseModel):
)

def wrapper() -> Generator[
str | ContentThinking | ContentToolRequest | ContentToolResult, None, None
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
]:
with display:
for chunk in generator:
Expand All @@ -1260,7 +1261,7 @@ async def stream_async(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional[SubmitInputArgsT] = None,
) -> AsyncGenerator[
str | ContentThinking | ContentToolRequest | ContentToolResult, None
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
]: ...

async def stream_async(
Expand All @@ -1271,7 +1272,7 @@ async def stream_async(
data_model: Optional[type[BaseModel]] = None,
kwargs: Optional[SubmitInputArgsT] = None,
) -> AsyncGenerator[
str | ContentThinking | ContentToolRequest | ContentToolResult, None
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
]:
"""
Generate a response from the chat in a streaming fashion asynchronously.
Expand Down Expand Up @@ -1332,7 +1333,7 @@ class Person(BaseModel):
display = self._markdown_display(echo=echo)

async def wrapper() -> AsyncGenerator[
str | ContentThinking | ContentToolRequest | ContentToolResult, None
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
]:
with display:
async for chunk in self._chat_impl_async(
Expand Down Expand Up @@ -2494,7 +2495,7 @@ def _chat_impl(
kwargs: Optional[SubmitInputArgsT] = None,
data_model: Optional[type[BaseModel]] = None,
) -> Generator[
str | ContentThinking | ContentToolRequest | ContentToolResult, None, None
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None, None
]: ...

def _chat_impl(
Expand Down Expand Up @@ -2564,7 +2565,7 @@ def _chat_impl_async(
kwargs: Optional[SubmitInputArgsT] = None,
data_model: Optional[type[BaseModel]] = None,
) -> AsyncGenerator[
str | ContentThinking | ContentToolRequest | ContentToolResult, None
str | ContentThinkingDelta | ContentToolRequest | ContentToolResult, None
]: ...

async def _chat_impl_async(
Expand Down Expand Up @@ -2676,28 +2677,33 @@ def emit(text: str | Content):
for chunk in response:
content = self.provider.stream_content(chunk)
if content is not None:
text = content_text(content)
if text:
is_thinking = isinstance(content, ContentThinking)
if is_thinking and not inside_thinking:
emit("<thinking>\n")
yield "<thinking>\n"
inside_thinking = True
elif not is_thinking and inside_thinking:
emit("\n</thinking>\n\n")
yield "\n</thinking>\n\n"
inside_thinking = False

emit(text)
if content_mode == "all" and is_thinking:
if is_thinking_delta(content) and not inside_thinking:
content = ContentThinkingDelta(
thinking=content.thinking, phase="start"
)
emit("<thinking>\n")
inside_thinking = True
elif not is_thinking_delta(content) and inside_thinking:
emit("\n</thinking>\n\n")
if content_mode == "all":
yield ContentThinkingDelta(thinking="", phase="end")
inside_thinking = False

if is_thinking_delta(content):
emit(content.thinking)
if content_mode == "all":
yield content
else:
else:
text = content_text(content)
if text:
emit(text)
yield text
result = self.provider.stream_merge_chunks(result, chunk)

if inside_thinking:
emit("\n</thinking>\n\n")
yield "\n</thinking>\n\n"
if content_mode == "all":
yield ContentThinkingDelta(thinking="", phase="end")

turn = self.provider.stream_turn(
result,
Expand Down Expand Up @@ -2796,28 +2802,33 @@ def emit(text: str | Content):
async for chunk in response:
content = self.provider.stream_content(chunk)
if content is not None:
text = content_text(content)
if text:
is_thinking = isinstance(content, ContentThinking)
if is_thinking and not inside_thinking:
emit("<thinking>\n")
yield "<thinking>\n"
inside_thinking = True
elif not is_thinking and inside_thinking:
emit("\n</thinking>\n\n")
yield "\n</thinking>\n\n"
inside_thinking = False

emit(text)
if content_mode == "all" and is_thinking:
if is_thinking_delta(content) and not inside_thinking:
content = ContentThinkingDelta(
thinking=content.thinking, phase="start"
)
emit("<thinking>\n")
inside_thinking = True
elif not is_thinking_delta(content) and inside_thinking:
emit("\n</thinking>\n\n")
if content_mode == "all":
yield ContentThinkingDelta(thinking="", phase="end")
inside_thinking = False

if is_thinking_delta(content):
emit(content.thinking)
if content_mode == "all":
yield content
else:
else:
text = content_text(content)
if text:
emit(text)
yield text
result = self.provider.stream_merge_chunks(result, chunk)

if inside_thinking:
emit("\n</thinking>\n\n")
yield "\n</thinking>\n\n"
if content_mode == "all":
yield ContentThinkingDelta(thinking="", phase="end")

turn = self.provider.stream_turn(
result,
Expand Down Expand Up @@ -3292,8 +3303,14 @@ class ToolFailureWarning(RuntimeWarning):
warnings.simplefilter("always", ToolFailureWarning)


def is_thinking_delta(content: Content) -> TypeGuard[ContentThinkingDelta]:
    """Narrow *content* to ``ContentThinkingDelta`` for static type checkers."""
    return isinstance(content, ContentThinkingDelta)


def content_text(content: Content) -> str:
"""Extract displayable text from a Content object."""
if isinstance(content, ContentThinkingDelta):
return content.thinking
if isinstance(content, ContentThinking):
return content.thinking
if isinstance(content, ContentText):
Expand Down
37 changes: 26 additions & 11 deletions chatlas/_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
BaseModel,
ConfigDict,
Field,
PrivateAttr,
field_serializer,
field_validator,
)
Expand Down Expand Up @@ -139,6 +138,7 @@ def from_tool(cls, tool: "Tool | ToolBuiltIn") -> "ToolInfo":
"json",
"pdf",
"thinking",
"thinking_delta",
"web_search_request",
"web_search_results",
"web_fetch_request",
Expand Down Expand Up @@ -631,20 +631,11 @@ class ContentThinking(Content):

thinking: str
extra: Optional[dict[str, Any]] = None
_complete: bool = PrivateAttr(default=True)

content_type: ContentTypeEnum = "thinking"

@classmethod
def _as_chunk(cls, thinking: str) -> "ContentThinking":
obj = cls.model_construct(thinking=thinking, content_type="thinking")
obj._complete = False
return obj

def __str__(self):
if self._complete:
return f"<thinking>\n{self.thinking}\n</thinking>\n"
return self.thinking
return f"<thinking>\n{self.thinking}\n</thinking>\n"

def _repr_html_(self):
return str(self.tagify())
Expand All @@ -663,6 +654,30 @@ def tagify(self):
return HTML(html)


class ContentThinkingDelta(Content):
    """
    A streaming fragment of thinking/reasoning content.

    Emitted during streaming to represent a chunk of the model's thinking.
    The ``phase`` attribute communicates block boundaries to downstream
    consumers, so no synthetic ``<thinking>`` tag strings need to be injected
    into the stream.

    Parameters
    ----------
    thinking
        The thinking/reasoning text fragment.
    phase
        The phase of the thinking delta: ``"start"``, ``"body"``, or ``"end"``.
    """

    # The raw text fragment for this delta (empty string for a pure "end" marker).
    thinking: str
    # Position of this fragment within its thinking block; defaults to "body".
    phase: Literal["start", "body", "end"] = "body"

    content_type: ContentTypeEnum = "thinking_delta"

    def __str__(self):
        # Deltas render as bare text; boundaries are conveyed via `phase`,
        # unlike ContentThinking which wraps its text in <thinking> tags.
        return self.thinking


class ContentToolRequestSearch(Content):
"""
A web search request from the model.
Expand Down
3 changes: 2 additions & 1 deletion chatlas/_provider_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
ContentPDF,
ContentText,
ContentThinking,
ContentThinkingDelta,
ContentToolRequest,
ContentToolRequestFetch,
ContentToolRequestSearch,
Expand Down Expand Up @@ -468,7 +469,7 @@ def stream_content(self, chunk) -> Optional[Content]:
if chunk.delta.type == "text_delta":
return ContentText.model_construct(text=chunk.delta.text)
if chunk.delta.type == "thinking_delta":
return ContentThinking._as_chunk(chunk.delta.thinking)
return ContentThinkingDelta(thinking=chunk.delta.thinking)
return None

def stream_merge_chunks(self, completion, chunk):
Expand Down
3 changes: 2 additions & 1 deletion chatlas/_provider_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
ContentPDF,
ContentText,
ContentThinking,
ContentThinkingDelta,
ContentToolRequest,
ContentToolResult,
)
Expand Down Expand Up @@ -377,7 +378,7 @@ def stream_content(self, chunk) -> Optional[Content]:
if text is None:
return None
if getattr(part, "thought", None):
return ContentThinking._as_chunk(text)
return ContentThinkingDelta(thinking=text)
return ContentText.model_construct(text=text)

def stream_merge_chunks(self, completion, chunk):
Expand Down
3 changes: 2 additions & 1 deletion chatlas/_provider_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
ContentPDF,
ContentText,
ContentThinking,
ContentThinkingDelta,
ContentToolRequest,
ContentToolRequestSearch,
ContentToolResult,
Expand Down Expand Up @@ -298,7 +299,7 @@ def stream_content(self, chunk) -> Optional[Content]:
return ContentText.model_construct(text=chunk.delta)
if chunk.type == "response.reasoning_summary_text.delta":
# https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
return ContentThinking._as_chunk(chunk.delta)
return ContentThinkingDelta(thinking=chunk.delta)
if chunk.type == "response.reasoning_summary_text.done":
# The thinking→text transition in _submit_turns already emits
# "\n</thinking>\n\n" which provides the visual separator.
Expand Down
4 changes: 4 additions & 0 deletions chatlas/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
ContentImageRemote,
ContentJson,
ContentText,
ContentThinking,
ContentThinkingDelta,
ContentToolRequest,
ContentToolRequestFetch,
ContentToolRequestSearch,
Expand All @@ -32,6 +34,8 @@
"ContentImageRemote",
"ContentJson",
"ContentText",
"ContentThinking",
"ContentThinkingDelta",
"ContentToolRequest",
"ContentToolResult",
"ContentToolRequestFetch",
Expand Down
2 changes: 2 additions & 0 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ quartodoc:
- types.ContentImageRemote
- types.ContentJson
- types.ContentText
- types.ContentThinking
- types.ContentThinkingDelta
- types.ContentToolRequest
- types.ContentToolResult
- types.ContentToolRequestSearch
Expand Down
Loading
Loading