From 1310d84b8676a4cc4bc6d3d21174099dea0dda64 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 2 Apr 2026 10:39:37 -0500 Subject: [PATCH 1/9] chore: add .worktrees/ to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index f346d70d..b7a00880 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,8 @@ logs/ /.luarc.json +# worktrees +.worktrees/ + # setuptools_scm chatlas/_version.py From 9919b3480e0503bf2c92fd537c811652c41ebe08 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 2 Apr 2026 10:44:00 -0500 Subject: [PATCH 2/9] refactor: add abstract stream_content() to Provider, make stream_text() concrete Co-Authored-By: Claude Opus 4.6 --- chatlas/_provider.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/chatlas/_provider.py b/chatlas/_provider.py index 5962e77c..b92681b9 100644 --- a/chatlas/_provider.py +++ b/chatlas/_provider.py @@ -15,7 +15,7 @@ from pydantic import BaseModel -from ._content import Content +from ._content import Content, ContentText, ContentThinking from ._tools import Tool, ToolBuiltIn from ._turn import AssistantTurn, Turn from ._typing_extensions import NotRequired, TypedDict @@ -226,7 +226,20 @@ async def chat_perform_async( ) -> AsyncIterable[ChatCompletionChunkT] | ChatCompletionT: ... @abstractmethod - def stream_text(self, chunk: ChatCompletionChunkT) -> Optional[str]: ... + def stream_content( + self, chunk: ChatCompletionChunkT + ) -> Optional["Content"]: + ... + + def stream_text(self, chunk: ChatCompletionChunkT) -> Optional[str]: + content = self.stream_content(chunk) + if content is None: + return None + if isinstance(content, ContentThinking): + return content.thinking + if isinstance(content, ContentText): + return content.text + return str(content) @abstractmethod def stream_merge_chunks( From 3570a8ae559dc0349af8208a88890a8ca1120a49 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 2 Apr 2026 10:45:53 -0500 Subject: [PATCH 3/9] refactor: implement stream_content() on all providers Replace stream_text() with stream_content() returning Content objects (ContentText/ContentThinking) instead of raw strings on all five provider implementations: Anthropic, OpenAI, Google, OpenAI Completions, and Snowflake. Co-Authored-By: Claude Opus 4.6 --- chatlas/_provider_anthropic.py | 6 +++--- chatlas/_provider_google.py | 17 +++++++++++++---- chatlas/_provider_openai.py | 9 +++------ chatlas/_provider_openai_completions.py | 7 +++++-- chatlas/_provider_snowflake.py | 4 ++-- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py index fca0c871..63477f53 100644 --- a/chatlas/_provider_anthropic.py +++ b/chatlas/_provider_anthropic.py @@ -463,12 +463,12 @@ def _structured_tool_call(**kwargs: Any): return kwargs_full - def stream_text(self, chunk) -> Optional[str]: + def stream_content(self, chunk) -> Optional[Content]: if chunk.type == "content_block_delta": if chunk.delta.type == "text_delta": - return chunk.delta.text + return ContentText(text=chunk.delta.text) if chunk.delta.type == "thinking_delta": - return chunk.delta.thinking + return ContentThinking(thinking=chunk.delta.thinking) return None def stream_merge_chunks(self, completion, chunk): diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py index df5492b0..1419340d 100644 --- a/chatlas/_provider_google.py +++ b/chatlas/_provider_google.py @@ -14,6 +14,7 @@ ContentJson, ContentPDF, ContentText, + ContentThinking, ContentToolRequest, ContentToolResult, ) @@ -361,12 +362,20 @@ def _chat_perform_args( return kwargs_full - def stream_text(self, chunk) -> Optional[str]: + def stream_content(self, chunk) -> Optional[Content]: try: - # Errors if there is no text (e.g., tool request) - return chunk.text - except Exception: + parts = chunk.candidates[0].content.parts + except (AttributeError, IndexError): return None + if not parts: + return None + part = parts[0] + text = getattr(part, "text", None) + if text is None: + return None + if getattr(part, "thought", None): + return ContentThinking(thinking=text) + return ContentText(text=text) def stream_merge_chunks(self, completion, chunk): chunkd = chunk.model_dump() diff --git a/chatlas/_provider_openai.py b/chatlas/_provider_openai.py index 6623bd9a..0ff27e23 100644 --- a/chatlas/_provider_openai.py +++ b/chatlas/_provider_openai.py @@ -292,16 +292,13 @@ def _chat_perform_args( return kwargs_full - def stream_text(self, chunk): + def stream_content(self, chunk) -> Optional[Content]: if chunk.type == "response.output_text.delta": # https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta - return chunk.delta + return ContentText(text=chunk.delta) if chunk.type == "response.reasoning_summary_text.delta": # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta - return chunk.delta - if chunk.type == "response.reasoning_summary_text.done": - # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done - return "\n\n" + return ContentThinking(thinking=chunk.delta) return None def stream_merge_chunks(self, completion, chunk): diff --git a/chatlas/_provider_openai_completions.py b/chatlas/_provider_openai_completions.py index 1b7c66dc..5937222a 100644 --- a/chatlas/_provider_openai_completions.py +++ b/chatlas/_provider_openai_completions.py @@ -192,10 +192,13 @@ def _chat_perform_args( return kwargs_full - def stream_text(self, chunk): + def stream_content(self, chunk) -> Optional[Content]: if not chunk.choices: return None - return chunk.choices[0].delta.content + text = chunk.choices[0].delta.content + if text is None: + return None + return ContentText(text=text) def stream_merge_chunks(self, completion, chunk): chunkd = chunk.model_dump() diff --git a/chatlas/_provider_snowflake.py b/chatlas/_provider_snowflake.py index 2f2c1d58..0d6a8b83 100644 --- a/chatlas/_provider_snowflake.py +++ b/chatlas/_provider_snowflake.py @@ -356,13 +356,13 @@ def _complete_request( return req - def stream_text(self, chunk): + def stream_content(self, chunk) -> Optional[Content]: if not chunk.choices: return None delta = chunk.choices[0].delta if delta is None or "content" not in delta: return None - return delta["content"] + return ContentText(text=delta["content"]) # Snowflake sort-of follows OpenAI/Anthropic streaming formats except they # don't have the critical "index" field in the delta that the merge logic From 065fd1765540ae5f0ffc1cafc1cae54ff696fb63 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 2 Apr 2026 10:52:54 -0500 Subject: [PATCH 4/9] refactor: use stream_content() in _submit_turns, yield ContentThinking for content='all' - Add content_text() helper to extract displayable text from Content objects - Thread content_mode parameter from _chat_impl to _submit_turns - Use stream_content() instead of stream_text() in streaming loops - Yield ContentThinking objects when content="all" mode is active - Use model_construct() for streaming ContentText to avoid the whitespace-to-"[empty string]" validator corrupting raw chunks Co-Authored-By: Claude Opus 4.6 --- chatlas/_chat.py | 48 +++++++++++++++++++------ chatlas/_provider_anthropic.py | 2 +- chatlas/_provider_google.py | 2 +- chatlas/_provider_openai.py | 2 +- chatlas/_provider_openai_completions.py | 2 +- chatlas/_provider_snowflake.py | 2 +- 6 files changed, 43 insertions(+), 15 deletions(-) diff --git a/chatlas/_chat.py b/chatlas/_chat.py index 709f5bc0..d17bb75c 100644 --- a/chatlas/_chat.py +++ b/chatlas/_chat.py @@ -34,6 +34,7 @@ Content, ContentJson, ContentText, + ContentThinking, ContentToolRequest, ContentToolResult, ToolInfo, @@ -2500,6 +2501,7 @@ def _chat_impl( stream=stream, data_model=data_model, kwargs=kwargs, + content_mode=content, ): yield chunk @@ -2567,6 +2569,7 @@ async def _chat_impl_async( stream=stream, data_model=data_model, kwargs=kwargs, + content_mode=content, ): yield chunk @@ -2604,7 +2607,8 @@ def _submit_turns( stream: bool, data_model: type[BaseModel] | None = None, kwargs: Optional[SubmitInputArgsT] = None, - ) -> Generator[str, None, None]: + content_mode: Literal["text", "all"] = "text", + ) -> Generator[str | Content, None, None]: if any(isinstance(x, Tool) and x._is_async for x in self._tools.values()): raise ValueError("Cannot use async tools in a synchronous chat") @@ -2630,10 +2634,17 @@ def emit(text: str | Content): result = None for chunk in response: - text = self.provider.stream_text(chunk) - if text: - emit(text) - yield text + content = self.provider.stream_content(chunk) + if content is not None: + text = content_text(content) + if text: + emit(text) + if content_mode == "all" and isinstance( + content, ContentThinking + ): + yield content + else: + yield text result = self.provider.stream_merge_chunks(result, chunk) turn = self.provider.stream_turn( @@ -2682,7 +2693,8 @@ async def _submit_turns_async( stream: bool, data_model: type[BaseModel] | None = None, kwargs: Optional[SubmitInputArgsT] = None, - ) -> AsyncGenerator[str, None]: + content_mode: Literal["text", "all"] = "text", + ) -> AsyncGenerator[str | Content, None]: def emit(text: str | Content): self._echo_content(str(text)) @@ -2705,10 +2717,17 @@ def emit(text: str | Content): result = None async for chunk in response: - text = self.provider.stream_text(chunk) - if text: - emit(text) - yield text + content = self.provider.stream_content(chunk) + if content is not None: + text = content_text(content) + if text: + emit(text) + if content_mode == "all" and isinstance( + content, ContentThinking + ): + yield content + else: + yield text result = self.provider.stream_merge_chunks(result, chunk) turn = self.provider.stream_turn( @@ -3184,6 +3203,15 @@ class ToolFailureWarning(RuntimeWarning): warnings.simplefilter("always", ToolFailureWarning) +def content_text(content: Content) -> str: + """Extract displayable text from a Content object.""" + if isinstance(content, ContentThinking): + return content.thinking + if isinstance(content, ContentText): + return content.text + return str(content) + + def is_quarto(): return os.getenv("QUARTO_PYTHON", None) is not None diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py index 63477f53..d4b26b55 100644 --- a/chatlas/_provider_anthropic.py +++ b/chatlas/_provider_anthropic.py @@ -466,7 +466,7 @@ def _structured_tool_call(**kwargs: Any): def stream_content(self, chunk) -> Optional[Content]: if chunk.type == "content_block_delta": if chunk.delta.type == "text_delta": - return ContentText(text=chunk.delta.text) + return ContentText.model_construct(text=chunk.delta.text) if chunk.delta.type == "thinking_delta": return ContentThinking(thinking=chunk.delta.thinking) return None diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py index 1419340d..38bcf59f 100644 --- a/chatlas/_provider_google.py +++ b/chatlas/_provider_google.py @@ -375,7 +375,7 @@ def stream_content(self, chunk) -> Optional[Content]: return None if getattr(part, "thought", None): return ContentThinking(thinking=text) - return ContentText(text=text) + return ContentText.model_construct(text=text) def stream_merge_chunks(self, completion, chunk): chunkd = chunk.model_dump() diff --git a/chatlas/_provider_openai.py b/chatlas/_provider_openai.py index 0ff27e23..d32225ca 100644 --- a/chatlas/_provider_openai.py +++ b/chatlas/_provider_openai.py @@ -295,7 +295,7 @@ def _chat_perform_args( def stream_content(self, chunk) -> Optional[Content]: if chunk.type == "response.output_text.delta": # https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta - return ContentText(text=chunk.delta) + return ContentText.model_construct(text=chunk.delta) if chunk.type == "response.reasoning_summary_text.delta": # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta return ContentThinking(thinking=chunk.delta) diff --git a/chatlas/_provider_openai_completions.py b/chatlas/_provider_openai_completions.py index 5937222a..064042a2 100644 --- a/chatlas/_provider_openai_completions.py +++ b/chatlas/_provider_openai_completions.py @@ -198,7 +198,7 @@ def stream_content(self, chunk) -> Optional[Content]: text = chunk.choices[0].delta.content if text is None: return None - return ContentText(text=text) + return ContentText.model_construct(text=text) def stream_merge_chunks(self, completion, chunk): chunkd = chunk.model_dump() diff --git a/chatlas/_provider_snowflake.py b/chatlas/_provider_snowflake.py index 0d6a8b83..cc22dc5a 100644 --- a/chatlas/_provider_snowflake.py +++ b/chatlas/_provider_snowflake.py @@ -362,7 +362,7 @@ def stream_content(self, chunk) -> Optional[Content]: delta = chunk.choices[0].delta if delta is None or "content" not in delta: return None - return ContentText(text=delta["content"]) + return ContentText.model_construct(text=delta["content"]) # Snowflake sort-of follows OpenAI/Anthropic streaming formats except they # don't have the critical "index" field in the delta that the merge logic From 7c94903cea703cbd5dba349a020edc5e16c01c09 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 2 Apr 2026 10:57:02 -0500 Subject: [PATCH 5/9] style: auto-format with ruff Co-Authored-By: Claude Opus 4.6 --- chatlas/_chat.py | 9 ++++++--- chatlas/_content_expand.py | 3 ++- chatlas/_parallel.py | 2 ++ chatlas/_provider.py | 5 +---- chatlas/_provider_anthropic.py | 4 +--- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/chatlas/_chat.py b/chatlas/_chat.py index d17bb75c..7f2dca51 100644 --- a/chatlas/_chat.py +++ b/chatlas/_chat.py @@ -1310,9 +1310,12 @@ class Person(BaseModel): chat = ChatOpenAI() - chunks = [chunk async for chunk in await chat.stream_async( - "John is 25 years old", data_model=Person - )] + chunks = [ + chunk + async for chunk in await chat.stream_async( + "John is 25 years old", data_model=Person + ) + ] person = Person.model_validate_json("".join(chunks)) ``` """ diff --git a/chatlas/_content_expand.py b/chatlas/_content_expand.py index b2f64e07..09cd972c 100644 --- a/chatlas/_content_expand.py +++ b/chatlas/_content_expand.py @@ -50,7 +50,8 @@ def expand_tool_result(content: ContentToolResult) -> list[ContentUnion]: def expand_tool_value( - request: ContentToolRequest, value: ContentImageInline | ContentImageRemote | ContentPDF + request: ContentToolRequest, + value: ContentImageInline | ContentImageRemote | ContentPDF, ) -> list[ContentUnion]: open_tag = f'' diff --git a/chatlas/_parallel.py b/chatlas/_parallel.py index 979938ef..1dbff03a 100644 --- a/chatlas/_parallel.py +++ b/chatlas/_parallel.py @@ -38,6 +38,7 @@ ChatT = TypeVar("ChatT", bound=Chat) BaseModelT = TypeVar("BaseModelT", bound=BaseModel) + @dataclass class StructuredChatResult(Generic[BaseModelT, ChatT]): """Holds the result of a structured parallel chat request.""" @@ -396,6 +397,7 @@ async def parallel_chat_structured( import chatlas as ctl from pydantic import BaseModel + class Person(BaseModel): name: str age: int diff --git a/chatlas/_provider.py b/chatlas/_provider.py index b92681b9..b3e8d09a 100644 --- a/chatlas/_provider.py +++ b/chatlas/_provider.py @@ -226,10 +226,7 @@ async def chat_perform_async( ) -> AsyncIterable[ChatCompletionChunkT] | ChatCompletionT: ... @abstractmethod - def stream_content( - self, chunk: ChatCompletionChunkT - ) -> Optional["Content"]: - ... + def stream_content(self, chunk: ChatCompletionChunkT) -> Optional["Content"]: ... def stream_text(self, chunk: ChatCompletionChunkT) -> Optional[str]: content = self.stream_content(chunk) diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py index d4b26b55..4a0d746d 100644 --- a/chatlas/_provider_anthropic.py +++ b/chatlas/_provider_anthropic.py @@ -830,9 +830,7 @@ def _as_turn(self, completion: Message, has_data_model=False) -> AssistantTurn: extra = { "type": content.type, "tool_use_id": content.tool_use_id, - "content": [ - x.model_dump() for x in content.content - ] + "content": [x.model_dump() for x in content.content] if isinstance(content.content, list) else content.content.model_dump(), } From c20438a1ffb904e595cd96c8b1390e24d1a05065 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 2 Apr 2026 12:02:36 -0500 Subject: [PATCH 6/9] feat: add ContentThinking to Google _as_turn() and CHANGELOG entry Handle thinking parts in non-streaming Google responses by checking part.get("thought") and emitting ContentThinking. Also add CHANGELOG entry for the stream_content() refactor. Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 4 ++++ chatlas/_provider_google.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72c8f453..7a04cee3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [UNRELEASED] +### New features + +* Providers now distinguish text from thinking content during streaming via the new `stream_content()` method. This allows downstream packages like shinychat to provide specific UI for thinking content. (#276) + ### Bug fixes * Fixed tool calling with Google thinking models (e.g., `gemini-3-flash-preview`) failing with a 400 `INVALID_ARGUMENT` error about a missing `thought_signature`. The signature is now preserved and forwarded in subsequent turns. (#274) diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py index 38bcf59f..f382f82f 100644 --- a/chatlas/_provider_google.py +++ b/chatlas/_provider_google.py @@ -563,6 +563,8 @@ def _as_turn( if text: if has_data_model: contents.append(ContentJson(value=orjson.loads(text))) + elif part.get("thought"): + contents.append(ContentThinking(thinking=text)) else: contents.append(ContentText(text=text)) function_call = part.get("function_call") From c3877c805269a2993c27f7087ef607641e97d180 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 2 Apr 2026 12:25:04 -0500 Subject: [PATCH 7/9] fix: address code review feedback on PR #276 - Add ContentThinking to all content="all" overload return types so callers know they can receive thinking content during streaming - Restore reasoning_summary_text.done separator in OpenAI provider that was accidentally dropped during the stream_content() refactor - Revert unrelated formatting changes to _content_expand.py and _parallel.py to keep the PR focused Co-Authored-By: Claude Opus 4.6 --- chatlas/_chat.py | 20 ++++++++++---------- chatlas/_content_expand.py | 3 +-- chatlas/_parallel.py | 2 -- chatlas/_provider_openai.py | 4 ++++ 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/chatlas/_chat.py b/chatlas/_chat.py index 7f2dca51..5b5a633e 100644 --- a/chatlas/_chat.py +++ b/chatlas/_chat.py @@ -1156,7 +1156,7 @@ def stream( echo: EchoOptions = "none", data_model: Optional[type[BaseModel]] = None, kwargs: Optional[SubmitInputArgsT] = None, - ) -> Generator[str | ContentToolRequest | ContentToolResult, None, None]: ... + ) -> Generator[str | ContentThinking | ContentToolRequest | ContentToolResult, None, None]: ... def stream( self, @@ -1165,7 +1165,7 @@ def stream( echo: EchoOptions = "none", data_model: Optional[type[BaseModel]] = None, kwargs: Optional[SubmitInputArgsT] = None, - ) -> Generator[str | ContentToolRequest | ContentToolResult, None, None]: + ) -> Generator[str | ContentThinking | ContentToolRequest | ContentToolResult, None, None]: """ Generate a response from the chat in a streaming fashion. @@ -1229,7 +1229,7 @@ class Person(BaseModel): ) def wrapper() -> Generator[ - str | ContentToolRequest | ContentToolResult, None, None + str | ContentThinking | ContentToolRequest | ContentToolResult, None, None ]: with display: for chunk in generator: @@ -1255,7 +1255,7 @@ async def stream_async( echo: EchoOptions = "none", data_model: Optional[type[BaseModel]] = None, kwargs: Optional[SubmitInputArgsT] = None, - ) -> AsyncGenerator[str | ContentToolRequest | ContentToolResult, None]: ... + ) -> AsyncGenerator[str | ContentThinking | ContentToolRequest | ContentToolResult, None]: ... async def stream_async( self, @@ -1264,7 +1264,7 @@ async def stream_async( echo: EchoOptions = "none", data_model: Optional[type[BaseModel]] = None, kwargs: Optional[SubmitInputArgsT] = None, - ) -> AsyncGenerator[str | ContentToolRequest | ContentToolResult, None]: + ) -> AsyncGenerator[str | ContentThinking | ContentToolRequest | ContentToolResult, None]: """ Generate a response from the chat in a streaming fashion asynchronously. @@ -1324,7 +1324,7 @@ class Person(BaseModel): display = self._markdown_display(echo=echo) async def wrapper() -> AsyncGenerator[ - str | ContentToolRequest | ContentToolResult, None + str | ContentThinking | ContentToolRequest | ContentToolResult, None ]: with display: async for chunk in self._chat_impl_async( @@ -2485,7 +2485,7 @@ def _chat_impl( stream: bool, kwargs: Optional[SubmitInputArgsT] = None, data_model: Optional[type[BaseModel]] = None, - ) -> Generator[str | ContentToolRequest | ContentToolResult, None, None]: ... + ) -> Generator[str | ContentThinking | ContentToolRequest | ContentToolResult, None, None]: ... def _chat_impl( self, @@ -2495,7 +2495,7 @@ def _chat_impl( stream: bool, kwargs: Optional[SubmitInputArgsT] = None, data_model: Optional[type[BaseModel]] = None, - ) -> Generator[str | ContentToolRequest | ContentToolResult, None, None]: + ) -> Generator[str | Content, None, None]: user_turn_result: UserTurn | None = user_turn while user_turn_result is not None: for chunk in self._submit_turns( @@ -2553,7 +2553,7 @@ def _chat_impl_async( stream: bool, kwargs: Optional[SubmitInputArgsT] = None, data_model: Optional[type[BaseModel]] = None, - ) -> AsyncGenerator[str | ContentToolRequest | ContentToolResult, None]: ... + ) -> AsyncGenerator[str | ContentThinking | ContentToolRequest | ContentToolResult, None]: ... async def _chat_impl_async( self, @@ -2563,7 +2563,7 @@ async def _chat_impl_async( stream: bool, kwargs: Optional[SubmitInputArgsT] = None, data_model: Optional[type[BaseModel]] = None, - ) -> AsyncGenerator[str | ContentToolRequest | ContentToolResult, None]: + ) -> AsyncGenerator[str | Content, None]: user_turn_result: UserTurn | None = user_turn while user_turn_result is not None: async for chunk in self._submit_turns_async( diff --git a/chatlas/_content_expand.py b/chatlas/_content_expand.py index 09cd972c..b2f64e07 100644 --- a/chatlas/_content_expand.py +++ b/chatlas/_content_expand.py @@ -50,8 +50,7 @@ def expand_tool_result(content: ContentToolResult) -> list[ContentUnion]: def expand_tool_value( - request: ContentToolRequest, - value: ContentImageInline | ContentImageRemote | ContentPDF, + request: ContentToolRequest, value: ContentImageInline | ContentImageRemote | ContentPDF ) -> list[ContentUnion]: open_tag = f'' diff --git a/chatlas/_parallel.py b/chatlas/_parallel.py index 1dbff03a..979938ef 100644 --- a/chatlas/_parallel.py +++ b/chatlas/_parallel.py @@ -38,7 +38,6 @@ ChatT = TypeVar("ChatT", bound=Chat) BaseModelT = TypeVar("BaseModelT", bound=BaseModel) - @dataclass class StructuredChatResult(Generic[BaseModelT, ChatT]): """Holds the result of a structured parallel chat request.""" @@ -397,7 +396,6 @@ async def parallel_chat_structured( import chatlas as ctl from pydantic import BaseModel - class Person(BaseModel): name: str age: int diff --git a/chatlas/_provider_openai.py b/chatlas/_provider_openai.py index d32225ca..2384d837 100644 --- a/chatlas/_provider_openai.py +++ b/chatlas/_provider_openai.py @@ -299,6 +299,10 @@ def stream_content(self, chunk) -> Optional[Content]: if chunk.type == "response.reasoning_summary_text.delta": # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta return ContentThinking(thinking=chunk.delta) + if chunk.type == "response.reasoning_summary_text.done": + # Separator between reasoning summary and response text + # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done + return ContentText.model_construct(text="\n\n") return None def stream_merge_chunks(self, completion, chunk): From afe70f5eee39c1aecafeb8904dc88ddddbbed7a6 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 2 Apr 2026 12:28:43 -0500 Subject: [PATCH 8/9] docs: make changelog entry more user-facing Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a04cee3..897a5246 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### New features -* Providers now distinguish text from thinking content during streaming via the new `stream_content()` method. This allows downstream packages like shinychat to provide specific UI for thinking content. (#276) +* The `.stream()` and `.stream_async()` methods now yield `ContentThinking` objects (instead of plain strings) for thinking/reasoning content when `content="all"`. This allows downstream packages like shinychat to provide specific UI for thinking content. (#276) ### Bug fixes From 539f954c24fa3754933252f2549849a9077d71d1 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 2 Apr 2026 12:39:22 -0500 Subject: [PATCH 9/9] fix: resolve pyright type errors in stream_content refactor - Add overloads to _submit_turns/_submit_turns_async so content_mode="text" narrows return type to Generator[str], fixing ChatResponse type mismatch - Fix Google provider stream_content() optional subscript error by guarding against None candidates before indexing Co-Authored-By: Claude Opus 4.6 --- chatlas/_chat.py | 46 +++++++++++++++++++++++++++++++++++++ chatlas/_provider_google.py | 9 +++++--- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/chatlas/_chat.py b/chatlas/_chat.py index 5b5a633e..cccc4941 100644 --- a/chatlas/_chat.py +++ b/chatlas/_chat.py @@ -2603,6 +2603,29 @@ async def _chat_impl_async( if all_results: user_turn_result = UserTurn(all_results) + @overload + def _submit_turns( + self, + user_turn: UserTurn, + echo: EchoOptions, + stream: bool, + data_model: type[BaseModel] | None = None, + kwargs: Optional[SubmitInputArgsT] = None, + content_mode: Literal["text"] = "text", + ) -> Generator[str, None, None]: ... + + @overload + def _submit_turns( + self, + user_turn: UserTurn, + echo: EchoOptions, + stream: bool, + data_model: type[BaseModel] | None = None, + kwargs: Optional[SubmitInputArgsT] = None, + *, + content_mode: Literal["all"], + ) -> Generator[str | Content, None, None]: ... + def _submit_turns( self, user_turn: UserTurn, @@ -2689,6 +2712,29 @@ def emit(text: str | Content): tokens_log(self.provider, turn.tokens) self._turns.extend([user_turn, turn]) + @overload + def _submit_turns_async( + self, + user_turn: UserTurn, + echo: EchoOptions, + stream: bool, + data_model: type[BaseModel] | None = None, + kwargs: Optional[SubmitInputArgsT] = None, + content_mode: Literal["text"] = "text", + ) -> AsyncGenerator[str, None]: ... + + @overload + def _submit_turns_async( + self, + user_turn: UserTurn, + echo: EchoOptions, + stream: bool, + data_model: type[BaseModel] | None = None, + kwargs: Optional[SubmitInputArgsT] = None, + *, + content_mode: Literal["all"], + ) -> AsyncGenerator[str | Content, None]: ... + async def _submit_turns_async( self, user_turn: UserTurn, diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py index f382f82f..f512071e 100644 --- a/chatlas/_provider_google.py +++ b/chatlas/_provider_google.py @@ -363,10 +363,13 @@ def _chat_perform_args( return kwargs_full def stream_content(self, chunk) -> Optional[Content]: - try: - parts = chunk.candidates[0].content.parts - except (AttributeError, IndexError): + candidates = getattr(chunk, "candidates", None) + if not candidates: + return None + content = candidates[0].content + if content is None: return None + parts = content.parts if not parts: return None part = parts[0]