From 1310d84b8676a4cc4bc6d3d21174099dea0dda64 Mon Sep 17 00:00:00 2001
From: Carson <cpsievert1@gmail.com>
Date: Thu, 2 Apr 2026 10:39:37 -0500
Subject: [PATCH 1/9] chore: add .worktrees/ to .gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index f346d70d..b7a00880 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,5 +11,8 @@ logs/
 
 /.luarc.json
 
+# worktrees
+.worktrees/
+
 # setuptools_scm
 chatlas/_version.py

From 9919b3480e0503bf2c92fd537c811652c41ebe08 Mon Sep 17 00:00:00 2001
From: Carson <cpsievert1@gmail.com>
Date: Thu, 2 Apr 2026 10:44:00 -0500
Subject: [PATCH 2/9] refactor: add abstract stream_content() to Provider, make
 stream_text() concrete

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 chatlas/_provider.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/chatlas/_provider.py b/chatlas/_provider.py
index 5962e77c..b92681b9 100644
--- a/chatlas/_provider.py
+++ b/chatlas/_provider.py
@@ -15,7 +15,7 @@
 
 from pydantic import BaseModel
 
-from ._content import Content
+from ._content import Content, ContentText, ContentThinking
 from ._tools import Tool, ToolBuiltIn
 from ._turn import AssistantTurn, Turn
 from ._typing_extensions import NotRequired, TypedDict
@@ -226,7 +226,20 @@ async def chat_perform_async(
     ) -> AsyncIterable[ChatCompletionChunkT] | ChatCompletionT: ...
 
     @abstractmethod
-    def stream_text(self, chunk: ChatCompletionChunkT) -> Optional[str]: ...
+    def stream_content(
+        self, chunk: ChatCompletionChunkT
+    ) -> Optional["Content"]:
+        ...
+
+    def stream_text(self, chunk: ChatCompletionChunkT) -> Optional[str]:
+        content = self.stream_content(chunk)
+        if content is None:
+            return None
+        if isinstance(content, ContentThinking):
+            return content.thinking
+        if isinstance(content, ContentText):
+            return content.text
+        return str(content)
 
     @abstractmethod
     def stream_merge_chunks(

From 3570a8ae559dc0349af8208a88890a8ca1120a49 Mon Sep 17 00:00:00 2001
From: Carson <cpsievert1@gmail.com>
Date: Thu, 2 Apr 2026 10:45:53 -0500
Subject: [PATCH 3/9] refactor: implement stream_content() on all providers

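Replace stream_text() with stream_content() on all five provider
implementations (Anthropic, OpenAI, Google, OpenAI Completions, and
Snowflake) so that streamed chunks are returned as Content objects
(ContentText/ContentThinking) instead of raw strings.

Behavior changes beyond the rename:
- Google: reads the first part of the first candidate instead of
  chunk.text, so parts flagged as "thought" become ContentThinking.
- OpenAI: the "\n\n" separator previously emitted on
  response.reasoning_summary_text.done is no longer produced here
  (it is restored in PATCH 7/9).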

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 chatlas/_provider_anthropic.py          |  6 +++---
 chatlas/_provider_google.py             | 17 +++++++++++++----
 chatlas/_provider_openai.py             |  9 +++------
 chatlas/_provider_openai_completions.py |  7 +++++--
 chatlas/_provider_snowflake.py          |  4 ++--
 5 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py
index fca0c871..63477f53 100644
--- a/chatlas/_provider_anthropic.py
+++ b/chatlas/_provider_anthropic.py
@@ -463,12 +463,12 @@ def _structured_tool_call(**kwargs: Any):
 
         return kwargs_full
 
-    def stream_text(self, chunk) -> Optional[str]:
+    def stream_content(self, chunk) -> Optional[Content]:
         if chunk.type == "content_block_delta":
             if chunk.delta.type == "text_delta":
-                return chunk.delta.text
+                return ContentText(text=chunk.delta.text)
             if chunk.delta.type == "thinking_delta":
-                return chunk.delta.thinking
+                return ContentThinking(thinking=chunk.delta.thinking)
         return None
 
     def stream_merge_chunks(self, completion, chunk):
diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py
index df5492b0..1419340d 100644
--- a/chatlas/_provider_google.py
+++ b/chatlas/_provider_google.py
@@ -14,6 +14,7 @@
     ContentJson,
     ContentPDF,
     ContentText,
+    ContentThinking,
     ContentToolRequest,
     ContentToolResult,
 )
@@ -361,12 +362,20 @@ def _chat_perform_args(
 
         return kwargs_full
 
-    def stream_text(self, chunk) -> Optional[str]:
+    def stream_content(self, chunk) -> Optional[Content]:
         try:
-            # Errors if there is no text (e.g., tool request)
-            return chunk.text
-        except Exception:
+            parts = chunk.candidates[0].content.parts
+        except (AttributeError, IndexError):
             return None
+        if not parts:
+            return None
+        part = parts[0]
+        text = getattr(part, "text", None)
+        if text is None:
+            return None
+        if getattr(part, "thought", None):
+            return ContentThinking(thinking=text)
+        return ContentText(text=text)
 
     def stream_merge_chunks(self, completion, chunk):
         chunkd = chunk.model_dump()
diff --git a/chatlas/_provider_openai.py b/chatlas/_provider_openai.py
index 6623bd9a..0ff27e23 100644
--- a/chatlas/_provider_openai.py
+++ b/chatlas/_provider_openai.py
@@ -292,16 +292,13 @@ def _chat_perform_args(
 
         return kwargs_full
 
-    def stream_text(self, chunk):
+    def stream_content(self, chunk) -> Optional[Content]:
         if chunk.type == "response.output_text.delta":
             # https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
-            return chunk.delta
+            return ContentText(text=chunk.delta)
         if chunk.type == "response.reasoning_summary_text.delta":
             # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
-            return chunk.delta
-        if chunk.type == "response.reasoning_summary_text.done":
-            # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done
-            return "\n\n"
+            return ContentThinking(thinking=chunk.delta)
         return None
 
     def stream_merge_chunks(self, completion, chunk):
diff --git a/chatlas/_provider_openai_completions.py b/chatlas/_provider_openai_completions.py
index 1b7c66dc..5937222a 100644
--- a/chatlas/_provider_openai_completions.py
+++ b/chatlas/_provider_openai_completions.py
@@ -192,10 +192,13 @@ def _chat_perform_args(
 
         return kwargs_full
 
-    def stream_text(self, chunk):
+    def stream_content(self, chunk) -> Optional[Content]:
         if not chunk.choices:
             return None
-        return chunk.choices[0].delta.content
+        text = chunk.choices[0].delta.content
+        if text is None:
+            return None
+        return ContentText(text=text)
 
     def stream_merge_chunks(self, completion, chunk):
         chunkd = chunk.model_dump()
diff --git a/chatlas/_provider_snowflake.py b/chatlas/_provider_snowflake.py
index 2f2c1d58..0d6a8b83 100644
--- a/chatlas/_provider_snowflake.py
+++ b/chatlas/_provider_snowflake.py
@@ -356,13 +356,13 @@ def _complete_request(
 
         return req
 
-    def stream_text(self, chunk):
+    def stream_content(self, chunk) -> Optional[Content]:
         if not chunk.choices:
             return None
         delta = chunk.choices[0].delta
         if delta is None or "content" not in delta:
             return None
-        return delta["content"]
+        return ContentText(text=delta["content"])
 
     # Snowflake sort-of follows OpenAI/Anthropic streaming formats except they
     # don't have the critical "index" field in the delta that the merge logic

From 065fd1765540ae5f0ffc1cafc1cae54ff696fb63 Mon Sep 17 00:00:00 2001
From: Carson <cpsievert1@gmail.com>
Date: Thu, 2 Apr 2026 10:52:54 -0500
Subject: [PATCH 4/9] refactor: use stream_content() in _submit_turns, yield
 ContentThinking for content='all'

- Add content_text() helper to extract displayable text from Content objects
- Thread content_mode parameter from _chat_impl to _submit_turns
- Use stream_content() instead of stream_text() in streaming loops
- Yield ContentThinking objects when content="all" mode is active
- Use model_construct() for streaming ContentText to avoid the
  whitespace-to-"[empty string]" validator corrupting raw chunks

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 chatlas/_chat.py                        | 48 +++++++++++++++++++------
 chatlas/_provider_anthropic.py          |  2 +-
 chatlas/_provider_google.py             |  2 +-
 chatlas/_provider_openai.py             |  2 +-
 chatlas/_provider_openai_completions.py |  2 +-
 chatlas/_provider_snowflake.py          |  2 +-
 6 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/chatlas/_chat.py b/chatlas/_chat.py
index 709f5bc0..d17bb75c 100644
--- a/chatlas/_chat.py
+++ b/chatlas/_chat.py
@@ -34,6 +34,7 @@
     Content,
     ContentJson,
     ContentText,
+    ContentThinking,
     ContentToolRequest,
     ContentToolResult,
     ToolInfo,
@@ -2500,6 +2501,7 @@ def _chat_impl(
                 stream=stream,
                 data_model=data_model,
                 kwargs=kwargs,
+                content_mode=content,
             ):
                 yield chunk
 
@@ -2567,6 +2569,7 @@ async def _chat_impl_async(
                 stream=stream,
                 data_model=data_model,
                 kwargs=kwargs,
+                content_mode=content,
             ):
                 yield chunk
 
@@ -2604,7 +2607,8 @@ def _submit_turns(
         stream: bool,
         data_model: type[BaseModel] | None = None,
         kwargs: Optional[SubmitInputArgsT] = None,
-    ) -> Generator[str, None, None]:
+        content_mode: Literal["text", "all"] = "text",
+    ) -> Generator[str | Content, None, None]:
         if any(isinstance(x, Tool) and x._is_async for x in self._tools.values()):
             raise ValueError("Cannot use async tools in a synchronous chat")
 
@@ -2630,10 +2634,17 @@ def emit(text: str | Content):
 
             result = None
             for chunk in response:
-                text = self.provider.stream_text(chunk)
-                if text:
-                    emit(text)
-                    yield text
+                content = self.provider.stream_content(chunk)
+                if content is not None:
+                    text = content_text(content)
+                    if text:
+                        emit(text)
+                        if content_mode == "all" and isinstance(
+                            content, ContentThinking
+                        ):
+                            yield content
+                        else:
+                            yield text
                 result = self.provider.stream_merge_chunks(result, chunk)
 
             turn = self.provider.stream_turn(
@@ -2682,7 +2693,8 @@ async def _submit_turns_async(
         stream: bool,
         data_model: type[BaseModel] | None = None,
         kwargs: Optional[SubmitInputArgsT] = None,
-    ) -> AsyncGenerator[str, None]:
+        content_mode: Literal["text", "all"] = "text",
+    ) -> AsyncGenerator[str | Content, None]:
         def emit(text: str | Content):
             self._echo_content(str(text))
 
@@ -2705,10 +2717,17 @@ def emit(text: str | Content):
 
             result = None
             async for chunk in response:
-                text = self.provider.stream_text(chunk)
-                if text:
-                    emit(text)
-                    yield text
+                content = self.provider.stream_content(chunk)
+                if content is not None:
+                    text = content_text(content)
+                    if text:
+                        emit(text)
+                        if content_mode == "all" and isinstance(
+                            content, ContentThinking
+                        ):
+                            yield content
+                        else:
+                            yield text
                 result = self.provider.stream_merge_chunks(result, chunk)
 
             turn = self.provider.stream_turn(
@@ -3184,6 +3203,15 @@ class ToolFailureWarning(RuntimeWarning):
 warnings.simplefilter("always", ToolFailureWarning)
 
 
+def content_text(content: Content) -> str:
+    """Extract displayable text from a Content object."""
+    if isinstance(content, ContentThinking):
+        return content.thinking
+    if isinstance(content, ContentText):
+        return content.text
+    return str(content)
+
+
 def is_quarto():
     return os.getenv("QUARTO_PYTHON", None) is not None
 
diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py
index 63477f53..d4b26b55 100644
--- a/chatlas/_provider_anthropic.py
+++ b/chatlas/_provider_anthropic.py
@@ -466,7 +466,7 @@ def _structured_tool_call(**kwargs: Any):
     def stream_content(self, chunk) -> Optional[Content]:
         if chunk.type == "content_block_delta":
             if chunk.delta.type == "text_delta":
-                return ContentText(text=chunk.delta.text)
+                return ContentText.model_construct(text=chunk.delta.text)
             if chunk.delta.type == "thinking_delta":
                 return ContentThinking(thinking=chunk.delta.thinking)
         return None
diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py
index 1419340d..38bcf59f 100644
--- a/chatlas/_provider_google.py
+++ b/chatlas/_provider_google.py
@@ -375,7 +375,7 @@ def stream_content(self, chunk) -> Optional[Content]:
             return None
         if getattr(part, "thought", None):
             return ContentThinking(thinking=text)
-        return ContentText(text=text)
+        return ContentText.model_construct(text=text)
 
     def stream_merge_chunks(self, completion, chunk):
         chunkd = chunk.model_dump()
diff --git a/chatlas/_provider_openai.py b/chatlas/_provider_openai.py
index 0ff27e23..d32225ca 100644
--- a/chatlas/_provider_openai.py
+++ b/chatlas/_provider_openai.py
@@ -295,7 +295,7 @@ def _chat_perform_args(
     def stream_content(self, chunk) -> Optional[Content]:
         if chunk.type == "response.output_text.delta":
             # https://platform.openai.com/docs/api-reference/responses-streaming/response/output_text/delta
-            return ContentText(text=chunk.delta)
+            return ContentText.model_construct(text=chunk.delta)
         if chunk.type == "response.reasoning_summary_text.delta":
             # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
             return ContentThinking(thinking=chunk.delta)
diff --git a/chatlas/_provider_openai_completions.py b/chatlas/_provider_openai_completions.py
index 5937222a..064042a2 100644
--- a/chatlas/_provider_openai_completions.py
+++ b/chatlas/_provider_openai_completions.py
@@ -198,7 +198,7 @@ def stream_content(self, chunk) -> Optional[Content]:
         text = chunk.choices[0].delta.content
         if text is None:
             return None
-        return ContentText(text=text)
+        return ContentText.model_construct(text=text)
 
     def stream_merge_chunks(self, completion, chunk):
         chunkd = chunk.model_dump()
diff --git a/chatlas/_provider_snowflake.py b/chatlas/_provider_snowflake.py
index 0d6a8b83..cc22dc5a 100644
--- a/chatlas/_provider_snowflake.py
+++ b/chatlas/_provider_snowflake.py
@@ -362,7 +362,7 @@ def stream_content(self, chunk) -> Optional[Content]:
         delta = chunk.choices[0].delta
         if delta is None or "content" not in delta:
             return None
-        return ContentText(text=delta["content"])
+        return ContentText.model_construct(text=delta["content"])
 
     # Snowflake sort-of follows OpenAI/Anthropic streaming formats except they
     # don't have the critical "index" field in the delta that the merge logic

From 7c94903cea703cbd5dba349a020edc5e16c01c09 Mon Sep 17 00:00:00 2001
From: Carson <cpsievert1@gmail.com>
Date: Thu, 2 Apr 2026 10:57:02 -0500
Subject: [PATCH 5/9] style: auto-format with ruff

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 chatlas/_chat.py               | 9 ++++++---
 chatlas/_content_expand.py     | 3 ++-
 chatlas/_parallel.py           | 2 ++
 chatlas/_provider.py           | 5 +----
 chatlas/_provider_anthropic.py | 4 +---
 5 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/chatlas/_chat.py b/chatlas/_chat.py
index d17bb75c..7f2dca51 100644
--- a/chatlas/_chat.py
+++ b/chatlas/_chat.py
@@ -1310,9 +1310,12 @@ class Person(BaseModel):
 
 
         chat = ChatOpenAI()
-        chunks = [chunk async for chunk in await chat.stream_async(
-            "John is 25 years old", data_model=Person
-        )]
+        chunks = [
+            chunk
+            async for chunk in await chat.stream_async(
+                "John is 25 years old", data_model=Person
+            )
+        ]
         person = Person.model_validate_json("".join(chunks))
         ```
         """
diff --git a/chatlas/_content_expand.py b/chatlas/_content_expand.py
index b2f64e07..09cd972c 100644
--- a/chatlas/_content_expand.py
+++ b/chatlas/_content_expand.py
@@ -50,7 +50,8 @@ def expand_tool_result(content: ContentToolResult) -> list[ContentUnion]:
 
 
 def expand_tool_value(
-    request: ContentToolRequest, value: ContentImageInline | ContentImageRemote | ContentPDF
+    request: ContentToolRequest,
+    value: ContentImageInline | ContentImageRemote | ContentPDF,
 ) -> list[ContentUnion]:
     open_tag = f'<tool-content call-id="{request.id}">'
 
diff --git a/chatlas/_parallel.py b/chatlas/_parallel.py
index 979938ef..1dbff03a 100644
--- a/chatlas/_parallel.py
+++ b/chatlas/_parallel.py
@@ -38,6 +38,7 @@
 ChatT = TypeVar("ChatT", bound=Chat)
 BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
 
+
 @dataclass
 class StructuredChatResult(Generic[BaseModelT, ChatT]):
     """Holds the result of a structured parallel chat request."""
@@ -396,6 +397,7 @@ async def parallel_chat_structured(
     import chatlas as ctl
     from pydantic import BaseModel
 
+
     class Person(BaseModel):
         name: str
         age: int
diff --git a/chatlas/_provider.py b/chatlas/_provider.py
index b92681b9..b3e8d09a 100644
--- a/chatlas/_provider.py
+++ b/chatlas/_provider.py
@@ -226,10 +226,7 @@ async def chat_perform_async(
     ) -> AsyncIterable[ChatCompletionChunkT] | ChatCompletionT: ...
 
     @abstractmethod
-    def stream_content(
-        self, chunk: ChatCompletionChunkT
-    ) -> Optional["Content"]:
-        ...
+    def stream_content(self, chunk: ChatCompletionChunkT) -> Optional["Content"]: ...
 
     def stream_text(self, chunk: ChatCompletionChunkT) -> Optional[str]:
         content = self.stream_content(chunk)
diff --git a/chatlas/_provider_anthropic.py b/chatlas/_provider_anthropic.py
index d4b26b55..4a0d746d 100644
--- a/chatlas/_provider_anthropic.py
+++ b/chatlas/_provider_anthropic.py
@@ -830,9 +830,7 @@ def _as_turn(self, completion: Message, has_data_model=False) -> AssistantTurn:
                 extra = {
                     "type": content.type,
                     "tool_use_id": content.tool_use_id,
-                    "content": [
-                        x.model_dump() for x in content.content
-                    ]
+                    "content": [x.model_dump() for x in content.content]
                     if isinstance(content.content, list)
                     else content.content.model_dump(),
                 }

From c20438a1ffb904e595cd96c8b1390e24d1a05065 Mon Sep 17 00:00:00 2001
From: Carson <cpsievert1@gmail.com>
Date: Thu, 2 Apr 2026 12:02:36 -0500
Subject: [PATCH 6/9] feat: add ContentThinking to Google _as_turn() and
 CHANGELOG entry

Handle thinking parts in non-streaming Google responses by checking
part.get("thought") and emitting ContentThinking. Also add CHANGELOG
entry for the stream_content() refactor.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md                | 4 ++++
 chatlas/_provider_google.py | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 72c8f453..7a04cee3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [UNRELEASED]
 
+### New features
+
+* Providers now distinguish text from thinking content during streaming via the new `stream_content()` method. This allows downstream packages like shinychat to provide specific UI for thinking content. (#276)
+
 ### Bug fixes
 
 * Fixed tool calling with Google thinking models (e.g., `gemini-3-flash-preview`) failing with a 400 `INVALID_ARGUMENT` error about a missing `thought_signature`. The signature is now preserved and forwarded in subsequent turns. (#274)
diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py
index 38bcf59f..f382f82f 100644
--- a/chatlas/_provider_google.py
+++ b/chatlas/_provider_google.py
@@ -563,6 +563,8 @@ def _as_turn(
             if text:
                 if has_data_model:
                     contents.append(ContentJson(value=orjson.loads(text)))
+                elif part.get("thought"):
+                    contents.append(ContentThinking(thinking=text))
                 else:
                     contents.append(ContentText(text=text))
             function_call = part.get("function_call")

From c3877c805269a2993c27f7087ef607641e97d180 Mon Sep 17 00:00:00 2001
From: Carson <cpsievert1@gmail.com>
Date: Thu, 2 Apr 2026 12:25:04 -0500
Subject: [PATCH 7/9] fix: address code review feedback on PR #276

- Add ContentThinking to all content="all" overload return types so
  callers know they can receive thinking content during streaming
- Restore reasoning_summary_text.done separator in OpenAI provider
  that was accidentally dropped during the stream_content() refactor
- Revert unrelated formatting changes to _content_expand.py and
  _parallel.py to keep the PR focused

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 chatlas/_chat.py            | 20 ++++++++++----------
 chatlas/_content_expand.py  |  3 +--
 chatlas/_parallel.py        |  2 --
 chatlas/_provider_openai.py |  4 ++++
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/chatlas/_chat.py b/chatlas/_chat.py
index 7f2dca51..5b5a633e 100644
--- a/chatlas/_chat.py
+++ b/chatlas/_chat.py
@@ -1156,7 +1156,7 @@ def stream(
         echo: EchoOptions = "none",
         data_model: Optional[type[BaseModel]] = None,
         kwargs: Optional[SubmitInputArgsT] = None,
-    ) -> Generator[str | ContentToolRequest | ContentToolResult, None, None]: ...
+    ) -> Generator[str | ContentThinking | ContentToolRequest | ContentToolResult, None, None]: ...
 
     def stream(
         self,
@@ -1165,7 +1165,7 @@ def stream(
         echo: EchoOptions = "none",
         data_model: Optional[type[BaseModel]] = None,
         kwargs: Optional[SubmitInputArgsT] = None,
-    ) -> Generator[str | ContentToolRequest | ContentToolResult, None, None]:
+    ) -> Generator[str | ContentThinking | ContentToolRequest | ContentToolResult, None, None]:
         """
         Generate a response from the chat in a streaming fashion.
 
@@ -1229,7 +1229,7 @@ class Person(BaseModel):
         )
 
         def wrapper() -> Generator[
-            str | ContentToolRequest | ContentToolResult, None, None
+            str | ContentThinking | ContentToolRequest | ContentToolResult, None, None
         ]:
             with display:
                 for chunk in generator:
@@ -1255,7 +1255,7 @@ async def stream_async(
         echo: EchoOptions = "none",
         data_model: Optional[type[BaseModel]] = None,
         kwargs: Optional[SubmitInputArgsT] = None,
-    ) -> AsyncGenerator[str | ContentToolRequest | ContentToolResult, None]: ...
+    ) -> AsyncGenerator[str | ContentThinking | ContentToolRequest | ContentToolResult, None]: ...
 
     async def stream_async(
         self,
@@ -1264,7 +1264,7 @@ async def stream_async(
         echo: EchoOptions = "none",
         data_model: Optional[type[BaseModel]] = None,
         kwargs: Optional[SubmitInputArgsT] = None,
-    ) -> AsyncGenerator[str | ContentToolRequest | ContentToolResult, None]:
+    ) -> AsyncGenerator[str | ContentThinking | ContentToolRequest | ContentToolResult, None]:
         """
         Generate a response from the chat in a streaming fashion asynchronously.
 
@@ -1324,7 +1324,7 @@ class Person(BaseModel):
         display = self._markdown_display(echo=echo)
 
         async def wrapper() -> AsyncGenerator[
-            str | ContentToolRequest | ContentToolResult, None
+            str | ContentThinking | ContentToolRequest | ContentToolResult, None
         ]:
             with display:
                 async for chunk in self._chat_impl_async(
@@ -2485,7 +2485,7 @@ def _chat_impl(
         stream: bool,
         kwargs: Optional[SubmitInputArgsT] = None,
         data_model: Optional[type[BaseModel]] = None,
-    ) -> Generator[str | ContentToolRequest | ContentToolResult, None, None]: ...
+    ) -> Generator[str | ContentThinking | ContentToolRequest | ContentToolResult, None, None]: ...
 
     def _chat_impl(
         self,
@@ -2495,7 +2495,7 @@ def _chat_impl(
         stream: bool,
         kwargs: Optional[SubmitInputArgsT] = None,
         data_model: Optional[type[BaseModel]] = None,
-    ) -> Generator[str | ContentToolRequest | ContentToolResult, None, None]:
+    ) -> Generator[str | Content, None, None]:
         user_turn_result: UserTurn | None = user_turn
         while user_turn_result is not None:
             for chunk in self._submit_turns(
@@ -2553,7 +2553,7 @@ def _chat_impl_async(
         stream: bool,
         kwargs: Optional[SubmitInputArgsT] = None,
         data_model: Optional[type[BaseModel]] = None,
-    ) -> AsyncGenerator[str | ContentToolRequest | ContentToolResult, None]: ...
+    ) -> AsyncGenerator[str | ContentThinking | ContentToolRequest | ContentToolResult, None]: ...
 
     async def _chat_impl_async(
         self,
@@ -2563,7 +2563,7 @@ async def _chat_impl_async(
         stream: bool,
         kwargs: Optional[SubmitInputArgsT] = None,
         data_model: Optional[type[BaseModel]] = None,
-    ) -> AsyncGenerator[str | ContentToolRequest | ContentToolResult, None]:
+    ) -> AsyncGenerator[str | Content, None]:
         user_turn_result: UserTurn | None = user_turn
         while user_turn_result is not None:
             async for chunk in self._submit_turns_async(
diff --git a/chatlas/_content_expand.py b/chatlas/_content_expand.py
index 09cd972c..b2f64e07 100644
--- a/chatlas/_content_expand.py
+++ b/chatlas/_content_expand.py
@@ -50,8 +50,7 @@ def expand_tool_result(content: ContentToolResult) -> list[ContentUnion]:
 
 
 def expand_tool_value(
-    request: ContentToolRequest,
-    value: ContentImageInline | ContentImageRemote | ContentPDF,
+    request: ContentToolRequest, value: ContentImageInline | ContentImageRemote | ContentPDF
 ) -> list[ContentUnion]:
     open_tag = f'<tool-content call-id="{request.id}">'
 
diff --git a/chatlas/_parallel.py b/chatlas/_parallel.py
index 1dbff03a..979938ef 100644
--- a/chatlas/_parallel.py
+++ b/chatlas/_parallel.py
@@ -38,7 +38,6 @@
 ChatT = TypeVar("ChatT", bound=Chat)
 BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
 
-
 @dataclass
 class StructuredChatResult(Generic[BaseModelT, ChatT]):
     """Holds the result of a structured parallel chat request."""
@@ -397,7 +396,6 @@ async def parallel_chat_structured(
     import chatlas as ctl
     from pydantic import BaseModel
 
-
     class Person(BaseModel):
         name: str
         age: int
diff --git a/chatlas/_provider_openai.py b/chatlas/_provider_openai.py
index d32225ca..2384d837 100644
--- a/chatlas/_provider_openai.py
+++ b/chatlas/_provider_openai.py
@@ -299,6 +299,10 @@ def stream_content(self, chunk) -> Optional[Content]:
         if chunk.type == "response.reasoning_summary_text.delta":
             # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/delta
             return ContentThinking(thinking=chunk.delta)
+        if chunk.type == "response.reasoning_summary_text.done":
+            # Separator between reasoning summary and response text
+            # https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_summary_text/done
+            return ContentText.model_construct(text="\n\n")
         return None
 
     def stream_merge_chunks(self, completion, chunk):

From afe70f5eee39c1aecafeb8904dc88ddddbbed7a6 Mon Sep 17 00:00:00 2001
From: Carson <cpsievert1@gmail.com>
Date: Thu, 2 Apr 2026 12:28:43 -0500
Subject: [PATCH 8/9] docs: make changelog entry more user-facing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7a04cee3..897a5246 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### New features
 
-* Providers now distinguish text from thinking content during streaming via the new `stream_content()` method. This allows downstream packages like shinychat to provide specific UI for thinking content. (#276)
+* The `.stream()` and `.stream_async()` methods now yield `ContentThinking` objects (instead of plain strings) for thinking/reasoning content when `content="all"`. This allows downstream packages like shinychat to provide specific UI for thinking content. (#276)
 
 ### Bug fixes
 

From 539f954c24fa3754933252f2549849a9077d71d1 Mon Sep 17 00:00:00 2001
From: Carson <cpsievert1@gmail.com>
Date: Thu, 2 Apr 2026 12:39:22 -0500
Subject: [PATCH 9/9] fix: resolve pyright type errors in stream_content
 refactor

- Add overloads to _submit_turns/_submit_turns_async so content_mode="text"
  narrows return type to Generator[str], fixing ChatResponse type mismatch
- Fix Google provider stream_content() optional subscript error by
  guarding against None candidates before indexing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 chatlas/_chat.py            | 46 +++++++++++++++++++++++++++++++++++++
 chatlas/_provider_google.py |  9 +++++---
 2 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/chatlas/_chat.py b/chatlas/_chat.py
index 5b5a633e..cccc4941 100644
--- a/chatlas/_chat.py
+++ b/chatlas/_chat.py
@@ -2603,6 +2603,29 @@ async def _chat_impl_async(
             if all_results:
                 user_turn_result = UserTurn(all_results)
 
+    @overload
+    def _submit_turns(
+        self,
+        user_turn: UserTurn,
+        echo: EchoOptions,
+        stream: bool,
+        data_model: type[BaseModel] | None = None,
+        kwargs: Optional[SubmitInputArgsT] = None,
+        content_mode: Literal["text"] = "text",
+    ) -> Generator[str, None, None]: ...
+
+    @overload
+    def _submit_turns(
+        self,
+        user_turn: UserTurn,
+        echo: EchoOptions,
+        stream: bool,
+        data_model: type[BaseModel] | None = None,
+        kwargs: Optional[SubmitInputArgsT] = None,
+        *,
+        content_mode: Literal["all"],
+    ) -> Generator[str | Content, None, None]: ...
+
     def _submit_turns(
         self,
         user_turn: UserTurn,
@@ -2689,6 +2712,29 @@ def emit(text: str | Content):
             tokens_log(self.provider, turn.tokens)
         self._turns.extend([user_turn, turn])
 
+    @overload
+    def _submit_turns_async(
+        self,
+        user_turn: UserTurn,
+        echo: EchoOptions,
+        stream: bool,
+        data_model: type[BaseModel] | None = None,
+        kwargs: Optional[SubmitInputArgsT] = None,
+        content_mode: Literal["text"] = "text",
+    ) -> AsyncGenerator[str, None]: ...
+
+    @overload
+    def _submit_turns_async(
+        self,
+        user_turn: UserTurn,
+        echo: EchoOptions,
+        stream: bool,
+        data_model: type[BaseModel] | None = None,
+        kwargs: Optional[SubmitInputArgsT] = None,
+        *,
+        content_mode: Literal["all"],
+    ) -> AsyncGenerator[str | Content, None]: ...
+
     async def _submit_turns_async(
         self,
         user_turn: UserTurn,
diff --git a/chatlas/_provider_google.py b/chatlas/_provider_google.py
index f382f82f..f512071e 100644
--- a/chatlas/_provider_google.py
+++ b/chatlas/_provider_google.py
@@ -363,10 +363,13 @@ def _chat_perform_args(
         return kwargs_full
 
     def stream_content(self, chunk) -> Optional[Content]:
-        try:
-            parts = chunk.candidates[0].content.parts
-        except (AttributeError, IndexError):
+        candidates = getattr(chunk, "candidates", None)
+        if not candidates:
+            return None
+        content = candidates[0].content
+        if content is None:
             return None
+        parts = content.parts
         if not parts:
             return None
         part = parts[0]