Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [UNRELEASED]

### New features

* `ChatOpenAICompletions()` (and providers built on it like `ChatDeepSeek`, `ChatOpenRouter`, etc.) now extracts `reasoning_content` from model responses as `ContentThinking` objects. A new `preserve_thinking` parameter controls whether reasoning content is sent back to the API in multi-turn conversations; it defaults to `False` but is set to `True` for `ChatDeepSeek` (required for V4 tool-calling) and `ChatOpenRouter` (recommended for quality). (#295)

### Improvements

* `.stream()` and `.stream_async()` now handle thinking content differently by mode. With `content="text"`, thinking is suppressed entirely. With `content="all"`, thinking fragments are yielded as `ContentThinkingDelta` objects with a `phase` property (`"start"`, `"body"`, or `"end"`) that communicates block boundaries to downstream consumers without injecting synthetic strings into the stream. (#299, #297, #294)
Expand All @@ -19,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Databricks: `databricks-claude-sonnet-4-6`
* OpenAI / Completions / OpenRouter / Portkey: `gpt-5.4`
* GitHub: `gpt-5`
* DeepSeek: `deepseek-v4-flash`
* Perplexity: `sonar`
* Updated token pricing data from LiteLLM. (#292)

Expand Down
8 changes: 4 additions & 4 deletions chatlas/_provider_deepseek.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def ChatDeepSeek(
```
"""
if model is None:
model = log_model_default("deepseek-chat")
model = log_model_default("deepseek-v4-flash")

if api_key is None:
api_key = os.getenv("DEEPSEEK_API_KEY")
Expand All @@ -136,6 +136,7 @@ def ChatDeepSeek(
model=model,
base_url=base_url,
seed=seed,
preserve_thinking=True,
name="DeepSeek",
kwargs=kwargs,
),
Expand All @@ -144,14 +145,13 @@ def ChatDeepSeek(


class DeepSeekProvider(OpenAICompletionsProvider):
@staticmethod
def _turns_as_inputs(turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
def _turns_as_inputs(self, turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
from openai.types.chat import (
ChatCompletionAssistantMessageParam,
ChatCompletionUserMessageParam,
)

params = OpenAICompletionsProvider._turns_as_inputs(turns)
params = super()._turns_as_inputs(turns)

# Content must be a string
for i, param in enumerate(params):
Expand Down
3 changes: 1 addition & 2 deletions chatlas/_provider_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,7 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> AssistantTu
completion=completion,
)

@staticmethod
def _turns_as_inputs(turns: list[Turn]) -> "list[ResponseInputItemParam]":
def _turns_as_inputs(self, turns: list[Turn]) -> "list[ResponseInputItemParam]":
res: "list[ResponseInputItemParam]" = []
for turn in turns:
res.extend([as_input_param(x, turn.role) for x in turn.contents])
Expand Down
44 changes: 37 additions & 7 deletions chatlas/_provider_openai_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
ContentJson,
ContentPDF,
ContentText,
ContentThinking,
ContentToolRequest,
ContentToolResult,
)
Expand Down Expand Up @@ -59,6 +60,7 @@ def ChatOpenAICompletions(
model: "Optional[ChatModel | str]" = None,
api_key: Optional[str] = None,
seed: int | None | MISSING_TYPE = MISSING,
preserve_thinking: bool = False,
kwargs: Optional["ChatClientArgs"] = None,
) -> Chat["SubmitInputArgs", ChatCompletion]:
"""
Expand All @@ -70,6 +72,15 @@ def ChatOpenAICompletions(
This function may also be useful for using an "OpenAI-compatible model"
hosted by another provider (e.g., vLLM, Ollama, etc.) that supports the
OpenAI Completions API.

Parameters
----------
preserve_thinking
If True, reasoning content returned by the model is included when
sending conversation history back to the API. If False (the default),
reasoning content is still captured in the turn but dropped from
subsequent requests. Set to True if your provider requires or benefits
from seeing prior reasoning in multi-turn conversations.
"""
if isinstance(seed, MISSING_TYPE):
seed = 1014 if is_testing() else None
Expand All @@ -83,6 +94,7 @@ def ChatOpenAICompletions(
model=model,
base_url=base_url,
seed=seed,
preserve_thinking=preserve_thinking,
kwargs=kwargs,
),
system_prompt=system_prompt,
Expand All @@ -105,6 +117,7 @@ def __init__(
base_url: str = "https://api.openai.com/v1",
name: str = "OpenAI",
seed: int | None = None,
preserve_thinking: bool = False,
kwargs: Optional["ChatClientArgs"] = None,
):
super().__init__(
Expand All @@ -115,6 +128,7 @@ def __init__(
kwargs=kwargs,
)
self._seed = seed
self._preserve_thinking = preserve_thinking

def chat_perform(
self,
Expand Down Expand Up @@ -194,7 +208,13 @@ def _chat_perform_args(
def stream_content(self, chunk) -> Optional[Content]:
if not chunk.choices:
return None
text = chunk.choices[0].delta.content
delta = chunk.choices[0].delta

reasoning = getattr(delta, "reasoning_content", None)
if reasoning is not None:
return ContentThinking(thinking=reasoning)

text = delta.content
if text is None:
return None
return ContentText.model_construct(text=text)
Expand Down Expand Up @@ -240,8 +260,7 @@ def value_tokens(self, completion):
cached_tokens,
)

@staticmethod
def _turns_as_inputs(turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
def _turns_as_inputs(self, turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
res: list["ChatCompletionMessageParam"] = []
for turn in turns:
if isinstance(turn, SystemTurn):
Expand All @@ -251,8 +270,12 @@ def _turns_as_inputs(turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
elif isinstance(turn, AssistantTurn):
content_parts: list["ContentArrayOfContentPart"] = []
tool_calls: list["ChatCompletionMessageToolCallParam"] = []
reasoning_content: str | None = None
for x in turn.contents:
if isinstance(x, ContentText):
if isinstance(x, ContentThinking):
if self._preserve_thinking:
reasoning_content = (reasoning_content or "") + x.thinking
elif isinstance(x, ContentText):
content_parts.append({"type": "text", "text": x.text})
elif isinstance(x, ContentJson):
text = orjson.dumps(x.value).decode("utf-8")
Expand All @@ -276,11 +299,13 @@ def _turns_as_inputs(turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
)

# Some OpenAI-compatible models (e.g., Groq) don't work nicely with empty content
args = {
args: dict[str, Any] = {
"role": "assistant",
"content": content_parts,
"tool_calls": tool_calls,
}
if reasoning_content is not None:
args["reasoning_content"] = reasoning_content
if not content_parts:
del args["content"]
if not tool_calls:
Expand Down Expand Up @@ -361,15 +386,20 @@ def _response_as_turn(
message = completion.choices[0].message

contents: list[Content] = []

reasoning = getattr(message, "reasoning_content", None)
if reasoning:
contents.append(ContentThinking(thinking=reasoning))

if message.content is not None:
if has_data_model:
data = message.content
# Some providers (e.g., Cloudflare) may already provide a dict
if not isinstance(data, dict):
data = orjson.loads(data)
contents = [ContentJson(value=data)]
contents.append(ContentJson(value=data))
else:
contents = [ContentText(text=message.content)]
contents.append(ContentText(text=message.content))

tool_calls = message.tool_calls

Expand Down
3 changes: 1 addition & 2 deletions chatlas/_provider_openai_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,8 @@ def _chat_perform_args(
data_model: Optional[type[BaseModel]],
) -> SubmitInputArgsT: ...

@staticmethod
@abstractmethod
def _turns_as_inputs(turns: list[Turn]) -> list[Any]: ...
def _turns_as_inputs(self, turns: list[Turn]) -> list[Any]: ...

@staticmethod
@abstractmethod
Expand Down
1 change: 1 addition & 0 deletions chatlas/_provider_openrouter.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def ChatOpenRouter(
base_url=base_url,
seed=seed,
name="OpenRouter",
preserve_thinking=True,
kwargs=kwargs2,
),
system_prompt=system_prompt,
Expand Down
20 changes: 10 additions & 10 deletions tests/_vcr/test_provider_deepseek/test_deepseek_list_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ interactions:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
- gzip, deflate, zstd
Connection:
- keep-alive
Host:
Expand All @@ -18,36 +18,36 @@ interactions:
uri: https://api.deepseek.com/models
response:
body:
string: '{"object":"list","data":[{"id":"deepseek-chat","object":"model","owned_by":"deepseek"},{"id":"deepseek-reasoner","object":"model","owned_by":"deepseek"}]}'
string: '{"object":"list","data":[{"id":"deepseek-v4-flash","object":"model","owned_by":"deepseek"},{"id":"deepseek-v4-pro","object":"model","owned_by":"deepseek"}]}'
headers:
Connection:
- keep-alive
Content-Length:
- '154'
- '156'
Content-Type:
- application/json
Date:
- Wed, 31 Dec 2025 20:37:50 GMT
- Wed, 06 May 2026 23:20:10 GMT
Server:
- elb
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Vary:
- origin
Via:
- 1.1 2f51a381830e231f6bc2b46fda74f69e.cloudfront.net (CloudFront)
- 1.1 59d8e2230b07f6d21e105d564b87cc6c.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- mgFFCx8JBolTTjH52ADc1GUHPZssINDzDxG2pABqc5djt5OUrlCTbQ==
- kvNMGPNoxRu0-JnpPnf6mBiJ7QU1rXkorvtdIg95oN1NGREqV2R0Bw==
X-Amz-Cf-Pop:
- ORD56-P7
- MSP50-P3
X-Cache:
- Miss from cloudfront
X-Content-Type-Options:
- nosniff
access-control-allow-credentials:
- 'true'
vary:
- origin, access-control-request-method, access-control-request-headers
x-ds-trace-id:
- e9649652e269daf0ffdd27df13056a22
- ca44e5c998d05d2e72061b2b65c80da0
status:
code: 200
message: OK
Expand Down
Loading
Loading