Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [UNRELEASED]

### New features

* `ChatOpenAICompletions()` (and providers built on it like `ChatDeepSeek`, `ChatOpenRouter`, etc.) now extracts `reasoning_content` from model responses as `ContentThinking` objects. A new `preserve_thinking` parameter controls whether reasoning content is sent back to the API in multi-turn conversations; it defaults to `False` but is set to `True` for `ChatDeepSeek` (required for V4 tool-calling) and `ChatOpenRouter` (recommended for quality). (#295)

### Improvements

* `.stream()` and `.stream_async()` now handle thinking content differently by mode. With `content="text"`, thinking is suppressed entirely. With `content="all"`, thinking fragments are yielded as `ContentThinkingDelta` objects with a `phase` property (`"start"`, `"body"`, or `"end"`) that communicates block boundaries to downstream consumers without injecting synthetic strings into the stream. (#299, #297, #294)
Expand All @@ -19,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Databricks: `databricks-claude-sonnet-4-6`
* OpenAI / Completions / OpenRouter / Portkey: `gpt-5.4`
* GitHub: `gpt-5`
* DeepSeek: `deepseek-v4-flash`
* Perplexity: `sonar`
* Updated token pricing data from LiteLLM. (#292)

Expand Down
8 changes: 4 additions & 4 deletions chatlas/_provider_deepseek.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def ChatDeepSeek(
```
"""
if model is None:
model = log_model_default("deepseek-chat")
model = log_model_default("deepseek-v4-flash")

if api_key is None:
api_key = os.getenv("DEEPSEEK_API_KEY")
Expand All @@ -136,6 +136,7 @@ def ChatDeepSeek(
model=model,
base_url=base_url,
seed=seed,
preserve_thinking=True,
name="DeepSeek",
kwargs=kwargs,
),
Expand All @@ -144,14 +145,13 @@ def ChatDeepSeek(


class DeepSeekProvider(OpenAICompletionsProvider):
@staticmethod
def _turns_as_inputs(turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
def _turns_as_inputs(self, turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
from openai.types.chat import (
ChatCompletionAssistantMessageParam,
ChatCompletionUserMessageParam,
)

params = OpenAICompletionsProvider._turns_as_inputs(turns)
params = super()._turns_as_inputs(turns)

# Content must be a string
for i, param in enumerate(params):
Expand Down
3 changes: 1 addition & 2 deletions chatlas/_provider_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,7 @@ def _response_as_turn(completion: Response, has_data_model: bool) -> AssistantTu
completion=completion,
)

@staticmethod
def _turns_as_inputs(turns: list[Turn]) -> "list[ResponseInputItemParam]":
def _turns_as_inputs(self, turns: list[Turn]) -> "list[ResponseInputItemParam]":
res: "list[ResponseInputItemParam]" = []
for turn in turns:
res.extend([as_input_param(x, turn.role) for x in turn.contents])
Expand Down
44 changes: 37 additions & 7 deletions chatlas/_provider_openai_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
ContentJson,
ContentPDF,
ContentText,
ContentThinking,
ContentToolRequest,
ContentToolResult,
)
Expand Down Expand Up @@ -59,6 +60,7 @@ def ChatOpenAICompletions(
model: "Optional[ChatModel | str]" = None,
api_key: Optional[str] = None,
seed: int | None | MISSING_TYPE = MISSING,
preserve_thinking: bool = False,
kwargs: Optional["ChatClientArgs"] = None,
) -> Chat["SubmitInputArgs", ChatCompletion]:
"""
Expand All @@ -70,6 +72,15 @@ def ChatOpenAICompletions(
This function may also be useful for using an "OpenAI-compatible model"
hosted by another provider (e.g., vLLM, Ollama, etc.) that supports the
OpenAI Completions API.

Parameters
----------
preserve_thinking
If True, reasoning content returned by the model is included when
sending conversation history back to the API. If False (the default),
reasoning content is still captured in the turn but dropped from
subsequent requests. Set to True if your provider requires or benefits
from seeing prior reasoning in multi-turn conversations.
"""
if isinstance(seed, MISSING_TYPE):
seed = 1014 if is_testing() else None
Expand All @@ -83,6 +94,7 @@ def ChatOpenAICompletions(
model=model,
base_url=base_url,
seed=seed,
preserve_thinking=preserve_thinking,
kwargs=kwargs,
),
system_prompt=system_prompt,
Expand All @@ -105,6 +117,7 @@ def __init__(
base_url: str = "https://api.openai.com/v1",
name: str = "OpenAI",
seed: int | None = None,
preserve_thinking: bool = False,
kwargs: Optional["ChatClientArgs"] = None,
):
super().__init__(
Expand All @@ -115,6 +128,7 @@ def __init__(
kwargs=kwargs,
)
self._seed = seed
self._preserve_thinking = preserve_thinking

def chat_perform(
self,
Expand Down Expand Up @@ -194,7 +208,13 @@ def _chat_perform_args(
def stream_content(self, chunk) -> Optional[Content]:
if not chunk.choices:
return None
text = chunk.choices[0].delta.content
delta = chunk.choices[0].delta

reasoning = getattr(delta, "reasoning_content", None)
if reasoning is not None:
return ContentThinking(thinking=reasoning)

text = delta.content
if text is None:
return None
return ContentText.model_construct(text=text)
Expand Down Expand Up @@ -240,8 +260,7 @@ def value_tokens(self, completion):
cached_tokens,
)

@staticmethod
def _turns_as_inputs(turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
def _turns_as_inputs(self, turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
res: list["ChatCompletionMessageParam"] = []
for turn in turns:
if isinstance(turn, SystemTurn):
Expand All @@ -251,8 +270,12 @@ def _turns_as_inputs(turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
elif isinstance(turn, AssistantTurn):
content_parts: list["ContentArrayOfContentPart"] = []
tool_calls: list["ChatCompletionMessageToolCallParam"] = []
reasoning_content: str | None = None
for x in turn.contents:
if isinstance(x, ContentText):
if isinstance(x, ContentThinking):
if self._preserve_thinking:
reasoning_content = (reasoning_content or "") + x.thinking
elif isinstance(x, ContentText):
content_parts.append({"type": "text", "text": x.text})
elif isinstance(x, ContentJson):
text = orjson.dumps(x.value).decode("utf-8")
Expand All @@ -276,11 +299,13 @@ def _turns_as_inputs(turns: list[Turn]) -> list["ChatCompletionMessageParam"]:
)

# Some OpenAI-compatible models (e.g., Groq) don't work nicely with empty content
args = {
args: dict[str, Any] = {
"role": "assistant",
"content": content_parts,
"tool_calls": tool_calls,
}
if reasoning_content is not None:
args["reasoning_content"] = reasoning_content
if not content_parts:
del args["content"]
if not tool_calls:
Expand Down Expand Up @@ -361,15 +386,20 @@ def _response_as_turn(
message = completion.choices[0].message

contents: list[Content] = []

reasoning = getattr(message, "reasoning_content", None)
if reasoning:
contents.append(ContentThinking(thinking=reasoning))

if message.content is not None:
if has_data_model:
data = message.content
# Some providers (e.g., Cloudflare) may already provide a dict
if not isinstance(data, dict):
data = orjson.loads(data)
contents = [ContentJson(value=data)]
contents.append(ContentJson(value=data))
else:
contents = [ContentText(text=message.content)]
contents.append(ContentText(text=message.content))

tool_calls = message.tool_calls

Expand Down
3 changes: 1 addition & 2 deletions chatlas/_provider_openai_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,8 @@ def _chat_perform_args(
data_model: Optional[type[BaseModel]],
) -> SubmitInputArgsT: ...

@staticmethod
@abstractmethod
def _turns_as_inputs(turns: list[Turn]) -> list[Any]: ...
def _turns_as_inputs(self, turns: list[Turn]) -> list[Any]: ...

@staticmethod
@abstractmethod
Expand Down
1 change: 1 addition & 0 deletions chatlas/_provider_openrouter.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def ChatOpenRouter(
base_url=base_url,
seed=seed,
name="OpenRouter",
preserve_thinking=True,
kwargs=kwargs2,
),
system_prompt=system_prompt,
Expand Down
20 changes: 10 additions & 10 deletions tests/_vcr/test_provider_deepseek/test_deepseek_list_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ interactions:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
- gzip, deflate, zstd
Connection:
- keep-alive
Host:
Expand All @@ -18,36 +18,36 @@ interactions:
uri: https://api.deepseek.com/models
response:
body:
string: '{"object":"list","data":[{"id":"deepseek-chat","object":"model","owned_by":"deepseek"},{"id":"deepseek-reasoner","object":"model","owned_by":"deepseek"}]}'
string: '{"object":"list","data":[{"id":"deepseek-v4-flash","object":"model","owned_by":"deepseek"},{"id":"deepseek-v4-pro","object":"model","owned_by":"deepseek"}]}'
headers:
Connection:
- keep-alive
Content-Length:
- '154'
- '156'
Content-Type:
- application/json
Date:
- Wed, 31 Dec 2025 20:37:50 GMT
- Wed, 06 May 2026 23:20:10 GMT
Server:
- elb
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Vary:
- origin
Via:
- 1.1 2f51a381830e231f6bc2b46fda74f69e.cloudfront.net (CloudFront)
- 1.1 59d8e2230b07f6d21e105d564b87cc6c.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- mgFFCx8JBolTTjH52ADc1GUHPZssINDzDxG2pABqc5djt5OUrlCTbQ==
- kvNMGPNoxRu0-JnpPnf6mBiJ7QU1rXkorvtdIg95oN1NGREqV2R0Bw==
X-Amz-Cf-Pop:
- ORD56-P7
- MSP50-P3
X-Cache:
- Miss from cloudfront
X-Content-Type-Options:
- nosniff
access-control-allow-credentials:
- 'true'
vary:
- origin, access-control-request-method, access-control-request-headers
x-ds-trace-id:
- e9649652e269daf0ffdd27df13056a22
- ca44e5c998d05d2e72061b2b65c80da0
status:
code: 200
message: OK
Expand Down
Loading
Loading