Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions rust/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ Or provide an OAuth bearer token directly:
export ANTHROPIC_AUTH_TOKEN="anthropic-oauth-or-proxy-bearer-token"
```

For local OpenAI-compatible servers such as Ollama, including setups that serve Qwen
reasoning models, see [`../docs/local-openai-compatible-providers.md`](../docs/local-openai-compatible-providers.md).
Use the exact model tag exposed by the server (for example, `qwen3:latest`), and
prefer `OLLAMA_HOST` for Ollama-specific local routing.

## Mock parity harness

The workspace now includes a deterministic Anthropic-compatible mock service and a clean-environment CLI harness for end-to-end parity checks.
Expand Down
9 changes: 9 additions & 0 deletions rust/crates/api/src/providers/openai_compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,7 @@ impl StreamState {
.delta
.reasoning_content
.filter(|value| !value.is_empty())
.or(choice.delta.reasoning.filter(|value| !value.is_empty()))
.or(choice
.delta
.thinking
Expand Down Expand Up @@ -827,6 +828,8 @@ struct ChatMessage {
#[serde(default)]
reasoning_content: Option<String>,
#[serde(default)]
reasoning: Option<String>,
#[serde(default)]
tool_calls: Vec<ResponseToolCall>,
}

Expand Down Expand Up @@ -901,6 +904,8 @@ struct ChunkDelta {
#[serde(default)]
reasoning_content: Option<String>,
#[serde(default)]
reasoning: Option<String>,
#[serde(default)]
thinking: Option<ThinkingDelta>,
#[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
tool_calls: Vec<DeltaToolCall>,
Expand Down Expand Up @@ -1510,6 +1515,7 @@ fn normalize_response(
.message
.reasoning_content
.filter(|value| !value.is_empty())
.or(choice.message.reasoning.filter(|value| !value.is_empty()))
{
content.push(OutputContentBlock::Thinking {
thinking,
Expand Down Expand Up @@ -1992,6 +1998,7 @@ mod tests {
role: "assistant".to_string(),
content: Some("final answer".to_string()),
reasoning_content: Some("hidden thought".to_string()),
reasoning: None,
tool_calls: Vec::new(),
},
finish_reason: Some("stop".to_string()),
Expand Down Expand Up @@ -2029,6 +2036,7 @@ mod tests {
delta: super::ChunkDelta {
content: None,
reasoning_content: Some("think".to_string()),
reasoning: None,
thinking: None,
tool_calls: Vec::new(),
},
Expand All @@ -2046,6 +2054,7 @@ mod tests {
delta: super::ChunkDelta {
content: Some(" answer".to_string()),
reasoning_content: None,
reasoning: None,
thinking: None,
tool_calls: Vec::new(),
},
Expand Down
126 changes: 126 additions & 0 deletions rust/crates/api/tests/openai_compat_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,55 @@ async fn send_message_preserves_deepseek_reasoning_content_before_text() {
assert_eq!(body["thinking"], json!({"type": "enabled"}));
}

/// Non-streaming path: when the assistant message carries its thought under
/// the `reasoning` key (no `reasoning_content`), the normalized response must
/// contain a `Thinking` block followed by the `Text` block. Also checks that
/// the request body sent to the server names the model `qwen3:latest` even
/// though the caller asked for `openai/qwen3:latest`.
#[tokio::test]
async fn send_message_preserves_ollama_reasoning_before_text() {
    // Canned chat-completion reply served by the local test server.
    const COMPLETION_JSON: &str = concat!(
        "{",
        "\"id\":\"chatcmpl_ollama_reasoning\",",
        "\"model\":\"qwen3:latest\",",
        "\"choices\":[{",
        "\"message\":{\"role\":\"assistant\",\"reasoning\":\"Think locally\",\"content\":\"Answer locally\",\"tool_calls\":[]},",
        "\"finish_reason\":\"stop\"",
        "}],",
        "\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":5}",
        "}"
    );

    let captured_requests = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
    let canned_responses = vec![http_response("200 OK", "application/json", COMPLETION_JSON)];
    let server = spawn_server(Arc::clone(&captured_requests), canned_responses).await;

    let client = OpenAiCompatClient::new("ollama-test-key", OpenAiCompatConfig::openai())
        .with_base_url(server.base_url());

    // Same sample request as the other tests, but targeting the tagged Qwen model.
    let outgoing = MessageRequest {
        model: "openai/qwen3:latest".to_string(),
        ..sample_request(false)
    };
    let response = client
        .send_message(&outgoing)
        .await
        .expect("request should succeed");

    // The thought must come first, then the visible answer.
    let expected_blocks = vec![
        OutputContentBlock::Thinking {
            thinking: "Think locally".to_string(),
            signature: None,
        },
        OutputContentBlock::Text {
            text: "Answer locally".to_string(),
        },
    ];
    assert_eq!(response.content, expected_blocks);

    // Inspect what actually went over the wire.
    let seen = captured_requests.lock().await;
    let first_request = seen.first().expect("server should capture request");
    let sent_body: serde_json::Value =
        serde_json::from_str(&first_request.body).expect("json body");
    assert_eq!(sent_body["model"], json!("qwen3:latest"));
}

#[tokio::test]
async fn local_openai_gateway_strips_routing_prefix_and_preserves_extra_body_params() {
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
Expand Down Expand Up @@ -389,6 +438,83 @@ async fn stream_message_normalizes_text_and_multiple_tool_calls() {
assert!(request.body.contains("\"stream\":true"));
}

/// Streaming path: a delta that carries `reasoning` must open a `Thinking`
/// content block at index 0, and the later `content` delta must open a `Text`
/// block at index 1 after the thinking block is stopped. Also checks the
/// request id header is surfaced and that the outgoing body uses the model
/// name `qwen3:latest` with `stream` set to `true`.
#[tokio::test]
async fn stream_message_preserves_ollama_reasoning_before_text() {
    // Two SSE chunks (reasoning, then content with a finish reason) and the terminator.
    const SSE_BODY: &str = concat!(
        "data: {\"id\":\"chatcmpl_stream_ollama_reasoning\",\"model\":\"qwen3:latest\",\"choices\":[{\"delta\":{\"reasoning\":\"Think\"}}]}\n\n",
        "data: {\"id\":\"chatcmpl_stream_ollama_reasoning\",\"choices\":[{\"delta\":{\"content\":\" answer\"},\"finish_reason\":\"stop\"}]}\n\n",
        "data: [DONE]\n\n"
    );

    let captured_requests = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
    let canned_responses = vec![http_response_with_headers(
        "200 OK",
        "text/event-stream",
        SSE_BODY,
        &[("x-request-id", "req_ollama_reasoning_stream")],
    )];
    let server = spawn_server(Arc::clone(&captured_requests), canned_responses).await;

    let client = OpenAiCompatClient::new("ollama-test-key", OpenAiCompatConfig::openai())
        .with_base_url(server.base_url());

    let outgoing = MessageRequest {
        model: "openai/qwen3:latest".to_string(),
        ..sample_request(false)
    };
    let mut stream = client
        .stream_message(&outgoing)
        .await
        .expect("stream should start");

    assert_eq!(stream.request_id(), Some("req_ollama_reasoning_stream"));

    // Drain the stream so the event sequence can be checked positionally.
    let mut events = Vec::new();
    while let Some(next) = stream.next_event().await.expect("event should parse") {
        events.push(next);
    }

    // Event 0: message envelope.
    assert!(matches!(events[0], StreamEvent::MessageStart(_)));

    // Events 1-3: the thinking block is opened, fed one delta, then closed.
    assert!(matches!(
        events[1],
        StreamEvent::ContentBlockStart(ContentBlockStartEvent {
            index: 0,
            content_block: OutputContentBlock::Thinking { .. },
        })
    ));
    assert!(matches!(
        events[2],
        StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
            index: 0,
            delta: ContentBlockDelta::ThinkingDelta { .. },
        })
    ));
    assert!(matches!(
        events[3],
        StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 0 })
    ));

    // Events 4-5: the text block starts at the next index and receives its delta.
    assert!(matches!(
        events[4],
        StreamEvent::ContentBlockStart(ContentBlockStartEvent {
            index: 1,
            content_block: OutputContentBlock::Text { .. },
        })
    ));
    assert!(matches!(
        events[5],
        StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
            index: 1,
            delta: ContentBlockDelta::TextDelta { .. },
        })
    ));

    // Inspect what actually went over the wire.
    let seen = captured_requests.lock().await;
    let first_request = seen.first().expect("captured request");
    let sent_body: serde_json::Value =
        serde_json::from_str(&first_request.body).expect("json body");
    assert_eq!(sent_body["model"], json!("qwen3:latest"));
    assert_eq!(sent_body["stream"], json!(true));
}

#[allow(clippy::await_holding_lock)]
#[tokio::test]
async fn stream_message_retries_retryable_sse_handshake_failures() {
Expand Down
26 changes: 26 additions & 0 deletions rust/crates/rusty-claude-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2939,6 +2939,10 @@ fn validate_model_syntax(model: &str) -> Result<(), String> {
err_msg.push_str("\nDid you mean `openai/");
err_msg.push_str(trimmed);
err_msg.push_str("`? (Requires OPENAI_API_KEY env var)");
} else if trimmed.starts_with("qwen") && trimmed.contains(':') {
err_msg.push_str("\nFor a local Ollama model, set `OPENAI_BASE_URL=http://127.0.0.1:11434/v1` before using tagged names like `");
err_msg.push_str(trimmed);
err_msg.push_str("`.");
} else if trimmed.starts_with("qwen") {
err_msg.push_str("\nDid you mean `qwen/");
err_msg.push_str(trimmed);
Expand Down Expand Up @@ -19743,6 +19747,28 @@ mod alias_resolution_tests {
assert!(result.unwrap_err().contains("invalid model syntax"));
}

/// With `OLLAMA_HOST` and `OPENAI_BASE_URL` both unset, validating the tagged
/// name `qwen3:8b` must fail, and the error text must mention Ollama, the
/// `OPENAI_BASE_URL` variable, and the local URL `http://127.0.0.1:11434/v1`.
#[test]
fn qwen_invalid_model_hint_mentions_local_ollama_openai_base_url() {
    // Take the env lock first, then clear both variables for the duration of the test.
    // The guards are held in bindings so they stay alive until the function returns.
    let _env_lock = ollama_env_lock();
    let _ollama_host_unset = EnvVarGuard::unset("OLLAMA_HOST");
    let _openai_base_url_unset = EnvVarGuard::unset("OPENAI_BASE_URL");

    let error = validate_model_syntax("qwen3:8b")
        .expect_err("Ollama tag without local base URL should fail");

    assert!(
        error.contains("Ollama"),
        "Qwen Ollama tag error should mention Ollama: {error}"
    );
    assert!(
        error.contains("OPENAI_BASE_URL"),
        "Qwen Ollama tag error should mention OPENAI_BASE_URL: {error}"
    );
    assert!(
        error.contains("http://127.0.0.1:11434/v1"),
        "Qwen Ollama tag error should show local Ollama OpenAI URL: {error}"
    );
}

#[test]
fn test_direct_provider_model_passes() {
// Direct provider/model strings should remain unchanged and pass
Expand Down
Loading