diff --git a/rust/README.md b/rust/README.md index edcd4fef..53ebfc74 100644 --- a/rust/README.md +++ b/rust/README.md @@ -40,6 +40,11 @@ Or provide an OAuth bearer token directly: export ANTHROPIC_AUTH_TOKEN="anthropic-oauth-or-proxy-bearer-token" ``` +For local OpenAI-compatible servers such as Ollama, including Qwen reasoning +models, see [`../docs/local-openai-compatible-providers.md`](../docs/local-openai-compatible-providers.md). +Use the exact model tag exposed by the server, for example `qwen3:latest`, and +prefer `OLLAMA_HOST` for Ollama-specific local routing. + ## Mock parity harness The workspace now includes a deterministic Anthropic-compatible mock service and a clean-environment CLI harness for end-to-end parity checks. diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index 09fe09c2..8fb39699 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -572,6 +572,7 @@ impl StreamState { .delta .reasoning_content .filter(|value| !value.is_empty()) + .or(choice.delta.reasoning.filter(|value| !value.is_empty())) .or(choice .delta .thinking @@ -827,6 +828,8 @@ struct ChatMessage { #[serde(default)] reasoning_content: Option, #[serde(default)] + reasoning: Option, + #[serde(default)] tool_calls: Vec, } @@ -901,6 +904,8 @@ struct ChunkDelta { #[serde(default)] reasoning_content: Option, #[serde(default)] + reasoning: Option, + #[serde(default)] thinking: Option, #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")] tool_calls: Vec, @@ -1510,6 +1515,7 @@ fn normalize_response( .message .reasoning_content .filter(|value| !value.is_empty()) + .or(choice.message.reasoning.filter(|value| !value.is_empty())) { content.push(OutputContentBlock::Thinking { thinking, @@ -1992,6 +1998,7 @@ mod tests { role: "assistant".to_string(), content: Some("final answer".to_string()), reasoning_content: Some("hidden thought".to_string()), + reasoning: None, tool_calls: Vec::new(), }, finish_reason: Some("stop".to_string()), @@ -2029,6 +2036,7 @@ mod tests { delta: super::ChunkDelta { content: None, reasoning_content: Some("think".to_string()), + reasoning: None, thinking: None, tool_calls: Vec::new(), }, @@ -2046,6 +2054,7 @@ mod tests { delta: super::ChunkDelta { content: Some(" answer".to_string()), reasoning_content: None, + reasoning: None, thinking: None, tool_calls: Vec::new(), }, diff --git a/rust/crates/api/tests/openai_compat_integration.rs b/rust/crates/api/tests/openai_compat_integration.rs index e6edb791..832d170e 100644 --- a/rust/crates/api/tests/openai_compat_integration.rs +++ b/rust/crates/api/tests/openai_compat_integration.rs @@ -166,6 +166,55 @@ async fn send_message_preserves_deepseek_reasoning_content_before_text() { assert_eq!(body["thinking"], json!({"type": "enabled"})); } +#[tokio::test] +async fn send_message_preserves_ollama_reasoning_before_text() { + let state = Arc::new(Mutex::new(Vec::::new())); + let body = concat!( + "{", + "\"id\":\"chatcmpl_ollama_reasoning\",", + "\"model\":\"qwen3:latest\",", + "\"choices\":[{", + "\"message\":{\"role\":\"assistant\",\"reasoning\":\"Think locally\",\"content\":\"Answer locally\",\"tool_calls\":[]},", + "\"finish_reason\":\"stop\"", + "}],", + "\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":5}", + "}" + ); + let server = spawn_server( + state.clone(), + vec![http_response("200 OK", "application/json", body)], + ) + .await; + + let client = OpenAiCompatClient::new("ollama-test-key", OpenAiCompatConfig::openai()) + .with_base_url(server.base_url()); + let response = client + .send_message(&MessageRequest { + model: "openai/qwen3:latest".to_string(), + ..sample_request(false) + }) + .await + .expect("request should succeed"); + + assert_eq!( + response.content, + vec![ + OutputContentBlock::Thinking { + thinking: "Think locally".to_string(), + signature: None, + }, + OutputContentBlock::Text { + text: "Answer locally".to_string(), + }, + ] + ); + + let captured = state.lock().await; + let request = captured.first().expect("server should capture request"); + let body: serde_json::Value = serde_json::from_str(&request.body).expect("json body"); + assert_eq!(body["model"], json!("qwen3:latest")); +} + #[tokio::test] async fn local_openai_gateway_strips_routing_prefix_and_preserves_extra_body_params() { let state = Arc::new(Mutex::new(Vec::::new())); @@ -389,6 +438,83 @@ async fn stream_message_normalizes_text_and_multiple_tool_calls() { assert!(request.body.contains("\"stream\":true")); } +#[tokio::test] +async fn stream_message_preserves_ollama_reasoning_before_text() { + let state = Arc::new(Mutex::new(Vec::::new())); + let sse = concat!( + "data: {\"id\":\"chatcmpl_stream_ollama_reasoning\",\"model\":\"qwen3:latest\",\"choices\":[{\"delta\":{\"reasoning\":\"Think\"}}]}\n\n", + "data: {\"id\":\"chatcmpl_stream_ollama_reasoning\",\"choices\":[{\"delta\":{\"content\":\" answer\"},\"finish_reason\":\"stop\"}]}\n\n", + "data: [DONE]\n\n" + ); + let server = spawn_server( + state.clone(), + vec![http_response_with_headers( + "200 OK", + "text/event-stream", + sse, + &[("x-request-id", "req_ollama_reasoning_stream")], + )], + ) + .await; + + let client = OpenAiCompatClient::new("ollama-test-key", OpenAiCompatConfig::openai()) + .with_base_url(server.base_url()); + let mut stream = client + .stream_message(&MessageRequest { + model: "openai/qwen3:latest".to_string(), + ..sample_request(false) + }) + .await + .expect("stream should start"); + + assert_eq!(stream.request_id(), Some("req_ollama_reasoning_stream")); + + let mut events = Vec::new(); + while let Some(event) = stream.next_event().await.expect("event should parse") { + events.push(event); + } + + assert!(matches!(events[0], StreamEvent::MessageStart(_))); + assert!(matches!( + events[1], + StreamEvent::ContentBlockStart(ContentBlockStartEvent { + index: 0, + content_block: OutputContentBlock::Thinking { .. }, + }) + )); + assert!(matches!( + events[2], + StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { + index: 0, + delta: ContentBlockDelta::ThinkingDelta { .. }, + }) + )); + assert!(matches!( + events[3], + StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 0 }) + )); + assert!(matches!( + events[4], + StreamEvent::ContentBlockStart(ContentBlockStartEvent { + index: 1, + content_block: OutputContentBlock::Text { .. }, + }) + )); + assert!(matches!( + events[5], + StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent { + index: 1, + delta: ContentBlockDelta::TextDelta { .. }, + }) + )); + + let captured = state.lock().await; + let request = captured.first().expect("captured request"); + let body: serde_json::Value = serde_json::from_str(&request.body).expect("json body"); + assert_eq!(body["model"], json!("qwen3:latest")); + assert_eq!(body["stream"], json!(true)); +} + #[allow(clippy::await_holding_lock)] #[tokio::test] async fn stream_message_retries_retryable_sse_handshake_failures() { diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index 543ab88a..7f747516 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -2939,6 +2939,10 @@ fn validate_model_syntax(model: &str) -> Result<(), String> { err_msg.push_str("\nDid you mean `openai/"); err_msg.push_str(trimmed); err_msg.push_str("`? (Requires OPENAI_API_KEY env var)"); + } else if trimmed.starts_with("qwen") && trimmed.contains(':') { + err_msg.push_str("\nFor a local Ollama model, set `OPENAI_BASE_URL=http://127.0.0.1:11434/v1` before using tagged names like `"); + err_msg.push_str(trimmed); + err_msg.push_str("`."); } else if trimmed.starts_with("qwen") { err_msg.push_str("\nDid you mean `qwen/"); err_msg.push_str(trimmed); @@ -19743,6 +19747,28 @@ mod alias_resolution_tests { assert!(result.unwrap_err().contains("invalid model syntax")); } + #[test] + fn qwen_invalid_model_hint_mentions_local_ollama_openai_base_url() { + let _guard = ollama_env_lock(); + let _ollama_env = EnvVarGuard::unset("OLLAMA_HOST"); + let _openai_env = EnvVarGuard::unset("OPENAI_BASE_URL"); + let result = validate_model_syntax("qwen3:8b"); + + let error = result.expect_err("Ollama tag without local base URL should fail"); + assert!( + error.contains("Ollama"), + "Qwen Ollama tag error should mention Ollama: {error}" + ); + assert!( + error.contains("OPENAI_BASE_URL"), + "Qwen Ollama tag error should mention OPENAI_BASE_URL: {error}" + ); + assert!( + error.contains("http://127.0.0.1:11434/v1"), + "Qwen Ollama tag error should show local Ollama OpenAI URL: {error}" + ); + } + #[test] fn test_direct_provider_model_passes() { // Direct provider/model strings should remain unchanged and pass