fix(copilot): Use correct OpenRouter reasoning API format

Addresses review comments from CodeRabbit and Sentry:

- Change reasoning format from {"enabled": True} (invalid) to
  {"max_tokens": config.thinking_budget_tokens} per OpenRouter docs
- Add missing thinking_budget_tokens config field (default: 10000)
- Extract duplicate code into _apply_thinking_config() helper function
- Update description from 'adaptive' to 'extended' thinking for clarity

References:
- OpenRouter reasoning docs: https://openrouter.ai/docs/reasoning-tokens
This commit is contained in:
Otto-AGPT
2026-02-11 13:54:57 +00:00
parent ba6d585170
commit cdeefb8621
2 changed files with 26 additions and 10 deletions

View File

@@ -96,7 +96,13 @@ class ChatConfig(BaseSettings):
# Extended thinking configuration for Claude models
thinking_enabled: bool = Field(
default=True,
description="Enable adaptive thinking for Claude models via OpenRouter",
description="Enable extended thinking for Claude models via OpenRouter",
)
thinking_budget_tokens: int = Field(
default=10000,
ge=1000,
le=100000,
description="Maximum tokens for extended thinking (budget_tokens for Claude)",
)
@field_validator("api_key", mode="before")

View File

@@ -80,6 +80,19 @@ settings = Settings()
client = openai.AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
def _apply_thinking_config(extra_body: dict[str, Any], model: str) -> None:
"""Apply extended thinking configuration for Anthropic models via OpenRouter.
OpenRouter's reasoning API expects either:
- {"max_tokens": N} for explicit token budget
- {"effort": "high"} for automatic budget
See: https://openrouter.ai/docs/reasoning-tokens
"""
if config.thinking_enabled and "anthropic" in model.lower():
extra_body["reasoning"] = {"max_tokens": config.thinking_budget_tokens}
langfuse = get_client()
# Redis key prefix for tracking running long-running operations
@@ -1066,9 +1079,8 @@ async def _stream_chat_chunks(
:128
] # OpenRouter limit
# Enable adaptive thinking for Anthropic models via OpenRouter
if config.thinking_enabled and "anthropic" in model.lower():
extra_body["reasoning"] = {"enabled": True}
# Enable extended thinking for Anthropic models via OpenRouter
_apply_thinking_config(extra_body, model)
api_call_start = time_module.perf_counter()
stream = await client.chat.completions.create(
@@ -1833,9 +1845,8 @@ async def _generate_llm_continuation(
if session_id:
extra_body["session_id"] = session_id[:128]
# Enable adaptive thinking for Anthropic models via OpenRouter
if config.thinking_enabled and "anthropic" in config.model.lower():
extra_body["reasoning"] = {"enabled": True}
# Enable extended thinking for Anthropic models via OpenRouter
_apply_thinking_config(extra_body, config.model)
retry_count = 0
last_error: Exception | None = None
@@ -1967,9 +1978,8 @@ async def _generate_llm_continuation_with_streaming(
if session_id:
extra_body["session_id"] = session_id[:128]
# Enable adaptive thinking for Anthropic models via OpenRouter
if config.thinking_enabled and "anthropic" in config.model.lower():
extra_body["reasoning"] = {"enabled": True}
# Enable extended thinking for Anthropic models via OpenRouter
_apply_thinking_config(extra_body, config.model)
# Make streaming LLM call (no tools - just text response)
from typing import cast