fix(copilot): Use correct OpenRouter reasoning API format

Addresses review comments from CodeRabbit and Sentry:

- Change reasoning format from {"enabled": True} (invalid) to {"max_tokens": config.thinking_budget_tokens} per OpenRouter docs
- Add missing thinking_budget_tokens config field (default: 10000)
- Extract duplicate code into _apply_thinking_config() helper function
- Update description from 'adaptive' to 'extended' thinking for clarity

References:

- OpenRouter reasoning docs: https://openrouter.ai/docs/reasoning-tokens
2026-02-11 15:25:16 -05:00 · 2026-02-11 13:54:57 +00:00
parent ba6d585170
commit cdeefb8621
2 changed files with 26 additions and 10 deletions
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -96,7 +96,13 @@ class ChatConfig(BaseSettings):
    # Extended thinking configuration for Claude models
    thinking_enabled: bool = Field(
        default=True,
-        description="Enable adaptive thinking for Claude models via OpenRouter",
+        description="Enable extended thinking for Claude models via OpenRouter",
+    )
+    thinking_budget_tokens: int = Field(
+        default=10000,
+        ge=1000,
+        le=100000,
+        description="Maximum tokens for extended thinking (budget_tokens for Claude)",
    )

    @field_validator("api_key", mode="before")
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -80,6 +80,19 @@ settings = Settings()
 client = openai.AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)


+def _apply_thinking_config(extra_body: dict[str, Any], model: str) -> None:
+    """Set ``extra_body["reasoning"]`` in place for Anthropic models via OpenRouter.
+
+    No-op unless config.thinking_enabled is truthy and "anthropic" is in model.lower().
+    OpenRouter's reasoning API expects either:
+    - {"max_tokens": N} for explicit token budget (the form written here)
+    - {"effort": "high"} for automatic budget
+    See: https://openrouter.ai/docs/reasoning-tokens
+    """
+    if config.thinking_enabled and "anthropic" in model.lower():
+        extra_body["reasoning"] = {"max_tokens": config.thinking_budget_tokens}
+
+
 langfuse = get_client()

 # Redis key prefix for tracking running long-running operations
@@ -1066,9 +1079,8 @@ async def _stream_chat_chunks(
                        :128
                    ]  # OpenRouter limit

-                # Enable adaptive thinking for Anthropic models via OpenRouter
-                if config.thinking_enabled and "anthropic" in model.lower():
-                    extra_body["reasoning"] = {"enabled": True}
+                # Enable extended thinking for Anthropic models via OpenRouter
+                _apply_thinking_config(extra_body, model)

                api_call_start = time_module.perf_counter()
                stream = await client.chat.completions.create(
@@ -1833,9 +1845,8 @@ async def _generate_llm_continuation(
        if session_id:
            extra_body["session_id"] = session_id[:128]

-        # Enable adaptive thinking for Anthropic models via OpenRouter
-        if config.thinking_enabled and "anthropic" in config.model.lower():
-            extra_body["reasoning"] = {"enabled": True}
+        # Enable extended thinking for Anthropic models via OpenRouter
+        _apply_thinking_config(extra_body, config.model)

        retry_count = 0
        last_error: Exception | None = None
@@ -1967,9 +1978,8 @@ async def _generate_llm_continuation_with_streaming(
        if session_id:
            extra_body["session_id"] = session_id[:128]

-        # Enable adaptive thinking for Anthropic models via OpenRouter
-        if config.thinking_enabled and "anthropic" in config.model.lower():
-            extra_body["reasoning"] = {"enabled": True}
+        # Enable extended thinking for Anthropic models via OpenRouter
+        _apply_thinking_config(extra_body, config.model)

        # Make streaming LLM call (no tools - just text response)
        from typing import cast