mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-11 15:25:16 -05:00
fix(copilot): Use correct OpenRouter reasoning API format
Addresses review comments from CodeRabbit and Sentry:
- Change reasoning format from {"enabled": True} (invalid) to
{"max_tokens": config.thinking_budget_tokens} per OpenRouter docs
- Add missing thinking_budget_tokens config field (default: 10000)
- Extract duplicate code into _apply_thinking_config() helper function
- Update description from 'adaptive' to 'extended' thinking for clarity
References:
- OpenRouter reasoning docs: https://openrouter.ai/docs/reasoning-tokens
This commit is contained in:
@@ -96,7 +96,13 @@ class ChatConfig(BaseSettings):
|
||||
# Extended thinking configuration for Claude models
|
||||
thinking_enabled: bool = Field(
|
||||
default=True,
|
||||
description="Enable adaptive thinking for Claude models via OpenRouter",
|
||||
description="Enable extended thinking for Claude models via OpenRouter",
|
||||
)
|
||||
thinking_budget_tokens: int = Field(
|
||||
default=10000,
|
||||
ge=1000,
|
||||
le=100000,
|
||||
description="Maximum tokens for extended thinking (budget_tokens for Claude)",
|
||||
)
|
||||
|
||||
@field_validator("api_key", mode="before")
|
||||
|
||||
@@ -80,6 +80,19 @@ settings = Settings()
|
||||
client = openai.AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
|
||||
|
||||
|
||||
def _apply_thinking_config(extra_body: dict[str, Any], model: str) -> None:
|
||||
"""Apply extended thinking configuration for Anthropic models via OpenRouter.
|
||||
|
||||
OpenRouter's reasoning API expects either:
|
||||
- {"max_tokens": N} for explicit token budget
|
||||
- {"effort": "high"} for automatic budget
|
||||
|
||||
See: https://openrouter.ai/docs/reasoning-tokens
|
||||
"""
|
||||
if config.thinking_enabled and "anthropic" in model.lower():
|
||||
extra_body["reasoning"] = {"max_tokens": config.thinking_budget_tokens}
|
||||
|
||||
|
||||
langfuse = get_client()
|
||||
|
||||
# Redis key prefix for tracking running long-running operations
|
||||
@@ -1066,9 +1079,8 @@ async def _stream_chat_chunks(
|
||||
:128
|
||||
] # OpenRouter limit
|
||||
|
||||
# Enable adaptive thinking for Anthropic models via OpenRouter
|
||||
if config.thinking_enabled and "anthropic" in model.lower():
|
||||
extra_body["reasoning"] = {"enabled": True}
|
||||
# Enable extended thinking for Anthropic models via OpenRouter
|
||||
_apply_thinking_config(extra_body, model)
|
||||
|
||||
api_call_start = time_module.perf_counter()
|
||||
stream = await client.chat.completions.create(
|
||||
@@ -1833,9 +1845,8 @@ async def _generate_llm_continuation(
|
||||
if session_id:
|
||||
extra_body["session_id"] = session_id[:128]
|
||||
|
||||
# Enable adaptive thinking for Anthropic models via OpenRouter
|
||||
if config.thinking_enabled and "anthropic" in config.model.lower():
|
||||
extra_body["reasoning"] = {"enabled": True}
|
||||
# Enable extended thinking for Anthropic models via OpenRouter
|
||||
_apply_thinking_config(extra_body, config.model)
|
||||
|
||||
retry_count = 0
|
||||
last_error: Exception | None = None
|
||||
@@ -1967,9 +1978,8 @@ async def _generate_llm_continuation_with_streaming(
|
||||
if session_id:
|
||||
extra_body["session_id"] = session_id[:128]
|
||||
|
||||
# Enable adaptive thinking for Anthropic models via OpenRouter
|
||||
if config.thinking_enabled and "anthropic" in config.model.lower():
|
||||
extra_body["reasoning"] = {"enabled": True}
|
||||
# Enable extended thinking for Anthropic models via OpenRouter
|
||||
_apply_thinking_config(extra_body, config.model)
|
||||
|
||||
# Make streaming LLM call (no tools - just text response)
|
||||
from typing import cast
|
||||
|
||||
Reference in New Issue
Block a user