From 75f8772f8ae7c2e0ecd1f9074fe3459ee112916e Mon Sep 17 00:00:00 2001
From: Otto
Date: Tue, 10 Feb 2026 13:58:57 +0000
Subject: [PATCH] feat(copilot): Enable extended thinking for Claude models

Adds configuration to enable Anthropic's extended thinking feature via
OpenRouter. This keeps the model's chain-of-thought reasoning internal
rather than outputting it to users.

Configuration:
- thinking_enabled: bool (default: True)
- thinking_budget_tokens: int (default: 10000)

The thinking config is only applied to Anthropic models (detected via
the model name containing 'anthropic').

Fixes the issue where the CoPilot prompt expects thinking mode but it
wasn't enabled on the API side, causing internal reasoning to leak into
user-facing responses.
---
 .../backend/api/features/chat/config.py  | 12 +++++++
 .../backend/api/features/chat/service.py | 33 +++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/autogpt_platform/backend/backend/api/features/chat/config.py b/autogpt_platform/backend/backend/api/features/chat/config.py
index 0b37e42df8..a9b901a244 100644
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -93,6 +93,18 @@ class ChatConfig(BaseSettings):
         description="Name of the prompt in Langfuse to fetch",
     )
 
+    # Extended thinking configuration for Claude models
+    thinking_enabled: bool = Field(
+        default=True,
+        description="Enable extended thinking for Claude models",
+    )
+    thinking_budget_tokens: int = Field(
+        default=10000,
+        ge=1000,
+        le=100000,
+        description="Token budget for extended thinking (1000-100000)",
+    )
+
     @field_validator("api_key", mode="before")
     @classmethod
     def get_api_key(cls, v):
diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py
index 49e70265fa..ee36bd73df 100644
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -1066,6 +1066,17 @@ async def _stream_chat_chunks(
             :128
         ]  # OpenRouter limit
 
+        # Enable extended thinking for Anthropic models
+        if config.thinking_enabled and "anthropic" in model.lower():
+            extra_body["provider"] = {
+                "anthropic": {
+                    "thinking": {
+                        "type": "enabled",
+                        "budget_tokens": config.thinking_budget_tokens,
+                    }
+                }
+            }
+
         api_call_start = time_module.perf_counter()
         stream = await client.chat.completions.create(
             model=model,
@@ -1829,6 +1840,17 @@ async def _generate_llm_continuation(
     if session_id:
         extra_body["session_id"] = session_id[:128]
 
+    # Enable extended thinking for Anthropic models
+    if config.thinking_enabled and "anthropic" in config.model.lower():
+        extra_body["provider"] = {
+            "anthropic": {
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": config.thinking_budget_tokens,
+                }
+            }
+        }
+
     retry_count = 0
     last_error: Exception | None = None
     response = None
@@ -1959,6 +1981,17 @@ async def _generate_llm_continuation_with_streaming(
     if session_id:
         extra_body["session_id"] = session_id[:128]
 
+    # Enable extended thinking for Anthropic models
+    if config.thinking_enabled and "anthropic" in config.model.lower():
+        extra_body["provider"] = {
+            "anthropic": {
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": config.thinking_budget_tokens,
+                }
+            }
+        }
+
     # Make streaming LLM call (no tools - just text response)
     from typing import cast
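
For reference, a minimal standalone sketch of the request these hunks
assemble, assuming the official `openai` Python SDK pointed at
OpenRouter's OpenAI-compatible endpoint. The model slug, API key, and
session id are placeholders and not taken from this codebase; the
`provider` payload shape, the "anthropic" substring gating, and the
defaults (thinking_enabled=True, budget 10000) come from the patch
itself:

    # Sketch of the request path added by this patch (assumed setup,
    # not the repository's actual wiring).
    import asyncio

    from openai import AsyncOpenAI


    async def main() -> None:
        client = AsyncOpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key="sk-or-...",  # placeholder key
        )

        model = "anthropic/claude-3.5-sonnet"  # placeholder model slug
        # OpenRouter caps session_id length; the patch truncates to 128.
        extra_body: dict = {"session_id": "example-session"[:128]}

        # Mirrors the gating added in service.py: thinking is requested
        # only for Anthropic models, with the configured token budget.
        thinking_enabled = True  # config.thinking_enabled default
        thinking_budget_tokens = 10000  # config.thinking_budget_tokens default
        if thinking_enabled and "anthropic" in model.lower():
            extra_body["provider"] = {
                "anthropic": {
                    "thinking": {
                        "type": "enabled",
                        "budget_tokens": thinking_budget_tokens,
                    }
                }
            }

        response = await client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Hello"}],
            # extra_body fields are forwarded verbatim to OpenRouter,
            # which routes the provider config to Anthropic.
            extra_body=extra_body,
        )
        print(response.choices[0].message.content)


    if __name__ == "__main__":
        asyncio.run(main())

Carrying the thinking config in extra_body keeps it out of the standard
Chat Completions parameters, so non-Anthropic models are unaffected and
the model's reasoning stays internal rather than leaking into the
user-facing response text.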