feat(copilot): Enable extended thinking for Claude models

Adds configuration to enable Anthropic's extended thinking feature via
OpenRouter. This keeps the model's chain-of-thought reasoning internal
rather than outputting it to users.

Configuration:
- thinking_enabled: bool (default: True)
- thinking_budget_tokens: int (default: 10000)
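
These map to Pydantic fields with hard bounds, so a budget outside
1000-100000 is rejected when the config loads. A minimal standalone
sketch of the same validation (class name is illustrative; defaults and
bounds mirror the ChatConfig fields in the diff below):

    from pydantic import Field, ValidationError
    from pydantic_settings import BaseSettings

    class ThinkingSettings(BaseSettings):
        # Same defaults and bounds as the new ChatConfig fields
        thinking_enabled: bool = Field(default=True)
        thinking_budget_tokens: int = Field(default=10000, ge=1000, le=100000)

    try:
        ThinkingSettings(thinking_budget_tokens=500)  # below ge=1000
    except ValidationError as exc:
        print(exc.errors()[0]["type"])  # greater_than_equal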

The thinking config is only applied to Anthropic models (detected via
model name containing 'anthropic').
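
For illustration, the gating and payload construction reduce to the
following (the helper name is hypothetical; the actual change inlines
this logic at each call site, as the diff below shows):

    def build_thinking_provider(
        model: str, enabled: bool, budget_tokens: int
    ) -> dict | None:
        """Return the OpenRouter provider block that enables Anthropic
        extended thinking, or None for non-Anthropic models."""
        if not (enabled and "anthropic" in model.lower()):
            return None
        return {
            "anthropic": {
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": budget_tokens,
                }
            }
        }

    extra_body: dict = {}
    payload = build_thinking_provider("anthropic/claude-sonnet-4", True, 10000)
    if payload is not None:  # skipped entirely for non-Anthropic models
        extra_body["provider"] = payload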

Fixes an issue where the CoPilot prompt expects thinking mode that was
not enabled on the API side, causing internal reasoning to leak into
user-facing responses.
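
End to end, the resulting OpenRouter call looks roughly like this
(client setup, prompt, and model slug are illustrative; the extra_body
shape matches the diff below):

    import asyncio

    from openai import AsyncOpenAI

    async def main() -> None:
        client = AsyncOpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key="sk-or-...",  # placeholder key
        )
        extra_body = {
            "provider": {
                "anthropic": {
                    "thinking": {"type": "enabled", "budget_tokens": 10000}
                }
            }
        }
        stream = await client.chat.completions.create(
            model="anthropic/claude-sonnet-4",  # illustrative model slug
            messages=[{"role": "user", "content": "Hello"}],
            stream=True,
            extra_body=extra_body,
        )
        async for chunk in stream:
            # Thinking stays internal; only user-visible text is streamed
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")

    asyncio.run(main())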

commit 75f8772f8a (parent f2ead70f3d)
Author: Otto
Date:   2026-02-10 13:58:57 +00:00

2 changed files, 45 insertions(+), 0 deletions(-)


@@ -93,6 +93,18 @@ class ChatConfig(BaseSettings):
        description="Name of the prompt in Langfuse to fetch",
    )

    # Extended thinking configuration for Claude models
    thinking_enabled: bool = Field(
        default=True,
        description="Enable extended thinking for Claude models",
    )
    thinking_budget_tokens: int = Field(
        default=10000,
        ge=1000,
        le=100000,
        description="Token budget for extended thinking (1000-100000)",
    )

    @field_validator("api_key", mode="before")
    @classmethod
    def get_api_key(cls, v):


@@ -1066,6 +1066,17 @@ async def _stream_chat_chunks(
            :128
        ]  # OpenRouter limit

    # Enable extended thinking for Anthropic models
    if config.thinking_enabled and "anthropic" in model.lower():
        extra_body["provider"] = {
            "anthropic": {
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": config.thinking_budget_tokens,
                }
            }
        }
    api_call_start = time_module.perf_counter()
    stream = await client.chat.completions.create(
        model=model,
@@ -1829,6 +1840,17 @@ async def _generate_llm_continuation(
    if session_id:
        extra_body["session_id"] = session_id[:128]

    # Enable extended thinking for Anthropic models
    if config.thinking_enabled and "anthropic" in config.model.lower():
        extra_body["provider"] = {
            "anthropic": {
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": config.thinking_budget_tokens,
                }
            }
        }
    retry_count = 0
    last_error: Exception | None = None
    response = None
@@ -1959,6 +1981,17 @@ async def _generate_llm_continuation_with_streaming(
    if session_id:
        extra_body["session_id"] = session_id[:128]

    # Enable extended thinking for Anthropic models
    if config.thinking_enabled and "anthropic" in config.model.lower():
        extra_body["provider"] = {
            "anthropic": {
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": config.thinking_budget_tokens,
                }
            }
        }

    # Make streaming LLM call (no tools - just text response)
    from typing import cast