From 75f8772f8ae7c2e0ecd1f9074fe3459ee112916e Mon Sep 17 00:00:00 2001
From: Otto
Date: Tue, 10 Feb 2026 13:58:57 +0000
Subject: [PATCH] feat(copilot): Enable extended thinking for Claude models

Adds configuration to enable Anthropic's extended thinking feature via
OpenRouter. This keeps the model's chain-of-thought reasoning internal
rather than outputting it to users.

Configuration:
- thinking_enabled: bool (default: True)
- thinking_budget_tokens: int (default: 10000)

The thinking config is only applied to Anthropic models (detected via
the model name containing 'anthropic').

Fixes the issue where the CoPilot prompt expects thinking mode but it
wasn't enabled on the API side, causing internal reasoning to leak into
user-facing responses.
---
 .../backend/api/features/chat/config.py  | 12 +++++++
 .../backend/api/features/chat/service.py | 33 +++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/autogpt_platform/backend/backend/api/features/chat/config.py b/autogpt_platform/backend/backend/api/features/chat/config.py
index 0b37e42df8..a9b901a244 100644
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -93,6 +93,18 @@ class ChatConfig(BaseSettings):
         description="Name of the prompt in Langfuse to fetch",
     )
 
+    # Extended thinking configuration for Claude models
+    thinking_enabled: bool = Field(
+        default=True,
+        description="Enable extended thinking for Claude models",
+    )
+    thinking_budget_tokens: int = Field(
+        default=10000,
+        ge=1000,
+        le=100000,
+        description="Token budget for extended thinking (1000-100000)",
+    )
+
     @field_validator("api_key", mode="before")
     @classmethod
     def get_api_key(cls, v):
diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py
index 49e70265fa..ee36bd73df 100644
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -1066,6 +1066,17 @@ async def _stream_chat_chunks(
             :128
         ]  # OpenRouter limit
 
+        # Enable extended thinking for Anthropic models
+        if config.thinking_enabled and "anthropic" in model.lower():
+            extra_body["provider"] = {
+                "anthropic": {
+                    "thinking": {
+                        "type": "enabled",
+                        "budget_tokens": config.thinking_budget_tokens,
+                    }
+                }
+            }
+
         api_call_start = time_module.perf_counter()
         stream = await client.chat.completions.create(
             model=model,
@@ -1829,6 +1840,17 @@ async def _generate_llm_continuation(
     if session_id:
         extra_body["session_id"] = session_id[:128]
 
+    # Enable extended thinking for Anthropic models
+    if config.thinking_enabled and "anthropic" in config.model.lower():
+        extra_body["provider"] = {
+            "anthropic": {
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": config.thinking_budget_tokens,
+                }
+            }
+        }
+
     retry_count = 0
     last_error: Exception | None = None
     response = None
@@ -1959,6 +1981,17 @@ async def _generate_llm_continuation_with_streaming(
     if session_id:
         extra_body["session_id"] = session_id[:128]
 
+    # Enable extended thinking for Anthropic models
+    if config.thinking_enabled and "anthropic" in config.model.lower():
+        extra_body["provider"] = {
+            "anthropic": {
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": config.thinking_budget_tokens,
+                }
+            }
+        }
+
     # Make streaming LLM call (no tools - just text response)
     from typing import cast
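
For reference, a minimal standalone sketch of the request these hunks
assemble, assuming the official `openai` Python SDK pointed at
OpenRouter's OpenAI-compatible endpoint. The model slug, API key, and
session id are placeholders and not taken from this codebase; the
`provider` payload shape, the "anthropic" substring gating, and the
defaults (thinking_enabled=True, budget 10000) come from the patch
itself:

    # Sketch of the request path added by this patch (assumed setup,
    # not the repository's actual wiring).
    import asyncio

    from openai import AsyncOpenAI


    async def main() -> None:
        client = AsyncOpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key="sk-or-...",  # placeholder key
        )

        model = "anthropic/claude-3.5-sonnet"  # placeholder model slug
        # OpenRouter caps session_id length; the patch truncates to 128.
        extra_body: dict = {"session_id": "example-session"[:128]}

        # Mirrors the gating added in service.py: thinking is requested
        # only for Anthropic models, with the configured token budget.
        thinking_enabled = True  # config.thinking_enabled default
        thinking_budget_tokens = 10000  # config.thinking_budget_tokens default
        if thinking_enabled and "anthropic" in model.lower():
            extra_body["provider"] = {
                "anthropic": {
                    "thinking": {
                        "type": "enabled",
                        "budget_tokens": thinking_budget_tokens,
                    }
                }
            }

        response = await client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Hello"}],
            # extra_body fields are forwarded verbatim to OpenRouter,
            # which routes the provider config to Anthropic.
            extra_body=extra_body,
        )
        print(response.choices[0].message.content)


    if __name__ == "__main__":
        asyncio.run(main())

Carrying the thinking config in extra_body keeps it out of the standard
Chat Completions parameters, so non-Anthropic models are unaffected and
the model's reasoning stays internal rather than leaking into the
user-facing response text.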