From 017a00af46667360a77b62d23d5fb042703bfa5c Mon Sep 17 00:00:00 2001 From: Otto Date: Tue, 10 Feb 2026 15:18:05 +0000 Subject: [PATCH] feat(copilot): Enable extended thinking for Claude models (#12052) ## Summary Enables Anthropic's extended thinking feature for Claude models in CoPilot via OpenRouter. This keeps the model's chain-of-thought reasoning internal rather than outputting it to users. ## Problem The CoPilot prompt was designed for a thinking agent (with `<thinking>` tags), but extended thinking wasn't enabled on the API side. This caused the model to output its reasoning as regular text, leaking internal analysis to users. ## Solution Added thinking configuration to the OpenRouter `extra_body` for Anthropic models, using OpenRouter's unified reasoning parameter: ```python extra_body["reasoning"] = {"enabled": True} ``` ## Configuration New settings in `ChatConfig`: | Setting | Default | Description | |---------|---------|-------------| | `thinking_enabled` | `True` | Enable extended thinking for Claude models | ## Changes - `config.py`: Added `thinking_enabled` setting - `service.py`: Added thinking config to all 3 places where `extra_body` is built for LLM calls ## Testing - Verify CoPilot responses no longer include internal reasoning text - Check that Claude's extended thinking is working (should see thinking tokens in usage) - Confirm non-Anthropic models are unaffected ## Related Discussion: https://discord.com/channels/1126875755960336515/1126875756925046928/1470779843552612607 --------- Co-authored-by: Swifty --- .../backend/backend/api/features/chat/config.py | 6 ++++++ .../backend/backend/api/features/chat/service.py | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/autogpt_platform/backend/backend/api/features/chat/config.py b/autogpt_platform/backend/backend/api/features/chat/config.py index 
0b37e42df8..808692f97f 100644 --- a/autogpt_platform/backend/backend/api/features/chat/config.py +++ b/autogpt_platform/backend/backend/api/features/chat/config.py @@ -93,6 +93,12 @@ class ChatConfig(BaseSettings): description="Name of the prompt in Langfuse to fetch", ) + # Extended thinking configuration for Claude models + thinking_enabled: bool = Field( + default=True, + description="Enable adaptive thinking for Claude models via OpenRouter", + ) + @field_validator("api_key", mode="before") @classmethod def get_api_key(cls, v): diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py index 49e70265fa..072ea88fd5 100644 --- a/autogpt_platform/backend/backend/api/features/chat/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/service.py @@ -1066,6 +1066,10 @@ async def _stream_chat_chunks( :128 ] # OpenRouter limit + # Enable adaptive thinking for Anthropic models via OpenRouter + if config.thinking_enabled and "anthropic" in model.lower(): + extra_body["reasoning"] = {"enabled": True} + api_call_start = time_module.perf_counter() stream = await client.chat.completions.create( model=model, @@ -1829,6 +1833,10 @@ async def _generate_llm_continuation( if session_id: extra_body["session_id"] = session_id[:128] + # Enable adaptive thinking for Anthropic models via OpenRouter + if config.thinking_enabled and "anthropic" in config.model.lower(): + extra_body["reasoning"] = {"enabled": True} + retry_count = 0 last_error: Exception | None = None response = None @@ -1959,6 +1967,10 @@ async def _generate_llm_continuation_with_streaming( if session_id: extra_body["session_id"] = session_id[:128] + # Enable adaptive thinking for Anthropic models via OpenRouter + if config.thinking_enabled and "anthropic" in config.model.lower(): + extra_body["reasoning"] = {"enabled": True} + # Make streaming LLM call (no tools - just text response) from typing import cast