diff --git a/autogpt_platform/backend/backend/api/features/chat/config.py b/autogpt_platform/backend/backend/api/features/chat/config.py
index 808692f97f..f32c844edd 100644
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -96,7 +96,13 @@ class ChatConfig(BaseSettings):
     # Extended thinking configuration for Claude models
     thinking_enabled: bool = Field(
         default=True,
-        description="Enable adaptive thinking for Claude models via OpenRouter",
+        description="Enable extended thinking for Claude models via OpenRouter",
+    )
+    thinking_budget_tokens: int = Field(
+        default=10000,
+        ge=1000,
+        le=100000,
+        description="Maximum tokens for extended thinking (budget_tokens for Claude)",
     )
 
     @field_validator("api_key", mode="before")
diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py
index 072ea88fd5..37104633a6 100644
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -80,6 +80,19 @@ settings = Settings()
 client = openai.AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
 
 
+def _apply_thinking_config(extra_body: dict[str, Any], model: str) -> None:
+    """Apply extended thinking configuration for Anthropic models via OpenRouter.
+
+    OpenRouter's reasoning API expects either:
+    - {"max_tokens": N} for explicit token budget
+    - {"effort": "high"} for automatic budget
+
+    See: https://openrouter.ai/docs/reasoning-tokens
+    """
+    if config.thinking_enabled and "anthropic" in model.lower():
+        extra_body["reasoning"] = {"max_tokens": config.thinking_budget_tokens}
+
+
 langfuse = get_client()
 
 # Redis key prefix for tracking running long-running operations
@@ -1066,9 +1079,8 @@ async def _stream_chat_chunks(
                         :128
                     ]  # OpenRouter limit
 
-                # Enable adaptive thinking for Anthropic models via OpenRouter
-                if config.thinking_enabled and "anthropic" in model.lower():
-                    extra_body["reasoning"] = {"enabled": True}
+                # Enable extended thinking for Anthropic models via OpenRouter
+                _apply_thinking_config(extra_body, model)
 
                 api_call_start = time_module.perf_counter()
                 stream = await client.chat.completions.create(
@@ -1833,9 +1845,8 @@ async def _generate_llm_continuation(
         if session_id:
             extra_body["session_id"] = session_id[:128]
 
-        # Enable adaptive thinking for Anthropic models via OpenRouter
-        if config.thinking_enabled and "anthropic" in config.model.lower():
-            extra_body["reasoning"] = {"enabled": True}
+        # Enable extended thinking for Anthropic models via OpenRouter
+        _apply_thinking_config(extra_body, config.model)
 
         retry_count = 0
         last_error: Exception | None = None
@@ -1967,9 +1978,8 @@ async def _generate_llm_continuation_with_streaming(
         if session_id:
             extra_body["session_id"] = session_id[:128]
 
-        # Enable adaptive thinking for Anthropic models via OpenRouter
-        if config.thinking_enabled and "anthropic" in config.model.lower():
-            extra_body["reasoning"] = {"enabled": True}
+        # Enable extended thinking for Anthropic models via OpenRouter
+        _apply_thinking_config(extra_body, config.model)
 
         # Make streaming LLM call (no tools - just text response)
         from typing import cast