feat(copilot): Enable extended thinking for Claude models

Adds configuration to enable Anthropic's extended thinking feature via
OpenRouter. This keeps the model's chain-of-thought reasoning internal
rather than outputting it to users.

Configuration:
- thinking_enabled: bool (default: True)
- thinking_budget_tokens: int (default: 10000)
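
These map to Pydantic fields with hard bounds, so a budget outside
1000-100000 is rejected when the config loads. A minimal standalone
sketch of the same validation (class name is illustrative; defaults and
bounds mirror the ChatConfig fields in the diff below):

    from pydantic import Field, ValidationError
    from pydantic_settings import BaseSettings

    class ThinkingSettings(BaseSettings):
        # Same defaults and bounds as the new ChatConfig fields
        thinking_enabled: bool = Field(default=True)
        thinking_budget_tokens: int = Field(default=10000, ge=1000, le=100000)

    try:
        ThinkingSettings(thinking_budget_tokens=500)  # below ge=1000
    except ValidationError as exc:
        print(exc.errors()[0]["type"])  # greater_than_equal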

The thinking config is only applied to Anthropic models (detected via
model name containing 'anthropic').
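
For illustration, the gating and payload construction reduce to the
following (the helper name is hypothetical; the actual change inlines
this logic at each call site, as the diff below shows):

    def build_thinking_provider(
        model: str, enabled: bool, budget_tokens: int
    ) -> dict | None:
        """Return the OpenRouter provider block that enables Anthropic
        extended thinking, or None for non-Anthropic models."""
        if not (enabled and "anthropic" in model.lower()):
            return None
        return {
            "anthropic": {
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": budget_tokens,
                }
            }
        }

    extra_body: dict = {}
    payload = build_thinking_provider("anthropic/claude-sonnet-4", True, 10000)
    if payload is not None:  # skipped entirely for non-Anthropic models
        extra_body["provider"] = payload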

Fixes an issue where the CoPilot prompt expects thinking mode that was
not enabled on the API side, causing internal reasoning to leak into
user-facing responses.
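
End to end, the resulting OpenRouter call looks roughly like this
(client setup, prompt, and model slug are illustrative; the extra_body
shape matches the diff below):

    import asyncio

    from openai import AsyncOpenAI

    async def main() -> None:
        client = AsyncOpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key="sk-or-...",  # placeholder key
        )
        extra_body = {
            "provider": {
                "anthropic": {
                    "thinking": {"type": "enabled", "budget_tokens": 10000}
                }
            }
        }
        stream = await client.chat.completions.create(
            model="anthropic/claude-sonnet-4",  # illustrative model slug
            messages=[{"role": "user", "content": "Hello"}],
            stream=True,
            extra_body=extra_body,
        )
        async for chunk in stream:
            # Thinking stays internal; only user-visible text is streamed
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")

    asyncio.run(main())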

commit 75f8772f8a (parent f2ead70f3d)
Author: Otto
Date:   2026-02-10 13:58:57 +00:00

2 changed files, 45 insertions(+), 0 deletions(-)


@@ -93,6 +93,18 @@ class ChatConfig(BaseSettings):
        description="Name of the prompt in Langfuse to fetch",
    )

    # Extended thinking configuration for Claude models
    thinking_enabled: bool = Field(
        default=True,
        description="Enable extended thinking for Claude models",
    )
    thinking_budget_tokens: int = Field(
        default=10000,
        ge=1000,
        le=100000,
        description="Token budget for extended thinking (1000-100000)",
    )

    @field_validator("api_key", mode="before")
    @classmethod
    def get_api_key(cls, v):


@@ -1066,6 +1066,17 @@ async def _stream_chat_chunks(
            :128
        ]  # OpenRouter limit

    # Enable extended thinking for Anthropic models
    if config.thinking_enabled and "anthropic" in model.lower():
        extra_body["provider"] = {
            "anthropic": {
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": config.thinking_budget_tokens,
                }
            }
        }
    api_call_start = time_module.perf_counter()
    stream = await client.chat.completions.create(
        model=model,
@@ -1829,6 +1840,17 @@ async def _generate_llm_continuation(
    if session_id:
        extra_body["session_id"] = session_id[:128]

    # Enable extended thinking for Anthropic models
    if config.thinking_enabled and "anthropic" in config.model.lower():
        extra_body["provider"] = {
            "anthropic": {
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": config.thinking_budget_tokens,
                }
            }
        }
    retry_count = 0
    last_error: Exception | None = None
    response = None
@@ -1959,6 +1981,17 @@ async def _generate_llm_continuation_with_streaming(
    if session_id:
        extra_body["session_id"] = session_id[:128]

    # Enable extended thinking for Anthropic models
    if config.thinking_enabled and "anthropic" in config.model.lower():
        extra_body["provider"] = {
            "anthropic": {
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": config.thinking_budget_tokens,
                }
            }
        }

    # Make streaming LLM call (no tools - just text response)
    from typing import cast