Compare commits

..

4 Commits

Author SHA1 Message Date
Otto-AGPT
cdeefb8621 fix(copilot): Use correct OpenRouter reasoning API format
Addresses review comments from CodeRabbit and Sentry:

- Change reasoning format from {"enabled": True} (invalid) to
  {"max_tokens": config.thinking_budget_tokens} per OpenRouter docs
- Add missing thinking_budget_tokens config field (default: 10000)
- Extract duplicate code into _apply_thinking_config() helper function
- Update description from 'adaptive' to 'extended' thinking for clarity

References:
- OpenRouter reasoning docs: https://openrouter.ai/docs/reasoning-tokens
2026-02-11 13:54:57 +00:00
Swifty
ba6d585170 update settings 2026-02-10 16:08:21 +01:00
Swifty
90eac56525 Merge branch 'dev' into fix/enable-extended-thinking 2026-02-10 15:26:40 +01:00
Otto
75f8772f8a feat(copilot): Enable extended thinking for Claude models
Adds configuration to enable Anthropic's extended thinking feature via
OpenRouter. This keeps the model's chain-of-thought reasoning internal
rather than outputting it to users.

Configuration:
- thinking_enabled: bool (default: True)
- thinking_budget_tokens: int (default: 10000)

The thinking config is only applied to Anthropic models (detected via
model name containing 'anthropic').

Fixes the issue where the CoPilot prompt expects thinking mode but it
wasn't enabled on the API side, causing internal reasoning to leak
into user-facing responses.
2026-02-10 13:58:57 +00:00
8 changed files with 42 additions and 63 deletions

View File

@@ -93,6 +93,18 @@ class ChatConfig(BaseSettings):
description="Name of the prompt in Langfuse to fetch",
)
# Extended thinking configuration for Claude models
thinking_enabled: bool = Field(
default=True,
description="Enable extended thinking for Claude models via OpenRouter",
)
thinking_budget_tokens: int = Field(
default=10000,
ge=1000,
le=100000,
description="Maximum tokens for extended thinking (budget_tokens for Claude)",
)
@field_validator("api_key", mode="before")
@classmethod
def get_api_key(cls, v):

View File

@@ -80,6 +80,19 @@ settings = Settings()
client = openai.AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
def _apply_thinking_config(extra_body: dict[str, Any], model: str) -> None:
"""Apply extended thinking configuration for Anthropic models via OpenRouter.
OpenRouter's reasoning API expects either:
- {"max_tokens": N} for explicit token budget
- {"effort": "high"} for automatic budget
See: https://openrouter.ai/docs/reasoning-tokens
"""
if config.thinking_enabled and "anthropic" in model.lower():
extra_body["reasoning"] = {"max_tokens": config.thinking_budget_tokens}
langfuse = get_client()
# Redis key prefix for tracking running long-running operations
@@ -1066,6 +1079,9 @@ async def _stream_chat_chunks(
:128
] # OpenRouter limit
# Enable extended thinking for Anthropic models via OpenRouter
_apply_thinking_config(extra_body, model)
api_call_start = time_module.perf_counter()
stream = await client.chat.completions.create(
model=model,
@@ -1829,6 +1845,9 @@ async def _generate_llm_continuation(
if session_id:
extra_body["session_id"] = session_id[:128]
# Enable extended thinking for Anthropic models via OpenRouter
_apply_thinking_config(extra_body, config.model)
retry_count = 0
last_error: Exception | None = None
response = None
@@ -1959,6 +1978,9 @@ async def _generate_llm_continuation_with_streaming(
if session_id:
extra_body["session_id"] = session_id[:128]
# Enable extended thinking for Anthropic models via OpenRouter
_apply_thinking_config(extra_body, config.model)
# Make streaming LLM call (no tools - just text response)
from typing import cast

View File

@@ -1,11 +1,11 @@
"use client";
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
import { SidebarProvider } from "@/components/ui/sidebar";
import { ChatContainer } from "./components/ChatContainer/ChatContainer";
import { ChatSidebar } from "./components/ChatSidebar/ChatSidebar";
import { MobileDrawer } from "./components/MobileDrawer/MobileDrawer";
import { MobileHeader } from "./components/MobileHeader/MobileHeader";
import { ScaleLoader } from "./components/ScaleLoader/ScaleLoader";
import { useCopilotPage } from "./useCopilotPage";
export function CopilotPage() {
@@ -34,11 +34,7 @@ export function CopilotPage() {
} = useCopilotPage();
if (isUserLoading || !isLoggedIn) {
return (
<div className="fixed inset-0 z-50 flex items-center justify-center bg-[#f8f8f9]">
<ScaleLoader className="text-neutral-400" />
</div>
);
return <LoadingSpinner size="large" cover />;
}
return (

View File

@@ -143,10 +143,10 @@ export const ChatMessagesContainer = ({
return (
<Conversation className="min-h-0 flex-1">
<ConversationContent className="flex min-h-screen flex-1 flex-col gap-6 px-3 py-6">
<ConversationContent className="gap-6 px-3 py-6">
{isLoading && messages.length === 0 && (
<div className="flex min-h-full flex-1 items-center justify-center">
<LoadingSpinner className="text-neutral-600" />
<div className="flex flex-1 items-center justify-center">
<LoadingSpinner size="large" className="text-neutral-400" />
</div>
)}
{messages.map((message, messageIndex) => {

View File

@@ -121,8 +121,8 @@ export function ChatSidebar() {
className="mt-4 flex flex-col gap-1"
>
{isLoadingSessions ? (
<div className="flex min-h-[30rem] items-center justify-center py-4">
<LoadingSpinner size="small" className="text-neutral-600" />
<div className="flex items-center justify-center py-4">
<LoadingSpinner size="small" className="text-neutral-400" />
</div>
) : sessions.length === 0 ? (
<p className="py-4 text-center text-sm text-neutral-500">

View File

@@ -1,35 +0,0 @@
.loader {
width: 48px;
height: 48px;
display: inline-block;
position: relative;
}
.loader::after,
.loader::before {
content: "";
box-sizing: border-box;
width: 100%;
height: 100%;
border-radius: 50%;
background: currentColor;
position: absolute;
left: 0;
top: 0;
animation: animloader 2s linear infinite;
}
.loader::after {
animation-delay: 1s;
}
@keyframes animloader {
0% {
transform: scale(0);
opacity: 1;
}
100% {
transform: scale(1);
opacity: 0;
}
}

View File

@@ -1,16 +0,0 @@
import { cn } from "@/lib/utils";
import styles from "./ScaleLoader.module.css";
interface Props {
size?: number;
className?: string;
}
export function ScaleLoader({ size = 48, className }: Props) {
return (
<div
className={cn(styles.loader, className)}
style={{ width: size, height: size }}
/>
);
}

View File

@@ -155,7 +155,7 @@ export function CreateAgentTool({ part }: Props) {
>
{isOperating && (
<ContentGrid>
<ProgressBar value={progress} />
<ProgressBar value={progress} className="max-w-[280px]" />
<ContentHint>
This could take a few minutes, grab a coffee
</ContentHint>