Compare commits

...

1 Commits

Author SHA1 Message Date
Zamil Majdy
554dfac5fc fix(copilot): route baseline Anthropic models directly instead of through OpenRouter
The baseline copilot path was routing all LLM calls through OpenRouter,
including Anthropic models, which caused an unexpected $1.59K bill.

This change adds direct Anthropic API routing for Claude models in the
baseline path:
- Add `anthropic_api_key` config field (reads ANTHROPIC_API_KEY env var)
- Add `_get_anthropic_client()` for direct Anthropic API access
- Add `_get_baseline_client(model)` that routes Anthropic models directly
  when ANTHROPIC_API_KEY is set, falling back to OpenRouter otherwise
- Update default model names to direct Anthropic format (e.g.
  `claude-sonnet-4-20250514` instead of `anthropic/claude-sonnet-4`)
- Non-Anthropic models (title, simulation) continue through OpenRouter

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 00:47:01 +00:00
5 changed files with 174 additions and 5 deletions

View File

@@ -53,6 +53,7 @@ from backend.copilot.response_model import (
)
from backend.copilot.service import (
_build_system_prompt,
_get_anthropic_client,
_get_openai_client,
_update_title_async,
config,
@@ -83,6 +84,8 @@ from backend.util.tool_call_loop import (
)
if TYPE_CHECKING:
from langfuse.openai import AsyncOpenAI as LangfuseAsyncOpenAI
from backend.copilot.permissions import CopilotPermissions
logger = logging.getLogger(__name__)
@@ -229,6 +232,23 @@ def _resolve_baseline_model(mode: CopilotMode | None) -> str:
return config.model
def _is_anthropic_model(model: str) -> bool:
"""Return True if *model* should be routed to the Anthropic API directly."""
return model.startswith("claude-") or model.startswith("anthropic/")
def _get_baseline_client(model: str) -> "LangfuseAsyncOpenAI":
    """Return the right OpenAI-compatible client for *model*.

    Anthropic models are sent directly to the Anthropic API when an
    ``ANTHROPIC_API_KEY`` is configured; everything else goes through
    OpenRouter.
    """
    route_direct = bool(config.anthropic_api_key) and _is_anthropic_model(model)
    return _get_anthropic_client() if route_direct else _get_openai_client()
# Tag pairs to strip from baseline streaming output. Different models use
# different tag names for their internal reasoning (Claude uses <thinking>,
# Gemini uses <internal_reasoning>, etc.).
@@ -359,7 +379,7 @@ async def _baseline_llm_caller(
round_text = ""
response = None # initialized before try so finally block can access it
try:
client = _get_openai_client()
client = _get_baseline_client(state.model)
typed_messages = cast(list[ChatCompletionMessageParam], messages)
if tools:
typed_tools = cast(list[ChatCompletionToolParam], tools)
@@ -729,7 +749,7 @@ async def _compress_session_messages(
result = await compress_context(
messages=messages_dict,
model=model,
client=_get_openai_client(),
client=_get_baseline_client(model),
)
except Exception as e:
logger.warning("[Baseline] Context compression with LLM failed: %s", e)

View File

@@ -828,3 +828,94 @@ class TestBaselineCostExtraction:
# response was never assigned so cost extraction must not raise
assert state.cost_usd is None
class TestGetBaselineClient:
    """Tests for _get_baseline_client routing logic."""

    @staticmethod
    def _route(model: str, api_key):
        """Call _get_baseline_client under patched clients and config.

        Returns ``(chosen, anthropic_mock, openai_mock)`` so each test can
        assert identity against the client it expects.
        """
        from backend.copilot.baseline.service import _get_baseline_client

        anthropic_client = MagicMock()
        openai_client = MagicMock()
        with (
            patch(
                "backend.copilot.baseline.service._get_anthropic_client",
                return_value=anthropic_client,
            ),
            patch(
                "backend.copilot.baseline.service._get_openai_client",
                return_value=openai_client,
            ),
            patch(
                "backend.copilot.baseline.service.config",
                anthropic_api_key=api_key,
            ),
        ):
            chosen = _get_baseline_client(model)
        return chosen, anthropic_client, openai_client

    def test_anthropic_model_uses_anthropic_client(self):
        chosen, anthropic_client, _ = self._route(
            "claude-sonnet-4-20250514", "sk-ant-test"
        )
        assert chosen is anthropic_client

    def test_openrouter_model_uses_openai_client(self):
        chosen, _, openai_client = self._route("openai/gpt-4o-mini", "sk-ant-test")
        assert chosen is openai_client

    def test_anthropic_model_without_key_falls_back_to_openrouter(self):
        # No API key configured: Anthropic models must fall back to OpenRouter.
        chosen, _, openai_client = self._route("claude-sonnet-4-20250514", None)
        assert chosen is openai_client
class TestIsAnthropicModel:
    """Tests for _is_anthropic_model helper."""

    def test_claude_prefix(self):
        from backend.copilot.baseline.service import _is_anthropic_model

        for model_id in ("claude-sonnet-4-20250514", "claude-opus-4-20250514"):
            assert _is_anthropic_model(model_id) is True

    def test_anthropic_slash_prefix(self):
        from backend.copilot.baseline.service import _is_anthropic_model

        assert _is_anthropic_model("anthropic/claude-sonnet-4") is True

    def test_non_anthropic(self):
        from backend.copilot.baseline.service import _is_anthropic_model

        for model_id in ("openai/gpt-4o-mini", "google/gemini-2.5-flash"):
            assert _is_anthropic_model(model_id) is False

View File

@@ -8,6 +8,8 @@ from pydantic_settings import BaseSettings
from backend.util.clients import OPENROUTER_BASE_URL
ANTHROPIC_BASE_URL = "https://api.anthropic.com/v1"
# Per-request routing mode for a single chat turn.
# - 'fast': route to the baseline OpenAI-compatible path with the cheaper model.
# - 'extended_thinking': route to the Claude Agent SDK path with the default
@@ -22,11 +24,11 @@ class ChatConfig(BaseSettings):
# OpenAI API Configuration
model: str = Field(
default="anthropic/claude-opus-4.6",
default="claude-opus-4-20250514",
description="Default model for extended thinking mode",
)
fast_model: str = Field(
default="anthropic/claude-sonnet-4",
default="claude-sonnet-4-20250514",
description="Model for fast mode (baseline path). Should be faster/cheaper than the default model.",
)
title_model: str = Field(
@@ -38,6 +40,10 @@ class ChatConfig(BaseSettings):
description="Model for dry-run block simulation (should be fast/cheap with good JSON output)",
)
api_key: str | None = Field(default=None, description="OpenAI API key")
anthropic_api_key: str | None = Field(
default=None,
description="Anthropic API key for direct Anthropic API access (baseline path)",
)
base_url: str | None = Field(
default=OPENROUTER_BASE_URL,
description="Base URL for API (e.g., for OpenRouter)",
@@ -279,6 +285,14 @@ class ChatConfig(BaseSettings):
# would pair it with the OpenRouter base_url, causing auth failures.
return v
@field_validator("anthropic_api_key", mode="before")
@classmethod
def get_anthropic_api_key(cls, v):
"""Get Anthropic API key from environment if not provided."""
if not v:
v = os.getenv("ANTHROPIC_API_KEY")
return v
@field_validator("base_url", mode="before")
@classmethod
def get_base_url(cls, v):

View File

@@ -14,6 +14,7 @@ _ENV_VARS_TO_CLEAR = (
"CHAT_API_KEY",
"OPEN_ROUTER_API_KEY",
"OPENAI_API_KEY",
"ANTHROPIC_API_KEY",
"CHAT_BASE_URL",
"OPENROUTER_BASE_URL",
"OPENAI_BASE_URL",
@@ -70,6 +71,38 @@ class TestOpenrouterActive:
assert cfg.openrouter_active is False
class TestAnthropicApiKey:
    """Tests for the anthropic_api_key field and validator."""

    def test_reads_from_env(self, monkeypatch: pytest.MonkeyPatch):
        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test")
        assert ChatConfig().anthropic_api_key == "sk-ant-test"

    def test_none_when_not_set(self):
        # Env var is cleared by the module-level fixture, so the field stays None.
        assert ChatConfig().anthropic_api_key is None

    def test_explicit_value_overrides_env(self, monkeypatch: pytest.MonkeyPatch):
        monkeypatch.setenv("ANTHROPIC_API_KEY", "from-env")
        cfg = ChatConfig(anthropic_api_key="explicit")
        assert cfg.anthropic_api_key == "explicit"
class TestDefaultModelNames:
    """Default model names should use direct Anthropic IDs (not OpenRouter format)."""

    @staticmethod
    def _assert_direct_anthropic_id(model_id: str) -> None:
        # Direct Anthropic IDs carry no provider prefix ("anthropic/...")
        # and always start with "claude-".
        assert "/" not in model_id
        assert model_id.startswith("claude-")

    def test_default_model_is_direct_anthropic(self):
        self._assert_direct_anthropic_id(ChatConfig().model)

    def test_fast_model_is_direct_anthropic(self):
        self._assert_direct_anthropic_id(ChatConfig().fast_model)
class TestE2BActive:
"""Tests for the e2b_active property — single source of truth for E2B usage."""

View File

@@ -21,7 +21,7 @@ from backend.data.understanding import format_understanding_for_prompt
from backend.util.exceptions import NotAuthorizedError, NotFoundError
from backend.util.settings import AppEnvironment, Settings
from .config import ChatConfig
from .config import ANTHROPIC_BASE_URL, ChatConfig
from .model import (
ChatSessionInfo,
get_chat_session,
@@ -35,6 +35,7 @@ config = ChatConfig()
settings = Settings()
_client: LangfuseAsyncOpenAI | None = None
_anthropic_client: LangfuseAsyncOpenAI | None = None
_langfuse = None
@@ -45,6 +46,16 @@ def _get_openai_client() -> LangfuseAsyncOpenAI:
return _client
def _get_anthropic_client() -> LangfuseAsyncOpenAI:
    """Return an OpenAI-compatible client pointed at the Anthropic API.

    Lazily builds a module-level singleton (``_anthropic_client``) on first
    call; later calls return the cached instance.
    """
    global _anthropic_client
    if _anthropic_client is None:
        # Same Langfuse-wrapped AsyncOpenAI client type as the OpenRouter
        # path, but authenticated with the Anthropic key and aimed at
        # ANTHROPIC_BASE_URL — presumably Anthropic's OpenAI-compatible
        # endpoint; confirm against Anthropic's API docs.
        _anthropic_client = LangfuseAsyncOpenAI(
            api_key=config.anthropic_api_key, base_url=ANTHROPIC_BASE_URL
        )
    return _anthropic_client
def _get_langfuse():
global _langfuse
if _langfuse is None: