From 554dfac5fcaa07264a5f218d38ce5b679a9051cb Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Fri, 10 Apr 2026 00:47:01 +0000 Subject: [PATCH] fix(copilot): route baseline Anthropic models directly instead of through OpenRouter The baseline copilot path was routing all LLM calls through OpenRouter, including Anthropic models, which caused an unexpected $1.59K bill. This change adds direct Anthropic API routing for Claude models in the baseline path: - Add `anthropic_api_key` config field (reads ANTHROPIC_API_KEY env var) - Add `_get_anthropic_client()` for direct Anthropic API access - Add `_get_baseline_client(model)` that routes Anthropic models directly when ANTHROPIC_API_KEY is set, falling back to OpenRouter otherwise - Update default model names to direct Anthropic format (e.g. `claude-sonnet-4-20250514` instead of `anthropic/claude-sonnet-4`) - Non-Anthropic models (title, simulation) continue through OpenRouter Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/copilot/baseline/service.py | 24 ++++- .../copilot/baseline/service_unit_test.py | 91 +++++++++++++++++++ .../backend/backend/copilot/config.py | 18 +++- .../backend/backend/copilot/config_test.py | 33 +++++++ .../backend/backend/copilot/service.py | 13 ++- 5 files changed, 174 insertions(+), 5 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index a8044d80b7..72f42bc909 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -53,6 +53,7 @@ from backend.copilot.response_model import ( ) from backend.copilot.service import ( _build_system_prompt, + _get_anthropic_client, _get_openai_client, _update_title_async, config, @@ -83,6 +84,8 @@ from backend.util.tool_call_loop import ( ) if TYPE_CHECKING: + from langfuse.openai import AsyncOpenAI as LangfuseAsyncOpenAI + from backend.copilot.permissions import CopilotPermissions logger = logging.getLogger(__name__) @@ -229,6 +232,23 @@ def _resolve_baseline_model(mode: CopilotMode | None) -> str: return config.model +def _is_anthropic_model(model: str) -> bool: + """Return True if *model* should be routed to the Anthropic API directly.""" + return model.startswith("claude-") or model.startswith("anthropic/") + + +def _get_baseline_client(model: str) -> "LangfuseAsyncOpenAI": + """Return the right OpenAI-compatible client for *model*. + + Anthropic models are sent directly to the Anthropic API when an + ``ANTHROPIC_API_KEY`` is configured; everything else goes through + OpenRouter. + """ + if _is_anthropic_model(model) and config.anthropic_api_key: + return _get_anthropic_client() + return _get_openai_client() + + # Tag pairs to strip from baseline streaming output. Different models use # different tag names for their internal reasoning (Claude uses , # Gemini uses , etc.). @@ -359,7 +379,7 @@ async def _baseline_llm_caller( round_text = "" response = None # initialized before try so finally block can access it try: - client = _get_openai_client() + client = _get_baseline_client(state.model) typed_messages = cast(list[ChatCompletionMessageParam], messages) if tools: typed_tools = cast(list[ChatCompletionToolParam], tools) @@ -729,7 +749,7 @@ async def _compress_session_messages( result = await compress_context( messages=messages_dict, model=model, - client=_get_openai_client(), + client=_get_baseline_client(model), ) except Exception as e: logger.warning("[Baseline] Context compression with LLM failed: %s", e) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py b/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py index ba1374b720..b55fa45964 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py @@ -828,3 +828,94 @@ class TestBaselineCostExtraction: # response was never assigned so cost extraction must not raise assert state.cost_usd is None + + +class TestGetBaselineClient: + """Tests for _get_baseline_client routing logic.""" + + def test_anthropic_model_uses_anthropic_client(self): + from backend.copilot.baseline.service import _get_baseline_client + + mock_anthropic = MagicMock() + mock_openai = MagicMock() + with ( + patch( + "backend.copilot.baseline.service._get_anthropic_client", + return_value=mock_anthropic, + ), + patch( + "backend.copilot.baseline.service._get_openai_client", + return_value=mock_openai, + ), + patch( + "backend.copilot.baseline.service.config", + anthropic_api_key="sk-ant-test", + ), + ): + client = _get_baseline_client("claude-sonnet-4-20250514") + assert client is mock_anthropic + + def test_openrouter_model_uses_openai_client(self): + from backend.copilot.baseline.service import _get_baseline_client + + mock_anthropic = MagicMock() + mock_openai = MagicMock() + with ( + patch( + "backend.copilot.baseline.service._get_anthropic_client", + return_value=mock_anthropic, + ), + patch( + "backend.copilot.baseline.service._get_openai_client", + return_value=mock_openai, + ), + patch( + "backend.copilot.baseline.service.config", + anthropic_api_key="sk-ant-test", + ), + ): + client = _get_baseline_client("openai/gpt-4o-mini") + assert client is mock_openai + + def test_anthropic_model_without_key_falls_back_to_openrouter(self): + from backend.copilot.baseline.service import _get_baseline_client + + mock_anthropic = MagicMock() + mock_openai = MagicMock() + with ( + patch( + "backend.copilot.baseline.service._get_anthropic_client", + return_value=mock_anthropic, + ), + patch( + "backend.copilot.baseline.service._get_openai_client", + return_value=mock_openai, + ), + patch( + "backend.copilot.baseline.service.config", + anthropic_api_key=None, + ), + ): + client = _get_baseline_client("claude-sonnet-4-20250514") + assert client is mock_openai + + +class TestIsAnthropicModel: + """Tests for _is_anthropic_model helper.""" + + def test_claude_prefix(self): + from backend.copilot.baseline.service import _is_anthropic_model + + assert _is_anthropic_model("claude-sonnet-4-20250514") is True + assert _is_anthropic_model("claude-opus-4-20250514") is True + + def test_anthropic_slash_prefix(self): + from backend.copilot.baseline.service import _is_anthropic_model + + assert _is_anthropic_model("anthropic/claude-sonnet-4") is True + + def test_non_anthropic(self): + from backend.copilot.baseline.service import _is_anthropic_model + + assert _is_anthropic_model("openai/gpt-4o-mini") is False + assert _is_anthropic_model("google/gemini-2.5-flash") is False diff --git a/autogpt_platform/backend/backend/copilot/config.py b/autogpt_platform/backend/backend/copilot/config.py index 6da1cae52b..871f5d43d5 100644 --- a/autogpt_platform/backend/backend/copilot/config.py +++ b/autogpt_platform/backend/backend/copilot/config.py @@ -8,6 +8,8 @@ from pydantic_settings import BaseSettings from backend.util.clients import OPENROUTER_BASE_URL +ANTHROPIC_BASE_URL = "https://api.anthropic.com/v1" + # Per-request routing mode for a single chat turn. # - 'fast': route to the baseline OpenAI-compatible path with the cheaper model. # - 'extended_thinking': route to the Claude Agent SDK path with the default @@ -22,11 +24,11 @@ class ChatConfig(BaseSettings): # OpenAI API Configuration model: str = Field( - default="anthropic/claude-opus-4.6", + default="claude-opus-4-20250514", description="Default model for extended thinking mode", ) fast_model: str = Field( - default="anthropic/claude-sonnet-4", + default="claude-sonnet-4-20250514", description="Model for fast mode (baseline path). Should be faster/cheaper than the default model.", ) title_model: str = Field( @@ -38,6 +40,10 @@ class ChatConfig(BaseSettings): description="Model for dry-run block simulation (should be fast/cheap with good JSON output)", ) api_key: str | None = Field(default=None, description="OpenAI API key") + anthropic_api_key: str | None = Field( + default=None, + description="Anthropic API key for direct Anthropic API access (baseline path)", + ) base_url: str | None = Field( default=OPENROUTER_BASE_URL, description="Base URL for API (e.g., for OpenRouter)", @@ -279,6 +285,14 @@ class ChatConfig(BaseSettings): # would pair it with the OpenRouter base_url, causing auth failures. return v + @field_validator("anthropic_api_key", mode="before") + @classmethod + def get_anthropic_api_key(cls, v): + """Get Anthropic API key from environment if not provided.""" + if not v: + v = os.getenv("ANTHROPIC_API_KEY") + return v + @field_validator("base_url", mode="before") @classmethod def get_base_url(cls, v): diff --git a/autogpt_platform/backend/backend/copilot/config_test.py b/autogpt_platform/backend/backend/copilot/config_test.py index d63ce6bae1..10617884c1 100644 --- a/autogpt_platform/backend/backend/copilot/config_test.py +++ b/autogpt_platform/backend/backend/copilot/config_test.py @@ -14,6 +14,7 @@ _ENV_VARS_TO_CLEAR = ( "CHAT_API_KEY", "OPEN_ROUTER_API_KEY", "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", "CHAT_BASE_URL", "OPENROUTER_BASE_URL", "OPENAI_BASE_URL", @@ -70,6 +71,38 @@ class TestOpenrouterActive: assert cfg.openrouter_active is False +class TestAnthropicApiKey: + """Tests for the anthropic_api_key field and validator.""" + + def test_reads_from_env(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test") + cfg = ChatConfig() + assert cfg.anthropic_api_key == "sk-ant-test" + + def test_none_when_not_set(self): + cfg = ChatConfig() + assert cfg.anthropic_api_key is None + + def test_explicit_value_overrides_env(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "from-env") + cfg = ChatConfig(anthropic_api_key="explicit") + assert cfg.anthropic_api_key == "explicit" + + +class TestDefaultModelNames: + """Default model names should use direct Anthropic IDs (not OpenRouter format).""" + + def test_default_model_is_direct_anthropic(self): + cfg = ChatConfig() + assert "/" not in cfg.model + assert cfg.model.startswith("claude-") + + def test_fast_model_is_direct_anthropic(self): + cfg = ChatConfig() + assert "/" not in cfg.fast_model + assert cfg.fast_model.startswith("claude-") + + class TestE2BActive: """Tests for the e2b_active property — single source of truth for E2B usage.""" diff --git a/autogpt_platform/backend/backend/copilot/service.py b/autogpt_platform/backend/backend/copilot/service.py index fdd6fe24b6..37e3f464d7 100644 --- a/autogpt_platform/backend/backend/copilot/service.py +++ b/autogpt_platform/backend/backend/copilot/service.py @@ -21,7 +21,7 @@ from backend.data.understanding import format_understanding_for_prompt from backend.util.exceptions import NotAuthorizedError, NotFoundError from backend.util.settings import AppEnvironment, Settings -from .config import ChatConfig +from .config import ANTHROPIC_BASE_URL, ChatConfig from .model import ( ChatSessionInfo, get_chat_session, @@ -35,6 +35,7 @@ config = ChatConfig() settings = Settings() _client: LangfuseAsyncOpenAI | None = None +_anthropic_client: LangfuseAsyncOpenAI | None = None _langfuse = None @@ -45,6 +46,16 @@ def _get_openai_client() -> LangfuseAsyncOpenAI: return _client +def _get_anthropic_client() -> LangfuseAsyncOpenAI: + """Return an OpenAI-compatible client pointed at the Anthropic API.""" + global _anthropic_client + if _anthropic_client is None: + _anthropic_client = LangfuseAsyncOpenAI( + api_key=config.anthropic_api_key, base_url=ANTHROPIC_BASE_URL + ) + return _anthropic_client + + def _get_langfuse(): global _langfuse if _langfuse is None: