diff --git a/autogpt_platform/backend/backend/copilot/config.py b/autogpt_platform/backend/backend/copilot/config.py index 0206d7930b..a26b105347 100644 --- a/autogpt_platform/backend/backend/copilot/config.py +++ b/autogpt_platform/backend/backend/copilot/config.py @@ -3,7 +3,7 @@ import os from typing import Literal -from pydantic import Field, field_validator, model_validator +from pydantic import Field, field_validator from pydantic_settings import BaseSettings from backend.util.clients import OPENROUTER_BASE_URL @@ -186,28 +186,6 @@ class ChatConfig(BaseSettings): "or the unprefixed `CLAUDE_AGENT_CLI_PATH` environment variable " "(same pattern as `api_key` / `base_url`).", ) - claude_agent_use_compat_proxy: bool = Field( - default=True, - description="Run the in-process OpenRouter compatibility proxy " - "(`backend.copilot.sdk.openrouter_compat_proxy`) in front of the " - "Claude Code CLI. The proxy strips `tool_reference` content " - "blocks and the `context-management-2025-06-27` beta header / " - "field from outgoing requests so newer SDK / CLI versions stop " - "tripping OpenRouter's stricter validation. Defaults to True " - "because the bundled CLI in `claude-agent-sdk >= 0.1.55` requires " - "the proxy. Orthogonal to `claude_agent_cli_path` — the override " - "picks the binary, the proxy rewrites whatever the binary sends. " - "Disable explicitly only if you've pinned `claude-agent-sdk` to " - "a version whose bundled CLI is in " - "`_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT` (2.1.63 or 2.1.70). " - "Reads from `CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY` or the " - "unprefixed `CLAUDE_AGENT_USE_COMPAT_PROXY` environment " - "variable (same pattern as `claude_agent_cli_path`). Only " - "takes effect when the session has an Anthropic-compatible " - "upstream to forward to — direct-Anthropic sessions skip the " - "proxy entirely to avoid silently re-routing through " - "OpenRouter.", - ) use_openrouter: bool = Field( default=True, description="Enable routing API calls through the OpenRouter proxy. " @@ -355,37 +333,6 @@ class ChatConfig(BaseSettings): ) return v - @model_validator(mode="before") - @classmethod - def _inject_unprefixed_compat_proxy_env(cls, values): - """Inject the unprefixed ``CLAUDE_AGENT_USE_COMPAT_PROXY`` env var - as a fallback for the ``claude_agent_use_compat_proxy`` field. - - Unlike ``claude_agent_cli_path`` (which defaults to ``None`` and - can use a simple ``if not v`` guard), this field defaults to - ``True``, so a ``mode="before"`` field validator cannot - distinguish "caller passed ``False`` explicitly" from "Pydantic - resolved the default ``True``" — both arrive as the raw value. - - Using a ``model_validator(mode="before")`` lets us inspect the - full input dict: if the key is absent AND the prefixed env var - ``CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY`` is not set, we inject the - unprefixed value so Pydantic can coerce it (``"1"``/``"true"`` - → ``True``). Explicit kwargs always take precedence because - they appear in *values* before this validator runs. - """ - if not isinstance(values, dict): - return values - key = "claude_agent_use_compat_proxy" - if key not in values: - # No explicit kwarg and Pydantic hasn't injected the - # prefixed env var yet — check the unprefixed form. - if os.getenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY") is None: - unprefixed = os.getenv("CLAUDE_AGENT_USE_COMPAT_PROXY") - if unprefixed is not None: - values[key] = unprefixed - return values - # Prompt paths for different contexts PROMPT_PATHS: dict[str, str] = { "default": "prompts/chat_system.md", diff --git a/autogpt_platform/backend/backend/copilot/config_test.py b/autogpt_platform/backend/backend/copilot/config_test.py index 60400ac41a..413a89277a 100644 --- a/autogpt_platform/backend/backend/copilot/config_test.py +++ b/autogpt_platform/backend/backend/copilot/config_test.py @@ -19,8 +19,6 @@ _ENV_VARS_TO_CLEAR = ( "OPENAI_BASE_URL", "CHAT_CLAUDE_AGENT_CLI_PATH", "CLAUDE_AGENT_CLI_PATH", - "CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY", - "CLAUDE_AGENT_USE_COMPAT_PROXY", ) @@ -124,63 +122,3 @@ class TestClaudeAgentCliPathEnvFallback: def test_no_env_var_defaults_to_none(self, monkeypatch: pytest.MonkeyPatch) -> None: cfg = ChatConfig() assert cfg.claude_agent_cli_path is None - - -class TestClaudeAgentUseCompatProxyEnvFallback: - """``claude_agent_use_compat_proxy`` accepts both the Pydantic- - prefixed ``CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY`` env var and the - unprefixed ``CLAUDE_AGENT_USE_COMPAT_PROXY`` form. Regression - guard for the bool-default pitfall: the field has a non-None - default (``True``), so Pydantic passes the default into the - validator when no value is provided and a naive ``if v is None`` - check would never fire. - """ - - def test_prefixed_env_var_enables_proxy( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: - monkeypatch.setenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY", "true") - cfg = ChatConfig() - assert cfg.claude_agent_use_compat_proxy is True - - def test_unprefixed_env_var_enables_proxy( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: - monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true") - cfg = ChatConfig() - assert cfg.claude_agent_use_compat_proxy is True - - def test_unprefixed_env_var_respects_falsy_value( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: - monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "false") - cfg = ChatConfig() - assert cfg.claude_agent_use_compat_proxy is False - - def test_prefixed_wins_over_unprefixed( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: - """When both are set, the Pydantic-prefixed var is authoritative - so the validator doesn't silently clobber an explicit - ``CHAT_...=false`` with an unprefixed ``=true``.""" - monkeypatch.setenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY", "false") - monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true") - cfg = ChatConfig() - assert cfg.claude_agent_use_compat_proxy is False - - def test_no_env_var_uses_field_default( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: - cfg = ChatConfig() - # Dev-preview branch defaults compat_proxy to True (the - # bundled CLI in claude-agent-sdk 0.1.58 needs the proxy). - assert cfg.claude_agent_use_compat_proxy is True - - def test_explicit_kwarg_not_overridden_by_unprefixed_env( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Regression: explicit ChatConfig(claude_agent_use_compat_proxy=False) - must not be overridden by the unprefixed env var.""" - monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true") - cfg = ChatConfig(claude_agent_use_compat_proxy=False) - assert cfg.claude_agent_use_compat_proxy is False diff --git a/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py b/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py index d0f7e8dafa..3b20cd2b68 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py @@ -392,17 +392,10 @@ async def _run_cli_against_fake_server( async def _run_reproduction( *, - route_through_proxy: bool, extra_env: dict[str, str] | None = None, ) -> tuple[int, str, str, list[_CapturedRequest]]: """Spawn the CLI against a fake Anthropic API and return what the - *upstream* (post-proxy if any) saw. - - When ``route_through_proxy`` is True, the CLI talks to the - ``OpenRouterCompatProxy`` and the proxy forwards to the fake - upstream. The fake upstream is what records the requests, so the - captured bodies are what OpenRouter would actually have received — - *after* the proxy's stripping pass. + server saw. """ cli_path = _resolve_cli_path() if cli_path is None or not cli_path.is_file(): @@ -415,30 +408,14 @@ async def _run_reproduction( captured: list[_CapturedRequest] = [] upstream_runner, upstream_port = await _start_fake_anthropic_server(captured) - proxy = None - target_port = upstream_port try: - if route_through_proxy: - from backend.copilot.sdk.openrouter_compat_proxy import ( - OpenRouterCompatProxy, - ) - - proxy = OpenRouterCompatProxy( - target_base_url=f"http://127.0.0.1:{upstream_port}" - ) - await proxy.start() - # Pull the bound port out of the proxy URL. - target_port = int(proxy.local_url.rsplit(":", 1)[1]) - returncode, stdout, stderr = await _run_cli_against_fake_server( cli_path=cli_path, - fake_server_port=target_port, + fake_server_port=upstream_port, timeout_seconds=30.0, extra_env=extra_env, ) finally: - if proxy is not None: - await proxy.stop() await upstream_runner.cleanup() return returncode, stdout, stderr, captured @@ -470,10 +447,9 @@ def _assert_no_forbidden_patterns( "`claude-agent-sdk` above 0.1.45. See " "https://github.com/Significant-Gravitas/AutoGPT/pull/12294 and " "https://github.com/anthropics/claude-agent-sdk-python/issues/789. " - "If you intended to upgrade, you must enable the in-process compat " - "proxy (`CLAUDE_AGENT_USE_COMPAT_PROXY=true` or the prefixed " - "`CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY=true`) or use a known-good " - "CLI binary via `claude_agent_cli_path` (env: " + "If you intended to upgrade, ensure " + "`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1` is set in the SDK env " + "or use a known-good CLI binary via `claude_agent_cli_path` (env: " "`CLAUDE_AGENT_CLI_PATH` or `CHAT_CLAUDE_AGENT_CLI_PATH`)." ) @@ -483,74 +459,31 @@ async def test_cli_does_not_send_openrouter_incompatible_features(): """End-to-end OpenRouter compatibility reproduction (bare CLI path). Spawns the bundled (or overridden) Claude Code CLI against a fake - Anthropic API server WITHOUT the compat proxy in the loop, captures - every request body it sends, and asserts that none of them contain - the two known OpenRouter-breaking features. + Anthropic API server, captures every request body it sends, and + asserts that none of them contain the two known OpenRouter-breaking + features. On a clean SDK pin (0.1.45 or 0.1.47, bundled CLI 2.1.63 or 2.1.70) this passes naturally. On a broken pin (0.1.55+, bundled CLI 2.1.91+) it fails — that failure IS the bisect signal we use to verify which SDK versions need the workaround. - - Skipped when ``claude_agent_use_compat_proxy=True`` because in that - configuration the operator has explicitly opted into the workaround - and the bare-CLI behaviour is moot — what matters is that the - *upstream* (post-proxy) sees clean requests, which is covered by - ``test_cli_via_compat_proxy_emits_clean_requests_to_upstream``. """ - from backend.copilot.config import ChatConfig - - if ChatConfig().claude_agent_use_compat_proxy: - pytest.skip( - "Compat proxy is enabled in the active config — the bare-CLI " - "reproduction is not a meaningful signal here. The proxy-routed " - "variant `test_cli_via_compat_proxy_emits_clean_requests_to_upstream` " - "is the regression guard for this configuration." - ) - - returncode, _stdout, stderr, captured = await _run_reproduction( - route_through_proxy=False - ) - _assert_no_forbidden_patterns(captured, returncode, stderr) - - -@pytest.mark.asyncio -async def test_cli_via_compat_proxy_emits_clean_requests_to_upstream(): - """End-to-end test for the compat proxy workaround. - - Spawns the bundled CLI against an in-process fake Anthropic API - server WITH the ``OpenRouterCompatProxy`` in front, then asserts - that the *upstream* sees clean requests — no `tool_reference` - blocks, no `context-management-2025-06-27` beta header — even - when the bundled CLI itself would have sent them. - - This is the regression guard for the proxy: if the proxy ever - stops stripping a known forbidden pattern, this test catches it. - On a SDK version where the bare CLI is already clean (0.1.45 / - 0.1.47), the proxy is a no-op and the test passes trivially. - On a SDK version with the regression (0.1.55+), the test fails - if and only if the proxy fails to strip the pattern. - """ - returncode, _stdout, stderr, captured = await _run_reproduction( - route_through_proxy=True - ) + returncode, _stdout, stderr, captured = await _run_reproduction() _assert_no_forbidden_patterns(captured, returncode, stderr) @pytest.mark.asyncio async def test_disable_experimental_betas_env_var_strips_headers(): - """Validate whether ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` is - sufficient to strip the ``context-management-2025-06-27`` beta header - when ``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint - (simulating OpenRouter). + """Validate that ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` strips + the ``context-management-2025-06-27`` beta header when + ``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint (simulating + OpenRouter). - If this test passes, the compat proxy is unnecessary and can be - removed — the env var alone is enough. If it fails, the CLI's - provider-detection logic does not honour the env var for custom - base URLs and the proxy remains required. + This is the main regression guard: the env var is injected by + ``service.py`` into every CLI subprocess so newer SDK / CLI versions + work with OpenRouter without any proxy. """ returncode, _stdout, stderr, captured = await _run_reproduction( - route_through_proxy=False, extra_env={"CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS": "1"}, ) _assert_no_forbidden_patterns(captured, returncode, stderr) diff --git a/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy.py b/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy.py deleted file mode 100644 index 103046942e..0000000000 --- a/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy.py +++ /dev/null @@ -1,559 +0,0 @@ -"""Tiny in-process HTTP middleware that makes the Claude Code CLI work -against OpenRouter on **any** ``claude-agent-sdk`` version. - -Background ----------- -We've been pinned at ``claude-agent-sdk==0.1.45`` (bundled CLI 2.1.63) -since `PR #12294`_ because every newer CLI version sends one of two -features that OpenRouter rejects: - -1. **`tool_reference` content blocks** in ``tool_result.content`` — - introduced in CLI 2.1.69. OpenRouter's stricter Zod validation - refuses requests containing them with:: - - messages[N].content[0].content: Invalid input: expected string, received array - -2. **`context-management-2025-06-27` beta header** — sent in either the - request body's ``betas`` array or the ``anthropic-beta`` HTTP header. - OpenRouter responds:: - - 400 No endpoints available that support Anthropic's context - management features (context-management-2025-06-27). - - Tracked upstream at `claude-agent-sdk-python#789`_. - -This module starts a tiny aiohttp server that: - -* listens on ``127.0.0.1:RANDOM_PORT``, -* receives every CLI request that would normally go to - ``ANTHROPIC_BASE_URL``, -* strips the two forbidden patterns from the body and headers, -* forwards the cleaned request to the real upstream - (``proxy_target_base_url``, e.g. ``https://openrouter.ai/api/v1``), -* streams the response back to the CLI unchanged. - -The proxy is wired via :class:`backend.copilot.config.ChatConfig.claude_agent_use_compat_proxy`. -When the flag is on, :mod:`backend.copilot.sdk.service` starts a proxy -per session, sets ``ANTHROPIC_BASE_URL`` in the SDK's ``env`` to point -at the proxy, then tears it down after the session ends. - -Why a separate proxy instead of a custom HTTP transport in the SDK? -------------------------------------------------------------------- -The Python SDK delegates **all** HTTP traffic to the bundled Claude -Code CLI subprocess. Once the CLI is spawned, the only seam left is -the network — there is no in-process hook for "modify outgoing -request before it leaves the CLI". The proxy lives at that seam. - -This module is intentionally orthogonal to the -:attr:`ChatConfig.claude_agent_cli_path` override: - -* ``cli_path`` lets us swap **which CLI binary** we run. -* this proxy lets us **rewrite what any CLI binary sends**. - -The two can be combined or used independently. - -.. _PR #12294: https://github.com/Significant-Gravitas/AutoGPT/pull/12294 -.. _claude-agent-sdk-python#789: https://github.com/anthropics/claude-agent-sdk-python/issues/789 -""" - -from __future__ import annotations - -import asyncio -import json -import logging -from typing import Any - -import aiohttp -from aiohttp import web - -logger = logging.getLogger(__name__) - -# Header values OpenRouter rejects. We strip exactly these tokens from -# the comma-separated ``anthropic-beta`` header value (preserving any -# other betas the CLI requests). -_FORBIDDEN_BETA_TOKENS: frozenset[str] = frozenset( - { - "context-management-2025-06-27", - } -) - -# Hop-by-hop headers we must NOT forward through the proxy. Per -# RFC 7230 §6.1, these are connection-specific and must be regenerated -# by each intermediary. ``host`` is also stripped because aiohttp -# generates the correct ``Host`` header for the upstream URL itself. -# -# The canonical header name defined in RFC 7230 §4.4 is ``Trailer`` -# (singular); some SDKs / legacy proxies also emit the plural -# ``Trailers`` so we accept both forms just in case. Intermediaries -# must additionally drop every header name listed in the incoming -# ``Connection`` field value (§6.1 "extension hop-by-hop headers") — -# that's handled dynamically by :func:`clean_request_headers`. -_HOP_BY_HOP_HEADERS: frozenset[str] = frozenset( - { - "connection", - "keep-alive", - "proxy-authenticate", - "proxy-authorization", - "te", - "trailer", - "trailers", - "transfer-encoding", - "upgrade", - "host", - # ``content-length`` is stripped because we may rewrite the - # body — aiohttp will recompute it on the upstream request. - "content-length", - } -) - - -# --------------------------------------------------------------------------- -# Pure helpers — exported so the unit tests can drive them directly without -# spinning up a server. -# --------------------------------------------------------------------------- - - -def strip_tool_reference_blocks(payload: Any) -> Any: - """Recursively remove ``tool_reference`` content blocks from - *payload*, returning the cleaned structure. - - The CLI's built-in ``ToolSearch`` tool emits these as part of - ``tool_result.content``:: - - {"type": "tool_reference", "tool_name": "mcp__copilot__find_block"} - - OpenRouter's stricter Zod validation rejects them. Removing them - is safe — they are metadata about which tools were searched, not - real model-visible content. The CLI's *internal* state still - contains them; only the wire format is rewritten. - """ - if isinstance(payload, dict): - # Drop the dict entirely if it IS a tool_reference block. The - # caller (a list comprehension below) discards None entries so - # we can return None to signal "remove me". - if payload.get("type") == "tool_reference": - return None - cleaned_dict: dict[str, Any] = {} - for key, value in payload.items(): - cleaned_value = strip_tool_reference_blocks(value) - # If a dict-valued child WAS a tool_reference block, - # drop the key entirely rather than writing `null` — - # otherwise schema-strict upstreams still reject the - # payload. Only applies when the original value was a - # dict; genuine None values in the input are preserved. - if cleaned_value is None and isinstance(value, dict): - continue - cleaned_dict[key] = cleaned_value - return cleaned_dict - if isinstance(payload, list): - cleaned_list: list[Any] = [] - for item in payload: - cleaned_item = strip_tool_reference_blocks(item) - if cleaned_item is None and isinstance(item, dict): - # Item was a tool_reference block — drop it from the - # list rather than leaving a None hole. - continue - cleaned_list.append(cleaned_item) - return cleaned_list - return payload - - -def strip_forbidden_betas_from_body(payload: Any) -> Any: - """Remove forbidden tokens from the ``betas`` array of an - Anthropic Messages API request body, if present. - - Returns a shallow copy with the ``betas`` key cleaned — the input - dict is never mutated. - - The Messages API accepts a top-level ``betas: list[str]`` parameter - used to opt into beta features. We drop tokens in - :data:`_FORBIDDEN_BETA_TOKENS` so OpenRouter's check passes. - """ - if not isinstance(payload, dict): - return payload - betas = payload.get("betas") - if not isinstance(betas, list): - return payload - cleaned_betas = [b for b in betas if b not in _FORBIDDEN_BETA_TOKENS] - result = {k: v for k, v in payload.items() if k != "betas"} - if cleaned_betas: - result["betas"] = cleaned_betas - return result - - -def strip_forbidden_anthropic_beta_header(value: str | None) -> str | None: - """Return *value* with forbidden tokens removed. - - The ``anthropic-beta`` HTTP header is a comma-separated list of - feature flags. We strip exactly the forbidden tokens, preserving - any others. Returns ``None`` if nothing remains (so the caller - can drop the header entirely). - """ - if not value: - return value - tokens = [token.strip() for token in value.split(",")] - kept = [token for token in tokens if token and token not in _FORBIDDEN_BETA_TOKENS] - if not kept: - return None - return ", ".join(kept) - - -def clean_request_body_bytes(body_bytes: bytes) -> bytes: - """Apply both body-level strippers to *body_bytes*, returning the - cleaned JSON. Falls back to the original bytes when the body - isn't valid JSON (the CLI shouldn't be sending non-JSON to the - Messages API, but be defensive).""" - if not body_bytes: - return body_bytes - try: - payload = json.loads(body_bytes.decode("utf-8")) - except (UnicodeDecodeError, json.JSONDecodeError): - return body_bytes - payload = strip_tool_reference_blocks(payload) - payload = strip_forbidden_betas_from_body(payload) - return json.dumps(payload, separators=(",", ":")).encode("utf-8") - - -def _parse_connection_tokens(headers: dict[str, str]) -> set[str]: - """Extract hop-by-hop header names from the ``Connection`` field.""" - connection_header = next( - (value for name, value in headers.items() if name.lower() == "connection"), - "", - ) - return { - token.strip().lower() for token in connection_header.split(",") if token.strip() - } - - -def clean_request_headers(headers: dict[str, str]) -> dict[str, str]: - """Drop hop-by-hop headers and rewrite ``anthropic-beta`` to remove - forbidden tokens. Returns a fresh dict the caller can pass through - to the upstream client without further mutation. - - Per RFC 7230 section 6.1, intermediaries must drop the static hop-by-hop - set above **and** every header name listed in the incoming - ``Connection`` field value (case-insensitive). The latter is how - extension hop-by-hop headers are signalled per-connection. - - Callers should pass an already-materialised ``dict`` (e.g. - ``dict(request.headers)``) so this function stays simple. - """ - connection_tokens = _parse_connection_tokens(headers) - - cleaned: dict[str, str] = {} - for name, value in headers.items(): - lower_name = name.lower() - if lower_name in _HOP_BY_HOP_HEADERS or lower_name in connection_tokens: - continue - if lower_name == "anthropic-beta": - stripped = strip_forbidden_anthropic_beta_header(value) - if stripped is None: - continue - cleaned[name] = stripped - continue - cleaned[name] = value - return cleaned - - -def clean_response_headers( - headers: "Any", -) -> list[tuple[str, str]]: - """Like :func:`clean_request_headers` but preserves multi-valued - headers (e.g. ``Set-Cookie``). Accepts any mapping-like object - whose ``.items()`` yields ``(name, value)`` pairs — including - aiohttp's ``CIMultiDictProxy`` which can have duplicate keys. - - Returns a list of ``(name, value)`` tuples suitable for passing - to ``web.StreamResponse(headers=...)`` via ``CIMultiDict``. - """ - connection_tokens: set[str] = set() - for name, value in headers.items(): - if name.lower() == "connection": - connection_tokens = { - t.strip().lower() for t in value.split(",") if t.strip() - } - break - - cleaned: list[tuple[str, str]] = [] - for name, value in headers.items(): - lower_name = name.lower() - if lower_name in _HOP_BY_HOP_HEADERS or lower_name in connection_tokens: - continue - if lower_name == "anthropic-beta": - stripped = strip_forbidden_anthropic_beta_header(value) - if stripped is None: - continue - cleaned.append((name, stripped)) - continue - cleaned.append((name, value)) - return cleaned - - -# --------------------------------------------------------------------------- -# The proxy server -# --------------------------------------------------------------------------- - - -class OpenRouterCompatProxy: - """In-process HTTP proxy that rewrites Claude Code CLI requests on - the way to OpenRouter (or any other Anthropic-compatible gateway). - - Usage:: - - proxy = OpenRouterCompatProxy(target_base_url="https://openrouter.ai/api/v1") - await proxy.start() - try: - # Spawn the CLI with ANTHROPIC_BASE_URL=proxy.local_url - ... - finally: - await proxy.stop() - """ - - def __init__( - self, - target_base_url: str, - *, - bind_host: str = "127.0.0.1", - request_timeout: float = 600.0, - ) -> None: - self._target_base_url = target_base_url.rstrip("/") - self._bind_host = bind_host - self._request_timeout = request_timeout - self._runner: web.AppRunner | None = None - self._client: aiohttp.ClientSession | None = None - self._port: int | None = None - - @property - def local_url(self) -> str: - """The ``http://host:port`` URL that the CLI should use as - ``ANTHROPIC_BASE_URL``. Raises if :meth:`start` has not been - called yet.""" - if self._port is None: - raise RuntimeError("Proxy is not running — call start() first.") - return f"http://{self._bind_host}:{self._port}" - - @property - def target_base_url(self) -> str: - """The upstream URL the proxy is forwarding to.""" - return self._target_base_url - - async def start(self) -> None: - """Bind to a random local port and start serving. - - Cleans up the ``ClientSession`` and the ``AppRunner`` on any - failure during setup so a partially-initialised proxy never - leaves resources dangling (covers the - ``runner.setup() / site.start()`` raise paths in addition to - the explicit bind-failure branches below). - """ - if self._runner is not None: - return # already started - # Use sock_connect + sock_read instead of total so long-lived - # SSE / streaming responses aren't killed after request_timeout. - # total=None means no cumulative limit; sock_read is the per-chunk - # idle timeout (time between data arriving on the socket). - client = aiohttp.ClientSession( - timeout=aiohttp.ClientTimeout( - total=None, - sock_connect=self._request_timeout, - sock_read=self._request_timeout, - ) - ) - app = web.Application() - # Catch every method + path so we can also forward GETs - # (the CLI may probe profile / model endpoints). - app.router.add_route("*", "/{tail:.*}", self._handle) - runner = web.AppRunner(app) - runner_setup = False - try: - await runner.setup() - runner_setup = True - site = web.TCPSite(runner, self._bind_host, 0) - await site.start() - server = site._server - if server is None: - raise RuntimeError("Failed to bind compat proxy server.") - sockets = getattr(server, "sockets", None) - if not sockets: - raise RuntimeError("Compat proxy server has no listening sockets.") - self._port = sockets[0].getsockname()[1] - except BaseException: - # Best-effort teardown — swallow secondary errors so the - # caller sees the original exception. - if runner_setup: - try: - await runner.cleanup() - except Exception: # pragma: no cover - cleanup-only path - logger.exception("compat proxy runner cleanup failed") - try: - await client.close() - except Exception: # pragma: no cover - cleanup-only path - logger.exception("compat proxy client close failed") - raise - # Only publish the attributes after everything is wired up so - # ``stop()`` and ``local_url`` observe a consistent state. - self._client = client - self._runner = runner - # Deliberately log only the local bind port — never the - # upstream URL or any derived component. CodeQL's - # `py/clear-text-logging-sensitive-data` taint analysis traces - # everything that originates from a config-supplied URL as - # potentially-sensitive even after parsing, and the upstream - # endpoint is anyway discoverable from the config the operator - # already has access to. The detailed upstream is exposed via - # the ``target_base_url`` property for callers that need it. - logger.info( - "OpenRouter compat proxy listening on %s:%d", - self._bind_host, - self._port, - ) - - async def stop(self) -> None: - """Stop accepting connections and release the port.""" - if self._runner is not None: - await self._runner.cleanup() - self._runner = None - if self._client is not None: - await self._client.close() - self._client = None - self._port = None - - async def __aenter__(self) -> "OpenRouterCompatProxy": - await self.start() - return self - - async def __aexit__(self, exc_type, exc, tb) -> None: - await self.stop() - - async def _handle(self, request: web.Request) -> web.StreamResponse: - """Forward *request* to the upstream after stripping forbidden - features. Streams the upstream response back to the caller - chunk-by-chunk so SSE / streamed responses work.""" - if self._client is None: - raise web.HTTPInternalServerError(reason="proxy client missing") - - # Build the upstream URL. ``request.path_qs`` includes the - # query string verbatim. ``request.path`` for ``/v1/messages`` - # is just ``/v1/messages`` — we strip a leading slash and - # concat with the target base URL. - upstream_path = request.path_qs - if not upstream_path.startswith("/"): - upstream_path = "/" + upstream_path - # Allow the target_base_url to itself contain a path (e.g. - # ``https://openrouter.ai/api/v1``). In that case requests to - # ``/v1/messages`` need to become ``/api/v1/messages``, not - # ``/api/v1/v1/messages``. Strip a leading ``/v1`` from the - # incoming path if the target already ends with ``/v1`` (or - # similar API-version segment). - # Deduplicate API version prefix: if the target URL already - # contains a versioned path segment (e.g. ``/api/v1``) and the - # incoming request path starts with the same segment, strip it - # to avoid ``/api/v1/v1/messages``. - from urllib.parse import urlparse - - target_base = self._target_base_url - target_path = urlparse(target_base).path.rstrip("/") - if target_path and upstream_path.startswith(target_path + "/"): - upstream_path = upstream_path[len(target_path) :] - elif target_path and upstream_path == target_path: - upstream_path = "/" - upstream_url = f"{target_base}{upstream_path}" - - body_bytes = await request.read() - cleaned_body = clean_request_body_bytes(body_bytes) - cleaned_headers = clean_request_headers(dict(request.headers)) - - try: - upstream_response = await self._client.request( - method=request.method, - url=upstream_url, - data=cleaned_body if cleaned_body else None, - headers=cleaned_headers, - allow_redirects=False, - ) - except (aiohttp.ClientError, asyncio.TimeoutError) as e: - # ``aiohttp.ClientTimeout`` raises ``asyncio.TimeoutError`` - # (not ``aiohttp.ClientError``) on hung upstreams, so both - # must be caught here to surface the explicit 502 failure - # mode this proxy guarantees. - # - # Log the detailed error for ops, but return a generic - # message to the caller — exception strings can leak - # internal hostnames, ports, or stack frames (CodeQL - # `py/stack-trace-exposure`). - logger.warning( - "OpenRouter compat proxy upstream error: %s", type(e).__name__ - ) - return web.Response(status=502, text="upstream error") - - # Stream the response back unchanged (apart from hop-by-hop - # header filtering). Use clean_response_headers to preserve - # multi-valued headers like Set-Cookie that dict() would drop. - from multidict import CIMultiDict - - downstream = web.StreamResponse( - status=upstream_response.status, - headers=CIMultiDict(clean_response_headers(upstream_response.headers)), - ) - await downstream.prepare(request) - # Track whether the stream terminated cleanly. A mid-stream - # ``aiohttp.ClientError`` means the upstream died before - # finishing; calling ``write_eof()`` on that partial response - # would signal "complete stream" to the downstream client and - # silently corrupt the body. Skip the EOF on the error path - # so the client's connection is dropped instead, surfacing the - # failure correctly. - cancelled = False - stream_error: aiohttp.ClientError | None = None - try: - async for chunk in upstream_response.content.iter_any(): - await downstream.write(chunk) - except asyncio.CancelledError: - # Never suppress cancellation — since Python 3.8 it's a - # ``BaseException`` subclass precisely so catching - # ``Exception`` won't accidentally swallow it. Release - # the upstream body and re-raise so the asyncio task - # cooperatively unwinds (avoids hanging shutdowns / - # stuck request handlers). - cancelled = True - upstream_response.release() - raise - except aiohttp.ClientError as e: - stream_error = e - logger.warning( - "OpenRouter compat proxy stream interrupted: %s", type(e).__name__ - ) - finally: - if not cancelled: - upstream_response.release() - - if stream_error is not None: - # Do NOT call ``write_eof`` or return the prepared - # ``downstream`` here — aiohttp finalises a returned - # StreamResponse (writing the terminating chunk / - # content-length / EOF) even if we skipped ``write_eof`` - # ourselves, which would signal a clean end of stream to - # the client on top of the truncated body. Instead abort - # the underlying transport directly so the client's - # parser surfaces a ``ClientPayloadError`` / - # ``ServerDisconnectedError`` and the caller can retry / - # surface the failure instead of silently consuming a - # corrupt body. - try: - downstream.force_close() - except Exception: # pragma: no cover - defensive on transport - pass - transport = request.transport - if transport is not None: - try: - transport.abort() - except Exception: # pragma: no cover - defensive on transport - pass - # Re-raise the original stream error so aiohttp treats - # this handler as having failed; the transport is - # already aborted above so the client sees an abrupt - # disconnect either way. - raise stream_error - - await downstream.write_eof() - return downstream diff --git a/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py b/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py deleted file mode 100644 index c98711e24f..0000000000 --- a/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py +++ /dev/null @@ -1,695 +0,0 @@ -"""Tests for the OpenRouter compatibility proxy. - -The proxy strips two known forbidden patterns from requests so newer -``claude-agent-sdk`` / Claude Code CLI versions can talk to OpenRouter -through the unchanged transport. These tests cover both: - -* the pure stripping helpers (deterministic, no I/O), and -* the end-to-end proxy behaviour against a fake upstream server, so we - catch hop-by-hop header bugs and streaming regressions. - -See ``openrouter_compat_proxy.py`` for the rationale and the upstream -issues being worked around. -""" - -from __future__ import annotations - -import asyncio -import json -from typing import Any - -import aiohttp -import pytest -from aiohttp import web - -from backend.copilot.sdk.openrouter_compat_proxy import ( - _FORBIDDEN_BETA_TOKENS, - _HOP_BY_HOP_HEADERS, - OpenRouterCompatProxy, - clean_request_body_bytes, - clean_request_headers, - strip_forbidden_anthropic_beta_header, - strip_forbidden_betas_from_body, - strip_tool_reference_blocks, -) - -# --------------------------------------------------------------------------- -# strip_tool_reference_blocks -# --------------------------------------------------------------------------- - - -class TestStripToolReferenceBlocks: - """The CLI's built-in ToolSearch tool emits ``tool_reference`` - content blocks in ``tool_result.content``. OpenRouter's stricter - Zod validation rejects them. We drop them entirely — they're - metadata about which tools were searched, not real model-visible - content.""" - - def test_removes_tool_reference_block_at_top_level(self): - block = {"type": "tool_reference", "tool_name": "find_block"} - assert strip_tool_reference_blocks(block) is None - - def test_removes_tool_reference_block_from_list(self): - blocks = [ - {"type": "text", "text": "hello"}, - {"type": "tool_reference", "tool_name": "find_block"}, - {"type": "text", "text": "world"}, - ] - assert strip_tool_reference_blocks(blocks) == [ - {"type": "text", "text": "hello"}, - {"type": "text", "text": "world"}, - ] - - def test_strips_nested_tool_reference_inside_tool_result(self): - # The exact shape PR #12294 root-caused: tool_result.content - # contains the tool_reference block. - request = { - "messages": [ - { - "role": "user", - "content": [ - { - "type": "tool_result", - "tool_use_id": "tu_1", - "content": [ - {"type": "text", "text": "result text"}, - { - "type": "tool_reference", - "tool_name": "mcp__copilot__find_block", - }, - ], - } - ], - } - ] - } - cleaned = strip_tool_reference_blocks(request) - tool_result_content = cleaned["messages"][0]["content"][0]["content"] - assert tool_result_content == [{"type": "text", "text": "result text"}] - - def test_preserves_unrelated_payloads(self): - payload = { - "model": "claude-opus-4.6", - "messages": [{"role": "user", "content": "hi"}], - "temperature": 0.7, - } - assert strip_tool_reference_blocks(payload) == payload - - def test_handles_empty_and_primitive_inputs(self): - assert strip_tool_reference_blocks({}) == {} - assert strip_tool_reference_blocks([]) == [] - assert strip_tool_reference_blocks("plain string") == "plain string" - assert strip_tool_reference_blocks(42) == 42 - assert strip_tool_reference_blocks(None) is None - - def test_removes_dict_valued_tool_reference_child_entirely(self): - # Regression guard: when a tool_reference dict is assigned to - # a key rather than listed, the helper used to rewrite it to - # `null` (leaving the parent key with a None value). That is - # still schema-invalid upstream — remove the key entirely. - payload = { - "wrapper": {"type": "tool_reference", "tool_name": "find_block"}, - "keep": "value", - } - cleaned = strip_tool_reference_blocks(payload) - assert "wrapper" not in cleaned - assert cleaned["keep"] == "value" - - def test_preserves_genuine_none_values_on_non_dict_children(self): - payload = {"explicit_null": None, "text": "ok"} - cleaned = strip_tool_reference_blocks(payload) - assert cleaned == {"explicit_null": None, "text": "ok"} - - -# --------------------------------------------------------------------------- -# strip_forbidden_betas_from_body -# --------------------------------------------------------------------------- - - -class TestStripForbiddenBetasFromBody: - """OpenRouter rejects ``context-management-2025-06-27`` in the - request body's ``betas`` array.""" - - def test_removes_forbidden_token_keeps_others(self): - body = { - "model": "claude-opus-4.6", - "betas": [ - "context-management-2025-06-27", - "fine-grained-tool-streaming-2025", - ], - } - cleaned = strip_forbidden_betas_from_body(body) - assert cleaned["betas"] == ["fine-grained-tool-streaming-2025"] - - def test_removes_betas_field_entirely_when_only_forbidden(self): - body = {"model": "x", "betas": ["context-management-2025-06-27"]} - cleaned = strip_forbidden_betas_from_body(body) - assert "betas" not in cleaned - - def test_no_op_when_no_betas_field(self): - body = {"model": "x"} - assert strip_forbidden_betas_from_body(body) == {"model": "x"} - - def test_no_op_on_non_dict(self): - assert strip_forbidden_betas_from_body([1, 2, 3]) == [1, 2, 3] - assert strip_forbidden_betas_from_body("plain") == "plain" - - def test_all_forbidden_tokens_constants_are_recognized(self): - for forbidden in _FORBIDDEN_BETA_TOKENS: - body = {"betas": [forbidden, "other"]} - cleaned = strip_forbidden_betas_from_body(body) - assert forbidden not in cleaned["betas"] - - -# --------------------------------------------------------------------------- -# strip_forbidden_anthropic_beta_header -# --------------------------------------------------------------------------- - - -class TestStripForbiddenAnthropicBetaHeader: - def test_removes_forbidden_token_keeps_others(self): - value = "fine-grained-tool-streaming-2025, context-management-2025-06-27, other-beta" - result = strip_forbidden_anthropic_beta_header(value) - assert result == "fine-grained-tool-streaming-2025, other-beta" - - def test_returns_none_when_only_forbidden_token_present(self): - assert ( - strip_forbidden_anthropic_beta_header("context-management-2025-06-27") - is None - ) - - def test_passes_through_clean_header(self): - assert strip_forbidden_anthropic_beta_header("foo, bar") == "foo, bar" - - def test_handles_empty_and_none_input(self): - assert strip_forbidden_anthropic_beta_header("") == "" - assert strip_forbidden_anthropic_beta_header(None) is None - - def test_handles_extra_whitespace(self): - value = " context-management-2025-06-27 , fine-grained " - result = strip_forbidden_anthropic_beta_header(value) - assert result == "fine-grained" - - -# --------------------------------------------------------------------------- -# clean_request_body_bytes — combined body-level cleanup -# --------------------------------------------------------------------------- - - -class TestCleanRequestBodyBytes: - def test_strips_both_patterns_in_one_pass(self): - body = { - "model": "claude-opus-4.6", - "betas": ["context-management-2025-06-27"], - "messages": [ - { - "role": "user", - "content": [ - { - "type": "tool_result", - "tool_use_id": "tu_1", - "content": [ - {"type": "tool_reference", "tool_name": "find"}, - {"type": "text", "text": "ok"}, - ], - } - ], - } - ], - } - cleaned_bytes = clean_request_body_bytes(json.dumps(body).encode("utf-8")) - cleaned = json.loads(cleaned_bytes.decode("utf-8")) - assert "betas" not in cleaned # only forbidden token, dropped - tool_result_content = cleaned["messages"][0]["content"][0]["content"] - assert tool_result_content == [{"type": "text", "text": "ok"}] - - def test_passes_through_non_json_body(self): - garbage = b"\xff\xfe not json at all" - assert clean_request_body_bytes(garbage) == garbage - - def test_passes_through_empty_body(self): - assert clean_request_body_bytes(b"") == b"" - - -# --------------------------------------------------------------------------- -# clean_request_headers — hop-by-hop + anthropic-beta cleanup -# --------------------------------------------------------------------------- - - -class TestCleanRequestHeaders: - def test_drops_hop_by_hop_headers(self): - headers = { - "Host": "example.com", - "Connection": "keep-alive", - "Content-Length": "42", - "Authorization": "Bearer xxx", - "Content-Type": "application/json", - } - cleaned = clean_request_headers(headers) - assert "Host" not in cleaned - assert "Connection" not in cleaned - assert "Content-Length" not in cleaned - assert cleaned["Authorization"] == "Bearer xxx" - assert cleaned["Content-Type"] == "application/json" - - def test_strips_forbidden_token_from_anthropic_beta_header(self): - headers = { - "anthropic-beta": "context-management-2025-06-27, other-beta", - "Authorization": "Bearer x", - } - cleaned = clean_request_headers(headers) - assert cleaned["anthropic-beta"] == "other-beta" - - def test_drops_anthropic_beta_header_when_only_forbidden(self): - headers = {"anthropic-beta": "context-management-2025-06-27"} - cleaned = clean_request_headers(headers) - assert "anthropic-beta" not in cleaned - - def test_hop_by_hop_set_completeness(self): - # Sanity check: if upstream removes hop-by-hop headers from - # this set we want to know — keep the canonical RFC 7230 list. - for required in ( - "connection", - "transfer-encoding", - "host", - "trailer", - "trailers", - ): - assert required in _HOP_BY_HOP_HEADERS - - def test_drops_headers_listed_in_connection_field(self): - # Per RFC 7230 §6.1 intermediaries must also drop every - # header name listed in the incoming Connection field value - # (extension hop-by-hop headers signalled per-connection). - headers = { - "Connection": "X-Custom-Hop, Upgrade", - "X-Custom-Hop": "secret-extension", - "Authorization": "Bearer x", - "X-Keep": "ok", - } - cleaned = clean_request_headers(headers) - assert "X-Custom-Hop" not in cleaned - # Upgrade is a static hop-by-hop header; Connection itself is - # also dropped; the rest pass through. - assert "Connection" not in cleaned - assert cleaned["Authorization"] == "Bearer x" - assert cleaned["X-Keep"] == "ok" - - def test_connection_token_matching_is_case_insensitive(self): - headers = { - "Connection": "x-hop-HEADER", - "X-Hop-Header": "drop-me", - "X-Keep": "ok", - } - cleaned = clean_request_headers(headers) - assert "X-Hop-Header" not in cleaned - assert cleaned["X-Keep"] == "ok" - - -# --------------------------------------------------------------------------- -# End-to-end: real proxy + fake upstream -# --------------------------------------------------------------------------- - - -class _FakeUpstream: - """Tiny aiohttp app that records every request the proxy forwards - so the test can assert on the cleaned payloads.""" - - def __init__(self) -> None: - self.captured: list[dict[str, Any]] = [] - self._runner: web.AppRunner | None = None - self.port: int = 0 - - async def start(self) -> str: - async def handler(request: web.Request) -> web.StreamResponse: - body = await request.text() - self.captured.append( - { - "method": request.method, - "path": request.path_qs, - "headers": {k: v for k, v in request.headers.items()}, - "body": body, - } - ) - # Return a minimal JSON success response so the proxy has - # something to stream back. - return web.json_response({"ok": True, "echoed": body}) - - app = web.Application() - app.router.add_route("*", "/{tail:.*}", handler) - self._runner = web.AppRunner(app) - await self._runner.setup() - site = web.TCPSite(self._runner, "127.0.0.1", 0) - await site.start() - server = site._server - assert server is not None - sockets = getattr(server, "sockets", None) - assert sockets is not None - self.port = sockets[0].getsockname()[1] - return f"http://127.0.0.1:{self.port}" - - async def stop(self) -> None: - if self._runner is not None: - await self._runner.cleanup() - self._runner = None - - -@pytest.mark.asyncio -async def test_proxy_strips_tool_reference_block_end_to_end(): - upstream = _FakeUpstream() - upstream_url = await upstream.start() - proxy = OpenRouterCompatProxy(target_base_url=upstream_url) - await proxy.start() - try: - body = { - "model": "claude-opus-4.6", - "messages": [ - { - "role": "user", - "content": [ - {"type": "text", "text": "hi"}, - { - "type": "tool_reference", - "tool_name": "mcp__copilot__find_block", - }, - ], - } - ], - } - async with aiohttp.ClientSession() as client: - async with client.post( - f"{proxy.local_url}/v1/messages", - json=body, - headers={"Authorization": "Bearer test"}, - ) as resp: - assert resp.status == 200 - await resp.read() - finally: - await proxy.stop() - await upstream.stop() - - assert len(upstream.captured) == 1 - forwarded = json.loads(upstream.captured[0]["body"]) - # The tool_reference block must NOT be in the upstream-visible body. - assert '"tool_reference"' not in upstream.captured[0]["body"] - assert forwarded["messages"][0]["content"] == [{"type": "text", "text": "hi"}] - - -@pytest.mark.asyncio -async def test_proxy_strips_context_management_beta_header_end_to_end(): - upstream = _FakeUpstream() - upstream_url = await upstream.start() - proxy = OpenRouterCompatProxy(target_base_url=upstream_url) - await proxy.start() - try: - async with aiohttp.ClientSession() as client: - async with client.post( - f"{proxy.local_url}/v1/messages", - json={"model": "x", "messages": []}, - headers={ - "Authorization": "Bearer test", - "anthropic-beta": "context-management-2025-06-27, other-beta", - }, - ) as resp: - assert resp.status == 200 - await resp.read() - finally: - await proxy.stop() - await upstream.stop() - - forwarded_headers = upstream.captured[0]["headers"] - # Header is rewritten to remove only the forbidden token, keeping the rest. - assert any( - k.lower() == "anthropic-beta" and v == "other-beta" - for k, v in forwarded_headers.items() - ) - - -@pytest.mark.asyncio -async def test_proxy_strips_betas_from_request_body_end_to_end(): - upstream = _FakeUpstream() - upstream_url = await upstream.start() - proxy = OpenRouterCompatProxy(target_base_url=upstream_url) - await proxy.start() - try: - body = { - "model": "x", - "betas": [ - "context-management-2025-06-27", - "fine-grained-tool-streaming-2025", - ], - "messages": [], - } - async with aiohttp.ClientSession() as client: - async with client.post( - f"{proxy.local_url}/v1/messages", - json=body, - ) as resp: - assert resp.status == 200 - await resp.read() - finally: - await proxy.stop() - await upstream.stop() - - forwarded = json.loads(upstream.captured[0]["body"]) - # Only the surviving beta should be present. - assert forwarded["betas"] == ["fine-grained-tool-streaming-2025"] - - -@pytest.mark.asyncio -async def test_proxy_passes_through_clean_request_unchanged(): - """The proxy must be a no-op for requests that don't contain any of - the forbidden patterns — no other rewriting allowed.""" - upstream = _FakeUpstream() - upstream_url = await upstream.start() - proxy = OpenRouterCompatProxy(target_base_url=upstream_url) - await proxy.start() - try: - body = { - "model": "claude-opus-4.6", - "messages": [{"role": "user", "content": "hello"}], - "temperature": 0.7, - } - async with aiohttp.ClientSession() as client: - async with client.post( - f"{proxy.local_url}/v1/messages", - json=body, - headers={ - "Authorization": "Bearer test", - "Content-Type": "application/json", - }, - ) as resp: - assert resp.status == 200 - await resp.read() - finally: - await proxy.stop() - await upstream.stop() - - forwarded = json.loads(upstream.captured[0]["body"]) - assert forwarded == body - - -@pytest.mark.asyncio -async def test_proxy_returns_502_on_upstream_failure(): - """If the upstream is unreachable the proxy must return a clear - 502, not silently hang. - - Note: the outer ``client.post`` talks to the *proxy* on localhost, - not to the dead upstream directly. The proxy is the thing under - test, so it should always respond with a 502 — we must NOT - swallow ``aiohttp.ClientError`` / ``asyncio.TimeoutError`` on the - outer call, because that would mask a proxy crash and turn the - assertion into a false positive. Let any such exception fail the - test. - """ - proxy = OpenRouterCompatProxy( - target_base_url="http://127.0.0.1:1", # nothing listening - ) - await proxy.start() - try: - async with aiohttp.ClientSession() as client: - async with client.post( - f"{proxy.local_url}/v1/messages", - json={"model": "x"}, - timeout=aiohttp.ClientTimeout(total=10), - ) as resp: - assert resp.status == 502 - text = await resp.text() - # Generic error message — no internal hostname leaked. - assert "upstream error" in text - finally: - await proxy.stop() - - -@pytest.mark.asyncio -async def test_proxy_returns_502_on_upstream_timeout(): - """``aiohttp.ClientTimeout`` raises ``asyncio.TimeoutError`` (not - ``aiohttp.ClientError``), which previously escaped the except - block and surfaced as a 500. This regression-guards the 502 - contract for hung upstreams.""" - - class _HangingUpstream: - """Upstream that accepts the request but never finishes the - response body, forcing the proxy's client timeout to fire.""" - - def __init__(self) -> None: - self._runner: web.AppRunner | None = None - self.port: int = 0 - - async def start(self) -> str: - async def handler(request: web.Request) -> web.StreamResponse: - # Hold the response open longer than the proxy's - # client timeout so aiohttp raises TimeoutError on - # the proxy side. - await asyncio.sleep(30) - return web.Response(status=200) - - app = web.Application() - app.router.add_route("*", "/{tail:.*}", handler) - self._runner = web.AppRunner(app) - await self._runner.setup() - site = web.TCPSite(self._runner, "127.0.0.1", 0) - await site.start() - server = site._server - assert server is not None - sockets = getattr(server, "sockets", None) - assert sockets is not None - self.port = sockets[0].getsockname()[1] - return f"http://127.0.0.1:{self.port}" - - async def stop(self) -> None: - if self._runner is not None: - await self._runner.cleanup() - self._runner = None - - upstream = _HangingUpstream() - upstream_url = await upstream.start() - # Short proxy timeout so the test finishes quickly. - proxy = OpenRouterCompatProxy(target_base_url=upstream_url, request_timeout=0.5) - await proxy.start() - try: - async with aiohttp.ClientSession() as client: - async with client.post( - f"{proxy.local_url}/v1/messages", - json={"model": "x"}, - timeout=aiohttp.ClientTimeout(total=10), - ) as resp: - assert resp.status == 502 - text = await resp.text() - # Generic error message — no internal hostname leaked. - assert "upstream error" in text - finally: - await proxy.stop() - await upstream.stop() - - -@pytest.mark.asyncio -async def test_proxy_does_not_signal_clean_eof_on_mid_stream_error(): - """Regression guard: if the upstream stream dies mid-body, the - proxy must NOT call ``write_eof()`` — that would mark the - downstream response as a complete, valid stream even though the - client only saw a truncated body. Instead the proxy drops the - connection so the client's parser surfaces a transport error. - - We simulate the failure with a raw asyncio TCP server that - sends a chunked-encoding response header plus one partial chunk - and then hard-closes the socket — this is the one failure mode - aiohttp's ``iter_any()`` reliably surfaces as an - ``aiohttp.ClientError`` rather than an ordinary clean EOF. - """ - - class _TruncatingUpstream: - """Raw TCP server that sends a partial chunked body then - closes the socket without writing the terminating chunk.""" - - def __init__(self) -> None: - self._server: asyncio.base_events.Server | None = None - self.port: int = 0 - - async def start(self) -> str: - async def handle_conn( - reader: asyncio.StreamReader, - writer: asyncio.StreamWriter, - ) -> None: - try: - # Read and discard the request until the blank - # line — we don't care what the proxy sends. - while True: - line = await reader.readline() - if not line or line == b"\r\n": - break - # Chunked response with one partial chunk. - writer.write( - b"HTTP/1.1 200 OK\r\n" - b"Content-Type: application/octet-stream\r\n" - b"Transfer-Encoding: chunked\r\n" - b"Connection: close\r\n" - b"\r\n" - # One chunk, size 8, content "partial-". - b"8\r\n" - b"partial-\r\n" - # Deliberately DO NOT send the terminating - # "0\r\n\r\n" — this is the mid-stream - # truncation we're testing. - ) - await writer.drain() - finally: - # Hard-close the socket so the proxy's - # iter_any() sees an abrupt end-of-stream. - try: - writer.transport.abort() - except Exception: - pass - - self._server = await asyncio.start_server(handle_conn, "127.0.0.1", 0) - sockets = self._server.sockets - assert sockets is not None - self.port = sockets[0].getsockname()[1] - return f"http://127.0.0.1:{self.port}" - - async def stop(self) -> None: - if self._server is not None: - self._server.close() - await self._server.wait_closed() - self._server = None - - upstream = _TruncatingUpstream() - upstream_url = await upstream.start() - proxy = OpenRouterCompatProxy(target_base_url=upstream_url, request_timeout=5.0) - await proxy.start() - try: - async with aiohttp.ClientSession() as client: - client_error: Exception | None = None - try: - async with client.post( - f"{proxy.local_url}/v1/messages", - json={"model": "x"}, - timeout=aiohttp.ClientTimeout(total=10), - ) as resp: - # The client should see either an error raising - # here or a truncated body followed by a - # transport-level failure on read — both surface - # the truncation instead of silently reporting - # success. - await resp.read() - except ( - aiohttp.ClientPayloadError, - aiohttp.ClientConnectionError, - aiohttp.ServerDisconnectedError, - ) as e: - client_error = e - assert client_error is not None, ( - "Proxy silently consumed an upstream mid-stream " - "failure and returned a clean EOF to the client — " - "regression in the stream-error path." - ) - finally: - await proxy.stop() - await upstream.stop() - - -@pytest.mark.asyncio -async def test_proxy_local_url_raises_before_start(): - proxy = OpenRouterCompatProxy(target_base_url="http://example.com") - with pytest.raises(RuntimeError): - _ = proxy.local_url diff --git a/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py b/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py index 835bd82603..eba8c843c5 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py @@ -202,22 +202,11 @@ def test_sdk_exports_hook_event_type(hook_event: str): # OpenRouter compatibility — bundled CLI version pin # --------------------------------------------------------------------------- # -# We're stuck on ``claude-agent-sdk==0.1.45`` (bundled CLI ``2.1.63``) -# because every version above introduces a 400 against OpenRouter: -# -# 1. CLI ``2.1.69`` (= SDK ``0.1.46``) shipped a `tool_reference` content -# block in `tool_result.content` that OpenRouter's stricter Zod -# validation rejects. See PR -# https://github.com/Significant-Gravitas/AutoGPT/pull/12294 for the -# forensic write-up that originally pinned us. CLI ``2.1.70`` added -# proxy detection that *should* disable the offending block, but two -# later attempts (Dependabot bumps to 0.1.55 / 0.1.56) still failed. -# -# 2. A second regression — the ``context-management-2025-06-27`` beta -# header — appeared in some CLI version after ``2.1.91``. Tracked -# upstream at -# https://github.com/anthropics/claude-agent-sdk-python/issues/789 -# (still open at the time of writing, no upstream PR yet). +# Newer ``claude-agent-sdk`` versions bundle CLI binaries that send +# features incompatible with OpenRouter (``tool_reference`` content +# blocks, ``context-management-2025-06-27`` beta). We neutralise these +# at runtime by injecting ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` +# into the CLI subprocess env (see ``service.py``). # # This test is the cheapest possible regression guard: it pins the # bundled CLI to a known-good version. If anyone bumps @@ -225,89 +214,39 @@ def test_sdk_exports_hook_event_type(hook_event: str): # ``_cli_version.py`` will change and this test will fail with a clear # message that points the next person at the OpenRouter compat issue # instead of letting them silently re-break production. -# -# Workaround for actually upgrading: set the -# ``claude_agent_cli_path`` config option (or the matching env var) to -# point at a separately-installed Claude Code CLI binary at a known-good -# version, so the SDK Python API surface and the CLI binary version can -# be picked independently. -# CLI versions verified to work against OpenRouter directly (no compat -# proxy required) — bisected via the reproduction test in -# `cli_openrouter_compat_test.py`. Bundled CLI versions outside this -# set are still allowed but ONLY when the compat proxy is enabled (see -# the second known-good set below + the test below). -_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT: frozenset[str] = frozenset( +# CLI versions verified to work against OpenRouter when the +# ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` env var is set -- +# bisected via the reproduction test in ``cli_openrouter_compat_test.py``. +_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = frozenset( { - "2.1.63", # claude-agent-sdk 0.1.45 — original pin from PR #12294. - "2.1.70", # claude-agent-sdk 0.1.47 — first version with the + "2.1.63", # claude-agent-sdk 0.1.45 -- original pin from PR #12294. + "2.1.70", # claude-agent-sdk 0.1.47 -- first version with the # tool_reference proxy detection fix; bisect-verified # OpenRouter-safe in #12742. + "2.1.97", # claude-agent-sdk 0.1.58 -- works with the + # CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var. } ) -# CLI versions verified to work against OpenRouter ONLY when the -# in-process `openrouter_compat_proxy` is enabled (which strips the -# `tool_reference` content blocks and `context-management-2025-06-27` -# beta from outgoing requests). Without the proxy these CLI versions -# trip OpenRouter's stricter validation and return 400. -_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY: frozenset[str] = frozenset( - { - "2.1.97", # claude-agent-sdk 0.1.58 — needs `claude_agent_use_compat_proxy=True` - # due to the upstream regression in - # anthropics/claude-agent-sdk-python#789. - } -) - -# Aggregate set used by the assertion below — the test allows EITHER -# a directly-known-good CLI OR a proxy-known-good CLI when the proxy -# is enabled in the active config. -_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = ( - _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT | _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY -) - def test_bundled_cli_version_is_known_good_against_openrouter(): """Pin the bundled CLI version so accidental SDK bumps cause a loud, fast failure with a pointer to the OpenRouter compatibility issue. - - A CLI version that's only safe via the compat proxy is allowed only - when ``ChatConfig.claude_agent_use_compat_proxy`` is enabled. """ from claude_agent_sdk._cli_version import __cli_version__ - from backend.copilot.config import ChatConfig - - cfg = ChatConfig() - proxy_enabled = cfg.claude_agent_use_compat_proxy - - if __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT: - return # safe with or without the proxy - - if __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY: - assert proxy_enabled, ( - f"Bundled Claude Code CLI version {__cli_version__!r} is only " - "OpenRouter-safe when `claude_agent_use_compat_proxy` is " - "enabled, but the active ChatConfig has the proxy disabled. " - "Either set `COPILOT__CLAUDE_AGENT_USE_COMPAT_PROXY=true` or " - "downgrade `claude-agent-sdk` to a version whose bundled CLI " - f"is in {sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT)!r}. " - "See https://github.com/anthropics/claude-agent-sdk-python/issues/789." - ) - return - - raise AssertionError( + assert __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS, ( f"Bundled Claude Code CLI version is {__cli_version__!r}, which is " - f"not in any OpenRouter-known-good set " + f"not in the OpenRouter-known-good set " f"({sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS)!r}). " "If you intentionally bumped `claude-agent-sdk`, verify the new " "bundled CLI works with OpenRouter against the reproduction test " - "in `cli_openrouter_compat_test.py`, then add the new CLI version " - "to either `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT` (works " - "without the proxy) or `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY` " - "(works only with `claude_agent_use_compat_proxy=true`). If you " - "cannot make the bundled CLI work either way, set " - "`claude_agent_cli_path` to a known-good binary instead. See " + "in `cli_openrouter_compat_test.py` (with " + "`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`), then add the new " + "CLI version to `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS`. If the env " + "var is not sufficient, set `claude_agent_cli_path` to a " + "known-good binary instead. See " "https://github.com/anthropics/claude-agent-sdk-python/issues/789 " "and https://github.com/Significant-Gravitas/AutoGPT/pull/12294." ) diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index 96dfb67e8e..d8b164091e 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -1980,13 +1980,6 @@ async def stream_chat_completion_sdk( transcript_content: str = "" state: _RetryState | None = None - # OpenRouter compat proxy — started inside the try and stopped in finally - # when ``ChatConfig.claude_agent_use_compat_proxy`` is enabled. The proxy - # rewrites outgoing CLI requests to strip ``tool_reference`` content - # blocks and the ``context-management-2025-06-27`` beta so the latest - # SDK / CLI versions stop tripping OpenRouter's validation. - _compat_proxy: Any = None # OpenRouterCompatProxy | None — lazy import - # Token usage accumulators — populated from ResultMessage at end of turn turn_prompt_tokens = 0 # uncached input tokens only turn_completion_tokens = 0 @@ -2249,96 +2242,14 @@ async def stream_chat_completion_sdk( if sdk_model: sdk_options_kwargs["model"] = sdk_model - # OpenRouter compatibility proxy — started here so its local URL - # can be injected into the CLI subprocess env BEFORE the env dict - # is passed to ``ClaudeAgentOptions``. When this flag is on we - # transparently rewrite outgoing CLI requests via the proxy - # (stripping ``tool_reference`` blocks and the - # ``context-management-2025-06-27`` beta) so newer SDK / CLI - # versions can talk to OpenRouter without their stricter - # validation rejecting the request. - if config.claude_agent_use_compat_proxy: - # Only start the compat proxy when there's already an - # explicit Anthropic-compatible upstream to forward to. - # Otherwise we'd be silently routing direct Anthropic / - # Claude Code subscription sessions through OpenRouter, - # which would break auth and change providers without - # operator consent. The explicit upstream can come from: - # - # 1. ``sdk_env['ANTHROPIC_BASE_URL']`` — caller override; - # 2. the process env — lowest-precedence host override; - # 3. ``ChatConfig.openrouter_active`` — OpenRouter is - # configured as the session's routing provider (i.e. - # the only case in which falling back to - # ``OPENROUTER_BASE_URL`` is intentional). - # - # When none of the above hold, log a warning and leave - # the CLI to talk to Anthropic directly as usual — the - # feature is opt-in and documented as "OpenRouter - # compatibility", so quietly no-oping on direct-Anthropic - # sessions is the safe default. - # Claude Code subscription mode intentionally sets - # ``sdk_env['ANTHROPIC_BASE_URL'] = ""`` to *disable* any - # base-URL override and keep the CLI talking to Anthropic - # directly. Treat an explicit empty string as a hard - # "no-proxy" signal so we never silently start the proxy - # against a host-wide ``ANTHROPIC_BASE_URL`` or fall back - # to OpenRouter when the caller has opted out. - sdk_env_map = sdk_env or {} - explicit_sdk_env = "ANTHROPIC_BASE_URL" in sdk_env_map - sdk_env_value = ( - sdk_env_map["ANTHROPIC_BASE_URL"] if explicit_sdk_env else None - ) - if explicit_sdk_env and not sdk_env_value: - # Empty string from sdk_env → subscription mode opt-out. - target_base_url: str | None = None - explicit_opt_out = True - else: - target_base_url = sdk_env_value or os.environ.get("ANTHROPIC_BASE_URL") - explicit_opt_out = False - # Only fall back to OpenRouter when the session actually - # has no base-URL plumbing of its own AND OpenRouter is - # the active routing provider AND the caller hasn't - # explicitly opted out via an empty sdk_env override. - if ( - not target_base_url - and not explicit_opt_out - and config.openrouter_active - ): - from backend.util.clients import OPENROUTER_BASE_URL - - target_base_url = OPENROUTER_BASE_URL - - if target_base_url: - from backend.copilot.sdk.openrouter_compat_proxy import ( - OpenRouterCompatProxy, - ) - - _compat_proxy = OpenRouterCompatProxy(target_base_url=target_base_url) - await _compat_proxy.start() - # Inject the proxy URL into the SDK env so the spawned - # CLI subprocess uses the proxy as its Anthropic - # endpoint. - if sdk_env is None: - sdk_env = {} - sdk_env["ANTHROPIC_BASE_URL"] = _compat_proxy.local_url - # Log only the local bind URL — upstream is redacted - # to match the taint-analysis guidance applied in - # ``openrouter_compat_proxy.start``. - logger.info( - "%s OpenRouter compat proxy active (listening on %s)", - log_prefix, - _compat_proxy.local_url, - ) - else: - logger.warning( - "%s claude_agent_use_compat_proxy is enabled but no " - "Anthropic-compatible upstream is configured for this " - "session (no ANTHROPIC_BASE_URL override and " - "openrouter_active is False); skipping proxy startup " - "so the CLI keeps talking to Anthropic directly.", - log_prefix, - ) + # Tell the CLI to strip experimental betas (e.g. + # ``context-management-2025-06-27``) and ``tool_reference`` + # content blocks so newer SDK / CLI versions work with + # OpenRouter's stricter validation. This single env var + # replaces the old in-process compat proxy. + if sdk_env is None: + sdk_env = {} + sdk_env["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1" if sdk_env: sdk_options_kwargs["env"] = sdk_env @@ -3012,18 +2923,5 @@ async def stream_chat_completion_sdk( except Exception: logger.warning("%s SDK cleanup failed", log_prefix, exc_info=True) finally: - # Tear down the OpenRouter compat proxy if it was started for - # this session — releases the bound port and the aiohttp - # client. Wrapped so a stop failure can never block the - # downstream lock release. - if _compat_proxy is not None: - try: - await _compat_proxy.stop() - except Exception: - logger.warning( - "%s OpenRouter compat proxy stop failed", - log_prefix, - exc_info=True, - ) # Release stream lock to allow new streams for this session await lock.release() diff --git a/autogpt_platform/backend/pyproject.toml b/autogpt_platform/backend/pyproject.toml index 08b1d5f1bc..ea81390d81 100644 --- a/autogpt_platform/backend/pyproject.toml +++ b/autogpt_platform/backend/pyproject.toml @@ -18,7 +18,7 @@ apscheduler = "^3.11.1" autogpt-libs = { path = "../autogpt_libs", develop = true } bleach = { extras = ["css"], version = "^6.2.0" } cachetools = "^5.5.0" -claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 ships the broken context-management beta and REQUIRES the openrouter_compat_proxy. See sdk_compat_test.py. +claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 -- CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var strips the broken context-management beta. See sdk_compat_test.py. click = "^8.2.0" cryptography = "^46.0" discord-py = "^2.5.2"