refactor(backend): replace compat proxy with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS env var

This commit is contained in:
majdyz
2026-04-12 10:53:07 +00:00
parent e92ecbbb7c
commit 8e9bb083b2
8 changed files with 46 additions and 1645 deletions

View File

@@ -3,7 +3,7 @@
import os
from typing import Literal
from pydantic import Field, field_validator, model_validator
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings
from backend.util.clients import OPENROUTER_BASE_URL
@@ -186,28 +186,6 @@ class ChatConfig(BaseSettings):
"or the unprefixed `CLAUDE_AGENT_CLI_PATH` environment variable "
"(same pattern as `api_key` / `base_url`).",
)
claude_agent_use_compat_proxy: bool = Field(
default=True,
description="Run the in-process OpenRouter compatibility proxy "
"(`backend.copilot.sdk.openrouter_compat_proxy`) in front of the "
"Claude Code CLI. The proxy strips `tool_reference` content "
"blocks and the `context-management-2025-06-27` beta header / "
"field from outgoing requests so newer SDK / CLI versions stop "
"tripping OpenRouter's stricter validation. Defaults to True "
"because the bundled CLI in `claude-agent-sdk >= 0.1.55` requires "
"the proxy. Orthogonal to `claude_agent_cli_path` — the override "
"picks the binary, the proxy rewrites whatever the binary sends. "
"Disable explicitly only if you've pinned `claude-agent-sdk` to "
"a version whose bundled CLI is in "
"`_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT` (2.1.63 or 2.1.70). "
"Reads from `CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY` or the "
"unprefixed `CLAUDE_AGENT_USE_COMPAT_PROXY` environment "
"variable (same pattern as `claude_agent_cli_path`). Only "
"takes effect when the session has an Anthropic-compatible "
"upstream to forward to — direct-Anthropic sessions skip the "
"proxy entirely to avoid silently re-routing through "
"OpenRouter.",
)
use_openrouter: bool = Field(
default=True,
description="Enable routing API calls through the OpenRouter proxy. "
@@ -355,37 +333,6 @@ class ChatConfig(BaseSettings):
)
return v
@model_validator(mode="before")
@classmethod
def _inject_unprefixed_compat_proxy_env(cls, values):
"""Inject the unprefixed ``CLAUDE_AGENT_USE_COMPAT_PROXY`` env var
as a fallback for the ``claude_agent_use_compat_proxy`` field.
Unlike ``claude_agent_cli_path`` (which defaults to ``None`` and
can use a simple ``if not v`` guard), this field defaults to
``True``, so a ``mode="before"`` field validator cannot
distinguish "caller passed ``False`` explicitly" from "Pydantic
resolved the default ``True``" — both arrive as the raw value.
Using a ``model_validator(mode="before")`` lets us inspect the
full input dict: if the key is absent AND the prefixed env var
``CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY`` is not set, we inject the
unprefixed value so Pydantic can coerce it (``"1"``/``"true"``
→ ``True``). Explicit kwargs always take precedence because
they appear in *values* before this validator runs.
"""
if not isinstance(values, dict):
return values
key = "claude_agent_use_compat_proxy"
if key not in values:
# No explicit kwarg and Pydantic hasn't injected the
# prefixed env var yet — check the unprefixed form.
if os.getenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY") is None:
unprefixed = os.getenv("CLAUDE_AGENT_USE_COMPAT_PROXY")
if unprefixed is not None:
values[key] = unprefixed
return values
# Prompt paths for different contexts
PROMPT_PATHS: dict[str, str] = {
"default": "prompts/chat_system.md",

View File

@@ -19,8 +19,6 @@ _ENV_VARS_TO_CLEAR = (
"OPENAI_BASE_URL",
"CHAT_CLAUDE_AGENT_CLI_PATH",
"CLAUDE_AGENT_CLI_PATH",
"CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY",
"CLAUDE_AGENT_USE_COMPAT_PROXY",
)
@@ -124,63 +122,3 @@ class TestClaudeAgentCliPathEnvFallback:
def test_no_env_var_defaults_to_none(self, monkeypatch: pytest.MonkeyPatch) -> None:
cfg = ChatConfig()
assert cfg.claude_agent_cli_path is None
class TestClaudeAgentUseCompatProxyEnvFallback:
"""``claude_agent_use_compat_proxy`` accepts both the Pydantic-
prefixed ``CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY`` env var and the
unprefixed ``CLAUDE_AGENT_USE_COMPAT_PROXY`` form. Regression
guard for the bool-default pitfall: the field has a non-None
default (``True``), so Pydantic passes the default into the
validator when no value is provided and a naive ``if v is None``
check would never fire.
"""
def test_prefixed_env_var_enables_proxy(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.setenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
cfg = ChatConfig()
assert cfg.claude_agent_use_compat_proxy is True
def test_unprefixed_env_var_enables_proxy(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
cfg = ChatConfig()
assert cfg.claude_agent_use_compat_proxy is True
def test_unprefixed_env_var_respects_falsy_value(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "false")
cfg = ChatConfig()
assert cfg.claude_agent_use_compat_proxy is False
def test_prefixed_wins_over_unprefixed(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""When both are set, the Pydantic-prefixed var is authoritative
so the validator doesn't silently clobber an explicit
``CHAT_...=false`` with an unprefixed ``=true``."""
monkeypatch.setenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY", "false")
monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
cfg = ChatConfig()
assert cfg.claude_agent_use_compat_proxy is False
def test_no_env_var_uses_field_default(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
cfg = ChatConfig()
# Dev-preview branch defaults compat_proxy to True (the
# bundled CLI in claude-agent-sdk 0.1.58 needs the proxy).
assert cfg.claude_agent_use_compat_proxy is True
def test_explicit_kwarg_not_overridden_by_unprefixed_env(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Regression: explicit ChatConfig(claude_agent_use_compat_proxy=False)
must not be overridden by the unprefixed env var."""
monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
cfg = ChatConfig(claude_agent_use_compat_proxy=False)
assert cfg.claude_agent_use_compat_proxy is False

View File

@@ -392,17 +392,10 @@ async def _run_cli_against_fake_server(
async def _run_reproduction(
*,
route_through_proxy: bool,
extra_env: dict[str, str] | None = None,
) -> tuple[int, str, str, list[_CapturedRequest]]:
"""Spawn the CLI against a fake Anthropic API and return what the
*upstream* (post-proxy if any) saw.
When ``route_through_proxy`` is True, the CLI talks to the
``OpenRouterCompatProxy`` and the proxy forwards to the fake
upstream. The fake upstream is what records the requests, so the
captured bodies are what OpenRouter would actually have received —
*after* the proxy's stripping pass.
server saw.
"""
cli_path = _resolve_cli_path()
if cli_path is None or not cli_path.is_file():
@@ -415,30 +408,14 @@ async def _run_reproduction(
captured: list[_CapturedRequest] = []
upstream_runner, upstream_port = await _start_fake_anthropic_server(captured)
proxy = None
target_port = upstream_port
try:
if route_through_proxy:
from backend.copilot.sdk.openrouter_compat_proxy import (
OpenRouterCompatProxy,
)
proxy = OpenRouterCompatProxy(
target_base_url=f"http://127.0.0.1:{upstream_port}"
)
await proxy.start()
# Pull the bound port out of the proxy URL.
target_port = int(proxy.local_url.rsplit(":", 1)[1])
returncode, stdout, stderr = await _run_cli_against_fake_server(
cli_path=cli_path,
fake_server_port=target_port,
fake_server_port=upstream_port,
timeout_seconds=30.0,
extra_env=extra_env,
)
finally:
if proxy is not None:
await proxy.stop()
await upstream_runner.cleanup()
return returncode, stdout, stderr, captured
@@ -470,10 +447,9 @@ def _assert_no_forbidden_patterns(
"`claude-agent-sdk` above 0.1.45. See "
"https://github.com/Significant-Gravitas/AutoGPT/pull/12294 and "
"https://github.com/anthropics/claude-agent-sdk-python/issues/789. "
"If you intended to upgrade, you must enable the in-process compat "
"proxy (`CLAUDE_AGENT_USE_COMPAT_PROXY=true` or the prefixed "
"`CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY=true`) or use a known-good "
"CLI binary via `claude_agent_cli_path` (env: "
"If you intended to upgrade, ensure "
"`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1` is set in the SDK env "
"or use a known-good CLI binary via `claude_agent_cli_path` (env: "
"`CLAUDE_AGENT_CLI_PATH` or `CHAT_CLAUDE_AGENT_CLI_PATH`)."
)
@@ -483,74 +459,31 @@ async def test_cli_does_not_send_openrouter_incompatible_features():
"""End-to-end OpenRouter compatibility reproduction (bare CLI path).
Spawns the bundled (or overridden) Claude Code CLI against a fake
Anthropic API server WITHOUT the compat proxy in the loop, captures
every request body it sends, and asserts that none of them contain
the two known OpenRouter-breaking features.
Anthropic API server, captures every request body it sends, and
asserts that none of them contain the two known OpenRouter-breaking
features.
On a clean SDK pin (0.1.45 or 0.1.47, bundled CLI 2.1.63 or 2.1.70)
this passes naturally. On a broken pin (0.1.55+, bundled CLI 2.1.91+)
it fails — that failure IS the bisect signal we use to verify which
SDK versions need the workaround.
Skipped when ``claude_agent_use_compat_proxy=True`` because in that
configuration the operator has explicitly opted into the workaround
and the bare-CLI behaviour is moot — what matters is that the
*upstream* (post-proxy) sees clean requests, which is covered by
``test_cli_via_compat_proxy_emits_clean_requests_to_upstream``.
"""
from backend.copilot.config import ChatConfig
if ChatConfig().claude_agent_use_compat_proxy:
pytest.skip(
"Compat proxy is enabled in the active config — the bare-CLI "
"reproduction is not a meaningful signal here. The proxy-routed "
"variant `test_cli_via_compat_proxy_emits_clean_requests_to_upstream` "
"is the regression guard for this configuration."
)
returncode, _stdout, stderr, captured = await _run_reproduction(
route_through_proxy=False
)
_assert_no_forbidden_patterns(captured, returncode, stderr)
@pytest.mark.asyncio
async def test_cli_via_compat_proxy_emits_clean_requests_to_upstream():
"""End-to-end test for the compat proxy workaround.
Spawns the bundled CLI against an in-process fake Anthropic API
server WITH the ``OpenRouterCompatProxy`` in front, then asserts
that the *upstream* sees clean requests — no `tool_reference`
blocks, no `context-management-2025-06-27` beta header — even
when the bundled CLI itself would have sent them.
This is the regression guard for the proxy: if the proxy ever
stops stripping a known forbidden pattern, this test catches it.
On a SDK version where the bare CLI is already clean (0.1.45 /
0.1.47), the proxy is a no-op and the test passes trivially.
On a SDK version with the regression (0.1.55+), the test fails
if and only if the proxy fails to strip the pattern.
"""
returncode, _stdout, stderr, captured = await _run_reproduction(
route_through_proxy=True
)
returncode, _stdout, stderr, captured = await _run_reproduction()
_assert_no_forbidden_patterns(captured, returncode, stderr)
@pytest.mark.asyncio
async def test_disable_experimental_betas_env_var_strips_headers():
"""Validate whether ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` is
sufficient to strip the ``context-management-2025-06-27`` beta header
when ``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint
(simulating OpenRouter).
"""Validate that ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` strips
the ``context-management-2025-06-27`` beta header when
``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint (simulating
OpenRouter).
If this test passes, the compat proxy is unnecessary and can be
removed — the env var alone is enough. If it fails, the CLI's
provider-detection logic does not honour the env var for custom
base URLs and the proxy remains required.
This is the main regression guard: the env var is injected by
``service.py`` into every CLI subprocess so newer SDK / CLI versions
work with OpenRouter without any proxy.
"""
returncode, _stdout, stderr, captured = await _run_reproduction(
route_through_proxy=False,
extra_env={"CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS": "1"},
)
_assert_no_forbidden_patterns(captured, returncode, stderr)

View File

@@ -1,559 +0,0 @@
"""Tiny in-process HTTP middleware that makes the Claude Code CLI work
against OpenRouter on **any** ``claude-agent-sdk`` version.
Background
----------
We've been pinned at ``claude-agent-sdk==0.1.45`` (bundled CLI 2.1.63)
since `PR #12294`_ because every newer CLI version sends one of two
features that OpenRouter rejects:
1. **`tool_reference` content blocks** in ``tool_result.content`` —
introduced in CLI 2.1.69. OpenRouter's stricter Zod validation
refuses requests containing them with::
messages[N].content[0].content: Invalid input: expected string, received array
2. **`context-management-2025-06-27` beta header** — sent in either the
request body's ``betas`` array or the ``anthropic-beta`` HTTP header.
OpenRouter responds::
400 No endpoints available that support Anthropic's context
management features (context-management-2025-06-27).
Tracked upstream at `claude-agent-sdk-python#789`_.
This module starts a tiny aiohttp server that:
* listens on ``127.0.0.1:RANDOM_PORT``,
* receives every CLI request that would normally go to
``ANTHROPIC_BASE_URL``,
* strips the two forbidden patterns from the body and headers,
* forwards the cleaned request to the real upstream
(``proxy_target_base_url``, e.g. ``https://openrouter.ai/api/v1``),
* streams the response back to the CLI unchanged.
The proxy is wired via :class:`backend.copilot.config.ChatConfig.claude_agent_use_compat_proxy`.
When the flag is on, :mod:`backend.copilot.sdk.service` starts a proxy
per session, sets ``ANTHROPIC_BASE_URL`` in the SDK's ``env`` to point
at the proxy, then tears it down after the session ends.
Why a separate proxy instead of a custom HTTP transport in the SDK?
-------------------------------------------------------------------
The Python SDK delegates **all** HTTP traffic to the bundled Claude
Code CLI subprocess. Once the CLI is spawned, the only seam left is
the network — there is no in-process hook for "modify outgoing
request before it leaves the CLI". The proxy lives at that seam.
This module is intentionally orthogonal to the
:attr:`ChatConfig.claude_agent_cli_path` override:
* ``cli_path`` lets us swap **which CLI binary** we run.
* this proxy lets us **rewrite what any CLI binary sends**.
The two can be combined or used independently.
.. _PR #12294: https://github.com/Significant-Gravitas/AutoGPT/pull/12294
.. _claude-agent-sdk-python#789: https://github.com/anthropics/claude-agent-sdk-python/issues/789
"""
from __future__ import annotations
import asyncio
import json
import logging
from typing import Any
import aiohttp
from aiohttp import web
logger = logging.getLogger(__name__)
# Header values OpenRouter rejects. We strip exactly these tokens from
# the comma-separated ``anthropic-beta`` header value (preserving any
# other betas the CLI requests).
_FORBIDDEN_BETA_TOKENS: frozenset[str] = frozenset(
{
"context-management-2025-06-27",
}
)
# Hop-by-hop headers we must NOT forward through the proxy. Per
# RFC 7230 §6.1, these are connection-specific and must be regenerated
# by each intermediary. ``host`` is also stripped because aiohttp
# generates the correct ``Host`` header for the upstream URL itself.
#
# The canonical header name defined in RFC 7230 §4.4 is ``Trailer``
# (singular); some SDKs / legacy proxies also emit the plural
# ``Trailers`` so we accept both forms just in case. Intermediaries
# must additionally drop every header name listed in the incoming
# ``Connection`` field value (§6.1 "extension hop-by-hop headers") —
# that's handled dynamically by :func:`clean_request_headers`.
_HOP_BY_HOP_HEADERS: frozenset[str] = frozenset(
{
"connection",
"keep-alive",
"proxy-authenticate",
"proxy-authorization",
"te",
"trailer",
"trailers",
"transfer-encoding",
"upgrade",
"host",
# ``content-length`` is stripped because we may rewrite the
# body — aiohttp will recompute it on the upstream request.
"content-length",
}
)
# ---------------------------------------------------------------------------
# Pure helpers — exported so the unit tests can drive them directly without
# spinning up a server.
# ---------------------------------------------------------------------------
def strip_tool_reference_blocks(payload: Any) -> Any:
    """Recursively remove ``tool_reference`` content blocks from
    *payload*, returning the cleaned structure.

    The CLI's built-in ``ToolSearch`` tool emits these as part of
    ``tool_result.content``::

        {"type": "tool_reference", "tool_name": "mcp__copilot__find_block"}

    OpenRouter's stricter Zod validation rejects them. Removing them
    is safe — they are metadata about which tools were searched, not
    real model-visible content. The CLI's *internal* state still
    contains them; only the wire format is rewritten.
    """
    if isinstance(payload, dict):
        # A dict that IS a tool_reference block is signalled to the
        # caller as None ("remove me"); containers below filter it out.
        if payload.get("type") == "tool_reference":
            return None
        scrubbed_dict: dict[str, Any] = {}
        for field, child in payload.items():
            scrubbed_child = strip_tool_reference_blocks(child)
            # A dict-valued child that collapsed to None was a
            # tool_reference block: drop the key entirely instead of
            # writing `null` (schema-strict upstreams would still
            # reject it). Genuine None inputs are preserved because
            # the original value wasn't a dict.
            if scrubbed_child is None and isinstance(child, dict):
                continue
            scrubbed_dict[field] = scrubbed_child
        return scrubbed_dict
    if isinstance(payload, list):
        # Pair each entry with its cleaned form so we can tell a
        # removed block (dict -> None) apart from a genuine None.
        pairs = ((entry, strip_tool_reference_blocks(entry)) for entry in payload)
        return [
            cleaned
            for original, cleaned in pairs
            if not (cleaned is None and isinstance(original, dict))
        ]
    # Scalars (str / int / None / ...) pass through untouched.
    return payload
def strip_forbidden_betas_from_body(payload: Any) -> Any:
    """Remove forbidden tokens from the ``betas`` array of an
    Anthropic Messages API request body, if present.

    Returns a shallow copy with the ``betas`` key cleaned — the input
    dict is never mutated.

    The Messages API accepts a top-level ``betas: list[str]`` parameter
    used to opt into beta features. We drop tokens in
    :data:`_FORBIDDEN_BETA_TOKENS` so OpenRouter's check passes.
    """
    if not isinstance(payload, dict):
        return payload
    raw_betas = payload.get("betas")
    if not isinstance(raw_betas, list):
        # No betas array (or a malformed one) — nothing to rewrite.
        return payload
    surviving = [token for token in raw_betas if token not in _FORBIDDEN_BETA_TOKENS]
    # Shallow-copy without the key, then re-attach only if anything
    # survived — an empty ``betas: []`` is dropped entirely.
    rebuilt = {key: value for key, value in payload.items() if key != "betas"}
    if surviving:
        rebuilt["betas"] = surviving
    return rebuilt
def strip_forbidden_anthropic_beta_header(value: str | None) -> str | None:
    """Return *value* with forbidden tokens removed.

    The ``anthropic-beta`` HTTP header is a comma-separated list of
    feature flags. We strip exactly the forbidden tokens, preserving
    any others. Returns ``None`` if nothing remains (so the caller
    can drop the header entirely).
    """
    if not value:
        # None or empty string — pass through unchanged.
        return value
    surviving = [
        piece.strip()
        for piece in value.split(",")
        if piece.strip() and piece.strip() not in _FORBIDDEN_BETA_TOKENS
    ]
    return ", ".join(surviving) if surviving else None
def clean_request_body_bytes(body_bytes: bytes) -> bytes:
    """Apply both body-level strippers to *body_bytes*, returning the
    cleaned JSON.

    Falls back to the original bytes when the body isn't valid JSON
    (the CLI shouldn't be sending non-JSON to the Messages API, but be
    defensive).
    """
    if not body_bytes:
        return body_bytes
    try:
        parsed = json.loads(body_bytes.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        # Not JSON — forward verbatim rather than guessing.
        return body_bytes
    # tool_reference blocks first, then the betas array; compact
    # separators keep the re-serialised body as small as possible.
    cleaned = strip_forbidden_betas_from_body(strip_tool_reference_blocks(parsed))
    return json.dumps(cleaned, separators=(",", ":")).encode("utf-8")
def _parse_connection_tokens(headers: dict[str, str]) -> set[str]:
"""Extract hop-by-hop header names from the ``Connection`` field."""
connection_header = next(
(value for name, value in headers.items() if name.lower() == "connection"),
"",
)
return {
token.strip().lower() for token in connection_header.split(",") if token.strip()
}
def clean_request_headers(headers: dict[str, str]) -> dict[str, str]:
    """Drop hop-by-hop headers and rewrite ``anthropic-beta`` to remove
    forbidden tokens. Returns a fresh dict the caller can pass through
    to the upstream client without further mutation.

    Per RFC 7230 section 6.1, intermediaries must drop the static hop-by-hop
    set above **and** every header name listed in the incoming
    ``Connection`` field value (case-insensitive). The latter is how
    extension hop-by-hop headers are signalled per-connection.

    Callers should pass an already-materialised ``dict`` (e.g.
    ``dict(request.headers)``) so this function stays simple.
    """
    dynamic_hops = _parse_connection_tokens(headers)
    result: dict[str, str] = {}
    for name, value in headers.items():
        folded = name.lower()
        if folded in _HOP_BY_HOP_HEADERS or folded in dynamic_hops:
            continue
        if folded != "anthropic-beta":
            result[name] = value
            continue
        # ``anthropic-beta``: rewrite the token list; drop the header
        # outright when nothing survives.
        rewritten = strip_forbidden_anthropic_beta_header(value)
        if rewritten is not None:
            result[name] = rewritten
    return result
def clean_response_headers(
    headers: "Any",
) -> list[tuple[str, str]]:
    """Like :func:`clean_request_headers` but preserves multi-valued
    headers (e.g. ``Set-Cookie``). Accepts any mapping-like object
    whose ``.items()`` yields ``(name, value)`` pairs — including
    aiohttp's ``CIMultiDictProxy`` which can have duplicate keys.

    Returns a list of ``(name, value)`` tuples suitable for passing
    to ``web.StreamResponse(headers=...)`` via ``CIMultiDict``.
    """
    # First pass: pick up extension hop-by-hop names from the first
    # ``Connection`` header, if any.
    extension_hops: set[str] = set()
    for name, value in headers.items():
        if name.lower() == "connection":
            extension_hops = {
                piece.strip().lower() for piece in value.split(",") if piece.strip()
            }
            break
    # Second pass: filter and rewrite, keeping duplicates intact.
    kept: list[tuple[str, str]] = []
    for name, value in headers.items():
        folded = name.lower()
        if folded in _HOP_BY_HOP_HEADERS or folded in extension_hops:
            continue
        if folded == "anthropic-beta":
            rewritten = strip_forbidden_anthropic_beta_header(value)
            if rewritten is not None:
                kept.append((name, rewritten))
            continue
        kept.append((name, value))
    return kept
# ---------------------------------------------------------------------------
# The proxy server
# ---------------------------------------------------------------------------
class OpenRouterCompatProxy:
    """In-process HTTP proxy that rewrites Claude Code CLI requests on
    the way to OpenRouter (or any other Anthropic-compatible gateway).

    Usage::

        proxy = OpenRouterCompatProxy(target_base_url="https://openrouter.ai/api/v1")
        await proxy.start()
        try:
            # Spawn the CLI with ANTHROPIC_BASE_URL=proxy.local_url
            ...
        finally:
            await proxy.stop()
    """

    def __init__(
        self,
        target_base_url: str,
        *,
        bind_host: str = "127.0.0.1",
        request_timeout: float = 600.0,
    ) -> None:
        # Args:
        #   target_base_url: upstream to forward to; may itself contain
        #     a path (e.g. ``https://openrouter.ai/api/v1``). A trailing
        #     slash is stripped so path concatenation in ``_handle``
        #     never produces a double slash.
        #   bind_host: local interface to listen on (loopback by default).
        #   request_timeout: used as both sock_connect and sock_read
        #     timeout for the upstream client — see ``start()``.
        self._target_base_url = target_base_url.rstrip("/")
        self._bind_host = bind_host
        self._request_timeout = request_timeout
        # All three stay None until ``start()`` completes successfully.
        self._runner: web.AppRunner | None = None
        self._client: aiohttp.ClientSession | None = None
        self._port: int | None = None

    @property
    def local_url(self) -> str:
        """The ``http://host:port`` URL that the CLI should use as
        ``ANTHROPIC_BASE_URL``. Raises if :meth:`start` has not been
        called yet."""
        if self._port is None:
            raise RuntimeError("Proxy is not running — call start() first.")
        return f"http://{self._bind_host}:{self._port}"

    @property
    def target_base_url(self) -> str:
        """The upstream URL the proxy is forwarding to."""
        return self._target_base_url

    async def start(self) -> None:
        """Bind to a random local port and start serving.

        Cleans up the ``ClientSession`` and the ``AppRunner`` on any
        failure during setup so a partially-initialised proxy never
        leaves resources dangling (covers the
        ``runner.setup() / site.start()`` raise paths in addition to
        the explicit bind-failure branches below).
        """
        if self._runner is not None:
            return  # already started
        # Use sock_connect + sock_read instead of total so long-lived
        # SSE / streaming responses aren't killed after request_timeout.
        # total=None means no cumulative limit; sock_read is the per-chunk
        # idle timeout (time between data arriving on the socket).
        client = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(
                total=None,
                sock_connect=self._request_timeout,
                sock_read=self._request_timeout,
            )
        )
        app = web.Application()
        # Catch every method + path so we can also forward GETs
        # (the CLI may probe profile / model endpoints).
        app.router.add_route("*", "/{tail:.*}", self._handle)
        runner = web.AppRunner(app)
        runner_setup = False
        try:
            await runner.setup()
            runner_setup = True
            # Port 0 asks the OS for any free port; the actual port is
            # read back off the bound socket below.
            site = web.TCPSite(runner, self._bind_host, 0)
            await site.start()
            server = site._server
            if server is None:
                raise RuntimeError("Failed to bind compat proxy server.")
            sockets = getattr(server, "sockets", None)
            if not sockets:
                raise RuntimeError("Compat proxy server has no listening sockets.")
            self._port = sockets[0].getsockname()[1]
        except BaseException:
            # Best-effort teardown — swallow secondary errors so the
            # caller sees the original exception.
            if runner_setup:
                try:
                    await runner.cleanup()
                except Exception:  # pragma: no cover - cleanup-only path
                    logger.exception("compat proxy runner cleanup failed")
            try:
                await client.close()
            except Exception:  # pragma: no cover - cleanup-only path
                logger.exception("compat proxy client close failed")
            raise
        # Only publish the attributes after everything is wired up so
        # ``stop()`` and ``local_url`` observe a consistent state.
        self._client = client
        self._runner = runner
        # Deliberately log only the local bind port — never the
        # upstream URL or any derived component. CodeQL's
        # `py/clear-text-logging-sensitive-data` taint analysis traces
        # everything that originates from a config-supplied URL as
        # potentially-sensitive even after parsing, and the upstream
        # endpoint is anyway discoverable from the config the operator
        # already has access to. The detailed upstream is exposed via
        # the ``target_base_url`` property for callers that need it.
        logger.info(
            "OpenRouter compat proxy listening on %s:%d",
            self._bind_host,
            self._port,
        )

    async def stop(self) -> None:
        """Stop accepting connections and release the port.

        Idempotent: every branch guards on ``None``, so calling this
        twice (or before a successful ``start()``) is safe.
        """
        if self._runner is not None:
            await self._runner.cleanup()
            self._runner = None
        if self._client is not None:
            await self._client.close()
            self._client = None
        self._port = None

    async def __aenter__(self) -> "OpenRouterCompatProxy":
        # Async-context-manager support: ``async with OpenRouterCompatProxy(...)``.
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        await self.stop()

    async def _handle(self, request: web.Request) -> web.StreamResponse:
        """Forward *request* to the upstream after stripping forbidden
        features. Streams the upstream response back to the caller
        chunk-by-chunk so SSE / streamed responses work."""
        if self._client is None:
            # Handler ran after stop() (or before start() finished) —
            # surface a 500 rather than an AttributeError.
            raise web.HTTPInternalServerError(reason="proxy client missing")
        # Build the upstream URL. ``request.path_qs`` includes the
        # query string verbatim. ``request.path`` for ``/v1/messages``
        # is just ``/v1/messages`` — we strip a leading slash and
        # concat with the target base URL.
        upstream_path = request.path_qs
        if not upstream_path.startswith("/"):
            upstream_path = "/" + upstream_path
        # Allow the target_base_url to itself contain a path (e.g.
        # ``https://openrouter.ai/api/v1``). In that case requests to
        # ``/v1/messages`` need to become ``/api/v1/messages``, not
        # ``/api/v1/v1/messages``. Strip a leading ``/v1`` from the
        # incoming path if the target already ends with ``/v1`` (or
        # similar API-version segment).
        # Deduplicate API version prefix: if the target URL already
        # contains a versioned path segment (e.g. ``/api/v1``) and the
        # incoming request path starts with the same segment, strip it
        # to avoid ``/api/v1/v1/messages``.
        from urllib.parse import urlparse

        target_base = self._target_base_url
        target_path = urlparse(target_base).path.rstrip("/")
        if target_path and upstream_path.startswith(target_path + "/"):
            upstream_path = upstream_path[len(target_path) :]
        elif target_path and upstream_path == target_path:
            upstream_path = "/"
        upstream_url = f"{target_base}{upstream_path}"
        # Read the whole request body up front so the strippers can
        # parse and rewrite it before forwarding.
        body_bytes = await request.read()
        cleaned_body = clean_request_body_bytes(body_bytes)
        cleaned_headers = clean_request_headers(dict(request.headers))
        try:
            upstream_response = await self._client.request(
                method=request.method,
                url=upstream_url,
                data=cleaned_body if cleaned_body else None,
                headers=cleaned_headers,
                allow_redirects=False,
            )
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            # ``aiohttp.ClientTimeout`` raises ``asyncio.TimeoutError``
            # (not ``aiohttp.ClientError``) on hung upstreams, so both
            # must be caught here to surface the explicit 502 failure
            # mode this proxy guarantees.
            #
            # Log the detailed error for ops, but return a generic
            # message to the caller — exception strings can leak
            # internal hostnames, ports, or stack frames (CodeQL
            # `py/stack-trace-exposure`).
            logger.warning(
                "OpenRouter compat proxy upstream error: %s", type(e).__name__
            )
            return web.Response(status=502, text="upstream error")
        # Stream the response back unchanged (apart from hop-by-hop
        # header filtering). Use clean_response_headers to preserve
        # multi-valued headers like Set-Cookie that dict() would drop.
        from multidict import CIMultiDict

        downstream = web.StreamResponse(
            status=upstream_response.status,
            headers=CIMultiDict(clean_response_headers(upstream_response.headers)),
        )
        await downstream.prepare(request)
        # Track whether the stream terminated cleanly. A mid-stream
        # ``aiohttp.ClientError`` means the upstream died before
        # finishing; calling ``write_eof()`` on that partial response
        # would signal "complete stream" to the downstream client and
        # silently corrupt the body. Skip the EOF on the error path
        # so the client's connection is dropped instead, surfacing the
        # failure correctly.
        cancelled = False
        stream_error: aiohttp.ClientError | None = None
        try:
            async for chunk in upstream_response.content.iter_any():
                await downstream.write(chunk)
        except asyncio.CancelledError:
            # Never suppress cancellation — since Python 3.8 it's a
            # ``BaseException`` subclass precisely so catching
            # ``Exception`` won't accidentally swallow it. Release
            # the upstream body and re-raise so the asyncio task
            # cooperatively unwinds (avoids hanging shutdowns /
            # stuck request handlers).
            cancelled = True
            upstream_response.release()
            raise
        except aiohttp.ClientError as e:
            stream_error = e
            logger.warning(
                "OpenRouter compat proxy stream interrupted: %s", type(e).__name__
            )
        finally:
            if not cancelled:
                upstream_response.release()
        if stream_error is not None:
            # Do NOT call ``write_eof`` or return the prepared
            # ``downstream`` here — aiohttp finalises a returned
            # StreamResponse (writing the terminating chunk /
            # content-length / EOF) even if we skipped ``write_eof``
            # ourselves, which would signal a clean end of stream to
            # the client on top of the truncated body. Instead abort
            # the underlying transport directly so the client's
            # parser surfaces a ``ClientPayloadError`` /
            # ``ServerDisconnectedError`` and the caller can retry /
            # surface the failure instead of silently consuming a
            # corrupt body.
            try:
                downstream.force_close()
            except Exception:  # pragma: no cover - defensive on transport
                pass
            transport = request.transport
            if transport is not None:
                try:
                    transport.abort()
                except Exception:  # pragma: no cover - defensive on transport
                    pass
            # Re-raise the original stream error so aiohttp treats
            # this handler as having failed; the transport is
            # already aborted above so the client sees an abrupt
            # disconnect either way.
            raise stream_error
        await downstream.write_eof()
        return downstream

View File

@@ -1,695 +0,0 @@
"""Tests for the OpenRouter compatibility proxy.
The proxy strips two known forbidden patterns from requests so newer
``claude-agent-sdk`` / Claude Code CLI versions can talk to OpenRouter
through the unchanged transport. These tests cover both:
* the pure stripping helpers (deterministic, no I/O), and
* the end-to-end proxy behaviour against a fake upstream server, so we
catch hop-by-hop header bugs and streaming regressions.
See ``openrouter_compat_proxy.py`` for the rationale and the upstream
issues being worked around.
"""
from __future__ import annotations
import asyncio
import json
from typing import Any
import aiohttp
import pytest
from aiohttp import web
from backend.copilot.sdk.openrouter_compat_proxy import (
_FORBIDDEN_BETA_TOKENS,
_HOP_BY_HOP_HEADERS,
OpenRouterCompatProxy,
clean_request_body_bytes,
clean_request_headers,
strip_forbidden_anthropic_beta_header,
strip_forbidden_betas_from_body,
strip_tool_reference_blocks,
)
# ---------------------------------------------------------------------------
# strip_tool_reference_blocks
# ---------------------------------------------------------------------------
class TestStripToolReferenceBlocks:
    """Unit tests for ``strip_tool_reference_blocks``.

    The Claude Code CLI's built-in ToolSearch tool injects
    ``tool_reference`` content blocks into ``tool_result.content``;
    OpenRouter's stricter Zod validation rejects them, so the helper
    must drop them wholesale while leaving everything else untouched.
    """

    def test_removes_tool_reference_block_at_top_level(self):
        ref_block = {"type": "tool_reference", "tool_name": "find_block"}
        assert strip_tool_reference_blocks(ref_block) is None

    def test_removes_tool_reference_block_from_list(self):
        mixed = [
            {"type": "text", "text": "hello"},
            {"type": "tool_reference", "tool_name": "find_block"},
            {"type": "text", "text": "world"},
        ]
        survivors = strip_tool_reference_blocks(mixed)
        assert survivors == [
            {"type": "text", "text": "hello"},
            {"type": "text", "text": "world"},
        ]

    def test_strips_nested_tool_reference_inside_tool_result(self):
        # Exact shape root-caused in PR #12294: the offending block
        # lives inside tool_result.content.
        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": "tu_1",
                            "content": [
                                {"type": "text", "text": "result text"},
                                {
                                    "type": "tool_reference",
                                    "tool_name": "mcp__copilot__find_block",
                                },
                            ],
                        }
                    ],
                }
            ]
        }
        cleaned = strip_tool_reference_blocks(payload)
        inner = cleaned["messages"][0]["content"][0]["content"]
        assert inner == [{"type": "text", "text": "result text"}]

    def test_preserves_unrelated_payloads(self):
        untouched = {
            "model": "claude-opus-4.6",
            "messages": [{"role": "user", "content": "hi"}],
            "temperature": 0.7,
        }
        assert strip_tool_reference_blocks(untouched) == untouched

    def test_handles_empty_and_primitive_inputs(self):
        # Primitives and empty containers pass straight through.
        for value in ({}, [], "plain string", 42):
            assert strip_tool_reference_blocks(value) == value
        assert strip_tool_reference_blocks(None) is None

    def test_removes_dict_valued_tool_reference_child_entirely(self):
        # Regression guard: a dict-valued tool_reference child used to
        # be rewritten to ``null`` (still schema-invalid upstream) —
        # the parent key must disappear entirely instead.
        payload = {
            "wrapper": {"type": "tool_reference", "tool_name": "find_block"},
            "keep": "value",
        }
        cleaned = strip_tool_reference_blocks(payload)
        assert "wrapper" not in cleaned
        assert cleaned["keep"] == "value"

    def test_preserves_genuine_none_values_on_non_dict_children(self):
        payload = {"explicit_null": None, "text": "ok"}
        assert strip_tool_reference_blocks(payload) == {
            "explicit_null": None,
            "text": "ok",
        }
# ---------------------------------------------------------------------------
# strip_forbidden_betas_from_body
# ---------------------------------------------------------------------------
class TestStripForbiddenBetasFromBody:
    """OpenRouter returns 400 for ``context-management-2025-06-27`` in
    the request body's ``betas`` array — the helper must excise it."""

    def test_removes_forbidden_token_keeps_others(self):
        cleaned = strip_forbidden_betas_from_body(
            {
                "model": "claude-opus-4.6",
                "betas": [
                    "context-management-2025-06-27",
                    "fine-grained-tool-streaming-2025",
                ],
            }
        )
        assert cleaned["betas"] == ["fine-grained-tool-streaming-2025"]

    def test_removes_betas_field_entirely_when_only_forbidden(self):
        cleaned = strip_forbidden_betas_from_body(
            {"model": "x", "betas": ["context-management-2025-06-27"]}
        )
        assert "betas" not in cleaned

    def test_no_op_when_no_betas_field(self):
        assert strip_forbidden_betas_from_body({"model": "x"}) == {"model": "x"}

    def test_no_op_on_non_dict(self):
        # Non-dict payloads must pass through untouched.
        for value in ([1, 2, 3], "plain"):
            assert strip_forbidden_betas_from_body(value) == value

    def test_all_forbidden_tokens_constants_are_recognized(self):
        # Every token in the module-level constant must actually be stripped.
        for token in _FORBIDDEN_BETA_TOKENS:
            cleaned = strip_forbidden_betas_from_body({"betas": [token, "other"]})
            assert token not in cleaned["betas"]
# ---------------------------------------------------------------------------
# strip_forbidden_anthropic_beta_header
# ---------------------------------------------------------------------------
class TestStripForbiddenAnthropicBetaHeader:
    """Header-level counterpart of the body cleanup: the
    ``anthropic-beta`` value is rewritten so only allowed tokens
    survive; ``None`` signals the header should be dropped."""

    def test_removes_forbidden_token_keeps_others(self):
        raw = "fine-grained-tool-streaming-2025, context-management-2025-06-27, other-beta"
        cleaned = strip_forbidden_anthropic_beta_header(raw)
        assert cleaned == "fine-grained-tool-streaming-2025, other-beta"

    def test_returns_none_when_only_forbidden_token_present(self):
        only_forbidden = "context-management-2025-06-27"
        assert strip_forbidden_anthropic_beta_header(only_forbidden) is None

    def test_passes_through_clean_header(self):
        assert strip_forbidden_anthropic_beta_header("foo, bar") == "foo, bar"

    def test_handles_empty_and_none_input(self):
        assert strip_forbidden_anthropic_beta_header("") == ""
        assert strip_forbidden_anthropic_beta_header(None) is None

    def test_handles_extra_whitespace(self):
        padded = " context-management-2025-06-27 , fine-grained "
        assert strip_forbidden_anthropic_beta_header(padded) == "fine-grained"
# ---------------------------------------------------------------------------
# clean_request_body_bytes — combined body-level cleanup
# ---------------------------------------------------------------------------
class TestCleanRequestBodyBytes:
    """``clean_request_body_bytes`` applies both body-level rewrites
    (beta stripping + tool_reference removal) in one pass and must be
    a byte-level no-op for anything it cannot parse as JSON."""

    def test_strips_both_patterns_in_one_pass(self):
        original = {
            "model": "claude-opus-4.6",
            "betas": ["context-management-2025-06-27"],
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": "tu_1",
                            "content": [
                                {"type": "tool_reference", "tool_name": "find"},
                                {"type": "text", "text": "ok"},
                            ],
                        }
                    ],
                }
            ],
        }
        raw = json.dumps(original).encode("utf-8")
        cleaned = json.loads(clean_request_body_bytes(raw).decode("utf-8"))
        # The only beta was forbidden, so the whole field is dropped.
        assert "betas" not in cleaned
        inner = cleaned["messages"][0]["content"][0]["content"]
        assert inner == [{"type": "text", "text": "ok"}]

    def test_passes_through_non_json_body(self):
        garbage = b"\xff\xfe not json at all"
        assert clean_request_body_bytes(garbage) == garbage

    def test_passes_through_empty_body(self):
        assert clean_request_body_bytes(b"") == b""
# ---------------------------------------------------------------------------
# clean_request_headers — hop-by-hop + anthropic-beta cleanup
# ---------------------------------------------------------------------------
class TestCleanRequestHeaders:
    """Header cleanup: drop hop-by-hop headers (RFC 7230) and rewrite
    the ``anthropic-beta`` value before forwarding upstream."""

    def test_drops_hop_by_hop_headers(self):
        cleaned = clean_request_headers(
            {
                "Host": "example.com",
                "Connection": "keep-alive",
                "Content-Length": "42",
                "Authorization": "Bearer xxx",
                "Content-Type": "application/json",
            }
        )
        for dropped in ("Host", "Connection", "Content-Length"):
            assert dropped not in cleaned
        assert cleaned["Authorization"] == "Bearer xxx"
        assert cleaned["Content-Type"] == "application/json"

    def test_strips_forbidden_token_from_anthropic_beta_header(self):
        cleaned = clean_request_headers(
            {
                "anthropic-beta": "context-management-2025-06-27, other-beta",
                "Authorization": "Bearer x",
            }
        )
        assert cleaned["anthropic-beta"] == "other-beta"

    def test_drops_anthropic_beta_header_when_only_forbidden(self):
        cleaned = clean_request_headers(
            {"anthropic-beta": "context-management-2025-06-27"}
        )
        assert "anthropic-beta" not in cleaned

    def test_hop_by_hop_set_completeness(self):
        # Canary: keep the canonical RFC 7230 names in the module set so
        # upstream edits that shrink it are caught loudly.
        for name in ("connection", "transfer-encoding", "host", "trailer", "trailers"):
            assert name in _HOP_BY_HOP_HEADERS

    def test_drops_headers_listed_in_connection_field(self):
        # RFC 7230 §6.1: an intermediary must also drop every header
        # named in the incoming Connection field value (extension
        # hop-by-hop headers signalled per-connection).
        cleaned = clean_request_headers(
            {
                "Connection": "X-Custom-Hop, Upgrade",
                "X-Custom-Hop": "secret-extension",
                "Authorization": "Bearer x",
                "X-Keep": "ok",
            }
        )
        assert "X-Custom-Hop" not in cleaned
        # Connection itself is hop-by-hop; the rest pass through.
        assert "Connection" not in cleaned
        assert cleaned["Authorization"] == "Bearer x"
        assert cleaned["X-Keep"] == "ok"

    def test_connection_token_matching_is_case_insensitive(self):
        cleaned = clean_request_headers(
            {
                "Connection": "x-hop-HEADER",
                "X-Hop-Header": "drop-me",
                "X-Keep": "ok",
            }
        )
        assert "X-Hop-Header" not in cleaned
        assert cleaned["X-Keep"] == "ok"
# ---------------------------------------------------------------------------
# End-to-end: real proxy + fake upstream
# ---------------------------------------------------------------------------
class _FakeUpstream:
    """Minimal aiohttp app that records every request the proxy
    forwards, so tests can assert on exactly what reached upstream."""

    def __init__(self) -> None:
        self.captured: list[dict[str, Any]] = []
        self._runner: web.AppRunner | None = None
        self.port: int = 0

    async def start(self) -> str:
        """Bind to an ephemeral localhost port and return the base URL."""

        async def handler(request: web.Request) -> web.StreamResponse:
            payload = await request.text()
            self.captured.append(
                {
                    "method": request.method,
                    "path": request.path_qs,
                    "headers": {k: v for k, v in request.headers.items()},
                    "body": payload,
                }
            )
            # Minimal JSON success so the proxy has a body to stream back.
            return web.json_response({"ok": True, "echoed": payload})

        app = web.Application()
        app.router.add_route("*", "/{tail:.*}", handler)
        self._runner = web.AppRunner(app)
        await self._runner.setup()
        site = web.TCPSite(self._runner, "127.0.0.1", 0)
        await site.start()
        server = site._server
        assert server is not None
        sockets = getattr(server, "sockets", None)
        assert sockets is not None
        self.port = sockets[0].getsockname()[1]
        return f"http://127.0.0.1:{self.port}"

    async def stop(self) -> None:
        if self._runner is not None:
            await self._runner.cleanup()
            self._runner = None
@pytest.mark.asyncio
async def test_proxy_strips_tool_reference_block_end_to_end():
    """Full round-trip: a ``tool_reference`` block in the request body
    must never reach the upstream server."""
    upstream = _FakeUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url)
    await proxy.start()
    try:
        request_body = {
            "model": "claude-opus-4.6",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "hi"},
                        {
                            "type": "tool_reference",
                            "tool_name": "mcp__copilot__find_block",
                        },
                    ],
                }
            ],
        }
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json=request_body,
                headers={"Authorization": "Bearer test"},
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()
    assert len(upstream.captured) == 1
    raw_forwarded = upstream.captured[0]["body"]
    # The tool_reference block must NOT appear in the upstream-visible body.
    assert '"tool_reference"' not in raw_forwarded
    forwarded = json.loads(raw_forwarded)
    assert forwarded["messages"][0]["content"] == [{"type": "text", "text": "hi"}]
@pytest.mark.asyncio
async def test_proxy_strips_context_management_beta_header_end_to_end():
    """Only the forbidden token is removed from ``anthropic-beta``;
    remaining tokens must still reach the upstream."""
    upstream = _FakeUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url)
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json={"model": "x", "messages": []},
                headers={
                    "Authorization": "Bearer test",
                    "anthropic-beta": "context-management-2025-06-27, other-beta",
                },
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()
    seen_headers = upstream.captured[0]["headers"]
    # The header is rewritten to keep only the non-forbidden token.
    assert any(
        name.lower() == "anthropic-beta" and value == "other-beta"
        for name, value in seen_headers.items()
    )
@pytest.mark.asyncio
async def test_proxy_strips_betas_from_request_body_end_to_end():
    """The body-level ``betas`` cleanup must apply on the wire, not
    just in the pure helper."""
    upstream = _FakeUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url)
    await proxy.start()
    try:
        request_body = {
            "model": "x",
            "betas": [
                "context-management-2025-06-27",
                "fine-grained-tool-streaming-2025",
            ],
            "messages": [],
        }
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json=request_body,
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()
    forwarded = json.loads(upstream.captured[0]["body"])
    # Only the allowed beta token survives the rewrite.
    assert forwarded["betas"] == ["fine-grained-tool-streaming-2025"]
@pytest.mark.asyncio
async def test_proxy_passes_through_clean_request_unchanged():
    """Requests containing none of the forbidden patterns must be
    forwarded verbatim — the proxy may not rewrite anything else."""
    upstream = _FakeUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url)
    await proxy.start()
    try:
        request_body = {
            "model": "claude-opus-4.6",
            "messages": [{"role": "user", "content": "hello"}],
            "temperature": 0.7,
        }
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json=request_body,
                headers={
                    "Authorization": "Bearer test",
                    "Content-Type": "application/json",
                },
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()
    assert json.loads(upstream.captured[0]["body"]) == request_body
@pytest.mark.asyncio
async def test_proxy_returns_502_on_upstream_failure():
    """An unreachable upstream must surface as a clear 502 from the
    proxy, never a silent hang.

    Note: the client below talks to the *proxy* on localhost, not to
    the dead upstream. Do NOT swallow ``aiohttp.ClientError`` /
    ``asyncio.TimeoutError`` on this outer call — that would mask a
    proxy crash and turn the assertion into a false positive; let any
    such exception fail the test.
    """
    proxy = OpenRouterCompatProxy(
        target_base_url="http://127.0.0.1:1",  # nothing listening
    )
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json={"model": "x"},
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                assert resp.status == 502
                body_text = await resp.text()
                # Generic error message — no internal hostname leaked.
                assert "upstream error" in body_text
    finally:
        await proxy.stop()
@pytest.mark.asyncio
async def test_proxy_returns_502_on_upstream_timeout():
    """``aiohttp.ClientTimeout`` raises ``asyncio.TimeoutError`` (not
    ``aiohttp.ClientError``), which previously escaped the except block
    and surfaced as a 500. Regression-guards the 502 contract for hung
    upstreams."""

    class _HangingUpstream:
        """Accepts the request but never completes the response body,
        forcing the proxy-side client timeout to fire."""

        def __init__(self) -> None:
            self._runner: web.AppRunner | None = None
            self.port: int = 0

        async def start(self) -> str:
            async def handler(request: web.Request) -> web.StreamResponse:
                # Outlive the proxy's client timeout so aiohttp raises
                # TimeoutError on the proxy side.
                await asyncio.sleep(30)
                return web.Response(status=200)

            app = web.Application()
            app.router.add_route("*", "/{tail:.*}", handler)
            self._runner = web.AppRunner(app)
            await self._runner.setup()
            site = web.TCPSite(self._runner, "127.0.0.1", 0)
            await site.start()
            server = site._server
            assert server is not None
            sockets = getattr(server, "sockets", None)
            assert sockets is not None
            self.port = sockets[0].getsockname()[1]
            return f"http://127.0.0.1:{self.port}"

        async def stop(self) -> None:
            if self._runner is not None:
                await self._runner.cleanup()
                self._runner = None

    upstream = _HangingUpstream()
    base_url = await upstream.start()
    # Short proxy-side timeout keeps the test fast.
    proxy = OpenRouterCompatProxy(target_base_url=base_url, request_timeout=0.5)
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json={"model": "x"},
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                assert resp.status == 502
                body_text = await resp.text()
                # Generic error message — no internal hostname leaked.
                assert "upstream error" in body_text
    finally:
        await proxy.stop()
        await upstream.stop()
@pytest.mark.asyncio
async def test_proxy_does_not_signal_clean_eof_on_mid_stream_error():
    """Regression guard: when the upstream dies mid-body the proxy must
    NOT call ``write_eof()`` — that would present the truncated body to
    the client as a complete, valid stream. The proxy instead drops the
    connection so the client's parser raises a transport error.

    The failure is simulated with a raw asyncio TCP server that sends a
    chunked-encoding response header plus one partial chunk and then
    hard-closes the socket — the one failure mode aiohttp's
    ``iter_any()`` reliably surfaces as an ``aiohttp.ClientError``
    rather than an ordinary clean EOF.
    """

    class _TruncatingUpstream:
        """Raw TCP server that emits a partial chunked body, then
        closes the socket without the terminating chunk."""

        def __init__(self) -> None:
            self._server: asyncio.base_events.Server | None = None
            self.port: int = 0

        async def start(self) -> str:
            async def handle_conn(
                reader: asyncio.StreamReader,
                writer: asyncio.StreamWriter,
            ) -> None:
                try:
                    # Drain the request head up to the blank line — the
                    # request contents are irrelevant here.
                    while True:
                        line = await reader.readline()
                        if not line or line == b"\r\n":
                            break
                    # Chunked response carrying exactly one partial chunk.
                    writer.write(
                        b"HTTP/1.1 200 OK\r\n"
                        b"Content-Type: application/octet-stream\r\n"
                        b"Transfer-Encoding: chunked\r\n"
                        b"Connection: close\r\n"
                        b"\r\n"
                        # One chunk, size 8, content "partial-".
                        b"8\r\n"
                        b"partial-\r\n"
                        # Deliberately omit the terminating "0\r\n\r\n" —
                        # this is the mid-stream truncation under test.
                    )
                    await writer.drain()
                finally:
                    # Hard-close so the proxy's iter_any() sees an abrupt
                    # end-of-stream rather than a clean close.
                    try:
                        writer.transport.abort()
                    except Exception:
                        pass

            self._server = await asyncio.start_server(handle_conn, "127.0.0.1", 0)
            sockets = self._server.sockets
            assert sockets is not None
            self.port = sockets[0].getsockname()[1]
            return f"http://127.0.0.1:{self.port}"

        async def stop(self) -> None:
            if self._server is not None:
                self._server.close()
                await self._server.wait_closed()
                self._server = None

    upstream = _TruncatingUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url, request_timeout=5.0)
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            observed_error: Exception | None = None
            try:
                async with client.post(
                    f"{proxy.local_url}/v1/messages",
                    json={"model": "x"},
                    timeout=aiohttp.ClientTimeout(total=10),
                ) as resp:
                    # Either the request raises here, or the truncated
                    # body triggers a transport-level failure on read —
                    # both surface the truncation instead of silently
                    # reporting success.
                    await resp.read()
            except (
                aiohttp.ClientPayloadError,
                aiohttp.ClientConnectionError,
                aiohttp.ServerDisconnectedError,
            ) as exc:
                observed_error = exc
            assert observed_error is not None, (
                "Proxy silently consumed an upstream mid-stream "
                "failure and returned a clean EOF to the client — "
                "regression in the stream-error path."
            )
    finally:
        await proxy.stop()
        await upstream.stop()
@pytest.mark.asyncio
async def test_proxy_local_url_raises_before_start():
    """``local_url`` is only defined once the proxy has been started."""
    never_started = OpenRouterCompatProxy(target_base_url="http://example.com")
    with pytest.raises(RuntimeError):
        _ = never_started.local_url

View File

@@ -202,22 +202,11 @@ def test_sdk_exports_hook_event_type(hook_event: str):
# OpenRouter compatibility — bundled CLI version pin
# ---------------------------------------------------------------------------
#
# We're stuck on ``claude-agent-sdk==0.1.45`` (bundled CLI ``2.1.63``)
# because every version above introduces a 400 against OpenRouter:
#
# 1. CLI ``2.1.69`` (= SDK ``0.1.46``) shipped a `tool_reference` content
# block in `tool_result.content` that OpenRouter's stricter Zod
# validation rejects. See PR
# https://github.com/Significant-Gravitas/AutoGPT/pull/12294 for the
# forensic write-up that originally pinned us. CLI ``2.1.70`` added
# proxy detection that *should* disable the offending block, but two
# later attempts (Dependabot bumps to 0.1.55 / 0.1.56) still failed.
#
# 2. A second regression — the ``context-management-2025-06-27`` beta
# header — appeared in some CLI version after ``2.1.91``. Tracked
# upstream at
# https://github.com/anthropics/claude-agent-sdk-python/issues/789
# (still open at the time of writing, no upstream PR yet).
# Newer ``claude-agent-sdk`` versions bundle CLI binaries that send
# features incompatible with OpenRouter (``tool_reference`` content
# blocks, ``context-management-2025-06-27`` beta). We neutralise these
# at runtime by injecting ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1``
# into the CLI subprocess env (see ``service.py``).
#
# This test is the cheapest possible regression guard: it pins the
# bundled CLI to a known-good version. If anyone bumps
@@ -225,89 +214,39 @@ def test_sdk_exports_hook_event_type(hook_event: str):
# ``_cli_version.py`` will change and this test will fail with a clear
# message that points the next person at the OpenRouter compat issue
# instead of letting them silently re-break production.
#
# Workaround for actually upgrading: set the
# ``claude_agent_cli_path`` config option (or the matching env var) to
# point at a separately-installed Claude Code CLI binary at a known-good
# version, so the SDK Python API surface and the CLI binary version can
# be picked independently.
# CLI versions verified to work against OpenRouter directly (no compat
# proxy required) — bisected via the reproduction test in
# `cli_openrouter_compat_test.py`. Bundled CLI versions outside this
# set are still allowed but ONLY when the compat proxy is enabled (see
# the second known-good set below + the test below).
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT: frozenset[str] = frozenset(
# CLI versions verified to work against OpenRouter when the
# ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` env var is set --
# bisected via the reproduction test in ``cli_openrouter_compat_test.py``.
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = frozenset(
{
"2.1.63", # claude-agent-sdk 0.1.45 original pin from PR #12294.
"2.1.70", # claude-agent-sdk 0.1.47 first version with the
"2.1.63", # claude-agent-sdk 0.1.45 -- original pin from PR #12294.
"2.1.70", # claude-agent-sdk 0.1.47 -- first version with the
# tool_reference proxy detection fix; bisect-verified
# OpenRouter-safe in #12742.
"2.1.97", # claude-agent-sdk 0.1.58 -- works with the
# CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var.
}
)
# CLI versions verified to work against OpenRouter ONLY when the
# in-process `openrouter_compat_proxy` is enabled (which strips the
# `tool_reference` content blocks and `context-management-2025-06-27`
# beta from outgoing requests). Without the proxy these CLI versions
# trip OpenRouter's stricter validation and return 400.
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY: frozenset[str] = frozenset(
{
"2.1.97", # claude-agent-sdk 0.1.58 — needs `claude_agent_use_compat_proxy=True`
# due to the upstream regression in
# anthropics/claude-agent-sdk-python#789.
}
)
# Aggregate set used by the assertion below — the test allows EITHER
# a directly-known-good CLI OR a proxy-known-good CLI when the proxy
# is enabled in the active config.
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = (
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT | _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY
)
def test_bundled_cli_version_is_known_good_against_openrouter():
"""Pin the bundled CLI version so accidental SDK bumps cause a loud,
fast failure with a pointer to the OpenRouter compatibility issue.
A CLI version that's only safe via the compat proxy is allowed only
when ``ChatConfig.claude_agent_use_compat_proxy`` is enabled.
"""
from claude_agent_sdk._cli_version import __cli_version__
from backend.copilot.config import ChatConfig
cfg = ChatConfig()
proxy_enabled = cfg.claude_agent_use_compat_proxy
if __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT:
return # safe with or without the proxy
if __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY:
assert proxy_enabled, (
f"Bundled Claude Code CLI version {__cli_version__!r} is only "
"OpenRouter-safe when `claude_agent_use_compat_proxy` is "
"enabled, but the active ChatConfig has the proxy disabled. "
"Either set `COPILOT__CLAUDE_AGENT_USE_COMPAT_PROXY=true` or "
"downgrade `claude-agent-sdk` to a version whose bundled CLI "
f"is in {sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT)!r}. "
"See https://github.com/anthropics/claude-agent-sdk-python/issues/789."
)
return
raise AssertionError(
assert __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS, (
f"Bundled Claude Code CLI version is {__cli_version__!r}, which is "
f"not in any OpenRouter-known-good set "
f"not in the OpenRouter-known-good set "
f"({sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS)!r}). "
"If you intentionally bumped `claude-agent-sdk`, verify the new "
"bundled CLI works with OpenRouter against the reproduction test "
"in `cli_openrouter_compat_test.py`, then add the new CLI version "
"to either `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT` (works "
"without the proxy) or `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY` "
"(works only with `claude_agent_use_compat_proxy=true`). If you "
"cannot make the bundled CLI work either way, set "
"`claude_agent_cli_path` to a known-good binary instead. See "
"in `cli_openrouter_compat_test.py` (with "
"`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`), then add the new "
"CLI version to `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS`. If the env "
"var is not sufficient, set `claude_agent_cli_path` to a "
"known-good binary instead. See "
"https://github.com/anthropics/claude-agent-sdk-python/issues/789 "
"and https://github.com/Significant-Gravitas/AutoGPT/pull/12294."
)

View File

@@ -1980,13 +1980,6 @@ async def stream_chat_completion_sdk(
transcript_content: str = ""
state: _RetryState | None = None
# OpenRouter compat proxy — started inside the try and stopped in finally
# when ``ChatConfig.claude_agent_use_compat_proxy`` is enabled. The proxy
# rewrites outgoing CLI requests to strip ``tool_reference`` content
# blocks and the ``context-management-2025-06-27`` beta so the latest
# SDK / CLI versions stop tripping OpenRouter's validation.
_compat_proxy: Any = None # OpenRouterCompatProxy | None — lazy import
# Token usage accumulators — populated from ResultMessage at end of turn
turn_prompt_tokens = 0 # uncached input tokens only
turn_completion_tokens = 0
@@ -2249,96 +2242,14 @@ async def stream_chat_completion_sdk(
if sdk_model:
sdk_options_kwargs["model"] = sdk_model
# OpenRouter compatibility proxy — started here so its local URL
# can be injected into the CLI subprocess env BEFORE the env dict
# is passed to ``ClaudeAgentOptions``. When this flag is on we
# transparently rewrite outgoing CLI requests via the proxy
# (stripping ``tool_reference`` blocks and the
# ``context-management-2025-06-27`` beta) so newer SDK / CLI
# versions can talk to OpenRouter without their stricter
# validation rejecting the request.
if config.claude_agent_use_compat_proxy:
# Only start the compat proxy when there's already an
# explicit Anthropic-compatible upstream to forward to.
# Otherwise we'd be silently routing direct Anthropic /
# Claude Code subscription sessions through OpenRouter,
# which would break auth and change providers without
# operator consent. The explicit upstream can come from:
#
# 1. ``sdk_env['ANTHROPIC_BASE_URL']`` — caller override;
# 2. the process env — lowest-precedence host override;
# 3. ``ChatConfig.openrouter_active`` — OpenRouter is
# configured as the session's routing provider (i.e.
# the only case in which falling back to
# ``OPENROUTER_BASE_URL`` is intentional).
#
# When none of the above hold, log a warning and leave
# the CLI to talk to Anthropic directly as usual — the
# feature is opt-in and documented as "OpenRouter
# compatibility", so quietly no-oping on direct-Anthropic
# sessions is the safe default.
# Claude Code subscription mode intentionally sets
# ``sdk_env['ANTHROPIC_BASE_URL'] = ""`` to *disable* any
# base-URL override and keep the CLI talking to Anthropic
# directly. Treat an explicit empty string as a hard
# "no-proxy" signal so we never silently start the proxy
# against a host-wide ``ANTHROPIC_BASE_URL`` or fall back
# to OpenRouter when the caller has opted out.
sdk_env_map = sdk_env or {}
explicit_sdk_env = "ANTHROPIC_BASE_URL" in sdk_env_map
sdk_env_value = (
sdk_env_map["ANTHROPIC_BASE_URL"] if explicit_sdk_env else None
)
if explicit_sdk_env and not sdk_env_value:
# Empty string from sdk_env → subscription mode opt-out.
target_base_url: str | None = None
explicit_opt_out = True
else:
target_base_url = sdk_env_value or os.environ.get("ANTHROPIC_BASE_URL")
explicit_opt_out = False
# Only fall back to OpenRouter when the session actually
# has no base-URL plumbing of its own AND OpenRouter is
# the active routing provider AND the caller hasn't
# explicitly opted out via an empty sdk_env override.
if (
not target_base_url
and not explicit_opt_out
and config.openrouter_active
):
from backend.util.clients import OPENROUTER_BASE_URL
target_base_url = OPENROUTER_BASE_URL
if target_base_url:
from backend.copilot.sdk.openrouter_compat_proxy import (
OpenRouterCompatProxy,
)
_compat_proxy = OpenRouterCompatProxy(target_base_url=target_base_url)
await _compat_proxy.start()
# Inject the proxy URL into the SDK env so the spawned
# CLI subprocess uses the proxy as its Anthropic
# endpoint.
if sdk_env is None:
sdk_env = {}
sdk_env["ANTHROPIC_BASE_URL"] = _compat_proxy.local_url
# Log only the local bind URL — upstream is redacted
# to match the taint-analysis guidance applied in
# ``openrouter_compat_proxy.start``.
logger.info(
"%s OpenRouter compat proxy active (listening on %s)",
log_prefix,
_compat_proxy.local_url,
)
else:
logger.warning(
"%s claude_agent_use_compat_proxy is enabled but no "
"Anthropic-compatible upstream is configured for this "
"session (no ANTHROPIC_BASE_URL override and "
"openrouter_active is False); skipping proxy startup "
"so the CLI keeps talking to Anthropic directly.",
log_prefix,
)
# Tell the CLI to strip experimental betas (e.g.
# ``context-management-2025-06-27``) and ``tool_reference``
# content blocks so newer SDK / CLI versions work with
# OpenRouter's stricter validation. This single env var
# replaces the old in-process compat proxy.
if sdk_env is None:
sdk_env = {}
sdk_env["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1"
if sdk_env:
sdk_options_kwargs["env"] = sdk_env
@@ -3012,18 +2923,5 @@ async def stream_chat_completion_sdk(
except Exception:
logger.warning("%s SDK cleanup failed", log_prefix, exc_info=True)
finally:
# Tear down the OpenRouter compat proxy if it was started for
# this session — releases the bound port and the aiohttp
# client. Wrapped so a stop failure can never block the
# downstream lock release.
if _compat_proxy is not None:
try:
await _compat_proxy.stop()
except Exception:
logger.warning(
"%s OpenRouter compat proxy stop failed",
log_prefix,
exc_info=True,
)
# Release stream lock to allow new streams for this session
await lock.release()

View File

@@ -18,7 +18,7 @@ apscheduler = "^3.11.1"
autogpt-libs = { path = "../autogpt_libs", develop = true }
bleach = { extras = ["css"], version = "^6.2.0" }
cachetools = "^5.5.0"
claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 ships the broken context-management beta and REQUIRES the openrouter_compat_proxy. See sdk_compat_test.py.
claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 -- CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var strips the broken context-management beta. See sdk_compat_test.py.
click = "^8.2.0"
cryptography = "^46.0"
discord-py = "^2.5.2"