mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
refactor(backend): replace compat proxy with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS env var
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
import os
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import Field, field_validator, model_validator
|
||||
from pydantic import Field, field_validator
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
from backend.util.clients import OPENROUTER_BASE_URL
|
||||
@@ -186,28 +186,6 @@ class ChatConfig(BaseSettings):
|
||||
"or the unprefixed `CLAUDE_AGENT_CLI_PATH` environment variable "
|
||||
"(same pattern as `api_key` / `base_url`).",
|
||||
)
|
||||
claude_agent_use_compat_proxy: bool = Field(
|
||||
default=True,
|
||||
description="Run the in-process OpenRouter compatibility proxy "
|
||||
"(`backend.copilot.sdk.openrouter_compat_proxy`) in front of the "
|
||||
"Claude Code CLI. The proxy strips `tool_reference` content "
|
||||
"blocks and the `context-management-2025-06-27` beta header / "
|
||||
"field from outgoing requests so newer SDK / CLI versions stop "
|
||||
"tripping OpenRouter's stricter validation. Defaults to True "
|
||||
"because the bundled CLI in `claude-agent-sdk >= 0.1.55` requires "
|
||||
"the proxy. Orthogonal to `claude_agent_cli_path` — the override "
|
||||
"picks the binary, the proxy rewrites whatever the binary sends. "
|
||||
"Disable explicitly only if you've pinned `claude-agent-sdk` to "
|
||||
"a version whose bundled CLI is in "
|
||||
"`_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT` (2.1.63 or 2.1.70). "
|
||||
"Reads from `CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY` or the "
|
||||
"unprefixed `CLAUDE_AGENT_USE_COMPAT_PROXY` environment "
|
||||
"variable (same pattern as `claude_agent_cli_path`). Only "
|
||||
"takes effect when the session has an Anthropic-compatible "
|
||||
"upstream to forward to — direct-Anthropic sessions skip the "
|
||||
"proxy entirely to avoid silently re-routing through "
|
||||
"OpenRouter.",
|
||||
)
|
||||
use_openrouter: bool = Field(
|
||||
default=True,
|
||||
description="Enable routing API calls through the OpenRouter proxy. "
|
||||
@@ -355,37 +333,6 @@ class ChatConfig(BaseSettings):
|
||||
)
|
||||
return v
|
||||
|
||||
@model_validator(mode="before")
@classmethod
def _inject_unprefixed_compat_proxy_env(cls, values):
    """Fall back to the unprefixed ``CLAUDE_AGENT_USE_COMPAT_PROXY``
    env var when ``claude_agent_use_compat_proxy`` was not supplied.

    A plain ``mode="before"`` field validator cannot be used here:
    the field defaults to ``True`` (not ``None``), so by the time a
    field validator runs, "caller passed ``False`` explicitly" and
    "Pydantic resolved the default" are indistinguishable raw values.

    A ``model_validator(mode="before")`` instead sees the full input
    dict: when the key is absent AND the Pydantic-prefixed
    ``CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY`` env var is unset, the
    unprefixed value is injected as a raw string so Pydantic performs
    the usual bool coercion (``"1"`` / ``"true"`` → ``True``).
    Explicit kwargs always win because they are already present in
    *values* before this validator fires.
    """
    if not isinstance(values, dict):
        return values
    field_name = "claude_agent_use_compat_proxy"
    if field_name in values:
        # Explicit kwarg (or earlier injection) — nothing to do.
        return values
    if os.getenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY") is not None:
        # The prefixed env var is authoritative; Pydantic's own env
        # resolution will pick it up after this validator returns.
        return values
    raw = os.getenv("CLAUDE_AGENT_USE_COMPAT_PROXY")
    if raw is not None:
        values[field_name] = raw
    return values
|
||||
|
||||
# Prompt paths for different contexts
|
||||
PROMPT_PATHS: dict[str, str] = {
|
||||
"default": "prompts/chat_system.md",
|
||||
|
||||
@@ -19,8 +19,6 @@ _ENV_VARS_TO_CLEAR = (
|
||||
"OPENAI_BASE_URL",
|
||||
"CHAT_CLAUDE_AGENT_CLI_PATH",
|
||||
"CLAUDE_AGENT_CLI_PATH",
|
||||
"CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY",
|
||||
"CLAUDE_AGENT_USE_COMPAT_PROXY",
|
||||
)
|
||||
|
||||
|
||||
@@ -124,63 +122,3 @@ class TestClaudeAgentCliPathEnvFallback:
|
||||
def test_no_env_var_defaults_to_none(self, monkeypatch: pytest.MonkeyPatch) -> None:
    """Without any env override, the CLI path stays at its None default."""
    assert ChatConfig().claude_agent_cli_path is None
|
||||
|
||||
|
||||
class TestClaudeAgentUseCompatProxyEnvFallback:
    """Env-var fallback behaviour for ``claude_agent_use_compat_proxy``.

    The field is readable via both the Pydantic-prefixed
    ``CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY`` variable and the unprefixed
    ``CLAUDE_AGENT_USE_COMPAT_PROXY`` form. These tests also act as a
    regression guard for the bool-default pitfall: the field's default
    is ``True`` (not ``None``), so Pydantic hands the default into
    validators when nothing is supplied and a naive ``if v is None``
    fallback would never trigger.
    """

    def test_prefixed_env_var_enables_proxy(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """The standard Pydantic-prefixed env var switches the proxy on."""
        monkeypatch.setenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
        assert ChatConfig().claude_agent_use_compat_proxy is True

    def test_unprefixed_env_var_enables_proxy(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """The unprefixed fallback env var also switches the proxy on."""
        monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
        assert ChatConfig().claude_agent_use_compat_proxy is True

    def test_unprefixed_env_var_respects_falsy_value(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A falsy unprefixed value must disable the proxy, not be ignored."""
        monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "false")
        assert ChatConfig().claude_agent_use_compat_proxy is False

    def test_prefixed_wins_over_unprefixed(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """With both set, the prefixed var is authoritative: an explicit
        ``CHAT_...=false`` must not be clobbered by an unprefixed
        ``=true``."""
        monkeypatch.setenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY", "false")
        monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
        assert ChatConfig().claude_agent_use_compat_proxy is False

    def test_no_env_var_uses_field_default(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """No env var at all → the field default applies."""
        # Dev-preview branch defaults compat_proxy to True (the
        # bundled CLI in claude-agent-sdk 0.1.58 needs the proxy).
        assert ChatConfig().claude_agent_use_compat_proxy is True

    def test_explicit_kwarg_not_overridden_by_unprefixed_env(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Regression: explicit ChatConfig(claude_agent_use_compat_proxy=False)
        must not be overridden by the unprefixed env var."""
        monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
        cfg = ChatConfig(claude_agent_use_compat_proxy=False)
        assert cfg.claude_agent_use_compat_proxy is False
|
||||
|
||||
@@ -392,17 +392,10 @@ async def _run_cli_against_fake_server(
|
||||
|
||||
async def _run_reproduction(
|
||||
*,
|
||||
route_through_proxy: bool,
|
||||
extra_env: dict[str, str] | None = None,
|
||||
) -> tuple[int, str, str, list[_CapturedRequest]]:
|
||||
"""Spawn the CLI against a fake Anthropic API and return what the
|
||||
*upstream* (post-proxy if any) saw.
|
||||
|
||||
When ``route_through_proxy`` is True, the CLI talks to the
|
||||
``OpenRouterCompatProxy`` and the proxy forwards to the fake
|
||||
upstream. The fake upstream is what records the requests, so the
|
||||
captured bodies are what OpenRouter would actually have received —
|
||||
*after* the proxy's stripping pass.
|
||||
server saw.
|
||||
"""
|
||||
cli_path = _resolve_cli_path()
|
||||
if cli_path is None or not cli_path.is_file():
|
||||
@@ -415,30 +408,14 @@ async def _run_reproduction(
|
||||
captured: list[_CapturedRequest] = []
|
||||
upstream_runner, upstream_port = await _start_fake_anthropic_server(captured)
|
||||
|
||||
proxy = None
|
||||
target_port = upstream_port
|
||||
try:
|
||||
if route_through_proxy:
|
||||
from backend.copilot.sdk.openrouter_compat_proxy import (
|
||||
OpenRouterCompatProxy,
|
||||
)
|
||||
|
||||
proxy = OpenRouterCompatProxy(
|
||||
target_base_url=f"http://127.0.0.1:{upstream_port}"
|
||||
)
|
||||
await proxy.start()
|
||||
# Pull the bound port out of the proxy URL.
|
||||
target_port = int(proxy.local_url.rsplit(":", 1)[1])
|
||||
|
||||
returncode, stdout, stderr = await _run_cli_against_fake_server(
|
||||
cli_path=cli_path,
|
||||
fake_server_port=target_port,
|
||||
fake_server_port=upstream_port,
|
||||
timeout_seconds=30.0,
|
||||
extra_env=extra_env,
|
||||
)
|
||||
finally:
|
||||
if proxy is not None:
|
||||
await proxy.stop()
|
||||
await upstream_runner.cleanup()
|
||||
|
||||
return returncode, stdout, stderr, captured
|
||||
@@ -470,10 +447,9 @@ def _assert_no_forbidden_patterns(
|
||||
"`claude-agent-sdk` above 0.1.45. See "
|
||||
"https://github.com/Significant-Gravitas/AutoGPT/pull/12294 and "
|
||||
"https://github.com/anthropics/claude-agent-sdk-python/issues/789. "
|
||||
"If you intended to upgrade, you must enable the in-process compat "
|
||||
"proxy (`CLAUDE_AGENT_USE_COMPAT_PROXY=true` or the prefixed "
|
||||
"`CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY=true`) or use a known-good "
|
||||
"CLI binary via `claude_agent_cli_path` (env: "
|
||||
"If you intended to upgrade, ensure "
|
||||
"`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1` is set in the SDK env "
|
||||
"or use a known-good CLI binary via `claude_agent_cli_path` (env: "
|
||||
"`CLAUDE_AGENT_CLI_PATH` or `CHAT_CLAUDE_AGENT_CLI_PATH`)."
|
||||
)
|
||||
|
||||
@@ -483,74 +459,31 @@ async def test_cli_does_not_send_openrouter_incompatible_features():
|
||||
"""End-to-end OpenRouter compatibility reproduction (bare CLI path).
|
||||
|
||||
Spawns the bundled (or overridden) Claude Code CLI against a fake
|
||||
Anthropic API server WITHOUT the compat proxy in the loop, captures
|
||||
every request body it sends, and asserts that none of them contain
|
||||
the two known OpenRouter-breaking features.
|
||||
Anthropic API server, captures every request body it sends, and
|
||||
asserts that none of them contain the two known OpenRouter-breaking
|
||||
features.
|
||||
|
||||
On a clean SDK pin (0.1.45 or 0.1.47, bundled CLI 2.1.63 or 2.1.70)
|
||||
this passes naturally. On a broken pin (0.1.55+, bundled CLI 2.1.91+)
|
||||
it fails — that failure IS the bisect signal we use to verify which
|
||||
SDK versions need the workaround.
|
||||
|
||||
Skipped when ``claude_agent_use_compat_proxy=True`` because in that
|
||||
configuration the operator has explicitly opted into the workaround
|
||||
and the bare-CLI behaviour is moot — what matters is that the
|
||||
*upstream* (post-proxy) sees clean requests, which is covered by
|
||||
``test_cli_via_compat_proxy_emits_clean_requests_to_upstream``.
|
||||
"""
|
||||
from backend.copilot.config import ChatConfig
|
||||
|
||||
if ChatConfig().claude_agent_use_compat_proxy:
|
||||
pytest.skip(
|
||||
"Compat proxy is enabled in the active config — the bare-CLI "
|
||||
"reproduction is not a meaningful signal here. The proxy-routed "
|
||||
"variant `test_cli_via_compat_proxy_emits_clean_requests_to_upstream` "
|
||||
"is the regression guard for this configuration."
|
||||
)
|
||||
|
||||
returncode, _stdout, stderr, captured = await _run_reproduction(
|
||||
route_through_proxy=False
|
||||
)
|
||||
_assert_no_forbidden_patterns(captured, returncode, stderr)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cli_via_compat_proxy_emits_clean_requests_to_upstream():
|
||||
"""End-to-end test for the compat proxy workaround.
|
||||
|
||||
Spawns the bundled CLI against an in-process fake Anthropic API
|
||||
server WITH the ``OpenRouterCompatProxy`` in front, then asserts
|
||||
that the *upstream* sees clean requests — no `tool_reference`
|
||||
blocks, no `context-management-2025-06-27` beta header — even
|
||||
when the bundled CLI itself would have sent them.
|
||||
|
||||
This is the regression guard for the proxy: if the proxy ever
|
||||
stops stripping a known forbidden pattern, this test catches it.
|
||||
On a SDK version where the bare CLI is already clean (0.1.45 /
|
||||
0.1.47), the proxy is a no-op and the test passes trivially.
|
||||
On a SDK version with the regression (0.1.55+), the test fails
|
||||
if and only if the proxy fails to strip the pattern.
|
||||
"""
|
||||
returncode, _stdout, stderr, captured = await _run_reproduction(
|
||||
route_through_proxy=True
|
||||
)
|
||||
returncode, _stdout, stderr, captured = await _run_reproduction()
|
||||
_assert_no_forbidden_patterns(captured, returncode, stderr)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_disable_experimental_betas_env_var_strips_headers():
|
||||
"""Validate whether ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` is
|
||||
sufficient to strip the ``context-management-2025-06-27`` beta header
|
||||
when ``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint
|
||||
(simulating OpenRouter).
|
||||
"""Validate that ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` strips
|
||||
the ``context-management-2025-06-27`` beta header when
|
||||
``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint (simulating
|
||||
OpenRouter).
|
||||
|
||||
If this test passes, the compat proxy is unnecessary and can be
|
||||
removed — the env var alone is enough. If it fails, the CLI's
|
||||
provider-detection logic does not honour the env var for custom
|
||||
base URLs and the proxy remains required.
|
||||
This is the main regression guard: the env var is injected by
|
||||
``service.py`` into every CLI subprocess so newer SDK / CLI versions
|
||||
work with OpenRouter without any proxy.
|
||||
"""
|
||||
returncode, _stdout, stderr, captured = await _run_reproduction(
|
||||
route_through_proxy=False,
|
||||
extra_env={"CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS": "1"},
|
||||
)
|
||||
_assert_no_forbidden_patterns(captured, returncode, stderr)
|
||||
|
||||
@@ -1,559 +0,0 @@
|
||||
"""Tiny in-process HTTP middleware that makes the Claude Code CLI work
|
||||
against OpenRouter on **any** ``claude-agent-sdk`` version.
|
||||
|
||||
Background
|
||||
----------
|
||||
We've been pinned at ``claude-agent-sdk==0.1.45`` (bundled CLI 2.1.63)
|
||||
since `PR #12294`_ because every newer CLI version sends one of two
|
||||
features that OpenRouter rejects:
|
||||
|
||||
1. **`tool_reference` content blocks** in ``tool_result.content`` —
|
||||
introduced in CLI 2.1.69. OpenRouter's stricter Zod validation
|
||||
refuses requests containing them with::
|
||||
|
||||
messages[N].content[0].content: Invalid input: expected string, received array
|
||||
|
||||
2. **`context-management-2025-06-27` beta header** — sent in either the
|
||||
request body's ``betas`` array or the ``anthropic-beta`` HTTP header.
|
||||
OpenRouter responds::
|
||||
|
||||
400 No endpoints available that support Anthropic's context
|
||||
management features (context-management-2025-06-27).
|
||||
|
||||
Tracked upstream at `claude-agent-sdk-python#789`_.
|
||||
|
||||
This module starts a tiny aiohttp server that:
|
||||
|
||||
* listens on ``127.0.0.1:RANDOM_PORT``,
|
||||
* receives every CLI request that would normally go to
|
||||
``ANTHROPIC_BASE_URL``,
|
||||
* strips the two forbidden patterns from the body and headers,
|
||||
* forwards the cleaned request to the real upstream
|
||||
(``proxy_target_base_url``, e.g. ``https://openrouter.ai/api/v1``),
|
||||
* streams the response back to the CLI unchanged.
|
||||
|
||||
The proxy is wired via :class:`backend.copilot.config.ChatConfig.claude_agent_use_compat_proxy`.
|
||||
When the flag is on, :mod:`backend.copilot.sdk.service` starts a proxy
|
||||
per session, sets ``ANTHROPIC_BASE_URL`` in the SDK's ``env`` to point
|
||||
at the proxy, then tears it down after the session ends.
|
||||
|
||||
Why a separate proxy instead of a custom HTTP transport in the SDK?
|
||||
-------------------------------------------------------------------
|
||||
The Python SDK delegates **all** HTTP traffic to the bundled Claude
|
||||
Code CLI subprocess. Once the CLI is spawned, the only seam left is
|
||||
the network — there is no in-process hook for "modify outgoing
|
||||
request before it leaves the CLI". The proxy lives at that seam.
|
||||
|
||||
This module is intentionally orthogonal to the
|
||||
:attr:`ChatConfig.claude_agent_cli_path` override:
|
||||
|
||||
* ``cli_path`` lets us swap **which CLI binary** we run.
|
||||
* this proxy lets us **rewrite what any CLI binary sends**.
|
||||
|
||||
The two can be combined or used independently.
|
||||
|
||||
.. _PR #12294: https://github.com/Significant-Gravitas/AutoGPT/pull/12294
|
||||
.. _claude-agent-sdk-python#789: https://github.com/anthropics/claude-agent-sdk-python/issues/789
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import aiohttp
|
||||
from aiohttp import web
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Header values OpenRouter rejects. We strip exactly these tokens from
|
||||
# the comma-separated ``anthropic-beta`` header value (preserving any
|
||||
# other betas the CLI requests).
|
||||
_FORBIDDEN_BETA_TOKENS: frozenset[str] = frozenset(
|
||||
{
|
||||
"context-management-2025-06-27",
|
||||
}
|
||||
)
|
||||
|
||||
# Hop-by-hop headers we must NOT forward through the proxy. Per
|
||||
# RFC 7230 §6.1, these are connection-specific and must be regenerated
|
||||
# by each intermediary. ``host`` is also stripped because aiohttp
|
||||
# generates the correct ``Host`` header for the upstream URL itself.
|
||||
#
|
||||
# The canonical header name defined in RFC 7230 §4.4 is ``Trailer``
|
||||
# (singular); some SDKs / legacy proxies also emit the plural
|
||||
# ``Trailers`` so we accept both forms just in case. Intermediaries
|
||||
# must additionally drop every header name listed in the incoming
|
||||
# ``Connection`` field value (§6.1 "extension hop-by-hop headers") —
|
||||
# that's handled dynamically by :func:`clean_request_headers`.
|
||||
_HOP_BY_HOP_HEADERS: frozenset[str] = frozenset(
|
||||
{
|
||||
"connection",
|
||||
"keep-alive",
|
||||
"proxy-authenticate",
|
||||
"proxy-authorization",
|
||||
"te",
|
||||
"trailer",
|
||||
"trailers",
|
||||
"transfer-encoding",
|
||||
"upgrade",
|
||||
"host",
|
||||
# ``content-length`` is stripped because we may rewrite the
|
||||
# body — aiohttp will recompute it on the upstream request.
|
||||
"content-length",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pure helpers — exported so the unit tests can drive them directly without
|
||||
# spinning up a server.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def strip_tool_reference_blocks(payload: Any) -> Any:
    """Recursively remove ``tool_reference`` content blocks from
    *payload* and return the cleaned structure.

    The CLI's built-in ``ToolSearch`` tool injects blocks of the form::

        {"type": "tool_reference", "tool_name": "mcp__copilot__find_block"}

    into ``tool_result.content``, and OpenRouter's stricter Zod
    validation rejects them. Dropping them is safe: they are metadata
    about which tools were searched, not model-visible content. Only
    the wire format is rewritten — the CLI's internal state is untouched.

    A dict that *is* a ``tool_reference`` block is signalled to the
    caller by returning ``None``; callers drop such entries entirely
    (rather than writing ``null``) so schema-strict upstreams accept
    the result. Genuine ``None`` values in the input are preserved,
    because only children that were originally dicts are eligible for
    removal.
    """
    if isinstance(payload, dict):
        if payload.get("type") == "tool_reference":
            return None
        cleaned_pairs = (
            (key, strip_tool_reference_blocks(child), isinstance(child, dict))
            for key, child in payload.items()
        )
        # Keys whose dict-valued child collapsed to None are omitted.
        return {
            key: cleaned
            for key, cleaned, was_dict in cleaned_pairs
            if not (was_dict and cleaned is None)
        }
    if isinstance(payload, list):
        cleaned_items = (
            (strip_tool_reference_blocks(item), isinstance(item, dict))
            for item in payload
        )
        # tool_reference entries vanish from the list — no None holes.
        return [
            cleaned
            for cleaned, was_dict in cleaned_items
            if not (was_dict and cleaned is None)
        ]
    return payload
|
||||
|
||||
|
||||
def strip_forbidden_betas_from_body(payload: Any) -> Any:
    """Drop forbidden tokens from the ``betas`` array of an Anthropic
    Messages API request body, if one is present.

    The Messages API accepts a top-level ``betas: list[str]`` used to
    opt into beta features; tokens listed in
    :data:`_FORBIDDEN_BETA_TOKENS` are removed so OpenRouter's check
    passes. When no token survives, the ``betas`` key is omitted
    entirely.

    The input dict is never mutated — a shallow copy with the cleaned
    ``betas`` key is returned. Non-dict payloads (and bodies whose
    ``betas`` is not a list) pass through unchanged.
    """
    if not isinstance(payload, dict):
        return payload
    cleaned = dict(payload)
    original_betas = cleaned.pop("betas", None)
    if not isinstance(original_betas, list):
        return payload
    surviving = [b for b in original_betas if b not in _FORBIDDEN_BETA_TOKENS]
    if surviving:
        cleaned["betas"] = surviving
    return cleaned
|
||||
|
||||
|
||||
def strip_forbidden_anthropic_beta_header(value: str | None) -> str | None:
    """Return *value* with forbidden beta tokens removed.

    The ``anthropic-beta`` HTTP header carries a comma-separated list
    of feature flags. Exactly the tokens in
    :data:`_FORBIDDEN_BETA_TOKENS` are stripped; every other flag is
    preserved. Returns ``None`` when nothing survives so the caller
    can drop the header altogether; falsy input is returned as-is.
    """
    if not value:
        return value
    stripped_tokens = (part.strip() for part in value.split(","))
    surviving = [t for t in stripped_tokens if t and t not in _FORBIDDEN_BETA_TOKENS]
    return ", ".join(surviving) if surviving else None
|
||||
|
||||
|
||||
def clean_request_body_bytes(body_bytes: bytes) -> bytes:
    """Run both body-level strippers over *body_bytes* and return the
    cleaned JSON, re-encoded compactly.

    Empty bodies, undecodable bytes, and non-JSON payloads are returned
    untouched — the CLI shouldn't send non-JSON to the Messages API,
    but be defensive rather than raise.
    """
    if not body_bytes:
        return body_bytes
    try:
        decoded = json.loads(body_bytes.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        return body_bytes
    cleaned = strip_forbidden_betas_from_body(strip_tool_reference_blocks(decoded))
    return json.dumps(cleaned, separators=(",", ":")).encode("utf-8")
|
||||
|
||||
|
||||
def _parse_connection_tokens(headers: dict[str, str]) -> set[str]:
|
||||
"""Extract hop-by-hop header names from the ``Connection`` field."""
|
||||
connection_header = next(
|
||||
(value for name, value in headers.items() if name.lower() == "connection"),
|
||||
"",
|
||||
)
|
||||
return {
|
||||
token.strip().lower() for token in connection_header.split(",") if token.strip()
|
||||
}
|
||||
|
||||
|
||||
def clean_request_headers(headers: dict[str, str]) -> dict[str, str]:
    """Return a fresh header dict with hop-by-hop headers dropped and
    the ``anthropic-beta`` value rewritten to exclude forbidden tokens.

    Per RFC 7230 §6.1 an intermediary must remove the static
    hop-by-hop set (:data:`_HOP_BY_HOP_HEADERS`) **and** every header
    named in the incoming ``Connection`` field value — the latter is
    how extension hop-by-hop headers are signalled per-connection.
    Both checks are case-insensitive.

    Callers should pass an already-materialised ``dict`` (e.g.
    ``dict(request.headers)``) so this function stays simple.
    """
    dynamic_hop_by_hop = _parse_connection_tokens(headers)

    result: dict[str, str] = {}
    for name, value in headers.items():
        lowered = name.lower()
        if lowered in _HOP_BY_HOP_HEADERS or lowered in dynamic_hop_by_hop:
            continue
        if lowered != "anthropic-beta":
            result[name] = value
            continue
        rewritten = strip_forbidden_anthropic_beta_header(value)
        if rewritten is not None:
            # Header is dropped entirely when no beta token survives.
            result[name] = rewritten
    return result
|
||||
|
||||
|
||||
def clean_response_headers(
    headers: "Any",
) -> list[tuple[str, str]]:
    """Like :func:`clean_request_headers` but preserves multi-valued
    headers (e.g. ``Set-Cookie``). Accepts any mapping-like object
    whose ``.items()`` yields ``(name, value)`` pairs — including
    aiohttp's ``CIMultiDictProxy`` which can have duplicate keys.

    Returns a list of ``(name, value)`` tuples suitable for passing
    to ``web.StreamResponse(headers=...)`` via ``CIMultiDict``.
    """
    # Consistency fix: reuse the shared Connection-token parser instead
    # of a duplicated inline loop. The helper only relies on
    # ``.items()`` (which CIMultiDictProxy provides) and reads the
    # first ``Connection`` header it finds — the same first-match
    # semantics as the previous break-on-first inline version.
    connection_tokens = _parse_connection_tokens(headers)

    cleaned: list[tuple[str, str]] = []
    for name, value in headers.items():
        lower_name = name.lower()
        # Drop both the static hop-by-hop set and the per-connection
        # extension headers (RFC 7230 §6.1), case-insensitively.
        if lower_name in _HOP_BY_HOP_HEADERS or lower_name in connection_tokens:
            continue
        if lower_name == "anthropic-beta":
            stripped = strip_forbidden_anthropic_beta_header(value)
            if stripped is None:
                # Nothing survives — omit the header entirely.
                continue
            cleaned.append((name, stripped))
            continue
        cleaned.append((name, value))
    return cleaned
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# The proxy server
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class OpenRouterCompatProxy:
|
||||
"""In-process HTTP proxy that rewrites Claude Code CLI requests on
|
||||
the way to OpenRouter (or any other Anthropic-compatible gateway).
|
||||
|
||||
Usage::
|
||||
|
||||
proxy = OpenRouterCompatProxy(target_base_url="https://openrouter.ai/api/v1")
|
||||
await proxy.start()
|
||||
try:
|
||||
# Spawn the CLI with ANTHROPIC_BASE_URL=proxy.local_url
|
||||
...
|
||||
finally:
|
||||
await proxy.stop()
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
target_base_url: str,
|
||||
*,
|
||||
bind_host: str = "127.0.0.1",
|
||||
request_timeout: float = 600.0,
|
||||
) -> None:
|
||||
self._target_base_url = target_base_url.rstrip("/")
|
||||
self._bind_host = bind_host
|
||||
self._request_timeout = request_timeout
|
||||
self._runner: web.AppRunner | None = None
|
||||
self._client: aiohttp.ClientSession | None = None
|
||||
self._port: int | None = None
|
||||
|
||||
@property
|
||||
def local_url(self) -> str:
|
||||
"""The ``http://host:port`` URL that the CLI should use as
|
||||
``ANTHROPIC_BASE_URL``. Raises if :meth:`start` has not been
|
||||
called yet."""
|
||||
if self._port is None:
|
||||
raise RuntimeError("Proxy is not running — call start() first.")
|
||||
return f"http://{self._bind_host}:{self._port}"
|
||||
|
||||
@property
|
||||
def target_base_url(self) -> str:
|
||||
"""The upstream URL the proxy is forwarding to."""
|
||||
return self._target_base_url
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Bind to a random local port and start serving.
|
||||
|
||||
Cleans up the ``ClientSession`` and the ``AppRunner`` on any
|
||||
failure during setup so a partially-initialised proxy never
|
||||
leaves resources dangling (covers the
|
||||
``runner.setup() / site.start()`` raise paths in addition to
|
||||
the explicit bind-failure branches below).
|
||||
"""
|
||||
if self._runner is not None:
|
||||
return # already started
|
||||
# Use sock_connect + sock_read instead of total so long-lived
|
||||
# SSE / streaming responses aren't killed after request_timeout.
|
||||
# total=None means no cumulative limit; sock_read is the per-chunk
|
||||
# idle timeout (time between data arriving on the socket).
|
||||
client = aiohttp.ClientSession(
|
||||
timeout=aiohttp.ClientTimeout(
|
||||
total=None,
|
||||
sock_connect=self._request_timeout,
|
||||
sock_read=self._request_timeout,
|
||||
)
|
||||
)
|
||||
app = web.Application()
|
||||
# Catch every method + path so we can also forward GETs
|
||||
# (the CLI may probe profile / model endpoints).
|
||||
app.router.add_route("*", "/{tail:.*}", self._handle)
|
||||
runner = web.AppRunner(app)
|
||||
runner_setup = False
|
||||
try:
|
||||
await runner.setup()
|
||||
runner_setup = True
|
||||
site = web.TCPSite(runner, self._bind_host, 0)
|
||||
await site.start()
|
||||
server = site._server
|
||||
if server is None:
|
||||
raise RuntimeError("Failed to bind compat proxy server.")
|
||||
sockets = getattr(server, "sockets", None)
|
||||
if not sockets:
|
||||
raise RuntimeError("Compat proxy server has no listening sockets.")
|
||||
self._port = sockets[0].getsockname()[1]
|
||||
except BaseException:
|
||||
# Best-effort teardown — swallow secondary errors so the
|
||||
# caller sees the original exception.
|
||||
if runner_setup:
|
||||
try:
|
||||
await runner.cleanup()
|
||||
except Exception: # pragma: no cover - cleanup-only path
|
||||
logger.exception("compat proxy runner cleanup failed")
|
||||
try:
|
||||
await client.close()
|
||||
except Exception: # pragma: no cover - cleanup-only path
|
||||
logger.exception("compat proxy client close failed")
|
||||
raise
|
||||
# Only publish the attributes after everything is wired up so
|
||||
# ``stop()`` and ``local_url`` observe a consistent state.
|
||||
self._client = client
|
||||
self._runner = runner
|
||||
# Deliberately log only the local bind port — never the
|
||||
# upstream URL or any derived component. CodeQL's
|
||||
# `py/clear-text-logging-sensitive-data` taint analysis traces
|
||||
# everything that originates from a config-supplied URL as
|
||||
# potentially-sensitive even after parsing, and the upstream
|
||||
# endpoint is anyway discoverable from the config the operator
|
||||
# already has access to. The detailed upstream is exposed via
|
||||
# the ``target_base_url`` property for callers that need it.
|
||||
logger.info(
|
||||
"OpenRouter compat proxy listening on %s:%d",
|
||||
self._bind_host,
|
||||
self._port,
|
||||
)
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Stop accepting connections and release the port."""
|
||||
if self._runner is not None:
|
||||
await self._runner.cleanup()
|
||||
self._runner = None
|
||||
if self._client is not None:
|
||||
await self._client.close()
|
||||
self._client = None
|
||||
self._port = None
|
||||
|
||||
async def __aenter__(self) -> "OpenRouterCompatProxy":
|
||||
await self.start()
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb) -> None:
|
||||
await self.stop()
|
||||
|
||||
async def _handle(self, request: web.Request) -> web.StreamResponse:
    """Forward *request* to the upstream after stripping forbidden
    features.

    Streams the upstream response back to the caller chunk-by-chunk so
    SSE / streamed responses work. Connect/timeout failures surface as
    a generic 502; a mid-stream upstream failure aborts the downstream
    transport instead of signalling a clean EOF.
    """
    if self._client is None:
        raise web.HTTPInternalServerError(reason="proxy client missing")

    # Build the upstream URL. ``request.path_qs`` includes the
    # query string verbatim. ``request.path`` for ``/v1/messages``
    # is just ``/v1/messages`` — we strip a leading slash and
    # concat with the target base URL.
    upstream_path = request.path_qs
    if not upstream_path.startswith("/"):
        upstream_path = "/" + upstream_path
    # Allow the target_base_url to itself contain a path (e.g.
    # ``https://openrouter.ai/api/v1``). In that case requests to
    # ``/v1/messages`` need to become ``/api/v1/messages``, not
    # ``/api/v1/v1/messages``. Deduplicate the API version prefix:
    # if the target URL already contains a versioned path segment
    # (e.g. ``/api/v1``) and the incoming request path starts with
    # the same segment, strip it to avoid ``/api/v1/v1/messages``.
    from urllib.parse import urlparse

    target_base = self._target_base_url
    target_path = urlparse(target_base).path.rstrip("/")
    if target_path and upstream_path.startswith(target_path + "/"):
        upstream_path = upstream_path[len(target_path) :]
    elif target_path and upstream_path == target_path:
        upstream_path = "/"
    upstream_url = f"{target_base}{upstream_path}"

    # Apply both cleanup passes: body-level (tool_reference blocks,
    # forbidden betas) and header-level (anthropic-beta token,
    # hop-by-hop headers).
    body_bytes = await request.read()
    cleaned_body = clean_request_body_bytes(body_bytes)
    cleaned_headers = clean_request_headers(dict(request.headers))

    try:
        upstream_response = await self._client.request(
            method=request.method,
            url=upstream_url,
            data=cleaned_body if cleaned_body else None,
            headers=cleaned_headers,
            allow_redirects=False,
        )
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        # ``aiohttp.ClientTimeout`` raises ``asyncio.TimeoutError``
        # (not ``aiohttp.ClientError``) on hung upstreams, so both
        # must be caught here to surface the explicit 502 failure
        # mode this proxy guarantees.
        #
        # Log the detailed error for ops, but return a generic
        # message to the caller — exception strings can leak
        # internal hostnames, ports, or stack frames (CodeQL
        # `py/stack-trace-exposure`).
        logger.warning(
            "OpenRouter compat proxy upstream error: %s", type(e).__name__
        )
        return web.Response(status=502, text="upstream error")

    # Stream the response back unchanged (apart from hop-by-hop
    # header filtering). Use clean_response_headers to preserve
    # multi-valued headers like Set-Cookie that dict() would drop.
    from multidict import CIMultiDict

    downstream = web.StreamResponse(
        status=upstream_response.status,
        headers=CIMultiDict(clean_response_headers(upstream_response.headers)),
    )
    await downstream.prepare(request)
    # Track whether the stream terminated cleanly. A mid-stream
    # ``aiohttp.ClientError`` means the upstream died before
    # finishing; calling ``write_eof()`` on that partial response
    # would signal "complete stream" to the downstream client and
    # silently corrupt the body. Skip the EOF on the error path
    # so the client's connection is dropped instead, surfacing the
    # failure correctly.
    cancelled = False
    stream_error: aiohttp.ClientError | None = None
    try:
        async for chunk in upstream_response.content.iter_any():
            await downstream.write(chunk)
    except asyncio.CancelledError:
        # Never suppress cancellation — since Python 3.8 it's a
        # ``BaseException`` subclass precisely so catching
        # ``Exception`` won't accidentally swallow it. Release
        # the upstream body and re-raise so the asyncio task
        # cooperatively unwinds (avoids hanging shutdowns /
        # stuck request handlers).
        cancelled = True
        upstream_response.release()
        raise
    except aiohttp.ClientError as e:
        stream_error = e
        logger.warning(
            "OpenRouter compat proxy stream interrupted: %s", type(e).__name__
        )
    finally:
        # The cancelled path already released above (before re-raise).
        if not cancelled:
            upstream_response.release()

    if stream_error is not None:
        # Do NOT call ``write_eof`` or return the prepared
        # ``downstream`` here — aiohttp finalises a returned
        # StreamResponse (writing the terminating chunk /
        # content-length / EOF) even if we skipped ``write_eof``
        # ourselves, which would signal a clean end of stream to
        # the client on top of the truncated body. Instead abort
        # the underlying transport directly so the client's
        # parser surfaces a ``ClientPayloadError`` /
        # ``ServerDisconnectedError`` and the caller can retry /
        # surface the failure instead of silently consuming a
        # corrupt body.
        try:
            downstream.force_close()
        except Exception:  # pragma: no cover - defensive on transport
            pass
        transport = request.transport
        if transport is not None:
            try:
                transport.abort()
            except Exception:  # pragma: no cover - defensive on transport
                pass
        # Re-raise the original stream error so aiohttp treats
        # this handler as having failed; the transport is
        # already aborted above so the client sees an abrupt
        # disconnect either way.
        raise stream_error

    await downstream.write_eof()
    return downstream
|
||||
@@ -1,695 +0,0 @@
|
||||
"""Tests for the OpenRouter compatibility proxy.
|
||||
|
||||
The proxy strips two known forbidden patterns from requests so newer
|
||||
``claude-agent-sdk`` / Claude Code CLI versions can talk to OpenRouter
|
||||
through the unchanged transport. These tests cover both:
|
||||
|
||||
* the pure stripping helpers (deterministic, no I/O), and
|
||||
* the end-to-end proxy behaviour against a fake upstream server, so we
|
||||
catch hop-by-hop header bugs and streaming regressions.
|
||||
|
||||
See ``openrouter_compat_proxy.py`` for the rationale and the upstream
|
||||
issues being worked around.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
import aiohttp
|
||||
import pytest
|
||||
from aiohttp import web
|
||||
|
||||
from backend.copilot.sdk.openrouter_compat_proxy import (
|
||||
_FORBIDDEN_BETA_TOKENS,
|
||||
_HOP_BY_HOP_HEADERS,
|
||||
OpenRouterCompatProxy,
|
||||
clean_request_body_bytes,
|
||||
clean_request_headers,
|
||||
strip_forbidden_anthropic_beta_header,
|
||||
strip_forbidden_betas_from_body,
|
||||
strip_tool_reference_blocks,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# strip_tool_reference_blocks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStripToolReferenceBlocks:
    """The CLI's built-in ToolSearch tool emits ``tool_reference``
    content blocks in ``tool_result.content``. OpenRouter's stricter
    Zod validation rejects them. We drop them entirely — they're
    metadata about which tools were searched, not real model-visible
    content."""

    def test_removes_tool_reference_block_at_top_level(self):
        """A bare tool_reference dict is removed outright (None)."""
        block = {"type": "tool_reference", "tool_name": "find_block"}
        assert strip_tool_reference_blocks(block) is None

    def test_removes_tool_reference_block_from_list(self):
        """Only the tool_reference entry is removed; order of the
        surviving entries is preserved."""
        blocks = [
            {"type": "text", "text": "hello"},
            {"type": "tool_reference", "tool_name": "find_block"},
            {"type": "text", "text": "world"},
        ]
        assert strip_tool_reference_blocks(blocks) == [
            {"type": "text", "text": "hello"},
            {"type": "text", "text": "world"},
        ]

    def test_strips_nested_tool_reference_inside_tool_result(self):
        # The exact shape PR #12294 root-caused: tool_result.content
        # contains the tool_reference block.
        request = {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": "tu_1",
                            "content": [
                                {"type": "text", "text": "result text"},
                                {
                                    "type": "tool_reference",
                                    "tool_name": "mcp__copilot__find_block",
                                },
                            ],
                        }
                    ],
                }
            ]
        }
        cleaned = strip_tool_reference_blocks(request)
        tool_result_content = cleaned["messages"][0]["content"][0]["content"]
        assert tool_result_content == [{"type": "text", "text": "result text"}]

    def test_preserves_unrelated_payloads(self):
        """Payloads without tool_reference blocks pass through equal."""
        payload = {
            "model": "claude-opus-4.6",
            "messages": [{"role": "user", "content": "hi"}],
            "temperature": 0.7,
        }
        assert strip_tool_reference_blocks(payload) == payload

    def test_handles_empty_and_primitive_inputs(self):
        """Empty containers and non-container values are no-ops."""
        assert strip_tool_reference_blocks({}) == {}
        assert strip_tool_reference_blocks([]) == []
        assert strip_tool_reference_blocks("plain string") == "plain string"
        assert strip_tool_reference_blocks(42) == 42
        assert strip_tool_reference_blocks(None) is None

    def test_removes_dict_valued_tool_reference_child_entirely(self):
        # Regression guard: when a tool_reference dict is assigned to
        # a key rather than listed, the helper used to rewrite it to
        # `null` (leaving the parent key with a None value). That is
        # still schema-invalid upstream — remove the key entirely.
        payload = {
            "wrapper": {"type": "tool_reference", "tool_name": "find_block"},
            "keep": "value",
        }
        cleaned = strip_tool_reference_blocks(payload)
        assert "wrapper" not in cleaned
        assert cleaned["keep"] == "value"

    def test_preserves_genuine_none_values_on_non_dict_children(self):
        # A pre-existing None value is NOT confused with a removed
        # tool_reference — the key survives with its None intact.
        payload = {"explicit_null": None, "text": "ok"}
        cleaned = strip_tool_reference_blocks(payload)
        assert cleaned == {"explicit_null": None, "text": "ok"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# strip_forbidden_betas_from_body
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStripForbiddenBetasFromBody:
    """OpenRouter rejects ``context-management-2025-06-27`` in the
    request body's ``betas`` array."""

    def test_removes_forbidden_token_keeps_others(self):
        """Only the forbidden token is filtered out of ``betas``."""
        payload = {
            "model": "claude-opus-4.6",
            "betas": [
                "context-management-2025-06-27",
                "fine-grained-tool-streaming-2025",
            ],
        }
        result = strip_forbidden_betas_from_body(payload)
        assert result["betas"] == ["fine-grained-tool-streaming-2025"]

    def test_removes_betas_field_entirely_when_only_forbidden(self):
        """An all-forbidden ``betas`` list drops the whole field."""
        result = strip_forbidden_betas_from_body(
            {"model": "x", "betas": ["context-management-2025-06-27"]}
        )
        assert "betas" not in result

    def test_no_op_when_no_betas_field(self):
        """Bodies without a ``betas`` field pass through unchanged."""
        assert strip_forbidden_betas_from_body({"model": "x"}) == {"model": "x"}

    def test_no_op_on_non_dict(self):
        """Non-dict payloads are returned as-is."""
        for passthrough in ([1, 2, 3], "plain"):
            assert strip_forbidden_betas_from_body(passthrough) == passthrough

    def test_all_forbidden_tokens_constants_are_recognized(self):
        """Every token in the module constant is actually stripped."""
        for token in _FORBIDDEN_BETA_TOKENS:
            result = strip_forbidden_betas_from_body({"betas": [token, "other"]})
            assert token not in result["betas"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# strip_forbidden_anthropic_beta_header
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStripForbiddenAnthropicBetaHeader:
    """Header-level counterpart of the body cleanup: the forbidden
    beta token is removed from the comma-separated ``anthropic-beta``
    value, and the helper returns None when nothing survives."""

    def test_removes_forbidden_token_keeps_others(self):
        header = (
            "fine-grained-tool-streaming-2025, "
            "context-management-2025-06-27, other-beta"
        )
        assert (
            strip_forbidden_anthropic_beta_header(header)
            == "fine-grained-tool-streaming-2025, other-beta"
        )

    def test_returns_none_when_only_forbidden_token_present(self):
        result = strip_forbidden_anthropic_beta_header(
            "context-management-2025-06-27"
        )
        assert result is None

    def test_passes_through_clean_header(self):
        assert strip_forbidden_anthropic_beta_header("foo, bar") == "foo, bar"

    def test_handles_empty_and_none_input(self):
        assert strip_forbidden_anthropic_beta_header("") == ""
        assert strip_forbidden_anthropic_beta_header(None) is None

    def test_handles_extra_whitespace(self):
        padded = " context-management-2025-06-27 , fine-grained "
        assert strip_forbidden_anthropic_beta_header(padded) == "fine-grained"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# clean_request_body_bytes — combined body-level cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCleanRequestBodyBytes:
    """Combined body-level cleanup: both forbidden patterns must be
    stripped in one JSON decode/clean/encode pass, and anything that
    isn't JSON must pass through untouched."""

    def test_strips_both_patterns_in_one_pass(self):
        original = {
            "model": "claude-opus-4.6",
            "betas": ["context-management-2025-06-27"],
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": "tu_1",
                            "content": [
                                {"type": "tool_reference", "tool_name": "find"},
                                {"type": "text", "text": "ok"},
                            ],
                        }
                    ],
                }
            ],
        }
        raw = json.dumps(original).encode("utf-8")
        cleaned = json.loads(clean_request_body_bytes(raw).decode("utf-8"))
        # The betas list held only the forbidden token → field dropped.
        assert "betas" not in cleaned
        inner = cleaned["messages"][0]["content"][0]["content"]
        assert inner == [{"type": "text", "text": "ok"}]

    def test_passes_through_non_json_body(self):
        garbage = b"\xff\xfe not json at all"
        assert clean_request_body_bytes(garbage) == garbage

    def test_passes_through_empty_body(self):
        assert clean_request_body_bytes(b"") == b""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# clean_request_headers — hop-by-hop + anthropic-beta cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCleanRequestHeaders:
    """Header cleanup: hop-by-hop headers (RFC 7230) are dropped, the
    ``anthropic-beta`` header has the forbidden token stripped, and
    end-to-end headers pass through untouched."""

    def test_drops_hop_by_hop_headers(self):
        """Static hop-by-hop + Host/Content-Length are removed; real
        end-to-end headers survive."""
        headers = {
            "Host": "example.com",
            "Connection": "keep-alive",
            "Content-Length": "42",
            "Authorization": "Bearer xxx",
            "Content-Type": "application/json",
        }
        cleaned = clean_request_headers(headers)
        assert "Host" not in cleaned
        assert "Connection" not in cleaned
        assert "Content-Length" not in cleaned
        assert cleaned["Authorization"] == "Bearer xxx"
        assert cleaned["Content-Type"] == "application/json"

    def test_strips_forbidden_token_from_anthropic_beta_header(self):
        """Only the forbidden token is removed from the header value."""
        headers = {
            "anthropic-beta": "context-management-2025-06-27, other-beta",
            "Authorization": "Bearer x",
        }
        cleaned = clean_request_headers(headers)
        assert cleaned["anthropic-beta"] == "other-beta"

    def test_drops_anthropic_beta_header_when_only_forbidden(self):
        """An all-forbidden header value drops the header entirely."""
        headers = {"anthropic-beta": "context-management-2025-06-27"}
        cleaned = clean_request_headers(headers)
        assert "anthropic-beta" not in cleaned

    def test_hop_by_hop_set_completeness(self):
        # Sanity check: if upstream removes hop-by-hop headers from
        # this set we want to know — keep the canonical RFC 7230 list.
        for required in (
            "connection",
            "transfer-encoding",
            "host",
            "trailer",
            "trailers",
        ):
            assert required in _HOP_BY_HOP_HEADERS

    def test_drops_headers_listed_in_connection_field(self):
        # Per RFC 7230 §6.1 intermediaries must also drop every
        # header name listed in the incoming Connection field value
        # (extension hop-by-hop headers signalled per-connection).
        headers = {
            "Connection": "X-Custom-Hop, Upgrade",
            "X-Custom-Hop": "secret-extension",
            "Authorization": "Bearer x",
            "X-Keep": "ok",
        }
        cleaned = clean_request_headers(headers)
        assert "X-Custom-Hop" not in cleaned
        # Upgrade is a static hop-by-hop header; Connection itself is
        # also dropped; the rest pass through.
        assert "Connection" not in cleaned
        assert cleaned["Authorization"] == "Bearer x"
        assert cleaned["X-Keep"] == "ok"

    def test_connection_token_matching_is_case_insensitive(self):
        """Connection-listed names match case-insensitively, per the
        HTTP field-name rules."""
        headers = {
            "Connection": "x-hop-HEADER",
            "X-Hop-Header": "drop-me",
            "X-Keep": "ok",
        }
        cleaned = clean_request_headers(headers)
        assert "X-Hop-Header" not in cleaned
        assert cleaned["X-Keep"] == "ok"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end: real proxy + fake upstream
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeUpstream:
    """Tiny aiohttp app that records every request the proxy forwards
    so the test can assert on the cleaned payloads."""

    def __init__(self) -> None:
        self.captured: list[dict[str, Any]] = []
        self._runner: web.AppRunner | None = None
        self.port: int = 0

    async def start(self) -> str:
        """Bind a catch-all recording handler on an OS-assigned port
        and return the resulting base URL."""

        async def _record(request: web.Request) -> web.StreamResponse:
            text = await request.text()
            self.captured.append(
                {
                    "method": request.method,
                    "path": request.path_qs,
                    "headers": dict(request.headers.items()),
                    "body": text,
                }
            )
            # Minimal JSON success response so the proxy has
            # something to stream back.
            return web.json_response({"ok": True, "echoed": text})

        app = web.Application()
        app.router.add_route("*", "/{tail:.*}", _record)
        self._runner = web.AppRunner(app)
        await self._runner.setup()
        site = web.TCPSite(self._runner, "127.0.0.1", 0)
        await site.start()
        # Port 0 means the OS picked one — recover it from the
        # bound socket (aiohttp exposes no public accessor).
        server = site._server
        assert server is not None
        sockets = getattr(server, "sockets", None)
        assert sockets is not None
        self.port = sockets[0].getsockname()[1]
        return f"http://127.0.0.1:{self.port}"

    async def stop(self) -> None:
        runner = self._runner
        if runner is not None:
            await runner.cleanup()
            self._runner = None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_strips_tool_reference_block_end_to_end():
    """A request containing a ``tool_reference`` block reaches the
    upstream with that block removed and everything else intact."""
    upstream = _FakeUpstream()
    proxy = OpenRouterCompatProxy(target_base_url=await upstream.start())
    await proxy.start()
    try:
        payload = {
            "model": "claude-opus-4.6",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "hi"},
                        {
                            "type": "tool_reference",
                            "tool_name": "mcp__copilot__find_block",
                        },
                    ],
                }
            ],
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{proxy.local_url}/v1/messages",
                json=payload,
                headers={"Authorization": "Bearer test"},
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()

    assert len(upstream.captured) == 1
    raw_body = upstream.captured[0]["body"]
    # The tool_reference block must NOT be in the upstream-visible body.
    assert '"tool_reference"' not in raw_body
    forwarded = json.loads(raw_body)
    assert forwarded["messages"][0]["content"] == [{"type": "text", "text": "hi"}]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_strips_context_management_beta_header_end_to_end():
    """The forbidden beta token is removed from ``anthropic-beta``
    while the rest of the header value is forwarded untouched."""
    upstream = _FakeUpstream()
    proxy = OpenRouterCompatProxy(target_base_url=await upstream.start())
    await proxy.start()
    try:
        request_headers = {
            "Authorization": "Bearer test",
            "anthropic-beta": "context-management-2025-06-27, other-beta",
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{proxy.local_url}/v1/messages",
                json={"model": "x", "messages": []},
                headers=request_headers,
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()

    forwarded_headers = upstream.captured[0]["headers"]
    # Header is rewritten to remove only the forbidden token, keeping the rest.
    surviving = [
        v for k, v in forwarded_headers.items() if k.lower() == "anthropic-beta"
    ]
    assert "other-beta" in surviving
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_strips_betas_from_request_body_end_to_end():
    """The forbidden token is removed from the body's ``betas`` array
    before the request reaches the upstream."""
    upstream = _FakeUpstream()
    proxy = OpenRouterCompatProxy(target_base_url=await upstream.start())
    await proxy.start()
    try:
        payload = {
            "model": "x",
            "betas": [
                "context-management-2025-06-27",
                "fine-grained-tool-streaming-2025",
            ],
            "messages": [],
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{proxy.local_url}/v1/messages", json=payload
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()

    forwarded = json.loads(upstream.captured[0]["body"])
    # Only the surviving beta should be present.
    assert forwarded["betas"] == ["fine-grained-tool-streaming-2025"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_passes_through_clean_request_unchanged():
    """The proxy must be a no-op for requests that don't contain any of
    the forbidden patterns — no other rewriting allowed."""
    upstream = _FakeUpstream()
    proxy = OpenRouterCompatProxy(target_base_url=await upstream.start())
    await proxy.start()
    payload = {
        "model": "claude-opus-4.6",
        "messages": [{"role": "user", "content": "hello"}],
        "temperature": 0.7,
    }
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{proxy.local_url}/v1/messages",
                json=payload,
                headers={
                    "Authorization": "Bearer test",
                    "Content-Type": "application/json",
                },
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()

    assert json.loads(upstream.captured[0]["body"]) == payload
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_returns_502_on_upstream_failure():
    """If the upstream is unreachable the proxy must return a clear
    502, not silently hang.

    Note: the outer ``client.post`` talks to the *proxy* on localhost,
    not to the dead upstream directly. The proxy is the thing under
    test, so it should always respond with a 502 — we must NOT
    swallow ``aiohttp.ClientError`` / ``asyncio.TimeoutError`` on the
    outer call, because that would mask a proxy crash and turn the
    assertion into a false positive. Let any such exception fail the
    test.
    """
    proxy = OpenRouterCompatProxy(
        target_base_url="http://127.0.0.1:1",  # nothing listening
    )
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{proxy.local_url}/v1/messages",
                json={"model": "x"},
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                assert resp.status == 502
                body_text = await resp.text()
                # Generic error message — no internal hostname leaked.
                assert "upstream error" in body_text
    finally:
        await proxy.stop()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_returns_502_on_upstream_timeout():
    """``aiohttp.ClientTimeout`` raises ``asyncio.TimeoutError`` (not
    ``aiohttp.ClientError``), which previously escaped the except
    block and surfaced as a 500. This regression-guards the 502
    contract for hung upstreams."""

    class _HangingUpstream:
        """Upstream that accepts the request but never finishes the
        response body, forcing the proxy's client timeout to fire."""

        def __init__(self) -> None:
            self._runner: web.AppRunner | None = None
            self.port: int = 0

        async def start(self) -> str:
            async def handler(request: web.Request) -> web.StreamResponse:
                # Hold the response open longer than the proxy's
                # client timeout so aiohttp raises TimeoutError on
                # the proxy side.
                await asyncio.sleep(30)
                return web.Response(status=200)

            app = web.Application()
            app.router.add_route("*", "/{tail:.*}", handler)
            self._runner = web.AppRunner(app)
            await self._runner.setup()
            site = web.TCPSite(self._runner, "127.0.0.1", 0)
            await site.start()
            # Recover the OS-assigned port from the bound socket.
            server = site._server
            assert server is not None
            sockets = getattr(server, "sockets", None)
            assert sockets is not None
            self.port = sockets[0].getsockname()[1]
            return f"http://127.0.0.1:{self.port}"

        async def stop(self) -> None:
            if self._runner is not None:
                await self._runner.cleanup()
                self._runner = None

    upstream = _HangingUpstream()
    upstream_url = await upstream.start()
    # Short proxy timeout so the test finishes quickly.
    proxy = OpenRouterCompatProxy(target_base_url=upstream_url, request_timeout=0.5)
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json={"model": "x"},
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                assert resp.status == 502
                text = await resp.text()
                # Generic error message — no internal hostname leaked.
                assert "upstream error" in text
    finally:
        await proxy.stop()
        await upstream.stop()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_does_not_signal_clean_eof_on_mid_stream_error():
    """Regression guard: if the upstream stream dies mid-body, the
    proxy must NOT call ``write_eof()`` — that would mark the
    downstream response as a complete, valid stream even though the
    client only saw a truncated body. Instead the proxy drops the
    connection so the client's parser surfaces a transport error.

    We simulate the failure with a raw asyncio TCP server that
    sends a chunked-encoding response header plus one partial chunk
    and then hard-closes the socket — this is the one failure mode
    aiohttp's ``iter_any()`` reliably surfaces as an
    ``aiohttp.ClientError`` rather than an ordinary clean EOF.
    """

    class _TruncatingUpstream:
        """Raw TCP server that sends a partial chunked body then
        closes the socket without writing the terminating chunk."""

        def __init__(self) -> None:
            self._server: asyncio.base_events.Server | None = None
            self.port: int = 0

        async def start(self) -> str:
            async def handle_conn(
                reader: asyncio.StreamReader,
                writer: asyncio.StreamWriter,
            ) -> None:
                try:
                    # Read and discard the request until the blank
                    # line — we don't care what the proxy sends.
                    while True:
                        line = await reader.readline()
                        if not line or line == b"\r\n":
                            break
                    # Chunked response with one partial chunk.
                    writer.write(
                        b"HTTP/1.1 200 OK\r\n"
                        b"Content-Type: application/octet-stream\r\n"
                        b"Transfer-Encoding: chunked\r\n"
                        b"Connection: close\r\n"
                        b"\r\n"
                        # One chunk, size 8, content "partial-".
                        b"8\r\n"
                        b"partial-\r\n"
                        # Deliberately DO NOT send the terminating
                        # "0\r\n\r\n" — this is the mid-stream
                        # truncation we're testing.
                    )
                    await writer.drain()
                finally:
                    # Hard-close the socket so the proxy's
                    # iter_any() sees an abrupt end-of-stream.
                    try:
                        writer.transport.abort()
                    except Exception:
                        pass

            self._server = await asyncio.start_server(handle_conn, "127.0.0.1", 0)
            sockets = self._server.sockets
            assert sockets is not None
            self.port = sockets[0].getsockname()[1]
            return f"http://127.0.0.1:{self.port}"

        async def stop(self) -> None:
            if self._server is not None:
                self._server.close()
                await self._server.wait_closed()
                self._server = None

    upstream = _TruncatingUpstream()
    upstream_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=upstream_url, request_timeout=5.0)
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            client_error: Exception | None = None
            try:
                async with client.post(
                    f"{proxy.local_url}/v1/messages",
                    json={"model": "x"},
                    timeout=aiohttp.ClientTimeout(total=10),
                ) as resp:
                    # The client should see either an error raising
                    # here or a truncated body followed by a
                    # transport-level failure on read — both surface
                    # the truncation instead of silently reporting
                    # success.
                    await resp.read()
            except (
                aiohttp.ClientPayloadError,
                aiohttp.ClientConnectionError,
                aiohttp.ServerDisconnectedError,
            ) as e:
                client_error = e
            assert client_error is not None, (
                "Proxy silently consumed an upstream mid-stream "
                "failure and returned a clean EOF to the client — "
                "regression in the stream-error path."
            )
    finally:
        await proxy.stop()
        await upstream.stop()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_local_url_raises_before_start():
    """``local_url`` is only valid once ``start()`` has bound a port."""
    unstarted = OpenRouterCompatProxy(target_base_url="http://example.com")
    with pytest.raises(RuntimeError):
        _ = unstarted.local_url
|
||||
@@ -202,22 +202,11 @@ def test_sdk_exports_hook_event_type(hook_event: str):
|
||||
# OpenRouter compatibility — bundled CLI version pin
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# We're stuck on ``claude-agent-sdk==0.1.45`` (bundled CLI ``2.1.63``)
|
||||
# because every version above introduces a 400 against OpenRouter:
|
||||
#
|
||||
# 1. CLI ``2.1.69`` (= SDK ``0.1.46``) shipped a `tool_reference` content
|
||||
# block in `tool_result.content` that OpenRouter's stricter Zod
|
||||
# validation rejects. See PR
|
||||
# https://github.com/Significant-Gravitas/AutoGPT/pull/12294 for the
|
||||
# forensic write-up that originally pinned us. CLI ``2.1.70`` added
|
||||
# proxy detection that *should* disable the offending block, but two
|
||||
# later attempts (Dependabot bumps to 0.1.55 / 0.1.56) still failed.
|
||||
#
|
||||
# 2. A second regression — the ``context-management-2025-06-27`` beta
|
||||
# header — appeared in some CLI version after ``2.1.91``. Tracked
|
||||
# upstream at
|
||||
# https://github.com/anthropics/claude-agent-sdk-python/issues/789
|
||||
# (still open at the time of writing, no upstream PR yet).
|
||||
# Newer ``claude-agent-sdk`` versions bundle CLI binaries that send
|
||||
# features incompatible with OpenRouter (``tool_reference`` content
|
||||
# blocks, ``context-management-2025-06-27`` beta). We neutralise these
|
||||
# at runtime by injecting ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1``
|
||||
# into the CLI subprocess env (see ``service.py``).
|
||||
#
|
||||
# This test is the cheapest possible regression guard: it pins the
|
||||
# bundled CLI to a known-good version. If anyone bumps
|
||||
@@ -225,89 +214,39 @@ def test_sdk_exports_hook_event_type(hook_event: str):
|
||||
# ``_cli_version.py`` will change and this test will fail with a clear
|
||||
# message that points the next person at the OpenRouter compat issue
|
||||
# instead of letting them silently re-break production.
|
||||
#
|
||||
# Workaround for actually upgrading: set the
|
||||
# ``claude_agent_cli_path`` config option (or the matching env var) to
|
||||
# point at a separately-installed Claude Code CLI binary at a known-good
|
||||
# version, so the SDK Python API surface and the CLI binary version can
|
||||
# be picked independently.
|
||||
|
||||
# CLI versions verified to work against OpenRouter directly (no compat
|
||||
# proxy required) — bisected via the reproduction test in
|
||||
# `cli_openrouter_compat_test.py`. Bundled CLI versions outside this
|
||||
# set are still allowed but ONLY when the compat proxy is enabled (see
|
||||
# the second known-good set below + the test below).
|
||||
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT: frozenset[str] = frozenset(
|
||||
# CLI versions verified to work against OpenRouter when the
|
||||
# ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` env var is set --
|
||||
# bisected via the reproduction test in ``cli_openrouter_compat_test.py``.
|
||||
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = frozenset(
|
||||
{
|
||||
"2.1.63", # claude-agent-sdk 0.1.45 — original pin from PR #12294.
|
||||
"2.1.70", # claude-agent-sdk 0.1.47 — first version with the
|
||||
"2.1.63", # claude-agent-sdk 0.1.45 -- original pin from PR #12294.
|
||||
"2.1.70", # claude-agent-sdk 0.1.47 -- first version with the
|
||||
# tool_reference proxy detection fix; bisect-verified
|
||||
# OpenRouter-safe in #12742.
|
||||
"2.1.97", # claude-agent-sdk 0.1.58 -- works with the
|
||||
# CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var.
|
||||
}
|
||||
)
|
||||
|
||||
# CLI versions verified to work against OpenRouter ONLY when the
|
||||
# in-process `openrouter_compat_proxy` is enabled (which strips the
|
||||
# `tool_reference` content blocks and `context-management-2025-06-27`
|
||||
# beta from outgoing requests). Without the proxy these CLI versions
|
||||
# trip OpenRouter's stricter validation and return 400.
|
||||
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY: frozenset[str] = frozenset(
|
||||
{
|
||||
"2.1.97", # claude-agent-sdk 0.1.58 — needs `claude_agent_use_compat_proxy=True`
|
||||
# due to the upstream regression in
|
||||
# anthropics/claude-agent-sdk-python#789.
|
||||
}
|
||||
)
|
||||
|
||||
# Aggregate set used by the assertion below — the test allows EITHER
|
||||
# a directly-known-good CLI OR a proxy-known-good CLI when the proxy
|
||||
# is enabled in the active config.
|
||||
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = (
|
||||
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT | _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY
|
||||
)
|
||||
|
||||
|
||||
def test_bundled_cli_version_is_known_good_against_openrouter():
|
||||
"""Pin the bundled CLI version so accidental SDK bumps cause a loud,
|
||||
fast failure with a pointer to the OpenRouter compatibility issue.
|
||||
|
||||
A CLI version that's only safe via the compat proxy is allowed only
|
||||
when ``ChatConfig.claude_agent_use_compat_proxy`` is enabled.
|
||||
"""
|
||||
from claude_agent_sdk._cli_version import __cli_version__
|
||||
|
||||
from backend.copilot.config import ChatConfig
|
||||
|
||||
cfg = ChatConfig()
|
||||
proxy_enabled = cfg.claude_agent_use_compat_proxy
|
||||
|
||||
if __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT:
|
||||
return # safe with or without the proxy
|
||||
|
||||
if __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY:
|
||||
assert proxy_enabled, (
|
||||
f"Bundled Claude Code CLI version {__cli_version__!r} is only "
|
||||
"OpenRouter-safe when `claude_agent_use_compat_proxy` is "
|
||||
"enabled, but the active ChatConfig has the proxy disabled. "
|
||||
"Either set `COPILOT__CLAUDE_AGENT_USE_COMPAT_PROXY=true` or "
|
||||
"downgrade `claude-agent-sdk` to a version whose bundled CLI "
|
||||
f"is in {sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT)!r}. "
|
||||
"See https://github.com/anthropics/claude-agent-sdk-python/issues/789."
|
||||
)
|
||||
return
|
||||
|
||||
raise AssertionError(
|
||||
assert __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS, (
|
||||
f"Bundled Claude Code CLI version is {__cli_version__!r}, which is "
|
||||
f"not in any OpenRouter-known-good set "
|
||||
f"not in the OpenRouter-known-good set "
|
||||
f"({sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS)!r}). "
|
||||
"If you intentionally bumped `claude-agent-sdk`, verify the new "
|
||||
"bundled CLI works with OpenRouter against the reproduction test "
|
||||
"in `cli_openrouter_compat_test.py`, then add the new CLI version "
|
||||
"to either `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT` (works "
|
||||
"without the proxy) or `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY` "
|
||||
"(works only with `claude_agent_use_compat_proxy=true`). If you "
|
||||
"cannot make the bundled CLI work either way, set "
|
||||
"`claude_agent_cli_path` to a known-good binary instead. See "
|
||||
"in `cli_openrouter_compat_test.py` (with "
|
||||
"`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`), then add the new "
|
||||
"CLI version to `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS`. If the env "
|
||||
"var is not sufficient, set `claude_agent_cli_path` to a "
|
||||
"known-good binary instead. See "
|
||||
"https://github.com/anthropics/claude-agent-sdk-python/issues/789 "
|
||||
"and https://github.com/Significant-Gravitas/AutoGPT/pull/12294."
|
||||
)
|
||||
|
||||
@@ -1980,13 +1980,6 @@ async def stream_chat_completion_sdk(
|
||||
transcript_content: str = ""
|
||||
state: _RetryState | None = None
|
||||
|
||||
# OpenRouter compat proxy — started inside the try and stopped in finally
|
||||
# when ``ChatConfig.claude_agent_use_compat_proxy`` is enabled. The proxy
|
||||
# rewrites outgoing CLI requests to strip ``tool_reference`` content
|
||||
# blocks and the ``context-management-2025-06-27`` beta so the latest
|
||||
# SDK / CLI versions stop tripping OpenRouter's validation.
|
||||
_compat_proxy: Any = None # OpenRouterCompatProxy | None — lazy import
|
||||
|
||||
# Token usage accumulators — populated from ResultMessage at end of turn
|
||||
turn_prompt_tokens = 0 # uncached input tokens only
|
||||
turn_completion_tokens = 0
|
||||
@@ -2249,96 +2242,14 @@ async def stream_chat_completion_sdk(
|
||||
if sdk_model:
|
||||
sdk_options_kwargs["model"] = sdk_model
|
||||
|
||||
# OpenRouter compatibility proxy — started here so its local URL
|
||||
# can be injected into the CLI subprocess env BEFORE the env dict
|
||||
# is passed to ``ClaudeAgentOptions``. When this flag is on we
|
||||
# transparently rewrite outgoing CLI requests via the proxy
|
||||
# (stripping ``tool_reference`` blocks and the
|
||||
# ``context-management-2025-06-27`` beta) so newer SDK / CLI
|
||||
# versions can talk to OpenRouter without their stricter
|
||||
# validation rejecting the request.
|
||||
if config.claude_agent_use_compat_proxy:
|
||||
# Only start the compat proxy when there's already an
|
||||
# explicit Anthropic-compatible upstream to forward to.
|
||||
# Otherwise we'd be silently routing direct Anthropic /
|
||||
# Claude Code subscription sessions through OpenRouter,
|
||||
# which would break auth and change providers without
|
||||
# operator consent. The explicit upstream can come from:
|
||||
#
|
||||
# 1. ``sdk_env['ANTHROPIC_BASE_URL']`` — caller override;
|
||||
# 2. the process env — lowest-precedence host override;
|
||||
# 3. ``ChatConfig.openrouter_active`` — OpenRouter is
|
||||
# configured as the session's routing provider (i.e.
|
||||
# the only case in which falling back to
|
||||
# ``OPENROUTER_BASE_URL`` is intentional).
|
||||
#
|
||||
# When none of the above hold, log a warning and leave
|
||||
# the CLI to talk to Anthropic directly as usual — the
|
||||
# feature is opt-in and documented as "OpenRouter
|
||||
# compatibility", so quietly no-oping on direct-Anthropic
|
||||
# sessions is the safe default.
|
||||
# Claude Code subscription mode intentionally sets
|
||||
# ``sdk_env['ANTHROPIC_BASE_URL'] = ""`` to *disable* any
|
||||
# base-URL override and keep the CLI talking to Anthropic
|
||||
# directly. Treat an explicit empty string as a hard
|
||||
# "no-proxy" signal so we never silently start the proxy
|
||||
# against a host-wide ``ANTHROPIC_BASE_URL`` or fall back
|
||||
# to OpenRouter when the caller has opted out.
|
||||
sdk_env_map = sdk_env or {}
|
||||
explicit_sdk_env = "ANTHROPIC_BASE_URL" in sdk_env_map
|
||||
sdk_env_value = (
|
||||
sdk_env_map["ANTHROPIC_BASE_URL"] if explicit_sdk_env else None
|
||||
)
|
||||
if explicit_sdk_env and not sdk_env_value:
|
||||
# Empty string from sdk_env → subscription mode opt-out.
|
||||
target_base_url: str | None = None
|
||||
explicit_opt_out = True
|
||||
else:
|
||||
target_base_url = sdk_env_value or os.environ.get("ANTHROPIC_BASE_URL")
|
||||
explicit_opt_out = False
|
||||
# Only fall back to OpenRouter when the session actually
|
||||
# has no base-URL plumbing of its own AND OpenRouter is
|
||||
# the active routing provider AND the caller hasn't
|
||||
# explicitly opted out via an empty sdk_env override.
|
||||
if (
|
||||
not target_base_url
|
||||
and not explicit_opt_out
|
||||
and config.openrouter_active
|
||||
):
|
||||
from backend.util.clients import OPENROUTER_BASE_URL
|
||||
|
||||
target_base_url = OPENROUTER_BASE_URL
|
||||
|
||||
if target_base_url:
|
||||
from backend.copilot.sdk.openrouter_compat_proxy import (
|
||||
OpenRouterCompatProxy,
|
||||
)
|
||||
|
||||
_compat_proxy = OpenRouterCompatProxy(target_base_url=target_base_url)
|
||||
await _compat_proxy.start()
|
||||
# Inject the proxy URL into the SDK env so the spawned
|
||||
# CLI subprocess uses the proxy as its Anthropic
|
||||
# endpoint.
|
||||
if sdk_env is None:
|
||||
sdk_env = {}
|
||||
sdk_env["ANTHROPIC_BASE_URL"] = _compat_proxy.local_url
|
||||
# Log only the local bind URL — upstream is redacted
|
||||
# to match the taint-analysis guidance applied in
|
||||
# ``openrouter_compat_proxy.start``.
|
||||
logger.info(
|
||||
"%s OpenRouter compat proxy active (listening on %s)",
|
||||
log_prefix,
|
||||
_compat_proxy.local_url,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"%s claude_agent_use_compat_proxy is enabled but no "
|
||||
"Anthropic-compatible upstream is configured for this "
|
||||
"session (no ANTHROPIC_BASE_URL override and "
|
||||
"openrouter_active is False); skipping proxy startup "
|
||||
"so the CLI keeps talking to Anthropic directly.",
|
||||
log_prefix,
|
||||
)
|
||||
# Tell the CLI to strip experimental betas (e.g.
|
||||
# ``context-management-2025-06-27``) and ``tool_reference``
|
||||
# content blocks so newer SDK / CLI versions work with
|
||||
# OpenRouter's stricter validation. This single env var
|
||||
# replaces the old in-process compat proxy.
|
||||
if sdk_env is None:
|
||||
sdk_env = {}
|
||||
sdk_env["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1"
|
||||
|
||||
if sdk_env:
|
||||
sdk_options_kwargs["env"] = sdk_env
|
||||
@@ -3012,18 +2923,5 @@ async def stream_chat_completion_sdk(
|
||||
except Exception:
|
||||
logger.warning("%s SDK cleanup failed", log_prefix, exc_info=True)
|
||||
finally:
|
||||
# Tear down the OpenRouter compat proxy if it was started for
|
||||
# this session — releases the bound port and the aiohttp
|
||||
# client. Wrapped so a stop failure can never block the
|
||||
# downstream lock release.
|
||||
if _compat_proxy is not None:
|
||||
try:
|
||||
await _compat_proxy.stop()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"%s OpenRouter compat proxy stop failed",
|
||||
log_prefix,
|
||||
exc_info=True,
|
||||
)
|
||||
# Release stream lock to allow new streams for this session
|
||||
await lock.release()
|
||||
|
||||
@@ -18,7 +18,7 @@ apscheduler = "^3.11.1"
|
||||
autogpt-libs = { path = "../autogpt_libs", develop = true }
|
||||
bleach = { extras = ["css"], version = "^6.2.0" }
|
||||
cachetools = "^5.5.0"
|
||||
claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 ships the broken context-management beta and REQUIRES the openrouter_compat_proxy. See sdk_compat_test.py.
|
||||
claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 -- CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var strips the broken context-management beta. See sdk_compat_test.py.
|
||||
click = "^8.2.0"
|
||||
cryptography = "^46.0"
|
||||
discord-py = "^2.5.2"
|
||||
|
||||
Reference in New Issue
Block a user