refactor(backend): replace compat proxy with CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS env var

This commit is contained in:
majdyz
2026-04-12 10:53:07 +00:00
parent e92ecbbb7c
commit 8e9bb083b2
8 changed files with 46 additions and 1645 deletions

View File

@@ -3,7 +3,7 @@
import os
from typing import Literal
from pydantic import Field, field_validator, model_validator
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings
from backend.util.clients import OPENROUTER_BASE_URL
@@ -186,28 +186,6 @@ class ChatConfig(BaseSettings):
"or the unprefixed `CLAUDE_AGENT_CLI_PATH` environment variable "
"(same pattern as `api_key` / `base_url`).",
)
claude_agent_use_compat_proxy: bool = Field(
default=True,
description="Run the in-process OpenRouter compatibility proxy "
"(`backend.copilot.sdk.openrouter_compat_proxy`) in front of the "
"Claude Code CLI. The proxy strips `tool_reference` content "
"blocks and the `context-management-2025-06-27` beta header / "
"field from outgoing requests so newer SDK / CLI versions stop "
"tripping OpenRouter's stricter validation. Defaults to True "
"because the bundled CLI in `claude-agent-sdk >= 0.1.55` requires "
"the proxy. Orthogonal to `claude_agent_cli_path` — the override "
"picks the binary, the proxy rewrites whatever the binary sends. "
"Disable explicitly only if you've pinned `claude-agent-sdk` to "
"a version whose bundled CLI is in "
"`_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT` (2.1.63 or 2.1.70). "
"Reads from `CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY` or the "
"unprefixed `CLAUDE_AGENT_USE_COMPAT_PROXY` environment "
"variable (same pattern as `claude_agent_cli_path`). Only "
"takes effect when the session has an Anthropic-compatible "
"upstream to forward to — direct-Anthropic sessions skip the "
"proxy entirely to avoid silently re-routing through "
"OpenRouter.",
)
use_openrouter: bool = Field(
default=True,
description="Enable routing API calls through the OpenRouter proxy. "
@@ -355,37 +333,6 @@ class ChatConfig(BaseSettings):
)
return v
@model_validator(mode="before")
@classmethod
def _inject_unprefixed_compat_proxy_env(cls, values):
"""Inject the unprefixed ``CLAUDE_AGENT_USE_COMPAT_PROXY`` env var
as a fallback for the ``claude_agent_use_compat_proxy`` field.
Unlike ``claude_agent_cli_path`` (which defaults to ``None`` and
can use a simple ``if not v`` guard), this field defaults to
``True``, so a ``mode="before"`` field validator cannot
distinguish "caller passed ``False`` explicitly" from "Pydantic
resolved the default ``True``" — both arrive as the raw value.
Using a ``model_validator(mode="before")`` lets us inspect the
full input dict: if the key is absent AND the prefixed env var
``CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY`` is not set, we inject the
unprefixed value so Pydantic can coerce it (``"1"``/``"true"``
→ ``True``). Explicit kwargs always take precedence because
they appear in *values* before this validator runs.
"""
if not isinstance(values, dict):
return values
key = "claude_agent_use_compat_proxy"
if key not in values:
# No explicit kwarg and Pydantic hasn't injected the
# prefixed env var yet — check the unprefixed form.
if os.getenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY") is None:
unprefixed = os.getenv("CLAUDE_AGENT_USE_COMPAT_PROXY")
if unprefixed is not None:
values[key] = unprefixed
return values
# Prompt paths for different contexts
PROMPT_PATHS: dict[str, str] = {
"default": "prompts/chat_system.md",

View File

@@ -19,8 +19,6 @@ _ENV_VARS_TO_CLEAR = (
"OPENAI_BASE_URL",
"CHAT_CLAUDE_AGENT_CLI_PATH",
"CLAUDE_AGENT_CLI_PATH",
"CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY",
"CLAUDE_AGENT_USE_COMPAT_PROXY",
)
@@ -124,63 +122,3 @@ class TestClaudeAgentCliPathEnvFallback:
def test_no_env_var_defaults_to_none(self, monkeypatch: pytest.MonkeyPatch) -> None:
cfg = ChatConfig()
assert cfg.claude_agent_cli_path is None
class TestClaudeAgentUseCompatProxyEnvFallback:
"""``claude_agent_use_compat_proxy`` accepts both the Pydantic-
prefixed ``CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY`` env var and the
unprefixed ``CLAUDE_AGENT_USE_COMPAT_PROXY`` form. Regression
guard for the bool-default pitfall: the field has a non-None
default (``True``), so Pydantic passes the default into the
validator when no value is provided and a naive ``if v is None``
check would never fire.
"""
def test_prefixed_env_var_enables_proxy(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.setenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
cfg = ChatConfig()
assert cfg.claude_agent_use_compat_proxy is True
def test_unprefixed_env_var_enables_proxy(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
cfg = ChatConfig()
assert cfg.claude_agent_use_compat_proxy is True
def test_unprefixed_env_var_respects_falsy_value(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "false")
cfg = ChatConfig()
assert cfg.claude_agent_use_compat_proxy is False
def test_prefixed_wins_over_unprefixed(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""When both are set, the Pydantic-prefixed var is authoritative
so the validator doesn't silently clobber an explicit
``CHAT_...=false`` with an unprefixed ``=true``."""
monkeypatch.setenv("CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY", "false")
monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
cfg = ChatConfig()
assert cfg.claude_agent_use_compat_proxy is False
def test_no_env_var_uses_field_default(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
cfg = ChatConfig()
# Dev-preview branch defaults compat_proxy to True (the
# bundled CLI in claude-agent-sdk 0.1.58 needs the proxy).
assert cfg.claude_agent_use_compat_proxy is True
def test_explicit_kwarg_not_overridden_by_unprefixed_env(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Regression: explicit ChatConfig(claude_agent_use_compat_proxy=False)
must not be overridden by the unprefixed env var."""
monkeypatch.setenv("CLAUDE_AGENT_USE_COMPAT_PROXY", "true")
cfg = ChatConfig(claude_agent_use_compat_proxy=False)
assert cfg.claude_agent_use_compat_proxy is False

View File

@@ -392,17 +392,10 @@ async def _run_cli_against_fake_server(
async def _run_reproduction(
*,
route_through_proxy: bool,
extra_env: dict[str, str] | None = None,
) -> tuple[int, str, str, list[_CapturedRequest]]:
"""Spawn the CLI against a fake Anthropic API and return what the
*upstream* (post-proxy if any) saw.
When ``route_through_proxy`` is True, the CLI talks to the
``OpenRouterCompatProxy`` and the proxy forwards to the fake
upstream. The fake upstream is what records the requests, so the
captured bodies are what OpenRouter would actually have received —
*after* the proxy's stripping pass.
server saw.
"""
cli_path = _resolve_cli_path()
if cli_path is None or not cli_path.is_file():
@@ -415,30 +408,14 @@ async def _run_reproduction(
captured: list[_CapturedRequest] = []
upstream_runner, upstream_port = await _start_fake_anthropic_server(captured)
proxy = None
target_port = upstream_port
try:
if route_through_proxy:
from backend.copilot.sdk.openrouter_compat_proxy import (
OpenRouterCompatProxy,
)
proxy = OpenRouterCompatProxy(
target_base_url=f"http://127.0.0.1:{upstream_port}"
)
await proxy.start()
# Pull the bound port out of the proxy URL.
target_port = int(proxy.local_url.rsplit(":", 1)[1])
returncode, stdout, stderr = await _run_cli_against_fake_server(
cli_path=cli_path,
fake_server_port=target_port,
fake_server_port=upstream_port,
timeout_seconds=30.0,
extra_env=extra_env,
)
finally:
if proxy is not None:
await proxy.stop()
await upstream_runner.cleanup()
return returncode, stdout, stderr, captured
@@ -470,10 +447,9 @@ def _assert_no_forbidden_patterns(
"`claude-agent-sdk` above 0.1.45. See "
"https://github.com/Significant-Gravitas/AutoGPT/pull/12294 and "
"https://github.com/anthropics/claude-agent-sdk-python/issues/789. "
"If you intended to upgrade, you must enable the in-process compat "
"proxy (`CLAUDE_AGENT_USE_COMPAT_PROXY=true` or the prefixed "
"`CHAT_CLAUDE_AGENT_USE_COMPAT_PROXY=true`) or use a known-good "
"CLI binary via `claude_agent_cli_path` (env: "
"If you intended to upgrade, ensure "
"`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1` is set in the SDK env "
"or use a known-good CLI binary via `claude_agent_cli_path` (env: "
"`CLAUDE_AGENT_CLI_PATH` or `CHAT_CLAUDE_AGENT_CLI_PATH`)."
)
@@ -483,74 +459,31 @@ async def test_cli_does_not_send_openrouter_incompatible_features():
"""End-to-end OpenRouter compatibility reproduction (bare CLI path).
Spawns the bundled (or overridden) Claude Code CLI against a fake
Anthropic API server WITHOUT the compat proxy in the loop, captures
every request body it sends, and asserts that none of them contain
the two known OpenRouter-breaking features.
Anthropic API server, captures every request body it sends, and
asserts that none of them contain the two known OpenRouter-breaking
features.
On a clean SDK pin (0.1.45 or 0.1.47, bundled CLI 2.1.63 or 2.1.70)
this passes naturally. On a broken pin (0.1.55+, bundled CLI 2.1.91+)
it fails — that failure IS the bisect signal we use to verify which
SDK versions need the workaround.
Skipped when ``claude_agent_use_compat_proxy=True`` because in that
configuration the operator has explicitly opted into the workaround
and the bare-CLI behaviour is moot — what matters is that the
*upstream* (post-proxy) sees clean requests, which is covered by
``test_cli_via_compat_proxy_emits_clean_requests_to_upstream``.
"""
from backend.copilot.config import ChatConfig
if ChatConfig().claude_agent_use_compat_proxy:
pytest.skip(
"Compat proxy is enabled in the active config — the bare-CLI "
"reproduction is not a meaningful signal here. The proxy-routed "
"variant `test_cli_via_compat_proxy_emits_clean_requests_to_upstream` "
"is the regression guard for this configuration."
)
returncode, _stdout, stderr, captured = await _run_reproduction(
route_through_proxy=False
)
_assert_no_forbidden_patterns(captured, returncode, stderr)
@pytest.mark.asyncio
async def test_cli_via_compat_proxy_emits_clean_requests_to_upstream():
"""End-to-end test for the compat proxy workaround.
Spawns the bundled CLI against an in-process fake Anthropic API
server WITH the ``OpenRouterCompatProxy`` in front, then asserts
that the *upstream* sees clean requests — no `tool_reference`
blocks, no `context-management-2025-06-27` beta header — even
when the bundled CLI itself would have sent them.
This is the regression guard for the proxy: if the proxy ever
stops stripping a known forbidden pattern, this test catches it.
On a SDK version where the bare CLI is already clean (0.1.45 /
0.1.47), the proxy is a no-op and the test passes trivially.
On a SDK version with the regression (0.1.55+), the test fails
if and only if the proxy fails to strip the pattern.
"""
returncode, _stdout, stderr, captured = await _run_reproduction(
route_through_proxy=True
)
returncode, _stdout, stderr, captured = await _run_reproduction()
_assert_no_forbidden_patterns(captured, returncode, stderr)
@pytest.mark.asyncio
async def test_disable_experimental_betas_env_var_strips_headers():
"""Validate whether ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` is
sufficient to strip the ``context-management-2025-06-27`` beta header
when ``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint
(simulating OpenRouter).
"""Validate that ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` strips
the ``context-management-2025-06-27`` beta header when
``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint (simulating
OpenRouter).
If this test passes, the compat proxy is unnecessary and can be
removed — the env var alone is enough. If it fails, the CLI's
provider-detection logic does not honour the env var for custom
base URLs and the proxy remains required.
This is the main regression guard: the env var is injected by
``service.py`` into every CLI subprocess so newer SDK / CLI versions
work with OpenRouter without any proxy.
"""
returncode, _stdout, stderr, captured = await _run_reproduction(
route_through_proxy=False,
extra_env={"CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS": "1"},
)
_assert_no_forbidden_patterns(captured, returncode, stderr)

View File

@@ -1,559 +0,0 @@
"""Tiny in-process HTTP middleware that makes the Claude Code CLI work
against OpenRouter on **any** ``claude-agent-sdk`` version.
Background
----------
We've been pinned at ``claude-agent-sdk==0.1.45`` (bundled CLI 2.1.63)
since `PR #12294`_ because every newer CLI version sends one of two
features that OpenRouter rejects:
1. **`tool_reference` content blocks** in ``tool_result.content`` —
introduced in CLI 2.1.69. OpenRouter's stricter Zod validation
refuses requests containing them with::
messages[N].content[0].content: Invalid input: expected string, received array
2. **`context-management-2025-06-27` beta header** — sent in either the
request body's ``betas`` array or the ``anthropic-beta`` HTTP header.
OpenRouter responds::
400 No endpoints available that support Anthropic's context
management features (context-management-2025-06-27).
Tracked upstream at `claude-agent-sdk-python#789`_.
This module starts a tiny aiohttp server that:
* listens on ``127.0.0.1:RANDOM_PORT``,
* receives every CLI request that would normally go to
``ANTHROPIC_BASE_URL``,
* strips the two forbidden patterns from the body and headers,
* forwards the cleaned request to the real upstream
(``proxy_target_base_url``, e.g. ``https://openrouter.ai/api/v1``),
* streams the response back to the CLI unchanged.
The proxy is wired via :class:`backend.copilot.config.ChatConfig.claude_agent_use_compat_proxy`.
When the flag is on, :mod:`backend.copilot.sdk.service` starts a proxy
per session, sets ``ANTHROPIC_BASE_URL`` in the SDK's ``env`` to point
at the proxy, then tears it down after the session ends.
Why a separate proxy instead of a custom HTTP transport in the SDK?
-------------------------------------------------------------------
The Python SDK delegates **all** HTTP traffic to the bundled Claude
Code CLI subprocess. Once the CLI is spawned, the only seam left is
the network — there is no in-process hook for "modify outgoing
request before it leaves the CLI". The proxy lives at that seam.
This module is intentionally orthogonal to the
:attr:`ChatConfig.claude_agent_cli_path` override:
* ``cli_path`` lets us swap **which CLI binary** we run.
* this proxy lets us **rewrite what any CLI binary sends**.
The two can be combined or used independently.
.. _PR #12294: https://github.com/Significant-Gravitas/AutoGPT/pull/12294
.. _claude-agent-sdk-python#789: https://github.com/anthropics/claude-agent-sdk-python/issues/789
"""
from __future__ import annotations
import asyncio
import json
import logging
from typing import Any
import aiohttp
from aiohttp import web
logger = logging.getLogger(__name__)
# Header values OpenRouter rejects. We strip exactly these tokens from
# the comma-separated ``anthropic-beta`` header value (preserving any
# other betas the CLI requests).
_FORBIDDEN_BETA_TOKENS: frozenset[str] = frozenset(
{
"context-management-2025-06-27",
}
)
# Hop-by-hop headers we must NOT forward through the proxy. Per
# RFC 7230 §6.1, these are connection-specific and must be regenerated
# by each intermediary. ``host`` is also stripped because aiohttp
# generates the correct ``Host`` header for the upstream URL itself.
#
# The canonical header name defined in RFC 7230 §4.4 is ``Trailer``
# (singular); some SDKs / legacy proxies also emit the plural
# ``Trailers`` so we accept both forms just in case. Intermediaries
# must additionally drop every header name listed in the incoming
# ``Connection`` field value (§6.1 "extension hop-by-hop headers") —
# that's handled dynamically by :func:`clean_request_headers`.
_HOP_BY_HOP_HEADERS: frozenset[str] = frozenset(
{
"connection",
"keep-alive",
"proxy-authenticate",
"proxy-authorization",
"te",
"trailer",
"trailers",
"transfer-encoding",
"upgrade",
"host",
# ``content-length`` is stripped because we may rewrite the
# body — aiohttp will recompute it on the upstream request.
"content-length",
}
)
# ---------------------------------------------------------------------------
# Pure helpers — exported so the unit tests can drive them directly without
# spinning up a server.
# ---------------------------------------------------------------------------
def strip_tool_reference_blocks(payload: Any) -> Any:
    """Recursively remove ``tool_reference`` content blocks from
    *payload*, returning the cleaned structure.

    The CLI's built-in ``ToolSearch`` tool emits these as part of
    ``tool_result.content``::

        {"type": "tool_reference", "tool_name": "mcp__copilot__find_block"}

    OpenRouter's stricter Zod validation rejects them. Removing them
    is safe — they are metadata about which tools were searched, not
    real model-visible content. The CLI's *internal* state still
    contains them; only the wire format is rewritten.
    """
    if isinstance(payload, dict):
        # A dict that IS a tool_reference block is signalled to the
        # caller as None ("remove me"); containers below filter it out.
        if payload.get("type") == "tool_reference":
            return None
        scrubbed_dict: dict[str, Any] = {}
        for field, child in payload.items():
            scrubbed_child = strip_tool_reference_blocks(child)
            # A dict-valued child that collapsed to None was a
            # tool_reference block: drop the key entirely instead of
            # writing `null` (schema-strict upstreams would still
            # reject it). Genuine None inputs are preserved because
            # the original value wasn't a dict.
            if scrubbed_child is None and isinstance(child, dict):
                continue
            scrubbed_dict[field] = scrubbed_child
        return scrubbed_dict
    if isinstance(payload, list):
        # Pair each entry with its cleaned form so we can tell a
        # removed block (dict -> None) apart from a genuine None.
        pairs = ((entry, strip_tool_reference_blocks(entry)) for entry in payload)
        return [
            cleaned
            for original, cleaned in pairs
            if not (cleaned is None and isinstance(original, dict))
        ]
    # Scalars (str / int / None / ...) pass through untouched.
    return payload
def strip_forbidden_betas_from_body(payload: Any) -> Any:
    """Remove forbidden tokens from the ``betas`` array of an
    Anthropic Messages API request body, if present.

    Returns a shallow copy with the ``betas`` key cleaned — the input
    dict is never mutated.

    The Messages API accepts a top-level ``betas: list[str]`` parameter
    used to opt into beta features. We drop tokens in
    :data:`_FORBIDDEN_BETA_TOKENS` so OpenRouter's check passes.
    """
    if not isinstance(payload, dict):
        return payload
    raw_betas = payload.get("betas")
    if not isinstance(raw_betas, list):
        # No betas array (or a malformed one) — nothing to rewrite.
        return payload
    surviving = [token for token in raw_betas if token not in _FORBIDDEN_BETA_TOKENS]
    # Shallow-copy without the key, then re-attach only if anything
    # survived — an empty ``betas: []`` is dropped entirely.
    rebuilt = {key: value for key, value in payload.items() if key != "betas"}
    if surviving:
        rebuilt["betas"] = surviving
    return rebuilt
def strip_forbidden_anthropic_beta_header(value: str | None) -> str | None:
    """Return *value* with forbidden tokens removed.

    The ``anthropic-beta`` HTTP header is a comma-separated list of
    feature flags. We strip exactly the forbidden tokens, preserving
    any others. Returns ``None`` if nothing remains (so the caller
    can drop the header entirely).
    """
    if not value:
        # None or empty string — pass through unchanged.
        return value
    surviving = [
        piece.strip()
        for piece in value.split(",")
        if piece.strip() and piece.strip() not in _FORBIDDEN_BETA_TOKENS
    ]
    return ", ".join(surviving) if surviving else None
def clean_request_body_bytes(body_bytes: bytes) -> bytes:
    """Apply both body-level strippers to *body_bytes*, returning the
    cleaned JSON.

    Falls back to the original bytes when the body isn't valid JSON
    (the CLI shouldn't be sending non-JSON to the Messages API, but be
    defensive).
    """
    if not body_bytes:
        return body_bytes
    try:
        parsed = json.loads(body_bytes.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        # Not JSON — forward verbatim rather than guessing.
        return body_bytes
    # tool_reference blocks first, then the betas array; compact
    # separators keep the re-serialised body as small as possible.
    cleaned = strip_forbidden_betas_from_body(strip_tool_reference_blocks(parsed))
    return json.dumps(cleaned, separators=(",", ":")).encode("utf-8")
def _parse_connection_tokens(headers: dict[str, str]) -> set[str]:
"""Extract hop-by-hop header names from the ``Connection`` field."""
connection_header = next(
(value for name, value in headers.items() if name.lower() == "connection"),
"",
)
return {
token.strip().lower() for token in connection_header.split(",") if token.strip()
}
def clean_request_headers(headers: dict[str, str]) -> dict[str, str]:
    """Drop hop-by-hop headers and rewrite ``anthropic-beta`` to remove
    forbidden tokens. Returns a fresh dict the caller can pass through
    to the upstream client without further mutation.

    Per RFC 7230 section 6.1, intermediaries must drop the static hop-by-hop
    set above **and** every header name listed in the incoming
    ``Connection`` field value (case-insensitive). The latter is how
    extension hop-by-hop headers are signalled per-connection.

    Callers should pass an already-materialised ``dict`` (e.g.
    ``dict(request.headers)``) so this function stays simple.
    """
    dynamic_hops = _parse_connection_tokens(headers)
    result: dict[str, str] = {}
    for name, value in headers.items():
        folded = name.lower()
        if folded in _HOP_BY_HOP_HEADERS or folded in dynamic_hops:
            continue
        if folded != "anthropic-beta":
            result[name] = value
            continue
        # ``anthropic-beta``: rewrite the token list; drop the header
        # outright when nothing survives.
        rewritten = strip_forbidden_anthropic_beta_header(value)
        if rewritten is not None:
            result[name] = rewritten
    return result
def clean_response_headers(
    headers: "Any",
) -> list[tuple[str, str]]:
    """Like :func:`clean_request_headers` but preserves multi-valued
    headers (e.g. ``Set-Cookie``). Accepts any mapping-like object
    whose ``.items()`` yields ``(name, value)`` pairs — including
    aiohttp's ``CIMultiDictProxy`` which can have duplicate keys.

    Returns a list of ``(name, value)`` tuples suitable for passing
    to ``web.StreamResponse(headers=...)`` via ``CIMultiDict``.
    """
    # First pass: pick up extension hop-by-hop names from the first
    # ``Connection`` header, if any.
    extension_hops: set[str] = set()
    for name, value in headers.items():
        if name.lower() == "connection":
            extension_hops = {
                piece.strip().lower() for piece in value.split(",") if piece.strip()
            }
            break
    # Second pass: filter and rewrite, keeping duplicates intact.
    kept: list[tuple[str, str]] = []
    for name, value in headers.items():
        folded = name.lower()
        if folded in _HOP_BY_HOP_HEADERS or folded in extension_hops:
            continue
        if folded == "anthropic-beta":
            rewritten = strip_forbidden_anthropic_beta_header(value)
            if rewritten is not None:
                kept.append((name, rewritten))
            continue
        kept.append((name, value))
    return kept
# ---------------------------------------------------------------------------
# The proxy server
# ---------------------------------------------------------------------------
class OpenRouterCompatProxy:
    """In-process HTTP proxy that rewrites Claude Code CLI requests on
    the way to OpenRouter (or any other Anthropic-compatible gateway).

    Usage::

        proxy = OpenRouterCompatProxy(target_base_url="https://openrouter.ai/api/v1")
        await proxy.start()
        try:
            # Spawn the CLI with ANTHROPIC_BASE_URL=proxy.local_url
            ...
        finally:
            await proxy.stop()
    """

    def __init__(
        self,
        target_base_url: str,
        *,
        bind_host: str = "127.0.0.1",
        request_timeout: float = 600.0,
    ) -> None:
        # Args:
        #   target_base_url: upstream to forward to; may itself contain
        #     a path (e.g. ``https://openrouter.ai/api/v1``). A trailing
        #     slash is stripped so path concatenation in ``_handle``
        #     never produces a double slash.
        #   bind_host: local interface to listen on (loopback by default).
        #   request_timeout: used as both sock_connect and sock_read
        #     timeout for the upstream client — see ``start()``.
        self._target_base_url = target_base_url.rstrip("/")
        self._bind_host = bind_host
        self._request_timeout = request_timeout
        # All three stay None until ``start()`` completes successfully.
        self._runner: web.AppRunner | None = None
        self._client: aiohttp.ClientSession | None = None
        self._port: int | None = None

    @property
    def local_url(self) -> str:
        """The ``http://host:port`` URL that the CLI should use as
        ``ANTHROPIC_BASE_URL``. Raises if :meth:`start` has not been
        called yet."""
        if self._port is None:
            raise RuntimeError("Proxy is not running — call start() first.")
        return f"http://{self._bind_host}:{self._port}"

    @property
    def target_base_url(self) -> str:
        """The upstream URL the proxy is forwarding to."""
        return self._target_base_url

    async def start(self) -> None:
        """Bind to a random local port and start serving.

        Cleans up the ``ClientSession`` and the ``AppRunner`` on any
        failure during setup so a partially-initialised proxy never
        leaves resources dangling (covers the
        ``runner.setup() / site.start()`` raise paths in addition to
        the explicit bind-failure branches below).
        """
        if self._runner is not None:
            return  # already started
        # Use sock_connect + sock_read instead of total so long-lived
        # SSE / streaming responses aren't killed after request_timeout.
        # total=None means no cumulative limit; sock_read is the per-chunk
        # idle timeout (time between data arriving on the socket).
        client = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(
                total=None,
                sock_connect=self._request_timeout,
                sock_read=self._request_timeout,
            )
        )
        app = web.Application()
        # Catch every method + path so we can also forward GETs
        # (the CLI may probe profile / model endpoints).
        app.router.add_route("*", "/{tail:.*}", self._handle)
        runner = web.AppRunner(app)
        runner_setup = False
        try:
            await runner.setup()
            runner_setup = True
            # Port 0 asks the OS for any free port; the actual port is
            # read back off the bound socket below.
            site = web.TCPSite(runner, self._bind_host, 0)
            await site.start()
            server = site._server
            if server is None:
                raise RuntimeError("Failed to bind compat proxy server.")
            sockets = getattr(server, "sockets", None)
            if not sockets:
                raise RuntimeError("Compat proxy server has no listening sockets.")
            self._port = sockets[0].getsockname()[1]
        except BaseException:
            # Best-effort teardown — swallow secondary errors so the
            # caller sees the original exception.
            if runner_setup:
                try:
                    await runner.cleanup()
                except Exception:  # pragma: no cover - cleanup-only path
                    logger.exception("compat proxy runner cleanup failed")
            try:
                await client.close()
            except Exception:  # pragma: no cover - cleanup-only path
                logger.exception("compat proxy client close failed")
            raise
        # Only publish the attributes after everything is wired up so
        # ``stop()`` and ``local_url`` observe a consistent state.
        self._client = client
        self._runner = runner
        # Deliberately log only the local bind port — never the
        # upstream URL or any derived component. CodeQL's
        # `py/clear-text-logging-sensitive-data` taint analysis traces
        # everything that originates from a config-supplied URL as
        # potentially-sensitive even after parsing, and the upstream
        # endpoint is anyway discoverable from the config the operator
        # already has access to. The detailed upstream is exposed via
        # the ``target_base_url`` property for callers that need it.
        logger.info(
            "OpenRouter compat proxy listening on %s:%d",
            self._bind_host,
            self._port,
        )

    async def stop(self) -> None:
        """Stop accepting connections and release the port.

        Idempotent: every branch guards on ``None``, so calling this
        twice (or before a successful ``start()``) is safe.
        """
        if self._runner is not None:
            await self._runner.cleanup()
            self._runner = None
        if self._client is not None:
            await self._client.close()
            self._client = None
        self._port = None

    async def __aenter__(self) -> "OpenRouterCompatProxy":
        # Async-context-manager support: ``async with OpenRouterCompatProxy(...)``.
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        await self.stop()

    async def _handle(self, request: web.Request) -> web.StreamResponse:
        """Forward *request* to the upstream after stripping forbidden
        features. Streams the upstream response back to the caller
        chunk-by-chunk so SSE / streamed responses work."""
        if self._client is None:
            # Handler ran after stop() (or before start() finished) —
            # surface a 500 rather than an AttributeError.
            raise web.HTTPInternalServerError(reason="proxy client missing")
        # Build the upstream URL. ``request.path_qs`` includes the
        # query string verbatim. ``request.path`` for ``/v1/messages``
        # is just ``/v1/messages`` — we strip a leading slash and
        # concat with the target base URL.
        upstream_path = request.path_qs
        if not upstream_path.startswith("/"):
            upstream_path = "/" + upstream_path
        # Allow the target_base_url to itself contain a path (e.g.
        # ``https://openrouter.ai/api/v1``). In that case requests to
        # ``/v1/messages`` need to become ``/api/v1/messages``, not
        # ``/api/v1/v1/messages``. Strip a leading ``/v1`` from the
        # incoming path if the target already ends with ``/v1`` (or
        # similar API-version segment).
        # Deduplicate API version prefix: if the target URL already
        # contains a versioned path segment (e.g. ``/api/v1``) and the
        # incoming request path starts with the same segment, strip it
        # to avoid ``/api/v1/v1/messages``.
        from urllib.parse import urlparse

        target_base = self._target_base_url
        target_path = urlparse(target_base).path.rstrip("/")
        if target_path and upstream_path.startswith(target_path + "/"):
            upstream_path = upstream_path[len(target_path) :]
        elif target_path and upstream_path == target_path:
            upstream_path = "/"
        upstream_url = f"{target_base}{upstream_path}"
        # Read the whole request body up front so the strippers can
        # parse and rewrite it before forwarding.
        body_bytes = await request.read()
        cleaned_body = clean_request_body_bytes(body_bytes)
        cleaned_headers = clean_request_headers(dict(request.headers))
        try:
            upstream_response = await self._client.request(
                method=request.method,
                url=upstream_url,
                data=cleaned_body if cleaned_body else None,
                headers=cleaned_headers,
                allow_redirects=False,
            )
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            # ``aiohttp.ClientTimeout`` raises ``asyncio.TimeoutError``
            # (not ``aiohttp.ClientError``) on hung upstreams, so both
            # must be caught here to surface the explicit 502 failure
            # mode this proxy guarantees.
            #
            # Log the detailed error for ops, but return a generic
            # message to the caller — exception strings can leak
            # internal hostnames, ports, or stack frames (CodeQL
            # `py/stack-trace-exposure`).
            logger.warning(
                "OpenRouter compat proxy upstream error: %s", type(e).__name__
            )
            return web.Response(status=502, text="upstream error")
        # Stream the response back unchanged (apart from hop-by-hop
        # header filtering). Use clean_response_headers to preserve
        # multi-valued headers like Set-Cookie that dict() would drop.
        from multidict import CIMultiDict

        downstream = web.StreamResponse(
            status=upstream_response.status,
            headers=CIMultiDict(clean_response_headers(upstream_response.headers)),
        )
        await downstream.prepare(request)
        # Track whether the stream terminated cleanly. A mid-stream
        # ``aiohttp.ClientError`` means the upstream died before
        # finishing; calling ``write_eof()`` on that partial response
        # would signal "complete stream" to the downstream client and
        # silently corrupt the body. Skip the EOF on the error path
        # so the client's connection is dropped instead, surfacing the
        # failure correctly.
        cancelled = False
        stream_error: aiohttp.ClientError | None = None
        try:
            async for chunk in upstream_response.content.iter_any():
                await downstream.write(chunk)
        except asyncio.CancelledError:
            # Never suppress cancellation — since Python 3.8 it's a
            # ``BaseException`` subclass precisely so catching
            # ``Exception`` won't accidentally swallow it. Release
            # the upstream body and re-raise so the asyncio task
            # cooperatively unwinds (avoids hanging shutdowns /
            # stuck request handlers).
            cancelled = True
            upstream_response.release()
            raise
        except aiohttp.ClientError as e:
            stream_error = e
            logger.warning(
                "OpenRouter compat proxy stream interrupted: %s", type(e).__name__
            )
        finally:
            if not cancelled:
                upstream_response.release()
        if stream_error is not None:
            # Do NOT call ``write_eof`` or return the prepared
            # ``downstream`` here — aiohttp finalises a returned
            # StreamResponse (writing the terminating chunk /
            # content-length / EOF) even if we skipped ``write_eof``
            # ourselves, which would signal a clean end of stream to
            # the client on top of the truncated body. Instead abort
            # the underlying transport directly so the client's
            # parser surfaces a ``ClientPayloadError`` /
            # ``ServerDisconnectedError`` and the caller can retry /
            # surface the failure instead of silently consuming a
            # corrupt body.
            try:
                downstream.force_close()
            except Exception:  # pragma: no cover - defensive on transport
                pass
            transport = request.transport
            if transport is not None:
                try:
                    transport.abort()
                except Exception:  # pragma: no cover - defensive on transport
                    pass
            # Re-raise the original stream error so aiohttp treats
            # this handler as having failed; the transport is
            # already aborted above so the client sees an abrupt
            # disconnect either way.
            raise stream_error
        await downstream.write_eof()
        return downstream

View File

@@ -1,695 +0,0 @@
"""Tests for the OpenRouter compatibility proxy.
The proxy strips two known forbidden patterns from requests so newer
``claude-agent-sdk`` / Claude Code CLI versions can talk to OpenRouter
through the unchanged transport. These tests cover both:
* the pure stripping helpers (deterministic, no I/O), and
* the end-to-end proxy behaviour against a fake upstream server, so we
catch hop-by-hop header bugs and streaming regressions.
See ``openrouter_compat_proxy.py`` for the rationale and the upstream
issues being worked around.
"""
from __future__ import annotations
import asyncio
import json
from typing import Any
import aiohttp
import pytest
from aiohttp import web
from backend.copilot.sdk.openrouter_compat_proxy import (
_FORBIDDEN_BETA_TOKENS,
_HOP_BY_HOP_HEADERS,
OpenRouterCompatProxy,
clean_request_body_bytes,
clean_request_headers,
strip_forbidden_anthropic_beta_header,
strip_forbidden_betas_from_body,
strip_tool_reference_blocks,
)
# ---------------------------------------------------------------------------
# strip_tool_reference_blocks
# ---------------------------------------------------------------------------
class TestStripToolReferenceBlocks:
    """Unit tests for ``strip_tool_reference_blocks``.

    The Claude Code CLI's built-in ToolSearch tool injects
    ``tool_reference`` content blocks into ``tool_result.content``;
    OpenRouter's stricter Zod validation rejects them, so the helper
    must drop them wholesale while leaving everything else untouched.
    """

    def test_removes_tool_reference_block_at_top_level(self):
        ref_block = {"type": "tool_reference", "tool_name": "find_block"}
        assert strip_tool_reference_blocks(ref_block) is None

    def test_removes_tool_reference_block_from_list(self):
        mixed = [
            {"type": "text", "text": "hello"},
            {"type": "tool_reference", "tool_name": "find_block"},
            {"type": "text", "text": "world"},
        ]
        survivors = strip_tool_reference_blocks(mixed)
        assert survivors == [
            {"type": "text", "text": "hello"},
            {"type": "text", "text": "world"},
        ]

    def test_strips_nested_tool_reference_inside_tool_result(self):
        # Exact shape root-caused in PR #12294: the offending block
        # lives inside tool_result.content.
        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": "tu_1",
                            "content": [
                                {"type": "text", "text": "result text"},
                                {
                                    "type": "tool_reference",
                                    "tool_name": "mcp__copilot__find_block",
                                },
                            ],
                        }
                    ],
                }
            ]
        }
        cleaned = strip_tool_reference_blocks(payload)
        inner = cleaned["messages"][0]["content"][0]["content"]
        assert inner == [{"type": "text", "text": "result text"}]

    def test_preserves_unrelated_payloads(self):
        untouched = {
            "model": "claude-opus-4.6",
            "messages": [{"role": "user", "content": "hi"}],
            "temperature": 0.7,
        }
        assert strip_tool_reference_blocks(untouched) == untouched

    def test_handles_empty_and_primitive_inputs(self):
        # Primitives and empty containers pass straight through.
        for value in ({}, [], "plain string", 42):
            assert strip_tool_reference_blocks(value) == value
        assert strip_tool_reference_blocks(None) is None

    def test_removes_dict_valued_tool_reference_child_entirely(self):
        # Regression guard: a dict-valued tool_reference child used to
        # be rewritten to ``null`` (still schema-invalid upstream) —
        # the parent key must disappear entirely instead.
        payload = {
            "wrapper": {"type": "tool_reference", "tool_name": "find_block"},
            "keep": "value",
        }
        cleaned = strip_tool_reference_blocks(payload)
        assert "wrapper" not in cleaned
        assert cleaned["keep"] == "value"

    def test_preserves_genuine_none_values_on_non_dict_children(self):
        payload = {"explicit_null": None, "text": "ok"}
        assert strip_tool_reference_blocks(payload) == {
            "explicit_null": None,
            "text": "ok",
        }
# ---------------------------------------------------------------------------
# strip_forbidden_betas_from_body
# ---------------------------------------------------------------------------
class TestStripForbiddenBetasFromBody:
    """OpenRouter returns 400 for ``context-management-2025-06-27`` in
    the request body's ``betas`` array — the helper must excise it."""

    def test_removes_forbidden_token_keeps_others(self):
        cleaned = strip_forbidden_betas_from_body(
            {
                "model": "claude-opus-4.6",
                "betas": [
                    "context-management-2025-06-27",
                    "fine-grained-tool-streaming-2025",
                ],
            }
        )
        assert cleaned["betas"] == ["fine-grained-tool-streaming-2025"]

    def test_removes_betas_field_entirely_when_only_forbidden(self):
        cleaned = strip_forbidden_betas_from_body(
            {"model": "x", "betas": ["context-management-2025-06-27"]}
        )
        assert "betas" not in cleaned

    def test_no_op_when_no_betas_field(self):
        assert strip_forbidden_betas_from_body({"model": "x"}) == {"model": "x"}

    def test_no_op_on_non_dict(self):
        # Non-dict payloads must pass through untouched.
        for value in ([1, 2, 3], "plain"):
            assert strip_forbidden_betas_from_body(value) == value

    def test_all_forbidden_tokens_constants_are_recognized(self):
        # Every token in the module-level constant must actually be stripped.
        for token in _FORBIDDEN_BETA_TOKENS:
            cleaned = strip_forbidden_betas_from_body({"betas": [token, "other"]})
            assert token not in cleaned["betas"]
# ---------------------------------------------------------------------------
# strip_forbidden_anthropic_beta_header
# ---------------------------------------------------------------------------
class TestStripForbiddenAnthropicBetaHeader:
    """Header-level counterpart of the body cleanup: the
    ``anthropic-beta`` value is rewritten so only allowed tokens
    survive; ``None`` signals the header should be dropped."""

    def test_removes_forbidden_token_keeps_others(self):
        raw = "fine-grained-tool-streaming-2025, context-management-2025-06-27, other-beta"
        cleaned = strip_forbidden_anthropic_beta_header(raw)
        assert cleaned == "fine-grained-tool-streaming-2025, other-beta"

    def test_returns_none_when_only_forbidden_token_present(self):
        only_forbidden = "context-management-2025-06-27"
        assert strip_forbidden_anthropic_beta_header(only_forbidden) is None

    def test_passes_through_clean_header(self):
        assert strip_forbidden_anthropic_beta_header("foo, bar") == "foo, bar"

    def test_handles_empty_and_none_input(self):
        assert strip_forbidden_anthropic_beta_header("") == ""
        assert strip_forbidden_anthropic_beta_header(None) is None

    def test_handles_extra_whitespace(self):
        padded = " context-management-2025-06-27 , fine-grained "
        assert strip_forbidden_anthropic_beta_header(padded) == "fine-grained"
# ---------------------------------------------------------------------------
# clean_request_body_bytes — combined body-level cleanup
# ---------------------------------------------------------------------------
class TestCleanRequestBodyBytes:
    """``clean_request_body_bytes`` applies both body-level rewrites
    (beta stripping + tool_reference removal) in one pass and must be
    a byte-level no-op for anything it cannot parse as JSON."""

    def test_strips_both_patterns_in_one_pass(self):
        original = {
            "model": "claude-opus-4.6",
            "betas": ["context-management-2025-06-27"],
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": "tu_1",
                            "content": [
                                {"type": "tool_reference", "tool_name": "find"},
                                {"type": "text", "text": "ok"},
                            ],
                        }
                    ],
                }
            ],
        }
        raw = json.dumps(original).encode("utf-8")
        cleaned = json.loads(clean_request_body_bytes(raw).decode("utf-8"))
        # The only beta was forbidden, so the whole field is dropped.
        assert "betas" not in cleaned
        inner = cleaned["messages"][0]["content"][0]["content"]
        assert inner == [{"type": "text", "text": "ok"}]

    def test_passes_through_non_json_body(self):
        garbage = b"\xff\xfe not json at all"
        assert clean_request_body_bytes(garbage) == garbage

    def test_passes_through_empty_body(self):
        assert clean_request_body_bytes(b"") == b""
# ---------------------------------------------------------------------------
# clean_request_headers — hop-by-hop + anthropic-beta cleanup
# ---------------------------------------------------------------------------
class TestCleanRequestHeaders:
    """Header cleanup: drop hop-by-hop headers (RFC 7230) and rewrite
    the ``anthropic-beta`` value before forwarding upstream."""

    def test_drops_hop_by_hop_headers(self):
        cleaned = clean_request_headers(
            {
                "Host": "example.com",
                "Connection": "keep-alive",
                "Content-Length": "42",
                "Authorization": "Bearer xxx",
                "Content-Type": "application/json",
            }
        )
        for dropped in ("Host", "Connection", "Content-Length"):
            assert dropped not in cleaned
        assert cleaned["Authorization"] == "Bearer xxx"
        assert cleaned["Content-Type"] == "application/json"

    def test_strips_forbidden_token_from_anthropic_beta_header(self):
        cleaned = clean_request_headers(
            {
                "anthropic-beta": "context-management-2025-06-27, other-beta",
                "Authorization": "Bearer x",
            }
        )
        assert cleaned["anthropic-beta"] == "other-beta"

    def test_drops_anthropic_beta_header_when_only_forbidden(self):
        cleaned = clean_request_headers(
            {"anthropic-beta": "context-management-2025-06-27"}
        )
        assert "anthropic-beta" not in cleaned

    def test_hop_by_hop_set_completeness(self):
        # Canary: keep the canonical RFC 7230 names in the module set so
        # upstream edits that shrink it are caught loudly.
        for name in ("connection", "transfer-encoding", "host", "trailer", "trailers"):
            assert name in _HOP_BY_HOP_HEADERS

    def test_drops_headers_listed_in_connection_field(self):
        # RFC 7230 §6.1: an intermediary must also drop every header
        # named in the incoming Connection field value (extension
        # hop-by-hop headers signalled per-connection).
        cleaned = clean_request_headers(
            {
                "Connection": "X-Custom-Hop, Upgrade",
                "X-Custom-Hop": "secret-extension",
                "Authorization": "Bearer x",
                "X-Keep": "ok",
            }
        )
        assert "X-Custom-Hop" not in cleaned
        # Connection itself is hop-by-hop; the rest pass through.
        assert "Connection" not in cleaned
        assert cleaned["Authorization"] == "Bearer x"
        assert cleaned["X-Keep"] == "ok"

    def test_connection_token_matching_is_case_insensitive(self):
        cleaned = clean_request_headers(
            {
                "Connection": "x-hop-HEADER",
                "X-Hop-Header": "drop-me",
                "X-Keep": "ok",
            }
        )
        assert "X-Hop-Header" not in cleaned
        assert cleaned["X-Keep"] == "ok"
# ---------------------------------------------------------------------------
# End-to-end: real proxy + fake upstream
# ---------------------------------------------------------------------------
class _FakeUpstream:
    """Minimal aiohttp app that records every request the proxy
    forwards, so tests can assert on exactly what reached upstream."""

    def __init__(self) -> None:
        self.captured: list[dict[str, Any]] = []
        self._runner: web.AppRunner | None = None
        self.port: int = 0

    async def start(self) -> str:
        """Bind to an ephemeral localhost port and return the base URL."""

        async def handler(request: web.Request) -> web.StreamResponse:
            payload = await request.text()
            self.captured.append(
                {
                    "method": request.method,
                    "path": request.path_qs,
                    "headers": {k: v for k, v in request.headers.items()},
                    "body": payload,
                }
            )
            # Minimal JSON success so the proxy has a body to stream back.
            return web.json_response({"ok": True, "echoed": payload})

        app = web.Application()
        app.router.add_route("*", "/{tail:.*}", handler)
        self._runner = web.AppRunner(app)
        await self._runner.setup()
        site = web.TCPSite(self._runner, "127.0.0.1", 0)
        await site.start()
        server = site._server
        assert server is not None
        sockets = getattr(server, "sockets", None)
        assert sockets is not None
        self.port = sockets[0].getsockname()[1]
        return f"http://127.0.0.1:{self.port}"

    async def stop(self) -> None:
        if self._runner is not None:
            await self._runner.cleanup()
            self._runner = None
@pytest.mark.asyncio
async def test_proxy_strips_tool_reference_block_end_to_end():
    """Full round-trip: a ``tool_reference`` block in the request body
    must never reach the upstream server."""
    upstream = _FakeUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url)
    await proxy.start()
    try:
        request_body = {
            "model": "claude-opus-4.6",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "hi"},
                        {
                            "type": "tool_reference",
                            "tool_name": "mcp__copilot__find_block",
                        },
                    ],
                }
            ],
        }
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json=request_body,
                headers={"Authorization": "Bearer test"},
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()
    assert len(upstream.captured) == 1
    raw_forwarded = upstream.captured[0]["body"]
    # The tool_reference block must NOT appear in the upstream-visible body.
    assert '"tool_reference"' not in raw_forwarded
    forwarded = json.loads(raw_forwarded)
    assert forwarded["messages"][0]["content"] == [{"type": "text", "text": "hi"}]
@pytest.mark.asyncio
async def test_proxy_strips_context_management_beta_header_end_to_end():
    """Only the forbidden token is removed from ``anthropic-beta``;
    remaining tokens must still reach the upstream."""
    upstream = _FakeUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url)
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json={"model": "x", "messages": []},
                headers={
                    "Authorization": "Bearer test",
                    "anthropic-beta": "context-management-2025-06-27, other-beta",
                },
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()
    seen_headers = upstream.captured[0]["headers"]
    # The header is rewritten to keep only the non-forbidden token.
    assert any(
        name.lower() == "anthropic-beta" and value == "other-beta"
        for name, value in seen_headers.items()
    )
@pytest.mark.asyncio
async def test_proxy_strips_betas_from_request_body_end_to_end():
    """The body-level ``betas`` cleanup must apply on the wire, not
    just in the pure helper."""
    upstream = _FakeUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url)
    await proxy.start()
    try:
        request_body = {
            "model": "x",
            "betas": [
                "context-management-2025-06-27",
                "fine-grained-tool-streaming-2025",
            ],
            "messages": [],
        }
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json=request_body,
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()
    forwarded = json.loads(upstream.captured[0]["body"])
    # Only the allowed beta token survives the rewrite.
    assert forwarded["betas"] == ["fine-grained-tool-streaming-2025"]
@pytest.mark.asyncio
async def test_proxy_passes_through_clean_request_unchanged():
    """Requests containing none of the forbidden patterns must be
    forwarded verbatim — the proxy may not rewrite anything else."""
    upstream = _FakeUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url)
    await proxy.start()
    try:
        request_body = {
            "model": "claude-opus-4.6",
            "messages": [{"role": "user", "content": "hello"}],
            "temperature": 0.7,
        }
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json=request_body,
                headers={
                    "Authorization": "Bearer test",
                    "Content-Type": "application/json",
                },
            ) as resp:
                assert resp.status == 200
                await resp.read()
    finally:
        await proxy.stop()
        await upstream.stop()
    assert json.loads(upstream.captured[0]["body"]) == request_body
@pytest.mark.asyncio
async def test_proxy_returns_502_on_upstream_failure():
    """An unreachable upstream must surface as a clear 502 from the
    proxy, never a silent hang.

    Note: the client below talks to the *proxy* on localhost, not to
    the dead upstream. Do NOT swallow ``aiohttp.ClientError`` /
    ``asyncio.TimeoutError`` on this outer call — that would mask a
    proxy crash and turn the assertion into a false positive; let any
    such exception fail the test.
    """
    proxy = OpenRouterCompatProxy(
        target_base_url="http://127.0.0.1:1",  # nothing listening
    )
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json={"model": "x"},
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                assert resp.status == 502
                body_text = await resp.text()
                # Generic error message — no internal hostname leaked.
                assert "upstream error" in body_text
    finally:
        await proxy.stop()
@pytest.mark.asyncio
async def test_proxy_returns_502_on_upstream_timeout():
    """``aiohttp.ClientTimeout`` raises ``asyncio.TimeoutError`` (not
    ``aiohttp.ClientError``), which previously escaped the except block
    and surfaced as a 500. Regression-guards the 502 contract for hung
    upstreams."""

    class _HangingUpstream:
        """Accepts the request but never completes the response body,
        forcing the proxy-side client timeout to fire."""

        def __init__(self) -> None:
            self._runner: web.AppRunner | None = None
            self.port: int = 0

        async def start(self) -> str:
            async def handler(request: web.Request) -> web.StreamResponse:
                # Outlive the proxy's client timeout so aiohttp raises
                # TimeoutError on the proxy side.
                await asyncio.sleep(30)
                return web.Response(status=200)

            app = web.Application()
            app.router.add_route("*", "/{tail:.*}", handler)
            self._runner = web.AppRunner(app)
            await self._runner.setup()
            site = web.TCPSite(self._runner, "127.0.0.1", 0)
            await site.start()
            server = site._server
            assert server is not None
            sockets = getattr(server, "sockets", None)
            assert sockets is not None
            self.port = sockets[0].getsockname()[1]
            return f"http://127.0.0.1:{self.port}"

        async def stop(self) -> None:
            if self._runner is not None:
                await self._runner.cleanup()
                self._runner = None

    upstream = _HangingUpstream()
    base_url = await upstream.start()
    # Short proxy-side timeout keeps the test fast.
    proxy = OpenRouterCompatProxy(target_base_url=base_url, request_timeout=0.5)
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            async with client.post(
                f"{proxy.local_url}/v1/messages",
                json={"model": "x"},
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                assert resp.status == 502
                body_text = await resp.text()
                # Generic error message — no internal hostname leaked.
                assert "upstream error" in body_text
    finally:
        await proxy.stop()
        await upstream.stop()
@pytest.mark.asyncio
async def test_proxy_does_not_signal_clean_eof_on_mid_stream_error():
    """Regression guard: when the upstream dies mid-body the proxy must
    NOT call ``write_eof()`` — that would present the truncated body to
    the client as a complete, valid stream. The proxy instead drops the
    connection so the client's parser raises a transport error.

    The failure is simulated with a raw asyncio TCP server that sends a
    chunked-encoding response header plus one partial chunk and then
    hard-closes the socket — the one failure mode aiohttp's
    ``iter_any()`` reliably surfaces as an ``aiohttp.ClientError``
    rather than an ordinary clean EOF.
    """

    class _TruncatingUpstream:
        """Raw TCP server that emits a partial chunked body, then
        closes the socket without the terminating chunk."""

        def __init__(self) -> None:
            self._server: asyncio.base_events.Server | None = None
            self.port: int = 0

        async def start(self) -> str:
            async def handle_conn(
                reader: asyncio.StreamReader,
                writer: asyncio.StreamWriter,
            ) -> None:
                try:
                    # Drain the request head up to the blank line — the
                    # request contents are irrelevant here.
                    while True:
                        line = await reader.readline()
                        if not line or line == b"\r\n":
                            break
                    # Chunked response carrying exactly one partial chunk.
                    writer.write(
                        b"HTTP/1.1 200 OK\r\n"
                        b"Content-Type: application/octet-stream\r\n"
                        b"Transfer-Encoding: chunked\r\n"
                        b"Connection: close\r\n"
                        b"\r\n"
                        # One chunk, size 8, content "partial-".
                        b"8\r\n"
                        b"partial-\r\n"
                        # Deliberately omit the terminating "0\r\n\r\n" —
                        # this is the mid-stream truncation under test.
                    )
                    await writer.drain()
                finally:
                    # Hard-close so the proxy's iter_any() sees an abrupt
                    # end-of-stream rather than a clean close.
                    try:
                        writer.transport.abort()
                    except Exception:
                        pass

            self._server = await asyncio.start_server(handle_conn, "127.0.0.1", 0)
            sockets = self._server.sockets
            assert sockets is not None
            self.port = sockets[0].getsockname()[1]
            return f"http://127.0.0.1:{self.port}"

        async def stop(self) -> None:
            if self._server is not None:
                self._server.close()
                await self._server.wait_closed()
                self._server = None

    upstream = _TruncatingUpstream()
    base_url = await upstream.start()
    proxy = OpenRouterCompatProxy(target_base_url=base_url, request_timeout=5.0)
    await proxy.start()
    try:
        async with aiohttp.ClientSession() as client:
            observed_error: Exception | None = None
            try:
                async with client.post(
                    f"{proxy.local_url}/v1/messages",
                    json={"model": "x"},
                    timeout=aiohttp.ClientTimeout(total=10),
                ) as resp:
                    # Either the request raises here, or the truncated
                    # body triggers a transport-level failure on read —
                    # both surface the truncation instead of silently
                    # reporting success.
                    await resp.read()
            except (
                aiohttp.ClientPayloadError,
                aiohttp.ClientConnectionError,
                aiohttp.ServerDisconnectedError,
            ) as exc:
                observed_error = exc
            assert observed_error is not None, (
                "Proxy silently consumed an upstream mid-stream "
                "failure and returned a clean EOF to the client — "
                "regression in the stream-error path."
            )
    finally:
        await proxy.stop()
        await upstream.stop()
@pytest.mark.asyncio
async def test_proxy_local_url_raises_before_start():
    """``local_url`` is only defined once the proxy has been started."""
    never_started = OpenRouterCompatProxy(target_base_url="http://example.com")
    with pytest.raises(RuntimeError):
        _ = never_started.local_url

View File

@@ -202,22 +202,11 @@ def test_sdk_exports_hook_event_type(hook_event: str):
# OpenRouter compatibility — bundled CLI version pin
# ---------------------------------------------------------------------------
#
# We're stuck on ``claude-agent-sdk==0.1.45`` (bundled CLI ``2.1.63``)
# because every version above introduces a 400 against OpenRouter:
#
# 1. CLI ``2.1.69`` (= SDK ``0.1.46``) shipped a `tool_reference` content
# block in `tool_result.content` that OpenRouter's stricter Zod
# validation rejects. See PR
# https://github.com/Significant-Gravitas/AutoGPT/pull/12294 for the
# forensic write-up that originally pinned us. CLI ``2.1.70`` added
# proxy detection that *should* disable the offending block, but two
# later attempts (Dependabot bumps to 0.1.55 / 0.1.56) still failed.
#
# 2. A second regression — the ``context-management-2025-06-27`` beta
# header — appeared in some CLI version after ``2.1.91``. Tracked
# upstream at
# https://github.com/anthropics/claude-agent-sdk-python/issues/789
# (still open at the time of writing, no upstream PR yet).
# Newer ``claude-agent-sdk`` versions bundle CLI binaries that send
# features incompatible with OpenRouter (``tool_reference`` content
# blocks, ``context-management-2025-06-27`` beta). We neutralise these
# at runtime by injecting ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1``
# into the CLI subprocess env (see ``service.py``).
#
# This test is the cheapest possible regression guard: it pins the
# bundled CLI to a known-good version. If anyone bumps
@@ -225,89 +214,39 @@ def test_sdk_exports_hook_event_type(hook_event: str):
# ``_cli_version.py`` will change and this test will fail with a clear
# message that points the next person at the OpenRouter compat issue
# instead of letting them silently re-break production.
#
# Workaround for actually upgrading: set the
# ``claude_agent_cli_path`` config option (or the matching env var) to
# point at a separately-installed Claude Code CLI binary at a known-good
# version, so the SDK Python API surface and the CLI binary version can
# be picked independently.
# CLI versions verified to work against OpenRouter directly (no compat
# proxy required) — bisected via the reproduction test in
# `cli_openrouter_compat_test.py`. Bundled CLI versions outside this
# set are still allowed but ONLY when the compat proxy is enabled (see
# the second known-good set below + the test below).
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT: frozenset[str] = frozenset(
# CLI versions verified to work against OpenRouter when the
# ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` env var is set --
# bisected via the reproduction test in ``cli_openrouter_compat_test.py``.
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = frozenset(
{
"2.1.63", # claude-agent-sdk 0.1.45 original pin from PR #12294.
"2.1.70", # claude-agent-sdk 0.1.47 first version with the
"2.1.63", # claude-agent-sdk 0.1.45 -- original pin from PR #12294.
"2.1.70", # claude-agent-sdk 0.1.47 -- first version with the
# tool_reference proxy detection fix; bisect-verified
# OpenRouter-safe in #12742.
"2.1.97", # claude-agent-sdk 0.1.58 -- works with the
# CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var.
}
)
# CLI versions verified to work against OpenRouter ONLY when the
# in-process `openrouter_compat_proxy` is enabled (which strips the
# `tool_reference` content blocks and `context-management-2025-06-27`
# beta from outgoing requests). Without the proxy these CLI versions
# trip OpenRouter's stricter validation and return 400.
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY: frozenset[str] = frozenset(
{
"2.1.97", # claude-agent-sdk 0.1.58 — needs `claude_agent_use_compat_proxy=True`
# due to the upstream regression in
# anthropics/claude-agent-sdk-python#789.
}
)
# Aggregate set used by the assertion below — the test allows EITHER
# a directly-known-good CLI OR a proxy-known-good CLI when the proxy
# is enabled in the active config.
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = (
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT | _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY
)
def test_bundled_cli_version_is_known_good_against_openrouter():
"""Pin the bundled CLI version so accidental SDK bumps cause a loud,
fast failure with a pointer to the OpenRouter compatibility issue.
A CLI version that's only safe via the compat proxy is allowed only
when ``ChatConfig.claude_agent_use_compat_proxy`` is enabled.
"""
from claude_agent_sdk._cli_version import __cli_version__
from backend.copilot.config import ChatConfig
cfg = ChatConfig()
proxy_enabled = cfg.claude_agent_use_compat_proxy
if __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT:
return # safe with or without the proxy
if __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY:
assert proxy_enabled, (
f"Bundled Claude Code CLI version {__cli_version__!r} is only "
"OpenRouter-safe when `claude_agent_use_compat_proxy` is "
"enabled, but the active ChatConfig has the proxy disabled. "
"Either set `COPILOT__CLAUDE_AGENT_USE_COMPAT_PROXY=true` or "
"downgrade `claude-agent-sdk` to a version whose bundled CLI "
f"is in {sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT)!r}. "
"See https://github.com/anthropics/claude-agent-sdk-python/issues/789."
)
return
raise AssertionError(
assert __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS, (
f"Bundled Claude Code CLI version is {__cli_version__!r}, which is "
f"not in any OpenRouter-known-good set "
f"not in the OpenRouter-known-good set "
f"({sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS)!r}). "
"If you intentionally bumped `claude-agent-sdk`, verify the new "
"bundled CLI works with OpenRouter against the reproduction test "
"in `cli_openrouter_compat_test.py`, then add the new CLI version "
"to either `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_DIRECT` (works "
"without the proxy) or `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS_VIA_PROXY` "
"(works only with `claude_agent_use_compat_proxy=true`). If you "
"cannot make the bundled CLI work either way, set "
"`claude_agent_cli_path` to a known-good binary instead. See "
"in `cli_openrouter_compat_test.py` (with "
"`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`), then add the new "
"CLI version to `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS`. If the env "
"var is not sufficient, set `claude_agent_cli_path` to a "
"known-good binary instead. See "
"https://github.com/anthropics/claude-agent-sdk-python/issues/789 "
"and https://github.com/Significant-Gravitas/AutoGPT/pull/12294."
)

View File

@@ -1980,13 +1980,6 @@ async def stream_chat_completion_sdk(
transcript_content: str = ""
state: _RetryState | None = None
# OpenRouter compat proxy — started inside the try and stopped in finally
# when ``ChatConfig.claude_agent_use_compat_proxy`` is enabled. The proxy
# rewrites outgoing CLI requests to strip ``tool_reference`` content
# blocks and the ``context-management-2025-06-27`` beta so the latest
# SDK / CLI versions stop tripping OpenRouter's validation.
_compat_proxy: Any = None # OpenRouterCompatProxy | None — lazy import
# Token usage accumulators — populated from ResultMessage at end of turn
turn_prompt_tokens = 0 # uncached input tokens only
turn_completion_tokens = 0
@@ -2249,96 +2242,14 @@ async def stream_chat_completion_sdk(
if sdk_model:
sdk_options_kwargs["model"] = sdk_model
# OpenRouter compatibility proxy — started here so its local URL
# can be injected into the CLI subprocess env BEFORE the env dict
# is passed to ``ClaudeAgentOptions``. When this flag is on we
# transparently rewrite outgoing CLI requests via the proxy
# (stripping ``tool_reference`` blocks and the
# ``context-management-2025-06-27`` beta) so newer SDK / CLI
# versions can talk to OpenRouter without their stricter
# validation rejecting the request.
if config.claude_agent_use_compat_proxy:
# Only start the compat proxy when there's already an
# explicit Anthropic-compatible upstream to forward to.
# Otherwise we'd be silently routing direct Anthropic /
# Claude Code subscription sessions through OpenRouter,
# which would break auth and change providers without
# operator consent. The explicit upstream can come from:
#
# 1. ``sdk_env['ANTHROPIC_BASE_URL']`` — caller override;
# 2. the process env — lowest-precedence host override;
# 3. ``ChatConfig.openrouter_active`` — OpenRouter is
# configured as the session's routing provider (i.e.
# the only case in which falling back to
# ``OPENROUTER_BASE_URL`` is intentional).
#
# When none of the above hold, log a warning and leave
# the CLI to talk to Anthropic directly as usual — the
# feature is opt-in and documented as "OpenRouter
# compatibility", so quietly no-oping on direct-Anthropic
# sessions is the safe default.
# Claude Code subscription mode intentionally sets
# ``sdk_env['ANTHROPIC_BASE_URL'] = ""`` to *disable* any
# base-URL override and keep the CLI talking to Anthropic
# directly. Treat an explicit empty string as a hard
# "no-proxy" signal so we never silently start the proxy
# against a host-wide ``ANTHROPIC_BASE_URL`` or fall back
# to OpenRouter when the caller has opted out.
sdk_env_map = sdk_env or {}
explicit_sdk_env = "ANTHROPIC_BASE_URL" in sdk_env_map
sdk_env_value = (
sdk_env_map["ANTHROPIC_BASE_URL"] if explicit_sdk_env else None
)
if explicit_sdk_env and not sdk_env_value:
# Empty string from sdk_env → subscription mode opt-out.
target_base_url: str | None = None
explicit_opt_out = True
else:
target_base_url = sdk_env_value or os.environ.get("ANTHROPIC_BASE_URL")
explicit_opt_out = False
# Only fall back to OpenRouter when the session actually
# has no base-URL plumbing of its own AND OpenRouter is
# the active routing provider AND the caller hasn't
# explicitly opted out via an empty sdk_env override.
if (
not target_base_url
and not explicit_opt_out
and config.openrouter_active
):
from backend.util.clients import OPENROUTER_BASE_URL
target_base_url = OPENROUTER_BASE_URL
if target_base_url:
from backend.copilot.sdk.openrouter_compat_proxy import (
OpenRouterCompatProxy,
)
_compat_proxy = OpenRouterCompatProxy(target_base_url=target_base_url)
await _compat_proxy.start()
# Inject the proxy URL into the SDK env so the spawned
# CLI subprocess uses the proxy as its Anthropic
# endpoint.
if sdk_env is None:
sdk_env = {}
sdk_env["ANTHROPIC_BASE_URL"] = _compat_proxy.local_url
# Log only the local bind URL — upstream is redacted
# to match the taint-analysis guidance applied in
# ``openrouter_compat_proxy.start``.
logger.info(
"%s OpenRouter compat proxy active (listening on %s)",
log_prefix,
_compat_proxy.local_url,
)
else:
logger.warning(
"%s claude_agent_use_compat_proxy is enabled but no "
"Anthropic-compatible upstream is configured for this "
"session (no ANTHROPIC_BASE_URL override and "
"openrouter_active is False); skipping proxy startup "
"so the CLI keeps talking to Anthropic directly.",
log_prefix,
)
# Tell the CLI to strip experimental betas (e.g.
# ``context-management-2025-06-27``) and ``tool_reference``
# content blocks so newer SDK / CLI versions work with
# OpenRouter's stricter validation. This single env var
# replaces the old in-process compat proxy.
if sdk_env is None:
sdk_env = {}
sdk_env["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1"
if sdk_env:
sdk_options_kwargs["env"] = sdk_env
@@ -3012,18 +2923,5 @@ async def stream_chat_completion_sdk(
except Exception:
logger.warning("%s SDK cleanup failed", log_prefix, exc_info=True)
finally:
# Tear down the OpenRouter compat proxy if it was started for
# this session — releases the bound port and the aiohttp
# client. Wrapped so a stop failure can never block the
# downstream lock release.
if _compat_proxy is not None:
try:
await _compat_proxy.stop()
except Exception:
logger.warning(
"%s OpenRouter compat proxy stop failed",
log_prefix,
exc_info=True,
)
# Release stream lock to allow new streams for this session
await lock.release()

View File

@@ -18,7 +18,7 @@ apscheduler = "^3.11.1"
autogpt-libs = { path = "../autogpt_libs", develop = true }
bleach = { extras = ["css"], version = "^6.2.0" }
cachetools = "^5.5.0"
claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 ships the broken context-management beta and REQUIRES the openrouter_compat_proxy. See sdk_compat_test.py.
claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 -- CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var strips the broken context-management beta. See sdk_compat_test.py.
click = "^8.2.0"
cryptography = "^46.0"
discord-py = "^2.5.2"