Merge branch 'chore/sdk-dev-preview-0.1.58-with-proxy' of https://github.com/Significant-Gravitas/AutoGPT into preview/all-prs

This commit is contained in:
majdyz
2026-04-13 08:01:33 +00:00
11 changed files with 925 additions and 21 deletions

View File

@@ -152,18 +152,31 @@ class ChatConfig(BaseSettings):
"overloaded). The SDK automatically retries with this cheaper model.",
)
claude_agent_max_turns: int = Field(
default=1000,
default=50,
ge=1,
le=10000,
description="Maximum number of agentic turns (tool-use loops) per query. "
"Prevents runaway tool loops from burning budget.",
"Prevents runaway tool loops from burning budget. "
"Changed from 1000 to 50 in SDK 0.1.58 upgrade — override via "
"CHAT_CLAUDE_AGENT_MAX_TURNS env var if your workflows need more.",
)
claude_agent_max_budget_usd: float = Field(
default=100.0,
default=5.0,
ge=0.01,
le=1000.0,
description="Maximum spend in USD per SDK query. The CLI aborts the "
"request if this budget is exceeded.",
"request if this budget is exceeded. "
"Changed from $100 to $5 in SDK 0.1.58 upgrade — override via "
"CHAT_CLAUDE_AGENT_MAX_BUDGET_USD env var if needed.",
)
claude_agent_max_thinking_tokens: int = Field(
default=8192,
ge=1024,
le=128000,
description="Maximum thinking/reasoning tokens per LLM call. "
"Extended thinking on Opus can generate 50k+ tokens at $75/M — "
"capping this is the single biggest cost lever. "
"8192 is sufficient for most tasks; increase for complex reasoning.",
)
claude_agent_max_transient_retries: int = Field(
default=3,
@@ -172,6 +185,20 @@ class ChatConfig(BaseSettings):
description="Maximum number of retries for transient API errors "
"(429, 5xx, ECONNRESET) before surfacing the error to the user.",
)
claude_agent_cli_path: str | None = Field(
default=None,
description="Optional explicit path to a Claude Code CLI binary. "
"When set, the SDK uses this binary instead of the version bundled "
"with the installed `claude-agent-sdk` package — letting us pin "
"the Python SDK and the CLI independently. Critical for keeping "
"OpenRouter compatibility while still picking up newer SDK API "
"features (the bundled CLI version in 0.1.46+ is broken against "
"OpenRouter — see PR #12294 and "
"anthropics/claude-agent-sdk-python#789). Falls back to the "
"bundled binary when unset. Reads from `CHAT_CLAUDE_AGENT_CLI_PATH` "
"or the unprefixed `CLAUDE_AGENT_CLI_PATH` environment variable "
"(same pattern as `api_key` / `base_url`).",
)
use_openrouter: bool = Field(
default=True,
description="Enable routing API calls through the OpenRouter proxy. "
@@ -294,6 +321,40 @@ class ChatConfig(BaseSettings):
v = OPENROUTER_BASE_URL
return v
@field_validator("claude_agent_cli_path", mode="before")
@classmethod
def get_claude_agent_cli_path(cls, v):
"""Resolve the Claude Code CLI override path from environment.
Accepts either the Pydantic-prefixed ``CHAT_CLAUDE_AGENT_CLI_PATH``
or the unprefixed ``CLAUDE_AGENT_CLI_PATH`` (matching the same
fallback pattern used by ``api_key`` / ``base_url``). Keeping the
unprefixed form working is important because the field is
primarily an operator escape hatch set via container/host env,
and the unprefixed name is what the PR description, the field
docstrings, and the reproduction test in
``cli_openrouter_compat_test.py`` refer to.
"""
if not v:
v = os.getenv("CHAT_CLAUDE_AGENT_CLI_PATH")
if not v:
v = os.getenv("CLAUDE_AGENT_CLI_PATH")
if v:
if not os.path.exists(v):
raise ValueError(
f"claude_agent_cli_path '{v}' does not exist. "
"Check the path or unset CLAUDE_AGENT_CLI_PATH to use "
"the bundled CLI."
)
if not os.path.isfile(v):
raise ValueError(f"claude_agent_cli_path '{v}' is not a regular file.")
if not os.access(v, os.X_OK):
raise ValueError(
f"claude_agent_cli_path '{v}' exists but is not executable. "
"Check file permissions."
)
return v
# Prompt paths for different contexts
PROMPT_PATHS: dict[str, str] = {
"default": "prompts/chat_system.md",

View File

@@ -17,6 +17,8 @@ _ENV_VARS_TO_CLEAR = (
"CHAT_BASE_URL",
"OPENROUTER_BASE_URL",
"OPENAI_BASE_URL",
"CHAT_CLAUDE_AGENT_CLI_PATH",
"CLAUDE_AGENT_CLI_PATH",
)
@@ -87,3 +89,78 @@ class TestE2BActive:
"""e2b_active is False when use_e2b_sandbox=False regardless of key."""
cfg = ChatConfig(use_e2b_sandbox=False, e2b_api_key="test-key")
assert cfg.e2b_active is False
class TestClaudeAgentCliPathEnvFallback:
"""``claude_agent_cli_path`` accepts both the Pydantic-prefixed
``CHAT_CLAUDE_AGENT_CLI_PATH`` env var and the unprefixed
``CLAUDE_AGENT_CLI_PATH`` form (mirrors ``api_key`` / ``base_url``).
"""
def test_prefixed_env_var_is_picked_up(
self, monkeypatch: pytest.MonkeyPatch, tmp_path
) -> None:
fake_cli = tmp_path / "fake-claude"
fake_cli.write_text("#!/bin/sh\n")
fake_cli.chmod(0o755)
monkeypatch.setenv("CHAT_CLAUDE_AGENT_CLI_PATH", str(fake_cli))
cfg = ChatConfig()
assert cfg.claude_agent_cli_path == str(fake_cli)
def test_unprefixed_env_var_is_picked_up(
self, monkeypatch: pytest.MonkeyPatch, tmp_path
) -> None:
fake_cli = tmp_path / "fake-claude"
fake_cli.write_text("#!/bin/sh\n")
fake_cli.chmod(0o755)
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(fake_cli))
cfg = ChatConfig()
assert cfg.claude_agent_cli_path == str(fake_cli)
def test_prefixed_wins_over_unprefixed(
self, monkeypatch: pytest.MonkeyPatch, tmp_path
) -> None:
prefixed_cli = tmp_path / "fake-claude-prefixed"
prefixed_cli.write_text("#!/bin/sh\n")
prefixed_cli.chmod(0o755)
unprefixed_cli = tmp_path / "fake-claude-unprefixed"
unprefixed_cli.write_text("#!/bin/sh\n")
unprefixed_cli.chmod(0o755)
monkeypatch.setenv("CHAT_CLAUDE_AGENT_CLI_PATH", str(prefixed_cli))
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(unprefixed_cli))
cfg = ChatConfig()
assert cfg.claude_agent_cli_path == str(prefixed_cli)
def test_no_env_var_defaults_to_none(self, monkeypatch: pytest.MonkeyPatch) -> None:
cfg = ChatConfig()
assert cfg.claude_agent_cli_path is None
def test_nonexistent_path_raises_validation_error(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Non-existent CLI path must be rejected at config time, not at
runtime when subprocess.run fails with an opaque OS error."""
monkeypatch.setenv(
"CLAUDE_AGENT_CLI_PATH", "/opt/nonexistent/claude-cli-binary"
)
with pytest.raises(Exception, match="does not exist"):
ChatConfig()
def test_non_executable_path_raises_validation_error(
self, monkeypatch: pytest.MonkeyPatch, tmp_path
) -> None:
"""Path that exists but is not executable must be rejected."""
non_exec = tmp_path / "claude-not-executable"
non_exec.write_text("#!/bin/sh\n")
non_exec.chmod(0o644) # readable but not executable
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(non_exec))
with pytest.raises(Exception, match="not executable"):
ChatConfig()
def test_directory_path_raises_validation_error(
self, monkeypatch: pytest.MonkeyPatch, tmp_path
) -> None:
"""Path pointing to a directory must be rejected."""
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(tmp_path))
with pytest.raises(Exception, match="not a regular file"):
ChatConfig()

View File

@@ -174,13 +174,23 @@ class CoPilotProcessor:
logger.info(f"[CoPilotExecutor] Worker {self.tid} started")
def _prewarm_cli(self) -> None:
"""Run the bundled CLI binary once to warm OS page caches."""
try:
from claude_agent_sdk._internal.transport.subprocess_cli import (
SubprocessCLITransport,
)
"""Run the Claude Code CLI binary once to warm OS page caches.
cli_path = SubprocessCLITransport._find_bundled_cli(None) # type: ignore[arg-type]
Honours the ``claude_agent_cli_path`` config override (which lets
us run a pinned CLI version independent of the bundled one in the
installed ``claude-agent-sdk`` wheel — see
``ChatConfig.claude_agent_cli_path`` for the rationale). Falls
back to the bundled binary when no override is set.
"""
try:
cfg = ChatConfig()
cli_path: str | None = cfg.claude_agent_cli_path
if not cli_path:
from claude_agent_sdk._internal.transport.subprocess_cli import (
SubprocessCLITransport,
)
cli_path = SubprocessCLITransport._find_bundled_cli(None) # type: ignore[arg-type]
if cli_path:
result = subprocess.run(
[cli_path, "-v"],

View File

@@ -0,0 +1,639 @@
"""Reproduction test for the OpenRouter incompatibility in newer
``claude-agent-sdk`` / Claude Code CLI versions.
Background — there are two stacked regressions that block us from
upgrading the ``claude-agent-sdk`` package above ``0.1.45``:
1. **`tool_reference` content blocks** introduced by CLI ``2.1.69`` (=
SDK ``0.1.46``). The CLI's built-in ``ToolSearch`` tool returns
``{"type": "tool_reference", "tool_name": "..."}`` content blocks in
``tool_result.content``. OpenRouter's stricter Zod validation
rejects this with::
messages[N].content[0].content: Invalid input: expected string, received array
This is the regression that originally pinned us at 0.1.45 — see
https://github.com/Significant-Gravitas/AutoGPT/pull/12294 for the
full forensic write-up. CLI 2.1.70 added proxy detection that
*should* disable the offending blocks when ``ANTHROPIC_BASE_URL`` is
set, but our subsequent attempts at 0.1.55 / 0.1.56 still failed.
2. **`context-management-2025-06-27` beta header** — some CLI version
after ``2.1.91`` started injecting this header / beta flag, which
OpenRouter rejects with::
400 No endpoints available that support Anthropic's context
management features (context-management-2025-06-27). Context
management requires a supported provider (Anthropic).
Tracked upstream at
https://github.com/anthropics/claude-agent-sdk-python/issues/789.
Still open at the time of writing, no upstream PR linked, no
workaround documented.
The purpose of this test:
* Spin up a tiny in-process HTTP server that pretends to be the
Anthropic Messages API.
* Capture every request body the CLI sends.
* Inspect the captured bodies for the two forbidden patterns above.
* Fail loudly if either is present, with a pointer to the issue
tracker.
This is the reproduction we use as a CI gate when bisecting which SDK /
CLI version is safe to upgrade to. It runs against the bundled CLI by
default (or against ``ChatConfig.claude_agent_cli_path`` when set), so
it doubles as a regression guard for the ``cli_path`` override
mechanism.
The test does **not** need an OpenRouter API key — it reproduces the
mechanism (forbidden content blocks / headers in the *outgoing*
request) rather than the symptom (the 400 OpenRouter would return).
This keeps it deterministic, free, and CI-runnable without secrets.
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import re
import subprocess
from pathlib import Path
from typing import Any
import pytest
from aiohttp import web
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Forbidden patterns we scan for in captured request bodies
# ---------------------------------------------------------------------------
# Substring of the `tool_reference` content block that breaks OpenRouter's
# Beta string OpenRouter rejects in upstream issue #789. Can appear in
# either `betas` arrays or the `anthropic-beta` header value.
_FORBIDDEN_CONTEXT_MANAGEMENT_BETA = "context-management-2025-06-27"
def _body_contains_tool_reference_block(body_text: str) -> bool:
"""Return True if *body_text* contains a ``tool_reference`` content
block anywhere in its structure.
We parse the JSON and walk it rather than relying on substring
matches because the CLI is free to emit either ``{"type": "tool_reference"}``
(with spaces) or the compact ``{"type":"tool_reference"}`` form,
and we must catch both. Falls back to a whitespace-tolerant
regex when the body isn't valid JSON — the Messages API always
sends JSON, but the fallback keeps the detector honest on
malformed / partial bodies a fuzzer might produce.
"""
try:
payload = json.loads(body_text)
except (ValueError, TypeError):
# Whitespace-tolerant fallback: allow any whitespace between
# the key, colon, and value quoted string.
return bool(re.search(r'"type"\s*:\s*"tool_reference"', body_text))
def _walk(node: Any) -> bool:
if isinstance(node, dict):
if node.get("type") == "tool_reference":
return True
return any(_walk(v) for v in node.values())
if isinstance(node, list):
return any(_walk(v) for v in node)
return False
return _walk(payload)
def _scan_request_for_forbidden_patterns(
body_text: str,
headers: dict[str, str],
) -> list[str]:
"""Return a list of forbidden patterns found in *body_text* / *headers*.
Empty list = clean request. Non-empty = the CLI is sending one of the
OpenRouter-incompatible features.
"""
findings: list[str] = []
if _body_contains_tool_reference_block(body_text):
findings.append(
"`tool_reference` content block in request body — "
"PR #12294 / CLI 2.1.69 regression"
)
if _FORBIDDEN_CONTEXT_MANAGEMENT_BETA in body_text:
findings.append(
f"{_FORBIDDEN_CONTEXT_MANAGEMENT_BETA!r} in request body — "
"anthropics/claude-agent-sdk-python#789"
)
# Header values are case-insensitive in HTTP — aiohttp normalises
# incoming names but values are stored as-is.
for header_name, header_value in headers.items():
if header_name.lower() == "anthropic-beta":
if _FORBIDDEN_CONTEXT_MANAGEMENT_BETA in header_value:
findings.append(
f"{_FORBIDDEN_CONTEXT_MANAGEMENT_BETA!r} in "
"`anthropic-beta` header — issue #789"
)
return findings
# ---------------------------------------------------------------------------
# Fake Anthropic Messages API
# ---------------------------------------------------------------------------
#
# We need to give the CLI a *successful* response so it doesn't error out
# before we get a chance to inspect the request. The minimal thing the
# CLI accepts is a streamed (SSE) message-start → content-block-delta →
# message-stop sequence.
#
# We don't strictly *need* the CLI to accept the response — we already
# have the request body by the time we send any reply — but giving it a
# valid stream means the assertion failure (if any) is the *only*
# failure mode in the test, not "CLI exited 1 because we sent garbage".
def _build_streaming_message_response() -> str:
"""Return an SSE-formatted body containing a minimal Anthropic
Messages API streamed response.
This is the smallest stream that the Claude Code CLI will accept
end-to-end without errors. Each line is one SSE event."""
events: list[dict[str, Any]] = [
{
"type": "message_start",
"message": {
"id": "msg_test",
"type": "message",
"role": "assistant",
"content": [],
"model": "claude-test",
"stop_reason": None,
"stop_sequence": None,
"usage": {"input_tokens": 1, "output_tokens": 1},
},
},
{
"type": "content_block_start",
"index": 0,
"content_block": {"type": "text", "text": ""},
},
{
"type": "content_block_delta",
"index": 0,
"delta": {"type": "text_delta", "text": "ok"},
},
{"type": "content_block_stop", "index": 0},
{
"type": "message_delta",
"delta": {"stop_reason": "end_turn", "stop_sequence": None},
"usage": {"output_tokens": 1},
},
{"type": "message_stop"},
]
return "".join(
f"event: {evt['type']}\ndata: {json.dumps(evt)}\n\n" for evt in events
)
class _CapturedRequest:
"""One request the fake server received."""
def __init__(self, path: str, headers: dict[str, str], body: str) -> None:
self.path = path
self.headers = headers
self.body = body
async def _start_fake_anthropic_server(
captured: list[_CapturedRequest],
) -> tuple[web.AppRunner, int]:
"""Start an aiohttp server pretending to be the Anthropic API.
All POSTs to ``/v1/messages`` are recorded into *captured* and
answered with a valid streaming response. Returns ``(runner, port)``
so the caller can ``await runner.cleanup()`` when finished.
"""
async def messages_handler(request: web.Request) -> web.StreamResponse:
body = await request.text()
captured.append(
_CapturedRequest(
path=request.path,
headers={k: v for k, v in request.headers.items()},
body=body,
)
)
# Stream a minimal valid response so the CLI doesn't error out
# before we can inspect what it sent.
response = web.StreamResponse(
status=200,
headers={
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
},
)
await response.prepare(request)
await response.write(_build_streaming_message_response().encode("utf-8"))
await response.write_eof()
return response
app = web.Application()
app.router.add_post("/v1/messages", messages_handler)
# OAuth/profile endpoints the CLI may probe — answer 404 so it falls
# through quickly without retrying.
app.router.add_route("*", "/{tail:.*}", lambda _r: web.Response(status=404))
runner = web.AppRunner(app)
await runner.setup()
site = web.TCPSite(runner, "127.0.0.1", 0)
await site.start()
server = site._server
assert server is not None
sockets = getattr(server, "sockets", None)
assert sockets is not None
port: int = sockets[0].getsockname()[1]
return runner, port
# ---------------------------------------------------------------------------
# CLI invocation
# ---------------------------------------------------------------------------
def _resolve_cli_path() -> Path | None:
"""Return the Claude Code CLI binary the SDK would use.
Honours the same override mechanism as ``service.py`` /
``ChatConfig.claude_agent_cli_path``: checks either the Pydantic-
prefixed ``CHAT_CLAUDE_AGENT_CLI_PATH`` or the unprefixed
``CLAUDE_AGENT_CLI_PATH`` env var first, then falls back to the
bundled binary that ships with the installed ``claude-agent-sdk``
wheel. The two env var names are accepted at the config layer via
``ChatConfig.get_claude_agent_cli_path`` and mirrored here so the
reproduction test picks up the same override regardless of which
form an operator sets.
"""
override = os.environ.get("CHAT_CLAUDE_AGENT_CLI_PATH") or os.environ.get(
"CLAUDE_AGENT_CLI_PATH"
)
if override:
candidate = Path(override)
return candidate if candidate.is_file() else None
try:
from typing import cast
from claude_agent_sdk._internal.transport.subprocess_cli import (
SubprocessCLITransport,
)
bundled = cast(str, SubprocessCLITransport._find_bundled_cli(None))
return Path(bundled) if bundled else None
except (ImportError, AttributeError) as e: # pragma: no cover - import-time guard
logger.warning("Could not locate bundled Claude CLI: %s", e)
return None
async def _run_cli_against_fake_server(
cli_path: Path,
fake_server_port: int,
timeout_seconds: float,
extra_env: dict[str, str] | None = None,
) -> tuple[int, str, str]:
"""Spawn the CLI pointed at the fake Anthropic server and feed it a
single ``user`` message via stream-json on stdin.
Returns ``(returncode, stdout, stderr)``. The return code is not
asserted by the test — we only care that the CLI made at least one
POST to ``/v1/messages`` so the fake server captured the body.
"""
fake_url = f"http://127.0.0.1:{fake_server_port}"
env = {
# Inherit basic shell variables so the CLI can find its tools,
# but force network/auth at our fake endpoint.
**os.environ,
"ANTHROPIC_BASE_URL": fake_url,
"ANTHROPIC_API_KEY": "sk-test-fake-key-not-real",
# Disable any features that would phone home to a different host
# mid-test (telemetry, plugin marketplace fetch).
"DISABLE_TELEMETRY": "1",
"CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
**(extra_env or {}),
}
# The CLI accepts stream-json input on stdin in `query` mode. A
# minimal user-message envelope is enough to trigger an API call.
stdin_payload = (
json.dumps(
{
"type": "user",
"message": {"role": "user", "content": "hello"},
}
)
+ "\n"
)
proc = await asyncio.create_subprocess_exec(
str(cli_path),
"--output-format",
"stream-json",
"--input-format",
"stream-json",
"--verbose",
"--print",
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=env,
)
try:
assert proc.stdin is not None
proc.stdin.write(stdin_payload.encode("utf-8"))
await proc.stdin.drain()
proc.stdin.close()
stdout_bytes, stderr_bytes = await asyncio.wait_for(
proc.communicate(), timeout=timeout_seconds
)
except (asyncio.TimeoutError, TimeoutError):
# Best-effort kill — we already have whatever requests the CLI
# managed to send before stalling.
try:
proc.kill()
except ProcessLookupError:
pass
# Reap the process after kill() so we don't leave an unreaped
# child behind until event-loop shutdown. Wait with its own
# short timeout in case the kill was ineffective.
try:
stdout_bytes, stderr_bytes = await asyncio.wait_for(
proc.communicate(), timeout=5.0
)
except (asyncio.TimeoutError, TimeoutError):
stdout_bytes, stderr_bytes = b"", b""
return (
proc.returncode if proc.returncode is not None else -1,
stdout_bytes.decode("utf-8", errors="replace"),
stderr_bytes.decode("utf-8", errors="replace"),
)
# ---------------------------------------------------------------------------
# The actual test
# ---------------------------------------------------------------------------
async def _run_reproduction(
*,
extra_env: dict[str, str] | None = None,
) -> tuple[int, str, str, list[_CapturedRequest]]:
"""Spawn the CLI against a fake Anthropic API and return what the
server saw.
"""
cli_path = _resolve_cli_path()
if cli_path is None or not cli_path.is_file():
pytest.skip(
"No Claude Code CLI binary available (neither bundled nor "
"overridden via CLAUDE_AGENT_CLI_PATH / "
"CHAT_CLAUDE_AGENT_CLI_PATH); cannot reproduce."
)
captured: list[_CapturedRequest] = []
upstream_runner, upstream_port = await _start_fake_anthropic_server(captured)
try:
returncode, stdout, stderr = await _run_cli_against_fake_server(
cli_path=cli_path,
fake_server_port=upstream_port,
timeout_seconds=30.0,
extra_env=extra_env,
)
finally:
await upstream_runner.cleanup()
return returncode, stdout, stderr, captured
def _assert_no_forbidden_patterns(
captured: list[_CapturedRequest], returncode: int, stderr: str
) -> None:
if not captured:
pytest.skip(
"Bundled CLI did not make any HTTP requests to the fake server "
f"(rc={returncode}). The CLI may have failed before reaching "
f"the network — stderr tail: {stderr[-500:]!r}. "
"Nothing to assert; treating as inconclusive rather than "
"either passing or failing."
)
all_findings: list[str] = []
for req in captured:
findings = _scan_request_for_forbidden_patterns(req.body, req.headers)
if findings:
all_findings.extend(f"{req.path}: {finding}" for finding in findings)
assert not all_findings, (
f"Bundled Claude Code CLI sent OpenRouter-incompatible features in "
f"{len(all_findings)} request(s):\n - "
+ "\n - ".join(all_findings)
+ "\n\nThe bundled CLI is sending OpenRouter-incompatible features. "
"See https://github.com/Significant-Gravitas/AutoGPT/pull/12294 and "
"https://github.com/anthropics/claude-agent-sdk-python/issues/789. "
"If you bumped `claude-agent-sdk`, verify the new bundled CLI works "
"with `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1` set (injected by "
"``build_sdk_env()`` in ``env.py``), then add the CLI version to "
"`_KNOWN_GOOD_BUNDLED_CLI_VERSIONS` in `sdk_compat_test.py`. "
"Alternatively, pin a known-good binary via `claude_agent_cli_path` "
"(env: `CLAUDE_AGENT_CLI_PATH` or `CHAT_CLAUDE_AGENT_CLI_PATH`)."
)
@pytest.mark.asyncio
@pytest.mark.xfail(
reason="CLI 2.1.97 (SDK 0.1.58) sends context-management beta without "
"CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1. This is expected — the env "
"var guard in test_disable_experimental_betas_env_var_strips_headers "
"is the real regression test.",
strict=True,
)
async def test_bare_cli_does_not_send_openrouter_incompatible_features():
"""Bare CLI reproduction (no env var workaround).
Documents whether the bundled CLI sends OpenRouter-incompatible
features without the CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS env var.
On SDK 0.1.58 (CLI 2.1.97) this is expected to fail — the env var
test above is the actual regression guard.
"""
returncode, _stdout, stderr, captured = await _run_reproduction()
_assert_no_forbidden_patterns(captured, returncode, stderr)
@pytest.mark.asyncio
async def test_disable_experimental_betas_env_var_strips_headers():
"""Validate that ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` strips
the ``context-management-2025-06-27`` beta header when
``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint (simulating
OpenRouter).
This is the main regression guard: the env var is injected by
``build_sdk_env()`` in ``env.py`` into every CLI subprocess so newer
SDK / CLI versions work with OpenRouter without any proxy.
"""
returncode, _stdout, stderr, captured = await _run_reproduction(
extra_env={"CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS": "1"},
)
_assert_no_forbidden_patterns(captured, returncode, stderr)
def test_subprocess_module_available():
"""Sentinel test: the subprocess module must be importable so the
main reproduction test can spawn the CLI. Catches sandboxed CI
runners that block subprocess execution before the slow test runs."""
assert subprocess.__name__ == "subprocess"
# ---------------------------------------------------------------------------
# Pure helper unit tests — pin the forbidden-pattern detection so any
# future drift in the scanner is caught fast, even when the slow
# end-to-end CLI subprocess test isn't runnable.
# ---------------------------------------------------------------------------
class TestScanRequestForForbiddenPatterns:
def test_clean_body_returns_empty_findings(self):
body = '{"model": "claude-opus-4.6", "messages": [{"role": "user", "content": "hi"}]}'
assert _scan_request_for_forbidden_patterns(body, {}) == []
def test_detects_tool_reference_in_body(self):
body = (
'{"messages": [{"role": "user", "content": ['
'{"type": "tool_reference", "tool_name": "find"}'
"]}]}"
)
findings = _scan_request_for_forbidden_patterns(body, {})
assert len(findings) == 1
assert "tool_reference" in findings[0]
assert "PR #12294" in findings[0]
def test_detects_context_management_in_body(self):
body = '{"betas": ["context-management-2025-06-27"]}'
findings = _scan_request_for_forbidden_patterns(body, {})
assert len(findings) == 1
assert "context-management-2025-06-27" in findings[0]
assert "#789" in findings[0]
def test_detects_context_management_in_anthropic_beta_header(self):
findings = _scan_request_for_forbidden_patterns(
body_text="{}",
headers={"anthropic-beta": "context-management-2025-06-27"},
)
assert len(findings) == 1
assert "anthropic-beta" in findings[0]
def test_detects_context_management_in_uppercase_header_name(self):
# HTTP header names are case-insensitive — make sure the
# scanner handles a server that didn't normalise names.
findings = _scan_request_for_forbidden_patterns(
body_text="{}",
headers={"Anthropic-Beta": "context-management-2025-06-27, other"},
)
assert len(findings) == 1
def test_ignores_unrelated_header_values(self):
findings = _scan_request_for_forbidden_patterns(
body_text="{}",
headers={
"authorization": "Bearer secret",
"anthropic-beta": "fine-grained-tool-streaming-2025",
},
)
assert findings == []
def test_detects_both_patterns_simultaneously(self):
body = (
'{"betas": ["context-management-2025-06-27"], '
'"messages": [{"role": "user", "content": ['
'{"type": "tool_reference", "tool_name": "find"}'
"]}]}"
)
findings = _scan_request_for_forbidden_patterns(body, {})
# Both patterns hit, in stable order: tool_reference then betas.
assert len(findings) == 2
assert "tool_reference" in findings[0]
assert "context-management-2025-06-27" in findings[1]
def test_detects_compact_tool_reference_without_spaces(self):
# Regression guard: the old substring matcher only caught the
# prettified form '"type": "tool_reference"' with a space
# between the key and the value, so a CLI emitting compact
# JSON (e.g. via `json.dumps(separators=(",", ":"))`) could
# slip past the scanner and false-pass. The JSON-walking
# detector catches both forms.
body = '{"messages":[{"role":"user","content":[{"type":"tool_reference","tool_name":"find"}]}]}'
findings = _scan_request_for_forbidden_patterns(body, {})
assert len(findings) == 1
assert "tool_reference" in findings[0]
def test_detects_tool_reference_in_malformed_body_fallback(self):
# When the body isn't valid JSON the helper falls back to a
# whitespace-tolerant regex so fuzzed / partial payloads are
# still caught.
body = 'garbage-prefix{"type" : "tool_reference"} trailing'
findings = _scan_request_for_forbidden_patterns(body, {})
assert len(findings) == 1
assert "tool_reference" in findings[0]
class TestResolveCliPath:
def test_honours_explicit_env_var_when_file_exists(self, tmp_path, monkeypatch):
fake_cli = tmp_path / "fake-claude"
fake_cli.write_text("#!/bin/sh\necho fake\n")
fake_cli.chmod(0o755)
monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(fake_cli))
resolved = _resolve_cli_path()
assert resolved == fake_cli
def test_honours_chat_prefixed_env_var_when_file_exists(
self, tmp_path, monkeypatch
):
"""The Pydantic ``CHAT_`` prefix variant is also honoured.
Mirrors ``ChatConfig.get_claude_agent_cli_path`` which accepts
either ``CHAT_CLAUDE_AGENT_CLI_PATH`` (prefix applied by
``pydantic_settings``) or the unprefixed ``CLAUDE_AGENT_CLI_PATH``
form documented in the PR and field docstring.
"""
fake_cli = tmp_path / "fake-claude-prefixed"
fake_cli.write_text("#!/bin/sh\necho fake\n")
fake_cli.chmod(0o755)
monkeypatch.delenv("CLAUDE_AGENT_CLI_PATH", raising=False)
monkeypatch.setenv("CHAT_CLAUDE_AGENT_CLI_PATH", str(fake_cli))
resolved = _resolve_cli_path()
assert resolved == fake_cli
def test_returns_none_when_env_var_points_to_missing_file(self, monkeypatch):
monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", "/nonexistent/path/to/claude")
# Should fall through to the bundled binary OR return None,
# but never raise.
resolved = _resolve_cli_path()
# We can't assert exact value (depends on whether the bundled
# CLI is installed in the test env) but the function must not
# raise — the caller is supposed to handle None gracefully.
assert resolved is None or resolved.is_file()
def test_falls_back_to_bundled_when_env_var_unset(self, monkeypatch):
monkeypatch.delenv("CLAUDE_AGENT_CLI_PATH", raising=False)
monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
# Same caveat as above — returns the bundled path or None,
# depending on what's installed in the test env.
resolved = _resolve_cli_path()
assert resolved is None or resolved.is_file()

View File

@@ -96,5 +96,8 @@ def build_sdk_env(
env["CLAUDE_CODE_DISABLE_CLAUDE_MDS"] = "1"
env["CLAUDE_CODE_DISABLE_AUTO_MEMORY"] = "1"
env["CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC"] = "1"
# Strip Anthropic-specific beta headers (e.g. context-management-2025-06-27)
# that OpenRouter rejects. Safe for all modes — direct Anthropic ignores it.
env["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1"
return env

View File

@@ -44,6 +44,7 @@ class TestBuildSdkEnvSubscription:
assert result["ANTHROPIC_API_KEY"] == ""
assert result["ANTHROPIC_AUTH_TOKEN"] == ""
assert result["ANTHROPIC_BASE_URL"] == ""
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
mock_validate.assert_called_once()
@patch(
@@ -78,6 +79,7 @@ class TestBuildSdkEnvDirectAnthropic:
assert "ANTHROPIC_API_KEY" not in result
assert "ANTHROPIC_AUTH_TOKEN" not in result
assert "ANTHROPIC_BASE_URL" not in result
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
def test_no_anthropic_key_overrides_when_openrouter_flag_true_but_no_key(self):
"""OpenRouter flag is True but no api_key => openrouter_active is False."""
@@ -93,6 +95,7 @@ class TestBuildSdkEnvDirectAnthropic:
assert "ANTHROPIC_API_KEY" not in result
assert "ANTHROPIC_AUTH_TOKEN" not in result
assert "ANTHROPIC_BASE_URL" not in result
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
# ---------------------------------------------------------------------------
@@ -123,6 +126,8 @@ class TestBuildSdkEnvOpenRouter:
assert result["ANTHROPIC_AUTH_TOKEN"] == "sk-or-test-key"
assert result["ANTHROPIC_API_KEY"] == ""
assert "ANTHROPIC_CUSTOM_HEADERS" not in result
# OpenRouter compat: env var must always be present
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
def test_strips_trailing_v1(self):
"""The /v1 suffix is stripped from the base URL."""
@@ -133,6 +138,7 @@ class TestBuildSdkEnvOpenRouter:
result = build_sdk_env()
assert result["ANTHROPIC_BASE_URL"] == "https://openrouter.ai/api"
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
def test_strips_trailing_v1_and_slash(self):
"""Trailing slash before /v1 strip is handled."""
@@ -144,6 +150,7 @@ class TestBuildSdkEnvOpenRouter:
# rstrip("/") first, then remove /v1
assert result["ANTHROPIC_BASE_URL"] == "https://openrouter.ai/api"
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
def test_no_v1_suffix_left_alone(self):
"""A base URL without /v1 is used as-is."""
@@ -154,6 +161,7 @@ class TestBuildSdkEnvOpenRouter:
result = build_sdk_env()
assert result["ANTHROPIC_BASE_URL"] == "https://custom-proxy.example.com"
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
def test_session_id_header(self):
cfg = self._openrouter_config()

View File

@@ -203,11 +203,15 @@ class TestConfigDefaults:
def test_max_turns_default(self):
cfg = _make_config()
assert cfg.claude_agent_max_turns == 1000
assert cfg.claude_agent_max_turns == 50
def test_max_budget_usd_default(self):
cfg = _make_config()
assert cfg.claude_agent_max_budget_usd == 100.0
assert cfg.claude_agent_max_budget_usd == 5.0
def test_max_thinking_tokens_default(self):
cfg = _make_config()
assert cfg.claude_agent_max_thinking_tokens == 8192
def test_max_transient_retries_default(self):
cfg = _make_config()

View File

@@ -196,3 +196,93 @@ def test_sdk_exports_hook_event_type(hook_event: str):
# HookEvent is a Literal type — check that our events are valid values.
# We can't easily inspect Literal at runtime, so just verify the type exists.
assert HookEvent is not None
# ---------------------------------------------------------------------------
# OpenRouter compatibility — bundled CLI version pin
# ---------------------------------------------------------------------------
#
# Newer ``claude-agent-sdk`` versions bundle CLI binaries that send
# features incompatible with OpenRouter (``tool_reference`` content
# blocks, ``context-management-2025-06-27`` beta). We neutralise these
# at runtime by injecting ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1``
# into the CLI subprocess env (see ``build_sdk_env()`` in ``env.py``).
#
# This test is the cheapest possible regression guard: it pins the
# bundled CLI to a known-good version. If anyone bumps
# ``claude-agent-sdk`` in ``pyproject.toml``, the bundled CLI version in
# ``_cli_version.py`` will change and this test will fail with a clear
# message that points the next person at the OpenRouter compat issue
# instead of letting them silently re-break production.
# CLI versions bisect-verified as OpenRouter-safe. 2.1.63 and 2.1.70 pre-date
# the context-management beta regression and work without any env var. 2.1.97+
# requires ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` (injected by
# ``build_sdk_env()`` in ``env.py``) to strip the beta header.
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = frozenset(
{
"2.1.63", # claude-agent-sdk 0.1.45 -- original pin from PR #12294.
"2.1.70", # claude-agent-sdk 0.1.47 -- first version with the
# tool_reference proxy detection fix; bisect-verified
# OpenRouter-safe in #12742.
"2.1.97", # claude-agent-sdk 0.1.58 -- OpenRouter-safe only with
# CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 (injected by
# build_sdk_env() in env.py).
}
)
def test_bundled_cli_version_is_known_good_against_openrouter():
"""Pin the bundled CLI version so accidental SDK bumps cause a loud,
fast failure with a pointer to the OpenRouter compatibility issue.
"""
from claude_agent_sdk._cli_version import __cli_version__
assert __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS, (
f"Bundled Claude Code CLI version is {__cli_version__!r}, which is "
f"not in the OpenRouter-known-good set "
f"({sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS)!r}). "
"If you intentionally bumped `claude-agent-sdk`, verify the new "
"bundled CLI works with OpenRouter against the reproduction test "
"in `cli_openrouter_compat_test.py` (with "
"`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`), then add the new "
"CLI version to `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS`. If the env "
"var is not sufficient, set `claude_agent_cli_path` to a "
"known-good binary instead. See "
"https://github.com/anthropics/claude-agent-sdk-python/issues/789 "
"and https://github.com/Significant-Gravitas/AutoGPT/pull/12294."
)
def test_sdk_exposes_cli_path_option():
"""Sanity-check that the SDK still exposes the `cli_path` option we use
for the OpenRouter workaround. If upstream removes it we need to know."""
import inspect
from claude_agent_sdk import ClaudeAgentOptions
sig = inspect.signature(ClaudeAgentOptions)
assert "cli_path" in sig.parameters, (
"ClaudeAgentOptions no longer accepts `cli_path` — our "
"claude_agent_cli_path config override would be silently ignored. "
"Either find an alternative override mechanism or pin the SDK to a "
"version that still exposes it."
)
def test_sdk_exposes_max_thinking_tokens_option():
"""Sanity-check that the SDK still exposes the `max_thinking_tokens` option
we use to cap extended thinking cost. If upstream removes or renames it
the cap will be silently ignored and Opus thinking tokens will be unbounded."""
import inspect
from claude_agent_sdk import ClaudeAgentOptions
sig = inspect.signature(ClaudeAgentOptions)
assert "max_thinking_tokens" in sig.parameters, (
"ClaudeAgentOptions no longer accepts `max_thinking_tokens` — our "
"claude_agent_max_thinking_tokens cost cap would be silently ignored, "
"allowing Opus extended thinking to generate unbounded tokens at $75/M. "
"Find the correct parameter name in the new SDK version and update "
"ChatConfig.claude_agent_max_thinking_tokens and service.py accordingly."
)

View File

@@ -2238,13 +2238,24 @@ async def stream_chat_completion_sdk(
"max_turns": config.claude_agent_max_turns,
# max_budget_usd: per-query spend ceiling enforced by the CLI.
"max_budget_usd": config.claude_agent_max_budget_usd,
# max_thinking_tokens: cap extended thinking output per LLM call.
# Thinking tokens are billed at output rate ($75/M for Opus) and
# account for ~54% of total cost. 8192 is the default.
"max_thinking_tokens": config.claude_agent_max_thinking_tokens,
}
if sdk_model:
sdk_options_kwargs["model"] = sdk_model
if sdk_env:
sdk_options_kwargs["env"] = sdk_env
if use_resume and resume_file:
sdk_options_kwargs["resume"] = resume_file
# Optional explicit Claude Code CLI binary path (decouples the
# bundled SDK version from the CLI version we run — needed because
# the CLI bundled in 0.1.46+ is broken against OpenRouter). Falls
# back to the bundled binary when unset.
if config.claude_agent_cli_path:
sdk_options_kwargs["cli_path"] = config.claude_agent_cli_path
options = ClaudeAgentOptions(**sdk_options_kwargs) # type: ignore[arg-type] # dynamic kwargs

View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
[[package]]
name = "agentmail"
@@ -909,17 +909,18 @@ files = [
[[package]]
name = "claude-agent-sdk"
version = "0.1.45"
version = "0.1.58"
description = "Python SDK for Claude Code"
optional = false
python-versions = ">=3.10"
groups = ["main"]
files = [
{file = "claude_agent_sdk-0.1.45-py3-none-macosx_11_0_arm64.whl", hash = "sha256:26a5cc60c3a394f5b814f6b2f67650819cbcd38c405bbdc11582b3e097b3a770"},
{file = "claude_agent_sdk-0.1.45-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:decc741b53e0b2c10a64fd84c15acca1102077d9f99941c54905172cd95160c9"},
{file = "claude_agent_sdk-0.1.45-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:7d48dcf4178c704e4ccbf3f1f4ebf20b3de3f03d0592086c1f3abd16b8ca441e"},
{file = "claude_agent_sdk-0.1.45-py3-none-win_amd64.whl", hash = "sha256:d1cf34995109c513d8daabcae7208edc260b553b53462a9ac06a7c40e240a288"},
{file = "claude_agent_sdk-0.1.45.tar.gz", hash = "sha256:97c1e981431b5af1e08c34731906ab8d4a58fe0774a04df0ea9587dcabc85151"},
{file = "claude_agent_sdk-0.1.58-py3-none-macosx_11_0_arm64.whl", hash = "sha256:69197950809754c4f06bba8261f2d99c3f9605b6cc1c13d3409d0eb82fb4ee64"},
{file = "claude_agent_sdk-0.1.58-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:75d60883fc5e2070bccd8d9b19505fe16af8e049120c03821e9dc8c826cca434"},
{file = "claude_agent_sdk-0.1.58-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:7bf4eb0f00ec944a7b63eb94788f120dfb0460c348a525235c7d6641805acc1d"},
{file = "claude_agent_sdk-0.1.58-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:650d298a3d3c0dcdde4b5f1dbf52f472ff0b0ec82987b27ffa2a4e0e72928408"},
{file = "claude_agent_sdk-0.1.58-py3-none-win_amd64.whl", hash = "sha256:2c2130a7ffe06ed4f88d56b217a5091c91c9bcb1a69cfd94d5dcf0d2946d8c55"},
{file = "claude_agent_sdk-0.1.58.tar.gz", hash = "sha256:77bee8fd60be033cb870def46c2ab1625a512fa8a3de4ff8d766664ffb16d6a6"},
]
[package.dependencies]
@@ -8928,4 +8929,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
content-hash = "da61798b73758b9292fc1933268d488fbe739dc1fbf5c6586cd0c76a3411eb2e"
content-hash = "c4cc6a0a26869a167ce182b178224554135d89d8ffa4605257d17b3f495cdf59"

View File

@@ -18,7 +18,7 @@ apscheduler = "^3.11.1"
autogpt-libs = { path = "../autogpt_libs", develop = true }
bleach = { extras = ["css"], version = "^6.2.0" }
cachetools = "^5.5.0"
claude-agent-sdk = "0.1.45" # see copilot/sdk/sdk_compat_test.py for capability checks
claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 -- CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var strips the broken context-management beta. See sdk_compat_test.py.
click = "^8.2.0"
cryptography = "^46.0"
discord-py = "^2.5.2"