mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
Merge branch 'chore/sdk-dev-preview-0.1.58-with-proxy' of https://github.com/Significant-Gravitas/AutoGPT into preview/all-prs
This commit is contained in:
@@ -152,18 +152,31 @@ class ChatConfig(BaseSettings):
|
||||
"overloaded). The SDK automatically retries with this cheaper model.",
|
||||
)
|
||||
claude_agent_max_turns: int = Field(
|
||||
default=1000,
|
||||
default=50,
|
||||
ge=1,
|
||||
le=10000,
|
||||
description="Maximum number of agentic turns (tool-use loops) per query. "
|
||||
"Prevents runaway tool loops from burning budget.",
|
||||
"Prevents runaway tool loops from burning budget. "
|
||||
"Changed from 1000 to 50 in SDK 0.1.58 upgrade — override via "
|
||||
"CHAT_CLAUDE_AGENT_MAX_TURNS env var if your workflows need more.",
|
||||
)
|
||||
claude_agent_max_budget_usd: float = Field(
|
||||
default=100.0,
|
||||
default=5.0,
|
||||
ge=0.01,
|
||||
le=1000.0,
|
||||
description="Maximum spend in USD per SDK query. The CLI aborts the "
|
||||
"request if this budget is exceeded.",
|
||||
"request if this budget is exceeded. "
|
||||
"Changed from $100 to $5 in SDK 0.1.58 upgrade — override via "
|
||||
"CHAT_CLAUDE_AGENT_MAX_BUDGET_USD env var if needed.",
|
||||
)
|
||||
claude_agent_max_thinking_tokens: int = Field(
|
||||
default=8192,
|
||||
ge=1024,
|
||||
le=128000,
|
||||
description="Maximum thinking/reasoning tokens per LLM call. "
|
||||
"Extended thinking on Opus can generate 50k+ tokens at $75/M — "
|
||||
"capping this is the single biggest cost lever. "
|
||||
"8192 is sufficient for most tasks; increase for complex reasoning.",
|
||||
)
|
||||
claude_agent_max_transient_retries: int = Field(
|
||||
default=3,
|
||||
@@ -172,6 +185,20 @@ class ChatConfig(BaseSettings):
|
||||
description="Maximum number of retries for transient API errors "
|
||||
"(429, 5xx, ECONNRESET) before surfacing the error to the user.",
|
||||
)
|
||||
claude_agent_cli_path: str | None = Field(
|
||||
default=None,
|
||||
description="Optional explicit path to a Claude Code CLI binary. "
|
||||
"When set, the SDK uses this binary instead of the version bundled "
|
||||
"with the installed `claude-agent-sdk` package — letting us pin "
|
||||
"the Python SDK and the CLI independently. Critical for keeping "
|
||||
"OpenRouter compatibility while still picking up newer SDK API "
|
||||
"features (the bundled CLI version in 0.1.46+ is broken against "
|
||||
"OpenRouter — see PR #12294 and "
|
||||
"anthropics/claude-agent-sdk-python#789). Falls back to the "
|
||||
"bundled binary when unset. Reads from `CHAT_CLAUDE_AGENT_CLI_PATH` "
|
||||
"or the unprefixed `CLAUDE_AGENT_CLI_PATH` environment variable "
|
||||
"(same pattern as `api_key` / `base_url`).",
|
||||
)
|
||||
use_openrouter: bool = Field(
|
||||
default=True,
|
||||
description="Enable routing API calls through the OpenRouter proxy. "
|
||||
@@ -294,6 +321,40 @@ class ChatConfig(BaseSettings):
|
||||
v = OPENROUTER_BASE_URL
|
||||
return v
|
||||
|
||||
@field_validator("claude_agent_cli_path", mode="before")
@classmethod
def get_claude_agent_cli_path(cls, v):
    """Resolve and validate the Claude Code CLI override path.

    Resolution order when no explicit value is supplied:

    1. ``CHAT_CLAUDE_AGENT_CLI_PATH`` (the Pydantic-prefixed name)
    2. ``CLAUDE_AGENT_CLI_PATH`` (the unprefixed name, matching the
       fallback pattern used by ``api_key`` / ``base_url``)

    The unprefixed form is kept working because the field is primarily
    an operator escape hatch set via container/host env, and it is the
    name the reproduction test in ``cli_openrouter_compat_test.py``
    refers to.

    Returns:
        The resolved path, or the original falsy value when no override
        is configured (callers then use the bundled CLI).

    Raises:
        ValueError: if the resolved path does not exist, is not a
            regular file, or is not executable.
    """
    if not v:
        v = os.getenv("CHAT_CLAUDE_AGENT_CLI_PATH")
    if not v:
        v = os.getenv("CLAUDE_AGENT_CLI_PATH")
    if not v:
        # No override configured anywhere: nothing to validate.
        return v

    if not os.path.exists(v):
        # Name both accepted env vars: the value may have come from
        # either one, so pointing at only one of them is misleading.
        raise ValueError(
            f"claude_agent_cli_path '{v}' does not exist. "
            "Check the path or unset CHAT_CLAUDE_AGENT_CLI_PATH / "
            "CLAUDE_AGENT_CLI_PATH to use the bundled CLI."
        )
    if not os.path.isfile(v):
        raise ValueError(f"claude_agent_cli_path '{v}' is not a regular file.")
    if not os.access(v, os.X_OK):
        raise ValueError(
            f"claude_agent_cli_path '{v}' exists but is not executable. "
            "Check file permissions."
        )
    return v
|
||||
|
||||
# Prompt paths for different contexts
|
||||
PROMPT_PATHS: dict[str, str] = {
|
||||
"default": "prompts/chat_system.md",
|
||||
|
||||
@@ -17,6 +17,8 @@ _ENV_VARS_TO_CLEAR = (
|
||||
"CHAT_BASE_URL",
|
||||
"OPENROUTER_BASE_URL",
|
||||
"OPENAI_BASE_URL",
|
||||
"CHAT_CLAUDE_AGENT_CLI_PATH",
|
||||
"CLAUDE_AGENT_CLI_PATH",
|
||||
)
|
||||
|
||||
|
||||
@@ -87,3 +89,78 @@ class TestE2BActive:
|
||||
"""e2b_active is False when use_e2b_sandbox=False regardless of key."""
|
||||
cfg = ChatConfig(use_e2b_sandbox=False, e2b_api_key="test-key")
|
||||
assert cfg.e2b_active is False
|
||||
|
||||
|
||||
class TestClaudeAgentCliPathEnvFallback:
|
||||
"""``claude_agent_cli_path`` accepts both the Pydantic-prefixed
|
||||
``CHAT_CLAUDE_AGENT_CLI_PATH`` env var and the unprefixed
|
||||
``CLAUDE_AGENT_CLI_PATH`` form (mirrors ``api_key`` / ``base_url``).
|
||||
"""
|
||||
|
||||
def test_prefixed_env_var_is_picked_up(
|
||||
self, monkeypatch: pytest.MonkeyPatch, tmp_path
|
||||
) -> None:
|
||||
fake_cli = tmp_path / "fake-claude"
|
||||
fake_cli.write_text("#!/bin/sh\n")
|
||||
fake_cli.chmod(0o755)
|
||||
monkeypatch.setenv("CHAT_CLAUDE_AGENT_CLI_PATH", str(fake_cli))
|
||||
cfg = ChatConfig()
|
||||
assert cfg.claude_agent_cli_path == str(fake_cli)
|
||||
|
||||
def test_unprefixed_env_var_is_picked_up(
|
||||
self, monkeypatch: pytest.MonkeyPatch, tmp_path
|
||||
) -> None:
|
||||
fake_cli = tmp_path / "fake-claude"
|
||||
fake_cli.write_text("#!/bin/sh\n")
|
||||
fake_cli.chmod(0o755)
|
||||
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(fake_cli))
|
||||
cfg = ChatConfig()
|
||||
assert cfg.claude_agent_cli_path == str(fake_cli)
|
||||
|
||||
def test_prefixed_wins_over_unprefixed(
|
||||
self, monkeypatch: pytest.MonkeyPatch, tmp_path
|
||||
) -> None:
|
||||
prefixed_cli = tmp_path / "fake-claude-prefixed"
|
||||
prefixed_cli.write_text("#!/bin/sh\n")
|
||||
prefixed_cli.chmod(0o755)
|
||||
unprefixed_cli = tmp_path / "fake-claude-unprefixed"
|
||||
unprefixed_cli.write_text("#!/bin/sh\n")
|
||||
unprefixed_cli.chmod(0o755)
|
||||
monkeypatch.setenv("CHAT_CLAUDE_AGENT_CLI_PATH", str(prefixed_cli))
|
||||
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(unprefixed_cli))
|
||||
cfg = ChatConfig()
|
||||
assert cfg.claude_agent_cli_path == str(prefixed_cli)
|
||||
|
||||
def test_no_env_var_defaults_to_none(self, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
cfg = ChatConfig()
|
||||
assert cfg.claude_agent_cli_path is None
|
||||
|
||||
def test_nonexistent_path_raises_validation_error(
|
||||
self, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
"""Non-existent CLI path must be rejected at config time, not at
|
||||
runtime when subprocess.run fails with an opaque OS error."""
|
||||
monkeypatch.setenv(
|
||||
"CLAUDE_AGENT_CLI_PATH", "/opt/nonexistent/claude-cli-binary"
|
||||
)
|
||||
with pytest.raises(Exception, match="does not exist"):
|
||||
ChatConfig()
|
||||
|
||||
def test_non_executable_path_raises_validation_error(
|
||||
self, monkeypatch: pytest.MonkeyPatch, tmp_path
|
||||
) -> None:
|
||||
"""Path that exists but is not executable must be rejected."""
|
||||
non_exec = tmp_path / "claude-not-executable"
|
||||
non_exec.write_text("#!/bin/sh\n")
|
||||
non_exec.chmod(0o644) # readable but not executable
|
||||
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(non_exec))
|
||||
with pytest.raises(Exception, match="not executable"):
|
||||
ChatConfig()
|
||||
|
||||
def test_directory_path_raises_validation_error(
|
||||
self, monkeypatch: pytest.MonkeyPatch, tmp_path
|
||||
) -> None:
|
||||
"""Path pointing to a directory must be rejected."""
|
||||
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(tmp_path))
|
||||
with pytest.raises(Exception, match="not a regular file"):
|
||||
ChatConfig()
|
||||
|
||||
@@ -174,13 +174,23 @@ class CoPilotProcessor:
|
||||
logger.info(f"[CoPilotExecutor] Worker {self.tid} started")
|
||||
|
||||
def _prewarm_cli(self) -> None:
|
||||
"""Run the bundled CLI binary once to warm OS page caches."""
|
||||
try:
|
||||
from claude_agent_sdk._internal.transport.subprocess_cli import (
|
||||
SubprocessCLITransport,
|
||||
)
|
||||
"""Run the Claude Code CLI binary once to warm OS page caches.
|
||||
|
||||
cli_path = SubprocessCLITransport._find_bundled_cli(None) # type: ignore[arg-type]
|
||||
Honours the ``claude_agent_cli_path`` config override (which lets
|
||||
us run a pinned CLI version independent of the bundled one in the
|
||||
installed ``claude-agent-sdk`` wheel — see
|
||||
``ChatConfig.claude_agent_cli_path`` for the rationale). Falls
|
||||
back to the bundled binary when no override is set.
|
||||
"""
|
||||
try:
|
||||
cfg = ChatConfig()
|
||||
cli_path: str | None = cfg.claude_agent_cli_path
|
||||
if not cli_path:
|
||||
from claude_agent_sdk._internal.transport.subprocess_cli import (
|
||||
SubprocessCLITransport,
|
||||
)
|
||||
|
||||
cli_path = SubprocessCLITransport._find_bundled_cli(None) # type: ignore[arg-type]
|
||||
if cli_path:
|
||||
result = subprocess.run(
|
||||
[cli_path, "-v"],
|
||||
|
||||
@@ -0,0 +1,639 @@
|
||||
"""Reproduction test for the OpenRouter incompatibility in newer
|
||||
``claude-agent-sdk`` / Claude Code CLI versions.
|
||||
|
||||
Background — there are two stacked regressions that block us from
|
||||
upgrading the ``claude-agent-sdk`` package above ``0.1.45``:
|
||||
|
||||
1. **`tool_reference` content blocks** introduced by CLI ``2.1.69`` (=
|
||||
SDK ``0.1.46``). The CLI's built-in ``ToolSearch`` tool returns
|
||||
``{"type": "tool_reference", "tool_name": "..."}`` content blocks in
|
||||
``tool_result.content``. OpenRouter's stricter Zod validation
|
||||
rejects this with::
|
||||
|
||||
messages[N].content[0].content: Invalid input: expected string, received array
|
||||
|
||||
This is the regression that originally pinned us at 0.1.45 — see
|
||||
https://github.com/Significant-Gravitas/AutoGPT/pull/12294 for the
|
||||
full forensic write-up. CLI 2.1.70 added proxy detection that
|
||||
*should* disable the offending blocks when ``ANTHROPIC_BASE_URL`` is
|
||||
set, but our subsequent attempts at 0.1.55 / 0.1.56 still failed.
|
||||
|
||||
2. **`context-management-2025-06-27` beta header** — some CLI version
|
||||
after ``2.1.91`` started injecting this header / beta flag, which
|
||||
OpenRouter rejects with::
|
||||
|
||||
400 No endpoints available that support Anthropic's context
|
||||
management features (context-management-2025-06-27). Context
|
||||
management requires a supported provider (Anthropic).
|
||||
|
||||
Tracked upstream at
|
||||
https://github.com/anthropics/claude-agent-sdk-python/issues/789.
|
||||
Still open at the time of writing, no upstream PR linked, no
|
||||
workaround documented.
|
||||
|
||||
The purpose of this test:
|
||||
* Spin up a tiny in-process HTTP server that pretends to be the
|
||||
Anthropic Messages API.
|
||||
* Capture every request body the CLI sends.
|
||||
* Inspect the captured bodies for the two forbidden patterns above.
|
||||
* Fail loudly if either is present, with a pointer to the issue
|
||||
tracker.
|
||||
|
||||
This is the reproduction we use as a CI gate when bisecting which SDK /
|
||||
CLI version is safe to upgrade to. It runs against the bundled CLI by
|
||||
default (or against ``ChatConfig.claude_agent_cli_path`` when set), so
|
||||
it doubles as a regression guard for the ``cli_path`` override
|
||||
mechanism.
|
||||
|
||||
The test does **not** need an OpenRouter API key — it reproduces the
|
||||
mechanism (forbidden content blocks / headers in the *outgoing*
|
||||
request) rather than the symptom (the 400 OpenRouter would return).
|
||||
This keeps it deterministic, free, and CI-runnable without secrets.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from aiohttp import web
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Forbidden patterns we scan for in captured request bodies
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# The `tool_reference` content block that breaks OpenRouter's validation
# is detected structurally by `_body_contains_tool_reference_block`, so it
# needs no substring constant here.
#
# Beta string OpenRouter rejects in upstream issue #789. Can appear in
# either `betas` arrays or the `anthropic-beta` header value.
|
||||
_FORBIDDEN_CONTEXT_MANAGEMENT_BETA = "context-management-2025-06-27"
|
||||
|
||||
|
||||
def _body_contains_tool_reference_block(body_text: str) -> bool:
    """Return True if *body_text* contains a ``tool_reference`` content
    block anywhere in its structure.

    The body is parsed as JSON and walked rather than substring-matched,
    because the CLI is free to emit either ``{"type": "tool_reference"}``
    (with spaces) or the compact ``{"type":"tool_reference"}`` form and
    both must be caught.

    When the body cannot be parsed — invalid JSON, or nesting so deep
    that the decoder raises ``RecursionError`` — a whitespace-tolerant
    regex is used instead, so malformed / partial / fuzzed bodies are
    still scanned rather than crashing the detector.
    """
    try:
        payload = json.loads(body_text)
    except (ValueError, TypeError, RecursionError):
        # Whitespace-tolerant fallback: allow any whitespace between
        # the key, colon, and value quoted string.
        return bool(re.search(r'"type"\s*:\s*"tool_reference"', body_text))

    # Iterative depth-first walk with an explicit stack: a recursive
    # walk would hit the interpreter recursion limit on deep payloads.
    pending: list[Any] = [payload]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            if node.get("type") == "tool_reference":
                return True
            pending.extend(node.values())
        elif isinstance(node, list):
            pending.extend(node)
    return False
|
||||
|
||||
|
||||
def _scan_request_for_forbidden_patterns(
|
||||
body_text: str,
|
||||
headers: dict[str, str],
|
||||
) -> list[str]:
|
||||
"""Return a list of forbidden patterns found in *body_text* / *headers*.
|
||||
|
||||
Empty list = clean request. Non-empty = the CLI is sending one of the
|
||||
OpenRouter-incompatible features.
|
||||
"""
|
||||
findings: list[str] = []
|
||||
if _body_contains_tool_reference_block(body_text):
|
||||
findings.append(
|
||||
"`tool_reference` content block in request body — "
|
||||
"PR #12294 / CLI 2.1.69 regression"
|
||||
)
|
||||
if _FORBIDDEN_CONTEXT_MANAGEMENT_BETA in body_text:
|
||||
findings.append(
|
||||
f"{_FORBIDDEN_CONTEXT_MANAGEMENT_BETA!r} in request body — "
|
||||
"anthropics/claude-agent-sdk-python#789"
|
||||
)
|
||||
# Header *names* are case-insensitive in HTTP, so compare them
# lower-cased; header values are matched exactly as received.
|
||||
for header_name, header_value in headers.items():
|
||||
if header_name.lower() == "anthropic-beta":
|
||||
if _FORBIDDEN_CONTEXT_MANAGEMENT_BETA in header_value:
|
||||
findings.append(
|
||||
f"{_FORBIDDEN_CONTEXT_MANAGEMENT_BETA!r} in "
|
||||
"`anthropic-beta` header — issue #789"
|
||||
)
|
||||
return findings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake Anthropic Messages API
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# We need to give the CLI a *successful* response so it doesn't error out
|
||||
# before we get a chance to inspect the request. The minimal thing the
|
||||
# CLI accepts is a streamed (SSE) message-start → content-block-delta →
|
||||
# message-stop sequence.
|
||||
#
|
||||
# We don't strictly *need* the CLI to accept the response — we already
|
||||
# have the request body by the time we send any reply — but giving it a
|
||||
# valid stream means the assertion failure (if any) is the *only*
|
||||
# failure mode in the test, not "CLI exited 1 because we sent garbage".
|
||||
|
||||
|
||||
def _build_streaming_message_response() -> str:
|
||||
"""Return an SSE-formatted body containing a minimal Anthropic
|
||||
Messages API streamed response.
|
||||
|
||||
This is the smallest stream that the Claude Code CLI will accept
end-to-end without errors. Each event is emitted as an ``event:``
line and a ``data:`` line, followed by a blank line."""
|
||||
events: list[dict[str, Any]] = [
|
||||
{
|
||||
"type": "message_start",
|
||||
"message": {
|
||||
"id": "msg_test",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [],
|
||||
"model": "claude-test",
|
||||
"stop_reason": None,
|
||||
"stop_sequence": None,
|
||||
"usage": {"input_tokens": 1, "output_tokens": 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "content_block_start",
|
||||
"index": 0,
|
||||
"content_block": {"type": "text", "text": ""},
|
||||
},
|
||||
{
|
||||
"type": "content_block_delta",
|
||||
"index": 0,
|
||||
"delta": {"type": "text_delta", "text": "ok"},
|
||||
},
|
||||
{"type": "content_block_stop", "index": 0},
|
||||
{
|
||||
"type": "message_delta",
|
||||
"delta": {"stop_reason": "end_turn", "stop_sequence": None},
|
||||
"usage": {"output_tokens": 1},
|
||||
},
|
||||
{"type": "message_stop"},
|
||||
]
|
||||
return "".join(
|
||||
f"event: {evt['type']}\ndata: {json.dumps(evt)}\n\n" for evt in events
|
||||
)
|
||||
|
||||
|
||||
class _CapturedRequest:
    """One request the fake server received.

    Attributes are plain data:

    * ``path`` — the request path the CLI hit.
    * ``headers`` — header name to header value, as received.
    * ``body`` — the raw request body text.
    """

    def __init__(self, path: str, headers: dict[str, str], body: str) -> None:
        self.path = path
        self.headers = headers
        self.body = body

    def __repr__(self) -> str:
        # Keep it short: bodies can be large, and header values may carry
        # credentials, so only sizes are shown in assertion output.
        return (
            f"{type(self).__name__}(path={self.path!r}, "
            f"headers=<{len(self.headers)} entries>, "
            f"body=<{len(self.body)} chars>)"
        )
|
||||
|
||||
|
||||
async def _start_fake_anthropic_server(
    captured: list[_CapturedRequest],
) -> tuple[web.AppRunner, int]:
    """Start an aiohttp server pretending to be the Anthropic API.

    All POSTs to ``/v1/messages`` are recorded into *captured* and
    answered with a valid streaming response; every other route answers
    404. The server binds to an ephemeral port on ``127.0.0.1``.

    Returns:
        ``(runner, port)`` so the caller can ``await runner.cleanup()``
        when finished.

    Raises:
        RuntimeError: if the server started without reporting a bound
            address. The runner is cleaned up before any startup
            failure propagates.
    """

    async def messages_handler(request: web.Request) -> web.StreamResponse:
        body = await request.text()
        # A header may arrive as several field lines with the same name.
        # Fold them into one comma-separated value instead of letting the
        # last line win, so a forbidden beta in an earlier line is not
        # hidden from the scanner.
        headers: dict[str, str] = {}
        for name, value in request.headers.items():
            headers[name] = f"{headers[name]}, {value}" if name in headers else value
        captured.append(
            _CapturedRequest(path=request.path, headers=headers, body=body)
        )
        # Stream a minimal valid response so the CLI doesn't error out
        # before we can inspect what it sent.
        response = web.StreamResponse(
            status=200,
            headers={
                "Content-Type": "text/event-stream",
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
            },
        )
        await response.prepare(request)
        await response.write(_build_streaming_message_response().encode("utf-8"))
        await response.write_eof()
        return response

    async def not_found_handler(_request: web.Request) -> web.Response:
        # OAuth/profile endpoints the CLI may probe — answer 404 so it
        # falls through quickly without retrying. Declared as a coroutine
        # because aiohttp deprecates plain-function handlers.
        return web.Response(status=404)

    app = web.Application()
    app.router.add_post("/v1/messages", messages_handler)
    app.router.add_route("*", "/{tail:.*}", not_found_handler)

    runner = web.AppRunner(app)
    await runner.setup()
    try:
        site = web.TCPSite(runner, "127.0.0.1", 0)
        await site.start()
        # Use the public `addresses` accessor rather than the private
        # `site._server` attribute to discover the ephemeral port.
        addresses = runner.addresses
        if not addresses:
            raise RuntimeError("Fake Anthropic server did not bind to any socket")
        port: int = addresses[0][1]
    except BaseException:
        # Don't leak the runner when startup fails or is cancelled.
        await runner.cleanup()
        raise
    return runner, port
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI invocation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _resolve_cli_path() -> Path | None:
|
||||
"""Return the Claude Code CLI binary the SDK would use.
|
||||
|
||||
Honours the same override mechanism as ``service.py`` /
|
||||
``ChatConfig.claude_agent_cli_path``: checks either the Pydantic-
|
||||
prefixed ``CHAT_CLAUDE_AGENT_CLI_PATH`` or the unprefixed
|
||||
``CLAUDE_AGENT_CLI_PATH`` env var first, then falls back to the
|
||||
bundled binary that ships with the installed ``claude-agent-sdk``
|
||||
wheel. The two env var names are accepted at the config layer via
|
||||
``ChatConfig.get_claude_agent_cli_path`` and mirrored here so the
|
||||
reproduction test picks up the same override regardless of which
|
||||
form an operator sets.
|
||||
"""
|
||||
override = os.environ.get("CHAT_CLAUDE_AGENT_CLI_PATH") or os.environ.get(
|
||||
"CLAUDE_AGENT_CLI_PATH"
|
||||
)
|
||||
if override:
|
||||
candidate = Path(override)
|
||||
return candidate if candidate.is_file() else None
|
||||
|
||||
try:
|
||||
from typing import cast
|
||||
|
||||
from claude_agent_sdk._internal.transport.subprocess_cli import (
|
||||
SubprocessCLITransport,
|
||||
)
|
||||
|
||||
bundled = cast(str, SubprocessCLITransport._find_bundled_cli(None))
|
||||
return Path(bundled) if bundled else None
|
||||
except (ImportError, AttributeError) as e: # pragma: no cover - import-time guard
|
||||
logger.warning("Could not locate bundled Claude CLI: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
async def _run_cli_against_fake_server(
|
||||
cli_path: Path,
|
||||
fake_server_port: int,
|
||||
timeout_seconds: float,
|
||||
extra_env: dict[str, str] | None = None,
|
||||
) -> tuple[int, str, str]:
|
||||
"""Spawn the CLI pointed at the fake Anthropic server and feed it a
|
||||
single ``user`` message via stream-json on stdin.
|
||||
|
||||
Returns ``(returncode, stdout, stderr)``. The return code is not
|
||||
asserted by the test — we only care that the CLI made at least one
|
||||
POST to ``/v1/messages`` so the fake server captured the body.
|
||||
"""
|
||||
fake_url = f"http://127.0.0.1:{fake_server_port}"
|
||||
env = {
|
||||
# Inherit basic shell variables so the CLI can find its tools,
|
||||
# but force network/auth at our fake endpoint.
|
||||
**os.environ,
|
||||
"ANTHROPIC_BASE_URL": fake_url,
|
||||
"ANTHROPIC_API_KEY": "sk-test-fake-key-not-real",
|
||||
# Disable any features that would phone home to a different host
|
||||
# mid-test (telemetry, plugin marketplace fetch).
|
||||
"DISABLE_TELEMETRY": "1",
|
||||
"CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
|
||||
**(extra_env or {}),
|
||||
}
|
||||
|
||||
# The CLI accepts stream-json input on stdin in `query` mode. A
|
||||
# minimal user-message envelope is enough to trigger an API call.
|
||||
stdin_payload = (
|
||||
json.dumps(
|
||||
{
|
||||
"type": "user",
|
||||
"message": {"role": "user", "content": "hello"},
|
||||
}
|
||||
)
|
||||
+ "\n"
|
||||
)
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
str(cli_path),
|
||||
"--output-format",
|
||||
"stream-json",
|
||||
"--input-format",
|
||||
"stream-json",
|
||||
"--verbose",
|
||||
"--print",
|
||||
stdin=asyncio.subprocess.PIPE,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
env=env,
|
||||
)
|
||||
try:
|
||||
assert proc.stdin is not None
|
||||
proc.stdin.write(stdin_payload.encode("utf-8"))
|
||||
await proc.stdin.drain()
|
||||
proc.stdin.close()
|
||||
|
||||
stdout_bytes, stderr_bytes = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=timeout_seconds
|
||||
)
|
||||
except (asyncio.TimeoutError, TimeoutError):
|
||||
# Best-effort kill — we already have whatever requests the CLI
|
||||
# managed to send before stalling.
|
||||
try:
|
||||
proc.kill()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
# Reap the process after kill() so we don't leave an unreaped
|
||||
# child behind until event-loop shutdown. Wait with its own
|
||||
# short timeout in case the kill was ineffective.
|
||||
try:
|
||||
stdout_bytes, stderr_bytes = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=5.0
|
||||
)
|
||||
except (asyncio.TimeoutError, TimeoutError):
|
||||
stdout_bytes, stderr_bytes = b"", b""
|
||||
|
||||
return (
|
||||
proc.returncode if proc.returncode is not None else -1,
|
||||
stdout_bytes.decode("utf-8", errors="replace"),
|
||||
stderr_bytes.decode("utf-8", errors="replace"),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# The actual test
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _run_reproduction(
|
||||
*,
|
||||
extra_env: dict[str, str] | None = None,
|
||||
) -> tuple[int, str, str, list[_CapturedRequest]]:
|
||||
"""Spawn the CLI against a fake Anthropic API and return what the
|
||||
server saw.
|
||||
"""
|
||||
cli_path = _resolve_cli_path()
|
||||
if cli_path is None or not cli_path.is_file():
|
||||
pytest.skip(
|
||||
"No Claude Code CLI binary available (neither bundled nor "
|
||||
"overridden via CLAUDE_AGENT_CLI_PATH / "
|
||||
"CHAT_CLAUDE_AGENT_CLI_PATH); cannot reproduce."
|
||||
)
|
||||
|
||||
captured: list[_CapturedRequest] = []
|
||||
upstream_runner, upstream_port = await _start_fake_anthropic_server(captured)
|
||||
|
||||
try:
|
||||
returncode, stdout, stderr = await _run_cli_against_fake_server(
|
||||
cli_path=cli_path,
|
||||
fake_server_port=upstream_port,
|
||||
timeout_seconds=30.0,
|
||||
extra_env=extra_env,
|
||||
)
|
||||
finally:
|
||||
await upstream_runner.cleanup()
|
||||
|
||||
return returncode, stdout, stderr, captured
|
||||
|
||||
|
||||
def _assert_no_forbidden_patterns(
|
||||
captured: list[_CapturedRequest], returncode: int, stderr: str
|
||||
) -> None:
|
||||
if not captured:
|
||||
pytest.skip(
|
||||
"Bundled CLI did not make any HTTP requests to the fake server "
|
||||
f"(rc={returncode}). The CLI may have failed before reaching "
|
||||
f"the network — stderr tail: {stderr[-500:]!r}. "
|
||||
"Nothing to assert; treating as inconclusive rather than "
|
||||
"either passing or failing."
|
||||
)
|
||||
|
||||
all_findings: list[str] = []
|
||||
for req in captured:
|
||||
findings = _scan_request_for_forbidden_patterns(req.body, req.headers)
|
||||
if findings:
|
||||
all_findings.extend(f"{req.path}: {finding}" for finding in findings)
|
||||
|
||||
assert not all_findings, (
|
||||
f"Bundled Claude Code CLI sent OpenRouter-incompatible features in "
|
||||
f"{len(all_findings)} request(s):\n - "
|
||||
+ "\n - ".join(all_findings)
|
||||
+ "\n\nThe bundled CLI is sending OpenRouter-incompatible features. "
|
||||
"See https://github.com/Significant-Gravitas/AutoGPT/pull/12294 and "
|
||||
"https://github.com/anthropics/claude-agent-sdk-python/issues/789. "
|
||||
"If you bumped `claude-agent-sdk`, verify the new bundled CLI works "
|
||||
"with `CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1` set (injected by "
|
||||
"``build_sdk_env()`` in ``env.py``), then add the CLI version to "
|
||||
"`_KNOWN_GOOD_BUNDLED_CLI_VERSIONS` in `sdk_compat_test.py`. "
|
||||
"Alternatively, pin a known-good binary via `claude_agent_cli_path` "
|
||||
"(env: `CLAUDE_AGENT_CLI_PATH` or `CHAT_CLAUDE_AGENT_CLI_PATH`)."
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.xfail(
|
||||
reason="CLI 2.1.97 (SDK 0.1.58) sends context-management beta without "
|
||||
"CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1. This is expected — the env "
|
||||
"var guard in test_disable_experimental_betas_env_var_strips_headers "
|
||||
"is the real regression test.",
|
||||
strict=True,
|
||||
)
|
||||
async def test_bare_cli_does_not_send_openrouter_incompatible_features():
|
||||
"""Bare CLI reproduction (no env var workaround).
|
||||
|
||||
Documents whether the bundled CLI sends OpenRouter-incompatible
|
||||
features without the CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS env var.
|
||||
On SDK 0.1.58 (CLI 2.1.97) this is expected to fail — the env var
|
||||
test below (``test_disable_experimental_betas_env_var_strips_headers``)
is the actual regression guard.
|
||||
"""
|
||||
returncode, _stdout, stderr, captured = await _run_reproduction()
|
||||
_assert_no_forbidden_patterns(captured, returncode, stderr)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_disable_experimental_betas_env_var_strips_headers():
|
||||
"""Validate that ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` strips
|
||||
the ``context-management-2025-06-27`` beta header when
|
||||
``ANTHROPIC_BASE_URL`` points to a non-Anthropic endpoint (simulating
|
||||
OpenRouter).
|
||||
|
||||
This is the main regression guard: the env var is injected by
|
||||
``build_sdk_env()`` in ``env.py`` into every CLI subprocess so newer
|
||||
SDK / CLI versions work with OpenRouter without any proxy.
|
||||
"""
|
||||
returncode, _stdout, stderr, captured = await _run_reproduction(
|
||||
extra_env={"CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS": "1"},
|
||||
)
|
||||
_assert_no_forbidden_patterns(captured, returncode, stderr)
|
||||
|
||||
|
||||
def test_subprocess_module_available():
    """Sentinel test: spawning a child process must actually work so the
    main reproduction test can run the CLI.

    Importing ``subprocess`` always succeeds, so checking the module
    alone proves nothing. This launches the current Python interpreter
    with a no-op program instead, which fails fast on sandboxed CI
    runners that block subprocess execution — before the slow test runs.
    """
    import sys

    completed = subprocess.run(
        [sys.executable, "-c", "pass"],
        capture_output=True,
        timeout=30,
        check=False,
    )
    assert completed.returncode == 0, (
        "Could not spawn a trivial child process "
        f"(rc={completed.returncode}, stderr={completed.stderr[-500:]!r}); "
        "the CLI reproduction tests cannot run in this environment."
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pure helper unit tests — pin the forbidden-pattern detection so any
|
||||
# future drift in the scanner is caught fast, even when the slow
|
||||
# end-to-end CLI subprocess test isn't runnable.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestScanRequestForForbiddenPatterns:
    """Tests for ``_scan_request_for_forbidden_patterns``.

    The helper is called with a request body string and a headers mapping
    and returns a sequence of findings; an empty result means nothing
    forbidden was detected.
    """

    def test_clean_body_returns_empty_findings(self):
        """A body without any forbidden pattern yields no findings."""
        body = '{"model": "claude-opus-4.6", "messages": [{"role": "user", "content": "hi"}]}'
        assert _scan_request_for_forbidden_patterns(body, {}) == []

    def test_detects_tool_reference_in_body(self):
        """A ``tool_reference`` content block is reported, citing PR #12294."""
        body = (
            '{"messages": [{"role": "user", "content": ['
            '{"type": "tool_reference", "tool_name": "find"}'
            "]}]}"
        )
        findings = _scan_request_for_forbidden_patterns(body, {})
        assert len(findings) == 1
        assert "tool_reference" in findings[0]
        assert "PR #12294" in findings[0]

    def test_detects_context_management_in_body(self):
        """The context-management beta in the body is reported, citing #789."""
        body = '{"betas": ["context-management-2025-06-27"]}'
        findings = _scan_request_for_forbidden_patterns(body, {})
        assert len(findings) == 1
        assert "context-management-2025-06-27" in findings[0]
        assert "#789" in findings[0]

    def test_detects_context_management_in_anthropic_beta_header(self):
        """The beta is also caught when it only appears in a header value."""
        findings = _scan_request_for_forbidden_patterns(
            body_text="{}",
            headers={"anthropic-beta": "context-management-2025-06-27"},
        )
        assert len(findings) == 1
        assert "anthropic-beta" in findings[0]

    def test_detects_context_management_in_uppercase_header_name(self):
        """Header-name matching must not depend on letter case."""
        # HTTP header names are case-insensitive — make sure the
        # scanner handles a server that didn't normalise names.
        findings = _scan_request_for_forbidden_patterns(
            body_text="{}",
            headers={"Anthropic-Beta": "context-management-2025-06-27, other"},
        )
        assert len(findings) == 1

    def test_ignores_unrelated_header_values(self):
        """Headers that do not carry the forbidden beta produce no findings."""
        findings = _scan_request_for_forbidden_patterns(
            body_text="{}",
            headers={
                "authorization": "Bearer secret",
                "anthropic-beta": "fine-grained-tool-streaming-2025",
            },
        )
        assert findings == []

    def test_detects_both_patterns_simultaneously(self):
        """Both forbidden patterns in one body give two ordered findings."""
        body = (
            '{"betas": ["context-management-2025-06-27"], '
            '"messages": [{"role": "user", "content": ['
            '{"type": "tool_reference", "tool_name": "find"}'
            "]}]}"
        )
        findings = _scan_request_for_forbidden_patterns(body, {})
        # Both patterns hit, in stable order: tool_reference then betas.
        assert len(findings) == 2
        assert "tool_reference" in findings[0]
        assert "context-management-2025-06-27" in findings[1]

    def test_detects_compact_tool_reference_without_spaces(self):
        """Compact JSON (no space after the colon) is still detected."""
        # Regression guard: the old substring matcher only caught the
        # prettified form '"type": "tool_reference"' with a space
        # between the key and the value, so a CLI emitting compact
        # JSON (e.g. via `json.dumps(separators=(",", ":"))`) could
        # slip past the scanner and false-pass. The JSON-walking
        # detector catches both forms.
        body = '{"messages":[{"role":"user","content":[{"type":"tool_reference","tool_name":"find"}]}]}'
        findings = _scan_request_for_forbidden_patterns(body, {})
        assert len(findings) == 1
        assert "tool_reference" in findings[0]

    def test_detects_tool_reference_in_malformed_body_fallback(self):
        """A body that is not valid JSON is still scanned for the pattern."""
        # When the body isn't valid JSON the helper falls back to a
        # whitespace-tolerant regex so fuzzed / partial payloads are
        # still caught.
        body = 'garbage-prefix{"type" : "tool_reference"} trailing'
        findings = _scan_request_for_forbidden_patterns(body, {})
        assert len(findings) == 1
        assert "tool_reference" in findings[0]
|
||||
|
||||
|
||||
class TestResolveCliPath:
    """Tests for ``_resolve_cli_path`` and the env vars it consults.

    Each test clears the env var it is not exercising so that the
    surrounding environment cannot influence the outcome.
    """

    def test_honours_explicit_env_var_when_file_exists(self, tmp_path, monkeypatch):
        """``CLAUDE_AGENT_CLI_PATH`` pointing at a real file is returned."""
        fake_cli = tmp_path / "fake-claude"
        fake_cli.write_text("#!/bin/sh\necho fake\n")
        fake_cli.chmod(0o755)
        monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
        monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(fake_cli))
        resolved = _resolve_cli_path()
        assert resolved == fake_cli

    def test_honours_chat_prefixed_env_var_when_file_exists(
        self, tmp_path, monkeypatch
    ):
        """The Pydantic ``CHAT_`` prefix variant is also honoured.

        Mirrors ``ChatConfig.get_claude_agent_cli_path`` which accepts
        either ``CHAT_CLAUDE_AGENT_CLI_PATH`` (prefix applied by
        ``pydantic_settings``) or the unprefixed ``CLAUDE_AGENT_CLI_PATH``
        form documented in the PR and field docstring.
        """
        fake_cli = tmp_path / "fake-claude-prefixed"
        fake_cli.write_text("#!/bin/sh\necho fake\n")
        fake_cli.chmod(0o755)
        monkeypatch.delenv("CLAUDE_AGENT_CLI_PATH", raising=False)
        monkeypatch.setenv("CHAT_CLAUDE_AGENT_CLI_PATH", str(fake_cli))
        resolved = _resolve_cli_path()
        assert resolved == fake_cli

    def test_returns_none_when_env_var_points_to_missing_file(self, monkeypatch):
        """A non-existent override path must not make the resolver raise."""
        monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
        monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", "/nonexistent/path/to/claude")
        # Should fall through to the bundled binary OR return None,
        # but never raise.
        resolved = _resolve_cli_path()
        # We can't assert exact value (depends on whether the bundled
        # CLI is installed in the test env) but the function must not
        # raise — the caller is supposed to handle None gracefully.
        assert resolved is None or resolved.is_file()

    def test_falls_back_to_bundled_when_env_var_unset(self, monkeypatch):
        """With neither env var set the result is a real file or ``None``."""
        monkeypatch.delenv("CLAUDE_AGENT_CLI_PATH", raising=False)
        monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
        # Same caveat as above — returns the bundled path or None,
        # depending on what's installed in the test env.
        resolved = _resolve_cli_path()
        assert resolved is None or resolved.is_file()
|
||||
@@ -96,5 +96,8 @@ def build_sdk_env(
|
||||
env["CLAUDE_CODE_DISABLE_CLAUDE_MDS"] = "1"
|
||||
env["CLAUDE_CODE_DISABLE_AUTO_MEMORY"] = "1"
|
||||
env["CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC"] = "1"
|
||||
# Strip Anthropic-specific beta headers (e.g. context-management-2025-06-27)
|
||||
# that OpenRouter rejects. Safe for all modes — direct Anthropic ignores it.
|
||||
env["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1"
|
||||
|
||||
return env
|
||||
|
||||
@@ -44,6 +44,7 @@ class TestBuildSdkEnvSubscription:
|
||||
assert result["ANTHROPIC_API_KEY"] == ""
|
||||
assert result["ANTHROPIC_AUTH_TOKEN"] == ""
|
||||
assert result["ANTHROPIC_BASE_URL"] == ""
|
||||
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
|
||||
mock_validate.assert_called_once()
|
||||
|
||||
@patch(
|
||||
@@ -78,6 +79,7 @@ class TestBuildSdkEnvDirectAnthropic:
|
||||
assert "ANTHROPIC_API_KEY" not in result
|
||||
assert "ANTHROPIC_AUTH_TOKEN" not in result
|
||||
assert "ANTHROPIC_BASE_URL" not in result
|
||||
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
|
||||
|
||||
def test_no_anthropic_key_overrides_when_openrouter_flag_true_but_no_key(self):
|
||||
"""OpenRouter flag is True but no api_key => openrouter_active is False."""
|
||||
@@ -93,6 +95,7 @@ class TestBuildSdkEnvDirectAnthropic:
|
||||
assert "ANTHROPIC_API_KEY" not in result
|
||||
assert "ANTHROPIC_AUTH_TOKEN" not in result
|
||||
assert "ANTHROPIC_BASE_URL" not in result
|
||||
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -123,6 +126,8 @@ class TestBuildSdkEnvOpenRouter:
|
||||
assert result["ANTHROPIC_AUTH_TOKEN"] == "sk-or-test-key"
|
||||
assert result["ANTHROPIC_API_KEY"] == ""
|
||||
assert "ANTHROPIC_CUSTOM_HEADERS" not in result
|
||||
# OpenRouter compat: env var must always be present
|
||||
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
|
||||
|
||||
def test_strips_trailing_v1(self):
|
||||
"""The /v1 suffix is stripped from the base URL."""
|
||||
@@ -133,6 +138,7 @@ class TestBuildSdkEnvOpenRouter:
|
||||
result = build_sdk_env()
|
||||
|
||||
assert result["ANTHROPIC_BASE_URL"] == "https://openrouter.ai/api"
|
||||
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
|
||||
|
||||
def test_strips_trailing_v1_and_slash(self):
|
||||
"""Trailing slash before /v1 strip is handled."""
|
||||
@@ -144,6 +150,7 @@ class TestBuildSdkEnvOpenRouter:
|
||||
|
||||
# rstrip("/") first, then remove /v1
|
||||
assert result["ANTHROPIC_BASE_URL"] == "https://openrouter.ai/api"
|
||||
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
|
||||
|
||||
def test_no_v1_suffix_left_alone(self):
|
||||
"""A base URL without /v1 is used as-is."""
|
||||
@@ -154,6 +161,7 @@ class TestBuildSdkEnvOpenRouter:
|
||||
result = build_sdk_env()
|
||||
|
||||
assert result["ANTHROPIC_BASE_URL"] == "https://custom-proxy.example.com"
|
||||
assert result.get("CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS") == "1"
|
||||
|
||||
def test_session_id_header(self):
|
||||
cfg = self._openrouter_config()
|
||||
|
||||
@@ -203,11 +203,15 @@ class TestConfigDefaults:
|
||||
|
||||
def test_max_turns_default(self):
|
||||
cfg = _make_config()
|
||||
assert cfg.claude_agent_max_turns == 1000
|
||||
assert cfg.claude_agent_max_turns == 50
|
||||
|
||||
def test_max_budget_usd_default(self):
|
||||
cfg = _make_config()
|
||||
assert cfg.claude_agent_max_budget_usd == 100.0
|
||||
assert cfg.claude_agent_max_budget_usd == 5.0
|
||||
|
||||
def test_max_thinking_tokens_default(self):
|
||||
cfg = _make_config()
|
||||
assert cfg.claude_agent_max_thinking_tokens == 8192
|
||||
|
||||
def test_max_transient_retries_default(self):
|
||||
cfg = _make_config()
|
||||
|
||||
@@ -196,3 +196,93 @@ def test_sdk_exports_hook_event_type(hook_event: str):
|
||||
# HookEvent is a Literal type — check that our events are valid values.
|
||||
# We can't easily inspect Literal at runtime, so just verify the type exists.
|
||||
assert HookEvent is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OpenRouter compatibility — bundled CLI version pin
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Newer ``claude-agent-sdk`` versions bundle CLI binaries that send
|
||||
# features incompatible with OpenRouter (``tool_reference`` content
|
||||
# blocks, ``context-management-2025-06-27`` beta). We neutralise these
|
||||
# at runtime by injecting ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1``
|
||||
# into the CLI subprocess env (see ``build_sdk_env()`` in ``env.py``).
|
||||
#
|
||||
# This test is the cheapest possible regression guard: it pins the
|
||||
# bundled CLI to a known-good version. If anyone bumps
|
||||
# ``claude-agent-sdk`` in ``pyproject.toml``, the bundled CLI version in
|
||||
# ``_cli_version.py`` will change and this test will fail with a clear
|
||||
# message that points the next person at the OpenRouter compat issue
|
||||
# instead of letting them silently re-break production.
|
||||
|
||||
# CLI versions bisect-verified as OpenRouter-safe. 2.1.63 and 2.1.70 pre-date
|
||||
# the context-management beta regression and work without any env var. 2.1.97+
|
||||
# requires ``CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`` (injected by
|
||||
# ``build_sdk_env()`` in ``env.py``) to strip the beta header.
|
||||
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = frozenset(
|
||||
{
|
||||
"2.1.63", # claude-agent-sdk 0.1.45 -- original pin from PR #12294.
|
||||
"2.1.70", # claude-agent-sdk 0.1.47 -- first version with the
|
||||
# tool_reference proxy detection fix; bisect-verified
|
||||
# OpenRouter-safe in #12742.
|
||||
"2.1.97", # claude-agent-sdk 0.1.58 -- OpenRouter-safe only with
|
||||
# CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 (injected by
|
||||
# build_sdk_env() in env.py).
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def test_bundled_cli_version_is_known_good_against_openrouter():
    """Pin the bundled CLI version so accidental SDK bumps cause a loud,
    fast failure with a pointer to the OpenRouter compatibility issue.
    """
    # Imported inside the test so the version is read when the test runs.
    from claude_agent_sdk._cli_version import __cli_version__

    assert __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS, (
        f"Bundled Claude Code CLI version is {__cli_version__!r}, which is "
        f"not in the OpenRouter-known-good set "
        f"({sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS)!r}). "
        "If you intentionally bumped `claude-agent-sdk`, verify the new "
        "bundled CLI works with OpenRouter against the reproduction test "
        "in `cli_openrouter_compat_test.py` (with "
        "`CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1`), then add the new "
        "CLI version to `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS`. If the env "
        "var is not sufficient, set `claude_agent_cli_path` to a "
        "known-good binary instead. See "
        "https://github.com/anthropics/claude-agent-sdk-python/issues/789 "
        "and https://github.com/Significant-Gravitas/AutoGPT/pull/12294."
    )
|
||||
|
||||
|
||||
def test_sdk_exposes_cli_path_option():
    """Sanity-check that the SDK still exposes the `cli_path` option we use
    for the OpenRouter workaround. If upstream removes it we need to know."""
    import inspect

    from claude_agent_sdk import ClaudeAgentOptions

    # The constructor signature is the contract we rely on: the option is
    # passed to ``ClaudeAgentOptions`` as a keyword argument.
    sig = inspect.signature(ClaudeAgentOptions)
    assert "cli_path" in sig.parameters, (
        "ClaudeAgentOptions no longer accepts `cli_path` — our "
        "claude_agent_cli_path config override would be silently ignored. "
        "Either find an alternative override mechanism or pin the SDK to a "
        "version that still exposes it."
    )
|
||||
|
||||
|
||||
def test_sdk_exposes_max_thinking_tokens_option():
    """Sanity-check that the SDK still exposes the `max_thinking_tokens` option
    we use to cap extended thinking cost. If upstream removes or renames it
    the cap will be silently ignored and Opus thinking tokens will be unbounded."""
    import inspect

    from claude_agent_sdk import ClaudeAgentOptions

    # The constructor signature is the contract we rely on: the cap is
    # passed to ``ClaudeAgentOptions`` as a keyword argument.
    sig = inspect.signature(ClaudeAgentOptions)
    assert "max_thinking_tokens" in sig.parameters, (
        "ClaudeAgentOptions no longer accepts `max_thinking_tokens` — our "
        "claude_agent_max_thinking_tokens cost cap would be silently ignored, "
        "allowing Opus extended thinking to generate unbounded tokens at $75/M. "
        "Find the correct parameter name in the new SDK version and update "
        "ChatConfig.claude_agent_max_thinking_tokens and service.py accordingly."
    )
|
||||
|
||||
@@ -2238,13 +2238,24 @@ async def stream_chat_completion_sdk(
|
||||
"max_turns": config.claude_agent_max_turns,
|
||||
# max_budget_usd: per-query spend ceiling enforced by the CLI.
|
||||
"max_budget_usd": config.claude_agent_max_budget_usd,
|
||||
# max_thinking_tokens: cap extended thinking output per LLM call.
|
||||
# Thinking tokens are billed at output rate ($75/M for Opus) and
|
||||
# account for ~54% of total cost. 8192 is the default.
|
||||
"max_thinking_tokens": config.claude_agent_max_thinking_tokens,
|
||||
}
|
||||
if sdk_model:
|
||||
sdk_options_kwargs["model"] = sdk_model
|
||||
|
||||
if sdk_env:
|
||||
sdk_options_kwargs["env"] = sdk_env
|
||||
if use_resume and resume_file:
|
||||
sdk_options_kwargs["resume"] = resume_file
|
||||
# Optional explicit Claude Code CLI binary path (decouples the
|
||||
# bundled SDK version from the CLI version we run — needed because
|
||||
# the CLI bundled in 0.1.46+ is broken against OpenRouter). Falls
|
||||
# back to the bundled binary when unset.
|
||||
if config.claude_agent_cli_path:
|
||||
sdk_options_kwargs["cli_path"] = config.claude_agent_cli_path
|
||||
|
||||
options = ClaudeAgentOptions(**sdk_options_kwargs) # type: ignore[arg-type] # dynamic kwargs
|
||||
|
||||
|
||||
17
autogpt_platform/backend/poetry.lock
generated
17
autogpt_platform/backend/poetry.lock
generated
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "agentmail"
|
||||
@@ -909,17 +909,18 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "claude-agent-sdk"
|
||||
version = "0.1.45"
|
||||
version = "0.1.58"
|
||||
description = "Python SDK for Claude Code"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "claude_agent_sdk-0.1.45-py3-none-macosx_11_0_arm64.whl", hash = "sha256:26a5cc60c3a394f5b814f6b2f67650819cbcd38c405bbdc11582b3e097b3a770"},
|
||||
{file = "claude_agent_sdk-0.1.45-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:decc741b53e0b2c10a64fd84c15acca1102077d9f99941c54905172cd95160c9"},
|
||||
{file = "claude_agent_sdk-0.1.45-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:7d48dcf4178c704e4ccbf3f1f4ebf20b3de3f03d0592086c1f3abd16b8ca441e"},
|
||||
{file = "claude_agent_sdk-0.1.45-py3-none-win_amd64.whl", hash = "sha256:d1cf34995109c513d8daabcae7208edc260b553b53462a9ac06a7c40e240a288"},
|
||||
{file = "claude_agent_sdk-0.1.45.tar.gz", hash = "sha256:97c1e981431b5af1e08c34731906ab8d4a58fe0774a04df0ea9587dcabc85151"},
|
||||
{file = "claude_agent_sdk-0.1.58-py3-none-macosx_11_0_arm64.whl", hash = "sha256:69197950809754c4f06bba8261f2d99c3f9605b6cc1c13d3409d0eb82fb4ee64"},
|
||||
{file = "claude_agent_sdk-0.1.58-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:75d60883fc5e2070bccd8d9b19505fe16af8e049120c03821e9dc8c826cca434"},
|
||||
{file = "claude_agent_sdk-0.1.58-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:7bf4eb0f00ec944a7b63eb94788f120dfb0460c348a525235c7d6641805acc1d"},
|
||||
{file = "claude_agent_sdk-0.1.58-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:650d298a3d3c0dcdde4b5f1dbf52f472ff0b0ec82987b27ffa2a4e0e72928408"},
|
||||
{file = "claude_agent_sdk-0.1.58-py3-none-win_amd64.whl", hash = "sha256:2c2130a7ffe06ed4f88d56b217a5091c91c9bcb1a69cfd94d5dcf0d2946d8c55"},
|
||||
{file = "claude_agent_sdk-0.1.58.tar.gz", hash = "sha256:77bee8fd60be033cb870def46c2ab1625a512fa8a3de4ff8d766664ffb16d6a6"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -8928,4 +8929,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.10,<3.14"
|
||||
content-hash = "da61798b73758b9292fc1933268d488fbe739dc1fbf5c6586cd0c76a3411eb2e"
|
||||
content-hash = "c4cc6a0a26869a167ce182b178224554135d89d8ffa4605257d17b3f495cdf59"
|
||||
|
||||
@@ -18,7 +18,7 @@ apscheduler = "^3.11.1"
|
||||
autogpt-libs = { path = "../autogpt_libs", develop = true }
|
||||
bleach = { extras = ["css"], version = "^6.2.0" }
|
||||
cachetools = "^5.5.0"
|
||||
claude-agent-sdk = "0.1.45" # see copilot/sdk/sdk_compat_test.py for capability checks
|
||||
claude-agent-sdk = "0.1.58" # latest stable; bundled CLI 2.1.97 -- CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 env var strips the broken context-management beta. See sdk_compat_test.py.
|
||||
click = "^8.2.0"
|
||||
cryptography = "^46.0"
|
||||
discord-py = "^2.5.2"
|
||||
|
||||
Reference in New Issue
Block a user