mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
Compare commits
5 Commits
master
...
chore/sdk-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6dc0b6cffd | ||
|
|
a6e306d28a | ||
|
|
d6f0fcb052 | ||
|
|
feb247d56e | ||
|
|
fdb3590693 |
@@ -172,6 +172,20 @@ class ChatConfig(BaseSettings):
|
||||
description="Maximum number of retries for transient API errors "
|
||||
"(429, 5xx, ECONNRESET) before surfacing the error to the user.",
|
||||
)
|
||||
claude_agent_cli_path: str | None = Field(
|
||||
default=None,
|
||||
description="Optional explicit path to a Claude Code CLI binary. "
|
||||
"When set, the SDK uses this binary instead of the version bundled "
|
||||
"with the installed `claude-agent-sdk` package — letting us pin "
|
||||
"the Python SDK and the CLI independently. Critical for keeping "
|
||||
"OpenRouter compatibility while still picking up newer SDK API "
|
||||
"features (the bundled CLI version in 0.1.46+ is broken against "
|
||||
"OpenRouter — see PR #12294 and "
|
||||
"anthropics/claude-agent-sdk-python#789). Falls back to the "
|
||||
"bundled binary when unset. Reads from `CHAT_CLAUDE_AGENT_CLI_PATH` "
|
||||
"or the unprefixed `CLAUDE_AGENT_CLI_PATH` environment variable "
|
||||
"(same pattern as `api_key` / `base_url`).",
|
||||
)
|
||||
use_openrouter: bool = Field(
|
||||
default=True,
|
||||
description="Enable routing API calls through the OpenRouter proxy. "
|
||||
@@ -294,6 +308,26 @@ class ChatConfig(BaseSettings):
|
||||
v = OPENROUTER_BASE_URL
|
||||
return v
|
||||
|
||||
@field_validator("claude_agent_cli_path", mode="before")
|
||||
@classmethod
|
||||
def get_claude_agent_cli_path(cls, v):
|
||||
"""Resolve the Claude Code CLI override path from environment.
|
||||
|
||||
Accepts either the Pydantic-prefixed ``CHAT_CLAUDE_AGENT_CLI_PATH``
|
||||
or the unprefixed ``CLAUDE_AGENT_CLI_PATH`` (matching the same
|
||||
fallback pattern used by ``api_key`` / ``base_url``). Keeping the
|
||||
unprefixed form working is important because the field is
|
||||
primarily an operator escape hatch set via container/host env,
|
||||
and the unprefixed name is what the PR description, the field
|
||||
docstrings, and the reproduction test in
|
||||
``cli_openrouter_compat_test.py`` refer to.
|
||||
"""
|
||||
if not v:
|
||||
v = os.getenv("CHAT_CLAUDE_AGENT_CLI_PATH")
|
||||
if not v:
|
||||
v = os.getenv("CLAUDE_AGENT_CLI_PATH")
|
||||
return v
|
||||
|
||||
# Prompt paths for different contexts
|
||||
PROMPT_PATHS: dict[str, str] = {
|
||||
"default": "prompts/chat_system.md",
|
||||
|
||||
@@ -174,13 +174,25 @@ class CoPilotProcessor:
|
||||
logger.info(f"[CoPilotExecutor] Worker {self.tid} started")
|
||||
|
||||
def _prewarm_cli(self) -> None:
|
||||
"""Run the bundled CLI binary once to warm OS page caches."""
|
||||
try:
|
||||
from claude_agent_sdk._internal.transport.subprocess_cli import (
|
||||
SubprocessCLITransport,
|
||||
)
|
||||
"""Run the Claude Code CLI binary once to warm OS page caches.
|
||||
|
||||
cli_path = SubprocessCLITransport._find_bundled_cli(None) # type: ignore[arg-type]
|
||||
Honours the ``claude_agent_cli_path`` config override (which lets
|
||||
us run a pinned CLI version independent of the bundled one in the
|
||||
installed ``claude-agent-sdk`` wheel — see
|
||||
``ChatConfig.claude_agent_cli_path`` for the rationale). Falls
|
||||
back to the bundled binary when no override is set.
|
||||
"""
|
||||
try:
|
||||
from backend.copilot.config import ChatConfig
|
||||
|
||||
cfg = ChatConfig()
|
||||
cli_path: str | None = cfg.claude_agent_cli_path
|
||||
if not cli_path:
|
||||
from claude_agent_sdk._internal.transport.subprocess_cli import (
|
||||
SubprocessCLITransport,
|
||||
)
|
||||
|
||||
cli_path = SubprocessCLITransport._find_bundled_cli(None) # type: ignore[arg-type]
|
||||
if cli_path:
|
||||
result = subprocess.run(
|
||||
[cli_path, "-v"],
|
||||
|
||||
@@ -0,0 +1,577 @@
|
||||
"""Reproduction test for the OpenRouter incompatibility in newer
|
||||
``claude-agent-sdk`` / Claude Code CLI versions.
|
||||
|
||||
Background — there are two stacked regressions that block us from
|
||||
upgrading the ``claude-agent-sdk`` package above ``0.1.45``:
|
||||
|
||||
1. **`tool_reference` content blocks** introduced by CLI ``2.1.69`` (=
|
||||
SDK ``0.1.46``). The CLI's built-in ``ToolSearch`` tool returns
|
||||
``{"type": "tool_reference", "tool_name": "..."}`` content blocks in
|
||||
``tool_result.content``. OpenRouter's stricter Zod validation
|
||||
rejects this with::
|
||||
|
||||
messages[N].content[0].content: Invalid input: expected string, received array
|
||||
|
||||
This is the regression that originally pinned us at 0.1.45 — see
|
||||
https://github.com/Significant-Gravitas/AutoGPT/pull/12294 for the
|
||||
full forensic write-up. CLI 2.1.70 added proxy detection that
|
||||
*should* disable the offending blocks when ``ANTHROPIC_BASE_URL`` is
|
||||
set, but our subsequent attempts at 0.1.55 / 0.1.56 still failed.
|
||||
|
||||
2. **`context-management-2025-06-27` beta header** — some CLI version
|
||||
after ``2.1.91`` started injecting this header / beta flag, which
|
||||
OpenRouter rejects with::
|
||||
|
||||
400 No endpoints available that support Anthropic's context
|
||||
management features (context-management-2025-06-27). Context
|
||||
management requires a supported provider (Anthropic).
|
||||
|
||||
Tracked upstream at
|
||||
https://github.com/anthropics/claude-agent-sdk-python/issues/789.
|
||||
Still open at the time of writing, no upstream PR linked, no
|
||||
workaround documented.
|
||||
|
||||
The purpose of this test:
|
||||
* Spin up a tiny in-process HTTP server that pretends to be the
|
||||
Anthropic Messages API.
|
||||
* Capture every request body the CLI sends.
|
||||
* Inspect the captured bodies for the two forbidden patterns above.
|
||||
* Fail loudly if either is present, with a pointer to the issue
|
||||
tracker.
|
||||
|
||||
This is the reproduction we use as a CI gate when bisecting which SDK /
|
||||
CLI version is safe to upgrade to. It runs against the bundled CLI by
|
||||
default (or against ``ChatConfig.claude_agent_cli_path`` when set), so
|
||||
it doubles as a regression guard for the ``cli_path`` override
|
||||
mechanism.
|
||||
|
||||
The test does **not** need an OpenRouter API key — it reproduces the
|
||||
mechanism (forbidden content blocks / headers in the *outgoing*
|
||||
request) rather than the symptom (the 400 OpenRouter would return).
|
||||
This keeps it deterministic, free, and CI-runnable without secrets.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from aiohttp import web
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Forbidden patterns we scan for in captured request bodies
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Match the `tool_reference` content block that breaks OpenRouter's stricter
|
||||
# Zod validation in tool_result.content. PR #12294 root-cause.
|
||||
#
|
||||
# We use a whitespace-tolerant regex rather than a literal substring because
|
||||
# the Claude Code CLI is Node.js and `JSON.stringify` without an indent
|
||||
# argument emits no whitespace between the key, colon, and value
|
||||
# (`{"type":"tool_reference"}`), while a Python serializer would emit
|
||||
# `{"type": "tool_reference"}`. A naive substring with one specific spacing
|
||||
# would silently miss the real regression.
|
||||
_FORBIDDEN_TOOL_REFERENCE_RE = re.compile(r'"type"\s*:\s*"tool_reference"')
|
||||
|
||||
# Beta string OpenRouter rejects in upstream issue #789. Can appear in
|
||||
# either `betas` arrays or the `anthropic-beta` header value. This is a
|
||||
# unique opaque token (no JSON punctuation around it that could vary), so
|
||||
# a plain substring match is robust.
|
||||
_FORBIDDEN_CONTEXT_MANAGEMENT_BETA = "context-management-2025-06-27"
|
||||
|
||||
|
||||
def _scan_request_for_forbidden_patterns(
|
||||
body_text: str,
|
||||
headers: dict[str, str],
|
||||
) -> list[str]:
|
||||
"""Return a list of forbidden patterns found in *body_text* / *headers*.
|
||||
|
||||
Empty list = clean request. Non-empty = the CLI is sending one of the
|
||||
OpenRouter-incompatible features.
|
||||
"""
|
||||
findings: list[str] = []
|
||||
if _FORBIDDEN_TOOL_REFERENCE_RE.search(body_text):
|
||||
findings.append(
|
||||
"`tool_reference` content block in request body — "
|
||||
"PR #12294 / CLI 2.1.69 regression"
|
||||
)
|
||||
if _FORBIDDEN_CONTEXT_MANAGEMENT_BETA in body_text:
|
||||
findings.append(
|
||||
f"{_FORBIDDEN_CONTEXT_MANAGEMENT_BETA!r} in request body — "
|
||||
"anthropics/claude-agent-sdk-python#789"
|
||||
)
|
||||
# Header values are case-insensitive in HTTP — aiohttp normalises
|
||||
# incoming names but values are stored as-is.
|
||||
for header_name, header_value in headers.items():
|
||||
if header_name.lower() == "anthropic-beta":
|
||||
if _FORBIDDEN_CONTEXT_MANAGEMENT_BETA in header_value:
|
||||
findings.append(
|
||||
f"{_FORBIDDEN_CONTEXT_MANAGEMENT_BETA!r} in "
|
||||
"`anthropic-beta` header — issue #789"
|
||||
)
|
||||
return findings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake Anthropic Messages API
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# We need to give the CLI a *successful* response so it doesn't error out
|
||||
# before we get a chance to inspect the request. The minimal thing the
|
||||
# CLI accepts is a streamed (SSE) message-start → content-block-delta →
|
||||
# message-stop sequence.
|
||||
#
|
||||
# We don't strictly *need* the CLI to accept the response — we already
|
||||
# have the request body by the time we send any reply — but giving it a
|
||||
# valid stream means the assertion failure (if any) is the *only*
|
||||
# failure mode in the test, not "CLI exited 1 because we sent garbage".
|
||||
|
||||
|
||||
def _build_streaming_message_response() -> str:
|
||||
"""Return an SSE-formatted body containing a minimal Anthropic
|
||||
Messages API streamed response.
|
||||
|
||||
This is the smallest stream that the Claude Code CLI will accept
|
||||
end-to-end without errors. Each line is one SSE event."""
|
||||
events: list[dict[str, Any]] = [
|
||||
{
|
||||
"type": "message_start",
|
||||
"message": {
|
||||
"id": "msg_test",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [],
|
||||
"model": "claude-test",
|
||||
"stop_reason": None,
|
||||
"stop_sequence": None,
|
||||
"usage": {"input_tokens": 1, "output_tokens": 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "content_block_start",
|
||||
"index": 0,
|
||||
"content_block": {"type": "text", "text": ""},
|
||||
},
|
||||
{
|
||||
"type": "content_block_delta",
|
||||
"index": 0,
|
||||
"delta": {"type": "text_delta", "text": "ok"},
|
||||
},
|
||||
{"type": "content_block_stop", "index": 0},
|
||||
{
|
||||
"type": "message_delta",
|
||||
"delta": {"stop_reason": "end_turn", "stop_sequence": None},
|
||||
"usage": {"output_tokens": 1},
|
||||
},
|
||||
{"type": "message_stop"},
|
||||
]
|
||||
return "".join(
|
||||
f"event: {evt['type']}\ndata: {json.dumps(evt)}\n\n" for evt in events
|
||||
)
|
||||
|
||||
|
||||
class _CapturedRequest:
|
||||
"""One request the fake server received."""
|
||||
|
||||
def __init__(self, path: str, headers: dict[str, str], body: str) -> None:
|
||||
self.path = path
|
||||
self.headers = headers
|
||||
self.body = body
|
||||
|
||||
|
||||
async def _start_fake_anthropic_server(
|
||||
captured: list[_CapturedRequest],
|
||||
) -> tuple[web.AppRunner, int]:
|
||||
"""Start an aiohttp server pretending to be the Anthropic API.
|
||||
|
||||
All POSTs to ``/v1/messages`` are recorded into *captured* and
|
||||
answered with a valid streaming response. Returns ``(runner, port)``
|
||||
so the caller can ``await runner.cleanup()`` when finished.
|
||||
"""
|
||||
import socket
|
||||
|
||||
async def messages_handler(request: web.Request) -> web.StreamResponse:
|
||||
body = await request.text()
|
||||
captured.append(
|
||||
_CapturedRequest(
|
||||
path=request.path,
|
||||
headers={k: v for k, v in request.headers.items()},
|
||||
body=body,
|
||||
)
|
||||
)
|
||||
# Stream a minimal valid response so the CLI doesn't error out
|
||||
# before we can inspect what it sent.
|
||||
response = web.StreamResponse(
|
||||
status=200,
|
||||
headers={
|
||||
"Content-Type": "text/event-stream",
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
},
|
||||
)
|
||||
await response.prepare(request)
|
||||
await response.write(_build_streaming_message_response().encode("utf-8"))
|
||||
await response.write_eof()
|
||||
return response
|
||||
|
||||
async def fallback_handler(_request: web.Request) -> web.Response:
|
||||
# OAuth/profile endpoints the CLI may probe — answer 404 so it
|
||||
# falls through quickly without retrying.
|
||||
return web.Response(status=404)
|
||||
|
||||
app = web.Application()
|
||||
app.router.add_post("/v1/messages", messages_handler)
|
||||
app.router.add_route("*", "/{tail:.*}", fallback_handler)
|
||||
|
||||
# Bind an ephemeral port ourselves so we can read it back via the
|
||||
# public ``getsockname`` API rather than reaching into ``site._server``
|
||||
# private aiohttp internals.
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
sock.bind(("127.0.0.1", 0))
|
||||
port: int = sock.getsockname()[1]
|
||||
|
||||
runner = web.AppRunner(app)
|
||||
await runner.setup()
|
||||
site = web.SockSite(runner, sock)
|
||||
await site.start()
|
||||
|
||||
return runner, port
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI invocation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _resolve_cli_path() -> Path | None:
|
||||
"""Return the Claude Code CLI binary the SDK would use.
|
||||
|
||||
Honours the same override mechanism as ``service.py`` /
|
||||
``ChatConfig.claude_agent_cli_path``: checks either the Pydantic-
|
||||
prefixed ``CHAT_CLAUDE_AGENT_CLI_PATH`` or the unprefixed
|
||||
``CLAUDE_AGENT_CLI_PATH`` env var first, then falls back to the
|
||||
bundled binary that ships with the installed ``claude-agent-sdk``
|
||||
wheel. The two env var names are accepted at the config layer via
|
||||
``ChatConfig.get_claude_agent_cli_path`` and mirrored here so the
|
||||
reproduction test picks up the same override regardless of which
|
||||
form an operator sets.
|
||||
"""
|
||||
override = os.environ.get("CHAT_CLAUDE_AGENT_CLI_PATH") or os.environ.get(
|
||||
"CLAUDE_AGENT_CLI_PATH"
|
||||
)
|
||||
if override:
|
||||
candidate = Path(override)
|
||||
return candidate if candidate.is_file() else None
|
||||
|
||||
try:
|
||||
from claude_agent_sdk._internal.transport.subprocess_cli import ( # type: ignore[import-untyped]
|
||||
SubprocessCLITransport,
|
||||
)
|
||||
|
||||
bundled = SubprocessCLITransport._find_bundled_cli(None) # type: ignore[arg-type]
|
||||
return Path(bundled) if bundled else None
|
||||
except Exception as e: # pragma: no cover - import-time guard
|
||||
logger.warning("Could not locate bundled Claude CLI: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
async def _run_cli_against_fake_server(
|
||||
cli_path: Path,
|
||||
fake_server_port: int,
|
||||
timeout_seconds: float,
|
||||
) -> tuple[int, str, str]:
|
||||
"""Spawn the CLI pointed at the fake Anthropic server and feed it a
|
||||
single ``user`` message via stream-json on stdin.
|
||||
|
||||
Returns ``(returncode, stdout, stderr)``. The return code is not
|
||||
asserted by the test — we only care that the CLI made at least one
|
||||
POST to ``/v1/messages`` so the fake server captured the body.
|
||||
"""
|
||||
fake_url = f"http://127.0.0.1:{fake_server_port}"
|
||||
env = {
|
||||
# Inherit basic shell variables so the CLI can find its tools,
|
||||
# but force network/auth at our fake endpoint.
|
||||
**os.environ,
|
||||
"ANTHROPIC_BASE_URL": fake_url,
|
||||
"ANTHROPIC_API_KEY": "sk-test-fake-key-not-real",
|
||||
# Disable any features that would phone home to a different host
|
||||
# mid-test (telemetry, plugin marketplace fetch).
|
||||
"DISABLE_TELEMETRY": "1",
|
||||
"CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
|
||||
}
|
||||
|
||||
# The CLI accepts stream-json input on stdin in `query` mode. A
|
||||
# minimal user-message envelope is enough to trigger an API call.
|
||||
stdin_payload = (
|
||||
json.dumps(
|
||||
{
|
||||
"type": "user",
|
||||
"message": {"role": "user", "content": "hello"},
|
||||
}
|
||||
)
|
||||
+ "\n"
|
||||
)
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
str(cli_path),
|
||||
"--output-format",
|
||||
"stream-json",
|
||||
"--input-format",
|
||||
"stream-json",
|
||||
"--verbose",
|
||||
"--print",
|
||||
stdin=asyncio.subprocess.PIPE,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
env=env,
|
||||
)
|
||||
try:
|
||||
assert proc.stdin is not None
|
||||
proc.stdin.write(stdin_payload.encode("utf-8"))
|
||||
await proc.stdin.drain()
|
||||
proc.stdin.close()
|
||||
|
||||
stdout_bytes, stderr_bytes = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=timeout_seconds
|
||||
)
|
||||
except (asyncio.TimeoutError, TimeoutError):
|
||||
# Best-effort kill — we already have whatever requests the CLI
|
||||
# managed to send before stalling.
|
||||
try:
|
||||
proc.kill()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
# Reap the process to avoid leaving a zombie + open pipe FDs.
|
||||
# Without this the asyncio transport keeps the stdout/stderr
|
||||
# pipes alive until the loop exits, and in CI loops where this
|
||||
# test runs many times the file-descriptor count creeps up.
|
||||
try:
|
||||
await asyncio.wait_for(proc.wait(), timeout=5.0)
|
||||
except (asyncio.TimeoutError, TimeoutError):
|
||||
pass
|
||||
stdout_bytes, stderr_bytes = b"", b""
|
||||
|
||||
return (
|
||||
proc.returncode if proc.returncode is not None else -1,
|
||||
stdout_bytes.decode("utf-8", errors="replace"),
|
||||
stderr_bytes.decode("utf-8", errors="replace"),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# The actual test
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cli_does_not_send_openrouter_incompatible_features(caplog):
|
||||
"""End-to-end OpenRouter compatibility reproduction.
|
||||
|
||||
Spawns the bundled (or overridden) Claude Code CLI against a fake
|
||||
Anthropic API server, captures every request body it sends, and
|
||||
asserts that none of them contain the two known OpenRouter-breaking
|
||||
features (`tool_reference` content blocks or the
|
||||
`context-management-2025-06-27` beta header).
|
||||
|
||||
Why this matters: pinning the CLI version via
|
||||
``test_bundled_cli_version_is_known_good_against_openrouter`` only
|
||||
catches accidental SDK bumps — it doesn't tell us *why* the new
|
||||
version would fail. This test reproduces the exact mechanism so
|
||||
bisecting via CI commits gives an actionable signal.
|
||||
"""
|
||||
cli_path = _resolve_cli_path()
|
||||
if cli_path is None or not cli_path.is_file():
|
||||
pytest.skip(
|
||||
"No Claude Code CLI binary available (neither bundled nor "
|
||||
"overridden via CLAUDE_AGENT_CLI_PATH / "
|
||||
"CHAT_CLAUDE_AGENT_CLI_PATH); cannot reproduce."
|
||||
)
|
||||
|
||||
captured: list[_CapturedRequest] = []
|
||||
runner, port = await _start_fake_anthropic_server(captured)
|
||||
try:
|
||||
returncode, stdout, stderr = await _run_cli_against_fake_server(
|
||||
cli_path=cli_path,
|
||||
fake_server_port=port,
|
||||
timeout_seconds=30.0,
|
||||
)
|
||||
finally:
|
||||
await runner.cleanup()
|
||||
|
||||
# We don't assert the CLI's exit code — depending on the CLI version
|
||||
# and what we send back, the CLI may exit non-zero after a single
|
||||
# successful round-trip. All we care about is that the captured
|
||||
# request bodies don't contain the forbidden patterns.
|
||||
logger.info(
|
||||
"CLI exited rc=%d; captured %d requests; stdout=%d bytes; stderr=%d bytes",
|
||||
returncode,
|
||||
len(captured),
|
||||
len(stdout),
|
||||
len(stderr),
|
||||
)
|
||||
|
||||
if not captured:
|
||||
pytest.skip(
|
||||
"Bundled CLI did not make any HTTP requests to the fake server "
|
||||
f"(rc={returncode}). The CLI may have failed before reaching "
|
||||
f"the network — stderr tail: {stderr[-500:]!r}. "
|
||||
"Nothing to assert; treating as inconclusive rather than "
|
||||
"either passing or failing."
|
||||
)
|
||||
|
||||
all_findings: list[str] = []
|
||||
for req in captured:
|
||||
findings = _scan_request_for_forbidden_patterns(req.body, req.headers)
|
||||
if findings:
|
||||
all_findings.extend(f"{req.path}: {finding}" for finding in findings)
|
||||
|
||||
assert not all_findings, (
|
||||
f"Bundled Claude Code CLI sent OpenRouter-incompatible features in "
|
||||
f"{len(all_findings)} request(s):\n - "
|
||||
+ "\n - ".join(all_findings)
|
||||
+ "\n\nThis is the regression that prevents us from upgrading "
|
||||
"`claude-agent-sdk` above 0.1.45. See "
|
||||
"https://github.com/Significant-Gravitas/AutoGPT/pull/12294 and "
|
||||
"https://github.com/anthropics/claude-agent-sdk-python/issues/789. "
|
||||
"If you intended to upgrade, you must use a known-good CLI binary "
|
||||
"via `claude_agent_cli_path` (env: `CLAUDE_AGENT_CLI_PATH` or "
|
||||
"`CHAT_CLAUDE_AGENT_CLI_PATH`) instead of the bundled one."
|
||||
)
|
||||
|
||||
|
||||
def test_subprocess_module_available():
|
||||
"""Sentinel test: the subprocess module must be importable so the
|
||||
main reproduction test can spawn the CLI. Catches sandboxed CI
|
||||
runners that block subprocess execution before the slow test runs."""
|
||||
assert subprocess.__name__ == "subprocess"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pure helper unit tests — pin the forbidden-pattern detection so any
|
||||
# future drift in the scanner is caught fast, even when the slow
|
||||
# end-to-end CLI subprocess test isn't runnable.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestScanRequestForForbiddenPatterns:
|
||||
def test_clean_body_returns_empty_findings(self):
|
||||
body = '{"model": "claude-opus-4.6", "messages": [{"role": "user", "content": "hi"}]}'
|
||||
assert _scan_request_for_forbidden_patterns(body, {}) == []
|
||||
|
||||
def test_detects_tool_reference_in_body(self):
|
||||
body = (
|
||||
'{"messages": [{"role": "user", "content": ['
|
||||
'{"type": "tool_reference", "tool_name": "find"}'
|
||||
"]}]}"
|
||||
)
|
||||
findings = _scan_request_for_forbidden_patterns(body, {})
|
||||
assert len(findings) == 1
|
||||
assert "tool_reference" in findings[0]
|
||||
assert "PR #12294" in findings[0]
|
||||
|
||||
def test_detects_context_management_in_body(self):
|
||||
body = '{"betas": ["context-management-2025-06-27"]}'
|
||||
findings = _scan_request_for_forbidden_patterns(body, {})
|
||||
assert len(findings) == 1
|
||||
assert "context-management-2025-06-27" in findings[0]
|
||||
assert "#789" in findings[0]
|
||||
|
||||
def test_detects_context_management_in_anthropic_beta_header(self):
|
||||
findings = _scan_request_for_forbidden_patterns(
|
||||
body_text="{}",
|
||||
headers={"anthropic-beta": "context-management-2025-06-27"},
|
||||
)
|
||||
assert len(findings) == 1
|
||||
assert "anthropic-beta" in findings[0]
|
||||
|
||||
def test_detects_context_management_in_uppercase_header_name(self):
|
||||
# HTTP header names are case-insensitive — make sure the
|
||||
# scanner handles a server that didn't normalise names.
|
||||
findings = _scan_request_for_forbidden_patterns(
|
||||
body_text="{}",
|
||||
headers={"Anthropic-Beta": "context-management-2025-06-27, other"},
|
||||
)
|
||||
assert len(findings) == 1
|
||||
|
||||
def test_ignores_unrelated_header_values(self):
|
||||
findings = _scan_request_for_forbidden_patterns(
|
||||
body_text="{}",
|
||||
headers={
|
||||
"authorization": "Bearer secret",
|
||||
"anthropic-beta": "fine-grained-tool-streaming-2025",
|
||||
},
|
||||
)
|
||||
assert findings == []
|
||||
|
||||
def test_detects_both_patterns_simultaneously(self):
|
||||
body = (
|
||||
'{"betas": ["context-management-2025-06-27"], '
|
||||
'"messages": [{"role": "user", "content": ['
|
||||
'{"type": "tool_reference", "tool_name": "find"}'
|
||||
"]}]}"
|
||||
)
|
||||
findings = _scan_request_for_forbidden_patterns(body, {})
|
||||
# Both patterns hit, in stable order: tool_reference then betas.
|
||||
assert len(findings) == 2
|
||||
assert "tool_reference" in findings[0]
|
||||
assert "context-management-2025-06-27" in findings[1]
|
||||
|
||||
|
||||
class TestResolveCliPath:
|
||||
def test_honours_explicit_env_var_when_file_exists(self, tmp_path, monkeypatch):
|
||||
fake_cli = tmp_path / "fake-claude"
|
||||
fake_cli.write_text("#!/bin/sh\necho fake\n")
|
||||
fake_cli.chmod(0o755)
|
||||
monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
|
||||
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", str(fake_cli))
|
||||
resolved = _resolve_cli_path()
|
||||
assert resolved == fake_cli
|
||||
|
||||
def test_honours_chat_prefixed_env_var_when_file_exists(
|
||||
self, tmp_path, monkeypatch
|
||||
):
|
||||
"""The Pydantic ``CHAT_`` prefix variant is also honoured.
|
||||
|
||||
Mirrors ``ChatConfig.get_claude_agent_cli_path`` which accepts
|
||||
either ``CHAT_CLAUDE_AGENT_CLI_PATH`` (prefix applied by
|
||||
``pydantic_settings``) or the unprefixed ``CLAUDE_AGENT_CLI_PATH``
|
||||
form documented in the PR and field docstring.
|
||||
"""
|
||||
fake_cli = tmp_path / "fake-claude-prefixed"
|
||||
fake_cli.write_text("#!/bin/sh\necho fake\n")
|
||||
fake_cli.chmod(0o755)
|
||||
monkeypatch.delenv("CLAUDE_AGENT_CLI_PATH", raising=False)
|
||||
monkeypatch.setenv("CHAT_CLAUDE_AGENT_CLI_PATH", str(fake_cli))
|
||||
resolved = _resolve_cli_path()
|
||||
assert resolved == fake_cli
|
||||
|
||||
def test_returns_none_when_env_var_points_to_missing_file(self, monkeypatch):
|
||||
monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
|
||||
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", "/nonexistent/path/to/claude")
|
||||
# When the override is set but the file is missing, the resolver
|
||||
# returns ``None`` outright — it does NOT silently fall through to
|
||||
# the bundled binary, because doing so would defeat the purpose of
|
||||
# the override (the operator explicitly asked for a specific path).
|
||||
# The strict ``is None`` assertion catches any future regression
|
||||
# that swaps this fail-loud behaviour for a silent fallback.
|
||||
resolved = _resolve_cli_path()
|
||||
assert resolved is None
|
||||
|
||||
def test_falls_back_to_bundled_when_env_var_unset(self, monkeypatch):
|
||||
monkeypatch.delenv("CLAUDE_AGENT_CLI_PATH", raising=False)
|
||||
monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
|
||||
# Same caveat as above — returns the bundled path or None,
|
||||
# depending on what's installed in the test env.
|
||||
resolved = _resolve_cli_path()
|
||||
assert resolved is None or resolved.is_file()
|
||||
@@ -196,3 +196,79 @@ def test_sdk_exports_hook_event_type(hook_event: str):
|
||||
# HookEvent is a Literal type — check that our events are valid values.
|
||||
# We can't easily inspect Literal at runtime, so just verify the type exists.
|
||||
assert HookEvent is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OpenRouter compatibility — bundled CLI version pin
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# We're stuck on ``claude-agent-sdk==0.1.45`` (bundled CLI ``2.1.63``)
|
||||
# because every version above introduces a 400 against OpenRouter:
|
||||
#
|
||||
# 1. CLI ``2.1.69`` (= SDK ``0.1.46``) shipped a `tool_reference` content
|
||||
# block in `tool_result.content` that OpenRouter's stricter Zod
|
||||
# validation rejects. See PR
|
||||
# https://github.com/Significant-Gravitas/AutoGPT/pull/12294 for the
|
||||
# forensic write-up that originally pinned us. CLI ``2.1.70`` added
|
||||
# proxy detection that *should* disable the offending block, but two
|
||||
# later attempts (Dependabot bumps to 0.1.55 / 0.1.56) still failed.
|
||||
#
|
||||
# 2. A second regression — the ``context-management-2025-06-27`` beta
|
||||
# header — appeared in some CLI version after ``2.1.91``. Tracked
|
||||
# upstream at
|
||||
# https://github.com/anthropics/claude-agent-sdk-python/issues/789
|
||||
# (still open at the time of writing, no upstream PR yet).
|
||||
#
|
||||
# This test is the cheapest possible regression guard: it pins the
|
||||
# bundled CLI to a known-good version. If anyone bumps
|
||||
# ``claude-agent-sdk`` in ``pyproject.toml``, the bundled CLI version in
|
||||
# ``_cli_version.py`` will change and this test will fail with a clear
|
||||
# message that points the next person at the OpenRouter compat issue
|
||||
# instead of letting them silently re-break production.
|
||||
#
|
||||
# Workaround for actually upgrading: set the
|
||||
# ``claude_agent_cli_path`` config option (or the matching env var) to
|
||||
# point at a separately-installed Claude Code CLI binary at a known-good
|
||||
# version, so the SDK Python API surface and the CLI binary version can
|
||||
# be picked independently.
|
||||
|
||||
# CLI versions verified to work against OpenRouter from production
|
||||
# traffic. When upstream lands a fix and we can confirm a newer version
|
||||
# works, add it to this set rather than blanket-removing the assertion.
|
||||
_KNOWN_GOOD_BUNDLED_CLI_VERSIONS: frozenset[str] = frozenset({"2.1.63"})
|
||||
|
||||
|
||||
def test_bundled_cli_version_is_known_good_against_openrouter():
|
||||
"""Pin the bundled CLI version so accidental SDK bumps cause a loud,
|
||||
fast failure with a pointer to the OpenRouter compatibility issue."""
|
||||
from claude_agent_sdk._cli_version import __cli_version__
|
||||
|
||||
assert __cli_version__ in _KNOWN_GOOD_BUNDLED_CLI_VERSIONS, (
|
||||
f"Bundled Claude Code CLI version is {__cli_version__!r}, which is "
|
||||
f"not in the OpenRouter-known-good set "
|
||||
f"{sorted(_KNOWN_GOOD_BUNDLED_CLI_VERSIONS)!r}. "
|
||||
"If you intentionally bumped `claude-agent-sdk`, verify the new "
|
||||
"bundled CLI works with OpenRouter against the reproduction test "
|
||||
"in `cli_openrouter_compat_test.py`, then add the new CLI version "
|
||||
"to `_KNOWN_GOOD_BUNDLED_CLI_VERSIONS`. If you cannot make the "
|
||||
"bundled CLI work, set `claude_agent_cli_path` to a known-good "
|
||||
"binary instead and skip the bundled one. See "
|
||||
"https://github.com/anthropics/claude-agent-sdk-python/issues/789 "
|
||||
"and https://github.com/Significant-Gravitas/AutoGPT/pull/12294."
|
||||
)
|
||||
|
||||
|
||||
def test_sdk_exposes_cli_path_option():
|
||||
"""Sanity-check that the SDK still exposes the `cli_path` option we use
|
||||
for the OpenRouter workaround. If upstream removes it we need to know."""
|
||||
import inspect
|
||||
|
||||
from claude_agent_sdk import ClaudeAgentOptions
|
||||
|
||||
sig = inspect.signature(ClaudeAgentOptions)
|
||||
assert "cli_path" in sig.parameters, (
|
||||
"ClaudeAgentOptions no longer accepts `cli_path` — our "
|
||||
"claude_agent_cli_path config override would be silently ignored. "
|
||||
"Either find an alternative override mechanism or pin the SDK to a "
|
||||
"version that still exposes it."
|
||||
)
|
||||
|
||||
@@ -2245,6 +2245,12 @@ async def stream_chat_completion_sdk(
|
||||
sdk_options_kwargs["env"] = sdk_env
|
||||
if use_resume and resume_file:
|
||||
sdk_options_kwargs["resume"] = resume_file
|
||||
# Optional explicit Claude Code CLI binary path (decouples the
|
||||
# bundled SDK version from the CLI version we run — needed because
|
||||
# the CLI bundled in 0.1.46+ is broken against OpenRouter). Falls
|
||||
# back to the bundled binary when unset.
|
||||
if config.claude_agent_cli_path:
|
||||
sdk_options_kwargs["cli_path"] = config.claude_agent_cli_path
|
||||
|
||||
options = ClaudeAgentOptions(**sdk_options_kwargs) # type: ignore[arg-type] # dynamic kwargs
|
||||
|
||||
|
||||
@@ -35,7 +35,6 @@ class TestUsdToMicrodollars:
|
||||
assert usd_to_microdollars(1.0) == 1_000_000
|
||||
|
||||
|
||||
|
||||
class TestMaskEmail:
|
||||
def test_typical_email(self):
|
||||
assert _mask_email("user@example.com") == "us***@example.com"
|
||||
|
||||
Reference in New Issue
Block a user