test(copilot/sdk-compat): tighten reproduction test (regex scan, proc reap, strict assertions, public socket API)

Address self-review findings on cli_openrouter_compat_test.py:

- Switch the tool_reference detection to a whitespace-tolerant regex
  (`"type"\s*:\s*"tool_reference"`). The Claude Code CLI is Node.js
  and `JSON.stringify` without an indent emits no whitespace, producing
  `{"type":"tool_reference"}`. The previous literal substring with one
  spacing would silently miss the real regression.

- Reap the subprocess after `proc.kill()` on timeout via
  `await asyncio.wait_for(proc.wait(), timeout=5)` so we don't leak a
  zombie + open pipe FDs across CI runs.

- Tighten `test_returns_none_when_env_var_points_to_missing_file` to
  assert `resolved is None` exactly. The previous
  `is None or .is_file()` was too permissive — it would also accept
  the function silently falling through to the bundled binary, which
  would defeat the explicit-override semantics.

- Replace `site._server` private aiohttp access with the public socket
  API: bind an ephemeral port via `socket.bind` and pass it to
  `web.SockSite`. Reading the port back via `getsockname` is robust to
  aiohttp internal changes.

- Convert the catch-all 404 route handler from a bare lambda to an
  `async def fallback_handler` to silence the aiohttp deprecation
  warning ("Bare functions are deprecated, use async ones").
This commit is contained in:
majdyz
2026-04-11 11:43:45 +00:00
parent a6e306d28a
commit 6dc0b6cffd

View File

@@ -57,6 +57,7 @@ import asyncio
import json
import logging
import os
import re
import subprocess
from pathlib import Path
from typing import Any
@@ -71,12 +72,21 @@ logger = logging.getLogger(__name__)
# Forbidden patterns we scan for in captured request bodies
# ---------------------------------------------------------------------------
# Substring of the `tool_reference` content block that breaks OpenRouter's
# stricter Zod validation in tool_result.content. PR #12294 root-cause.
_FORBIDDEN_TOOL_REFERENCE = '"type": "tool_reference"'
# Match the `tool_reference` content block that breaks OpenRouter's stricter
# Zod validation in tool_result.content. PR #12294 root-cause.
#
# We use a whitespace-tolerant regex rather than a literal substring because
# the Claude Code CLI is Node.js and `JSON.stringify` without an indent
# argument emits no whitespace between the key, colon, and value
# (`{"type":"tool_reference"}`), while a Python serializer would emit
# `{"type": "tool_reference"}`. A naive substring with one specific spacing
# would silently miss the real regression.
_FORBIDDEN_TOOL_REFERENCE_RE = re.compile(r'"type"\s*:\s*"tool_reference"')
# Beta string OpenRouter rejects in upstream issue #789. Can appear in
# either `betas` arrays or the `anthropic-beta` header value.
# either `betas` arrays or the `anthropic-beta` header value. This is a
# unique opaque token (no JSON punctuation around it that could vary), so
# a plain substring match is robust.
_FORBIDDEN_CONTEXT_MANAGEMENT_BETA = "context-management-2025-06-27"
@@ -90,7 +100,7 @@ def _scan_request_for_forbidden_patterns(
OpenRouter-incompatible features.
"""
findings: list[str] = []
if _FORBIDDEN_TOOL_REFERENCE in body_text:
if _FORBIDDEN_TOOL_REFERENCE_RE.search(body_text):
findings.append(
"`tool_reference` content block in request body — "
"PR #12294 / CLI 2.1.69 regression"
@@ -188,6 +198,7 @@ async def _start_fake_anthropic_server(
answered with a valid streaming response. Returns ``(runner, port)``
so the caller can ``await runner.cleanup()`` when finished.
"""
import socket
async def messages_handler(request: web.Request) -> web.StreamResponse:
body = await request.text()
@@ -213,22 +224,28 @@ async def _start_fake_anthropic_server(
await response.write_eof()
return response
async def fallback_handler(_request: web.Request) -> web.Response:
# OAuth/profile endpoints the CLI may probe — answer 404 so it
# falls through quickly without retrying.
return web.Response(status=404)
app = web.Application()
app.router.add_post("/v1/messages", messages_handler)
# OAuth/profile endpoints the CLI may probe — answer 404 so it falls
# through quickly without retrying.
app.router.add_route("*", "/{tail:.*}", lambda _r: web.Response(status=404))
app.router.add_route("*", "/{tail:.*}", fallback_handler)
# Bind an ephemeral port ourselves so we can read it back via the
# public ``getsockname`` API rather than reaching into ``site._server``
# private aiohttp internals.
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.bind(("127.0.0.1", 0))
port: int = sock.getsockname()[1]
runner = web.AppRunner(app)
await runner.setup()
site = web.TCPSite(runner, "127.0.0.1", 0)
site = web.SockSite(runner, sock)
await site.start()
server = site._server
assert server is not None
sockets = getattr(server, "sockets", None)
assert sockets is not None
port: int = sockets[0].getsockname()[1]
return runner, port
@@ -335,6 +352,14 @@ async def _run_cli_against_fake_server(
proc.kill()
except ProcessLookupError:
pass
# Reap the process to avoid leaving a zombie + open pipe FDs.
# Without this the asyncio transport keeps the stdout/stderr
# pipes alive until the loop exits, and in CI loops where this
# test runs many times the file-descriptor count creeps up.
try:
await asyncio.wait_for(proc.wait(), timeout=5.0)
except (asyncio.TimeoutError, TimeoutError):
pass
stdout_bytes, stderr_bytes = b"", b""
return (
@@ -534,13 +559,14 @@ class TestResolveCliPath:
def test_returns_none_when_env_var_points_to_missing_file(self, monkeypatch):
monkeypatch.delenv("CHAT_CLAUDE_AGENT_CLI_PATH", raising=False)
monkeypatch.setenv("CLAUDE_AGENT_CLI_PATH", "/nonexistent/path/to/claude")
# Should fall through to the bundled binary OR return None,
# but never raise.
# When the override is set but the file is missing, the resolver
# returns ``None`` outright — it does NOT silently fall through to
# the bundled binary, because doing so would defeat the purpose of
# the override (the operator explicitly asked for a specific path).
# The strict ``is None`` assertion catches any future regression
# that swaps this fail-loud behaviour for a silent fallback.
resolved = _resolve_cli_path()
# We can't assert exact value (depends on whether the bundled
# CLI is installed in the test env) but the function must not
# raise — the caller is supposed to handle None gracefully.
assert resolved is None or resolved.is_file()
assert resolved is None
def test_falls_back_to_bundled_when_env_var_unset(self, monkeypatch):
monkeypatch.delenv("CLAUDE_AGENT_CLI_PATH", raising=False)