From cd9924f03e8d42d2ac151c1dbf6164eeac274c23 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 10:30:31 +0000 Subject: [PATCH] fix(copilot/sdk-proxy): address CodeRabbit follow-ups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-up findings from CodeRabbit's second-pass review: * The forbidden-pattern scanner in ``cli_openrouter_compat_test`` relied on a substring match against the prettified form `"type": "tool_reference"` (with a space). The CLI is free to emit compact JSON like `{"type":"tool_reference"}` which would slip past the scanner and false-pass the reproduction test. Replaced the substring check with a JSON walker that catches any dict with `type == "tool_reference"` regardless of serialisation, with a whitespace-tolerant regex fallback for malformed bodies. Added two regression tests (compact form, malformed fallback). * The timeout path in ``_run_cli_against_fake_server`` called ``proc.kill()`` and returned immediately, leaving an unreaped subprocess until event-loop shutdown. Reap it with a 5-second bounded ``proc.communicate()`` wait after the kill. * ``test_proxy_returns_502_on_upstream_failure`` swallowed ``aiohttp.ClientError`` / ``asyncio.TimeoutError`` on the outer ``client.post``. That outer call talks to the *proxy* on localhost — not the dead upstream — so any exception there indicates a proxy crash and must fail the test, not be caught. Removed the except block and bumped the client timeout to 10s to give the proxy room to return its 502. Also asserts the response body contains the generic "upstream error" text so a regression that replaces the 502 with a different status is caught. --- .../copilot/sdk/cli_openrouter_compat_test.py | 68 +++++++++++++++++-- .../sdk/openrouter_compat_proxy_test.py | 21 ++++-- 2 files changed, 77 insertions(+), 12 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py b/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py index 56b8bc2dd6..045eee23f8 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py @@ -57,6 +57,7 @@ import asyncio import json import logging import os +import re import subprocess from pathlib import Path from typing import Any @@ -72,14 +73,42 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Substring of the `tool_reference` content block that breaks OpenRouter's -# stricter Zod validation in tool_result.content. PR #12294 root-cause. -_FORBIDDEN_TOOL_REFERENCE = '"type": "tool_reference"' - # Beta string OpenRouter rejects in upstream issue #789. Can appear in # either `betas` arrays or the `anthropic-beta` header value. _FORBIDDEN_CONTEXT_MANAGEMENT_BETA = "context-management-2025-06-27" +def _body_contains_tool_reference_block(body_text: str) -> bool: + """Return True if *body_text* contains a ``tool_reference`` content + block anywhere in its structure. + + We parse the JSON and walk it rather than relying on substring + matches because the CLI is free to emit either ``{"type": "tool_reference"}`` + (with spaces) or the compact ``{"type":"tool_reference"}`` form, + and we must catch both. Falls back to a whitespace-tolerant + regex when the body isn't valid JSON — the Messages API always + sends JSON, but the fallback keeps the detector honest on + malformed / partial bodies a fuzzer might produce. + """ + try: + payload = json.loads(body_text) + except (ValueError, TypeError): + # Whitespace-tolerant fallback: allow any whitespace between + # the key, colon, and value quoted string. + return bool(re.search(r'"type"\s*:\s*"tool_reference"', body_text)) + + def _walk(node: Any) -> bool: + if isinstance(node, dict): + if node.get("type") == "tool_reference": + return True + return any(_walk(v) for v in node.values()) + if isinstance(node, list): + return any(_walk(v) for v in node) + return False + + return _walk(payload) + + def _scan_request_for_forbidden_patterns( body_text: str, headers: dict[str, str], @@ -90,7 +119,7 @@ def _scan_request_for_forbidden_patterns( OpenRouter-incompatible features. """ findings: list[str] = [] - if _FORBIDDEN_TOOL_REFERENCE in body_text: + if _body_contains_tool_reference_block(body_text): findings.append( "`tool_reference` content block in request body — " "PR #12294 / CLI 2.1.69 regression" @@ -335,7 +364,15 @@ async def _run_cli_against_fake_server( proc.kill() except ProcessLookupError: pass - stdout_bytes, stderr_bytes = b"", b"" + # Reap the process after kill() so we don't leave an unreaped + # child behind until event-loop shutdown. Wait with its own + # short timeout in case the kill was ineffective. + try: + stdout_bytes, stderr_bytes = await asyncio.wait_for( + proc.communicate(), timeout=5.0 + ) + except (asyncio.TimeoutError, TimeoutError): + stdout_bytes, stderr_bytes = b"", b"" return ( proc.returncode if proc.returncode is not None else -1, @@ -502,6 +539,27 @@ class TestScanRequestForForbiddenPatterns: assert "tool_reference" in findings[0] assert "context-management-2025-06-27" in findings[1] + def test_detects_compact_tool_reference_without_spaces(self): + # Regression guard: the old substring matcher only caught the + # prettified form '"type": "tool_reference"' with a space + # between the key and the value, so a CLI emitting compact + # JSON (e.g. via `json.dumps(separators=(",", ":"))`) could + # slip past the scanner and false-pass. The JSON-walking + # detector catches both forms. + body = '{"messages":[{"role":"user","content":[{"type":"tool_reference","tool_name":"find"}]}]}' + findings = _scan_request_for_forbidden_patterns(body, {}) + assert len(findings) == 1 + assert "tool_reference" in findings[0] + + def test_detects_tool_reference_in_malformed_body_fallback(self): + # When the body isn't valid JSON the helper falls back to a + # whitespace-tolerant regex so fuzzed / partial payloads are + # still caught. + body = 'garbage-prefix{"type" : "tool_reference"} trailing' + findings = _scan_request_for_forbidden_patterns(body, {}) + assert len(findings) == 1 + assert "tool_reference" in findings[0] + class TestResolveCliPath: def test_honours_explicit_env_var_when_file_exists(self, tmp_path, monkeypatch): diff --git a/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py b/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py index 09fa60953e..cf1506a687 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py @@ -492,7 +492,16 @@ async def test_proxy_passes_through_clean_request_unchanged(): @pytest.mark.asyncio async def test_proxy_returns_502_on_upstream_failure(): """If the upstream is unreachable the proxy must return a clear - 502, not silently hang.""" + 502, not silently hang. + + Note: the outer ``client.post`` talks to the *proxy* on localhost, + not to the dead upstream directly. The proxy is the thing under + test, so it should always respond with a 502 — we must NOT + swallow ``aiohttp.ClientError`` / ``asyncio.TimeoutError`` on the + outer call, because that would mask a proxy crash and turn the + assertion into a false positive. Let any such exception fail the + test. + """ proxy = OpenRouterCompatProxy( target_base_url="http://127.0.0.1:1", # nothing listening ) @@ -502,14 +511,12 @@ async def test_proxy_returns_502_on_upstream_failure(): async with client.post( f"{proxy.local_url}/v1/messages", json={"model": "x"}, - timeout=aiohttp.ClientTimeout(total=5), + timeout=aiohttp.ClientTimeout(total=10), ) as resp: assert resp.status == 502 - except (aiohttp.ClientError, asyncio.TimeoutError): - # Some platforms refuse the connection so quickly aiohttp - # raises before the proxy can respond — that also satisfies - # the spirit of the test (no infinite hang). - pass + text = await resp.text() + # Generic error message — no internal hostname leaked. + assert "upstream error" in text finally: await proxy.stop()