diff --git a/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py b/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py index 56b8bc2dd6..045eee23f8 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/cli_openrouter_compat_test.py @@ -57,6 +57,7 @@ import asyncio import json import logging import os +import re import subprocess from pathlib import Path from typing import Any @@ -72,14 +73,42 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Substring of the `tool_reference` content block that breaks OpenRouter's -# stricter Zod validation in tool_result.content. PR #12294 root-cause. -_FORBIDDEN_TOOL_REFERENCE = '"type": "tool_reference"' - # Beta string OpenRouter rejects in upstream issue #789. Can appear in # either `betas` arrays or the `anthropic-beta` header value. _FORBIDDEN_CONTEXT_MANAGEMENT_BETA = "context-management-2025-06-27" +def _body_contains_tool_reference_block(body_text: str) -> bool: + """Return True if *body_text* contains a ``tool_reference`` content + block anywhere in its structure. + + We parse the JSON and walk it rather than relying on substring + matches because the CLI is free to emit either ``{"type": "tool_reference"}`` + (with spaces) or the compact ``{"type":"tool_reference"}`` form, + and we must catch both. Falls back to a whitespace-tolerant + regex when the body isn't valid JSON — the Messages API always + sends JSON, but the fallback keeps the detector honest on + malformed / partial bodies a fuzzer might produce. + """ + try: + payload = json.loads(body_text) + except (ValueError, TypeError): + # Whitespace-tolerant fallback: allow any whitespace between + # the key, colon, and value quoted string. + return bool(re.search(r'"type"\s*:\s*"tool_reference"', body_text)) + + def _walk(node: Any) -> bool: + if isinstance(node, dict): + if node.get("type") == "tool_reference": + return True + return any(_walk(v) for v in node.values()) + if isinstance(node, list): + return any(_walk(v) for v in node) + return False + + return _walk(payload) + + def _scan_request_for_forbidden_patterns( body_text: str, headers: dict[str, str], @@ -90,7 +119,7 @@ def _scan_request_for_forbidden_patterns( OpenRouter-incompatible features. """ findings: list[str] = [] - if _FORBIDDEN_TOOL_REFERENCE in body_text: + if _body_contains_tool_reference_block(body_text): findings.append( "`tool_reference` content block in request body — " "PR #12294 / CLI 2.1.69 regression" @@ -335,7 +364,15 @@ async def _run_cli_against_fake_server( proc.kill() except ProcessLookupError: pass - stdout_bytes, stderr_bytes = b"", b"" + # Reap the process after kill() so we don't leave an unreaped + # child behind until event-loop shutdown. Wait with its own + # short timeout in case the kill was ineffective. + try: + stdout_bytes, stderr_bytes = await asyncio.wait_for( + proc.communicate(), timeout=5.0 + ) + except (asyncio.TimeoutError, TimeoutError): + stdout_bytes, stderr_bytes = b"", b"" return ( proc.returncode if proc.returncode is not None else -1, @@ -502,6 +539,27 @@ class TestScanRequestForForbiddenPatterns: assert "tool_reference" in findings[0] assert "context-management-2025-06-27" in findings[1] + def test_detects_compact_tool_reference_without_spaces(self): + # Regression guard: the old substring matcher only caught the + # prettified form '"type": "tool_reference"' with a space + # between the key and the value, so a CLI emitting compact + # JSON (e.g. via `json.dumps(separators=(",", ":"))`) could + # slip past the scanner and false-pass. The JSON-walking + # detector catches both forms. + body = '{"messages":[{"role":"user","content":[{"type":"tool_reference","tool_name":"find"}]}]}' + findings = _scan_request_for_forbidden_patterns(body, {}) + assert len(findings) == 1 + assert "tool_reference" in findings[0] + + def test_detects_tool_reference_in_malformed_body_fallback(self): + # When the body isn't valid JSON the helper falls back to a + # whitespace-tolerant regex so fuzzed / partial payloads are + # still caught. + body = 'garbage-prefix{"type" : "tool_reference"} trailing' + findings = _scan_request_for_forbidden_patterns(body, {}) + assert len(findings) == 1 + assert "tool_reference" in findings[0] + class TestResolveCliPath: def test_honours_explicit_env_var_when_file_exists(self, tmp_path, monkeypatch): diff --git a/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py b/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py index 09fa60953e..cf1506a687 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/openrouter_compat_proxy_test.py @@ -492,7 +492,16 @@ async def test_proxy_passes_through_clean_request_unchanged(): @pytest.mark.asyncio async def test_proxy_returns_502_on_upstream_failure(): """If the upstream is unreachable the proxy must return a clear - 502, not silently hang.""" + 502, not silently hang. + + Note: the outer ``client.post`` talks to the *proxy* on localhost, + not to the dead upstream directly. The proxy is the thing under + test, so it should always respond with a 502 — we must NOT + swallow ``aiohttp.ClientError`` / ``asyncio.TimeoutError`` on the + outer call, because that would mask a proxy crash and turn the + assertion into a false positive. Let any such exception fail the + test. + """ proxy = OpenRouterCompatProxy( target_base_url="http://127.0.0.1:1", # nothing listening ) @@ -502,14 +511,12 @@ async def test_proxy_returns_502_on_upstream_failure(): async with client.post( f"{proxy.local_url}/v1/messages", json={"model": "x"}, - timeout=aiohttp.ClientTimeout(total=5), + timeout=aiohttp.ClientTimeout(total=10), ) as resp: assert resp.status == 502 - except (aiohttp.ClientError, asyncio.TimeoutError): - # Some platforms refuse the connection so quickly aiohttp - # raises before the proxy can respond — that also satisfies - # the spirit of the test (no infinite hang). - pass + text = await resp.text() + # Generic error message — no internal hostname leaked. + assert "upstream error" in text finally: await proxy.stop()