mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
feat(backend/copilot): add web_search tool via Anthropic web_search beta
New `web_search` copilot tool wraps Anthropic's server-side `web_search_20250305` so both SDK and baseline paths have a single unified search interface. Previously baseline (Kimi on OpenRouter) had no native search and had to go through the Perplexity block via `run_block`; SDK (Sonnet) used Claude Code's native WebSearch.
* `copilot/tools/web_search.py` — `WebSearchTool` dispatches through `AsyncAnthropic.messages.create` with a cheap Haiku model + `web_search_20250305` tool, parses `web_search_tool_result` blocks into {title, url, snippet, page_age}. `is_available` hides the tool when no Anthropic API key is configured.
* `sdk/tool_adapter.py` — moved `WebSearch` from SDK built-in-always list to `SDK_DISALLOWED_TOOLS` so SDK routes through `mcp__copilot__web_search` too. Single code path for cost tracking.
* `persist_and_record_usage(provider="anthropic")` — billing lands in the same turn-accounting bucket as LLM cost, so rate limits and credit charges stay coherent. Cost = per-search fee ($10/1K) + Haiku dispatch tokens.
* `copilot/tools/models.py` — new `WebSearchResponse` / `WebSearchResult` models matching the native WebSearch shape.
12 new tests: result extractor (title/url/snippet/page_age, limit cap, non-search blocks ignored), cost estimator (per-search fee linear in count), integration (cost tracker called with provider='anthropic'), no-API-key short-circuit, registry sanity.
This commit is contained in:
@@ -779,7 +779,9 @@ def create_copilot_mcp_server(*, use_e2b: bool = False):
|
||||
# In E2B mode, all five are disabled — MCP equivalents provide direct sandbox
|
||||
# access. read_file also handles local tool-results and ephemeral reads.
|
||||
_SDK_BUILTIN_FILE_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep"]
|
||||
_SDK_BUILTIN_ALWAYS = ["Task", "Agent", "WebSearch", "TodoWrite"]
|
||||
# WebSearch moved to ``SDK_DISALLOWED_TOOLS`` — routed through
|
||||
# ``mcp__copilot__web_search`` so cost tracking is unified across paths.
|
||||
_SDK_BUILTIN_ALWAYS = ["Task", "Agent", "TodoWrite"]
|
||||
_SDK_BUILTIN_TOOLS = [*_SDK_BUILTIN_FILE_TOOLS, *_SDK_BUILTIN_ALWAYS]
|
||||
|
||||
# SDK built-in tools that must be explicitly blocked.
|
||||
@@ -805,6 +807,7 @@ _SDK_BUILTIN_TOOLS = [*_SDK_BUILTIN_FILE_TOOLS, *_SDK_BUILTIN_ALWAYS]
|
||||
SDK_DISALLOWED_TOOLS = [
|
||||
"Bash",
|
||||
"WebFetch",
|
||||
"WebSearch",
|
||||
"AskUserQuestion",
|
||||
"Write",
|
||||
"Edit",
|
||||
|
||||
@@ -45,6 +45,7 @@ from .run_sub_session import RunSubSessionTool
|
||||
from .search_docs import SearchDocsTool
|
||||
from .validate_agent import ValidateAgentGraphTool
|
||||
from .web_fetch import WebFetchTool
|
||||
from .web_search import WebSearchTool
|
||||
from .workspace_files import (
|
||||
DeleteWorkspaceFileTool,
|
||||
ListWorkspaceFilesTool,
|
||||
@@ -93,6 +94,7 @@ TOOL_REGISTRY: dict[str, BaseTool] = {
|
||||
"get_agent_building_guide": GetAgentBuildingGuideTool(),
|
||||
# Web fetch for safe URL retrieval
|
||||
"web_fetch": WebFetchTool(),
|
||||
"web_search": WebSearchTool(),
|
||||
# Agent-browser multi-step automation (navigate, act, screenshot)
|
||||
"browser_navigate": BrowserNavigateTool(),
|
||||
"browser_act": BrowserActTool(),
|
||||
|
||||
@@ -76,6 +76,7 @@ class ResponseType(str, Enum):
|
||||
|
||||
# Web
|
||||
WEB_FETCH = "web_fetch"
|
||||
WEB_SEARCH = "web_search"
|
||||
|
||||
# Feature requests
|
||||
FEATURE_REQUEST_SEARCH = "feature_request_search"
|
||||
@@ -585,6 +586,30 @@ class WebFetchResponse(ToolResponseBase):
|
||||
truncated: bool = False
|
||||
|
||||
|
||||
class WebSearchResult(BaseModel):
    """One entry in a web_search tool response."""

    # Result page title; empty string when the API omits it.
    title: str
    # Fully-qualified result URL — can be handed to ``web_fetch`` for a deep dive.
    url: str
    # Short excerpt for the result; may be empty when no content was returned.
    snippet: str = ""
    # Anthropic's freshness hint (e.g. "1 day"); None when not reported.
    page_age: str | None = None
|
||||
|
||||
|
||||
class WebSearchResponse(ToolResponseBase):
    """Response for web_search tool — mirrors the shape of the SDK's
    native ``WebSearch`` tool so the LLM sees a consistent interface
    regardless of which path dispatched the call."""

    type: ResponseType = ResponseType.WEB_SEARCH
    # The (stripped) query string that was actually searched.
    query: str
    # Parsed results, capped at the caller-requested maximum.
    results: list[WebSearchResult] = Field(default_factory=list)
    # Backend-reported usage for this call (copied from Anthropic's
    # ``usage.server_tool_use``). Surfaces as metadata for frontend
    # debug panels but is also what drives rate-limit / cost tracking
    # via ``persist_and_record_usage(provider="anthropic")``.
    search_requests: int = 0
||||
class BashExecResponse(ToolResponseBase):
|
||||
"""Response for bash_exec tool."""
|
||||
|
||||
|
||||
221
autogpt_platform/backend/backend/copilot/tools/web_search.py
Normal file
221
autogpt_platform/backend/backend/copilot/tools/web_search.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""Web search tool — wraps Anthropic's server-side ``web_search`` beta.
|
||||
|
||||
Single entry point for web search on both SDK and baseline paths. The
|
||||
``web_search_20250305`` tool is server-side on Anthropic, so we call
|
||||
the Messages API directly regardless of which LLM invoked the copilot
|
||||
tool — OpenRouter can't proxy server-side tool execution.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from anthropic import AsyncAnthropic
|
||||
|
||||
from backend.copilot.model import ChatSession
|
||||
from backend.copilot.token_tracking import persist_and_record_usage
|
||||
from backend.util.settings import Settings
|
||||
|
||||
from .base import BaseTool
|
||||
from .models import ErrorResponse, ToolResponseBase, WebSearchResponse, WebSearchResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cheap model used solely to trigger the server-side web_search tool call.
_WEB_SEARCH_DISPATCH_MODEL = "claude-haiku-4-5"
# The dispatch turn should emit almost no text; cap output tokens tightly.
_MAX_DISPATCH_TOKENS = 512
# Default / hard limits on how many parsed results we hand back to the LLM.
_DEFAULT_MAX_RESULTS = 5
_HARD_MAX_RESULTS = 20
|
||||
|
||||
|
||||
class WebSearchTool(BaseTool):
    """Search the public web and return cited results.

    Dispatches Anthropic's server-side ``web_search_20250305`` tool via a
    cheap Haiku turn, then parses the raw ``web_search_tool_result`` block
    into a ``WebSearchResponse``. Usage is billed to the ``anthropic``
    provider no matter which LLM route invoked the copilot tool.
    """

    @property
    def name(self) -> str:
        return "web_search"

    @property
    def description(self) -> str:
        return (
            "Search the web and return cited results. Use this for live "
            "information — news, current events, up-to-date docs, recent "
            "releases — when the model's training data would be stale. "
            "Returns a list of {title, url, snippet} plus the URLs so "
            "``web_fetch`` can deep-dive any result. Costs a few cents "
            "per search; prefer one well-targeted query over many "
            "reformulations."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        query_spec = {
            "type": "string",
            "description": "The search query — a question or topic.",
        }
        max_results_spec = {
            "type": "integer",
            "description": (
                f"Maximum results to return (default "
                f"{_DEFAULT_MAX_RESULTS}, hard cap {_HARD_MAX_RESULTS})."
            ),
            "default": _DEFAULT_MAX_RESULTS,
        }
        return {
            "type": "object",
            "properties": {"query": query_spec, "max_results": max_results_spec},
            "required": ["query"],
        }

    @property
    def requires_auth(self) -> bool:
        return False

    @property
    def is_available(self) -> bool:
        # Tool is hidden entirely when no Anthropic API key is configured.
        return bool(Settings().secrets.anthropic_api_key)

    @staticmethod
    def _clamp_max_results(raw: Any) -> int:
        """Coerce ``raw`` to an int in [1, _HARD_MAX_RESULTS], defaulting on junk."""
        try:
            value = int(raw)
        except (TypeError, ValueError):
            value = _DEFAULT_MAX_RESULTS
        return max(1, min(value, _HARD_MAX_RESULTS))

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        query: str = "",
        max_results: int = _DEFAULT_MAX_RESULTS,
        **kwargs: Any,
    ) -> ToolResponseBase:
        """Run a single web search and return parsed, cited results.

        Returns an ``ErrorResponse`` for an empty query, a missing API
        key, or an Anthropic call failure; otherwise a
        ``WebSearchResponse`` with up to ``max_results`` entries.
        """
        session_id = session.session_id if session else None

        query = (query or "").strip()
        if not query:
            return ErrorResponse(
                message="Please provide a non-empty search query.",
                error="missing_query",
                session_id=session_id,
            )

        max_results = self._clamp_max_results(max_results)

        api_key = Settings().secrets.anthropic_api_key
        if not api_key:
            return ErrorResponse(
                message=(
                    "Web search is unavailable — the deployment has no "
                    "Anthropic API key configured."
                ),
                error="web_search_not_configured",
                session_id=session_id,
            )

        client = AsyncAnthropic(api_key=api_key)
        search_tool = {
            "type": "web_search_20250305",
            "name": "web_search",
            "max_uses": 1,
        }
        dispatch_prompt = (
            f"Use the web_search tool exactly once with the "
            f"query {query!r} and then stop. Do not "
            f"summarise — the caller parses the raw "
            f"tool_result."
        )
        try:
            resp = await client.messages.create(
                model=_WEB_SEARCH_DISPATCH_MODEL,
                max_tokens=_MAX_DISPATCH_TOKENS,
                tools=[search_tool],
                messages=[{"role": "user", "content": dispatch_prompt}],
            )
        except Exception as exc:
            logger.warning(
                "[web_search] Anthropic call failed for query=%r: %s", query, exc
            )
            return ErrorResponse(
                message=f"Web search failed: {exc}",
                error="web_search_failed",
                session_id=session_id,
            )

        results, search_requests = _extract_results(resp, limit=max_results)

        # Best-effort accounting: a tracking failure must not drop the
        # search results the user already paid for.
        cost_usd = _estimate_cost_usd(resp, search_requests=search_requests)
        try:
            usage = getattr(resp, "usage", None)
            await persist_and_record_usage(
                session=session,
                user_id=user_id,
                prompt_tokens=getattr(usage, "input_tokens", 0) or 0,
                completion_tokens=getattr(usage, "output_tokens", 0) or 0,
                log_prefix="[web_search]",
                cost_usd=cost_usd,
                model=_WEB_SEARCH_DISPATCH_MODEL,
                provider="anthropic",
            )
        except Exception as exc:
            logger.warning("[web_search] usage tracking failed: %s", exc)

        return WebSearchResponse(
            message=f"Found {len(results)} result(s) for {query!r}.",
            query=query,
            results=results,
            search_requests=search_requests,
            session_id=session_id,
        )
|
||||
|
||||
|
||||
def _extract_results(resp: Any, *, limit: int) -> tuple[list[WebSearchResult], int]:
    """Parse an Anthropic Messages response into search results.

    Walks ``resp.content`` for ``web_search_tool_result`` blocks and
    collects up to ``limit`` ``web_search_result`` items, plus the
    server-side search count from
    ``usage.server_tool_use.web_search_requests`` (drives cost tracking).
    The response is duck-typed via ``getattr`` so synthetic test objects
    work too.
    """
    collected: list[WebSearchResult] = []

    for block in getattr(resp, "content", []) or []:
        if getattr(block, "type", None) != "web_search_tool_result":
            continue
        for item in getattr(block, "content", []) or []:
            if getattr(item, "type", None) != "web_search_result":
                continue
            if len(collected) >= limit:
                break
            # NOTE(review): ``encrypted_content`` is used as the snippet
            # source — presumably it carries readable excerpt text here;
            # confirm against the live API response shape.
            snippet = (
                getattr(item, "encrypted_content", None)
                or getattr(item, "page_content", "")
                or ""
            )
            collected.append(
                WebSearchResult(
                    title=getattr(item, "title", "") or "",
                    url=getattr(item, "url", "") or "",
                    snippet=snippet,
                    page_age=getattr(item, "page_age", None),
                )
            )

    request_count = 0
    usage = getattr(resp, "usage", None)
    server_tool_use = getattr(usage, "server_tool_use", None) if usage else None
    if server_tool_use is not None:
        request_count = getattr(server_tool_use, "web_search_requests", 0) or 0

    return collected, request_count
|
||||
|
||||
|
||||
# Update when Anthropic revises pricing.
|
||||
_COST_PER_SEARCH_USD = 0.010 # $10 per 1,000 web_search requests
|
||||
_HAIKU_INPUT_USD_PER_MTOK = 1.0
|
||||
_HAIKU_OUTPUT_USD_PER_MTOK = 5.0
|
||||
|
||||
|
||||
def _estimate_cost_usd(resp: Any, *, search_requests: int) -> float:
|
||||
"""Per-search fee × count + Haiku dispatch tokens."""
|
||||
usage = getattr(resp, "usage", None)
|
||||
input_tokens = getattr(usage, "input_tokens", 0) if usage else 0
|
||||
output_tokens = getattr(usage, "output_tokens", 0) if usage else 0
|
||||
|
||||
search_cost = search_requests * _COST_PER_SEARCH_USD
|
||||
inference_cost = (input_tokens / 1_000_000) * _HAIKU_INPUT_USD_PER_MTOK + (
|
||||
output_tokens / 1_000_000
|
||||
) * _HAIKU_OUTPUT_USD_PER_MTOK
|
||||
return round(search_cost + inference_cost, 6)
|
||||
@@ -0,0 +1,304 @@
|
||||
"""Tests for the ``web_search`` copilot tool.
|
||||
|
||||
Covers the result extractor + cost estimator as pure units (fed with
|
||||
synthetic Anthropic response objects), plus light integration tests that
|
||||
mock ``AsyncAnthropic.messages.create`` and confirm the handler plumbs
|
||||
through to ``persist_and_record_usage`` with the right provider tag.
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.copilot.model import ChatSession
|
||||
|
||||
from .models import ErrorResponse, WebSearchResponse, WebSearchResult
|
||||
from .web_search import (
|
||||
_COST_PER_SEARCH_USD,
|
||||
WebSearchTool,
|
||||
_estimate_cost_usd,
|
||||
_extract_results,
|
||||
)
|
||||
|
||||
|
||||
def _fake_anthropic_response(
|
||||
*,
|
||||
results: list[dict] | None = None,
|
||||
search_requests: int = 1,
|
||||
input_tokens: int = 120,
|
||||
output_tokens: int = 40,
|
||||
) -> SimpleNamespace:
|
||||
"""Build a synthetic Anthropic Messages response.
|
||||
|
||||
Matches the shape produced by ``client.messages.create`` when the
|
||||
response includes a ``web_search_tool_result`` content block and
|
||||
``usage.server_tool_use.web_search_requests`` on the turn meter.
|
||||
"""
|
||||
content = []
|
||||
if results is not None:
|
||||
content.append(
|
||||
SimpleNamespace(
|
||||
type="web_search_tool_result",
|
||||
content=[
|
||||
SimpleNamespace(
|
||||
type="web_search_result",
|
||||
title=r.get("title", "untitled"),
|
||||
url=r.get("url", ""),
|
||||
encrypted_content=r.get("snippet", ""),
|
||||
page_age=r.get("page_age"),
|
||||
)
|
||||
for r in results
|
||||
],
|
||||
)
|
||||
)
|
||||
usage = SimpleNamespace(
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
server_tool_use=SimpleNamespace(web_search_requests=search_requests),
|
||||
)
|
||||
return SimpleNamespace(content=content, usage=usage)
|
||||
|
||||
|
||||
class TestExtractResults:
    """The extractor is the only Anthropic-response-shape contact point;
    pin its behaviour so an API shape change surfaces here first."""

    def test_extracts_title_url_snippet_and_page_age(self):
        resp = _fake_anthropic_response(
            results=[
                {
                    "title": "Kimi K2.6 launch",
                    "url": "https://example.com/kimi",
                    "snippet": "Moonshot released K2.6 on 2026-04-20.",
                    "page_age": "1 day",
                },
                {
                    "title": "OpenRouter pricing",
                    "url": "https://openrouter.ai/moonshotai/kimi-k2.6",
                    "snippet": "",
                },
            ]
        )
        out, requests = _extract_results(resp, limit=10)
        assert requests == 1
        assert len(out) == 2
        assert out[0].title == "Kimi K2.6 launch"
        assert out[0].url == "https://example.com/kimi"
        assert out[0].snippet.startswith("Moonshot released")
        assert out[0].page_age == "1 day"
        # Missing/empty snippet degrades to "", not None.
        assert out[1].snippet == ""

    def test_limit_caps_returned_results(self):
        resp = _fake_anthropic_response(
            results=[{"title": f"r{i}", "url": f"https://e/{i}"} for i in range(10)]
        )
        out, _ = _extract_results(resp, limit=3)
        # Order preserved; items past ``limit`` are dropped.
        assert len(out) == 3
        assert [r.title for r in out] == ["r0", "r1", "r2"]

    def test_missing_content_returns_empty(self):
        # A response with no content blocks and no usage is handled
        # without raising — both outputs fall back to empty/zero.
        resp = SimpleNamespace(content=[], usage=None)
        out, requests = _extract_results(resp, limit=10)
        assert out == []
        assert requests == 0

    def test_non_search_blocks_are_ignored(self):
        # Interleave a plain text block to confirm only
        # ``web_search_tool_result`` blocks are parsed.
        resp = SimpleNamespace(
            content=[
                SimpleNamespace(type="text", text="Here's what I found..."),
                SimpleNamespace(
                    type="web_search_tool_result",
                    content=[
                        SimpleNamespace(
                            type="web_search_result",
                            title="real",
                            url="https://real.example",
                            encrypted_content="body",
                            page_age=None,
                        )
                    ],
                ),
            ],
            usage=None,
        )
        out, _ = _extract_results(resp, limit=10)
        assert len(out) == 1 and out[0].title == "real"
|
||||
|
||||
|
||||
class TestEstimateCostUsd:
    """Pin the per-search fee + Haiku inference math — the pricing
    constants in ``web_search.py`` are hard-coded (no live lookup) so a
    drift between Anthropic's schedule and our constants must surface
    in this test for the next reader to notice."""

    def test_zero_searches_still_charges_inference(self):
        resp = _fake_anthropic_response(results=[], search_requests=0)
        cost = _estimate_cost_usd(resp, search_requests=0)
        # Fixture defaults are 120 input / 40 output Haiku tokens —
        # a tiny but non-zero inference cost, with no per-search fee.
        assert 0 < cost < 0.001

    def test_single_search_fee_dominates(self):
        resp = _fake_anthropic_response(
            results=[{"title": "x", "url": "https://e"}],
            search_requests=1,
            input_tokens=100,
            output_tokens=20,
        )
        cost = _estimate_cost_usd(resp, search_requests=1)
        # ~$0.010 search + trivial inference — total still ~1 cent.
        assert cost >= _COST_PER_SEARCH_USD
        assert cost < _COST_PER_SEARCH_USD + 0.001

    def test_three_searches_linear_in_count(self):
        # Zero tokens isolates the per-search fee component exactly.
        resp = _fake_anthropic_response(
            results=[], search_requests=3, input_tokens=0, output_tokens=0
        )
        cost = _estimate_cost_usd(resp, search_requests=3)
        assert cost == pytest.approx(3 * _COST_PER_SEARCH_USD)
|
||||
|
||||
|
||||
class TestWebSearchToolDispatch:
    """Lightweight integration test: mock the Anthropic client, confirm
    the handler returns a ``WebSearchResponse`` and the usage tracker is
    called with ``provider='anthropic'`` (not 'open_router', even on the
    baseline path — server-side web_search bills Anthropic regardless of
    the calling LLM's route)."""

    def _session(self) -> ChatSession:
        # Real session object with a fixed id so session_id plumbing is
        # observable in the returned response.
        s = ChatSession.new("test-user", dry_run=False)
        s.session_id = "sess-1"
        return s

    @pytest.mark.asyncio
    async def test_returns_response_with_results_and_tracks_cost(self, monkeypatch):
        fake_resp = _fake_anthropic_response(
            results=[
                {
                    "title": "hello",
                    "url": "https://example.com",
                    "snippet": "greeting",
                }
            ],
            search_requests=1,
        )
        # Anonymous stub exposing only ``client.messages.create`` — the
        # single attribute path the tool touches on the Anthropic client.
        mock_client = type(
            "MC",
            (),
            {
                "messages": type(
                    "M", (), {"create": AsyncMock(return_value=fake_resp)}
                )()
            },
        )()

        # Stub the Anthropic API key so ``is_available`` is True.
        monkeypatch.setattr(
            "backend.copilot.tools.web_search.Settings",
            lambda: SimpleNamespace(
                secrets=SimpleNamespace(anthropic_api_key="sk-test")
            ),
        )

        with (
            patch(
                "backend.copilot.tools.web_search.AsyncAnthropic",
                return_value=mock_client,
            ),
            patch(
                "backend.copilot.tools.web_search.persist_and_record_usage",
                new=AsyncMock(return_value=160),
            ) as mock_track,
        ):
            tool = WebSearchTool()
            result = await tool._execute(
                user_id="u1",
                session=self._session(),
                query="kimi k2.6 launch",
                max_results=5,
            )

        assert isinstance(result, WebSearchResponse)
        assert result.query == "kimi k2.6 launch"
        assert len(result.results) == 1
        assert isinstance(result.results[0], WebSearchResult)
        assert result.search_requests == 1

        # Cost tracker must have been called with provider="anthropic".
        assert mock_track.await_count == 1
        kwargs = mock_track.await_args.kwargs
        assert kwargs["provider"] == "anthropic"
        assert kwargs["model"] == "claude-haiku-4-5"
        assert kwargs["user_id"] == "u1"
        assert kwargs["cost_usd"] >= _COST_PER_SEARCH_USD

    @pytest.mark.asyncio
    async def test_missing_api_key_returns_error_without_calling_anthropic(
        self, monkeypatch
    ):
        # No key configured: the tool must short-circuit before any
        # network client usage or cost tracking.
        monkeypatch.setattr(
            "backend.copilot.tools.web_search.Settings",
            lambda: SimpleNamespace(secrets=SimpleNamespace(anthropic_api_key="")),
        )
        anthropic_stub = AsyncMock()
        with (
            patch(
                "backend.copilot.tools.web_search.AsyncAnthropic",
                return_value=anthropic_stub,
            ),
            patch(
                "backend.copilot.tools.web_search.persist_and_record_usage",
                new=AsyncMock(),
            ) as mock_track,
        ):
            tool = WebSearchTool()
            assert tool.is_available is False
            result = await tool._execute(
                user_id="u1",
                session=self._session(),
                query="anything",
            )
        assert isinstance(result, ErrorResponse)
        assert result.error == "web_search_not_configured"
        anthropic_stub.messages.create.assert_not_called()
        mock_track.assert_not_called()

    @pytest.mark.asyncio
    async def test_empty_query_rejected_without_api_call(self, monkeypatch):
        # Whitespace-only queries are stripped to empty and rejected
        # before the Anthropic client is ever touched.
        monkeypatch.setattr(
            "backend.copilot.tools.web_search.Settings",
            lambda: SimpleNamespace(
                secrets=SimpleNamespace(anthropic_api_key="sk-test")
            ),
        )
        anthropic_stub = AsyncMock()
        with patch(
            "backend.copilot.tools.web_search.AsyncAnthropic",
            return_value=anthropic_stub,
        ):
            tool = WebSearchTool()
            result = await tool._execute(
                user_id="u1", session=self._session(), query=" "
            )
        assert isinstance(result, ErrorResponse)
        assert result.error == "missing_query"
        anthropic_stub.messages.create.assert_not_called()
|
||||
|
||||
|
||||
class TestToolRegistryIntegration:
    """The tool must be registered under the ``web_search`` name so the
    MCP layer exposes it as ``mcp__copilot__web_search`` — which is
    what the SDK path now dispatches to (see
    ``sdk/tool_adapter.py::SDK_DISALLOWED_TOOLS`` which blocks the CLI's
    native ``WebSearch`` in favour of the MCP route)."""

    def test_web_search_is_in_tool_registry(self):
        # Imported inside the test body, not at module top.
        from backend.copilot.tools import TOOL_REGISTRY

        assert "web_search" in TOOL_REGISTRY
        assert isinstance(TOOL_REGISTRY["web_search"], WebSearchTool)

    def test_sdk_native_websearch_is_disallowed(self):
        # Guards the sdk/tool_adapter.py side of the change: the CLI's
        # native WebSearch must stay blocked so the MCP route is used.
        from backend.copilot.sdk.tool_adapter import SDK_DISALLOWED_TOOLS

        assert "WebSearch" in SDK_DISALLOWED_TOOLS
|
||||
Reference in New Issue
Block a user