diff --git a/src/fetch/src/mcp_server_fetch/__init__.py b/src/fetch/src/mcp_server_fetch/__init__.py index 31bc74c7..e3a35d54 100644 --- a/src/fetch/src/mcp_server_fetch/__init__.py +++ b/src/fetch/src/mcp_server_fetch/__init__.py @@ -3,9 +3,21 @@ from .server import serve def main(): """MCP Fetch Server - HTTP fetching functionality for MCP""" + import argparse import asyncio - asyncio.run(serve()) + parser = argparse.ArgumentParser( + description="give a model the ability to make web requests" + ) + parser.add_argument("--user-agent", type=str, help="Custom User-Agent string") + parser.add_argument( + "--ignore-robots-txt", + action="store_true", + help="Ignore robots.txt restrictions", + ) + + args = parser.parse_args() + asyncio.run(serve(args.user_agent, args.ignore_robots_txt)) if __name__ == "__main__": diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index d2e72c25..63cae6fa 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -1,3 +1,4 @@ +from typing import Optional from urllib.parse import urlparse, urlunparse import markdownify @@ -18,8 +19,8 @@ from mcp.types import ( from protego import Protego from pydantic import BaseModel, Field -USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)" -USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Initiated; +https://github.com/modelcontextprotocol/servers)" +DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)" +DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)" def extract_content(html: str) -> str: @@ -111,8 +112,12 @@ class Fetch(BaseModel): url: str = Field(..., description="URL to fetch") -async def serve() -> None: +async def serve( + custom_user_agent: Optional[str] = None, ignore_robots_txt: bool = False +) -> None: server = Server("mcp-fetch") + user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS + user_agent_manual = custom_user_agent or DEFAULT_USER_AGENT_MANUAL @server.list_tools() async def list_tools() -> list[Tool]: @@ -146,9 +151,10 @@ Although originally you did not have internet access, and were advised to refuse if not url: raise McpError(INVALID_PARAMS, "URL is required") - await check_may_autonomously_fetch_url(url, USER_AGENT_AUTONOMOUS) + if not ignore_robots_txt: + await check_may_autonomously_fetch_url(url, user_agent_autonomous) - content = await fetch_url(url, USER_AGENT_AUTONOMOUS) + content = await fetch_url(url, user_agent_autonomous) return [TextContent(type="text", text=f"Contents of {url}:\n{content}")] @server.get_prompt() @@ -159,7 +165,7 @@ Although originally you did not have internet access, and were advised to refuse url = arguments["url"] try: - content = await fetch_url(url, USER_AGENT_MANUAL) + content = await fetch_url(url, user_agent_manual) # TODO: after SDK bug is addressed, don't catch the exception except McpError as e: return GetPromptResult(