Add command-line arguments for controlling the User-Agent and whether to obey robots.txt

This commit is contained in:
Jack Adamson
2024-11-25 19:31:23 +00:00
parent 84321c89cf
commit 51af6b56ef
2 changed files with 25 additions and 7 deletions

View File

@@ -3,9 +3,21 @@ from .server import serve
def main():
"""MCP Fetch Server - HTTP fetching functionality for MCP"""
import argparse
import asyncio
# NOTE(review): this argument-less call looks like the pre-change line that
# the call at the end of main() replaces; this view shows removed and added
# lines together without markers -- confirm against the real file.
asyncio.run(serve())
# Build the command-line parser; both options are optional.
parser = argparse.ArgumentParser(
description="give a model the ability to make web requests"
)
# Optional override for the User-Agent header; None when the flag is omitted.
parser.add_argument("--user-agent", type=str, help="Custom User-Agent string")
# Boolean switch: False unless the flag is present on the command line.
parser.add_argument(
"--ignore-robots-txt",
action="store_true",
help="Ignore robots.txt restrictions",
)
args = parser.parse_args()
# Pass the parsed options positionally to serve() and run it to completion.
asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
if __name__ == "__main__":

View File

@@ -1,3 +1,4 @@
from typing import Optional
from urllib.parse import urlparse, urlunparse
import markdownify
@@ -18,8 +19,8 @@ from mcp.types import (
from protego import Protego
from pydantic import BaseModel, Field
# NOTE(review): the USER_AGENT_* pair appears to be the pre-change spelling of
# the DEFAULT_USER_AGENT_* pair that follows (this view shows removed and added
# lines together) -- confirm against the real file.
USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Initiated; +https://github.com/modelcontextprotocol/servers)"
# Fallback User-Agent strings: serve() uses these only when no custom user
# agent is supplied. One is for the tool-call path, the other for the prompt path.
DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"
def extract_content(html: str) -> str:
@@ -111,8 +112,12 @@ class Fetch(BaseModel):
url: str = Field(..., description="URL to fetch")
async def serve() -> None:
async def serve(
custom_user_agent: Optional[str] = None, ignore_robots_txt: bool = False
) -> None:
server = Server("mcp-fetch")
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
user_agent_manual = custom_user_agent or DEFAULT_USER_AGENT_MANUAL
@server.list_tools()
async def list_tools() -> list[Tool]:
@@ -146,9 +151,10 @@ Although originally you did not have internet access, and were advised to refuse
if not url:
raise McpError(INVALID_PARAMS, "URL is required")
await check_may_autonomously_fetch_url(url, USER_AGENT_AUTONOMOUS)
if not ignore_robots_txt:
await check_may_autonomously_fetch_url(url, user_agent_autonomous)
content = await fetch_url(url, USER_AGENT_AUTONOMOUS)
content = await fetch_url(url, user_agent_autonomous)
return [TextContent(type="text", text=f"Contents of {url}:\n{content}")]
@server.get_prompt()
@@ -159,7 +165,7 @@ Although originally you did not have internet access, and were advised to refuse
url = arguments["url"]
try:
content = await fetch_url(url, USER_AGENT_MANUAL)
content = await fetch_url(url, user_agent_manual)
# TODO: after SDK bug is addressed, don't catch the exception
except McpError as e:
return GetPromptResult(