diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index 6cec81e9..8caf0da0 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -23,11 +23,11 @@ DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https:// DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)" -def extract_content(html: str) -> str: +def extract_content_from_html(html: str) -> str: ret = readabilipy.simple_json.simple_json_from_html_string( html, use_readability=True ) - if not ret["plain_content"]: + if not ret["content"]: return "Page failed to be simplified from HTML" content = markdownify.markdownify( ret["content"], @@ -105,13 +105,18 @@ async def fetch_url(url: str, user_agent: str) -> str: f"Failed to fetch {url} - status code {response.status_code}", ) - page_html = response.text + page_raw = response.text - return extract_content(page_html) + content_type = response.headers.get("content-type", "") + if "