From 467330de6e9627eff3d14066f7ad9b2ae4fe8b59 Mon Sep 17 00:00:00 2001 From: Jack Adamson Date: Thu, 28 Nov 2024 18:44:33 +0000 Subject: [PATCH] update fetch server to use readability JS if node is installed --- src/fetch/README.md | 2 ++ src/fetch/pyproject.toml | 2 +- src/fetch/src/mcp_server_fetch/server.py | 6 ++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/fetch/README.md b/src/fetch/README.md index ffdd01b0..f31a2435 100644 --- a/src/fetch/README.md +++ b/src/fetch/README.md @@ -16,6 +16,8 @@ Presently the server only supports fetching HTML content. ## Installation +Optionally: Install node.js; this will cause the fetch server to use a different HTML simplifier that is more robust. + ### Using uv (recommended) When using [`uv`](https://docs.astral.sh/uv/) no specific installation is needed. We will diff --git a/src/fetch/pyproject.toml b/src/fetch/pyproject.toml index 25eac8d8..d9015e69 100644 --- a/src/fetch/pyproject.toml +++ b/src/fetch/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-server-fetch" -version = "0.1.2" +version = "0.1.3" description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs" readme = "README.md" requires-python = ">=3.10" diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index 04ecad3c..6cec81e9 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -24,11 +24,13 @@ DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https:// def extract_content(html: str) -> str: - ret = readabilipy.simple_json.simple_json_from_html_string(html) + ret = readabilipy.simple_json.simple_json_from_html_string( + html, use_readability=True + ) if not ret["plain_content"]: return "Page failed to be simplified from HTML" content = markdownify.markdownify( - ret["plain_content"], + ret["content"], heading_style=markdownify.ATX, ) return content