update fetch server to use readability JS if node is installed

This commit is contained in:
Jack Adamson
2024-11-28 18:44:33 +00:00
parent 6d83c78752
commit 467330de6e
3 changed files with 7 additions and 3 deletions

View File

@@ -16,6 +16,8 @@ Presently the server only supports fetching HTML content.
## Installation
Optionally, install Node.js; this will cause the fetch server to use a different, more robust HTML simplifier.
### Using uv (recommended)
When using [`uv`](https://docs.astral.sh/uv/) no specific installation is needed. We will

View File

@@ -1,6 +1,6 @@
[project]
name = "mcp-server-fetch"
version = "0.1.2"
version = "0.1.3"
description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
readme = "README.md"
requires-python = ">=3.10"

View File

@@ -24,11 +24,13 @@ DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://
def extract_content(html: str) -> str:
    """Simplify an HTML document and convert the result to Markdown.

    The page is first reduced to its main content with ReadabiliPy
    (``use_readability=True`` asks it to use the Readability.js simplifier,
    which requires Node.js to be installed), then the simplified HTML is
    converted to Markdown with ATX-style (``#``) headings.

    Args:
        html: Raw HTML source of the fetched page.

    Returns:
        The Markdown rendering of the simplified page, or the literal
        ``<error>Page failed to be simplified from HTML</error>`` marker when
        the simplifier produced no content.
    """
    ret = readabilipy.simple_json.simple_json_from_html_string(
        html, use_readability=True
    )
    # Guard on the same field that is converted below. Previously the check
    # looked at "plain_content" while the conversion used "content", so the
    # emptiness test and the converted value could disagree.
    simplified_html = ret["content"]
    if not simplified_html:
        return "<error>Page failed to be simplified from HTML</error>"
    return markdownify.markdownify(
        simplified_html,
        heading_style=markdownify.ATX,
    )