From 168522f34db65b95d8290b6834e16b4f86b7444c Mon Sep 17 00:00:00 2001
From: YeongJun
Date: Sat, 30 Nov 2024 23:10:12 +0900
Subject: [PATCH 1/2] ensure url is string type in robot_parser.can_fetch()

---
 src/fetch/src/mcp_server_fetch/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py
index 3d35094b..e172bcf5 100644
--- a/src/fetch/src/mcp_server_fetch/server.py
+++ b/src/fetch/src/mcp_server_fetch/server.py
@@ -93,7 +93,7 @@ async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -
         line for line in robot_txt.splitlines() if not line.strip().startswith("#")
     )
     robot_parser = Protego.parse(processed_robot_txt)
-    if not robot_parser.can_fetch(url, user_agent):
+    if not robot_parser.can_fetch(str(url), user_agent):
         raise McpError(
             INTERNAL_ERROR,
             f"The sites robots.txt ({robot_txt_url}), specifies that autonomous fetching of this page is not allowed, "

From d5bae8759fcd93ee312ab65ce36deec3ed64b85b Mon Sep 17 00:00:00 2001
From: YeongJun
Date: Sat, 30 Nov 2024 23:31:30 +0900
Subject: [PATCH 2/2] follows redirects on checking robots.txt

---
 src/fetch/src/mcp_server_fetch/server.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py
index e172bcf5..72b294d0 100644
--- a/src/fetch/src/mcp_server_fetch/server.py
+++ b/src/fetch/src/mcp_server_fetch/server.py
@@ -74,7 +74,9 @@ async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -
     async with AsyncClient() as client:
         try:
             response = await client.get(
-                robot_txt_url, headers={"User-Agent": user_agent}
+                robot_txt_url,
+                follow_redirects=True,
+                headers={"User-Agent": user_agent},
             )
         except HTTPError:
             raise McpError(
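
Taken together, the two patches make the robots.txt check tolerate pydantic
URL objects and hosts that redirect their robots.txt. Below is a minimal
sketch of the resulting logic, assuming httpx, protego, and pydantic are
installed. The names Protego, AsyncClient, HTTPError, and AnyUrl come from
the diff context; the may_fetch wrapper, the hard-coded robot_txt_url, and
the boolean return are illustrative stand-ins for the server's
McpError-raising code, and the sketch skips the server's comment-stripping
preprocessing of the robots.txt body.

from httpx import AsyncClient, HTTPError
from protego import Protego
from pydantic import AnyUrl


async def may_fetch(url: AnyUrl | str, user_agent: str) -> bool:
    # Illustrative: the real server derives robot_txt_url from `url`.
    robot_txt_url = "https://example.com/robots.txt"
    async with AsyncClient() as client:
        try:
            response = await client.get(
                robot_txt_url,
                # Patch 2/2: httpx, unlike requests, does not follow
                # redirects unless asked to.
                follow_redirects=True,
                headers={"User-Agent": user_agent},
            )
        except HTTPError:
            return False  # the real server raises McpError here
    robot_parser = Protego.parse(response.text)
    # Patch 1/2: coerce to str before calling Protego, which expects a
    # plain string URL; pydantic v2's AnyUrl is no longer a str subclass.
    return robot_parser.can_fetch(str(url), user_agent)

Calling asyncio.run(may_fetch("https://example.com/page", "test-agent"))
returns True or False where the real server would either proceed with the
fetch or raise. The str() coercion in patch 1/2 is likely needed because
AnyUrl stopped subclassing str in pydantic v2, so passing it through
unconverted can break URL parsing inside Protego.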