mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(blocks): set User-Agent header and URL-encode topic in GetWikipediaSummaryBlock (#11754)
The GetWikipediaSummaryBlock was returning HTTP 403 errors from Wikipedia's API because it wasn't explicitly setting a User-Agent header that complies with https://wikitech.wikimedia.org/wiki/Robot_policy. Additionally, topics with spaces or special characters would cause malformed URLs. Fixes: OPEN-2889 Changes 🏗️ - URL-encode the topic parameter using urllib.parse.quote() to handle spaces and special characters - Explicitly set required headers per Wikimedia robot policy: - User-Agent: Platform default user agent (includes app name, URL, and contact email) - Accept-Encoding: gzip, deflate: Recommended by Wikimedia to reduce bandwidth - Updated test mock to match the new function signature Checklist 📋 For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: - [x] Verify code passes syntax check - [x] Verify code passes ruff linting - [x] Create an agent using GetWikipediaSummaryBlock with a topic containing spaces (e.g., "Artificial Intelligence") - [x] Verify the block returns a Wikipedia summary without 403 errors For configuration changes: - .env.default is updated or already compatible with my changes - docker-compose.yml is updated or already compatible with my changes - I have included a list of my configuration changes in the PR description (under Changes) . N/A - No configuration changes required. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Bug Fixes** * Improved Wikipedia API requests by adding compatible request headers (including a proper user agent and encoding acceptance) for more reliable responses. * Enhanced handling of search topics by URL-encoding terms so queries with spaces or special characters return correct results. <sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub> <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -18,6 +18,7 @@ from backend.data.model import (
|
||||
SchemaField,
|
||||
)
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.util.request import DEFAULT_USER_AGENT
|
||||
|
||||
|
||||
class GetWikipediaSummaryBlock(Block, GetRequest):
|
||||
@@ -39,17 +40,27 @@ class GetWikipediaSummaryBlock(Block, GetRequest):
|
||||
output_schema=GetWikipediaSummaryBlock.Output,
|
||||
test_input={"topic": "Artificial Intelligence"},
|
||||
test_output=("summary", "summary content"),
|
||||
test_mock={"get_request": lambda url, json: {"extract": "summary content"}},
|
||||
test_mock={
|
||||
"get_request": lambda url, headers, json: {"extract": "summary content"}
|
||||
},
|
||||
)
|
||||
|
||||
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
|
||||
topic = input_data.topic
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{topic}"
|
||||
# URL-encode the topic to handle spaces and special characters
|
||||
encoded_topic = quote(topic, safe="")
|
||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_topic}"
|
||||
|
||||
# Set headers per Wikimedia robot policy (https://w.wiki/4wJS)
|
||||
# - User-Agent: Required, must identify the bot
|
||||
# - Accept-Encoding: gzip recommended to reduce bandwidth
|
||||
headers = {
|
||||
"User-Agent": DEFAULT_USER_AGENT,
|
||||
"Accept-Encoding": "gzip, deflate",
|
||||
}
|
||||
|
||||
# Note: User-Agent is now automatically set by the request library
|
||||
# to comply with Wikimedia's robot policy (https://w.wiki/4wJS)
|
||||
try:
|
||||
response = await self.get_request(url, json=True)
|
||||
response = await self.get_request(url, headers=headers, json=True)
|
||||
if "extract" not in response:
|
||||
raise ValueError(f"Unable to parse Wikipedia response: {response}")
|
||||
yield "summary", response["extract"]
|
||||
|
||||
Reference in New Issue
Block a user