Compare commits

...

2 Commits

Author SHA1 Message Date
Claude
5012026f21 fix(backend): add missing default credentials and fix provider mismatches
- Add llama_api and v0 credentials to get_all_creds() (were defined but not added)
- Add openweathermap credentials (has setting but was missing from defaults)
- Fix unreal provider name mismatch (unreal -> unreal_speech) to match block usage
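
As a rough illustration of why the provider string matters: default
credentials are only picked up when their provider name exactly matches
the one a block asks for. The sketch below is a hypothetical,
self-contained stand-in (the `Credential` class and `find_default_for`
helper are not the platform's real `APIKeyCredentials` or lookup code);
it only shows the exact-string match that made the old "unreal" entry
invisible to blocks that use "unreal_speech".

```python
from dataclasses import dataclass
from typing import Optional

# Hypothetical stand-ins for illustration; the real code uses
# APIKeyCredentials and provider names from the backend packages.
@dataclass
class Credential:
    provider: str
    title: str

DEFAULTS = [
    Credential("unreal_speech", "Use Credits for Unreal Speech"),
    Credential("openweathermap", "Use Credits for OpenWeatherMap"),
]

def find_default_for(provider_name: str) -> Optional[Credential]:
    # Defaults are matched by exact provider string, so a credential
    # registered as "unreal" never matches a block declaring "unreal_speech".
    return next((c for c in DEFAULTS if c.provider == provider_name), None)

assert find_default_for("unreal_speech") is not None
assert find_default_for("unreal") is None  # old, mismatched name finds nothing
```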
2026-01-13 17:27:44 +00:00
Bently
e539280e98 fix(blocks): set User-Agent header and URL-encode topic in GetWikipediaSummaryBlock (#11754)
The GetWikipediaSummaryBlock was returning HTTP 403 errors from
Wikipedia's API because it wasn't explicitly setting a User-Agent header
that complies with https://wikitech.wikimedia.org/wiki/Robot_policy.
Additionally, topics with spaces or special characters would cause
malformed URLs.
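
A minimal standalone sketch of both fixes using only the standard
library; the real block goes through the platform's async get_request
helper and its DEFAULT_USER_AGENT constant, so the bot name and contact
details below are placeholders, not the platform's actual values:

```python
from urllib.parse import quote
from urllib.request import Request, urlopen

topic = "Artificial Intelligence"

# URL-encode the topic so spaces and special characters cannot
# produce a malformed URL.
encoded_topic = quote(topic, safe="")  # -> "Artificial%20Intelligence"
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_topic}"

# Identify the client per Wikimedia's robot policy; placeholder values,
# the real block sends the platform-wide DEFAULT_USER_AGENT instead.
headers = {
    "User-Agent": "ExampleBot/1.0 (https://example.org; bot@example.org)",
    "Accept-Encoding": "gzip, deflate",
}

with urlopen(Request(url, headers=headers)) as resp:
    print(resp.status)  # 200 with a compliant User-Agent, 403 without one
```

Without a compliant User-Agent, Wikipedia's REST API rejects the request
with 403, which is the failure reported in OPEN-2889.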

Fixes: OPEN-2889

### Changes 🏗️

- URL-encode the topic parameter using urllib.parse.quote() to handle
spaces and special characters
- Explicitly set required headers per Wikimedia robot policy:
  - User-Agent: Platform default user agent (includes app name, URL, and
    contact email)
  - Accept-Encoding: gzip, deflate: Recommended by Wikimedia to reduce
    bandwidth
- Updated test mock to match the new function signature

### Checklist 📋

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Verify code passes syntax check
  - [x] Verify code passes ruff linting
  - [x] Create an agent using GetWikipediaSummaryBlock with a topic
    containing spaces (e.g., "Artificial Intelligence")
  - [x] Verify the block returns a Wikipedia summary without 403 errors

#### For configuration changes:

- .env.default is updated or already compatible with my changes
- docker-compose.yml is updated or already compatible with my changes
- I have included a list of my configuration changes in the PR
  description (under Changes).

N/A - No configuration changes required.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

* **Bug Fixes**
  * Improved Wikipedia API requests by adding compatible request headers
    (including a proper user agent and encoding acceptance) for more
    reliable responses.
  * Enhanced handling of search topics by URL-encoding terms so queries
    with spaces or special characters return correct results.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2026-01-13 12:24:51 +00:00
2 changed files with 33 additions and 7 deletions

View File

@@ -18,6 +18,7 @@ from backend.data.model import (
     SchemaField,
 )
 from backend.integrations.providers import ProviderName
+from backend.util.request import DEFAULT_USER_AGENT


 class GetWikipediaSummaryBlock(Block, GetRequest):
@@ -39,17 +40,27 @@ class GetWikipediaSummaryBlock(Block, GetRequest):
             output_schema=GetWikipediaSummaryBlock.Output,
             test_input={"topic": "Artificial Intelligence"},
             test_output=("summary", "summary content"),
-            test_mock={"get_request": lambda url, json: {"extract": "summary content"}},
+            test_mock={
+                "get_request": lambda url, headers, json: {"extract": "summary content"}
+            },
         )

     async def run(self, input_data: Input, **kwargs) -> BlockOutput:
         topic = input_data.topic
-        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{topic}"
+        # URL-encode the topic to handle spaces and special characters
+        encoded_topic = quote(topic, safe="")
+        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_topic}"

-        # Note: User-Agent is now automatically set by the request library
-        # to comply with Wikimedia's robot policy (https://w.wiki/4wJS)
+        # Set headers per Wikimedia robot policy (https://w.wiki/4wJS)
+        # - User-Agent: Required, must identify the bot
+        # - Accept-Encoding: gzip recommended to reduce bandwidth
+        headers = {
+            "User-Agent": DEFAULT_USER_AGENT,
+            "Accept-Encoding": "gzip, deflate",
+        }
         try:
-            response = await self.get_request(url, json=True)
+            response = await self.get_request(url, headers=headers, json=True)
             if "extract" not in response:
                 raise ValueError(f"Unable to parse Wikipedia response: {response}")
             yield "summary", response["extract"]

View File

@@ -96,9 +96,9 @@ jina_credentials = APIKeyCredentials(
 )
 unreal_credentials = APIKeyCredentials(
     id="66f20754-1b81-48e4-91d0-f4f0dd82145f",
-    provider="unreal",
+    provider="unreal_speech",
     api_key=SecretStr(settings.secrets.unreal_speech_api_key),
-    title="Use Credits for Unreal",
+    title="Use Credits for Unreal Speech",
     expires_at=None,
 )
 open_router_credentials = APIKeyCredentials(
@@ -216,6 +216,14 @@ webshare_proxy_credentials = UserPasswordCredentials(
     title="Use Credits for Webshare Proxy",
 )
+
+openweathermap_credentials = APIKeyCredentials(
+    id="8b3d4e5f-6a7b-8c9d-0e1f-2a3b4c5d6e7f",
+    provider="openweathermap",
+    api_key=SecretStr(settings.secrets.openweathermap_api_key),
+    title="Use Credits for OpenWeatherMap",
+    expires_at=None,
+)

 DEFAULT_CREDENTIALS = [
     ollama_credentials,
     revid_credentials,
@@ -243,6 +251,7 @@ DEFAULT_CREDENTIALS = [
     llama_api_credentials,
     v0_credentials,
     webshare_proxy_credentials,
+    openweathermap_credentials,
 ]
@@ -331,11 +340,17 @@ class IntegrationCredentialsStore:
             all_credentials.append(zerobounce_credentials)
         if settings.secrets.google_maps_api_key:
             all_credentials.append(google_maps_credentials)
+        if settings.secrets.llama_api_key:
+            all_credentials.append(llama_api_credentials)
+        if settings.secrets.v0_api_key:
+            all_credentials.append(v0_credentials)
         if (
             settings.secrets.webshare_proxy_username
             and settings.secrets.webshare_proxy_password
         ):
             all_credentials.append(webshare_proxy_credentials)
+        if settings.secrets.openweathermap_api_key:
+            all_credentials.append(openweathermap_credentials)
         return all_credentials

     async def get_creds_by_id(