Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | ec4c2caa14 | | |
| | 516e8b4b25 | | |
| | e7e118b5a8 | | |
| | 92a7a7e6d6 | | |
| | e16995347f | | |
| | 234d3acb4c | | |
@@ -18,7 +18,6 @@ from backend.data.model import (
|
|||||||
SchemaField,
|
SchemaField,
|
||||||
)
|
)
|
||||||
from backend.integrations.providers import ProviderName
|
from backend.integrations.providers import ProviderName
|
||||||
from backend.util.request import DEFAULT_USER_AGENT
|
|
||||||
|
|
||||||
|
|
||||||
class GetWikipediaSummaryBlock(Block, GetRequest):
|
class GetWikipediaSummaryBlock(Block, GetRequest):
|
||||||
@@ -40,27 +39,17 @@ class GetWikipediaSummaryBlock(Block, GetRequest):
|
|||||||
output_schema=GetWikipediaSummaryBlock.Output,
|
output_schema=GetWikipediaSummaryBlock.Output,
|
||||||
test_input={"topic": "Artificial Intelligence"},
|
test_input={"topic": "Artificial Intelligence"},
|
||||||
test_output=("summary", "summary content"),
|
test_output=("summary", "summary content"),
|
||||||
test_mock={
|
test_mock={"get_request": lambda url, json: {"extract": "summary content"}},
|
||||||
"get_request": lambda url, headers, json: {"extract": "summary content"}
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
|
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
|
||||||
topic = input_data.topic
|
topic = input_data.topic
|
||||||
# URL-encode the topic to handle spaces and special characters
|
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{topic}"
|
||||||
encoded_topic = quote(topic, safe="")
|
|
||||||
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_topic}"
|
|
||||||
|
|
||||||
# Set headers per Wikimedia robot policy (https://w.wiki/4wJS)
|
|
||||||
# - User-Agent: Required, must identify the bot
|
|
||||||
# - Accept-Encoding: gzip recommended to reduce bandwidth
|
|
||||||
headers = {
|
|
||||||
"User-Agent": DEFAULT_USER_AGENT,
|
|
||||||
"Accept-Encoding": "gzip, deflate",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
# Note: User-Agent is now automatically set by the request library
|
||||||
|
# to comply with Wikimedia's robot policy (https://w.wiki/4wJS)
|
||||||
try:
|
try:
|
||||||
response = await self.get_request(url, headers=headers, json=True)
|
response = await self.get_request(url, json=True)
|
||||||
if "extract" not in response:
|
if "extract" not in response:
|
||||||
raise ValueError(f"Unable to parse Wikipedia response: {response}")
|
raise ValueError(f"Unable to parse Wikipedia response: {response}")
|
||||||
yield "summary", response["extract"]
|
yield "summary", response["extract"]
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 492 KiB After Width: | Height: | Size: 492 KiB |
|
Before Width: | Height: | Size: 1.0 MiB After Width: | Height: | Size: 1.0 MiB |
|
Before Width: | Height: | Size: 502 KiB After Width: | Height: | Size: 502 KiB |
|
Before Width: | Height: | Size: 503 KiB After Width: | Height: | Size: 503 KiB |
|
Before Width: | Height: | Size: 1.0 MiB After Width: | Height: | Size: 1.0 MiB |
|
Before Width: | Height: | Size: 173 KiB After Width: | Height: | Size: 173 KiB |
|
Before Width: | Height: | Size: 162 KiB After Width: | Height: | Size: 162 KiB |
|
Before Width: | Height: | Size: 181 KiB After Width: | Height: | Size: 181 KiB |
|
Before Width: | Height: | Size: 56 KiB After Width: | Height: | Size: 56 KiB |