Mirror of https://github.com/Significant-Gravitas/AutoGPT.git (synced 2026-02-12 15:55:03 -05:00)
This PR addresses the need for consistent error handling across all blocks in the AutoGPT platform. Previously, each block had to manually define an `error` field in its output schema, leading to code duplication and potential inconsistencies: a block could simply forget to include the error field, making error handling unpredictable.

### Changes 🏗️

- **Created `BlockSchemaOutput` base class**: New base class that extends `BlockSchema` with a standardized `error` field
- **Created `BlockSchemaInput` base class**: Added for consistency and future extensibility
- **Updated 140+ block implementations**: Changed all block `Output` classes from `class Output(BlockSchema):` to `class Output(BlockSchemaOutput):` (see the sketch after this description)
- **Removed manual error field definitions**: Eliminated hundreds of duplicate `error: str = SchemaField(...)` definitions
- **Updated type annotations**: Changed `Block[BlockSchema, BlockSchema]` to `Block[BlockSchemaInput, BlockSchemaOutput]` throughout the codebase
- **Fixed imports**: Added `BlockSchemaInput` and `BlockSchemaOutput` imports to all relevant files
- **Maintained backward compatibility**: Updated `EmptySchema` to inherit from `BlockSchemaOutput`

**Key Benefits:**

- Consistent error handling across all blocks
- Reduced code duplication (removed ~200 lines of repetitive error field definitions)
- Type safety improvements with distinct input/output schema types
- Blocks can still override the error field with a more specific description when needed

### Checklist 📋

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Verified `poetry run format` passes (all linting, formatting, and type checking)
  - [x] Tested that block instantiation works correctly (MediaDurationBlock, UnrealTextToSpeechBlock)
  - [x] Confirmed error fields are automatically present in all updated blocks
  - [x] Verified the block loading system works (successfully loads 353+ blocks)
  - [x] Tested backward compatibility with EmptySchema
  - [x] Confirmed blocks can still override the error field with custom descriptions
  - [x] Validated that the core schema inheritance chain works correctly

#### For configuration changes:

- [x] `.env.default` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my changes
- [x] I have included a list of my configuration changes in the PR description (under **Changes**)

*Note: No configuration changes were needed for this refactoring.*

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Lluis Agusti <hi@llu.lu>
Co-authored-by: Ubbe <hi@ubbe.dev>
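For illustration, a minimal before/after sketch of the migration described above. The `result` field and both class bodies are hypothetical and exist only to show the pattern; the real base classes live in `backend/data/block.py`:

```python
from backend.data.block import BlockSchema, BlockSchemaOutput
from backend.data.model import SchemaField


# Before: every block had to declare its own error output by hand.
class OutputBefore(BlockSchema):
    result: str = SchemaField(description="The block's result")
    error: str = SchemaField(description="Error message if the block failed")


# After: the `error` field is inherited from BlockSchemaOutput, so a block
# declares only its own outputs. A block may still redeclare `error` when
# it wants a more specific description.
class OutputAfter(BlockSchemaOutput):
    result: str = SchemaField(description="The block's result")
```

The file below (`ReadRSSFeedBlock`) is one of the 140+ blocks updated to this pattern.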
167 lines · 6.1 KiB · Python
import asyncio
import logging
from datetime import datetime, timedelta, timezone
from typing import Any

import feedparser
import pydantic

from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
    BlockSchemaInput,
    BlockSchemaOutput,
)
from backend.data.model import SchemaField
from backend.util.request import Requests


class RSSEntry(pydantic.BaseModel):
    title: str
    link: str
    description: str
    pub_date: datetime
    author: str
    categories: list[str]


class ReadRSSFeedBlock(Block):
    class Input(BlockSchemaInput):
        rss_url: str = SchemaField(
            description="The URL of the RSS feed to read",
            placeholder="https://example.com/rss",
        )
        time_period: int = SchemaField(
            description="The time period to check in minutes relative to the run block runtime, e.g. 60 would check for new entries in the last hour.",
            placeholder="1440",
            default=1440,
        )
        polling_rate: int = SchemaField(
            description="The number of seconds to wait between polling attempts.",
            placeholder="300",
        )
        run_continuously: bool = SchemaField(
            description="Whether to run the block continuously or just once.",
            default=True,
        )

    class Output(BlockSchemaOutput):
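        # The standardized `error` output is inherited from BlockSchemaOutput,
        # so it no longer needs to be redeclared here.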
        entry: RSSEntry = SchemaField(description="The RSS item")
        entries: list[RSSEntry] = SchemaField(description="List of all RSS entries")

    def __init__(self):
        super().__init__(
            id="5ebe6768-8e5d-41e3-9134-1c7bd89a8d52",
            input_schema=ReadRSSFeedBlock.Input,
            output_schema=ReadRSSFeedBlock.Output,
            description="Reads RSS feed entries from a given URL.",
            categories={BlockCategory.INPUT},
            test_input={
                "rss_url": "https://example.com/rss",
                "time_period": 10_000_000,
                "polling_rate": 1,
                "run_continuously": False,
            },
            test_output=[
                (
                    "entry",
                    RSSEntry(
                        title="Example RSS Item",
                        link="https://example.com/article",
                        description="This is an example RSS item description.",
                        pub_date=datetime(2023, 6, 23, 12, 30, 0, tzinfo=timezone.utc),
                        author="John Doe",
                        categories=["Technology", "News"],
                    ),
                ),
                (
                    "entries",
                    [
                        RSSEntry(
                            title="Example RSS Item",
                            link="https://example.com/article",
                            description="This is an example RSS item description.",
                            pub_date=datetime(
                                2023, 6, 23, 12, 30, 0, tzinfo=timezone.utc
                            ),
                            author="John Doe",
                            categories=["Technology", "News"],
                        ),
                    ],
                ),
            ],
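            # test_mock stubs out parse_feed during automated block tests,
            # so no real network request is made.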
            test_mock={
                "parse_feed": lambda *args, **kwargs: {
                    "entries": [
                        {
                            "title": "Example RSS Item",
                            "link": "https://example.com/article",
                            "summary": "This is an example RSS item description.",
                            "published_parsed": (2023, 6, 23, 12, 30, 0, 4, 174, 0),
                            "author": "John Doe",
                            "tags": [{"term": "Technology"}, {"term": "News"}],
                        }
                    ]
                }
            },
        )

    @staticmethod
    async def parse_feed(url: str) -> dict[str, Any]:
        # Security fix: Add protection against memory exhaustion attacks
        MAX_FEED_SIZE = 10 * 1024 * 1024  # 10MB limit for RSS feeds

        # Download feed content with size limit
        try:
            response = await Requests(raise_for_status=True).get(url)

            # Check content length if available
            content_length = response.headers.get("Content-Length")
            if content_length and int(content_length) > MAX_FEED_SIZE:
                raise ValueError(
                    f"Feed too large: {content_length} bytes exceeds {MAX_FEED_SIZE} limit"
                )

            # Get content with size limit
            content = response.content
            if len(content) > MAX_FEED_SIZE:
                raise ValueError(f"Feed too large: exceeds {MAX_FEED_SIZE} byte limit")

            # Parse with feedparser using the validated content
            # feedparser has built-in protection against XML attacks
            return feedparser.parse(content)  # type: ignore
        except Exception as e:
            # Log error and return empty feed
            logging.warning(f"Failed to parse RSS feed from {url}: {e}")
            return {"entries": []}

    async def run(self, input_data: Input, **kwargs) -> BlockOutput:
        keep_going = True
        start_time = datetime.now(timezone.utc) - timedelta(
            minutes=input_data.time_period
        )
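        # Always poll at least once; keep looping only while run_continuously
        # is set. Each pass yields every new entry individually, then the full
        # batch of entries found in that pass.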
        while keep_going:
            keep_going = input_data.run_continuously

            feed = await self.parse_feed(input_data.rss_url)
            all_entries = []

            for entry in feed["entries"]:
                published = entry.get("published_parsed")
                if not published:
                    # Skip entries without a parseable publish date rather
                    # than raising a KeyError on malformed feeds.
                    continue
                pub_date = datetime(*published[:6], tzinfo=timezone.utc)

                if pub_date > start_time:
                    rss_entry = RSSEntry(
                        title=entry["title"],
                        link=entry["link"],
                        description=entry.get("summary", ""),
                        pub_date=pub_date,
                        author=entry.get("author", ""),
                        categories=[tag["term"] for tag in entry.get("tags", [])],
                    )
                    all_entries.append(rss_entry)
                    yield "entry", rss_entry

            yield "entries", all_entries
            if keep_going:
                # Wait between polls only when running continuously.
                await asyncio.sleep(input_data.polling_rate)