AutoGPT/autogpt_platform/backend/backend/blocks/rss.py
Zamil Majdy 2f8cdf62ba feat(backend): Standardize error handling with BlockSchemaInput & BlockSchemaOutput base class (#11257)

This PR addresses the need for consistent error handling across all
blocks in the AutoGPT platform. Previously, each block had to manually
define an `error` field in their output schema, leading to code
duplication and potential inconsistencies. Some blocks might forget to
include the error field, making error handling unpredictable.

### Changes 🏗️


- **Created `BlockSchemaOutput` base class**: New base class that
extends `BlockSchema` with a standardized `error` field (see the sketch after this list)
- **Created `BlockSchemaInput` base class**: Added for consistency and
future extensibility
- **Updated 140+ block implementations**: Changed all block `Output`
classes from `class Output(BlockSchema):` to `class
Output(BlockSchemaOutput):`
- **Removed manual error field definitions**: Eliminated hundreds of
duplicate `error: str = SchemaField(...)` definitions
- **Updated type annotations**: Changed `Block[BlockSchema,
BlockSchema]` to `Block[BlockSchemaInput, BlockSchemaOutput]` throughout
the codebase
- **Fixed imports**: Added `BlockSchemaInput` and `BlockSchemaOutput`
imports to all relevant files
- **Maintained backward compatibility**: Updated `EmptySchema` to
inherit from `BlockSchemaOutput`
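
For illustration, a minimal sketch of the two base classes as described above. The actual definitions live in `backend/data/block.py` and may differ in detail; the `SchemaField` arguments here are assumptions based on this description:

```python
# Illustrative sketch only, not the actual implementation in backend/data/block.py.
from backend.data.block import BlockSchema
from backend.data.model import SchemaField


class BlockSchemaInput(BlockSchema):
    """Base class for block input schemas; reserved for shared input fields."""


class BlockSchemaOutput(BlockSchema):
    """Base class for block output schemas with a standardized error field."""

    error: str = SchemaField(
        description="Error message if the block execution failed."
    )
```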

**Key Benefits:**
- Consistent error handling across all blocks
- Reduced code duplication (removed ~200 lines of repetitive error field
definitions)
- Type safety improvements with distinct input/output schema types
- Blocks can still override the `error` field with a more specific description
when needed (see the example below)
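
For example, a block can redeclare the inherited field with a narrower description. This is a hypothetical block output, following the `SchemaField` pattern used elsewhere in this PR:

```python
class Output(BlockSchemaOutput):
    # Redeclaring `error` overrides the generic description inherited
    # from BlockSchemaOutput with a block-specific one.
    error: str = SchemaField(
        description="Returned when the RSS feed cannot be fetched or parsed."
    )
```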

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Verified `poetry run format` passes (all linting, formatting, and type checking)
  - [x] Tested block instantiation works correctly (MediaDurationBlock, UnrealTextToSpeechBlock)
  - [x] Confirmed error fields are automatically present in all updated blocks
  - [x] Verified block loading system works (successfully loads 353+ blocks)
  - [x] Tested backward compatibility with EmptySchema
  - [x] Confirmed blocks can still override error field with custom descriptions
  - [x] Validated core schema inheritance chain works correctly

#### For configuration changes:

- [x] `.env.default` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my
changes
- [x] I have included a list of my configuration changes in the PR
description (under **Changes**)

*Note: No configuration changes were needed for this refactoring.*

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Lluis Agusti <hi@llu.lu>
Co-authored-by: Ubbe <hi@ubbe.dev>
2025-10-30 12:28:08 +00:00


import asyncio
import logging
from datetime import datetime, timedelta, timezone
from typing import Any

import feedparser
import pydantic

from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
    BlockSchemaInput,
    BlockSchemaOutput,
)
from backend.data.model import SchemaField
from backend.util.request import Requests


class RSSEntry(pydantic.BaseModel):
    title: str
    link: str
    description: str
    pub_date: datetime
    author: str
    categories: list[str]


class ReadRSSFeedBlock(Block):
    class Input(BlockSchemaInput):
        rss_url: str = SchemaField(
            description="The URL of the RSS feed to read",
            placeholder="https://example.com/rss",
        )
        time_period: int = SchemaField(
            description="The time period to check in minutes relative to the run block runtime, e.g. 60 would check for new entries in the last hour.",
            placeholder="1440",
            default=1440,
        )
        polling_rate: int = SchemaField(
            description="The number of seconds to wait between polling attempts.",
            placeholder="300",
        )
        run_continuously: bool = SchemaField(
            description="Whether to run the block continuously or just once.",
            default=True,
        )

    class Output(BlockSchemaOutput):
        entry: RSSEntry = SchemaField(description="The RSS item")
        entries: list[RSSEntry] = SchemaField(description="List of all RSS entries")

    def __init__(self):
        super().__init__(
            id="5ebe6768-8e5d-41e3-9134-1c7bd89a8d52",
            input_schema=ReadRSSFeedBlock.Input,
            output_schema=ReadRSSFeedBlock.Output,
            description="Reads RSS feed entries from a given URL.",
            categories={BlockCategory.INPUT},
            test_input={
                "rss_url": "https://example.com/rss",
                "time_period": 10_000_000,
                "polling_rate": 1,
                "run_continuously": False,
            },
            test_output=[
                (
                    "entry",
                    RSSEntry(
                        title="Example RSS Item",
                        link="https://example.com/article",
                        description="This is an example RSS item description.",
                        pub_date=datetime(2023, 6, 23, 12, 30, 0, tzinfo=timezone.utc),
                        author="John Doe",
                        categories=["Technology", "News"],
                    ),
                ),
                (
                    "entries",
                    [
                        RSSEntry(
                            title="Example RSS Item",
                            link="https://example.com/article",
                            description="This is an example RSS item description.",
                            pub_date=datetime(
                                2023, 6, 23, 12, 30, 0, tzinfo=timezone.utc
                            ),
                            author="John Doe",
                            categories=["Technology", "News"],
                        ),
                    ],
                ),
            ],
            test_mock={
                "parse_feed": lambda *args, **kwargs: {
                    "entries": [
                        {
                            "title": "Example RSS Item",
                            "link": "https://example.com/article",
                            "summary": "This is an example RSS item description.",
                            "published_parsed": (2023, 6, 23, 12, 30, 0, 4, 174, 0),
                            "author": "John Doe",
                            "tags": [{"term": "Technology"}, {"term": "News"}],
                        }
                    ]
                }
            },
        )

    @staticmethod
    async def parse_feed(url: str) -> dict[str, Any]:
        # Security fix: Add protection against memory exhaustion attacks
        MAX_FEED_SIZE = 10 * 1024 * 1024  # 10MB limit for RSS feeds

        # Download feed content with size limit
        try:
            response = await Requests(raise_for_status=True).get(url)

            # Check the declared content length if available
            content_length = response.headers.get("Content-Length")
            if content_length and int(content_length) > MAX_FEED_SIZE:
                raise ValueError(
                    f"Feed too large: {content_length} bytes exceeds {MAX_FEED_SIZE} limit"
                )

            # Enforce the size limit on the actual body as well
            content = response.content
            if len(content) > MAX_FEED_SIZE:
                raise ValueError(f"Feed too large: exceeds {MAX_FEED_SIZE} byte limit")

            # Parse with feedparser using the validated content;
            # feedparser has built-in protection against XML attacks
            return feedparser.parse(content)  # type: ignore
        except Exception as e:
            # Log the error and return an empty feed
            logging.warning(f"Failed to parse RSS feed from {url}: {e}")
            return {"entries": []}

    async def run(self, input_data: Input, **kwargs) -> BlockOutput:
        keep_going = True
        # Only entries published after this cutoff are yielded
        start_time = datetime.now(timezone.utc) - timedelta(
            minutes=input_data.time_period
        )
        while keep_going:
            keep_going = input_data.run_continuously
            feed = await self.parse_feed(input_data.rss_url)

            all_entries = []
            for entry in feed["entries"]:
                pub_date = datetime(*entry["published_parsed"][:6], tzinfo=timezone.utc)
                if pub_date > start_time:
                    rss_entry = RSSEntry(
                        title=entry["title"],
                        link=entry["link"],
                        description=entry.get("summary", ""),
                        pub_date=pub_date,
                        author=entry.get("author", ""),
                        categories=[tag["term"] for tag in entry.get("tags", [])],
                    )
                    all_entries.append(rss_entry)
                    yield "entry", rss_entry

            yield "entries", all_entries
            await asyncio.sleep(input_data.polling_rate)
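
For reference, a minimal sketch of driving this block's `run` generator directly. This is a debugging aid only: in the platform, blocks are executed by the block runner, so the direct-call wiring below is an assumption, and without the test mock the request will hit the real URL.

```python
import asyncio

from backend.blocks.rss import ReadRSSFeedBlock


async def main():
    block = ReadRSSFeedBlock()
    input_data = ReadRSSFeedBlock.Input(
        rss_url="https://example.com/rss",
        time_period=1440,        # look back 24 hours
        polling_rate=1,          # keep the trailing sleep short
        run_continuously=False,  # single pass, then exit
    )
    # run() is an async generator yielding (output_name, value) pairs.
    async for name, value in block.run(input_data):
        print(name, value)


asyncio.run(main())
```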