Files
AutoGPT/autogpt_platform/backend/backend/blocks/code_extraction_block.py
Zamil Majdy 2f8cdf62ba feat(backend): Standardize error handling with BlockSchemaInput & BlockSchemaOutput base class (#11257)
<!-- Clearly explain the need for these changes: -->

This PR addresses the need for consistent error handling across all
blocks in the AutoGPT platform. Previously, each block had to manually
define an `error` field in their output schema, leading to code
duplication and potential inconsistencies. Some blocks might forget to
include the error field, making error handling unpredictable.

### Changes 🏗️

<!-- Concisely describe all of the changes made in this pull request:
-->

- **Created `BlockSchemaOutput` base class**: New base class that
extends `BlockSchema` with a standardized `error` field
- **Created `BlockSchemaInput` base class**: Added for consistency and
future extensibility
- **Updated 140+ block implementations**: Changed all block `Output`
classes from `class Output(BlockSchema):` to `class
Output(BlockSchemaOutput):`
- **Removed manual error field definitions**: Eliminated hundreds of
duplicate `error: str = SchemaField(...)` definitions
- **Updated type annotations**: Changed `Block[BlockSchema,
BlockSchema]` to `Block[BlockSchemaInput, BlockSchemaOutput]` throughout
the codebase
- **Fixed imports**: Added `BlockSchemaInput` and `BlockSchemaOutput`
imports to all relevant files
- **Maintained backward compatibility**: Updated `EmptySchema` to
inherit from `BlockSchemaOutput`

**Key Benefits:**
- Consistent error handling across all blocks
- Reduced code duplication (removed ~200 lines of repetitive error field
definitions)
- Type safety improvements with distinct input/output schema types
- Blocks can still override error field with more specific descriptions
when needed

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
- [x] Verified `poetry run format` passes (all linting, formatting, and
type checking)
- [x] Tested block instantiation works correctly (MediaDurationBlock,
UnrealTextToSpeechBlock)
- [x] Confirmed error fields are automatically present in all updated
blocks
- [x] Verified block loading system works (successfully loads 353+
blocks)
  - [x] Tested backward compatibility with EmptySchema
- [x] Confirmed blocks can still override error field with custom
descriptions
  - [x] Validated core schema inheritance chain works correctly

#### For configuration changes:

- [x] `.env.default` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my
changes
- [x] I have included a list of my configuration changes in the PR
description (under **Changes**)

*Note: No configuration changes were needed for this refactoring.*

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Lluis Agusti <hi@llu.lu>
Co-authored-by: Ubbe <hi@ubbe.dev>
2025-10-30 12:28:08 +00:00

119 lines
4.8 KiB
Python

import re
from backend.data.block import (
Block,
BlockCategory,
BlockOutput,
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.data.model import SchemaField
class CodeExtractionBlock(Block):
class Input(BlockSchemaInput):
text: str = SchemaField(
description="Text containing code blocks to extract (e.g., AI response)",
placeholder="Enter text containing code blocks",
)
class Output(BlockSchemaOutput):
html: str = SchemaField(description="Extracted HTML code")
css: str = SchemaField(description="Extracted CSS code")
javascript: str = SchemaField(description="Extracted JavaScript code")
python: str = SchemaField(description="Extracted Python code")
sql: str = SchemaField(description="Extracted SQL code")
java: str = SchemaField(description="Extracted Java code")
cpp: str = SchemaField(description="Extracted C++ code")
csharp: str = SchemaField(description="Extracted C# code")
json_code: str = SchemaField(description="Extracted JSON code")
bash: str = SchemaField(description="Extracted Bash code")
php: str = SchemaField(description="Extracted PHP code")
ruby: str = SchemaField(description="Extracted Ruby code")
yaml: str = SchemaField(description="Extracted YAML code")
markdown: str = SchemaField(description="Extracted Markdown code")
typescript: str = SchemaField(description="Extracted TypeScript code")
xml: str = SchemaField(description="Extracted XML code")
remaining_text: str = SchemaField(
description="Remaining text after code extraction"
)
def __init__(self):
super().__init__(
id="d3a7d896-3b78-4f44-8b4b-48fbf4f0bcd8",
description="Extracts code blocks from text and identifies their programming languages",
categories={BlockCategory.TEXT},
input_schema=CodeExtractionBlock.Input,
output_schema=CodeExtractionBlock.Output,
test_input={
"text": "Here's a Python example:\n```python\nprint('Hello World')\n```\nAnd some HTML:\n```html\n<h1>Title</h1>\n```"
},
test_output=[
("html", "<h1>Title</h1>"),
("python", "print('Hello World')"),
("remaining_text", "Here's a Python example:\nAnd some HTML:"),
],
)
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
# List of supported programming languages with mapped aliases
language_aliases = {
"html": ["html", "htm"],
"css": ["css"],
"javascript": ["javascript", "js"],
"python": ["python", "py"],
"sql": ["sql"],
"java": ["java"],
"cpp": ["cpp", "c++"],
"csharp": ["csharp", "c#", "cs"],
"json_code": ["json"],
"bash": ["bash", "shell", "sh"],
"php": ["php"],
"ruby": ["ruby", "rb"],
"yaml": ["yaml", "yml"],
"markdown": ["markdown", "md"],
"typescript": ["typescript", "ts"],
"xml": ["xml"],
}
# Extract code for each language
for canonical_name, aliases in language_aliases.items():
code = ""
# Try each alias for the language
for alias in aliases:
code_for_alias = self.extract_code(input_data.text, alias)
if code_for_alias:
code = code + "\n\n" + code_for_alias if code else code_for_alias
if code: # Only yield if there's actual code content
yield canonical_name, code
# Remove all code blocks from the text to get remaining text
pattern = (
r"```(?:"
+ "|".join(
re.escape(alias)
for aliases in language_aliases.values()
for alias in aliases
)
+ r")[ \t]*\n[\s\S]*?```"
)
remaining_text = re.sub(pattern, "", input_data.text).strip()
remaining_text = re.sub(r"\n\s*\n", "\n", remaining_text)
if remaining_text: # Only yield if there's remaining text
yield "remaining_text", remaining_text
def extract_code(self, text: str, language: str) -> str:
# Escape special regex characters in the language string
language = re.escape(language)
# Extract all code blocks enclosed in ```language``` blocks
pattern = re.compile(
rf"```{language}[ \t]*\n(.*?)\n```", re.DOTALL | re.IGNORECASE
)
matches = pattern.finditer(text)
# Combine all code blocks for this language with newlines between them
code_blocks = [match.group(1).strip() for match in matches]
return "\n\n".join(code_blocks) if code_blocks else ""