Files
AutoGPT/autogpt_platform/backend/backend/blocks/code_extraction_block.py
Reinier van der Leer 113e87a23c refactor(backend): Reduce circular imports (#12068)
I'm getting circular import issues because there is a lot of
cross-importing between `backend.data`, `backend.blocks`, and other
modules. This change reduces block-related cross-imports and thus risk
of breaking circular imports.

### Changes 🏗️

- Strip down `backend.data.block`
- Move `Block` base class and related class/enum defs to
`backend.blocks._base`
  - Move `is_block_auth_configured` to `backend.blocks._utils`
- Move `get_blocks()`, `get_io_block_ids()` etc. to `backend.blocks`
(`__init__.py`)
  - Update imports everywhere
- Remove unused and poorly typed `Block.create()`
  - Change usages from `block_cls.create()` to `block_cls()`
- Improve typing of `load_all_blocks` and `get_blocks`
- Move cross-import of `backend.api.features.library.model` from
`backend/data/__init__.py` to `backend/data/integrations.py`
- Remove deprecated attribute `NodeModel.webhook`
  - Re-generate OpenAPI spec and fix frontend usage
- Eliminate module-level `backend.blocks` import from `blocks/agent.py`
- Eliminate module-level `backend.data.execution` and
`backend.executor.manager` imports from `blocks/helpers/review.py`
- Replace `BlockInput` with `GraphInput` for graph inputs

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - CI static type-checking + tests should be sufficient for this
2026-02-12 12:07:49 +00:00

119 lines
4.8 KiB
Python

import re
from backend.blocks._base import (
Block,
BlockCategory,
BlockOutput,
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.data.model import SchemaField
class CodeExtractionBlock(Block):
class Input(BlockSchemaInput):
text: str = SchemaField(
description="Text containing code blocks to extract (e.g., AI response)",
placeholder="Enter text containing code blocks",
)
class Output(BlockSchemaOutput):
html: str = SchemaField(description="Extracted HTML code")
css: str = SchemaField(description="Extracted CSS code")
javascript: str = SchemaField(description="Extracted JavaScript code")
python: str = SchemaField(description="Extracted Python code")
sql: str = SchemaField(description="Extracted SQL code")
java: str = SchemaField(description="Extracted Java code")
cpp: str = SchemaField(description="Extracted C++ code")
csharp: str = SchemaField(description="Extracted C# code")
json_code: str = SchemaField(description="Extracted JSON code")
bash: str = SchemaField(description="Extracted Bash code")
php: str = SchemaField(description="Extracted PHP code")
ruby: str = SchemaField(description="Extracted Ruby code")
yaml: str = SchemaField(description="Extracted YAML code")
markdown: str = SchemaField(description="Extracted Markdown code")
typescript: str = SchemaField(description="Extracted TypeScript code")
xml: str = SchemaField(description="Extracted XML code")
remaining_text: str = SchemaField(
description="Remaining text after code extraction"
)
def __init__(self):
super().__init__(
id="d3a7d896-3b78-4f44-8b4b-48fbf4f0bcd8",
description="Extracts code blocks from text and identifies their programming languages",
categories={BlockCategory.TEXT},
input_schema=CodeExtractionBlock.Input,
output_schema=CodeExtractionBlock.Output,
test_input={
"text": "Here's a Python example:\n```python\nprint('Hello World')\n```\nAnd some HTML:\n```html\n<h1>Title</h1>\n```"
},
test_output=[
("html", "<h1>Title</h1>"),
("python", "print('Hello World')"),
("remaining_text", "Here's a Python example:\nAnd some HTML:"),
],
)
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
# List of supported programming languages with mapped aliases
language_aliases = {
"html": ["html", "htm"],
"css": ["css"],
"javascript": ["javascript", "js"],
"python": ["python", "py"],
"sql": ["sql"],
"java": ["java"],
"cpp": ["cpp", "c++"],
"csharp": ["csharp", "c#", "cs"],
"json_code": ["json"],
"bash": ["bash", "shell", "sh"],
"php": ["php"],
"ruby": ["ruby", "rb"],
"yaml": ["yaml", "yml"],
"markdown": ["markdown", "md"],
"typescript": ["typescript", "ts"],
"xml": ["xml"],
}
# Extract code for each language
for canonical_name, aliases in language_aliases.items():
code = ""
# Try each alias for the language
for alias in aliases:
code_for_alias = self.extract_code(input_data.text, alias)
if code_for_alias:
code = code + "\n\n" + code_for_alias if code else code_for_alias
if code: # Only yield if there's actual code content
yield canonical_name, code
# Remove all code blocks from the text to get remaining text
pattern = (
r"```(?:"
+ "|".join(
re.escape(alias)
for aliases in language_aliases.values()
for alias in aliases
)
+ r")[ \t]*\n[\s\S]*?```"
)
remaining_text = re.sub(pattern, "", input_data.text).strip()
remaining_text = re.sub(r"\n\s*\n", "\n", remaining_text)
if remaining_text: # Only yield if there's remaining text
yield "remaining_text", remaining_text
def extract_code(self, text: str, language: str) -> str:
# Escape special regex characters in the language string
language = re.escape(language)
# Extract all code blocks enclosed in ```language``` blocks
pattern = re.compile(
rf"```{language}[ \t]*\n(.*?)\n```", re.DOTALL | re.IGNORECASE
)
matches = pattern.finditer(text)
# Combine all code blocks for this language with newlines between them
code_blocks = [match.group(1).strip() for match in matches]
return "\n\n".join(code_blocks) if code_blocks else ""