mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-12 15:55:03 -05:00
I'm getting circular import issues because there is a lot of cross-importing between `backend.data`, `backend.blocks`, and other modules. This change reduces block-related cross-imports and thus the risk of circular imports breaking the code.

### Changes 🏗️

- Strip down `backend.data.block`
- Move `Block` base class and related class/enum defs to `backend.blocks._base`
- Move `is_block_auth_configured` to `backend.blocks._utils`
- Move `get_blocks()`, `get_io_block_ids()` etc. to `backend.blocks` (`__init__.py`)
- Update imports everywhere
- Remove unused and poorly typed `Block.create()`
- Change usages from `block_cls.create()` to `block_cls()`
- Improve typing of `load_all_blocks` and `get_blocks`
- Move cross-import of `backend.api.features.library.model` from `backend/data/__init__.py` to `backend/data/integrations.py`
- Remove deprecated attribute `NodeModel.webhook`
- Re-generate OpenAPI spec and fix frontend usage
- Eliminate module-level `backend.blocks` import from `blocks/agent.py`
- Eliminate module-level `backend.data.execution` and `backend.executor.manager` imports from `blocks/helpers/review.py`
- Replace `BlockInput` with `GraphInput` for graph inputs

### Checklist 📋

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - CI static type-checking + tests should be sufficient for this
119 lines
4.8 KiB
Python
119 lines
4.8 KiB
Python
import re
|
|
|
|
from backend.blocks._base import (
|
|
Block,
|
|
BlockCategory,
|
|
BlockOutput,
|
|
BlockSchemaInput,
|
|
BlockSchemaOutput,
|
|
)
|
|
from backend.data.model import SchemaField
|
|
|
|
|
|
class CodeExtractionBlock(Block):
    """Extract fenced code blocks from text and group them by language.

    Each supported language has one output pin. All fenced blocks whose label
    matches one of the language's aliases (case-insensitively) are joined with
    a blank line and emitted on that pin. Whatever is left of the text once
    the recognised fenced blocks are removed is emitted as ``remaining_text``.
    """

    # NOTE: the schema classes are documented with comments rather than
    # docstrings so the documentation cannot end up in a generated schema.

    # Input schema: the text to scan for fenced code blocks.
    class Input(BlockSchemaInput):
        text: str = SchemaField(
            description="Text containing code blocks to extract (e.g., AI response)",
            placeholder="Enter text containing code blocks",
        )

    # Output schema: one field per supported language plus the leftover text.
    class Output(BlockSchemaOutput):
        html: str = SchemaField(description="Extracted HTML code")
        css: str = SchemaField(description="Extracted CSS code")
        javascript: str = SchemaField(description="Extracted JavaScript code")
        python: str = SchemaField(description="Extracted Python code")
        sql: str = SchemaField(description="Extracted SQL code")
        java: str = SchemaField(description="Extracted Java code")
        cpp: str = SchemaField(description="Extracted C++ code")
        csharp: str = SchemaField(description="Extracted C# code")
        json_code: str = SchemaField(description="Extracted JSON code")
        bash: str = SchemaField(description="Extracted Bash code")
        php: str = SchemaField(description="Extracted PHP code")
        ruby: str = SchemaField(description="Extracted Ruby code")
        yaml: str = SchemaField(description="Extracted YAML code")
        markdown: str = SchemaField(description="Extracted Markdown code")
        typescript: str = SchemaField(description="Extracted TypeScript code")
        xml: str = SchemaField(description="Extracted XML code")
        remaining_text: str = SchemaField(
            description="Remaining text after code extraction"
        )

    def __init__(self):
        """Register the block's id, description, schemas and self-test data."""
        super().__init__(
            id="d3a7d896-3b78-4f44-8b4b-48fbf4f0bcd8",
            description="Extracts code blocks from text and identifies their programming languages",
            categories={BlockCategory.TEXT},
            input_schema=CodeExtractionBlock.Input,
            output_schema=CodeExtractionBlock.Output,
            test_input={
                "text": "Here's a Python example:\n```python\nprint('Hello World')\n```\nAnd some HTML:\n```html\n<h1>Title</h1>\n```"
            },
            test_output=[
                ("html", "<h1>Title</h1>"),
                ("python", "print('Hello World')"),
                ("remaining_text", "Here's a Python example:\nAnd some HTML:"),
            ],
        )

    async def run(self, input_data: Input, **kwargs) -> BlockOutput:
        """Yield the extracted code per language, then the remaining text.

        Args:
            input_data: Input whose ``text`` field is scanned for fenced blocks.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            ``(output_name, code)`` for every language with at least one
            non-empty match, in the order the languages are declared below,
            followed by ``("remaining_text", text)`` when any text is left.
        """
        # Output pin name -> fence labels that map to it.
        language_aliases = {
            "html": ["html", "htm"],
            "css": ["css"],
            "javascript": ["javascript", "js"],
            "python": ["python", "py"],
            "sql": ["sql"],
            "java": ["java"],
            "cpp": ["cpp", "c++"],
            "csharp": ["csharp", "c#", "cs"],
            "json_code": ["json"],
            "bash": ["bash", "shell", "sh"],
            "php": ["php"],
            "ruby": ["ruby", "rb"],
            "yaml": ["yaml", "yml"],
            "markdown": ["markdown", "md"],
            "typescript": ["typescript", "ts"],
            "xml": ["xml"],
        }

        text = input_data.text

        # Extract code for each language, alias by alias, and join the
        # non-empty results with a blank line between them.
        for canonical_name, aliases in language_aliases.items():
            per_alias = (self.extract_code(text, alias) for alias in aliases)
            code = "\n\n".join(chunk for chunk in per_alias if chunk)
            if code:  # Only yield if there's actual code content
                yield canonical_name, code

        # Remove every recognised fenced block to obtain the remaining text.
        all_aliases = "|".join(
            re.escape(alias)
            for aliases in language_aliases.values()
            for alias in aliases
        )
        fence_pattern = r"```(?:" + all_aliases + r")[ \t]*\n[\s\S]*?```"

        # extract_code() matches labels case-insensitively, so the removal must
        # too; otherwise a block labelled e.g. "Python" would be emitted as
        # code and also left behind in the remaining text.
        remaining_text = re.sub(
            fence_pattern, "", text, flags=re.IGNORECASE
        ).strip()
        # Collapse blank (or whitespace-only) lines left behind by the removal.
        remaining_text = re.sub(r"\n\s*\n", "\n", remaining_text)

        if remaining_text:  # Only yield if there's remaining text
            yield "remaining_text", remaining_text

    def extract_code(self, text: str, language: str) -> str:
        """Return all code fenced with the given language label.

        Args:
            text: Text to search.
            language: Fence label to look for; matched literally and
                case-insensitively.

        Returns:
            The stripped contents of every matching block, joined by a blank
            line, or an empty string when nothing matches.
        """
        # Escape special regex characters in the language string (e.g. "c++").
        language = re.escape(language)
        # The label must be followed only by optional spaces/tabs and a
        # newline, so "java" does not match a "javascript" fence.
        pattern = re.compile(
            rf"```{language}[ \t]*\n(.*?)\n```", re.DOTALL | re.IGNORECASE
        )
        code_blocks = [match.group(1).strip() for match in pattern.finditer(text)]
        return "\n\n".join(code_blocks) if code_blocks else ""
|