mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
feat(backend): Normalize simplified JSON schema for Firecrawl extraction
This commit is contained in:
committed by
GitHub
parent
4744675ef9
commit
67b14cda64
53
autogpt_platform/backend/backend/blocks/firecrawl/extract.py
Executable file → Normal file
53
autogpt_platform/backend/backend/blocks/firecrawl/extract.py
Executable file → Normal file
@@ -19,6 +19,52 @@ from backend.sdk import (
|
||||
from ._config import firecrawl
|
||||
|
||||
|
||||
def normalize_to_json_schema(schema: dict | None) -> dict | None:
|
||||
"""
|
||||
Normalize a simplified schema format into valid JSON Schema format.
|
||||
|
||||
Transforms simplified schemas like {"field": "type"} into proper JSON Schema format:
|
||||
{"type": "object", "properties": {"field": {"type": "type"}}}
|
||||
|
||||
If the schema already appears to be a valid JSON Schema (has "type" or "properties"),
|
||||
it is returned as-is.
|
||||
|
||||
Args:
|
||||
schema: The schema to normalize, or None
|
||||
|
||||
Returns:
|
||||
A valid JSON Schema dict, or None if input was None
|
||||
"""
|
||||
if schema is None:
|
||||
return None
|
||||
|
||||
# If it already has "type" at the root level, assume it's already a JSON Schema
|
||||
if "type" in schema:
|
||||
return schema
|
||||
|
||||
# If it already has "properties", assume it's already a JSON Schema
|
||||
if "properties" in schema:
|
||||
return schema
|
||||
|
||||
# Otherwise, treat it as a simplified format and transform it
|
||||
properties = {}
|
||||
for key, value in schema.items():
|
||||
if isinstance(value, str):
|
||||
# Simple type string like "string", "number", etc.
|
||||
properties[key] = {"type": value}
|
||||
elif isinstance(value, dict):
|
||||
# Already a property definition, use as-is
|
||||
properties[key] = value
|
||||
else:
|
||||
# Fallback: treat as any type
|
||||
properties[key] = {"type": "string"}
|
||||
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": properties,
|
||||
}
|
||||
|
||||
|
||||
@cost(BlockCost(2, BlockCostType.RUN))
|
||||
class FirecrawlExtractBlock(Block):
|
||||
class Input(BlockSchemaInput):
|
||||
@@ -30,7 +76,7 @@ class FirecrawlExtractBlock(Block):
|
||||
description="The prompt to use for the crawl", default=None, advanced=False
|
||||
)
|
||||
output_schema: dict | None = SchemaField(
|
||||
description="A Json Schema describing the output structure if more rigid structure is desired.",
|
||||
description='A JSON Schema describing the output structure. Supports both simplified format (e.g., {"field": "string"}) and full JSON Schema format (e.g., {"type": "object", "properties": {"field": {"type": "string"}}}).',
|
||||
default=None,
|
||||
)
|
||||
enable_web_search: bool = SchemaField(
|
||||
@@ -59,10 +105,13 @@ class FirecrawlExtractBlock(Block):
|
||||
) -> BlockOutput:
|
||||
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
|
||||
|
||||
# Normalize the schema to ensure it's in valid JSON Schema format
|
||||
normalized_schema = normalize_to_json_schema(input_data.output_schema)
|
||||
|
||||
extract_result = app.extract(
|
||||
urls=input_data.urls,
|
||||
prompt=input_data.prompt,
|
||||
schema=input_data.output_schema,
|
||||
schema=normalized_schema,
|
||||
enable_web_search=input_data.enable_web_search,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user