mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-22 05:28:02 -05:00
Merge branch 'dev' into swiftyos/secrt-1706-improve-store-search
This commit is contained in:
@@ -246,7 +246,7 @@ async def execute_node(
|
||||
async for output_name, output_data in node_block.execute(
|
||||
input_data, **extra_exec_kwargs
|
||||
):
|
||||
output_data = json.convert_pydantic_to_json(output_data)
|
||||
output_data = json.to_dict(output_data)
|
||||
output_size += len(json.dumps(output_data))
|
||||
log_metadata.debug("Node produced output", **{output_name: output_data})
|
||||
yield output_name, output_data
|
||||
|
||||
@@ -1,25 +1,22 @@
|
||||
import logging
|
||||
import re
|
||||
from typing import Any, Type, TypeGuard, TypeVar, overload
|
||||
from typing import Any, Type, TypeVar, overload
|
||||
|
||||
import jsonschema
|
||||
import orjson
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from fastapi.encoders import jsonable_encoder as to_dict
|
||||
from prisma import Json
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .truncate import truncate
|
||||
from .type import type_match
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Precompiled regex to remove PostgreSQL-incompatible control characters
|
||||
# Removes \u0000-\u0008, \u000B-\u000C, \u000E-\u001F, \u007F (keeps tab \u0009, newline \u000A, carriage return \u000D)
|
||||
POSTGRES_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]")
|
||||
|
||||
|
||||
def to_dict(data: Any) -> Any:
    """Convert ``data`` into plain JSON-compatible Python values.

    A Pydantic model is first dumped with ``model_dump()``; the result (or
    the untouched input for any other type) is then passed through FastAPI's
    ``jsonable_encoder``.

    Args:
        data: A Pydantic model or any value ``jsonable_encoder`` accepts.

    Returns:
        Whatever ``jsonable_encoder`` produces for the input. This is a dict
        for models and mappings, but a list or scalar for list/scalar input,
        so the return type is not restricted to ``dict``.
    """
    if isinstance(data, BaseModel):
        data = data.model_dump()
    return jsonable_encoder(data)
|
||||
|
||||
|
||||
def dumps(
|
||||
data: Any, *args: Any, indent: int | None = None, option: int = 0, **kwargs: Any
|
||||
) -> str:
|
||||
@@ -108,64 +105,19 @@ def validate_with_jsonschema(
|
||||
return str(e)
|
||||
|
||||
|
||||
def is_list_of_basemodels(value: object) -> TypeGuard[list[BaseModel]]:
    """Return True when ``value`` is a list whose items are all Pydantic models.

    An empty list also returns True, since no element fails the check.
    """
    if not isinstance(value, list):
        return False
    for element in value:
        if not isinstance(element, BaseModel):
            return False
    return True
|
||||
|
||||
|
||||
def convert_pydantic_to_json(output_data: Any) -> Any:
    """Dump a Pydantic model, or a list made only of models, to plain data.

    Args:
        output_data: Any value produced by a block.

    Returns:
        ``model_dump()`` output for a single model, a list of such dumps for
        a list consisting solely of models, and the input unchanged for
        everything else.
    """
    if isinstance(output_data, BaseModel):
        return output_data.model_dump()
    if not is_list_of_basemodels(output_data):
        # Anything that is not a homogeneous list of models passes through.
        return output_data
    dumped = []
    for model in output_data:
        dumped.append(model.model_dump())
    return dumped
|
||||
|
||||
|
||||
def _sanitize_value(value: Any) -> Any:
    """Strip PostgreSQL-incompatible control characters from nested data.

    Strings are cleaned with ``POSTGRES_CONTROL_CHARS``. Dicts (both keys and
    values), lists and tuples are rebuilt with every member sanitized
    recursively. Any other type is returned untouched.

    Args:
        value: The value to sanitize.

    Returns:
        A sanitized copy for str/dict/list/tuple input, otherwise ``value``
        itself.
    """
    if isinstance(value, str):
        return POSTGRES_CONTROL_CHARS.sub("", value)

    if isinstance(value, dict):
        cleaned = {}
        for key, item in value.items():
            # Sanitize the key first, then the value, like the mapping order.
            clean_key = _sanitize_value(key)
            cleaned[clean_key] = _sanitize_value(item)
        return cleaned

    if isinstance(value, list):
        return list(map(_sanitize_value, value))

    if isinstance(value, tuple):
        return tuple(map(_sanitize_value, value))

    # int, float, bool, None and other types carry no strings to clean.
    return value
|
||||
|
||||
|
||||
def SafeJson(data: Any) -> Json:
|
||||
"""
|
||||
Safely serialize data and return Prisma's Json type.
|
||||
Sanitizes control characters to prevent PostgreSQL 22P05 errors.
|
||||
|
||||
This function:
|
||||
1. Converts Pydantic models to dicts
|
||||
1. Converts Pydantic models to dicts (recursively using to_dict)
|
||||
2. Recursively removes PostgreSQL-incompatible control characters from strings
|
||||
3. Returns a Prisma Json object safe for database storage
|
||||
|
||||
Uses to_dict (jsonable_encoder) with a custom encoder to handle both Pydantic
|
||||
conversion and control character sanitization in a two-pass approach.
|
||||
|
||||
Args:
|
||||
data: Input data to sanitize and convert to Json
|
||||
|
||||
@@ -177,9 +129,29 @@ def SafeJson(data: Any) -> Json:
|
||||
>>> SafeJson({"path": "C:\\\\temp"}) # backslashes preserved
|
||||
>>> SafeJson({"data": "Text\\\\u0000here"}) # literal backslash-u preserved
|
||||
"""
|
||||
# Convert Pydantic models to dict first
|
||||
if isinstance(data, BaseModel):
|
||||
data = data.model_dump(exclude_none=True)
|
||||
|
||||
# Return as Prisma Json type
|
||||
return Json(_sanitize_value(data))
|
||||
def _sanitize_string(value: str) -> str:
|
||||
"""Remove PostgreSQL-incompatible control characters from string."""
|
||||
return POSTGRES_CONTROL_CHARS.sub("", value)
|
||||
|
||||
try:
|
||||
# Use two-pass approach for consistent string sanitization:
|
||||
# 1. First convert to basic JSON-serializable types (handles Pydantic models)
|
||||
# 2. Then sanitize strings in the result
|
||||
basic_result = to_dict(data)
|
||||
sanitized_result = to_dict(basic_result, custom_encoder={str: _sanitize_string})
|
||||
return Json(sanitized_result)
|
||||
except Exception as e:
|
||||
# Log the failure and fall back to string representation
|
||||
logger.error(
|
||||
"SafeJson fallback to string representation due to serialization error: %s (%s). "
|
||||
"Data type: %s, Data preview: %s",
|
||||
type(e).__name__,
|
||||
truncate(str(e), 200),
|
||||
type(data).__name__,
|
||||
truncate(str(data), 100),
|
||||
)
|
||||
|
||||
# Ultimate fallback: convert to string representation and sanitize
|
||||
sanitized = _sanitize_string(str(data))
|
||||
return Json(sanitized)
|
||||
|
||||
@@ -479,3 +479,278 @@ class TestSafeJson:
|
||||
# And can be parsed back
|
||||
parsed_back = json.loads(json_string)
|
||||
assert isinstance(parsed_back, dict)
|
||||
|
||||
def test_dict_containing_pydantic_models(self):
    """A dict whose values are Pydantic models must serialize cleanly."""
    import json

    # Regression for PR #11187, where credential_inputs of this shape failed.
    payload = {
        "user1": SamplePydanticModel(name="Alice", age=30),
        "user2": SamplePydanticModel(name="Bob", age=25),
        "regular_data": "test",
    }

    result = SafeJson(payload)
    assert isinstance(result, Json)

    # The original bug surfaced when the wrapped data was dumped to JSON.
    serialized = json.dumps(result.data)
    for expected in ("Alice", "Bob"):
        assert expected in serialized
|
||||
|
||||
def test_nested_pydantic_in_dict(self):
    """Models nested inside models inside dicts are serialized at every depth."""
    import json

    middle_model = SamplePydanticModel(
        name="Middle",
        age=30,
        metadata={"inner": SamplePydanticModel(name="Inner", age=20)},
    )
    level2 = {"model": middle_model, "other": "data"}
    payload = {"level1": {"level2": level2}}

    result = SafeJson(payload)
    assert isinstance(result, Json)

    serialized = json.dumps(result.data)
    for expected in ("Middle", "Inner"):
        assert expected in serialized
|
||||
|
||||
def test_list_containing_pydantic_models_in_dict(self):
    """A list of Pydantic models stored under a dict key is serialized."""
    import json

    users = []
    for i in range(5):
        users.append(SamplePydanticModel(name=f"User{i}", age=20 + i))

    payload = {"users": users, "count": len(users)}

    result = SafeJson(payload)
    assert isinstance(result, Json)

    # First and last generated names are enough to show the list survived.
    serialized = json.dumps(result.data)
    for expected in ("User0", "User4"):
        assert expected in serialized
|
||||
|
||||
def test_credentials_meta_input_scenario(self):
    """Reproduce the create_graph_execution credential_inputs failure case."""
    import json

    # Stand-in for the CredentialsMetaInput structure.
    class MockCredentialsMetaInput(BaseModel):
        id: str
        title: Optional[str] = None
        provider: str
        type: str

    # credential_inputs in create_graph_execution maps a name to such a model.
    credential_inputs = {
        "github_creds": MockCredentialsMetaInput(
            id="test-123",
            title="Test Credentials",
            provider="github",
            type="oauth2",
        )
    }

    # Must not raise TypeError.
    result = SafeJson(credential_inputs)
    assert isinstance(result, Json)

    serialized = json.dumps(result.data)
    for expected in ("test-123", "github", "oauth2"):
        assert expected in serialized
|
||||
|
||||
def test_mixed_pydantic_and_primitives(self):
    """Models interleaved with dicts, strings and numbers are all serialized."""
    import json

    model = SamplePydanticModel(name="Test", age=25)

    nested = {
        "model": model,
        "list": [1, 2, model, 4],
        "plain": "text",
    }
    payload = {
        "models": [model, {"plain": "dict"}, "string", 123],
        "nested": nested,
        "plain_list": [1, 2, 3],
    }

    result = SafeJson(payload)
    assert isinstance(result, Json)

    serialized = json.dumps(result.data)
    for expected in ("Test", "plain"):
        assert expected in serialized
|
||||
|
||||
def test_pydantic_model_with_control_chars_in_dict(self):
    """Control characters inside a dict-nested model are stripped."""
    import json

    payload = {
        "credential": SamplePydanticModel(
            name="Test\x00User",  # contains a null byte
            age=30,
            metadata={"info": "data\x08with\x0Ccontrols"},
        )
    }

    result = SafeJson(payload)
    assert isinstance(result, Json)

    serialized = json.dumps(result.data)
    for forbidden in ("\x00", "\x08", "\x0C"):
        assert forbidden not in serialized
    # The name survives, only without its null byte.
    assert "TestUser" in serialized
|
||||
|
||||
def test_deeply_nested_pydantic_models_control_char_sanitization(self):
    """Control characters must be stripped at every depth of nested models."""
    import json

    # Three model layers so that strings live at several nesting depths.
    class InnerModel(BaseModel):
        deep_string: str
        value: int = 42
        metadata: dict = {}

    class MiddleModel(BaseModel):
        middle_string: str
        inner: InnerModel
        data: str

    class OuterModel(BaseModel):
        outer_string: str
        middle: MiddleModel

    # Every level, including the dict nested in the innermost model,
    # carries at least one control character.
    outer = OuterModel(
        outer_string="Outer\x00Null\x07Bell",
        middle=MiddleModel(
            middle_string="Middle\x01StartOfHeading\x1FUnitSeparator",
            inner=InnerModel(
                deep_string="Deepest\x00Level\x08Control\x0CChars",
                metadata={"nested_key": "Nested\x1FValue\x7FDelete"},
            ),
            data="Some\x0BVerticalTab\x0EShiftOut",
        ),
    )

    # Wrap the model in a dict that adds control characters of its own.
    payload = {
        "top_level": "Top\x00Level\x08Backspace",
        "nested_model": outer,
        "list_with_strings": [
            "List\x00Item1",
            "List\x0CItem2\x1F",
            {"dict_in_list": "Dict\x08Value"},
        ],
    }

    result = SafeJson(payload)
    assert isinstance(result, Json)

    json_string = json.dumps(result.data)

    # \x00-\x08, \x0B, \x0C, \x0E-\x1F and \x7F must all be gone.
    forbidden_codes = (*range(0x00, 0x09), 0x0B, 0x0C, *range(0x0E, 0x20), 0x7F)
    for char in map(chr, forbidden_codes):
        assert (
            char not in json_string
        ), f"Control character {repr(char)} found in result"

    # The surrounding text is preserved once the control characters are cut:
    # top level, outer model, middle model, inner model, nested dict, list.
    surviving_text = (
        "TopLevelBackspace",
        "OuterNullBell",
        "MiddleStartOfHeadingUnitSeparator",
        "SomeVerticalTabShiftOut",
        "DeepestLevelControlChars",
        "NestedValueDelete",
        "ListItem1",
        "ListItem2",
        "DictValue",
    )
    for fragment in surviving_text:
        assert fragment in json_string

    # The structure must survive too, not collapse into a string.
    result_data = cast(dict[str, Any], result.data)
    assert isinstance(result_data, dict)
    assert isinstance(result_data["nested_model"], dict)
    assert isinstance(result_data["nested_model"]["middle"], dict)
    assert isinstance(result_data["nested_model"]["middle"]["inner"], dict)
    assert isinstance(result_data["list_with_strings"], list)

    # Walk down to the deepest values and check them exactly.
    outer_dict = cast(dict[str, Any], result_data["nested_model"])
    middle_dict = cast(dict[str, Any], outer_dict["middle"])
    inner_dict = cast(dict[str, Any], middle_dict["inner"])
    assert inner_dict["deep_string"] == "DeepestLevelControlChars"

    inner_metadata = cast(dict[str, Any], inner_dict["metadata"])
    assert inner_metadata["nested_key"] == "NestedValueDelete"
|
||||
|
||||
Reference in New Issue
Block a user