mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-23 14:08:02 -05:00
Compare commits
1 Commits
testing-cl
...
feat/impro
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f4ff940803 |
@@ -10,17 +10,19 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
from .type import type_match
|
from .type import type_match
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"json",
|
||||||
|
"dumps",
|
||||||
|
"loads",
|
||||||
|
"validate_with_jsonschema",
|
||||||
|
"SafeJson",
|
||||||
|
"convert_pydantic_to_json",
|
||||||
|
]
|
||||||
|
|
||||||
# Precompiled regex to remove PostgreSQL-incompatible control characters
|
# Precompiled regex to remove PostgreSQL-incompatible control characters
|
||||||
# Removes \u0000-\u0008, \u000B-\u000C, \u000E-\u001F, \u007F (keeps tab \u0009, newline \u000A, carriage return \u000D)
|
# Removes \u0000-\u0008, \u000B-\u000C, \u000E-\u001F, \u007F (keeps tab \u0009, newline \u000A, carriage return \u000D)
|
||||||
POSTGRES_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]")
|
POSTGRES_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]")
|
||||||
|
|
||||||
# Comprehensive regex to remove all PostgreSQL-incompatible control character sequences in JSON
|
|
||||||
# Handles both Unicode escapes (\\u0000-\\u0008, \\u000B-\\u000C, \\u000E-\\u001F, \\u007F)
|
|
||||||
# and JSON single-char escapes (\\b, \\f) while preserving legitimate file paths
|
|
||||||
POSTGRES_JSON_ESCAPES = re.compile(
|
|
||||||
r"\\u000[0-8]|\\u000[bB]|\\u000[cC]|\\u00[0-1][0-9a-fA-F]|\\u007[fF]|(?<!\\)\\[bf](?!\\)"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def to_dict(data) -> dict:
|
def to_dict(data) -> dict:
|
||||||
if isinstance(data, BaseModel):
|
if isinstance(data, BaseModel):
|
||||||
@@ -130,24 +132,67 @@ def convert_pydantic_to_json(output_data: Any) -> Any:
|
|||||||
return output_data
|
return output_data
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_value(value: Any) -> Any:
    """
    Recursively sanitize values by removing PostgreSQL-incompatible control characters.

    The characters removed are exactly those matched by POSTGRES_CONTROL_CHARS
    (U+0000-U+0008, U+000B-U+000C, U+000E-U+001F and U+007F); tab, newline and
    carriage return are kept.

    This function walks through data structures and handles:
    - Strings: Remove control chars directly from the string
    - Dicts: Recursively sanitize keys and values
    - Lists / tuples: Recursively sanitize each element (container type is kept)
    - Pydantic models: Dump to a dict (dropping None fields), then sanitize it
    - Other types: Return as-is

    Args:
        value: The value to sanitize

    Returns:
        Sanitized version of the value with control characters removed
    """
    if isinstance(value, str):
        # Work on the real string, not its JSON encoding, so text that merely
        # looks like an escape sequence (e.g. a literal backslash-u) is untouched.
        return POSTGRES_CONTROL_CHARS.sub("", value)

    if isinstance(value, dict):
        # Keys are sanitized too. Keys that differ only by control characters
        # collapse into a single entry; the last one iterated wins.
        return {_sanitize_value(k): _sanitize_value(v) for k, v in value.items()}

    if isinstance(value, list):
        return [_sanitize_value(item) for item in value]

    if isinstance(value, tuple):
        return tuple(_sanitize_value(item) for item in value)

    if isinstance(value, BaseModel):
        # A model nested inside a container would otherwise be returned as an
        # opaque object with its string fields unsanitized. Dump it with the same
        # exclude_none=True setting SafeJson applies to a top-level model.
        return _sanitize_value(value.model_dump(exclude_none=True))

    # For other types (int, float, bool, None, etc.), return as-is
    return value
|
||||||
|
|
||||||
|
|
||||||
def SafeJson(data: Any) -> Json:
    """
    Safely serialize data and return Prisma's Json type.
    Sanitizes control characters to prevent PostgreSQL 22P05 errors.

    This function:
    1. Converts a top-level Pydantic model to a dict (None fields are dropped)
    2. Recursively removes PostgreSQL-incompatible control characters from strings
    3. Returns a Prisma Json object wrapping the sanitized data

    Sanitization runs on the Python data structure rather than on a serialized
    JSON string, so only real control characters are removed; backslashes and
    text that merely resembles an escape sequence are preserved.

    Args:
        data: Input data to sanitize and convert to Json

    Returns:
        Prisma Json object with control characters removed

    Examples:
        >>> SafeJson({"text": "Hello\\x00World"})  # null char removed
        >>> SafeJson({"path": "C:\\\\temp"})  # backslashes preserved
        >>> SafeJson({"data": "Text\\\\u0000here"})  # literal backslash-u preserved
    """
    # Convert Pydantic models to dict first
    if isinstance(data, BaseModel):
        data = data.model_dump(exclude_none=True)

    # Sanitize the data structure by removing control characters
    sanitized_data = _sanitize_value(data)

    # Return as Prisma Json type
    return Json(sanitized_data)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user