AutoGPT/autogpt_platform/backend/backend/util/json.py
Zamil Majdy 29da8db48e feat(copilot): E2B cloud sandbox — unified file tools, persistent execution, output truncation (#12212)
## Summary

- **E2B file tools**: New MCP tools
(`read_file`/`write_file`/`edit_file`/`glob`/`grep`) that operate
directly on the E2B sandbox filesystem (`/home/user`). When E2B is
active, these replace SDK built-in `Read/Write/Edit/Glob/Grep` so all
tools share a single coherent filesystem with `bash_exec` — no sync
needed.
- **E2B sandbox lifecycle**: New `e2b_sandbox.py` manages sandbox
creation and reconnection via Redis, with stale-key cleanup on
reconnection failure (see the sketch after this list).
- **E2B enabled by default**: `use_e2b_sandbox` defaults to `True`; set
`CHAT_USE_E2B_SANDBOX=false` to disable.
- **Centralized output truncation**: All MCP tool outputs are truncated
via a `_truncating` wrapper and stashed in `_pending_tool_outputs` to
bypass the SDK's head-truncation for the frontend.
- **Frontend tool display**: `GenericTool.tsx` now renders bash
stdout/stderr, file content, edit diffs (old/new), todo lists, and
glob/grep results with category-specific icons and status text.
- **Workspace file tools + E2B**: `read_workspace_file`'s `save_to_path`
and `write_workspace_file`'s `source_path` route to E2B sandbox when
active.
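
The sandbox lifecycle follows roughly the pattern below. This is a minimal sketch only: the helper name, Redis key format, and TTL are assumptions, not the actual `e2b_sandbox.py` API.

```python
# Sketch assuming the e2b SDK's AsyncSandbox create/connect surface and a
# hypothetical Redis key scheme; not the real implementation.
import redis.asyncio as redis
from e2b import AsyncSandbox

r = redis.Redis()


async def get_or_create_sandbox(session_id: str) -> AsyncSandbox:
    key = f"copilot:e2b:{session_id}"  # hypothetical key format
    sandbox_id = await r.get(key)
    if sandbox_id:
        try:
            # Reconnect to the session's existing sandbox.
            return await AsyncSandbox.connect(sandbox_id.decode())
        except Exception:
            # Stale-key cleanup: the sandbox is gone, so drop the mapping.
            await r.delete(key)
    sandbox = await AsyncSandbox.create()  # fresh sandbox rooted at /home/user
    await r.set(key, sandbox.sandbox_id, ex=3600)  # TTL is an assumption
    return sandbox
```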

## Files changed

| Area | Files | What |
|------|-------|------|
| E2B file tools | `sdk/e2b_file_tools.py`, `sdk/e2b_file_tools_test.py` | MCP file tool handlers + tests |
| E2B sandbox | `tools/e2b_sandbox.py` | Sandbox lifecycle (create/reconnect/Redis) |
| Tool adapter | `sdk/tool_adapter.py` | MCP server, truncation, stash, path validation |
| Service | `sdk/service.py` | E2B integration, prompt supplements |
| Security | `sdk/security_hooks.py`, `sdk/security_hooks_test.py` | Path validation for E2B mode |
| Bash exec | `tools/bash_exec.py` | E2B execution path |
| Workspace files | `tools/workspace_files.py`, `tools/workspace_files_test.py` | E2B-aware save/source paths |
| Config | `copilot/config.py` | E2B config fields (default on) |
| Truncation | `util/truncate.py` | Middle-out truncation fix (sketched below) |
| Frontend | `GenericTool.tsx` | Tool-specific display rendering |
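
For the `util/truncate.py` row: middle-out truncation keeps the head and tail of an output and elides the middle, so both the start of a command's output and its final error or result survive. A minimal sketch of the idea (the function name and marker text are made up, not the actual implementation):

```python
def truncate_middle(text: str, limit: int, marker: str = " ...[truncated]... ") -> str:
    # Keep the start and end of `text`, dropping the middle, so the
    # result fits within `limit` characters.
    if len(text) <= limit:
        return text
    keep = limit - len(marker)
    if keep <= 0:
        return text[:limit]
    head = (keep + 1) // 2
    tail = keep - head
    return text[:head] + marker + (text[-tail:] if tail else "")
```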

## Test plan

- [x] `security_hooks_test.py` — 43 tests (path validation, tool access,
deny messages)
- [x] `e2b_file_tools_test.py` — 19 tests (path resolution, local read
safety)
- [x] `workspace_files_test.py` — 17 tests (ephemeral path validation)
- [x] CI green (backend 3.11/3.12/3.13, lint, types, e2e)


import logging
import re
from typing import Any, Type, TypeVar, overload

import jsonschema
import orjson
from fastapi.encoders import jsonable_encoder as to_dict
from prisma import Json

from .truncate import truncate
from .type import type_match

logger = logging.getLogger(__name__)

# Precompiled regex to remove PostgreSQL-incompatible control characters.
# Removes \u0000-\u0008, \u000B-\u000C, \u000E-\u001F, and \u007F
# (keeps tab \u0009, newline \u000A, and carriage return \u000D).
POSTGRES_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]")


def dumps(
    data: Any, *args: Any, indent: int | None = None, option: int = 0, **kwargs: Any
) -> str:
    """
    Serialize data to a JSON string with automatic conversion of Pydantic models
    and other complex types.

    The input is first converted to a JSON-serializable form using FastAPI's
    jsonable_encoder, then dumped with orjson.

    Parameters
    ----------
    data : Any
        The data to serialize. Can be any type, including Pydantic models,
        dicts, lists, etc.
    *args : Any
        Additional positional arguments (accepted for stdlib compatibility;
        unused).
    indent : int | None
        If not None, pretty-print with indentation.
    option : int
        orjson option flags (default: 0).
    **kwargs : Any
        Additional keyword arguments. Only ``default`` is forwarded to orjson;
        stdlib ``json`` parameters such as ``ensure_ascii`` and ``separators``
        are accepted but ignored.

    Returns
    -------
    str
        JSON string representation of the data.

    Examples
    --------
    >>> dumps({"name": "Alice", "age": 30})
    '{"name": "Alice", "age": 30}'
    >>> dumps(pydantic_model_instance, indent=2)
    '{\n  "field1": "value1",\n  "field2": "value2"\n}'
    """
    serializable_data = to_dict(data)

    # Handle the indent parameter (orjson only supports 2-space indentation).
    if indent is not None or kwargs.get("indent") is not None:
        option |= orjson.OPT_INDENT_2

    # orjson only accepts specific parameters; filter out stdlib json params.
    # ensure_ascii: unnecessary, since orjson always emits UTF-8.
    # separators: orjson uses compact separators by default.
    supported_orjson_params = {"default"}
    orjson_kwargs = {k: v for k, v in kwargs.items() if k in supported_orjson_params}

    return orjson.dumps(serializable_data, option=option, **orjson_kwargs).decode(
        "utf-8"
    )


T = TypeVar("T")


@overload
def loads(data: str | bytes, *args, target_type: Type[T], **kwargs) -> T: ...


@overload
def loads(data: str | bytes, *args, **kwargs) -> Any: ...


def loads(
    data: str | bytes, *args, target_type: Type[T] | None = None, **kwargs
) -> Any:
    """Parse JSON with orjson; if `target_type` is given, coerce the parsed
    value to that type via `type_match`."""
    parsed = orjson.loads(data)
    if target_type:
        return type_match(parsed, target_type)
    return parsed
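

# Illustrative usage of `loads` with `target_type` (the model below is
# hypothetical; `type_match` is assumed to coerce parsed JSON into the
# requested type):
#
#   from pydantic import BaseModel
#
#   class Point(BaseModel):
#       x: int
#       y: int
#
#   point = loads('{"x": 1, "y": 2}', target_type=Point)  # -> Point(x=1, y=2)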


def validate_with_jsonschema(
    schema: dict[str, Any], data: dict[str, Any]
) -> str | None:
    """
    Validate the data against the schema.

    Returns the validation error message if the data does not match the schema,
    or None if it validates.
    """
    try:
        jsonschema.validate(data, schema)
        return None
    except jsonschema.ValidationError as e:
        return str(e)
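
# Illustrative example (the error text comes from jsonschema and may include
# additional context lines):
#
#   error = validate_with_jsonschema({"type": "object", "required": ["name"]}, {})
#   # error starts with "'name' is a required property"; valid data returns None.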


def sanitize_string(value: str) -> str:
    """Remove PostgreSQL-incompatible control characters from a string.

    Strips \\x00-\\x08, \\x0B-\\x0C, \\x0E-\\x1F, and \\x7F while keeping tab,
    newline, and carriage return. Use this before inserting free-form text
    into PostgreSQL text/varchar columns.
    """
    return POSTGRES_CONTROL_CHARS.sub("", value)


def sanitize_json(data: Any) -> Any:
    try:
        # Two-pass approach for consistent string sanitization:
        # 1. Convert to basic JSON-serializable types (handles Pydantic models).
        # 2. Sanitize every string in the result.
        basic_result = to_dict(data)
        return to_dict(basic_result, custom_encoder={str: sanitize_string})
    except Exception as e:
        # Log the failure and fall back to a string representation.
        logger.error(
            "SafeJson fallback to string representation due to serialization error: %s (%s). "
            "Data type: %s, Data preview: %s",
            type(e).__name__,
            truncate(str(e), 200),
            type(data).__name__,
            truncate(str(data), 100),
        )
        # Ultimate fallback: convert to a string representation and sanitize.
        return sanitize_string(str(data))


class SafeJson(Json):
    """
    Safely serialize data and return Prisma's Json type.

    Sanitizes control characters to prevent PostgreSQL 22P05 errors. This class:

    1. Converts Pydantic models to dicts (recursively, using to_dict)
    2. Recursively removes PostgreSQL-incompatible control characters from strings
    3. Produces a Prisma Json object safe for database storage

    Uses to_dict (jsonable_encoder) with a custom encoder to handle both Pydantic
    conversion and control-character sanitization in a two-pass approach.

    Args:
        data: Input data to sanitize and convert to Json

    Examples:
        >>> SafeJson({"text": "Hello\\x00World"})  # null char removed
        >>> SafeJson({"path": "C:\\\\temp"})  # backslashes preserved
        >>> SafeJson({"data": "Text\\\\u0000here"})  # literal backslash-u preserved
    """

    def __init__(self, data: Any):
        super().__init__(sanitize_json(data))