mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(backend/copilot): address review feedback - size limits, prompting, tests
- Move E2B-specific bridging text from shared prompt section to E2B supplement's extra_notes (MAJOR 1) - Add size cap to _bridge_to_sandbox: <=5MB uses shell base64 to /tmp, 5-50MB uses sandbox.files.write to /home/user, >50MB skipped (MAJOR 2) - Add 7 unit tests for _bridge_to_sandbox covering happy path, skip conditions, error handling, and size-based routing (MINOR 3) - Fix inaccurate comment about tool-outputs name origin (NIT 7) - Update is_allowed_local_path docstring to mention tool-outputs (NIT 9) - Add prompting guidance for handling base64 images in tool outputs (save to workspace, show via download URL) - Add prompting guidance for using @@agptfile: references instead of copy-pasting large data between tools - Add no-op server/graph_cleanup fixtures to sdk/conftest.py so SDK unit tests don't require Postgres
This commit is contained in:
@@ -149,7 +149,8 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
|
||||
|
||||
Allowed:
|
||||
- Files under *sdk_cwd* (``/tmp/copilot-<session>/``)
|
||||
- Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``.
|
||||
- Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``
|
||||
or ``tool-outputs/...``.
|
||||
The SDK nests tool-results under a conversation UUID directory;
|
||||
the UUID segment is validated with ``_UUID_RE``.
|
||||
"""
|
||||
@@ -178,7 +179,8 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
|
||||
# The SDK always creates a conversation UUID directory between
|
||||
# the project dir and the tool directory.
|
||||
# Accept both "tool-results" (SDK's persisted outputs) and
|
||||
# "tool-outputs" (alternate name used by some SDK versions).
|
||||
# "tool-outputs" (the model sometimes confuses workspace paths
|
||||
# with filesystem paths and generates this variant).
|
||||
if resolved.startswith(project_dir + os.sep):
|
||||
relative = resolved[len(project_dir) + 1 :]
|
||||
parts = relative.split(os.sep)
|
||||
|
||||
@@ -18,6 +18,18 @@ After `write_workspace_file`, embed the `download_url` in Markdown:
|
||||
- Image: ``
|
||||
- Video: ``
|
||||
|
||||
### Handling binary/image data in tool outputs — CRITICAL
|
||||
When a tool output contains base64-encoded binary data (images, PDFs, etc.):
|
||||
1. **NEVER** try to inline or render the base64 content in your response.
|
||||
2. **Save** the data to workspace using `write_workspace_file` (pass the base64 data URI as content).
|
||||
3. **Show** the result via the workspace download URL in Markdown: ``.
|
||||
|
||||
### Passing large data between tools — CRITICAL
|
||||
When tool outputs produce large text that you need to feed into another tool:
|
||||
- **NEVER** copy-paste the full text into the next tool call argument.
|
||||
- **Save** the output to a file (workspace or local), then use `@@agptfile:` references.
|
||||
- This avoids token limits and ensures data integrity.
|
||||
|
||||
### File references — @@agptfile:
|
||||
Pass large file content to tools by reference: `@@agptfile:<uri>[<start>-<end>]`
|
||||
- `workspace://<file_id>` or `workspace:///<path>` — workspace files
|
||||
@@ -138,6 +150,10 @@ parent autopilot handles orchestration.
|
||||
# E2B-only notes — E2B has full internet access so gh CLI works there.
|
||||
# Not shown in local (bubblewrap) mode: --unshare-net blocks all network.
|
||||
_E2B_TOOL_NOTES = """
|
||||
### SDK tool-result files in E2B
|
||||
When you `Read` an SDK tool-result file, it is automatically copied into the
|
||||
sandbox at `/tmp/<filename>` so `bash_exec` can access it for further processing.
|
||||
|
||||
### GitHub CLI (`gh`) and git
|
||||
- If the user has connected their GitHub account, both `gh` and `git` are
|
||||
pre-authenticated — use them directly without any manual login step.
|
||||
@@ -212,9 +228,7 @@ Important files (code, configs, outputs) should be saved to workspace to ensure
|
||||
### SDK tool-result files
|
||||
When tool outputs are large, the SDK truncates them and saves the full output to
|
||||
a local file under `~/.claude/projects/.../tool-results/` (or `tool-outputs/`).
|
||||
To read these files, use `Read` — it reads from the host filesystem and
|
||||
automatically copies the file into the sandbox at `/tmp/<filename>` so
|
||||
`bash_exec` can access it for further processing.
|
||||
To read these files, use `Read` — it reads from the host filesystem.
|
||||
|
||||
### Large tool outputs saved to workspace
|
||||
When a tool output contains `<tool-output-truncated workspace_path="...">`, the
|
||||
|
||||
@@ -6,10 +6,23 @@ from unittest.mock import patch
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from backend.util import json
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session", loop_scope="session")
|
||||
async def server(): # type: ignore[override]
|
||||
"""No-op server stub — SDK tests don't need the full backend."""
|
||||
return None
|
||||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session", loop_scope="session", autouse=True)
|
||||
async def graph_cleanup(): # type: ignore[override]
|
||||
"""No-op graph cleanup stub."""
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def mock_chat_config():
|
||||
"""Mock ChatConfig so compact_transcript tests skip real config lookup."""
|
||||
|
||||
@@ -311,6 +311,13 @@ async def _handle_grep(args: dict[str, Any]) -> dict[str, Any]:
|
||||
|
||||
# Bridging: copy SDK-internal files into E2B sandbox
|
||||
|
||||
# Files larger than this are written to /home/user/ via sandbox.files.write()
|
||||
# instead of /tmp/ via shell base64, to avoid shell argument length limits
|
||||
# and E2B command timeouts.
|
||||
_BRIDGE_SHELL_MAX_BYTES = 5 * 1024 * 1024 # 5 MB
|
||||
# Files larger than this are skipped entirely to avoid excessive transfer times.
|
||||
_BRIDGE_SKIP_BYTES = 50 * 1024 * 1024 # 50 MB
|
||||
|
||||
|
||||
async def _bridge_to_sandbox(
|
||||
sandbox: Any, file_path: str, offset: int, limit: int
|
||||
@@ -318,23 +325,43 @@ async def _bridge_to_sandbox(
|
||||
"""Best-effort copy of a host-side SDK file into the E2B sandbox.
|
||||
|
||||
When the model reads an SDK-internal file (e.g. tool-results), it often
|
||||
wants to process the data with bash. Copying the file into ``/tmp/``
|
||||
wants to process the data with bash. Copying the file into the sandbox
|
||||
under a stable name lets ``bash_exec`` access it without extra steps.
|
||||
|
||||
Only copies when offset=0 and limit is large enough to indicate the model
|
||||
wants the full file. Errors are logged but never propagated.
|
||||
|
||||
Size handling:
|
||||
- <= 5 MB: written to ``/tmp/<basename>`` via shell base64 (``_sandbox_write``).
|
||||
- 5-50 MB: written to ``/home/user/<basename>`` via ``sandbox.files.write()``
|
||||
to avoid shell argument length limits.
|
||||
- > 50 MB: skipped entirely with a warning.
|
||||
"""
|
||||
if offset != 0 or limit < 2000:
|
||||
return
|
||||
basename = os.path.basename(file_path)
|
||||
sandbox_path = f"/tmp/{basename}"
|
||||
try:
|
||||
expanded = os.path.realpath(os.path.expanduser(file_path))
|
||||
file_size = os.path.getsize(expanded)
|
||||
if file_size > _BRIDGE_SKIP_BYTES:
|
||||
logger.warning(
|
||||
"[E2B] Skipping bridge for large file (%d bytes): %s",
|
||||
file_size,
|
||||
basename,
|
||||
)
|
||||
return
|
||||
with open(expanded, "rb") as fh:
|
||||
content = fh.read()
|
||||
await _sandbox_write(
|
||||
sandbox, sandbox_path, content.decode("utf-8", errors="replace")
|
||||
)
|
||||
if file_size <= _BRIDGE_SHELL_MAX_BYTES:
|
||||
sandbox_path = f"/tmp/{basename}"
|
||||
await _sandbox_write(
|
||||
sandbox, sandbox_path, content.decode("utf-8", errors="replace")
|
||||
)
|
||||
else:
|
||||
sandbox_path = f"/home/user/{basename}"
|
||||
await sandbox.files.write(
|
||||
sandbox_path, content.decode("utf-8", errors="replace")
|
||||
)
|
||||
logger.info(
|
||||
"[E2B] Bridged SDK file to sandbox: %s -> %s", basename, sandbox_path
|
||||
)
|
||||
|
||||
@@ -13,6 +13,9 @@ import pytest
|
||||
from backend.copilot.context import E2B_WORKDIR, SDK_PROJECTS_DIR, _current_project_dir
|
||||
|
||||
from .e2b_file_tools import (
|
||||
_BRIDGE_SHELL_MAX_BYTES,
|
||||
_BRIDGE_SKIP_BYTES,
|
||||
_bridge_to_sandbox,
|
||||
_check_sandbox_symlink_escape,
|
||||
_read_local,
|
||||
_sandbox_write,
|
||||
@@ -354,3 +357,107 @@ class TestSandboxWrite:
|
||||
encoded_in_cmd = call_args.split("echo ")[1].split(" |")[0].strip("'")
|
||||
decoded = base64.b64decode(encoded_in_cmd).decode()
|
||||
assert decoded == content
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _bridge_to_sandbox — copy SDK-internal files into E2B sandbox
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_bridge_sandbox() -> SimpleNamespace:
|
||||
"""Build a sandbox mock suitable for _bridge_to_sandbox tests."""
|
||||
run_result = SimpleNamespace(stdout="", stderr="", exit_code=0)
|
||||
commands = SimpleNamespace(run=AsyncMock(return_value=run_result))
|
||||
files = SimpleNamespace(write=AsyncMock())
|
||||
return SimpleNamespace(commands=commands, files=files)
|
||||
|
||||
|
||||
class TestBridgeToSandbox:
|
||||
@pytest.mark.asyncio
|
||||
async def test_happy_path_small_file(self, tmp_path):
|
||||
"""A small file is bridged to /tmp/<basename> via _sandbox_write."""
|
||||
f = tmp_path / "result.json"
|
||||
f.write_text('{"ok": true}')
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
|
||||
|
||||
sandbox.commands.run.assert_called_once()
|
||||
cmd = sandbox.commands.run.call_args[0][0]
|
||||
assert "result.json" in cmd
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_skip_when_offset_nonzero(self, tmp_path):
|
||||
"""Bridging is skipped when offset != 0 (partial read)."""
|
||||
f = tmp_path / "data.txt"
|
||||
f.write_text("content")
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
await _bridge_to_sandbox(sandbox, str(f), offset=10, limit=2000)
|
||||
|
||||
sandbox.commands.run.assert_not_called()
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_skip_when_limit_too_small(self, tmp_path):
|
||||
"""Bridging is skipped when limit < 2000 (partial read)."""
|
||||
f = tmp_path / "data.txt"
|
||||
f.write_text("content")
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=100)
|
||||
|
||||
sandbox.commands.run.assert_not_called()
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_nonexistent_file_does_not_raise(self, tmp_path):
|
||||
"""Bridging a non-existent file logs but does not propagate errors."""
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
await _bridge_to_sandbox(
|
||||
sandbox, str(tmp_path / "ghost.txt"), offset=0, limit=2000
|
||||
)
|
||||
|
||||
sandbox.commands.run.assert_not_called()
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_sandbox_write_failure_does_not_raise(self, tmp_path):
|
||||
"""If sandbox write fails, the error is swallowed (best-effort)."""
|
||||
f = tmp_path / "data.txt"
|
||||
f.write_text("content")
|
||||
sandbox = _make_bridge_sandbox()
|
||||
sandbox.commands.run.side_effect = RuntimeError("E2B timeout")
|
||||
|
||||
await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_large_file_uses_files_api(self, tmp_path):
|
||||
"""Files > 5 MB but <= 50 MB are written to /home/user/ via files.write."""
|
||||
f = tmp_path / "big.json"
|
||||
f.write_bytes(b"x" * (_BRIDGE_SHELL_MAX_BYTES + 1))
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
|
||||
|
||||
sandbox.files.write.assert_called_once()
|
||||
call_args = sandbox.files.write.call_args[0]
|
||||
assert call_args[0] == "/home/user/big.json"
|
||||
sandbox.commands.run.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_very_large_file_skipped(self, tmp_path):
|
||||
"""Files > 50 MB are skipped entirely."""
|
||||
f = tmp_path / "huge.bin"
|
||||
# Create a sparse file to avoid actually writing 50 MB
|
||||
with open(f, "wb") as fh:
|
||||
fh.seek(_BRIDGE_SKIP_BYTES + 1)
|
||||
fh.write(b"\0")
|
||||
sandbox = _make_bridge_sandbox()
|
||||
|
||||
await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
|
||||
|
||||
sandbox.commands.run.assert_not_called()
|
||||
sandbox.files.write.assert_not_called()
|
||||
|
||||
Reference in New Issue
Block a user