fix(backend/copilot): address review feedback - size limits, prompting, tests

- Move E2B-specific bridging text from shared prompt section to E2B supplement's extra_notes (MAJOR 1)
- Add size cap to _bridge_to_sandbox: <=5MB uses shell base64 to /tmp, 5-50MB uses sandbox.files.write to /home/user, >50MB skipped (MAJOR 2)
- Add 7 unit tests for _bridge_to_sandbox covering happy path, skip conditions, error handling, and size-based routing (MINOR 3)
- Fix inaccurate comment about tool-outputs name origin (NIT 7)
- Update is_allowed_local_path docstring to mention tool-outputs (NIT 9)
- Add prompting guidance for handling base64 images in tool outputs (save to workspace, show via download URL)
- Add prompting guidance for using @@agptfile: references instead of copy-pasting large data between tools
- Add no-op server/graph_cleanup fixtures to sdk/conftest.py so SDK unit tests don't require Postgres
2026-04-08 03:00:28 -04:00 · 2026-04-02 07:56:49 +02:00
parent f3dd708cf6
commit 66afca6e0c
5 changed files with 173 additions and 10 deletions
--- a/autogpt_platform/backend/backend/copilot/context.py
+++ b/autogpt_platform/backend/backend/copilot/context.py
@@ -149,7 +149,8 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:

    Allowed:
    - Files under *sdk_cwd* (``/tmp/copilot-<session>/``)
-    - Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``.
+    - Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``
+      or ``tool-outputs/...``.
      The SDK nests tool-results under a conversation UUID directory;
      the UUID segment is validated with ``_UUID_RE``.
    """
@@ -178,7 +179,8 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
        # The SDK always creates a conversation UUID directory between
        # the project dir and the tool directory.
        # Accept both "tool-results" (SDK's persisted outputs) and
-        # "tool-outputs" (alternate name used by some SDK versions).
+        # "tool-outputs" (the model sometimes confuses workspace paths
+        # with filesystem paths and generates this variant).
        if resolved.startswith(project_dir + os.sep):
            relative = resolved[len(project_dir) + 1 :]
            parts = relative.split(os.sep)
--- a/autogpt_platform/backend/backend/copilot/prompting.py
+++ b/autogpt_platform/backend/backend/copilot/prompting.py
@@ -18,6 +18,18 @@ After `write_workspace_file`, embed the `download_url` in Markdown:
 - Image: `![chart](workspace://file_id#image/png)`
 - Video: `![recording](workspace://file_id#video/mp4)`

+### Handling binary/image data in tool outputs — CRITICAL
+When a tool output contains base64-encoded binary data (images, PDFs, etc.):
+1. **NEVER** try to inline or render the base64 content in your response.
+2. **Save** the data to workspace using `write_workspace_file` (pass the base64 data URI as content).
+3. **Show** the result via the workspace download URL in Markdown: `![image](workspace://file_id#image/png)`.
+
+### Passing large data between tools — CRITICAL
+When tool outputs produce large text that you need to feed into another tool:
+- **NEVER** copy-paste the full text into the next tool call argument.
+- **Save** the output to a file (workspace or local), then use `@@agptfile:` references.
+- This avoids token limits and ensures data integrity.
+
 ### File references — @@agptfile:
 Pass large file content to tools by reference: `@@agptfile:<uri>[<start>-<end>]`
 - `workspace://<file_id>` or `workspace:///<path>` — workspace files
@@ -138,6 +150,10 @@ parent autopilot handles orchestration.
 # E2B-only notes — E2B has full internet access so gh CLI works there.
 # Not shown in local (bubblewrap) mode: --unshare-net blocks all network.
 _E2B_TOOL_NOTES = """
+### SDK tool-result files in E2B
+When you `Read` an SDK tool-result file, it is automatically copied into the sandbox
+for `bash_exec`: `/tmp/<filename>` if <= 5 MB, `/home/user/<filename>` if <= 50 MB, else skipped.
+
 ### GitHub CLI (`gh`) and git
 - If the user has connected their GitHub account, both `gh` and `git` are
  pre-authenticated — use them directly without any manual login step.
@@ -212,9 +228,7 @@ Important files (code, configs, outputs) should be saved to workspace to ensure
 ### SDK tool-result files
 When tool outputs are large, the SDK truncates them and saves the full output to
 a local file under `~/.claude/projects/.../tool-results/` (or `tool-outputs/`).
-To read these files, use `Read` — it reads from the host filesystem and
-automatically copies the file into the sandbox at `/tmp/<filename>` so
-`bash_exec` can access it for further processing.
+To read these files, use `Read` — it reads from the host filesystem.

 ### Large tool outputs saved to workspace
 When a tool output contains `<tool-output-truncated workspace_path="...">`, the
--- a/autogpt_platform/backend/backend/copilot/sdk/conftest.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/conftest.py
@@ -6,10 +6,23 @@ from unittest.mock import patch
 from uuid import uuid4

 import pytest
+import pytest_asyncio

 from backend.util import json


+@pytest_asyncio.fixture(scope="session", loop_scope="session")
+async def server():  # type: ignore[override]
+    """No-op server stub — SDK tests don't need the full backend."""
+    return None
+
+
+@pytest_asyncio.fixture(scope="session", loop_scope="session", autouse=True)
+async def graph_cleanup():  # type: ignore[override]
+    """No-op graph cleanup stub."""
+    yield
+
+
@pytest.fixture()
 def mock_chat_config():
    """Mock ChatConfig so compact_transcript tests skip real config lookup."""
--- a/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools.py
@@ -311,6 +311,13 @@ async def _handle_grep(args: dict[str, Any]) -> dict[str, Any]:

 # Bridging: copy SDK-internal files into E2B sandbox

+# Files larger than this are written to /home/user/ via sandbox.files.write()
+# instead of /tmp/ via shell base64, to avoid shell argument length limits
+# and E2B command timeouts.
+_BRIDGE_SHELL_MAX_BYTES = 5 * 1024 * 1024  # 5 MB
+# Files larger than this are skipped entirely to avoid excessive transfer times.
+_BRIDGE_SKIP_BYTES = 50 * 1024 * 1024  # 50 MB
+

 async def _bridge_to_sandbox(
    sandbox: Any, file_path: str, offset: int, limit: int
@@ -318,23 +325,43 @@ async def _bridge_to_sandbox(
    """Best-effort copy of a host-side SDK file into the E2B sandbox.

    When the model reads an SDK-internal file (e.g. tool-results), it often
-    wants to process the data with bash.  Copying the file into ``/tmp/``
+    wants to process the data with bash.  Copying the file into the sandbox
    under a stable name lets ``bash_exec`` access it without extra steps.

    Only copies when offset=0 and limit is large enough to indicate the model
    wants the full file.  Errors are logged but never propagated.
+
+    Size handling:
+    - <= 5 MB: written to ``/tmp/<basename>`` via shell base64 (``_sandbox_write``).
+    - 5-50 MB: written to ``/home/user/<basename>`` via ``sandbox.files.write()``
+      to avoid shell argument length limits.
+    - > 50 MB: skipped entirely with a warning.
    """
    if offset != 0 or limit < 2000:
        return
    basename = os.path.basename(file_path)
-    sandbox_path = f"/tmp/{basename}"
    try:
        expanded = os.path.realpath(os.path.expanduser(file_path))
+        file_size = os.path.getsize(expanded)
+        if file_size > _BRIDGE_SKIP_BYTES:
+            logger.warning(
+                "[E2B] Skipping bridge for large file (%d bytes): %s",
+                file_size,
+                basename,
+            )
+            return
        with open(expanded, "rb") as fh:
            content = fh.read()
-        await _sandbox_write(
-            sandbox, sandbox_path, content.decode("utf-8", errors="replace")
-        )
+        if file_size <= _BRIDGE_SHELL_MAX_BYTES:
+            sandbox_path = f"/tmp/{basename}"
+            await _sandbox_write(
+                sandbox, sandbox_path, content.decode("utf-8", errors="replace")
+            )
+        else:
+            sandbox_path = f"/home/user/{basename}"
+            await sandbox.files.write(
+                sandbox_path, content.decode("utf-8", errors="replace")
+            )
        logger.info(
            "[E2B] Bridged SDK file to sandbox: %s -> %s", basename, sandbox_path
        )
--- a/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools_test.py
@@ -13,6 +13,9 @@ import pytest
 from backend.copilot.context import E2B_WORKDIR, SDK_PROJECTS_DIR, _current_project_dir

 from .e2b_file_tools import (
+    _BRIDGE_SHELL_MAX_BYTES,
+    _BRIDGE_SKIP_BYTES,
+    _bridge_to_sandbox,
    _check_sandbox_symlink_escape,
    _read_local,
    _sandbox_write,
@@ -354,3 +357,107 @@ class TestSandboxWrite:
        encoded_in_cmd = call_args.split("echo ")[1].split(" |")[0].strip("'")
        decoded = base64.b64decode(encoded_in_cmd).decode()
        assert decoded == content
+
+
+# ---------------------------------------------------------------------------
+# _bridge_to_sandbox — copy SDK-internal files into E2B sandbox
+# ---------------------------------------------------------------------------
+
+
+def _make_bridge_sandbox() -> SimpleNamespace:
+    """Build a sandbox mock suitable for _bridge_to_sandbox tests."""
+    run_result = SimpleNamespace(stdout="", stderr="", exit_code=0)
+    commands = SimpleNamespace(run=AsyncMock(return_value=run_result))
+    files = SimpleNamespace(write=AsyncMock())
+    return SimpleNamespace(commands=commands, files=files)
+
+
+class TestBridgeToSandbox:
+    @pytest.mark.asyncio
+    async def test_happy_path_small_file(self, tmp_path):
+        """A small file is bridged to /tmp/<basename> via _sandbox_write."""
+        f = tmp_path / "result.json"
+        f.write_text('{"ok": true}')
+        sandbox = _make_bridge_sandbox()
+
+        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
+
+        sandbox.commands.run.assert_called_once()
+        cmd = sandbox.commands.run.call_args[0][0]
+        assert "result.json" in cmd
+        sandbox.files.write.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_skip_when_offset_nonzero(self, tmp_path):
+        """Bridging is skipped when offset != 0 (partial read)."""
+        f = tmp_path / "data.txt"
+        f.write_text("content")
+        sandbox = _make_bridge_sandbox()
+
+        await _bridge_to_sandbox(sandbox, str(f), offset=10, limit=2000)
+
+        sandbox.commands.run.assert_not_called()
+        sandbox.files.write.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_skip_when_limit_too_small(self, tmp_path):
+        """Bridging is skipped when limit < 2000 (partial read)."""
+        f = tmp_path / "data.txt"
+        f.write_text("content")
+        sandbox = _make_bridge_sandbox()
+
+        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=100)
+
+        sandbox.commands.run.assert_not_called()
+        sandbox.files.write.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_nonexistent_file_does_not_raise(self, tmp_path):
+        """Bridging a non-existent file logs but does not propagate errors."""
+        sandbox = _make_bridge_sandbox()
+
+        await _bridge_to_sandbox(
+            sandbox, str(tmp_path / "ghost.txt"), offset=0, limit=2000
+        )
+
+        sandbox.commands.run.assert_not_called()
+        sandbox.files.write.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_sandbox_write_failure_does_not_raise(self, tmp_path):
+        """If sandbox write fails, the error is swallowed (best-effort)."""
+        f = tmp_path / "data.txt"
+        f.write_text("content")
+        sandbox = _make_bridge_sandbox()
+        sandbox.commands.run.side_effect = RuntimeError("E2B timeout")
+
+        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
+
+    @pytest.mark.asyncio
+    async def test_large_file_uses_files_api(self, tmp_path):
+        """Files > 5 MB but <= 50 MB are written to /home/user/ via files.write."""
+        f = tmp_path / "big.json"
+        f.write_bytes(b"x" * (_BRIDGE_SHELL_MAX_BYTES + 1))
+        sandbox = _make_bridge_sandbox()
+
+        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
+
+        sandbox.files.write.assert_called_once()
+        call_args = sandbox.files.write.call_args[0]
+        assert call_args[0] == "/home/user/big.json"
+        sandbox.commands.run.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_very_large_file_skipped(self, tmp_path):
+        """Files > 50 MB are skipped entirely."""
+        f = tmp_path / "huge.bin"
+        # Create a sparse file to avoid actually writing 50 MB
+        with open(f, "wb") as fh:
+            fh.seek(_BRIDGE_SKIP_BYTES + 1)
+            fh.write(b"\0")
+        sandbox = _make_bridge_sandbox()
+
+        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
+
+        sandbox.commands.run.assert_not_called()
+        sandbox.files.write.assert_not_called()