fix(backend): lower bridge shell threshold and add collision-free sandbox paths

- Lower _BRIDGE_SHELL_MAX_BYTES from 5 MB to 32 KB to stay within shell argument length limits when base64-encoding content for shell transfer.
- Prefix bridged sandbox filenames with a 12-char SHA-256 hash of the full source path to prevent collisions when different source files share the same basename (e.g. multiple result.json files).
- Log file_path instead of basename in the exception handler to avoid a potential NameError, since basename is now assigned inside the try block and may not be set when the handler runs.
2026-04-08 03:00:28 -04:00 · 2026-04-02 09:07:43 +02:00
parent 015e0d591e
commit dd34b0dc48
2 changed files with 35 additions and 14 deletions
--- a/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools.py
@@ -8,6 +8,7 @@ SDK-internal paths (``~/.claude/projects/…/tool-results/``) are handled
 by the separate ``Read`` MCP tool registered in ``tool_adapter.py``.
 """

+import hashlib
 import itertools
 import json
 import logging
@@ -317,8 +318,9 @@ async def _handle_grep(args: dict[str, Any]) -> dict[str, Any]:

 # Files larger than this are written to /home/user/ via sandbox.files.write()
 # instead of /tmp/ via shell base64, to avoid shell argument length limits
-# and E2B command timeouts.
-_BRIDGE_SHELL_MAX_BYTES = 5 * 1024 * 1024  # 5 MB
+# and E2B command timeouts.  Base64 expands content by ~33%, so keep this
+# well under the Linux per-argument limit (MAX_ARG_STRLEN, 128 KB).
+_BRIDGE_SHELL_MAX_BYTES = 32 * 1024  # 32 KB
 # Files larger than this are skipped entirely to avoid excessive transfer times.
 _BRIDGE_SKIP_BYTES = 50 * 1024 * 1024  # 50 MB

@@ -338,16 +340,23 @@ async def _bridge_to_sandbox(
    Returns the sandbox path on success, or ``None`` on skip/failure.

    Size handling:
-    - <= 5 MB: written to ``/tmp/<basename>`` via shell base64 (``_sandbox_write``).
-    - 5-50 MB: written to ``/home/user/<basename>`` via ``sandbox.files.write()``
-      to avoid shell argument length limits.
+    - <= 32 KB: written to ``/tmp/<hash>-<basename>`` via shell base64
+      (``_sandbox_write``).  Kept small to stay within ARG_MAX.
+    - 32 KB - 50 MB: written to ``/home/user/<hash>-<basename>`` via
+      ``sandbox.files.write()`` to avoid shell argument length limits.
    - > 50 MB: skipped entirely with a warning.
+
+    The sandbox filename is prefixed with a short hash of the full source
+    path to avoid collisions when different source files share the same
+    basename (e.g. multiple ``result.json`` files).
    """
    if offset != 0 or limit < 2000:
        return None
-    basename = os.path.basename(file_path)
    try:
        expanded = os.path.realpath(os.path.expanduser(file_path))
+        basename = os.path.basename(expanded)
+        source_id = hashlib.sha256(expanded.encode()).hexdigest()[:12]
+        unique_name = f"{source_id}-{basename}"
        file_size = os.path.getsize(expanded)
        if file_size > _BRIDGE_SKIP_BYTES:
            logger.warning(
@@ -359,12 +368,12 @@ async def _bridge_to_sandbox(
        with open(expanded, "rb") as fh:
            content = fh.read()
        if file_size <= _BRIDGE_SHELL_MAX_BYTES:
-            sandbox_path = f"/tmp/{basename}"
+            sandbox_path = f"/tmp/{unique_name}"
            await _sandbox_write(
                sandbox, sandbox_path, content.decode("utf-8", errors="replace")
            )
        else:
-            sandbox_path = f"/home/user/{basename}"
+            sandbox_path = f"/home/user/{unique_name}"
            await sandbox.files.write(
                sandbox_path, content.decode("utf-8", errors="replace")
            )
@@ -375,7 +384,7 @@ async def _bridge_to_sandbox(
    except Exception:
        logger.debug(
            "[E2B] Failed to bridge SDK file to sandbox: %s",
-            basename,
+            file_path,
            exc_info=True,
        )
        return None
--- a/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools_test.py
@@ -3,6 +3,7 @@
 Pure unit tests with no external dependencies (no E2B, no sandbox).
 """

+import hashlib
 import os
 import shutil
 from types import SimpleNamespace
@@ -22,6 +23,15 @@ from .e2b_file_tools import (
    resolve_sandbox_path,
 )

+
+def _expected_bridge_path(file_path: str, prefix: str = "/tmp") -> str:
+    """Compute the expected sandbox path for a bridged file."""
+    expanded = os.path.realpath(os.path.expanduser(file_path))
+    basename = os.path.basename(expanded)
+    source_id = hashlib.sha256(expanded.encode()).hexdigest()[:12]
+    return f"{prefix}/{source_id}-{basename}"
+
+
 # ---------------------------------------------------------------------------
 # resolve_sandbox_path — sandbox path normalisation & boundary enforcement
 # ---------------------------------------------------------------------------
@@ -375,14 +385,15 @@ def _make_bridge_sandbox() -> SimpleNamespace:
 class TestBridgeToSandbox:
    @pytest.mark.asyncio
    async def test_happy_path_small_file(self, tmp_path):
-        """A small file is bridged to /tmp/<basename> via _sandbox_write."""
+        """A small file is bridged to /tmp/<hash>-<basename> via _sandbox_write."""
        f = tmp_path / "result.json"
        f.write_text('{"ok": true}')
        sandbox = _make_bridge_sandbox()

        result = await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)

-        assert result == "/tmp/result.json"
+        expected = _expected_bridge_path(str(f))
+        assert result == expected
        sandbox.commands.run.assert_called_once()
        cmd = sandbox.commands.run.call_args[0][0]
        assert "result.json" in cmd
@@ -439,17 +450,18 @@ class TestBridgeToSandbox:

    @pytest.mark.asyncio
    async def test_large_file_uses_files_api(self, tmp_path):
-        """Files > 5 MB but <= 50 MB are written to /home/user/ via files.write."""
+        """Files > 32 KB but <= 50 MB are written to /home/user/ via files.write."""
        f = tmp_path / "big.json"
        f.write_bytes(b"x" * (_BRIDGE_SHELL_MAX_BYTES + 1))
        sandbox = _make_bridge_sandbox()

        result = await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)

-        assert result == "/home/user/big.json"
+        expected = _expected_bridge_path(str(f), prefix="/home/user")
+        assert result == expected
        sandbox.files.write.assert_called_once()
        call_args = sandbox.files.write.call_args[0]
-        assert call_args[0] == "/home/user/big.json"
+        assert call_args[0] == expected
        sandbox.commands.run.assert_not_called()

    @pytest.mark.asyncio