fix(backend): lower bridge shell threshold and add collision-free sandbox paths

- Lower _BRIDGE_SHELL_MAX_BYTES from 5 MB to 32 KB to stay within
  ARG_MAX when base64-encoding content for shell transfer.
- Prefix bridged sandbox filenames with a 12-char SHA-256 hash of the
  full source path to prevent collisions when different source files
  share the same basename (e.g. multiple result.json files).
- Fix potential NameError in exception handler when basename is not yet
  assigned.
This commit is contained in:
Zamil Majdy
2026-04-02 09:07:43 +02:00
parent 015e0d591e
commit dd34b0dc48
2 changed files with 35 additions and 14 deletions

View File

@@ -8,6 +8,7 @@ SDK-internal paths (``~/.claude/projects/…/tool-results/``) are handled
by the separate ``Read`` MCP tool registered in ``tool_adapter.py``.
"""
import hashlib
import itertools
import json
import logging
@@ -317,8 +318,9 @@ async def _handle_grep(args: dict[str, Any]) -> dict[str, Any]:
# Files larger than this are written to /home/user/ via sandbox.files.write()
# instead of /tmp/ via shell base64, to avoid shell argument length limits
# and E2B command timeouts.
_BRIDGE_SHELL_MAX_BYTES = 5 * 1024 * 1024 # 5 MB
# and E2B command timeouts. Base64 expands content by ~33%, so keep this
# well under the Linux limit on a single command-line argument
# (MAX_ARG_STRLEN, 128 KB); the total ARG_MAX is usually much larger.
_BRIDGE_SHELL_MAX_BYTES = 32 * 1024 # 32 KB
# Files larger than this are skipped entirely to avoid excessive transfer times.
_BRIDGE_SKIP_BYTES = 50 * 1024 * 1024 # 50 MB
@@ -338,16 +340,23 @@ async def _bridge_to_sandbox(
Returns the sandbox path on success, or ``None`` on skip/failure.
Size handling:
- <= 5 MB: written to ``/tmp/<basename>`` via shell base64 (``_sandbox_write``).
- 5-50 MB: written to ``/home/user/<basename>`` via ``sandbox.files.write()``
to avoid shell argument length limits.
- <= 32 KB: written to ``/tmp/<hash>-<basename>`` via shell base64
(``_sandbox_write``). Kept small to stay within ARG_MAX.
- 32 KB - 50 MB: written to ``/home/user/<hash>-<basename>`` via
``sandbox.files.write()`` to avoid shell argument length limits.
- > 50 MB: skipped entirely with a warning.
The sandbox filename is prefixed with a short hash of the full source
path to avoid collisions when different source files share the same
basename (e.g. multiple ``result.json`` files).
"""
if offset != 0 or limit < 2000:
return None
basename = os.path.basename(file_path)
try:
expanded = os.path.realpath(os.path.expanduser(file_path))
basename = os.path.basename(expanded)
source_id = hashlib.sha256(expanded.encode()).hexdigest()[:12]
unique_name = f"{source_id}-{basename}"
file_size = os.path.getsize(expanded)
if file_size > _BRIDGE_SKIP_BYTES:
logger.warning(
@@ -359,12 +368,12 @@ async def _bridge_to_sandbox(
with open(expanded, "rb") as fh:
content = fh.read()
if file_size <= _BRIDGE_SHELL_MAX_BYTES:
sandbox_path = f"/tmp/{basename}"
sandbox_path = f"/tmp/{unique_name}"
await _sandbox_write(
sandbox, sandbox_path, content.decode("utf-8", errors="replace")
)
else:
sandbox_path = f"/home/user/{basename}"
sandbox_path = f"/home/user/{unique_name}"
await sandbox.files.write(
sandbox_path, content.decode("utf-8", errors="replace")
)
@@ -375,7 +384,7 @@ async def _bridge_to_sandbox(
except Exception:
logger.debug(
"[E2B] Failed to bridge SDK file to sandbox: %s",
basename,
file_path,
exc_info=True,
)
return None

View File

@@ -3,6 +3,7 @@
Pure unit tests with no external dependencies (no E2B, no sandbox).
"""
import hashlib
import os
import shutil
from types import SimpleNamespace
@@ -22,6 +23,15 @@ from .e2b_file_tools import (
resolve_sandbox_path,
)
def _expected_bridge_path(file_path: str, prefix: str = "/tmp") -> str:
    """Return the sandbox path expected for a bridged copy of ``file_path``.

    The result has the form ``<prefix>/<hash>-<basename>``. Both parts are
    derived from the fully resolved source path (``~`` expanded, symlinks
    followed); ``<hash>`` is the first 12 hex characters of the SHA-256
    digest of that resolved path.
    """
    resolved = os.path.realpath(os.path.expanduser(file_path))
    digest = hashlib.sha256(resolved.encode()).hexdigest()
    _, filename = os.path.split(resolved)
    return "/".join((prefix, f"{digest[:12]}-{filename}"))
# ---------------------------------------------------------------------------
# resolve_sandbox_path — sandbox path normalisation & boundary enforcement
# ---------------------------------------------------------------------------
@@ -375,14 +385,15 @@ def _make_bridge_sandbox() -> SimpleNamespace:
class TestBridgeToSandbox:
@pytest.mark.asyncio
async def test_happy_path_small_file(self, tmp_path):
"""A small file is bridged to /tmp/<basename> via _sandbox_write."""
"""A small file is bridged to /tmp/<hash>-<basename> via _sandbox_write."""
f = tmp_path / "result.json"
f.write_text('{"ok": true}')
sandbox = _make_bridge_sandbox()
result = await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
assert result == "/tmp/result.json"
expected = _expected_bridge_path(str(f))
assert result == expected
sandbox.commands.run.assert_called_once()
cmd = sandbox.commands.run.call_args[0][0]
assert "result.json" in cmd
@@ -439,17 +450,18 @@ class TestBridgeToSandbox:
@pytest.mark.asyncio
async def test_large_file_uses_files_api(self, tmp_path):
"""Files > 5 MB but <= 50 MB are written to /home/user/ via files.write."""
"""Files > 32 KB but <= 50 MB are written to /home/user/ via files.write."""
f = tmp_path / "big.json"
f.write_bytes(b"x" * (_BRIDGE_SHELL_MAX_BYTES + 1))
sandbox = _make_bridge_sandbox()
result = await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
assert result == "/home/user/big.json"
expected = _expected_bridge_path(str(f), prefix="/home/user")
assert result == expected
sandbox.files.write.assert_called_once()
call_args = sandbox.files.write.call_args[0]
assert call_args[0] == "/home/user/big.json"
assert call_args[0] == expected
sandbox.commands.run.assert_not_called()
@pytest.mark.asyncio