fix(backend/copilot): address review feedback - size limits, prompting, tests

- Move E2B-specific bridging text from shared prompt section to E2B
  supplement's extra_notes (MAJOR 1)
- Add size cap to _bridge_to_sandbox: <=5MB uses shell base64 to /tmp,
  5-50MB uses sandbox.files.write to /home/user, >50MB skipped (MAJOR 2)
- Add 7 unit tests for _bridge_to_sandbox covering happy path, skip
  conditions, error handling, and size-based routing (MINOR 3)
- Fix inaccurate comment about tool-outputs name origin (NIT 7)
- Update is_allowed_local_path docstring to mention tool-outputs (NIT 9)
- Add prompting guidance for handling base64 images in tool outputs
  (save to workspace, show via download URL)
- Add prompting guidance for using @@agptfile: references instead of
  copy-pasting large data between tools
- Add no-op server/graph_cleanup fixtures to sdk/conftest.py so SDK
  unit tests don't require Postgres
This commit is contained in:
Zamil Majdy
2026-04-02 07:56:49 +02:00
parent f3dd708cf6
commit 66afca6e0c
5 changed files with 173 additions and 10 deletions

View File

@@ -149,7 +149,8 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
Allowed:
- Files under *sdk_cwd* (``/tmp/copilot-<session>/``)
- Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``.
- Files under ``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/...``
or ``tool-outputs/...``.
The SDK nests tool-results under a conversation UUID directory;
the UUID segment is validated with ``_UUID_RE``.
"""
@@ -178,7 +179,8 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
# The SDK always creates a conversation UUID directory between
# the project dir and the tool directory.
# Accept both "tool-results" (SDK's persisted outputs) and
# "tool-outputs" (alternate name used by some SDK versions).
# "tool-outputs" (the model sometimes confuses workspace paths
# with filesystem paths and generates this variant).
if resolved.startswith(project_dir + os.sep):
relative = resolved[len(project_dir) + 1 :]
parts = relative.split(os.sep)

View File

@@ -18,6 +18,18 @@ After `write_workspace_file`, embed the `download_url` in Markdown:
- Image: `![chart](workspace://file_id#image/png)`
- Video: `![recording](workspace://file_id#video/mp4)`
### Handling binary/image data in tool outputs — CRITICAL
When a tool output contains base64-encoded binary data (images, PDFs, etc.):
1. **NEVER** try to inline or render the base64 content in your response.
2. **Save** the data to workspace using `write_workspace_file` (pass the base64 data URI as content).
3. **Show** the result via the workspace download URL in Markdown: `![image](workspace://file_id#image/png)`.
### Passing large data between tools — CRITICAL
When tool outputs produce large text that you need to feed into another tool:
- **NEVER** copy-paste the full text into the next tool call argument.
- **Save** the output to a file (workspace or local), then use `@@agptfile:` references.
- This avoids token limits and ensures data integrity.
### File references — @@agptfile:
Pass large file content to tools by reference: `@@agptfile:<uri>[<start>-<end>]`
- `workspace://<file_id>` or `workspace:///<path>` — workspace files
@@ -138,6 +150,10 @@ parent autopilot handles orchestration.
# E2B-only notes — E2B has full internet access so gh CLI works there.
# Not shown in local (bubblewrap) mode: --unshare-net blocks all network.
_E2B_TOOL_NOTES = """
### SDK tool-result files in E2B
When you `Read` an SDK tool-result file, it is automatically copied into the
sandbox at `/tmp/<filename>` so `bash_exec` can access it for further processing.
### GitHub CLI (`gh`) and git
- If the user has connected their GitHub account, both `gh` and `git` are
pre-authenticated — use them directly without any manual login step.
@@ -212,9 +228,7 @@ Important files (code, configs, outputs) should be saved to workspace to ensure
### SDK tool-result files
When tool outputs are large, the SDK truncates them and saves the full output to
a local file under `~/.claude/projects/.../tool-results/` (or `tool-outputs/`).
To read these files, use `Read` — it reads from the host filesystem and
automatically copies the file into the sandbox at `/tmp/<filename>` so
`bash_exec` can access it for further processing.
To read these files, use `Read` — it reads from the host filesystem.
### Large tool outputs saved to workspace
When a tool output contains `<tool-output-truncated workspace_path="...">`, the

View File

@@ -6,10 +6,23 @@ from unittest.mock import patch
from uuid import uuid4
import pytest
import pytest_asyncio
from backend.util import json
@pytest_asyncio.fixture(scope="session", loop_scope="session")
async def server():  # type: ignore[override]
    """Session-scoped stand-in for the ``server`` fixture.

    Resolves to ``None`` so tests in this package can be collected and run
    without bringing up the full backend.
    """
    return None
@pytest_asyncio.fixture(scope="session", loop_scope="session", autouse=True)
async def graph_cleanup():  # type: ignore[override]
    """Autouse, session-scoped stand-in for ``graph_cleanup``.

    Performs no setup and no teardown; it only yields control to the tests.
    """
    yield
@pytest.fixture()
def mock_chat_config():
"""Mock ChatConfig so compact_transcript tests skip real config lookup."""

View File

@@ -311,6 +311,13 @@ async def _handle_grep(args: dict[str, Any]) -> dict[str, Any]:
# Bridging: copy SDK-internal files into E2B sandbox
# Upper bound (in bytes) for the shell-based bridge path.
# Files larger than this are written to /home/user/ via sandbox.files.write()
# instead of /tmp/ via shell base64, to avoid shell argument length limits
# and E2B command timeouts.
_BRIDGE_SHELL_MAX_BYTES = 5 * 1024 * 1024 # 5 MB
# Hard cap (in bytes) for bridging at all.
# Files larger than this are skipped entirely to avoid excessive transfer times.
_BRIDGE_SKIP_BYTES = 50 * 1024 * 1024 # 50 MB
async def _bridge_to_sandbox(
sandbox: Any, file_path: str, offset: int, limit: int
@@ -318,23 +325,43 @@ async def _bridge_to_sandbox(
"""Best-effort copy of a host-side SDK file into the E2B sandbox.
When the model reads an SDK-internal file (e.g. tool-results), it often
wants to process the data with bash. Copying the file into ``/tmp/``
wants to process the data with bash. Copying the file into the sandbox
under a stable name lets ``bash_exec`` access it without extra steps.
Only copies when offset=0 and limit is large enough to indicate the model
wants the full file. Errors are logged but never propagated.
Size handling:
- <= 5 MB: written to ``/tmp/<basename>`` via shell base64 (``_sandbox_write``).
- 5-50 MB: written to ``/home/user/<basename>`` via ``sandbox.files.write()``
to avoid shell argument length limits.
- > 50 MB: skipped entirely with a warning.
"""
if offset != 0 or limit < 2000:
return
basename = os.path.basename(file_path)
sandbox_path = f"/tmp/{basename}"
try:
expanded = os.path.realpath(os.path.expanduser(file_path))
file_size = os.path.getsize(expanded)
if file_size > _BRIDGE_SKIP_BYTES:
logger.warning(
"[E2B] Skipping bridge for large file (%d bytes): %s",
file_size,
basename,
)
return
with open(expanded, "rb") as fh:
content = fh.read()
await _sandbox_write(
sandbox, sandbox_path, content.decode("utf-8", errors="replace")
)
if file_size <= _BRIDGE_SHELL_MAX_BYTES:
sandbox_path = f"/tmp/{basename}"
await _sandbox_write(
sandbox, sandbox_path, content.decode("utf-8", errors="replace")
)
else:
sandbox_path = f"/home/user/{basename}"
await sandbox.files.write(
sandbox_path, content.decode("utf-8", errors="replace")
)
logger.info(
"[E2B] Bridged SDK file to sandbox: %s -> %s", basename, sandbox_path
)

View File

@@ -13,6 +13,9 @@ import pytest
from backend.copilot.context import E2B_WORKDIR, SDK_PROJECTS_DIR, _current_project_dir
from .e2b_file_tools import (
_BRIDGE_SHELL_MAX_BYTES,
_BRIDGE_SKIP_BYTES,
_bridge_to_sandbox,
_check_sandbox_symlink_escape,
_read_local,
_sandbox_write,
@@ -354,3 +357,107 @@ class TestSandboxWrite:
encoded_in_cmd = call_args.split("echo ")[1].split(" |")[0].strip("'")
decoded = base64.b64decode(encoded_in_cmd).decode()
assert decoded == content
# ---------------------------------------------------------------------------
# _bridge_to_sandbox — copy SDK-internal files into E2B sandbox
# ---------------------------------------------------------------------------
def _make_bridge_sandbox() -> SimpleNamespace:
    """Return a fake sandbox for ``_bridge_to_sandbox`` tests.

    The fake exposes ``commands.run`` and ``files.write`` as ``AsyncMock``
    objects. Awaiting ``commands.run`` yields a result with empty
    ``stdout``/``stderr`` and ``exit_code`` 0.
    """
    return SimpleNamespace(
        commands=SimpleNamespace(
            run=AsyncMock(
                return_value=SimpleNamespace(stdout="", stderr="", exit_code=0)
            )
        ),
        files=SimpleNamespace(write=AsyncMock()),
    )
class TestBridgeToSandbox:
    """Tests for ``_bridge_to_sandbox`` against a mocked sandbox.

    Covers the small-file shell path, the skip conditions (partial reads,
    missing file, oversized file), the large-file ``files.write`` path, and
    the guarantee that write failures are not propagated to the caller.
    """

    @pytest.mark.asyncio
    async def test_happy_path_small_file(self, tmp_path):
        """A small file is bridged to /tmp/<basename> via _sandbox_write."""
        f = tmp_path / "result.json"
        f.write_text('{"ok": true}')
        sandbox = _make_bridge_sandbox()
        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
        sandbox.commands.run.assert_called_once()
        cmd = sandbox.commands.run.call_args[0][0]
        assert "result.json" in cmd
        sandbox.files.write.assert_not_called()

    @pytest.mark.asyncio
    async def test_skip_when_offset_nonzero(self, tmp_path):
        """Bridging is skipped when offset != 0 (partial read)."""
        f = tmp_path / "data.txt"
        f.write_text("content")
        sandbox = _make_bridge_sandbox()
        await _bridge_to_sandbox(sandbox, str(f), offset=10, limit=2000)
        sandbox.commands.run.assert_not_called()
        sandbox.files.write.assert_not_called()

    @pytest.mark.asyncio
    async def test_skip_when_limit_too_small(self, tmp_path):
        """Bridging is skipped when limit < 2000 (partial read)."""
        f = tmp_path / "data.txt"
        f.write_text("content")
        sandbox = _make_bridge_sandbox()
        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=100)
        sandbox.commands.run.assert_not_called()
        sandbox.files.write.assert_not_called()

    @pytest.mark.asyncio
    async def test_nonexistent_file_does_not_raise(self, tmp_path):
        """Bridging a non-existent file logs but does not propagate errors."""
        sandbox = _make_bridge_sandbox()
        await _bridge_to_sandbox(
            sandbox, str(tmp_path / "ghost.txt"), offset=0, limit=2000
        )
        sandbox.commands.run.assert_not_called()
        sandbox.files.write.assert_not_called()

    @pytest.mark.asyncio
    async def test_sandbox_write_failure_does_not_raise(self, tmp_path):
        """If sandbox write fails, the error is swallowed (best-effort)."""
        f = tmp_path / "data.txt"
        f.write_text("content")
        sandbox = _make_bridge_sandbox()
        sandbox.commands.run.side_effect = RuntimeError("E2B timeout")
        # Must return normally even though the underlying shell write raises.
        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
        # Without this check the test would also pass if bridging bailed out
        # before attempting the write, i.e. it would not exercise the
        # error-swallowing path at all.
        sandbox.commands.run.assert_called()

    @pytest.mark.asyncio
    async def test_large_file_uses_files_api(self, tmp_path):
        """Files > 5 MB but <= 50 MB are written to /home/user/ via files.write."""
        f = tmp_path / "big.json"
        # One byte over the shell threshold selects the files.write path.
        f.write_bytes(b"x" * (_BRIDGE_SHELL_MAX_BYTES + 1))
        sandbox = _make_bridge_sandbox()
        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
        sandbox.files.write.assert_called_once()
        call_args = sandbox.files.write.call_args[0]
        assert call_args[0] == "/home/user/big.json"
        sandbox.commands.run.assert_not_called()

    @pytest.mark.asyncio
    async def test_very_large_file_skipped(self, tmp_path):
        """Files > 50 MB are skipped entirely."""
        f = tmp_path / "huge.bin"
        # Create a sparse file to avoid actually writing 50 MB
        with open(f, "wb") as fh:
            fh.seek(_BRIDGE_SKIP_BYTES + 1)
            fh.write(b"\0")
        sandbox = _make_bridge_sandbox()
        await _bridge_to_sandbox(sandbox, str(f), offset=0, limit=2000)
        sandbox.commands.run.assert_not_called()
        sandbox.files.write.assert_not_called()