mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
## Summary

Reduce CoPilot per-turn token overhead by systematically trimming tool descriptions, parameter schemas, and system prompt content. All 35 MCP tool schemas are passed on every SDK call — this PR reduces their size.

### Strategy

1. **Tool descriptions**: Trimmed verbose multi-sentence explanations to concise single-sentence summaries while preserving meaning
2. **Parameter schemas**: Shortened parameter descriptions to essential info, removed some `default` values (handled in code)
3. **System prompt**: Condensed `_SHARED_TOOL_NOTES` and storage supplement template in `prompting.py`
4. **Cross-tool references**: Removed duplicate workflow hints (e.g. "call find_block before run_block" appeared in BOTH tools — kept only in the dependent tool). Critical cross-tool references retained (e.g. `continue_run_block` in `run_block`, `fix_agent_graph` in `validate_agent`, `get_doc_page` in `search_docs`, `web_fetch` preference in `browser_navigate`)

### Token Impact

| Metric | Before | After | Reduction |
|--------|--------|-------|-----------|
| System Prompt | ~865 tokens | ~497 tokens | 43% |
| Tool Schemas | ~9,744 tokens | ~6,470 tokens | 34% |
| **Grand Total** | **~10,609 tokens** | **~6,967 tokens** | **34%** |

Saves **~3,642 tokens per conversation turn**.

### Key Decisions

- **Mostly description changes**: Tool logic, parameters, and types unchanged. However, some schema-level `default` fields were removed (e.g. `save` in `customize_agent`) — these are machine-readable metadata, not just prose, and may affect LLM behavior.
- **Quality preserved**: All descriptions still convey what the tool does and essential usage patterns
- **Cross-references trimmed carefully**: Kept prerequisite hints in the dependent tool (run_block mentions find_block) but removed the reverse (find_block no longer mentions run_block). Critical cross-tool guidance retained where removal would degrade model behavior.
- **`run_time` description fixed**: Added missing supported values (today, last 30 days, ISO datetime) per review feedback

### Future Optimization

The SDK passes all 35 tools on every call. The MCP protocol's `list_tools()` handler supports dynamic tool registration — a follow-up PR could implement lazy tool loading (register core tools + a discovery meta-tool) to further reduce per-turn token cost.

### Changes

- Trimmed descriptions across 25 tool files
- Condensed `_SHARED_TOOL_NOTES` and `_build_storage_supplement` in `prompting.py`
- Fixed `run_time` schema description in `agent_output.py`

### Checklist

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] All 273 copilot tests pass locally
  - [x] All 35 tools load and produce valid schemas
  - [x] Before/after token dumps compared
  - [x] Formatting passes (`poetry run format`)
  - [x] CI green
198 lines
7.0 KiB
Python
198 lines
7.0 KiB
Python
"""Bash execution tool — run shell commands on E2B or in a bubblewrap sandbox.

When an E2B sandbox is available in the current execution context the command
runs directly on the remote E2B cloud environment.  This means:

- **Persistent filesystem**: files survive across turns via HTTP-based sync
  with the sandbox's ``/home/user`` directory (E2B files API), shared with
  SDK Read/Write/Edit tools.
- **Full internet access**: E2B sandboxes have unrestricted outbound network.
- **Execution isolation**: E2B provides a fresh, containerised Linux environment.

When E2B is *not* configured the tool falls back to **bubblewrap** (bwrap):
OS-level isolation with a whitelist-only filesystem, no network, and resource
limits.  Requires bubblewrap to be installed (Linux only).
"""
|
|
|
|
import logging
|
|
import shlex
|
|
from typing import Any
|
|
|
|
from e2b import AsyncSandbox
|
|
from e2b.exceptions import TimeoutException
|
|
|
|
from backend.copilot.context import E2B_WORKDIR, get_current_sandbox
|
|
from backend.copilot.integration_creds import get_integration_env_vars
|
|
from backend.copilot.model import ChatSession
|
|
|
|
from .base import BaseTool
|
|
from .models import BashExecResponse, ErrorResponse, ToolResponseBase
|
|
from .sandbox import get_workspace_dir, has_full_sandbox, run_sandboxed
|
|
|
|
# Module-level logger; used below to record E2B execution failures.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class BashExecTool(BaseTool):
    """Execute Bash commands on E2B or in a bubblewrap sandbox.

    The tool prefers the E2B sandbox attached to the current execution
    context and falls back to a local bubblewrap sandbox when none is
    present.  The requested timeout is normalised once, in :meth:`_execute`,
    so both backends honour the limits advertised in the tool schema.
    """

    # Timeout bounds in seconds.  They mirror the values advertised to the
    # model in ``description`` and ``parameters`` ("default 30, max 120").
    _DEFAULT_TIMEOUT = 30
    _MIN_TIMEOUT = 1
    _MAX_TIMEOUT = 120

    # Placeholder substituted for injected secret values in command output.
    _REDACTED = "[REDACTED]"

    @property
    def name(self) -> str:
        """Return the tool identifier exposed to the model."""
        return "bash_exec"

    @property
    def description(self) -> str:
        """Return the short, model-facing description of the tool."""
        return (
            "Execute a Bash command or script. Shares filesystem with SDK file tools. "
            "Useful for scripts, data processing, and package installation. "
            "Killed after timeout (default 30s, max 120s)."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        """Return the JSON-schema description of the accepted arguments."""
        return {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "Bash command or script.",
                },
                "timeout": {
                    "type": "integer",
                    "description": "Max seconds (default 30, max 120).",
                    "default": 30,
                },
            },
            "required": ["command"],
        }

    @property
    def requires_auth(self) -> bool:
        """Return ``True``: the tool is only available to authenticated users."""
        # True because _execute_on_e2b injects user tokens (GH_TOKEN etc.)
        # when user_id is present. Defense-in-depth: ensures only authenticated
        # users reach the token injection path.
        return True

    @classmethod
    def _normalize_timeout(cls, raw: Any) -> int:
        """Coerce a caller-supplied timeout into the supported range.

        Missing, ``None`` or non-numeric values fall back to the default
        instead of raising; numeric values are clamped to
        ``[_MIN_TIMEOUT, _MAX_TIMEOUT]`` so the advertised maximum is enforced
        on every backend.
        """
        try:
            seconds = int(raw)
        except (TypeError, ValueError, OverflowError):
            return cls._DEFAULT_TIMEOUT
        # The lower bound rejects 0 and negative values.
        # NOTE(review): the E2B SDK is believed to treat ``timeout=0`` as
        # "no limit" — confirm against the SDK reference.
        return max(cls._MIN_TIMEOUT, min(seconds, cls._MAX_TIMEOUT))

    @classmethod
    def _scrub_secrets(cls, text: str, secrets: list[str]) -> str:
        """Return *text* with every value in *secrets* replaced by a placeholder."""
        for secret in secrets:
            text = text.replace(secret, cls._REDACTED)
        return text

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs: Any,
    ) -> ToolResponseBase:
        """Run a bash command on E2B (if available) or in a bubblewrap sandbox.

        Dispatches to :meth:`_execute_on_e2b` when a sandbox is present in the
        current execution context, otherwise falls back to the local bubblewrap
        sandbox.

        Args:
            user_id: Authenticated user, or ``None``; forwarded to the E2B
                path, which uses it to look up integration tokens.
            session: Current chat session; only ``session_id`` is read.
            **kwargs: Tool arguments — ``command`` (required) and ``timeout``
                (optional, seconds).

        Returns:
            A :class:`BashExecResponse` once the command has run (including
            timeouts), or an :class:`ErrorResponse` when the command is empty
            or no sandbox backend is available.
        """
        session_id = session.session_id if session else None

        command: str = (kwargs.get("command") or "").strip()
        timeout = self._normalize_timeout(
            kwargs.get("timeout", self._DEFAULT_TIMEOUT)
        )

        if not command:
            return ErrorResponse(
                message="No command provided.",
                error="empty_command",
                session_id=session_id,
            )

        sandbox = get_current_sandbox()
        if sandbox is not None:
            return await self._execute_on_e2b(
                sandbox, command, timeout, session_id, user_id
            )

        # Bubblewrap fallback: local isolated execution.
        if not has_full_sandbox():
            return ErrorResponse(
                message="bash_exec requires bubblewrap sandbox (Linux only).",
                error="sandbox_unavailable",
                session_id=session_id,
            )

        workspace = get_workspace_dir(session_id or "default")

        stdout, stderr, exit_code, timed_out = await run_sandboxed(
            command=["bash", "-c", command],
            cwd=workspace,
            timeout=timeout,
        )

        return BashExecResponse(
            message=(
                "Execution timed out"
                if timed_out
                else f"Command executed (exit {exit_code})"
            ),
            stdout=stdout,
            stderr=stderr,
            exit_code=exit_code,
            timed_out=timed_out,
            session_id=session_id,
        )

    async def _execute_on_e2b(
        self,
        sandbox: AsyncSandbox,
        command: str,
        timeout: int,
        session_id: str | None,
        user_id: str | None = None,
    ) -> ToolResponseBase:
        """Execute *command* on the E2B sandbox via commands.run().

        Integration tokens (e.g. GH_TOKEN) are injected into the sandbox env
        for any user with connected accounts. E2B has full internet access, so
        CLI tools like ``gh`` work without manual authentication.

        Every injected token value is removed from stdout, stderr and from
        the error message returned on failure, so that a command cannot
        surface it through the tool response.

        Args:
            sandbox: E2B sandbox to run the command in.
            command: Shell command or script; passed to ``bash -c`` quoted.
            timeout: Maximum run time in seconds.
            session_id: Session identifier echoed back in the response.
            user_id: When given, this user's integration env vars are injected.

        Returns:
            A :class:`BashExecResponse` when the command ran or timed out, or
            an :class:`ErrorResponse` for any other failure.
        """
        envs: dict[str, str] = {
            "PATH": "/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin",
        }
        # Collect injected secret values so we can scrub them from output.
        secret_values: list[str] = []
        if user_id is not None:
            integration_env = await get_integration_env_vars(user_id)
            # Longest first: if one secret is a substring of another, the
            # longer value is redacted in full before the shorter one.
            secret_values = sorted(
                {value for value in integration_env.values() if value},
                key=len,
                reverse=True,
            )
            envs.update(integration_env)

        try:
            result = await sandbox.commands.run(
                f"bash -c {shlex.quote(command)}",
                cwd=E2B_WORKDIR,
                timeout=timeout,
                envs=envs,
            )
        except TimeoutException:
            return BashExecResponse(
                message="Execution timed out",
                stdout="",
                stderr=f"Timed out after {timeout}s",
                exit_code=-1,
                timed_out=True,
                session_id=session_id,
            )
        except Exception as exc:
            logger.error("[E2B] bash_exec failed: %s", exc, exc_info=True)
            # The exception text may echo command output, so it gets the same
            # scrubbing as stdout/stderr before being returned to the caller.
            detail = self._scrub_secrets(str(exc), secret_values)
            return ErrorResponse(
                message=f"E2B execution failed: {detail}",
                error="e2b_execution_error",
                session_id=session_id,
            )

        # Scrub injected tokens from command output to prevent exfiltration
        # via `echo $GH_TOKEN`, `env`, `printenv`, etc.
        stdout = self._scrub_secrets(result.stdout or "", secret_values)
        stderr = self._scrub_secrets(result.stderr or "", secret_values)
        return BashExecResponse(
            message=f"Command executed on E2B (exit {result.exit_code})",
            stdout=stdout,
            stderr=stderr,
            exit_code=result.exit_code,
            timed_out=False,
            session_id=session_id,
        )
|