From 1dd53db21c6b30c480f9d4253534a10985e23672 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Thu, 12 Feb 2026 21:44:40 +0400 Subject: [PATCH] feat(chat/sandbox): bubblewrap sandbox for bash_exec, remove python_exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace `--ro-bind / /` with whitelist-only filesystem: only /usr, /etc, /bin, /lib, /sbin mounted read-only. /app, /root, /home, /opt, /var are completely invisible inside the sandbox. - Add `--clearenv` to wipe all inherited env vars (API keys, DB passwords). Only safe vars (PATH, HOME=workspace, LANG) are explicitly set. - Remove python_exec tool — bash_exec can run `python3 -c` or heredocs with identical bubblewrap protection, reducing attack surface. - Remove all fallback security code (import hooks, blocked modules, network command lists). Tools now hard-require bubblewrap — disabled on platforms without bwrap. - Clean up security_hooks.py: remove ~200 lines of dead bash validation code, add Bash to BLOCKED_TOOLS as defence-in-depth. - Wire up long-running tool callback in SDK service for create_agent/edit_agent delegation to Redis Streams background infrastructure. --- autogpt_platform/backend/Dockerfile | 6 +- .../api/features/chat/sdk/security_hooks.py | 157 ++--------------- .../backend/api/features/chat/sdk/service.py | 148 ++++++++++++++-- .../api/features/chat/sdk/tool_adapter.py | 160 ++++------------- .../api/features/chat/tools/__init__.py | 4 +- .../api/features/chat/tools/bash_exec.py | 97 ++++------- .../backend/api/features/chat/tools/models.py | 11 -- .../api/features/chat/tools/python_exec.py | 162 ------------------ .../api/features/chat/tools/sandbox.py | 153 +++++++++++++---- .../backend/test/chat/__init__.py | 0 .../backend/test/chat/test_security_hooks.py | 133 ++++++++++++++ 11 files changed, 481 insertions(+), 550 deletions(-) delete mode 100644 autogpt_platform/backend/backend/api/features/chat/tools/python_exec.py create mode 100644 autogpt_platform/backend/test/chat/__init__.py create mode 100644 autogpt_platform/backend/test/chat/test_security_hooks.py diff --git a/autogpt_platform/backend/Dockerfile b/autogpt_platform/backend/Dockerfile index d5a216927c..ba5d04d056 100644 --- a/autogpt_platform/backend/Dockerfile +++ b/autogpt_platform/backend/Dockerfile @@ -62,8 +62,9 @@ ENV POETRY_HOME=/opt/poetry \ DEBIAN_FRONTEND=noninteractive ENV PATH=/opt/poetry/bin:$PATH -# Install Python, FFmpeg, ImageMagick, and CLI tools for agent use -# CLI tools match ALLOWED_BASH_COMMANDS in security_hooks.py +# Install Python, FFmpeg, ImageMagick, and CLI tools for agent use. +# bubblewrap provides OS-level sandbox (whitelist-only FS + no network) +# for the bash_exec MCP tool. 
RUN apt-get update && apt-get install -y \ python3.13 \ python3-pip \ @@ -72,6 +73,7 @@ RUN apt-get update && apt-get install -y \ jq \ ripgrep \ tree \ + bubblewrap \ && rm -rf /var/lib/apt/lists/* # Copy only necessary files from builder diff --git a/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py b/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py index 245a13407b..82e76924a0 100644 --- a/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py +++ b/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py @@ -8,15 +8,18 @@ import json import logging import os import re -import shlex from typing import Any, cast from backend.api.features.chat.sdk.tool_adapter import MCP_TOOL_PREFIX logger = logging.getLogger(__name__) -# Tools that are blocked entirely (CLI/system access) +# Tools that are blocked entirely (CLI/system access). +# "Bash" (capital) is the SDK built-in — it's NOT in allowed_tools but blocked +# here as defence-in-depth. The agent uses mcp__copilot__bash_exec instead, +# which has kernel-level network isolation (unshare --net). BLOCKED_TOOLS = { + "Bash", "bash", "shell", "exec", @@ -24,66 +27,11 @@ BLOCKED_TOOLS = { "command", } -# Safe read-only commands allowed in the sandboxed Bash tool. -# These are data-processing / inspection utilities — no writes, no network. -ALLOWED_BASH_COMMANDS = { - # JSON / structured data - "jq", - # Text processing - "grep", - "egrep", - "fgrep", - "rg", - "head", - "tail", - "cat", - "wc", - "sort", - "uniq", - "cut", - "tr", - "sed", - "awk", - "column", - "fold", - "fmt", - "nl", - "paste", - "rev", - # File inspection (read-only) - "find", - "ls", - "file", - "stat", - "du", - "tree", - "basename", - "dirname", - "realpath", - # Utilities - "echo", - "printf", - "date", - "true", - "false", - "xargs", - "tee", - # Comparison / encoding - "diff", - "comm", - "base64", - "md5sum", - "sha256sum", -} - # Tools allowed only when their path argument stays within the SDK workspace. # The SDK uses these to handle oversized tool results (writes to tool-results/ # files, then reads them back) and for workspace file operations. WORKSPACE_SCOPED_TOOLS = {"Read", "Write", "Edit", "Glob", "Grep"} -# Tools that get sandboxed Bash validation (command allowlist + workspace paths). -SANDBOXED_BASH_TOOLS = {"Bash"} - # Dangerous patterns in tool inputs DANGEROUS_PATTERNS = [ r"sudo", @@ -144,88 +92,11 @@ def _validate_workspace_path( f"Blocked {tool_name} outside workspace: {path} (resolved={resolved})" ) return _deny( - f"Tool '{tool_name}' can only access files within the workspace directory." + f"[SECURITY] Tool '{tool_name}' can only access files within the workspace " + "directory. This is enforced by the platform and cannot be bypassed." ) -def _validate_bash_command( - tool_input: dict[str, Any], sdk_cwd: str | None -) -> dict[str, Any]: - """Validate a Bash command against the allowlist of safe commands. - - Only read-only data-processing commands are allowed (jq, grep, head, etc.). - Blocks command substitution, output redirection, and disallowed executables. - - Uses ``shlex.split`` to properly handle quoted strings (e.g. jq filters - containing ``|`` won't be mistaken for shell pipes). 
- """ - command = tool_input.get("command", "") - if not command or not isinstance(command, str): - return _deny("Bash command is empty.") - - # Block command substitution — can smuggle arbitrary commands - if "$(" in command or "`" in command: - return _deny("Command substitution ($() or ``) is not allowed in Bash.") - - # Block output redirection — Bash should be read-only. - # Strip quoted strings first so `jq '.x > 5'` isn't a false positive, - # then check for unquoted > or >> (with or without surrounding spaces). - unquoted = re.sub(r"'[^']*'|\"[^\"]*\"", "", command) - if re.search(r"(?{1,2}", unquoted): - return _deny("Output redirection (> or >>) is not allowed in Bash.") - - # Block /dev/ access (e.g., /dev/tcp for network) - if "/dev/" in command: - return _deny("Access to /dev/ is not allowed in Bash.") - - # Tokenize with shlex (respects quotes), then extract command names. - # shlex preserves shell operators like | ; && || as separate tokens. - try: - tokens = shlex.split(command) - except ValueError: - return _deny("Malformed command (unmatched quotes).") - - # Walk tokens: the first non-assignment token after a pipe/separator is a command. - expect_command = True - for token in tokens: - if token in ("|", "||", "&&", ";"): - expect_command = True - continue - if expect_command: - # Skip env var assignments (VAR=value) - if "=" in token and not token.startswith("-"): - continue - cmd_name = os.path.basename(token) - if cmd_name not in ALLOWED_BASH_COMMANDS: - allowed = ", ".join(sorted(ALLOWED_BASH_COMMANDS)) - logger.warning(f"Blocked Bash command: {cmd_name}") - return _deny( - f"Command '{cmd_name}' is not allowed. " - f"Allowed commands: {allowed}" - ) - expect_command = False - - # Validate absolute file paths stay within workspace - if sdk_cwd: - norm_cwd = os.path.normpath(sdk_cwd) - claude_dir = os.path.normpath(os.path.expanduser("~/.claude/projects")) - for token in tokens: - if not token.startswith("/"): - continue - resolved = os.path.normpath(token) - if resolved.startswith(norm_cwd + os.sep) or resolved == norm_cwd: - continue - if resolved.startswith(claude_dir + os.sep) and "tool-results" in resolved: - continue - logger.warning(f"Blocked Bash path outside workspace: {token}") - return _deny( - f"Bash can only access files within the workspace directory. " - f"Path '{token}' is outside the workspace." - ) - - return {} - - def _validate_tool_access( tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None = None ) -> dict[str, Any]: @@ -238,14 +109,11 @@ def _validate_tool_access( if tool_name in BLOCKED_TOOLS: logger.warning(f"Blocked tool access attempt: {tool_name}") return _deny( - f"Tool '{tool_name}' is not available. " - "Use the CoPilot-specific tools instead." + f"[SECURITY] Tool '{tool_name}' is blocked for security. " + "This is enforced by the platform and cannot be bypassed. " + "Use the CoPilot-specific MCP tools instead." ) - # Sandboxed Bash: only allowlisted commands, workspace-scoped paths - if tool_name in SANDBOXED_BASH_TOOLS: - return _validate_bash_command(tool_input, sdk_cwd) - # Workspace-scoped tools: allowed only within the SDK workspace directory if tool_name in WORKSPACE_SCOPED_TOOLS: return _validate_workspace_path(tool_name, tool_input, sdk_cwd) @@ -259,7 +127,10 @@ def _validate_tool_access( logger.warning( f"Blocked dangerous pattern in tool input: {pattern} in {tool_name}" ) - return _deny("Input contains blocked pattern") + return _deny( + "[SECURITY] Input contains a blocked pattern. 
" + "This is enforced by the platform and cannot be bypassed." + ) return {} diff --git a/autogpt_platform/backend/backend/api/features/chat/sdk/service.py b/autogpt_platform/backend/backend/api/features/chat/sdk/service.py index 8ba92bf44f..cdcb00ec5f 100644 --- a/autogpt_platform/backend/backend/api/features/chat/sdk/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/sdk/service.py @@ -10,6 +10,7 @@ from typing import Any from backend.util.exceptions import NotFoundError +from .. import stream_registry from ..config import ChatConfig from ..model import ( ChatMessage, @@ -27,13 +28,19 @@ from ..response_model import ( StreamToolInputAvailable, StreamToolOutputAvailable, ) -from ..service import _build_system_prompt, _generate_session_title +from ..service import ( + _build_system_prompt, + _execute_long_running_tool_with_streaming, + _generate_session_title, +) +from ..tools.models import OperationPendingResponse, OperationStartedResponse from ..tools.sandbox import WORKSPACE_PREFIX, make_session_path from ..tracking import track_user_message from .response_adapter import SDKResponseAdapter from .security_hooks import create_security_hooks from .tool_adapter import ( COPILOT_TOOL_NAMES, + LongRunningCallback, create_copilot_mcp_server, set_execution_context, ) @@ -47,21 +54,136 @@ _background_tasks: set[asyncio.Task[Any]] = set() _SDK_CWD_PREFIX = WORKSPACE_PREFIX -# Appended to the system prompt to inform the agent about Bash restrictions. -# The SDK already describes each tool (Read, Write, Edit, Glob, Grep, Bash), -# but it doesn't know about our security hooks' command allowlist for Bash. +# Appended to the system prompt to inform the agent about available tools. +# The SDK built-in Bash is NOT available — use mcp__copilot__bash_exec instead, +# which has kernel-level network isolation (unshare --net). _SDK_TOOL_SUPPLEMENT = """ -## Bash restrictions +## Tool notes -The Bash tool is restricted to safe, read-only data-processing commands: -jq, grep, head, tail, cat, wc, sort, uniq, cut, tr, sed, awk, find, ls, -echo, diff, base64, and similar utilities. -Network commands (curl, wget), destructive commands (rm, chmod), and -interpreters (python, node) are NOT available. +- The SDK built-in Bash tool is NOT available. Use the `bash_exec` MCP tool + for shell commands — it runs in a network-isolated sandbox. +- Long-running tools (create_agent, edit_agent, etc.) are handled + asynchronously. You will receive an immediate response; the actual result + is delivered to the user via a background stream. """ +def _build_long_running_callback(user_id: str | None) -> LongRunningCallback: + """Build a callback that delegates long-running tools to the non-SDK infrastructure. + + Long-running tools (create_agent, edit_agent, etc.) are delegated to the + existing background infrastructure: stream_registry (Redis Streams), + database persistence, and SSE reconnection. This means results survive + page refreshes / pod restarts, and the frontend shows the proper loading + widget with progress updates. + + The returned callback matches the ``LongRunningCallback`` signature: + ``(tool_name, args, session) -> MCP response dict``. 
+ """ + + async def _callback( + tool_name: str, args: dict[str, Any], session: ChatSession + ) -> dict[str, Any]: + operation_id = str(uuid.uuid4()) + task_id = str(uuid.uuid4()) + tool_call_id = f"sdk-{uuid.uuid4().hex[:12]}" + session_id = session.session_id + + # --- Build user-friendly messages (matches non-SDK service) --- + if tool_name == "create_agent": + desc = args.get("description", "") + desc_preview = (desc[:100] + "...") if len(desc) > 100 else desc + pending_msg = ( + f"Creating your agent: {desc_preview}" + if desc_preview + else "Creating agent... This may take a few minutes." + ) + started_msg = ( + "Agent creation started. You can close this tab - " + "check your library in a few minutes." + ) + elif tool_name == "edit_agent": + changes = args.get("changes", "") + changes_preview = (changes[:100] + "...") if len(changes) > 100 else changes + pending_msg = ( + f"Editing agent: {changes_preview}" + if changes_preview + else "Editing agent... This may take a few minutes." + ) + started_msg = ( + "Agent edit started. You can close this tab - " + "check your library in a few minutes." + ) + else: + pending_msg = f"Running {tool_name}... This may take a few minutes." + started_msg = ( + f"{tool_name} started. You can close this tab - " + "check back in a few minutes." + ) + + # --- Register task in Redis for SSE reconnection --- + await stream_registry.create_task( + task_id=task_id, + session_id=session_id, + user_id=user_id, + tool_call_id=tool_call_id, + tool_name=tool_name, + operation_id=operation_id, + ) + + # --- Save OperationPendingResponse to chat history --- + pending_message = ChatMessage( + role="tool", + content=OperationPendingResponse( + message=pending_msg, + operation_id=operation_id, + tool_name=tool_name, + ).model_dump_json(), + tool_call_id=tool_call_id, + ) + session.messages.append(pending_message) + await upsert_chat_session(session) + + # --- Spawn background task (reuses non-SDK infrastructure) --- + bg_task = asyncio.create_task( + _execute_long_running_tool_with_streaming( + tool_name=tool_name, + parameters=args, + tool_call_id=tool_call_id, + operation_id=operation_id, + task_id=task_id, + session_id=session_id, + user_id=user_id, + ) + ) + _background_tasks.add(bg_task) + bg_task.add_done_callback(_background_tasks.discard) + await stream_registry.set_task_asyncio_task(task_id, bg_task) + + logger.info( + f"[SDK] Long-running tool {tool_name} delegated to background " + f"(operation_id={operation_id}, task_id={task_id})" + ) + + # --- Return OperationStartedResponse as MCP tool result --- + # This flows through SDK → response adapter → frontend, triggering + # the loading widget with SSE reconnection support. + started_json = OperationStartedResponse( + message=started_msg, + operation_id=operation_id, + tool_name=tool_name, + task_id=task_id, + ).model_dump_json() + + return { + "content": [{"type": "text", "text": started_json}], + "isError": False, + } + + return _callback + + def _resolve_sdk_model() -> str | None: """Resolve the model name for the Claude Agent SDK CLI. 
@@ -339,7 +461,11 @@ async def stream_chat_completion_sdk( sdk_cwd = _make_sdk_cwd(session_id) os.makedirs(sdk_cwd, exist_ok=True) - set_execution_context(user_id, session) + set_execution_context( + user_id, + session, + long_running_callback=_build_long_running_callback(user_id), + ) try: try: diff --git a/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py b/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py index 15f9a2cb4b..ad69f13874 100644 --- a/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py +++ b/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py @@ -2,15 +2,19 @@ This module provides the adapter layer that converts existing BaseTool implementations into in-process MCP tools that can be used with the Claude Agent SDK. + +Long-running tools (``is_long_running=True``) are delegated to the non-SDK +background infrastructure (stream_registry, Redis persistence, SSE reconnection) +via a callback provided by the service layer. This avoids wasteful SDK polling +and makes results survive page refreshes. """ -import asyncio import json import logging import os import uuid +from collections.abc import Awaitable, Callable from contextvars import ContextVar -from dataclasses import dataclass from typing import Any from backend.api.features.chat.model import ChatSession @@ -40,37 +44,38 @@ _pending_tool_outputs: ContextVar[dict[str, str]] = ContextVar( "pending_tool_outputs", default=None # type: ignore[arg-type] ) +# Callback type for delegating long-running tools to the non-SDK infrastructure. +# Args: (tool_name, arguments, session) → MCP-formatted response dict. +LongRunningCallback = Callable[ + [str, dict[str, Any], ChatSession], Awaitable[dict[str, Any]] +] -@dataclass -class _BackgroundOp: - """Tracks a background tool operation.""" - - tool_name: str - task: asyncio.Task[Any] - result: dict[str, Any] | None = None - done: bool = False - - -# Module-level registry for background long-running operations. -# Keyed by operation_id. Cleaned up after result is consumed. -_background_ops: dict[str, _BackgroundOp] = {} -_background_ops_lock = asyncio.Lock() - -_CHECK_OP_TOOL_NAME = "check_operation" +# ContextVar so the service layer can inject the callback per-request. +_long_running_callback: ContextVar[LongRunningCallback | None] = ContextVar( + "long_running_callback", default=None +) def set_execution_context( user_id: str | None, session: ChatSession, + long_running_callback: LongRunningCallback | None = None, ) -> None: """Set the execution context for tool calls. This must be called before streaming begins to ensure tools have access to user_id and session information. + + Args: + user_id: Current user's ID. + session: Current chat session. + long_running_callback: Optional callback to delegate long-running tools + to the non-SDK background infrastructure (stream_registry + Redis). """ _current_user_id.set(user_id) _current_session.set(session) _pending_tool_outputs.set({}) + _long_running_callback.set(long_running_callback) def get_execution_context() -> tuple[str | None, ChatSession | None]: @@ -142,9 +147,10 @@ def create_tool_handler(base_tool: BaseTool): This wraps the existing BaseTool._execute method to be compatible with the Claude Agent SDK MCP tool format. - Long-running tools (``is_long_running=True``) are spawned as background - tasks and return immediately with an ``operation_id``. The SDK should - then poll ``check_operation`` to retrieve the result. 
+ Long-running tools (``is_long_running=True``) are delegated to the + non-SDK background infrastructure via a callback set in the execution + context. The callback persists the operation in Redis (stream_registry) + so results survive page refreshes and pod restarts. """ async def tool_handler(args: dict[str, Any]) -> dict[str, Any]: @@ -154,52 +160,23 @@ def create_tool_handler(base_tool: BaseTool): if session is None: return _mcp_error("No session context available") - # --- Long-running: fire-and-forget, return operation_id --- + # --- Long-running: delegate to non-SDK background infrastructure --- if base_tool.is_long_running: - op_id = f"op-{uuid.uuid4().hex[:12]}" - - async def _bg_run() -> None: + callback = _long_running_callback.get(None) + if callback: try: - result = await _execute_tool_sync(base_tool, user_id, session, args) - op = _background_ops.get(op_id) - if op: - op.result = result - op.done = True - except Exception as exc: - op = _background_ops.get(op_id) - if op: - op.result = _mcp_error(str(exc)) - op.done = True + return await callback(base_tool.name, args, session) + except Exception as e: logger.error( - f"Background tool {base_tool.name} failed: {exc}", + f"Long-running callback failed for {base_tool.name}: {e}", exc_info=True, ) - - task = asyncio.create_task(_bg_run()) - _background_ops[op_id] = _BackgroundOp(tool_name=base_tool.name, task=task) - logger.info( - f"[SDK] Long-running tool {base_tool.name} started " - f"(operation_id={op_id})" + return _mcp_error(f"Failed to start {base_tool.name}: {e}") + # No callback — fall through to synchronous execution + logger.warning( + f"[SDK] No long-running callback for {base_tool.name}, " + f"executing synchronously (may block)" ) - return { - "content": [ - { - "type": "text", - "text": json.dumps( - { - "status": "started", - "operation_id": op_id, - "message": ( - f"{base_tool.name} is running in the background. " - f"Call check_operation with " - f"operation_id='{op_id}' to get the result." - ), - } - ), - } - ], - "isError": False, - } # --- Normal (fast) tool: execute synchronously --- try: @@ -255,58 +232,6 @@ async def _read_file_handler(args: dict[str, Any]) -> dict[str, Any]: } -async def _check_operation_handler(args: dict[str, Any]) -> dict[str, Any]: - """Check the status of a background long-running operation.""" - op_id = args.get("operation_id", "") - if not op_id or op_id not in _background_ops: - return _mcp_error(f"Operation '{op_id}' not found.") - - op = _background_ops[op_id] - if not op.done: - return { - "content": [ - { - "type": "text", - "text": json.dumps( - { - "status": "in_progress", - "operation_id": op_id, - "tool_name": op.tool_name, - "message": ( - f"{op.tool_name} is still running. " - "Check again in a few seconds." - ), - } - ), - } - ], - "isError": False, - } - - # Done — return result and clean up - result = op.result or _mcp_error("Operation completed but no result available.") - del _background_ops[op_id] - logger.info(f"[SDK] Background operation {op_id} ({op.tool_name}) collected") - return result - - -_CHECK_OP_DESCRIPTION = ( - "Check the status of a background operation started by a long-running tool " - "(like create_agent). Returns the result when done, or 'in_progress' if still " - "running. Call this periodically (every few seconds) after starting an operation." 
-) -_CHECK_OP_SCHEMA = { - "type": "object", - "properties": { - "operation_id": { - "type": "string", - "description": "The operation_id returned by the long-running tool.", - }, - }, - "required": ["operation_id"], -} - - _READ_TOOL_NAME = "Read" _READ_TOOL_DESCRIPTION = ( "Read a file from the local filesystem. " @@ -365,14 +290,6 @@ def create_copilot_mcp_server(): )(_read_file_handler) sdk_tools.append(read_tool) - # Add the check_operation tool for polling background operations - check_op_tool = tool( - _CHECK_OP_TOOL_NAME, - _CHECK_OP_DESCRIPTION, - _CHECK_OP_SCHEMA, - )(_check_operation_handler) - sdk_tools.append(check_op_tool) - server = create_sdk_mcp_server( name=MCP_SERVER_NAME, version="1.0.0", @@ -399,6 +316,5 @@ _SDK_BUILTIN_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep", "Task"] COPILOT_TOOL_NAMES = [ *[f"{MCP_TOOL_PREFIX}{name}" for name in TOOL_REGISTRY.keys()], f"{MCP_TOOL_PREFIX}{_READ_TOOL_NAME}", - f"{MCP_TOOL_PREFIX}{_CHECK_OP_TOOL_NAME}", *_SDK_BUILTIN_TOOLS, ] diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/__init__.py b/autogpt_platform/backend/backend/api/features/chat/tools/__init__.py index c0ed346a2d..9c9a51b2b4 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/__init__.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/__init__.py @@ -17,7 +17,6 @@ from .find_agent import FindAgentTool from .find_block import FindBlockTool from .find_library_agent import FindLibraryAgentTool from .get_doc_page import GetDocPageTool -from .python_exec import PythonExecTool from .run_agent import RunAgentTool from .run_block import RunBlockTool from .search_docs import SearchDocsTool @@ -50,8 +49,7 @@ TOOL_REGISTRY: dict[str, BaseTool] = { "get_doc_page": GetDocPageTool(), # Web fetch for safe URL retrieval "web_fetch": WebFetchTool(), - # Sandboxed code execution (network-isolated) - "python_exec": PythonExecTool(), + # Sandboxed code execution (bubblewrap) "bash_exec": BashExecTool(), # Workspace tools for CoPilot file operations "list_workspace_files": ListWorkspaceFilesTool(), diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/bash_exec.py b/autogpt_platform/backend/backend/api/features/chat/tools/bash_exec.py index 538cdeb26b..7bd74e124f 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/bash_exec.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/bash_exec.py @@ -1,12 +1,14 @@ -"""Bash execution tool — run shell commands in a network-isolated sandbox. +"""Bash execution tool — run shell commands in a bubblewrap sandbox. Full Bash scripting is allowed (loops, conditionals, pipes, functions, etc.). -Safety comes from kernel-level network isolation and workspace confinement, -not from restricting language features. +Safety comes from OS-level isolation (bubblewrap): only system dirs visible +read-only, writable workspace only, clean env, no network. + +Requires bubblewrap (``bwrap``) — the tool is disabled when bwrap is not +available (e.g. macOS development). 
""" import logging -import re from typing import Any from backend.api.features.chat.model import ChatSession @@ -18,46 +20,15 @@ from backend.api.features.chat.tools.models import ( ) from backend.api.features.chat.tools.sandbox import ( get_workspace_dir, - has_network_sandbox, + has_full_sandbox, run_sandboxed, ) logger = logging.getLogger(__name__) -# Destructive patterns blocked regardless of network sandbox -_BLOCKED_PATTERNS: list[tuple[str, str]] = [ - (r"rm\s+-[a-zA-Z]*r[a-zA-Z]*\s+/(?!\w)", "Recursive removal of root paths"), - (r"dd\s+.*of=/dev/", "Direct disk writes"), - (r"mkfs\b", "Filesystem formatting"), - (r":\(\)\s*\{", "Fork bomb"), - (r"\bshutdown\b|\breboot\b|\bhalt\b|\bpoweroff\b", "System power commands"), - (r"/dev/sd[a-z]|/dev/nvme|/dev/hd[a-z]", "Raw disk device access"), -] - -# Commands blocked when kernel network sandbox is NOT available (fallback) -_NETWORK_COMMANDS = { - "curl", - "wget", - "ssh", - "scp", - "sftp", - "rsync", - "nc", - "ncat", - "netcat", - "telnet", - "ftp", - "ping", - "traceroute", - "nslookup", - "dig", - "host", - "nmap", -} - class BashExecTool(BaseTool): - """Execute Bash commands in a sandboxed environment.""" + """Execute Bash commands in a bubblewrap sandbox.""" @property def name(self) -> str: @@ -65,14 +36,21 @@ class BashExecTool(BaseTool): @property def description(self) -> str: + if not has_full_sandbox(): + return ( + "Bash execution is DISABLED — bubblewrap sandbox is not " + "available on this platform. Do not call this tool." + ) return ( - "Execute a Bash command or script in a sandboxed environment. " - "Full Bash scripting is supported (loops, conditionals, pipes, functions, etc.). " - "SECURITY: All internet/network access is blocked at the kernel level " - "(no curl, wget, nc, or any outbound connections). " + "Execute a Bash command or script in a bubblewrap sandbox. " + "Full Bash scripting is supported (loops, conditionals, pipes, " + "functions, etc.). " + "SECURITY: Only system directories (/usr, /bin, /lib, /etc) are " + "visible read-only, the per-session workspace is the only writable " + "path, environment variables are wiped (no secrets), and all " + "network access is blocked at the kernel level. Application code, " + "configs, and other directories are NOT accessible. " "To fetch web content, use the web_fetch tool instead. " - "Commands run in an isolated per-session workspace directory — " - "they cannot access files outside that directory. " "Execution is killed after the timeout (default 30s, max 120s). " "Returns stdout and stderr. 
" "Useful for file manipulation, data processing with Unix tools " @@ -109,9 +87,17 @@ class BashExecTool(BaseTool): session: ChatSession, **kwargs: Any, ) -> ToolResponseBase: + session_id = session.session_id if session else None + + if not has_full_sandbox(): + return ErrorResponse( + message="bash_exec requires bubblewrap sandbox (Linux only).", + error="sandbox_unavailable", + session_id=session_id, + ) + command: str = (kwargs.get("command") or "").strip() timeout: int = kwargs.get("timeout", 30) - session_id = session.session_id if session else None if not command: return ErrorResponse( @@ -120,29 +106,6 @@ class BashExecTool(BaseTool): session_id=session_id, ) - # Block destructive patterns - for pattern, reason in _BLOCKED_PATTERNS: - if re.search(pattern, command, re.IGNORECASE): - return ErrorResponse( - message=f"Command blocked: {reason}", - error="blocked_command", - session_id=session_id, - ) - - # When kernel network sandbox unavailable, block network commands - if not has_network_sandbox(): - words = set(re.findall(r"\b\w+\b", command)) - blocked = words & _NETWORK_COMMANDS - if blocked: - return ErrorResponse( - message=( - f"Network commands not available: {', '.join(sorted(blocked))}. " - "Use web_fetch instead." - ), - error="network_blocked", - session_id=session_id, - ) - workspace = get_workspace_dir(session_id or "default") stdout, stderr, exit_code, timed_out = await run_sandboxed( diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/models.py b/autogpt_platform/backend/backend/api/features/chat/tools/models.py index 58ee3d1331..2319297c0a 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/models.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/models.py @@ -43,7 +43,6 @@ class ResponseType(str, Enum): # Web fetch WEB_FETCH = "web_fetch" # Code execution - PYTHON_EXEC = "python_exec" BASH_EXEC = "bash_exec" @@ -445,16 +444,6 @@ class WebFetchResponse(ToolResponseBase): truncated: bool = False -class PythonExecResponse(ToolResponseBase): - """Response for python_exec tool.""" - - type: ResponseType = ResponseType.PYTHON_EXEC - stdout: str - stderr: str - exit_code: int - timed_out: bool = False - - class BashExecResponse(ToolResponseBase): """Response for bash_exec tool.""" diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/python_exec.py b/autogpt_platform/backend/backend/api/features/chat/tools/python_exec.py deleted file mode 100644 index 87aa7c0d4b..0000000000 --- a/autogpt_platform/backend/backend/api/features/chat/tools/python_exec.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Python execution tool — run Python code in a network-isolated sandbox.""" - -import logging -import os -from typing import Any - -from backend.api.features.chat.model import ChatSession -from backend.api.features.chat.tools.base import BaseTool -from backend.api.features.chat.tools.models import ( - ErrorResponse, - PythonExecResponse, - ToolResponseBase, -) -from backend.api.features.chat.tools.sandbox import ( - get_workspace_dir, - has_network_sandbox, - run_sandboxed, -) - -logger = logging.getLogger(__name__) - -# Modules blocked via import hook when kernel network sandbox is unavailable -_BLOCKED_MODULES = { - "socket", - "ssl", - "http", - "urllib", - "requests", - "httpx", - "aiohttp", - "ftplib", - "smtplib", - "poplib", - "imaplib", - "telnetlib", - "xmlrpc", - "subprocess", - "ctypes", - "multiprocessing", -} - -# Security prelude injected before user code (only when unshare unavailable) -_SECURITY_PRELUDE = 
"""\ -import builtins as _b -_BLOCKED = {blocked} -_orig = _b.__import__ -def _si(name, *a, **k): - if name.split(".")[0] in _BLOCKED: - raise ImportError(f"Module '{{name}}' is not available in the sandbox") - return _orig(name, *a, **k) -_b.__import__ = _si -import os as _os -_os.system = lambda *a, **k: (_ for _ in ()).throw( - PermissionError("os.system is blocked") -) -_os.popen = lambda *a, **k: (_ for _ in ()).throw( - PermissionError("os.popen is blocked") -) -del _b, _BLOCKED, _orig, _si, _os -""" - - -class PythonExecTool(BaseTool): - """Execute Python code in a sandboxed environment.""" - - @property - def name(self) -> str: - return "python_exec" - - @property - def description(self) -> str: - return ( - "Execute Python code in a sandboxed environment. " - "SECURITY: All internet/network access is blocked at the kernel level " - "(no HTTP, sockets, DNS, or any outbound connections). " - "To fetch web content, use the web_fetch tool instead. " - "Code runs in an isolated per-session workspace directory — " - "it cannot read or write files outside that directory. " - "Execution is killed after the timeout (default 30s, max 120s). " - "Returns stdout and stderr. " - "Useful for data processing, calculations, text manipulation, " - "JSON/CSV parsing, and generating files in the workspace." - ) - - @property - def parameters(self) -> dict[str, Any]: - return { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "Python code to execute.", - }, - "timeout": { - "type": "integer", - "description": ( - "Max execution time in seconds (default 30, max 120)." - ), - "default": 30, - }, - }, - "required": ["code"], - } - - @property - def requires_auth(self) -> bool: - return False - - async def _execute( - self, - user_id: str | None, - session: ChatSession, - **kwargs: Any, - ) -> ToolResponseBase: - code: str = (kwargs.get("code") or "").strip() - timeout: int = kwargs.get("timeout", 30) - session_id = session.session_id if session else None - - if not code: - return ErrorResponse( - message="No code provided.", - error="empty_code", - session_id=session_id, - ) - - workspace = get_workspace_dir(session_id or "default") - - # Add security prelude when kernel network isolation is unavailable - if not has_network_sandbox(): - prelude = _SECURITY_PRELUDE.format(blocked=repr(_BLOCKED_MODULES)) - full_code = prelude + "\n" + code - else: - full_code = code - - script_path = os.path.join(workspace, "_exec.py") - try: - with open(script_path, "w") as f: - f.write(full_code) - - stdout, stderr, exit_code, timed_out = await run_sandboxed( - command=["python3", "-I", "-u", script_path], - cwd=workspace, - timeout=timeout, - ) - - return PythonExecResponse( - message=( - "Execution timed out" - if timed_out - else f"Code executed (exit {exit_code})" - ), - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - timed_out=timed_out, - session_id=session_id, - ) - finally: - try: - os.unlink(script_path) - except OSError: - pass diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/sandbox.py b/autogpt_platform/backend/backend/api/features/chat/tools/sandbox.py index 9ac56eda20..0fea6728e0 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/sandbox.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/sandbox.py @@ -1,7 +1,11 @@ """Sandbox execution utilities for code execution tools. 
-Provides network-isolated command execution using Linux ``unshare --net`` -(kernel-level, no bypass possible) with a fallback for development on macOS. +Provides filesystem + network isolated command execution using **bubblewrap** +(``bwrap``): whitelist-only filesystem (only system dirs visible read-only), +writable workspace only, clean environment, network blocked. + +Tools that call :func:`run_sandboxed` must first check :func:`has_full_sandbox` +and refuse to run if bubblewrap is not available. """ import asyncio @@ -18,23 +22,24 @@ _DEFAULT_TIMEOUT = 30 _MAX_TIMEOUT = 120 -def _check_unshare() -> bool: - """Check if ``unshare --net`` is available for kernel-level network isolation.""" - if platform.system() != "Linux": - return False - return shutil.which("unshare") is not None +# --------------------------------------------------------------------------- +# Sandbox capability detection (cached at first call) +# --------------------------------------------------------------------------- + +_BWRAP_AVAILABLE: bool | None = None -# Cached at import time so we don't shell out on every call -_UNSHARE_AVAILABLE: bool | None = None +def has_full_sandbox() -> bool: + """Return True if bubblewrap is available (filesystem + network isolation). - -def has_network_sandbox() -> bool: - """Return True if kernel-level network isolation is available.""" - global _UNSHARE_AVAILABLE - if _UNSHARE_AVAILABLE is None: - _UNSHARE_AVAILABLE = _check_unshare() - return _UNSHARE_AVAILABLE + On non-Linux platforms (macOS), always returns False. + """ + global _BWRAP_AVAILABLE + if _BWRAP_AVAILABLE is None: + _BWRAP_AVAILABLE = ( + platform.system() == "Linux" and shutil.which("bwrap") is not None + ) + return _BWRAP_AVAILABLE WORKSPACE_PREFIX = "/tmp/copilot-" @@ -70,30 +75,122 @@ def get_workspace_dir(session_id: str) -> str: """Get or create the workspace directory for a session. Uses :func:`make_session_path` — the same path the SDK uses — so that - python_exec / bash_exec share the workspace with the SDK file tools. + bash_exec shares the workspace with the SDK file tools. """ workspace = make_session_path(session_id) os.makedirs(workspace, exist_ok=True) return workspace +# --------------------------------------------------------------------------- +# Bubblewrap command builder +# --------------------------------------------------------------------------- + +# System directories mounted read-only inside the sandbox. +# ONLY these are visible — /app, /root, /home, /opt, /var etc. are NOT accessible. +_SYSTEM_RO_BINDS = [ + "/usr", # binaries, libraries, Python interpreter + "/etc", # system config: ld.so, locale, passwd, alternatives +] + +# Symlinks to /usr/* on modern Debian, may be real dirs on older systems. +_COMPAT_RO_BINDS = [ + "/bin", # -> /usr/bin on Debian 13 + "/sbin", # -> /usr/sbin on Debian 13 + "/lib", # -> /usr/lib on Debian 13 + "/lib64", # 64-bit libraries (may not exist) +] + + +def _build_bwrap_command( + command: list[str], cwd: str, env: dict[str, str] +) -> list[str]: + """Build a bubblewrap command with strict filesystem + network isolation. + + Security model: + - **Whitelist-only filesystem**: only system directories (``/usr``, ``/etc``, + ``/bin``, ``/lib``) are mounted read-only. Application code (``/app``), + home directories, ``/var``, ``/opt``, etc. are NOT accessible at all. + - **Writable workspace only**: the per-session workspace is the sole + writable path. + - **Clean environment**: ``--clearenv`` wipes all inherited env vars. 
+ Only the explicitly-passed safe env vars are set inside the sandbox. + - **Network isolation**: ``--unshare-net`` blocks all network access. + - **New session**: prevents terminal control escape. + - **Die with parent**: prevents orphaned sandbox processes. + """ + cmd = [ + "bwrap", + # Wipe all inherited environment variables (API keys, secrets, etc.) + "--clearenv", + ] + + # Set only the safe env vars inside the sandbox + for key, value in env.items(): + cmd.extend(["--setenv", key, value]) + + # System directories: read-only + for path in _SYSTEM_RO_BINDS: + cmd.extend(["--ro-bind", path, path]) + + # Compat paths: bind only if they exist on the host + for path in _COMPAT_RO_BINDS: + if os.path.exists(path): + cmd.extend(["--ro-bind", path, path]) + + cmd.extend( + [ + # Writable workspace only + "--bind", + cwd, + cwd, + # Fresh virtual filesystems + "--dev", + "/dev", + "--proc", + "/proc", + "--tmpdir", + "/tmp", + # Isolation + "--unshare-net", + "--die-with-parent", + "--new-session", + "--chdir", + cwd, + "--", + *command, + ] + ) + + return cmd + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + async def run_sandboxed( command: list[str], cwd: str, timeout: int = _DEFAULT_TIMEOUT, env: dict[str, str] | None = None, ) -> tuple[str, str, int, bool]: - """Run a command in a sandboxed environment. + """Run a command inside a bubblewrap sandbox. + + Callers **must** check :func:`has_full_sandbox` before calling this + function. If bubblewrap is not available, this function raises + :class:`RuntimeError` rather than running unsandboxed. Returns: (stdout, stderr, exit_code, timed_out) - - Security layers: - - Network isolation via ``unshare --net`` (Linux) - - Restricted working directory - - Minimal environment variables - - Hard timeout """ + if not has_full_sandbox(): + raise RuntimeError( + "run_sandboxed() requires bubblewrap but bwrap is not available. " + "Callers must check has_full_sandbox() before calling this function." + ) + timeout = min(max(timeout, 1), _MAX_TIMEOUT) safe_env = { @@ -107,11 +204,7 @@ async def run_sandboxed( if env: safe_env.update(env) - # Wrap with unshare --net on Linux for kernel-level network isolation - if has_network_sandbox(): - full_command = ["unshare", "--net", *command] - else: - full_command = command + full_command = _build_bwrap_command(command, cwd, safe_env) try: proc = await asyncio.create_subprocess_exec( @@ -134,5 +227,7 @@ async def run_sandboxed( await proc.communicate() return "", f"Execution timed out after {timeout}s", -1, True + except RuntimeError: + raise except Exception as e: return "", f"Sandbox error: {e}", -1, False diff --git a/autogpt_platform/backend/test/chat/__init__.py b/autogpt_platform/backend/test/chat/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/autogpt_platform/backend/test/chat/test_security_hooks.py b/autogpt_platform/backend/test/chat/test_security_hooks.py new file mode 100644 index 0000000000..f10a90871b --- /dev/null +++ b/autogpt_platform/backend/test/chat/test_security_hooks.py @@ -0,0 +1,133 @@ +"""Tests for SDK security hooks — workspace paths, tool access, and deny messages. + +These are pure unit tests with no external dependencies (no SDK, no DB, no server). +They validate that the security hooks correctly block unauthorized paths, +tool access, and dangerous input patterns. 
+ +Note: Bash command validation was removed — the SDK built-in Bash tool is not in +allowed_tools, and the bash_exec MCP tool has kernel-level network isolation +(unshare --net) making command-level parsing unnecessary. +""" + +from backend.api.features.chat.sdk.security_hooks import ( + _validate_tool_access, + _validate_workspace_path, +) + +SDK_CWD = "/tmp/copilot-test-session" + + +def _is_denied(result: dict) -> bool: + hook = result.get("hookSpecificOutput", {}) + return hook.get("permissionDecision") == "deny" + + +def _reason(result: dict) -> str: + return result.get("hookSpecificOutput", {}).get("permissionDecisionReason", "") + + +# ============================================================ +# Workspace path validation (Read, Write, Edit, etc.) +# ============================================================ + + +class TestWorkspacePathValidation: + def test_path_in_workspace(self): + result = _validate_workspace_path( + "Read", {"file_path": f"{SDK_CWD}/file.txt"}, SDK_CWD + ) + assert not _is_denied(result) + + def test_path_outside_workspace(self): + result = _validate_workspace_path("Read", {"file_path": "/etc/passwd"}, SDK_CWD) + assert _is_denied(result) + + def test_tool_results_allowed(self): + result = _validate_workspace_path( + "Read", + {"file_path": "~/.claude/projects/abc/tool-results/out.txt"}, + SDK_CWD, + ) + assert not _is_denied(result) + + def test_claude_settings_blocked(self): + result = _validate_workspace_path( + "Read", {"file_path": "~/.claude/settings.json"}, SDK_CWD + ) + assert _is_denied(result) + + def test_claude_projects_without_tool_results(self): + result = _validate_workspace_path( + "Read", {"file_path": "~/.claude/projects/abc/credentials.json"}, SDK_CWD + ) + assert _is_denied(result) + + def test_no_path_allowed(self): + """Glob/Grep without path defaults to cwd — should be allowed.""" + result = _validate_workspace_path("Grep", {"pattern": "foo"}, SDK_CWD) + assert not _is_denied(result) + + def test_path_traversal_with_dotdot(self): + result = _validate_workspace_path( + "Read", {"file_path": f"{SDK_CWD}/../../../etc/passwd"}, SDK_CWD + ) + assert _is_denied(result) + + +# ============================================================ +# Tool access validation +# ============================================================ + + +class TestToolAccessValidation: + def test_blocked_tools(self): + for tool in ("bash", "shell", "exec", "terminal", "command"): + result = _validate_tool_access(tool, {}) + assert _is_denied(result), f"Tool '{tool}' should be blocked" + + def test_bash_builtin_blocked(self): + """SDK built-in Bash (capital) is blocked as defence-in-depth.""" + result = _validate_tool_access("Bash", {"command": "echo hello"}, SDK_CWD) + assert _is_denied(result) + assert "Bash" in _reason(result) + + def test_workspace_tools_delegate(self): + result = _validate_tool_access( + "Read", {"file_path": f"{SDK_CWD}/file.txt"}, SDK_CWD + ) + assert not _is_denied(result) + + def test_dangerous_pattern_blocked(self): + result = _validate_tool_access("SomeUnknownTool", {"data": "sudo rm -rf /"}) + assert _is_denied(result) + + def test_safe_unknown_tool_allowed(self): + result = _validate_tool_access("SomeSafeTool", {"data": "hello world"}) + assert not _is_denied(result) + + +# ============================================================ +# Deny message quality (ntindle feedback) +# ============================================================ + + +class TestDenyMessageClarity: + """Deny messages must include [SECURITY] and 'cannot be 
bypassed' + so the model knows the restriction is enforced, not a suggestion.""" + + def test_blocked_tool_message(self): + reason = _reason(_validate_tool_access("bash", {})) + assert "[SECURITY]" in reason + assert "cannot be bypassed" in reason + + def test_bash_builtin_blocked_message(self): + reason = _reason(_validate_tool_access("Bash", {"command": "echo hello"})) + assert "[SECURITY]" in reason + assert "cannot be bypassed" in reason + + def test_workspace_path_message(self): + reason = _reason( + _validate_workspace_path("Read", {"file_path": "/etc/passwd"}, SDK_CWD) + ) + assert "[SECURITY]" in reason + assert "cannot be bypassed" in reason