From 1dd53db21c6b30c480f9d4253534a10985e23672 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Thu, 12 Feb 2026 21:44:40 +0400 Subject: [PATCH] feat(chat/sandbox): bubblewrap sandbox for bash_exec, remove python_exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace `--ro-bind / /` with whitelist-only filesystem: only /usr, /etc, /bin, /lib, /sbin mounted read-only. /app, /root, /home, /opt, /var are completely invisible inside the sandbox. - Add `--clearenv` to wipe all inherited env vars (API keys, DB passwords). Only safe vars (PATH, HOME=workspace, LANG) are explicitly set. - Remove python_exec tool — bash_exec can run `python3 -c` or heredocs with identical bubblewrap protection, reducing attack surface. - Remove all fallback security code (import hooks, blocked modules, network command lists). Tools now hard-require bubblewrap — disabled on platforms without bwrap. - Clean up security_hooks.py: remove ~200 lines of dead bash validation code, add Bash to BLOCKED_TOOLS as defence-in-depth. - Wire up long-running tool callback in SDK service for create_agent/edit_agent delegation to Redis Streams background infrastructure. --- autogpt_platform/backend/Dockerfile | 6 +- .../api/features/chat/sdk/security_hooks.py | 157 ++--------------- .../backend/api/features/chat/sdk/service.py | 148 ++++++++++++++-- .../api/features/chat/sdk/tool_adapter.py | 160 ++++------------- .../api/features/chat/tools/__init__.py | 4 +- .../api/features/chat/tools/bash_exec.py | 97 ++++------- .../backend/api/features/chat/tools/models.py | 11 -- .../api/features/chat/tools/python_exec.py | 162 ------------------ .../api/features/chat/tools/sandbox.py | 153 +++++++++++++---- .../backend/test/chat/__init__.py | 0 .../backend/test/chat/test_security_hooks.py | 133 ++++++++++++++ 11 files changed, 481 insertions(+), 550 deletions(-) delete mode 100644 autogpt_platform/backend/backend/api/features/chat/tools/python_exec.py create mode 100644 autogpt_platform/backend/test/chat/__init__.py create mode 100644 autogpt_platform/backend/test/chat/test_security_hooks.py diff --git a/autogpt_platform/backend/Dockerfile b/autogpt_platform/backend/Dockerfile index d5a216927c..ba5d04d056 100644 --- a/autogpt_platform/backend/Dockerfile +++ b/autogpt_platform/backend/Dockerfile @@ -62,8 +62,9 @@ ENV POETRY_HOME=/opt/poetry \ DEBIAN_FRONTEND=noninteractive ENV PATH=/opt/poetry/bin:$PATH -# Install Python, FFmpeg, ImageMagick, and CLI tools for agent use -# CLI tools match ALLOWED_BASH_COMMANDS in security_hooks.py +# Install Python, FFmpeg, ImageMagick, and CLI tools for agent use. +# bubblewrap provides OS-level sandbox (whitelist-only FS + no network) +# for the bash_exec MCP tool. 
RUN apt-get update && apt-get install -y \ python3.13 \ python3-pip \ @@ -72,6 +73,7 @@ RUN apt-get update && apt-get install -y \ jq \ ripgrep \ tree \ + bubblewrap \ && rm -rf /var/lib/apt/lists/* # Copy only necessary files from builder diff --git a/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py b/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py index 245a13407b..82e76924a0 100644 --- a/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py +++ b/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py @@ -8,15 +8,18 @@ import json import logging import os import re -import shlex from typing import Any, cast from backend.api.features.chat.sdk.tool_adapter import MCP_TOOL_PREFIX logger = logging.getLogger(__name__) -# Tools that are blocked entirely (CLI/system access) +# Tools that are blocked entirely (CLI/system access). +# "Bash" (capital) is the SDK built-in — it's NOT in allowed_tools but blocked +# here as defence-in-depth. The agent uses mcp__copilot__bash_exec instead, +# which has kernel-level network isolation (unshare --net). BLOCKED_TOOLS = { + "Bash", "bash", "shell", "exec", @@ -24,66 +27,11 @@ BLOCKED_TOOLS = { "command", } -# Safe read-only commands allowed in the sandboxed Bash tool. -# These are data-processing / inspection utilities — no writes, no network. -ALLOWED_BASH_COMMANDS = { - # JSON / structured data - "jq", - # Text processing - "grep", - "egrep", - "fgrep", - "rg", - "head", - "tail", - "cat", - "wc", - "sort", - "uniq", - "cut", - "tr", - "sed", - "awk", - "column", - "fold", - "fmt", - "nl", - "paste", - "rev", - # File inspection (read-only) - "find", - "ls", - "file", - "stat", - "du", - "tree", - "basename", - "dirname", - "realpath", - # Utilities - "echo", - "printf", - "date", - "true", - "false", - "xargs", - "tee", - # Comparison / encoding - "diff", - "comm", - "base64", - "md5sum", - "sha256sum", -} - # Tools allowed only when their path argument stays within the SDK workspace. # The SDK uses these to handle oversized tool results (writes to tool-results/ # files, then reads them back) and for workspace file operations. WORKSPACE_SCOPED_TOOLS = {"Read", "Write", "Edit", "Glob", "Grep"} -# Tools that get sandboxed Bash validation (command allowlist + workspace paths). -SANDBOXED_BASH_TOOLS = {"Bash"} - # Dangerous patterns in tool inputs DANGEROUS_PATTERNS = [ r"sudo", @@ -144,88 +92,11 @@ def _validate_workspace_path( f"Blocked {tool_name} outside workspace: {path} (resolved={resolved})" ) return _deny( - f"Tool '{tool_name}' can only access files within the workspace directory." + f"[SECURITY] Tool '{tool_name}' can only access files within the workspace " + "directory. This is enforced by the platform and cannot be bypassed." ) -def _validate_bash_command( - tool_input: dict[str, Any], sdk_cwd: str | None -) -> dict[str, Any]: - """Validate a Bash command against the allowlist of safe commands. - - Only read-only data-processing commands are allowed (jq, grep, head, etc.). - Blocks command substitution, output redirection, and disallowed executables. - - Uses ``shlex.split`` to properly handle quoted strings (e.g. jq filters - containing ``|`` won't be mistaken for shell pipes). 
- """ - command = tool_input.get("command", "") - if not command or not isinstance(command, str): - return _deny("Bash command is empty.") - - # Block command substitution — can smuggle arbitrary commands - if "$(" in command or "`" in command: - return _deny("Command substitution ($() or ``) is not allowed in Bash.") - - # Block output redirection — Bash should be read-only. - # Strip quoted strings first so `jq '.x > 5'` isn't a false positive, - # then check for unquoted > or >> (with or without surrounding spaces). - unquoted = re.sub(r"'[^']*'|\"[^\"]*\"", "", command) - if re.search(r"(?{1,2}", unquoted): - return _deny("Output redirection (> or >>) is not allowed in Bash.") - - # Block /dev/ access (e.g., /dev/tcp for network) - if "/dev/" in command: - return _deny("Access to /dev/ is not allowed in Bash.") - - # Tokenize with shlex (respects quotes), then extract command names. - # shlex preserves shell operators like | ; && || as separate tokens. - try: - tokens = shlex.split(command) - except ValueError: - return _deny("Malformed command (unmatched quotes).") - - # Walk tokens: the first non-assignment token after a pipe/separator is a command. - expect_command = True - for token in tokens: - if token in ("|", "||", "&&", ";"): - expect_command = True - continue - if expect_command: - # Skip env var assignments (VAR=value) - if "=" in token and not token.startswith("-"): - continue - cmd_name = os.path.basename(token) - if cmd_name not in ALLOWED_BASH_COMMANDS: - allowed = ", ".join(sorted(ALLOWED_BASH_COMMANDS)) - logger.warning(f"Blocked Bash command: {cmd_name}") - return _deny( - f"Command '{cmd_name}' is not allowed. " - f"Allowed commands: {allowed}" - ) - expect_command = False - - # Validate absolute file paths stay within workspace - if sdk_cwd: - norm_cwd = os.path.normpath(sdk_cwd) - claude_dir = os.path.normpath(os.path.expanduser("~/.claude/projects")) - for token in tokens: - if not token.startswith("/"): - continue - resolved = os.path.normpath(token) - if resolved.startswith(norm_cwd + os.sep) or resolved == norm_cwd: - continue - if resolved.startswith(claude_dir + os.sep) and "tool-results" in resolved: - continue - logger.warning(f"Blocked Bash path outside workspace: {token}") - return _deny( - f"Bash can only access files within the workspace directory. " - f"Path '{token}' is outside the workspace." - ) - - return {} - - def _validate_tool_access( tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None = None ) -> dict[str, Any]: @@ -238,14 +109,11 @@ def _validate_tool_access( if tool_name in BLOCKED_TOOLS: logger.warning(f"Blocked tool access attempt: {tool_name}") return _deny( - f"Tool '{tool_name}' is not available. " - "Use the CoPilot-specific tools instead." + f"[SECURITY] Tool '{tool_name}' is blocked for security. " + "This is enforced by the platform and cannot be bypassed. " + "Use the CoPilot-specific MCP tools instead." ) - # Sandboxed Bash: only allowlisted commands, workspace-scoped paths - if tool_name in SANDBOXED_BASH_TOOLS: - return _validate_bash_command(tool_input, sdk_cwd) - # Workspace-scoped tools: allowed only within the SDK workspace directory if tool_name in WORKSPACE_SCOPED_TOOLS: return _validate_workspace_path(tool_name, tool_input, sdk_cwd) @@ -259,7 +127,10 @@ def _validate_tool_access( logger.warning( f"Blocked dangerous pattern in tool input: {pattern} in {tool_name}" ) - return _deny("Input contains blocked pattern") + return _deny( + "[SECURITY] Input contains a blocked pattern. 
" + "This is enforced by the platform and cannot be bypassed." + ) return {} diff --git a/autogpt_platform/backend/backend/api/features/chat/sdk/service.py b/autogpt_platform/backend/backend/api/features/chat/sdk/service.py index 8ba92bf44f..cdcb00ec5f 100644 --- a/autogpt_platform/backend/backend/api/features/chat/sdk/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/sdk/service.py @@ -10,6 +10,7 @@ from typing import Any from backend.util.exceptions import NotFoundError +from .. import stream_registry from ..config import ChatConfig from ..model import ( ChatMessage, @@ -27,13 +28,19 @@ from ..response_model import ( StreamToolInputAvailable, StreamToolOutputAvailable, ) -from ..service import _build_system_prompt, _generate_session_title +from ..service import ( + _build_system_prompt, + _execute_long_running_tool_with_streaming, + _generate_session_title, +) +from ..tools.models import OperationPendingResponse, OperationStartedResponse from ..tools.sandbox import WORKSPACE_PREFIX, make_session_path from ..tracking import track_user_message from .response_adapter import SDKResponseAdapter from .security_hooks import create_security_hooks from .tool_adapter import ( COPILOT_TOOL_NAMES, + LongRunningCallback, create_copilot_mcp_server, set_execution_context, ) @@ -47,21 +54,136 @@ _background_tasks: set[asyncio.Task[Any]] = set() _SDK_CWD_PREFIX = WORKSPACE_PREFIX -# Appended to the system prompt to inform the agent about Bash restrictions. -# The SDK already describes each tool (Read, Write, Edit, Glob, Grep, Bash), -# but it doesn't know about our security hooks' command allowlist for Bash. +# Appended to the system prompt to inform the agent about available tools. +# The SDK built-in Bash is NOT available — use mcp__copilot__bash_exec instead, +# which has kernel-level network isolation (unshare --net). _SDK_TOOL_SUPPLEMENT = """ -## Bash restrictions +## Tool notes -The Bash tool is restricted to safe, read-only data-processing commands: -jq, grep, head, tail, cat, wc, sort, uniq, cut, tr, sed, awk, find, ls, -echo, diff, base64, and similar utilities. -Network commands (curl, wget), destructive commands (rm, chmod), and -interpreters (python, node) are NOT available. +- The SDK built-in Bash tool is NOT available. Use the `bash_exec` MCP tool + for shell commands — it runs in a network-isolated sandbox. +- Long-running tools (create_agent, edit_agent, etc.) are handled + asynchronously. You will receive an immediate response; the actual result + is delivered to the user via a background stream. """ +def _build_long_running_callback(user_id: str | None) -> LongRunningCallback: + """Build a callback that delegates long-running tools to the non-SDK infrastructure. + + Long-running tools (create_agent, edit_agent, etc.) are delegated to the + existing background infrastructure: stream_registry (Redis Streams), + database persistence, and SSE reconnection. This means results survive + page refreshes / pod restarts, and the frontend shows the proper loading + widget with progress updates. + + The returned callback matches the ``LongRunningCallback`` signature: + ``(tool_name, args, session) -> MCP response dict``. 
+ """ + + async def _callback( + tool_name: str, args: dict[str, Any], session: ChatSession + ) -> dict[str, Any]: + operation_id = str(uuid.uuid4()) + task_id = str(uuid.uuid4()) + tool_call_id = f"sdk-{uuid.uuid4().hex[:12]}" + session_id = session.session_id + + # --- Build user-friendly messages (matches non-SDK service) --- + if tool_name == "create_agent": + desc = args.get("description", "") + desc_preview = (desc[:100] + "...") if len(desc) > 100 else desc + pending_msg = ( + f"Creating your agent: {desc_preview}" + if desc_preview + else "Creating agent... This may take a few minutes." + ) + started_msg = ( + "Agent creation started. You can close this tab - " + "check your library in a few minutes." + ) + elif tool_name == "edit_agent": + changes = args.get("changes", "") + changes_preview = (changes[:100] + "...") if len(changes) > 100 else changes + pending_msg = ( + f"Editing agent: {changes_preview}" + if changes_preview + else "Editing agent... This may take a few minutes." + ) + started_msg = ( + "Agent edit started. You can close this tab - " + "check your library in a few minutes." + ) + else: + pending_msg = f"Running {tool_name}... This may take a few minutes." + started_msg = ( + f"{tool_name} started. You can close this tab - " + "check back in a few minutes." + ) + + # --- Register task in Redis for SSE reconnection --- + await stream_registry.create_task( + task_id=task_id, + session_id=session_id, + user_id=user_id, + tool_call_id=tool_call_id, + tool_name=tool_name, + operation_id=operation_id, + ) + + # --- Save OperationPendingResponse to chat history --- + pending_message = ChatMessage( + role="tool", + content=OperationPendingResponse( + message=pending_msg, + operation_id=operation_id, + tool_name=tool_name, + ).model_dump_json(), + tool_call_id=tool_call_id, + ) + session.messages.append(pending_message) + await upsert_chat_session(session) + + # --- Spawn background task (reuses non-SDK infrastructure) --- + bg_task = asyncio.create_task( + _execute_long_running_tool_with_streaming( + tool_name=tool_name, + parameters=args, + tool_call_id=tool_call_id, + operation_id=operation_id, + task_id=task_id, + session_id=session_id, + user_id=user_id, + ) + ) + _background_tasks.add(bg_task) + bg_task.add_done_callback(_background_tasks.discard) + await stream_registry.set_task_asyncio_task(task_id, bg_task) + + logger.info( + f"[SDK] Long-running tool {tool_name} delegated to background " + f"(operation_id={operation_id}, task_id={task_id})" + ) + + # --- Return OperationStartedResponse as MCP tool result --- + # This flows through SDK → response adapter → frontend, triggering + # the loading widget with SSE reconnection support. + started_json = OperationStartedResponse( + message=started_msg, + operation_id=operation_id, + tool_name=tool_name, + task_id=task_id, + ).model_dump_json() + + return { + "content": [{"type": "text", "text": started_json}], + "isError": False, + } + + return _callback + + def _resolve_sdk_model() -> str | None: """Resolve the model name for the Claude Agent SDK CLI. 
@@ -339,7 +461,11 @@ async def stream_chat_completion_sdk( sdk_cwd = _make_sdk_cwd(session_id) os.makedirs(sdk_cwd, exist_ok=True) - set_execution_context(user_id, session) + set_execution_context( + user_id, + session, + long_running_callback=_build_long_running_callback(user_id), + ) try: try: diff --git a/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py b/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py index 15f9a2cb4b..ad69f13874 100644 --- a/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py +++ b/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py @@ -2,15 +2,19 @@ This module provides the adapter layer that converts existing BaseTool implementations into in-process MCP tools that can be used with the Claude Agent SDK. + +Long-running tools (``is_long_running=True``) are delegated to the non-SDK +background infrastructure (stream_registry, Redis persistence, SSE reconnection) +via a callback provided by the service layer. This avoids wasteful SDK polling +and makes results survive page refreshes. """ -import asyncio import json import logging import os import uuid +from collections.abc import Awaitable, Callable from contextvars import ContextVar -from dataclasses import dataclass from typing import Any from backend.api.features.chat.model import ChatSession @@ -40,37 +44,38 @@ _pending_tool_outputs: ContextVar[dict[str, str]] = ContextVar( "pending_tool_outputs", default=None # type: ignore[arg-type] ) +# Callback type for delegating long-running tools to the non-SDK infrastructure. +# Args: (tool_name, arguments, session) → MCP-formatted response dict. +LongRunningCallback = Callable[ + [str, dict[str, Any], ChatSession], Awaitable[dict[str, Any]] +] -@dataclass -class _BackgroundOp: - """Tracks a background tool operation.""" - - tool_name: str - task: asyncio.Task[Any] - result: dict[str, Any] | None = None - done: bool = False - - -# Module-level registry for background long-running operations. -# Keyed by operation_id. Cleaned up after result is consumed. -_background_ops: dict[str, _BackgroundOp] = {} -_background_ops_lock = asyncio.Lock() - -_CHECK_OP_TOOL_NAME = "check_operation" +# ContextVar so the service layer can inject the callback per-request. +_long_running_callback: ContextVar[LongRunningCallback | None] = ContextVar( + "long_running_callback", default=None +) def set_execution_context( user_id: str | None, session: ChatSession, + long_running_callback: LongRunningCallback | None = None, ) -> None: """Set the execution context for tool calls. This must be called before streaming begins to ensure tools have access to user_id and session information. + + Args: + user_id: Current user's ID. + session: Current chat session. + long_running_callback: Optional callback to delegate long-running tools + to the non-SDK background infrastructure (stream_registry + Redis). """ _current_user_id.set(user_id) _current_session.set(session) _pending_tool_outputs.set({}) + _long_running_callback.set(long_running_callback) def get_execution_context() -> tuple[str | None, ChatSession | None]: @@ -142,9 +147,10 @@ def create_tool_handler(base_tool: BaseTool): This wraps the existing BaseTool._execute method to be compatible with the Claude Agent SDK MCP tool format. - Long-running tools (``is_long_running=True``) are spawned as background - tasks and return immediately with an ``operation_id``. The SDK should - then poll ``check_operation`` to retrieve the result. 
+ Long-running tools (``is_long_running=True``) are delegated to the + non-SDK background infrastructure via a callback set in the execution + context. The callback persists the operation in Redis (stream_registry) + so results survive page refreshes and pod restarts. """ async def tool_handler(args: dict[str, Any]) -> dict[str, Any]: @@ -154,52 +160,23 @@ def create_tool_handler(base_tool: BaseTool): if session is None: return _mcp_error("No session context available") - # --- Long-running: fire-and-forget, return operation_id --- + # --- Long-running: delegate to non-SDK background infrastructure --- if base_tool.is_long_running: - op_id = f"op-{uuid.uuid4().hex[:12]}" - - async def _bg_run() -> None: + callback = _long_running_callback.get(None) + if callback: try: - result = await _execute_tool_sync(base_tool, user_id, session, args) - op = _background_ops.get(op_id) - if op: - op.result = result - op.done = True - except Exception as exc: - op = _background_ops.get(op_id) - if op: - op.result = _mcp_error(str(exc)) - op.done = True + return await callback(base_tool.name, args, session) + except Exception as e: logger.error( - f"Background tool {base_tool.name} failed: {exc}", + f"Long-running callback failed for {base_tool.name}: {e}", exc_info=True, ) - - task = asyncio.create_task(_bg_run()) - _background_ops[op_id] = _BackgroundOp(tool_name=base_tool.name, task=task) - logger.info( - f"[SDK] Long-running tool {base_tool.name} started " - f"(operation_id={op_id})" + return _mcp_error(f"Failed to start {base_tool.name}: {e}") + # No callback — fall through to synchronous execution + logger.warning( + f"[SDK] No long-running callback for {base_tool.name}, " + f"executing synchronously (may block)" ) - return { - "content": [ - { - "type": "text", - "text": json.dumps( - { - "status": "started", - "operation_id": op_id, - "message": ( - f"{base_tool.name} is running in the background. " - f"Call check_operation with " - f"operation_id='{op_id}' to get the result." - ), - } - ), - } - ], - "isError": False, - } # --- Normal (fast) tool: execute synchronously --- try: @@ -255,58 +232,6 @@ async def _read_file_handler(args: dict[str, Any]) -> dict[str, Any]: } -async def _check_operation_handler(args: dict[str, Any]) -> dict[str, Any]: - """Check the status of a background long-running operation.""" - op_id = args.get("operation_id", "") - if not op_id or op_id not in _background_ops: - return _mcp_error(f"Operation '{op_id}' not found.") - - op = _background_ops[op_id] - if not op.done: - return { - "content": [ - { - "type": "text", - "text": json.dumps( - { - "status": "in_progress", - "operation_id": op_id, - "tool_name": op.tool_name, - "message": ( - f"{op.tool_name} is still running. " - "Check again in a few seconds." - ), - } - ), - } - ], - "isError": False, - } - - # Done — return result and clean up - result = op.result or _mcp_error("Operation completed but no result available.") - del _background_ops[op_id] - logger.info(f"[SDK] Background operation {op_id} ({op.tool_name}) collected") - return result - - -_CHECK_OP_DESCRIPTION = ( - "Check the status of a background operation started by a long-running tool " - "(like create_agent). Returns the result when done, or 'in_progress' if still " - "running. Call this periodically (every few seconds) after starting an operation." 
-) -_CHECK_OP_SCHEMA = { - "type": "object", - "properties": { - "operation_id": { - "type": "string", - "description": "The operation_id returned by the long-running tool.", - }, - }, - "required": ["operation_id"], -} - - _READ_TOOL_NAME = "Read" _READ_TOOL_DESCRIPTION = ( "Read a file from the local filesystem. " @@ -365,14 +290,6 @@ def create_copilot_mcp_server(): )(_read_file_handler) sdk_tools.append(read_tool) - # Add the check_operation tool for polling background operations - check_op_tool = tool( - _CHECK_OP_TOOL_NAME, - _CHECK_OP_DESCRIPTION, - _CHECK_OP_SCHEMA, - )(_check_operation_handler) - sdk_tools.append(check_op_tool) - server = create_sdk_mcp_server( name=MCP_SERVER_NAME, version="1.0.0", @@ -399,6 +316,5 @@ _SDK_BUILTIN_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep", "Task"] COPILOT_TOOL_NAMES = [ *[f"{MCP_TOOL_PREFIX}{name}" for name in TOOL_REGISTRY.keys()], f"{MCP_TOOL_PREFIX}{_READ_TOOL_NAME}", - f"{MCP_TOOL_PREFIX}{_CHECK_OP_TOOL_NAME}", *_SDK_BUILTIN_TOOLS, ] diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/__init__.py b/autogpt_platform/backend/backend/api/features/chat/tools/__init__.py index c0ed346a2d..9c9a51b2b4 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/__init__.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/__init__.py @@ -17,7 +17,6 @@ from .find_agent import FindAgentTool from .find_block import FindBlockTool from .find_library_agent import FindLibraryAgentTool from .get_doc_page import GetDocPageTool -from .python_exec import PythonExecTool from .run_agent import RunAgentTool from .run_block import RunBlockTool from .search_docs import SearchDocsTool @@ -50,8 +49,7 @@ TOOL_REGISTRY: dict[str, BaseTool] = { "get_doc_page": GetDocPageTool(), # Web fetch for safe URL retrieval "web_fetch": WebFetchTool(), - # Sandboxed code execution (network-isolated) - "python_exec": PythonExecTool(), + # Sandboxed code execution (bubblewrap) "bash_exec": BashExecTool(), # Workspace tools for CoPilot file operations "list_workspace_files": ListWorkspaceFilesTool(), diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/bash_exec.py b/autogpt_platform/backend/backend/api/features/chat/tools/bash_exec.py index 538cdeb26b..7bd74e124f 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/bash_exec.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/bash_exec.py @@ -1,12 +1,14 @@ -"""Bash execution tool — run shell commands in a network-isolated sandbox. +"""Bash execution tool — run shell commands in a bubblewrap sandbox. Full Bash scripting is allowed (loops, conditionals, pipes, functions, etc.). -Safety comes from kernel-level network isolation and workspace confinement, -not from restricting language features. +Safety comes from OS-level isolation (bubblewrap): only system dirs visible +read-only, writable workspace only, clean env, no network. + +Requires bubblewrap (``bwrap``) — the tool is disabled when bwrap is not +available (e.g. macOS development). 
""" import logging -import re from typing import Any from backend.api.features.chat.model import ChatSession @@ -18,46 +20,15 @@ from backend.api.features.chat.tools.models import ( ) from backend.api.features.chat.tools.sandbox import ( get_workspace_dir, - has_network_sandbox, + has_full_sandbox, run_sandboxed, ) logger = logging.getLogger(__name__) -# Destructive patterns blocked regardless of network sandbox -_BLOCKED_PATTERNS: list[tuple[str, str]] = [ - (r"rm\s+-[a-zA-Z]*r[a-zA-Z]*\s+/(?!\w)", "Recursive removal of root paths"), - (r"dd\s+.*of=/dev/", "Direct disk writes"), - (r"mkfs\b", "Filesystem formatting"), - (r":\(\)\s*\{", "Fork bomb"), - (r"\bshutdown\b|\breboot\b|\bhalt\b|\bpoweroff\b", "System power commands"), - (r"/dev/sd[a-z]|/dev/nvme|/dev/hd[a-z]", "Raw disk device access"), -] - -# Commands blocked when kernel network sandbox is NOT available (fallback) -_NETWORK_COMMANDS = { - "curl", - "wget", - "ssh", - "scp", - "sftp", - "rsync", - "nc", - "ncat", - "netcat", - "telnet", - "ftp", - "ping", - "traceroute", - "nslookup", - "dig", - "host", - "nmap", -} - class BashExecTool(BaseTool): - """Execute Bash commands in a sandboxed environment.""" + """Execute Bash commands in a bubblewrap sandbox.""" @property def name(self) -> str: @@ -65,14 +36,21 @@ class BashExecTool(BaseTool): @property def description(self) -> str: + if not has_full_sandbox(): + return ( + "Bash execution is DISABLED — bubblewrap sandbox is not " + "available on this platform. Do not call this tool." + ) return ( - "Execute a Bash command or script in a sandboxed environment. " - "Full Bash scripting is supported (loops, conditionals, pipes, functions, etc.). " - "SECURITY: All internet/network access is blocked at the kernel level " - "(no curl, wget, nc, or any outbound connections). " + "Execute a Bash command or script in a bubblewrap sandbox. " + "Full Bash scripting is supported (loops, conditionals, pipes, " + "functions, etc.). " + "SECURITY: Only system directories (/usr, /bin, /lib, /etc) are " + "visible read-only, the per-session workspace is the only writable " + "path, environment variables are wiped (no secrets), and all " + "network access is blocked at the kernel level. Application code, " + "configs, and other directories are NOT accessible. " "To fetch web content, use the web_fetch tool instead. " - "Commands run in an isolated per-session workspace directory — " - "they cannot access files outside that directory. " "Execution is killed after the timeout (default 30s, max 120s). " "Returns stdout and stderr. 
" "Useful for file manipulation, data processing with Unix tools " @@ -109,9 +87,17 @@ class BashExecTool(BaseTool): session: ChatSession, **kwargs: Any, ) -> ToolResponseBase: + session_id = session.session_id if session else None + + if not has_full_sandbox(): + return ErrorResponse( + message="bash_exec requires bubblewrap sandbox (Linux only).", + error="sandbox_unavailable", + session_id=session_id, + ) + command: str = (kwargs.get("command") or "").strip() timeout: int = kwargs.get("timeout", 30) - session_id = session.session_id if session else None if not command: return ErrorResponse( @@ -120,29 +106,6 @@ class BashExecTool(BaseTool): session_id=session_id, ) - # Block destructive patterns - for pattern, reason in _BLOCKED_PATTERNS: - if re.search(pattern, command, re.IGNORECASE): - return ErrorResponse( - message=f"Command blocked: {reason}", - error="blocked_command", - session_id=session_id, - ) - - # When kernel network sandbox unavailable, block network commands - if not has_network_sandbox(): - words = set(re.findall(r"\b\w+\b", command)) - blocked = words & _NETWORK_COMMANDS - if blocked: - return ErrorResponse( - message=( - f"Network commands not available: {', '.join(sorted(blocked))}. " - "Use web_fetch instead." - ), - error="network_blocked", - session_id=session_id, - ) - workspace = get_workspace_dir(session_id or "default") stdout, stderr, exit_code, timed_out = await run_sandboxed( diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/models.py b/autogpt_platform/backend/backend/api/features/chat/tools/models.py index 58ee3d1331..2319297c0a 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/models.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/models.py @@ -43,7 +43,6 @@ class ResponseType(str, Enum): # Web fetch WEB_FETCH = "web_fetch" # Code execution - PYTHON_EXEC = "python_exec" BASH_EXEC = "bash_exec" @@ -445,16 +444,6 @@ class WebFetchResponse(ToolResponseBase): truncated: bool = False -class PythonExecResponse(ToolResponseBase): - """Response for python_exec tool.""" - - type: ResponseType = ResponseType.PYTHON_EXEC - stdout: str - stderr: str - exit_code: int - timed_out: bool = False - - class BashExecResponse(ToolResponseBase): """Response for bash_exec tool.""" diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/python_exec.py b/autogpt_platform/backend/backend/api/features/chat/tools/python_exec.py deleted file mode 100644 index 87aa7c0d4b..0000000000 --- a/autogpt_platform/backend/backend/api/features/chat/tools/python_exec.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Python execution tool — run Python code in a network-isolated sandbox.""" - -import logging -import os -from typing import Any - -from backend.api.features.chat.model import ChatSession -from backend.api.features.chat.tools.base import BaseTool -from backend.api.features.chat.tools.models import ( - ErrorResponse, - PythonExecResponse, - ToolResponseBase, -) -from backend.api.features.chat.tools.sandbox import ( - get_workspace_dir, - has_network_sandbox, - run_sandboxed, -) - -logger = logging.getLogger(__name__) - -# Modules blocked via import hook when kernel network sandbox is unavailable -_BLOCKED_MODULES = { - "socket", - "ssl", - "http", - "urllib", - "requests", - "httpx", - "aiohttp", - "ftplib", - "smtplib", - "poplib", - "imaplib", - "telnetlib", - "xmlrpc", - "subprocess", - "ctypes", - "multiprocessing", -} - -# Security prelude injected before user code (only when unshare unavailable) -_SECURITY_PRELUDE = 
"""\ -import builtins as _b -_BLOCKED = {blocked} -_orig = _b.__import__ -def _si(name, *a, **k): - if name.split(".")[0] in _BLOCKED: - raise ImportError(f"Module '{{name}}' is not available in the sandbox") - return _orig(name, *a, **k) -_b.__import__ = _si -import os as _os -_os.system = lambda *a, **k: (_ for _ in ()).throw( - PermissionError("os.system is blocked") -) -_os.popen = lambda *a, **k: (_ for _ in ()).throw( - PermissionError("os.popen is blocked") -) -del _b, _BLOCKED, _orig, _si, _os -""" - - -class PythonExecTool(BaseTool): - """Execute Python code in a sandboxed environment.""" - - @property - def name(self) -> str: - return "python_exec" - - @property - def description(self) -> str: - return ( - "Execute Python code in a sandboxed environment. " - "SECURITY: All internet/network access is blocked at the kernel level " - "(no HTTP, sockets, DNS, or any outbound connections). " - "To fetch web content, use the web_fetch tool instead. " - "Code runs in an isolated per-session workspace directory — " - "it cannot read or write files outside that directory. " - "Execution is killed after the timeout (default 30s, max 120s). " - "Returns stdout and stderr. " - "Useful for data processing, calculations, text manipulation, " - "JSON/CSV parsing, and generating files in the workspace." - ) - - @property - def parameters(self) -> dict[str, Any]: - return { - "type": "object", - "properties": { - "code": { - "type": "string", - "description": "Python code to execute.", - }, - "timeout": { - "type": "integer", - "description": ( - "Max execution time in seconds (default 30, max 120)." - ), - "default": 30, - }, - }, - "required": ["code"], - } - - @property - def requires_auth(self) -> bool: - return False - - async def _execute( - self, - user_id: str | None, - session: ChatSession, - **kwargs: Any, - ) -> ToolResponseBase: - code: str = (kwargs.get("code") or "").strip() - timeout: int = kwargs.get("timeout", 30) - session_id = session.session_id if session else None - - if not code: - return ErrorResponse( - message="No code provided.", - error="empty_code", - session_id=session_id, - ) - - workspace = get_workspace_dir(session_id or "default") - - # Add security prelude when kernel network isolation is unavailable - if not has_network_sandbox(): - prelude = _SECURITY_PRELUDE.format(blocked=repr(_BLOCKED_MODULES)) - full_code = prelude + "\n" + code - else: - full_code = code - - script_path = os.path.join(workspace, "_exec.py") - try: - with open(script_path, "w") as f: - f.write(full_code) - - stdout, stderr, exit_code, timed_out = await run_sandboxed( - command=["python3", "-I", "-u", script_path], - cwd=workspace, - timeout=timeout, - ) - - return PythonExecResponse( - message=( - "Execution timed out" - if timed_out - else f"Code executed (exit {exit_code})" - ), - stdout=stdout, - stderr=stderr, - exit_code=exit_code, - timed_out=timed_out, - session_id=session_id, - ) - finally: - try: - os.unlink(script_path) - except OSError: - pass diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/sandbox.py b/autogpt_platform/backend/backend/api/features/chat/tools/sandbox.py index 9ac56eda20..0fea6728e0 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/sandbox.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/sandbox.py @@ -1,7 +1,11 @@ """Sandbox execution utilities for code execution tools. 
-Provides network-isolated command execution using Linux ``unshare --net`` -(kernel-level, no bypass possible) with a fallback for development on macOS. +Provides filesystem + network isolated command execution using **bubblewrap** +(``bwrap``): whitelist-only filesystem (only system dirs visible read-only), +writable workspace only, clean environment, network blocked. + +Tools that call :func:`run_sandboxed` must first check :func:`has_full_sandbox` +and refuse to run if bubblewrap is not available. """ import asyncio @@ -18,23 +22,24 @@ _DEFAULT_TIMEOUT = 30 _MAX_TIMEOUT = 120 -def _check_unshare() -> bool: - """Check if ``unshare --net`` is available for kernel-level network isolation.""" - if platform.system() != "Linux": - return False - return shutil.which("unshare") is not None +# --------------------------------------------------------------------------- +# Sandbox capability detection (cached at first call) +# --------------------------------------------------------------------------- + +_BWRAP_AVAILABLE: bool | None = None -# Cached at import time so we don't shell out on every call -_UNSHARE_AVAILABLE: bool | None = None +def has_full_sandbox() -> bool: + """Return True if bubblewrap is available (filesystem + network isolation). - -def has_network_sandbox() -> bool: - """Return True if kernel-level network isolation is available.""" - global _UNSHARE_AVAILABLE - if _UNSHARE_AVAILABLE is None: - _UNSHARE_AVAILABLE = _check_unshare() - return _UNSHARE_AVAILABLE + On non-Linux platforms (macOS), always returns False. + """ + global _BWRAP_AVAILABLE + if _BWRAP_AVAILABLE is None: + _BWRAP_AVAILABLE = ( + platform.system() == "Linux" and shutil.which("bwrap") is not None + ) + return _BWRAP_AVAILABLE WORKSPACE_PREFIX = "/tmp/copilot-" @@ -70,30 +75,122 @@ def get_workspace_dir(session_id: str) -> str: """Get or create the workspace directory for a session. Uses :func:`make_session_path` — the same path the SDK uses — so that - python_exec / bash_exec share the workspace with the SDK file tools. + bash_exec shares the workspace with the SDK file tools. """ workspace = make_session_path(session_id) os.makedirs(workspace, exist_ok=True) return workspace +# --------------------------------------------------------------------------- +# Bubblewrap command builder +# --------------------------------------------------------------------------- + +# System directories mounted read-only inside the sandbox. +# ONLY these are visible — /app, /root, /home, /opt, /var etc. are NOT accessible. +_SYSTEM_RO_BINDS = [ + "/usr", # binaries, libraries, Python interpreter + "/etc", # system config: ld.so, locale, passwd, alternatives +] + +# Symlinks to /usr/* on modern Debian, may be real dirs on older systems. +_COMPAT_RO_BINDS = [ + "/bin", # -> /usr/bin on Debian 13 + "/sbin", # -> /usr/sbin on Debian 13 + "/lib", # -> /usr/lib on Debian 13 + "/lib64", # 64-bit libraries (may not exist) +] + + +def _build_bwrap_command( + command: list[str], cwd: str, env: dict[str, str] +) -> list[str]: + """Build a bubblewrap command with strict filesystem + network isolation. + + Security model: + - **Whitelist-only filesystem**: only system directories (``/usr``, ``/etc``, + ``/bin``, ``/lib``) are mounted read-only. Application code (``/app``), + home directories, ``/var``, ``/opt``, etc. are NOT accessible at all. + - **Writable workspace only**: the per-session workspace is the sole + writable path. + - **Clean environment**: ``--clearenv`` wipes all inherited env vars. 
+ Only the explicitly-passed safe env vars are set inside the sandbox. + - **Network isolation**: ``--unshare-net`` blocks all network access. + - **New session**: prevents terminal control escape. + - **Die with parent**: prevents orphaned sandbox processes. + """ + cmd = [ + "bwrap", + # Wipe all inherited environment variables (API keys, secrets, etc.) + "--clearenv", + ] + + # Set only the safe env vars inside the sandbox + for key, value in env.items(): + cmd.extend(["--setenv", key, value]) + + # System directories: read-only + for path in _SYSTEM_RO_BINDS: + cmd.extend(["--ro-bind", path, path]) + + # Compat paths: bind only if they exist on the host + for path in _COMPAT_RO_BINDS: + if os.path.exists(path): + cmd.extend(["--ro-bind", path, path]) + + cmd.extend( + [ + # Writable workspace only + "--bind", + cwd, + cwd, + # Fresh virtual filesystems + "--dev", + "/dev", + "--proc", + "/proc", + "--tmpdir", + "/tmp", + # Isolation + "--unshare-net", + "--die-with-parent", + "--new-session", + "--chdir", + cwd, + "--", + *command, + ] + ) + + return cmd + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + async def run_sandboxed( command: list[str], cwd: str, timeout: int = _DEFAULT_TIMEOUT, env: dict[str, str] | None = None, ) -> tuple[str, str, int, bool]: - """Run a command in a sandboxed environment. + """Run a command inside a bubblewrap sandbox. + + Callers **must** check :func:`has_full_sandbox` before calling this + function. If bubblewrap is not available, this function raises + :class:`RuntimeError` rather than running unsandboxed. Returns: (stdout, stderr, exit_code, timed_out) - - Security layers: - - Network isolation via ``unshare --net`` (Linux) - - Restricted working directory - - Minimal environment variables - - Hard timeout """ + if not has_full_sandbox(): + raise RuntimeError( + "run_sandboxed() requires bubblewrap but bwrap is not available. " + "Callers must check has_full_sandbox() before calling this function." + ) + timeout = min(max(timeout, 1), _MAX_TIMEOUT) safe_env = { @@ -107,11 +204,7 @@ async def run_sandboxed( if env: safe_env.update(env) - # Wrap with unshare --net on Linux for kernel-level network isolation - if has_network_sandbox(): - full_command = ["unshare", "--net", *command] - else: - full_command = command + full_command = _build_bwrap_command(command, cwd, safe_env) try: proc = await asyncio.create_subprocess_exec( @@ -134,5 +227,7 @@ async def run_sandboxed( await proc.communicate() return "", f"Execution timed out after {timeout}s", -1, True + except RuntimeError: + raise except Exception as e: return "", f"Sandbox error: {e}", -1, False diff --git a/autogpt_platform/backend/test/chat/__init__.py b/autogpt_platform/backend/test/chat/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/autogpt_platform/backend/test/chat/test_security_hooks.py b/autogpt_platform/backend/test/chat/test_security_hooks.py new file mode 100644 index 0000000000..f10a90871b --- /dev/null +++ b/autogpt_platform/backend/test/chat/test_security_hooks.py @@ -0,0 +1,133 @@ +"""Tests for SDK security hooks — workspace paths, tool access, and deny messages. + +These are pure unit tests with no external dependencies (no SDK, no DB, no server). +They validate that the security hooks correctly block unauthorized paths, +tool access, and dangerous input patterns. 
+ +Note: Bash command validation was removed — the SDK built-in Bash tool is not in +allowed_tools, and the bash_exec MCP tool has kernel-level network isolation +(unshare --net) making command-level parsing unnecessary. +""" + +from backend.api.features.chat.sdk.security_hooks import ( + _validate_tool_access, + _validate_workspace_path, +) + +SDK_CWD = "/tmp/copilot-test-session" + + +def _is_denied(result: dict) -> bool: + hook = result.get("hookSpecificOutput", {}) + return hook.get("permissionDecision") == "deny" + + +def _reason(result: dict) -> str: + return result.get("hookSpecificOutput", {}).get("permissionDecisionReason", "") + + +# ============================================================ +# Workspace path validation (Read, Write, Edit, etc.) +# ============================================================ + + +class TestWorkspacePathValidation: + def test_path_in_workspace(self): + result = _validate_workspace_path( + "Read", {"file_path": f"{SDK_CWD}/file.txt"}, SDK_CWD + ) + assert not _is_denied(result) + + def test_path_outside_workspace(self): + result = _validate_workspace_path("Read", {"file_path": "/etc/passwd"}, SDK_CWD) + assert _is_denied(result) + + def test_tool_results_allowed(self): + result = _validate_workspace_path( + "Read", + {"file_path": "~/.claude/projects/abc/tool-results/out.txt"}, + SDK_CWD, + ) + assert not _is_denied(result) + + def test_claude_settings_blocked(self): + result = _validate_workspace_path( + "Read", {"file_path": "~/.claude/settings.json"}, SDK_CWD + ) + assert _is_denied(result) + + def test_claude_projects_without_tool_results(self): + result = _validate_workspace_path( + "Read", {"file_path": "~/.claude/projects/abc/credentials.json"}, SDK_CWD + ) + assert _is_denied(result) + + def test_no_path_allowed(self): + """Glob/Grep without path defaults to cwd — should be allowed.""" + result = _validate_workspace_path("Grep", {"pattern": "foo"}, SDK_CWD) + assert not _is_denied(result) + + def test_path_traversal_with_dotdot(self): + result = _validate_workspace_path( + "Read", {"file_path": f"{SDK_CWD}/../../../etc/passwd"}, SDK_CWD + ) + assert _is_denied(result) + + +# ============================================================ +# Tool access validation +# ============================================================ + + +class TestToolAccessValidation: + def test_blocked_tools(self): + for tool in ("bash", "shell", "exec", "terminal", "command"): + result = _validate_tool_access(tool, {}) + assert _is_denied(result), f"Tool '{tool}' should be blocked" + + def test_bash_builtin_blocked(self): + """SDK built-in Bash (capital) is blocked as defence-in-depth.""" + result = _validate_tool_access("Bash", {"command": "echo hello"}, SDK_CWD) + assert _is_denied(result) + assert "Bash" in _reason(result) + + def test_workspace_tools_delegate(self): + result = _validate_tool_access( + "Read", {"file_path": f"{SDK_CWD}/file.txt"}, SDK_CWD + ) + assert not _is_denied(result) + + def test_dangerous_pattern_blocked(self): + result = _validate_tool_access("SomeUnknownTool", {"data": "sudo rm -rf /"}) + assert _is_denied(result) + + def test_safe_unknown_tool_allowed(self): + result = _validate_tool_access("SomeSafeTool", {"data": "hello world"}) + assert not _is_denied(result) + + +# ============================================================ +# Deny message quality (ntindle feedback) +# ============================================================ + + +class TestDenyMessageClarity: + """Deny messages must include [SECURITY] and 'cannot be 
bypassed' + so the model knows the restriction is enforced, not a suggestion.""" + + def test_blocked_tool_message(self): + reason = _reason(_validate_tool_access("bash", {})) + assert "[SECURITY]" in reason + assert "cannot be bypassed" in reason + + def test_bash_builtin_blocked_message(self): + reason = _reason(_validate_tool_access("Bash", {"command": "echo hello"})) + assert "[SECURITY]" in reason + assert "cannot be bypassed" in reason + + def test_workspace_path_message(self): + reason = _reason( + _validate_workspace_path("Read", {"file_path": "/etc/passwd"}, SDK_CWD) + ) + assert "[SECURITY]" in reason + assert "cannot be bypassed" in reason