fix(backend/copilot): respect CLAUDE_CONFIG_DIR in SDK_PROJECTS_DIR constant

SDK_PROJECTS_DIR was hardcoded to ~/.claude/projects, ignoring the CLAUDE_CONFIG_DIR environment variable. This caused path validation mismatches in environments with custom Claude configurations. Now consistent with transcript.py's _projects_base() function.
fix(backend/copilot): update test mock to use get_workspace_manager
2026-03-17 03:00:27 -04:00 · 2026-03-17 13:49:45 +07:00 · 2026-03-17 12:00:34 +07:00 · 2026-03-17 07:11:15 +07:00 · 2026-03-16 06:27:43 +07:00 · 2026-03-16 06:15:59 +07:00
38 changed files with 1668 additions and 511 deletions
--- a/autogpt_platform/backend/backend/copilot/context.py
+++ b/autogpt_platform/backend/backend/copilot/context.py
@@ -17,8 +17,17 @@ from backend.util.workspace import WorkspaceManager
 if TYPE_CHECKING:
    from e2b import AsyncSandbox

-# Allowed base directory for the Read tool.
-_SDK_PROJECTS_DIR = os.path.realpath(os.path.expanduser("~/.claude/projects"))
+# Allowed base directory for the Read tool.  Public so service.py can use it
+# for sweep operations without depending on a private implementation detail.
+# Respects CLAUDE_CONFIG_DIR env var, consistent with transcript.py's
+# _projects_base() function.
+_config_dir = os.environ.get("CLAUDE_CONFIG_DIR") or os.path.expanduser("~/.claude")
+SDK_PROJECTS_DIR = os.path.realpath(os.path.join(_config_dir, "projects"))
+
+# Compiled UUID pattern for validating conversation directory names.
+# Kept as a module-level constant so the security-relevant pattern is easy
+# to audit in one place and avoids recompilation on every call.
+_UUID_RE = re.compile(r"^[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12}$", re.IGNORECASE)

 # Encoded project-directory name for the current session (e.g.
 # "-private-tmp-copilot-<uuid>").  Set by set_execution_context() so path
@@ -35,11 +44,20 @@ _current_sandbox: ContextVar["AsyncSandbox | None"] = ContextVar(
 _current_sdk_cwd: ContextVar[str] = ContextVar("_current_sdk_cwd", default="")


-def _encode_cwd_for_cli(cwd: str) -> str:
-    """Encode a working directory path the same way the Claude CLI does."""
+def encode_cwd_for_cli(cwd: str) -> str:
+    """Encode a working directory path the same way the Claude CLI does.
+
+    The Claude CLI encodes the absolute cwd as a directory name by replacing
+    every non-alphanumeric character with ``-``.  For example
+    ``/tmp/copilot-abc`` becomes ``-tmp-copilot-abc``.
+    """
    return re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(cwd))


+# Keep the private alias for internal callers (backwards compat).
+_encode_cwd_for_cli = encode_cwd_for_cli
+
+
 def set_execution_context(
    user_id: str | None,
    session: ChatSession,
@@ -100,7 +118,9 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:

    Allowed:
    - Files under *sdk_cwd* (``/tmp/copilot-<session>/``)
-    - Files under ``~/.claude/projects/<encoded-cwd>/tool-results/`` (SDK tool-results)
+    - Files under ``<SDK_PROJECTS_DIR>/<encoded-cwd>/<uuid>/tool-results/...``
+      (``~/.claude/projects`` unless ``CLAUDE_CONFIG_DIR`` is set). The SDK nests
+      tool-results under a conversation UUID directory, validated with ``_UUID_RE``.
    """
    if not path:
        return False
@@ -119,10 +139,22 @@ def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:

    encoded = _current_project_dir.get("")
    if encoded:
-        tool_results_dir = os.path.join(_SDK_PROJECTS_DIR, encoded, "tool-results")
-        if resolved == tool_results_dir or resolved.startswith(
-            tool_results_dir + os.sep
-        ):
-            return True
+        project_dir = os.path.realpath(os.path.join(SDK_PROJECTS_DIR, encoded))
+        # Defence-in-depth: ensure project_dir didn't escape the base.
+        if not project_dir.startswith(SDK_PROJECTS_DIR + os.sep):
+            return False
+        # Only allow: <encoded-cwd>/<uuid>/tool-results/<file>
+        # The SDK always creates a conversation UUID directory between
+        # the project dir and tool-results/.
+        if resolved.startswith(project_dir + os.sep):
+            relative = resolved[len(project_dir) + 1 :]
+            parts = relative.split(os.sep)
+            # Require at least: [<uuid>, "tool-results", <file>, ...]
+            if (
+                len(parts) >= 3
+                and _UUID_RE.match(parts[0])
+                and parts[1] == "tool-results"
+            ):
+                return True

    return False
--- a/autogpt_platform/backend/backend/copilot/context_test.py
+++ b/autogpt_platform/backend/backend/copilot/context_test.py
@@ -9,7 +9,7 @@ from unittest.mock import MagicMock
 import pytest

 from backend.copilot.context import (
-    _SDK_PROJECTS_DIR,
+    SDK_PROJECTS_DIR,
    _current_project_dir,
    get_current_sandbox,
    get_execution_context,
@@ -104,11 +104,13 @@ def test_is_allowed_local_path_no_sdk_cwd_no_project_dir():
    assert not is_allowed_local_path("/tmp/some-file.txt", sdk_cwd=None)


-def test_is_allowed_local_path_tool_results_dir():
-    """Files under the tool-results directory for the current project are allowed."""
+def test_is_allowed_local_path_tool_results_with_uuid():
+    """Files under <encoded-cwd>/<uuid>/tool-results/ are allowed."""
    encoded = "test-encoded-dir"
-    tool_results_dir = os.path.join(_SDK_PROJECTS_DIR, encoded, "tool-results")
-    path = os.path.join(tool_results_dir, "output.txt")
+    conv_uuid = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+    path = os.path.join(
+        SDK_PROJECTS_DIR, encoded, conv_uuid, "tool-results", "output.txt"
+    )

    _current_project_dir.set(encoded)
    try:
@@ -117,10 +119,22 @@ def test_is_allowed_local_path_tool_results_dir():
        _current_project_dir.set("")


+def test_is_allowed_local_path_tool_results_without_uuid_rejected():
+    """Direct <encoded-cwd>/tool-results/ (no UUID) is rejected."""
+    encoded = "test-encoded-dir"
+    path = os.path.join(SDK_PROJECTS_DIR, encoded, "tool-results", "output.txt")
+
+    _current_project_dir.set(encoded)
+    try:
+        assert not is_allowed_local_path(path, sdk_cwd=None)
+    finally:
+        _current_project_dir.set("")
+
+
 def test_is_allowed_local_path_sibling_of_tool_results_is_rejected():
    """A path adjacent to tool-results/ but not inside it is rejected."""
    encoded = "test-encoded-dir"
-    sibling_path = os.path.join(_SDK_PROJECTS_DIR, encoded, "other-dir", "file.txt")
+    sibling_path = os.path.join(SDK_PROJECTS_DIR, encoded, "other-dir", "file.txt")

    _current_project_dir.set(encoded)
    try:
@@ -129,6 +143,21 @@ def test_is_allowed_local_path_sibling_of_tool_results_is_rejected():
        _current_project_dir.set("")


+def test_is_allowed_local_path_valid_uuid_wrong_segment_name_rejected():
+    """A valid UUID dir but non-'tool-results' second segment is rejected."""
+    encoded = "test-encoded-dir"
+    uuid_str = "12345678-1234-5678-9abc-def012345678"
+    path = os.path.join(
+        SDK_PROJECTS_DIR, encoded, uuid_str, "not-tool-results", "output.txt"
+    )
+
+    _current_project_dir.set(encoded)
+    try:
+        assert not is_allowed_local_path(path, sdk_cwd=None)
+    finally:
+        _current_project_dir.set("")
+
+
 # ---------------------------------------------------------------------------
 # resolve_sandbox_path
 # ---------------------------------------------------------------------------
--- a/autogpt_platform/backend/backend/copilot/prompting.py
+++ b/autogpt_platform/backend/backend/copilot/prompting.py
@@ -11,18 +11,34 @@ from backend.copilot.tools import TOOL_REGISTRY
 # Shared technical notes that apply to both SDK and baseline modes
 _SHARED_TOOL_NOTES = """\

-### Sharing files
-After `write_workspace_file`, embed the `download_url` in Markdown:
- File: `[report.csv](workspace://file_id#text/csv)`
- Image: `![chart](workspace://file_id#image/png)`
- Video: `![recording](workspace://file_id#video/mp4)`
+### Sharing files with the user
+After saving a file to the persistent workspace with `write_workspace_file`,
+share it with the user by embedding the `download_url` from the response in
+your message as a Markdown link or image:

-### File references — @@agptfile:
-Pass large file content to tools by reference: `@@agptfile:<uri>[<start>-<end>]`
- `workspace://<file_id>` or `workspace:///<path>` — workspace files
- `/absolute/path` — local/sandbox files
- `[start-end]` — optional 1-indexed line range
- Multiple refs per argument supported. Only `workspace://` and absolute paths are expanded.
+- **Any file** — shows as a clickable download link:
+  `[report.csv](workspace://file_id#text/csv)`
+- **Image** — renders inline in chat:
+  `![chart](workspace://file_id#image/png)`
+- **Video** — renders inline in chat with player controls:
+  `![recording](workspace://file_id#video/mp4)`
+
+The `download_url` field in the `write_workspace_file` response is already
+in the correct format — paste it directly after the `(` in the Markdown.
+
+### Passing file content to tools — @@agptfile: references
+Instead of copying large file contents into a tool argument, pass a file
+reference and the platform will load the content for you.
+
+Syntax: `@@agptfile:<uri>[<start>-<end>]`
+
+- `<uri>` **must** start with `workspace://` or `/` (absolute path):
+  - `workspace://<file_id>` — workspace file by ID
+  - `workspace:///<path>` — workspace file by virtual path
+  - `/absolute/local/path` — ephemeral or sdk_cwd file
+  - E2B sandbox absolute path (e.g. `/home/user/script.py`)
+- `[<start>-<end>]` is an optional 1-indexed inclusive line range.
+- URIs that do not start with `workspace://` or `/` are **not** expanded.

 Examples:
 ```
@@ -33,16 +49,50 @@ Examples:
@@agptfile:/home/user/script.py
 ```

-**Structured data**: When the entire argument is a single file reference, the platform auto-parses by extension/MIME. Supported: JSON, JSONL, CSV, TSV, YAML, TOML, Parquet, Excel (.xlsx only). Unrecognised formats return plain string.
+You can embed a reference inside any string argument, or use it as the entire
+value.  Multiple references in one argument are all expanded.

-**Type coercion**: The platform auto-coerces expanded string values to match block input types (e.g. JSON string → `list[list[str]]`).
+**Structured data**: When the **entire** argument value is a single file
+reference (no surrounding text), the platform automatically parses the file
+content based on its extension or MIME type.  Supported formats: JSON, JSONL,
+CSV, TSV, YAML, TOML, Parquet, and Excel (.xlsx — first sheet only).
+For example, pass `@@agptfile:workspace://<id>` where the file is a `.csv` and
+the rows will be parsed into `list[list[str]]` automatically.  If the format is
+unrecognised or parsing fails, the content is returned as a plain string.
+Legacy `.xls` files are **not** supported — only the modern `.xlsx` format.
+
+**Type coercion**: The platform also coerces expanded values to match the
+block's expected input types.  For example, if a block expects `list[list[str]]`
+and the expanded value is a JSON string, it will be parsed into the correct type.

 ### Media file inputs (format: "file")
-Inputs with `"format": "file"` accept `workspace://<file_id>` or `data:<mime>;base64,<payload>`.
-Pass the `workspace://` URI directly (do NOT wrap in `@@agptfile:`). This avoids large payloads and preserves binary content.
+Some block inputs accept media files — their schema shows `"format": "file"`.
+These fields accept:
+- **`workspace://<file_id>`** or **`workspace://<file_id>#<mime>`** — preferred
+  for large files (images, videos, PDFs). The platform passes the reference
+  directly to the block without reading the content into memory.
+- **`data:<mime>;base64,<payload>`** — inline base64 data URI, suitable for
+  small files only.
+
+When a block input has `format: "file"`, **pass the `workspace://` URI
+directly as the value** (do NOT wrap it in `@@agptfile:`). This avoids large
+payloads in tool arguments and preserves binary content (images, videos)
+that would be corrupted by text encoding.
+
+Example — committing an image file to GitHub:
+```json
+{
+  "files": [{
+    "path": "docs/hero.png",
+    "content": "workspace://abc123#image/png",
+    "operation": "upsert"
+  }]
+}
+```

 ### Sub-agent tasks
- Task tool: NEVER set `run_in_background` to true.
+- When using the Task tool, NEVER set `run_in_background` to true.
+  All tasks must run in the foreground.
 """


@@ -78,18 +128,37 @@ def _build_storage_supplement(

 ## Tool notes

-### Shell & filesystem
- Use `bash_exec` for shell commands ({sandbox_type}). Working dir: `{working_dir}`
- All file tools share the same filesystem. Use relative or absolute paths under this dir.
+### Shell commands
+- The SDK built-in Bash tool is NOT available.  Use the `bash_exec` MCP tool
+  for shell commands — it runs {sandbox_type}.
+
+### Working directory
+- Your working directory is: `{working_dir}`
+- All SDK file tools AND `bash_exec` operate on the same filesystem
+- Use relative paths or absolute paths under `{working_dir}` for all file operations
+
+### Two storage systems — CRITICAL to understand

-### Storage — important
 1. **{storage_system_1_name}** (`{working_dir}`):
 {characteristics}
 {persistence}
-2. **Persistent workspace** (cloud) — survives across sessions.
-   - {file_move_name_1_to_2}: use `write_workspace_file`
-   - {file_move_name_2_to_1}: use `read_workspace_file` with save_to_path
-   - Save important files to workspace for persistence.
+
+2. **Persistent workspace** (cloud storage):
+   - Files here **survive across sessions indefinitely**
+
+### Moving files between storages
+- **{file_move_name_1_to_2}**: Copy to persistent workspace
+- **{file_move_name_2_to_1}**: Download for processing
+
+### File persistence
+Important files (code, configs, outputs) should be saved to workspace to ensure they persist.
+
+### SDK tool-result files
+When tool outputs are large, the SDK truncates them and saves the full output to
+a local file under `~/.claude/projects/.../tool-results/`. To read these files,
+always use `read_file` or `Read` (NOT `read_workspace_file`).
+`read_workspace_file` reads from cloud workspace storage, where SDK
+tool-results are NOT stored.
 {_SHARED_TOOL_NOTES}"""


--- a/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools.py
@@ -26,6 +26,41 @@ from backend.copilot.context import (
 logger = logging.getLogger(__name__)


+async def _check_sandbox_symlink_escape(
+    sandbox: Any,
+    parent: str,
+) -> str | None:
+    """Resolve the canonical parent path inside the sandbox to detect symlink escapes.
+
+    ``normpath`` (used by ``resolve_sandbox_path``) only normalises the string;
+    ``readlink -f`` follows actual symlinks on the sandbox filesystem.
+
+    Returns the canonical parent path, or ``None`` if ``readlink`` fails, prints
+    nothing, or the resolved path escapes ``E2B_WORKDIR``.
+
+    Note: There is an inherent TOCTOU window between this check and the
+    subsequent ``sandbox.files.write()``.  A symlink could theoretically be
+    replaced between the two operations.  This is acceptable in the E2B
+    sandbox model since the sandbox is single-user and ephemeral.
+    """
+    canonical_res = await sandbox.commands.run(
+        f"readlink -f {shlex.quote(parent or E2B_WORKDIR)}",
+        cwd=E2B_WORKDIR,
+        timeout=5,
+    )
+    canonical_parent = (canonical_res.stdout or "").strip()
+    if (
+        canonical_res.exit_code != 0
+        or not canonical_parent
+        or (
+            canonical_parent != E2B_WORKDIR
+            and not canonical_parent.startswith(E2B_WORKDIR + "/")
+        )
+    ):
+        return None
+    return canonical_parent
+
+
 def _get_sandbox():
    return get_current_sandbox()

@@ -106,6 +141,10 @@ async def _handle_write_file(args: dict[str, Any]) -> dict[str, Any]:
        parent = os.path.dirname(remote)
        if parent and parent != E2B_WORKDIR:
            await sandbox.files.make_dir(parent)
+        canonical_parent = await _check_sandbox_symlink_escape(sandbox, parent)
+        if canonical_parent is None:
+            return _mcp(f"Path must be within {E2B_WORKDIR}: {parent}", error=True)
+        remote = os.path.join(canonical_parent, os.path.basename(remote))
        await sandbox.files.write(remote, content)
    except Exception as exc:
        return _mcp(f"Failed to write {remote}: {exc}", error=True)
@@ -130,6 +169,12 @@ async def _handle_edit_file(args: dict[str, Any]) -> dict[str, Any]:
        return result
    sandbox, remote = result

+    parent = os.path.dirname(remote)
+    canonical_parent = await _check_sandbox_symlink_escape(sandbox, parent)
+    if canonical_parent is None:
+        return _mcp(f"Path must be within {E2B_WORKDIR}: {parent}", error=True)
+    remote = os.path.join(canonical_parent, os.path.basename(remote))
+
    try:
        raw: bytes = await sandbox.files.read(remote, format="bytes")
        content = raw.decode("utf-8", errors="replace")
--- a/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/e2b_file_tools_test.py
@@ -4,15 +4,19 @@ Pure unit tests with no external dependencies (no E2B, no sandbox).
 """

 import os
+import shutil
+from types import SimpleNamespace
+from unittest.mock import AsyncMock

 import pytest

-from backend.copilot.context import _current_project_dir
-
-from .e2b_file_tools import _read_local, resolve_sandbox_path
-
-_SDK_PROJECTS_DIR = os.path.realpath(os.path.expanduser("~/.claude/projects"))
+from backend.copilot.context import E2B_WORKDIR, SDK_PROJECTS_DIR, _current_project_dir

+from .e2b_file_tools import (
+    _check_sandbox_symlink_escape,
+    _read_local,
+    resolve_sandbox_path,
+)

 # ---------------------------------------------------------------------------
 # resolve_sandbox_path — sandbox path normalisation & boundary enforcement
@@ -21,46 +25,48 @@ _SDK_PROJECTS_DIR = os.path.realpath(os.path.expanduser("~/.claude/projects"))

 class TestResolveSandboxPath:
    def test_relative_path_resolved(self):
-        assert resolve_sandbox_path("src/main.py") == "/home/user/src/main.py"
+        assert resolve_sandbox_path("src/main.py") == f"{E2B_WORKDIR}/src/main.py"

    def test_absolute_within_sandbox(self):
-        assert resolve_sandbox_path("/home/user/file.txt") == "/home/user/file.txt"
+        assert (
+            resolve_sandbox_path(f"{E2B_WORKDIR}/file.txt") == f"{E2B_WORKDIR}/file.txt"
+        )

    def test_workdir_itself(self):
-        assert resolve_sandbox_path("/home/user") == "/home/user"
+        assert resolve_sandbox_path(E2B_WORKDIR) == E2B_WORKDIR

    def test_relative_dotslash(self):
-        assert resolve_sandbox_path("./README.md") == "/home/user/README.md"
+        assert resolve_sandbox_path("./README.md") == f"{E2B_WORKDIR}/README.md"

    def test_traversal_blocked(self):
-        with pytest.raises(ValueError, match="must be within /home/user"):
+        with pytest.raises(ValueError, match=f"must be within {E2B_WORKDIR}"):
            resolve_sandbox_path("../../etc/passwd")

    def test_absolute_traversal_blocked(self):
-        with pytest.raises(ValueError, match="must be within /home/user"):
-            resolve_sandbox_path("/home/user/../../etc/passwd")
+        with pytest.raises(ValueError, match=f"must be within {E2B_WORKDIR}"):
+            resolve_sandbox_path(f"{E2B_WORKDIR}/../../etc/passwd")

    def test_absolute_outside_sandbox_blocked(self):
-        with pytest.raises(ValueError, match="must be within /home/user"):
+        with pytest.raises(ValueError, match=f"must be within {E2B_WORKDIR}"):
            resolve_sandbox_path("/etc/passwd")

    def test_root_blocked(self):
-        with pytest.raises(ValueError, match="must be within /home/user"):
+        with pytest.raises(ValueError, match=f"must be within {E2B_WORKDIR}"):
            resolve_sandbox_path("/")

    def test_home_other_user_blocked(self):
-        with pytest.raises(ValueError, match="must be within /home/user"):
+        with pytest.raises(ValueError, match=f"must be within {E2B_WORKDIR}"):
            resolve_sandbox_path("/home/other/file.txt")

    def test_deep_nested_allowed(self):
-        assert resolve_sandbox_path("a/b/c/d/e.txt") == "/home/user/a/b/c/d/e.txt"
+        assert resolve_sandbox_path("a/b/c/d/e.txt") == f"{E2B_WORKDIR}/a/b/c/d/e.txt"

    def test_trailing_slash_normalised(self):
-        assert resolve_sandbox_path("src/") == "/home/user/src"
+        assert resolve_sandbox_path("src/") == f"{E2B_WORKDIR}/src"

    def test_double_dots_within_sandbox_ok(self):
-        """Path that resolves back within /home/user is allowed."""
-        assert resolve_sandbox_path("a/b/../c.txt") == "/home/user/a/c.txt"
+        """Path that resolves back within E2B_WORKDIR is allowed."""
+        assert resolve_sandbox_path("a/b/../c.txt") == f"{E2B_WORKDIR}/a/c.txt"


 # ---------------------------------------------------------------------------
@@ -73,9 +79,13 @@ class TestResolveSandboxPath:


 class TestReadLocal:
+    _CONV_UUID = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+
    def _make_tool_results_file(self, encoded: str, filename: str, content: str) -> str:
-        """Create a tool-results file and return its path."""
-        tool_results_dir = os.path.join(_SDK_PROJECTS_DIR, encoded, "tool-results")
+        """Create a tool-results file under <encoded>/<uuid>/tool-results/."""
+        tool_results_dir = os.path.join(
+            SDK_PROJECTS_DIR, encoded, self._CONV_UUID, "tool-results"
+        )
        os.makedirs(tool_results_dir, exist_ok=True)
        filepath = os.path.join(tool_results_dir, filename)
        with open(filepath, "w") as f:
@@ -107,7 +117,9 @@ class TestReadLocal:
    def test_read_nonexistent_tool_results(self):
        """A tool-results path that doesn't exist returns FileNotFoundError."""
        encoded = "-tmp-copilot-e2b-test-nofile"
-        tool_results_dir = os.path.join(_SDK_PROJECTS_DIR, encoded, "tool-results")
+        tool_results_dir = os.path.join(
+            SDK_PROJECTS_DIR, encoded, self._CONV_UUID, "tool-results"
+        )
        os.makedirs(tool_results_dir, exist_ok=True)
        filepath = os.path.join(tool_results_dir, "nonexistent.txt")
        token = _current_project_dir.set(encoded)
@@ -117,7 +129,7 @@ class TestReadLocal:
            assert "not found" in result["content"][0]["text"].lower()
        finally:
            _current_project_dir.reset(token)
-            os.rmdir(tool_results_dir)
+            shutil.rmtree(os.path.join(SDK_PROJECTS_DIR, encoded), ignore_errors=True)

    def test_read_traversal_path_blocked(self):
        """A traversal attempt that escapes allowed directories is blocked."""
@@ -152,3 +164,66 @@ class TestReadLocal:
        """Without _current_project_dir set, all paths are blocked."""
        result = _read_local("/tmp/anything.txt", offset=0, limit=10)
        assert result["isError"] is True
+
+
+# ---------------------------------------------------------------------------
+# _check_sandbox_symlink_escape — symlink escape detection
+# ---------------------------------------------------------------------------
+
+
+def _make_sandbox(stdout: str, exit_code: int = 0) -> SimpleNamespace:
+    """Build a minimal sandbox mock whose commands.run returns a fixed result."""
+    run_result = SimpleNamespace(stdout=stdout, exit_code=exit_code)
+    commands = SimpleNamespace(run=AsyncMock(return_value=run_result))
+    return SimpleNamespace(commands=commands)
+
+
+class TestCheckSandboxSymlinkEscape:
+    @pytest.mark.asyncio
+    async def test_canonical_path_within_workdir_returns_path(self):
+        """When readlink -f resolves to a path inside E2B_WORKDIR, returns it."""
+        sandbox = _make_sandbox(stdout=f"{E2B_WORKDIR}/src\n", exit_code=0)
+        result = await _check_sandbox_symlink_escape(sandbox, f"{E2B_WORKDIR}/src")
+        assert result == f"{E2B_WORKDIR}/src"
+
+    @pytest.mark.asyncio
+    async def test_workdir_itself_returns_workdir(self):
+        """When readlink -f resolves to E2B_WORKDIR exactly, returns E2B_WORKDIR."""
+        sandbox = _make_sandbox(stdout=f"{E2B_WORKDIR}\n", exit_code=0)
+        result = await _check_sandbox_symlink_escape(sandbox, E2B_WORKDIR)
+        assert result == E2B_WORKDIR
+
+    @pytest.mark.asyncio
+    async def test_symlink_escape_returns_none(self):
+        """When readlink -f resolves outside E2B_WORKDIR (symlink escape), returns None."""
+        sandbox = _make_sandbox(stdout="/etc\n", exit_code=0)
+        result = await _check_sandbox_symlink_escape(sandbox, f"{E2B_WORKDIR}/evil")
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_nonzero_exit_code_returns_none(self):
+        """A non-zero exit code from readlink -f returns None."""
+        sandbox = _make_sandbox(stdout="", exit_code=1)
+        result = await _check_sandbox_symlink_escape(sandbox, f"{E2B_WORKDIR}/src")
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_empty_stdout_returns_none(self):
+        """Empty stdout from readlink (e.g. path doesn't exist yet) returns None."""
+        sandbox = _make_sandbox(stdout="", exit_code=0)
+        result = await _check_sandbox_symlink_escape(sandbox, f"{E2B_WORKDIR}/src")
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_prefix_collision_returns_none(self):
+        """A path prefixed with E2B_WORKDIR but not within it is rejected."""
+        sandbox = _make_sandbox(stdout=f"{E2B_WORKDIR}-evil\n", exit_code=0)
+        result = await _check_sandbox_symlink_escape(sandbox, f"{E2B_WORKDIR}-evil")
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_deeply_nested_path_within_workdir(self):
+        """Deep nested paths inside E2B_WORKDIR are allowed."""
+        sandbox = _make_sandbox(stdout=f"{E2B_WORKDIR}/a/b/c/d\n", exit_code=0)
+        result = await _check_sandbox_symlink_escape(sandbox, f"{E2B_WORKDIR}/a/b/c/d")
+        assert result == f"{E2B_WORKDIR}/a/b/c/d"
--- a/autogpt_platform/backend/backend/copilot/sdk/security_hooks.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/security_hooks.py
@@ -42,7 +42,7 @@ def _validate_workspace_path(
    Delegates to :func:`is_allowed_local_path` which permits:
    - The SDK working directory (``/tmp/copilot-<session>/``)
    - The current session's tool-results directory
-      (``~/.claude/projects/<encoded-cwd>/tool-results/``)
+      (``~/.claude/projects/<encoded-cwd>/<uuid>/tool-results/``)
    """
    path = tool_input.get("file_path") or tool_input.get("path") or ""
    if not path:
@@ -302,7 +302,11 @@ def create_security_hooks(
            """
            _ = context, tool_use_id
            trigger = input_data.get("trigger", "auto")
-            # Sanitize untrusted input before logging to prevent log injection
+            # Sanitize untrusted input: strip control chars for logging AND
+            # for the value passed downstream.  read_compacted_entries()
+            # validates against _projects_base() as defence-in-depth, but
+            # sanitizing here prevents log injection and rejects obviously
+            # malformed paths early.
            transcript_path = (
                str(input_data.get("transcript_path", ""))
                .replace("\n", "")
--- a/autogpt_platform/backend/backend/copilot/sdk/security_hooks_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/security_hooks_test.py
@@ -122,7 +122,7 @@ def test_read_no_cwd_denies_absolute():

 def test_read_tool_results_allowed():
    home = os.path.expanduser("~")
-    path = f"{home}/.claude/projects/-tmp-copilot-abc123/tool-results/12345.txt"
+    path = f"{home}/.claude/projects/-tmp-copilot-abc123/a1b2c3d4-e5f6-7890-abcd-ef1234567890/tool-results/12345.txt"
    # is_allowed_local_path requires the session's encoded cwd to be set
    token = _current_project_dir.set("-tmp-copilot-abc123")
    try:
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -10,6 +10,7 @@ import re
 import shutil
 import subprocess
 import sys
+import time
 import uuid
 from collections.abc import AsyncGenerator
 from typing import Any, cast
@@ -38,6 +39,7 @@ from backend.util.settings import Settings

 from ..config import ChatConfig
 from ..constants import COPILOT_ERROR_PREFIX, COPILOT_SYSTEM_PREFIX
+from ..context import encode_cwd_for_cli
 from ..model import (
    ChatMessage,
    ChatSession,
@@ -75,7 +77,7 @@ from .tool_adapter import (
    wait_for_stash,
 )
 from .transcript import (
-    cleanup_cli_project_dir,
+    cleanup_stale_project_dirs,
    download_transcript,
    read_compacted_entries,
    upload_transcript,
@@ -143,6 +145,9 @@ _background_tasks: set[asyncio.Task[Any]] = set()

 _SDK_CWD_PREFIX = WORKSPACE_PREFIX

+_last_sweep_time: float = 0.0
+_SWEEP_INTERVAL_SECONDS = 300  # 5 minutes
+
 # Heartbeat interval — keep SSE alive through proxies/LBs during tool execution.
 # IMPORTANT: Must be less than frontend timeout (12s in useCopilotPage.ts)
 _HEARTBEAT_INTERVAL = 10.0  # seconds
@@ -281,31 +286,34 @@ def _make_sdk_cwd(session_id: str) -> str:
    return cwd


-def _cleanup_sdk_tool_results(cwd: str) -> None:
+async def _cleanup_sdk_tool_results(cwd: str) -> None:
    """Remove SDK session artifacts for a specific working directory.

-    Cleans up:
-    - ``~/.claude/projects/<encoded-cwd>/`` — CLI session transcripts and
-      tool-result files.  Each SDK turn uses a unique cwd, so this directory
-      is safe to remove entirely.
-    - ``/tmp/copilot-<session>/`` — the ephemeral working directory.
+    Cleans up the ephemeral working directory ``/tmp/copilot-<session>/``.
+
+    Also sweeps stale CLI project directories (older than 12 h) to prevent
+    unbounded disk growth.  The sweep is best-effort, rate-limited to once
+    every 5 minutes, and capped at 50 directories per sweep.

    Security: *cwd* MUST be created by ``_make_sdk_cwd()`` which sanitizes
    the session_id.
    """
    normalized = os.path.normpath(cwd)
    if not normalized.startswith(_SDK_CWD_PREFIX):
-        logger.warning(f"[SDK] Rejecting cleanup for path outside workspace: {cwd}")
+        logger.warning("[SDK] Rejecting cleanup for path outside workspace: %s", cwd)
        return

-    # Clean the CLI's project directory (transcripts + tool-results).
-    cleanup_cli_project_dir(cwd)
+    await asyncio.to_thread(shutil.rmtree, normalized, True)

-    # Clean up the temp cwd directory itself.
-    try:
-        shutil.rmtree(normalized, ignore_errors=True)
-    except OSError:
-        pass
+    # Best-effort sweep of old project dirs to prevent disk leak.
+    # Pass the encoded cwd so only this session's project directory is swept,
+    # which is safe in multi-tenant environments.
+    global _last_sweep_time
+    now = time.time()
+    if now - _last_sweep_time >= _SWEEP_INTERVAL_SECONDS:
+        _last_sweep_time = now
+        encoded = encode_cwd_for_cli(normalized)
+        await asyncio.to_thread(cleanup_stale_project_dirs, encoded)


 def _format_sdk_content_blocks(blocks: list) -> list[dict[str, Any]]:
@@ -797,7 +805,7 @@ async def stream_chat_completion_sdk(
                )
            except Exception as transcript_err:
                logger.warning(
-                    "%s Transcript download failed, continuing without " "--resume: %s",
+                    "%s Transcript download failed, continuing without --resume: %s",
                    log_prefix,
                    transcript_err,
                )
@@ -820,7 +828,7 @@ async def stream_chat_completion_sdk(
            is_valid = validate_transcript(dl.content)
            dl_lines = dl.content.strip().split("\n") if dl.content else []
            logger.info(
-                "%s Downloaded transcript: %dB, %d lines, " "msg_count=%d, valid=%s",
+                "%s Downloaded transcript: %dB, %d lines, msg_count=%d, valid=%s",
                log_prefix,
                len(dl.content),
                len(dl_lines),
@@ -1054,8 +1062,7 @@ async def stream_chat_completion_sdk(
                        break

                    logger.info(
-                        "%s Received: %s %s "
-                        "(unresolved=%d, current=%d, resolved=%d)",
+                        "%s Received: %s %s (unresolved=%d, current=%d, resolved=%d)",
                        log_prefix,
                        type(sdk_msg).__name__,
                        getattr(sdk_msg, "subtype", ""),
@@ -1100,7 +1107,14 @@ async def stream_chat_completion_sdk(
                        and isinstance(sdk_msg, (AssistantMessage, ResultMessage))
                        and not is_parallel_continuation
                    ):
-                        if await wait_for_stash(timeout=0.5):
+                        # 2.0 s timeout: the original 0.5 s caused frequent
+                        # timeouts under load (parallel tool calls, large
+                        # outputs).  2.0 s gives margin while still failing
+                        # fast when the hook genuinely will not fire.
+                        if await wait_for_stash(timeout=2.0):
+                            # Yield once so any callbacks scheduled by the
+                            # stash signal can propagate before we process
+                            # the next SDK message.
                            await asyncio.sleep(0)
                        else:
                            logger.warning(
@@ -1486,11 +1500,14 @@ async def stream_chat_completion_sdk(
                    exc_info=True,
                )

-        if sdk_cwd:
-            _cleanup_sdk_tool_results(sdk_cwd)
-
-        # Release stream lock to allow new streams for this session
-        await lock.release()
+        try:
+            if sdk_cwd:
+                await _cleanup_sdk_tool_results(sdk_cwd)
+        except Exception:
+            logger.warning("%s SDK cleanup failed", log_prefix, exc_info=True)
+        finally:
+            # Release stream lock to allow new streams for this session
+            await lock.release()


 async def _update_title_async(
--- a/autogpt_platform/backend/backend/copilot/sdk/service_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service_test.py
@@ -288,3 +288,90 @@ class TestPromptSupplement:
            # Count how many times this tool appears as a bullet point
            count = docs.count(f"- **`{tool_name}`**")
            assert count == 1, f"Tool '{tool_name}' appears {count} times (should be 1)"
+
+
+# ---------------------------------------------------------------------------
+# _cleanup_sdk_tool_results — orchestration + rate-limiting
+# ---------------------------------------------------------------------------
+
+
+class TestCleanupSdkToolResults:
+    """Tests for _cleanup_sdk_tool_results orchestration and sweep rate-limiting."""
+
+    # All valid cwds must start with /tmp/copilot- (the _SDK_CWD_PREFIX).
+    _CWD_PREFIX = "/tmp/copilot-"
+
+    @pytest.mark.asyncio
+    async def test_removes_cwd_directory(self):
+        """Cleanup removes the session working directory."""
+
+        from .service import _cleanup_sdk_tool_results
+
+        cwd = "/tmp/copilot-test-cleanup-remove"
+        os.makedirs(cwd, exist_ok=True)
+
+        with patch("backend.copilot.sdk.service.cleanup_stale_project_dirs"):
+            import backend.copilot.sdk.service as svc_mod
+
+            svc_mod._last_sweep_time = 0.0
+            await _cleanup_sdk_tool_results(cwd)
+
+        assert not os.path.exists(cwd)
+
+    @pytest.mark.asyncio
+    async def test_sweep_runs_when_interval_elapsed(self):
+        """cleanup_stale_project_dirs is called when 5-minute interval has elapsed."""
+
+        import backend.copilot.sdk.service as svc_mod
+
+        from .service import _cleanup_sdk_tool_results
+
+        cwd = "/tmp/copilot-test-sweep-elapsed"
+        os.makedirs(cwd, exist_ok=True)
+
+        with patch(
+            "backend.copilot.sdk.service.cleanup_stale_project_dirs"
+        ) as mock_sweep:
+            # Set last sweep to a time far in the past
+            svc_mod._last_sweep_time = 0.0
+            await _cleanup_sdk_tool_results(cwd)
+
+        mock_sweep.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_sweep_skipped_within_interval(self):
+        """cleanup_stale_project_dirs is NOT called when within 5-minute interval."""
+        import time
+
+        import backend.copilot.sdk.service as svc_mod
+
+        from .service import _cleanup_sdk_tool_results
+
+        cwd = "/tmp/copilot-test-sweep-ratelimit"
+        os.makedirs(cwd, exist_ok=True)
+
+        with patch(
+            "backend.copilot.sdk.service.cleanup_stale_project_dirs"
+        ) as mock_sweep:
+            # Set last sweep to now — interval not elapsed
+            svc_mod._last_sweep_time = time.time()
+            await _cleanup_sdk_tool_results(cwd)
+
+        mock_sweep.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_rejects_path_outside_prefix(self, tmp_path):
+        """Cleanup rejects a cwd that does not start with the expected prefix."""
+        from .service import _cleanup_sdk_tool_results
+
+        evil_cwd = str(tmp_path / "evil-path")
+        os.makedirs(evil_cwd, exist_ok=True)
+
+        with patch(
+            "backend.copilot.sdk.service.cleanup_stale_project_dirs"
+        ) as mock_sweep:
+            await _cleanup_sdk_tool_results(evil_cwd)
+
+        # Directory should NOT have been removed (rejected early)
+        assert os.path.exists(evil_cwd)
+        mock_sweep.assert_not_called()
--- a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py
@@ -146,7 +146,7 @@ def stash_pending_tool_output(tool_name: str, output: Any) -> None:
        event.set()


-async def wait_for_stash(timeout: float = 0.5) -> bool:
+async def wait_for_stash(timeout: float = 2.0) -> bool:
    """Wait for a PostToolUse hook to stash tool output.

    The SDK fires PostToolUse hooks asynchronously via ``start_soon()`` —
@@ -155,12 +155,12 @@ async def wait_for_stash(timeout: float = 0.5) -> bool:
    by waiting on the ``_stash_event``, which is signaled by
    :func:`stash_pending_tool_output`.

-    After the event fires, callers should ``await asyncio.sleep(0)`` to
-    give any remaining concurrent hooks a chance to complete.
-
    Returns ``True`` if a stash signal was received, ``False`` on timeout.
-    The timeout is a safety net — normally the stash happens within
-    microseconds of yielding to the event loop.
+
+    The 2.0 s default was chosen based on production metrics: the original
+    0.5 s caused frequent timeouts under load (parallel tool calls, large
+    outputs).  2.0 s gives a comfortable margin while still failing fast
+    when the hook genuinely will not fire.
    """
    event = _stash_event.get(None)
    if event is None:
@@ -285,7 +285,7 @@ async def _read_file_handler(args: dict[str, Any]) -> dict[str, Any]:

    resolved = os.path.realpath(os.path.expanduser(file_path))
    try:
-        with open(resolved) as f:
+        with open(resolved, encoding="utf-8", errors="replace") as f:
            selected = list(itertools.islice(f, offset, offset + limit))
        # Cleanup happens in _cleanup_sdk_tool_results after session ends;
        # don't delete here — the SDK may read in multiple chunks.
--- a/autogpt_platform/backend/backend/copilot/sdk/transcript.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/transcript.py
@@ -151,44 +151,110 @@ def _projects_base() -> str:
    return os.path.realpath(os.path.join(config_dir, "projects"))


-def _cli_project_dir(sdk_cwd: str) -> str | None:
-    """Return the CLI's project directory for a given working directory.
+_STALE_PROJECT_DIR_SECONDS = 12 * 3600  # 12 hours — matches max session lifetime
+_MAX_PROJECT_DIRS_TO_SWEEP = 50  # limit per sweep to avoid long pauses

-    Returns ``None`` if the path would escape the projects base.
+
+def cleanup_stale_project_dirs(encoded_cwd: str | None = None) -> int:
+    """Remove CLI project directories older than ``_STALE_PROJECT_DIR_SECONDS``.
+
+    Each CoPilot SDK turn creates a unique ``~/.claude/projects/<encoded-cwd>/``
+    directory.  These are intentionally kept across turns so the model can read
+    tool-result files via ``--resume``.  However, after a session ends they
+    become stale.  This function sweeps old ones to prevent unbounded disk
+    growth.
+
+    When *encoded_cwd* is provided the sweep is scoped to that single
+    directory, making the operation safe in multi-tenant environments where
+    multiple copilot sessions share the same host.  Without it the function
+    falls back to sweeping all directories matching the copilot naming pattern
+    (``-tmp-copilot-``), which is only safe for single-tenant deployments.
+
+    Returns the number of directories removed.
    """
-    cwd_encoded = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
    projects_base = _projects_base()
-    project_dir = os.path.realpath(os.path.join(projects_base, cwd_encoded))
+    if not os.path.isdir(projects_base):
+        return 0

-    if not project_dir.startswith(projects_base + os.sep):
-        logger.warning(
-            "[Transcript] Project dir escaped projects base: %s", project_dir
-        )
-        return None
-    return project_dir
+    now = time.time()
+    removed = 0

-
-def _safe_glob_jsonl(project_dir: str) -> list[Path]:
-    """Glob ``*.jsonl`` files, filtering out symlinks that escape the directory."""
-    try:
-        resolved_base = Path(project_dir).resolve()
-    except OSError as e:
-        logger.warning("[Transcript] Failed to resolve project dir: %s", e)
-        return []
-
-    result: list[Path] = []
-    for candidate in Path(project_dir).glob("*.jsonl"):
-        try:
-            resolved = candidate.resolve()
-            if resolved.is_relative_to(resolved_base):
-                result.append(resolved)
-        except (OSError, RuntimeError) as e:
-            logger.debug(
-                "[Transcript] Skipping invalid CLI session candidate %s: %s",
-                candidate,
-                e,
+    # Scoped mode: only clean up the one directory for the current session.
+    if encoded_cwd:
+        target = Path(projects_base) / encoded_cwd
+        if not target.is_dir():
+            return 0
+        # Guard: only sweep copilot-generated dirs.
+        if "-tmp-copilot-" not in target.name:
+            logger.warning(
+                "[Transcript] Refusing to sweep non-copilot dir: %s", target.name
            )
-    return result
+            return 0
+        try:
+            # st_mtime is used as a proxy for session activity. Claude CLI writes
+            # its JSONL transcript into this directory during each turn, so mtime
+            # advances on every turn. A directory whose mtime is older than
+            # _STALE_PROJECT_DIR_SECONDS has not had an active turn in that window
+            # and is safe to remove (the session cannot --resume after cleanup).
+            age = now - target.stat().st_mtime
+        except OSError:
+            return 0
+        if age < _STALE_PROJECT_DIR_SECONDS:
+            return 0
+        try:
+            shutil.rmtree(target, ignore_errors=True)
+            removed = 1
+        except OSError:
+            pass
+        if removed:
+            logger.info(
+                "[Transcript] Swept stale CLI project dir %s (age %ds > %ds)",
+                target.name,
+                int(age),
+                _STALE_PROJECT_DIR_SECONDS,
+            )
+        return removed
+
+    # Unscoped fallback: sweep all copilot dirs across the projects base.
+    # Only safe for single-tenant deployments; callers should prefer the
+    # scoped variant by passing encoded_cwd.
+    try:
+        entries = Path(projects_base).iterdir()
+    except OSError as e:
+        logger.warning("[Transcript] Failed to list projects dir: %s", e)
+        return 0
+
+    for entry in entries:
+        if removed >= _MAX_PROJECT_DIRS_TO_SWEEP:
+            break
+        # Only sweep copilot-generated dirs (pattern: -tmp-copilot- or
+        # -private-tmp-copilot-).
+        if "-tmp-copilot-" not in entry.name:
+            continue
+        if not entry.is_dir():
+            continue
+        try:
+            # See the scoped-mode comment above: st_mtime advances on every turn,
+            # so a stale mtime reliably indicates an inactive session.
+            age = now - entry.stat().st_mtime
+        except OSError:
+            continue
+        if age < _STALE_PROJECT_DIR_SECONDS:
+            continue
+
+        try:
+            shutil.rmtree(entry, ignore_errors=True)
+            removed += 1
+        except OSError:
+            pass
+
+    if removed:
+        logger.info(
+            "[Transcript] Swept %d stale CLI project dirs (older than %ds)",
+            removed,
+            _STALE_PROJECT_DIR_SECONDS,
+        )
+    return removed


 def read_compacted_entries(transcript_path: str) -> list[dict] | None:
@@ -255,63 +321,6 @@ def read_compacted_entries(transcript_path: str) -> list[dict] | None:
    return entries


-def read_cli_session_file(sdk_cwd: str) -> str | None:
-    """Read the CLI's own session file, which reflects any compaction.
-
-    The CLI writes its session transcript to
-    ``~/.claude/projects/<encoded_cwd>/<session_id>.jsonl``.
-    Since each SDK turn uses a unique ``sdk_cwd``, there should be
-    exactly one ``.jsonl`` file in that directory.
-
-    Returns the file content, or ``None`` if not found.
-    """
-    project_dir = _cli_project_dir(sdk_cwd)
-    if not project_dir or not os.path.isdir(project_dir):
-        return None
-
-    jsonl_files = _safe_glob_jsonl(project_dir)
-    if not jsonl_files:
-        logger.debug("[Transcript] No CLI session file found in %s", project_dir)
-        return None
-
-    # Pick the most recently modified file (should be only one per turn).
-    try:
-        session_file = max(jsonl_files, key=lambda p: p.stat().st_mtime)
-    except OSError as e:
-        logger.warning("[Transcript] Failed to inspect CLI session files: %s", e)
-        return None
-
-    try:
-        content = session_file.read_text()
-        logger.info(
-            "[Transcript] Read CLI session file: %s (%d bytes)",
-            session_file,
-            len(content),
-        )
-        return content
-    except OSError as e:
-        logger.warning("[Transcript] Failed to read CLI session file: %s", e)
-        return None
-
-
-def cleanup_cli_project_dir(sdk_cwd: str) -> None:
-    """Remove the CLI's project directory for a specific working directory.
-
-    The CLI stores session data under ``~/.claude/projects/<encoded_cwd>/``.
-    Each SDK turn uses a unique ``sdk_cwd``, so the project directory is
-    safe to remove entirely after the transcript has been uploaded.
-    """
-    project_dir = _cli_project_dir(sdk_cwd)
-    if not project_dir:
-        return
-
-    if os.path.isdir(project_dir):
-        shutil.rmtree(project_dir, ignore_errors=True)
-        logger.debug("[Transcript] Cleaned up CLI project dir: %s", project_dir)
-    else:
-        logger.debug("[Transcript] Project dir not found: %s", project_dir)
-
-
 def write_transcript_to_tempfile(
    transcript_content: str,
    session_id: str,
--- a/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py
@@ -9,9 +9,7 @@ from backend.util import json

 from .transcript import (
    STRIPPABLE_TYPES,
-    _cli_project_dir,
    delete_transcript,
-    read_cli_session_file,
    read_compacted_entries,
    strip_progress_entries,
    validate_transcript,
@@ -292,85 +290,6 @@ class TestStripProgressEntries:
        assert asst_entry["parentUuid"] == "u1"  # reparented


-# --- read_cli_session_file ---
-
-
-class TestReadCliSessionFile:
-    def test_no_matching_files_returns_none(self, tmp_path, monkeypatch):
-        """read_cli_session_file returns None when no .jsonl files exist."""
-        # Create a project dir with no jsonl files
-        project_dir = tmp_path / "projects" / "encoded-cwd"
-        project_dir.mkdir(parents=True)
-        monkeypatch.setattr(
-            "backend.copilot.sdk.transcript._cli_project_dir",
-            lambda sdk_cwd: str(project_dir),
-        )
-        assert read_cli_session_file("/fake/cwd") is None
-
-    def test_one_jsonl_file_returns_content(self, tmp_path, monkeypatch):
-        """read_cli_session_file returns the content of a single .jsonl file."""
-        project_dir = tmp_path / "projects" / "encoded-cwd"
-        project_dir.mkdir(parents=True)
-        jsonl_file = project_dir / "session.jsonl"
-        jsonl_file.write_text("line1\nline2\n")
-        monkeypatch.setattr(
-            "backend.copilot.sdk.transcript._cli_project_dir",
-            lambda sdk_cwd: str(project_dir),
-        )
-        result = read_cli_session_file("/fake/cwd")
-        assert result == "line1\nline2\n"
-
-    def test_symlink_escaping_project_dir_is_skipped(self, tmp_path, monkeypatch):
-        """read_cli_session_file skips symlinks that escape the project dir."""
-        project_dir = tmp_path / "projects" / "encoded-cwd"
-        project_dir.mkdir(parents=True)
-
-        # Create a file outside the project dir
-        outside = tmp_path / "outside"
-        outside.mkdir()
-        outside_file = outside / "evil.jsonl"
-        outside_file.write_text("should not be read\n")
-
-        # Symlink from inside project_dir to outside file
-        symlink = project_dir / "evil.jsonl"
-        symlink.symlink_to(outside_file)
-
-        monkeypatch.setattr(
-            "backend.copilot.sdk.transcript._cli_project_dir",
-            lambda sdk_cwd: str(project_dir),
-        )
-        # The symlink target resolves outside project_dir, so it should be skipped
-        result = read_cli_session_file("/fake/cwd")
-        assert result is None
-
-
-# --- _cli_project_dir ---
-
-
-class TestCliProjectDir:
-    def test_returns_none_for_path_traversal(self, tmp_path, monkeypatch):
-        """_cli_project_dir returns None when the project dir symlink escapes projects base."""
-        config_dir = tmp_path / "config"
-        config_dir.mkdir()
-        projects_dir = config_dir / "projects"
-        projects_dir.mkdir()
-
-        monkeypatch.setenv("CLAUDE_CONFIG_DIR", str(config_dir))
-
-        # Create a symlink inside projects/ that points outside of it.
-        # _cli_project_dir encodes the cwd as all-alnum-hyphens, so use a
-        # cwd whose encoded form matches the symlink name we create.
-        evil_target = tmp_path / "escaped"
-        evil_target.mkdir()
-
-        # The encoded form of "/evil/cwd" is "-evil-cwd"
-        symlink_path = projects_dir / "-evil-cwd"
-        symlink_path.symlink_to(evil_target)
-
-        result = _cli_project_dir("/evil/cwd")
-        assert result is None
-
-
 # --- delete_transcript ---


@@ -897,3 +816,209 @@ class TestCompactionFlowIntegration:
        output2 = builder2.to_jsonl()
        lines2 = [json.loads(line) for line in output2.strip().split("\n")]
        assert lines2[-1]["parentUuid"] == "a2"
+
+
+# ---------------------------------------------------------------------------
+# cleanup_stale_project_dirs
+# ---------------------------------------------------------------------------
+
+
+class TestCleanupStaleProjectDirs:
+    """Tests for cleanup_stale_project_dirs (disk leak prevention)."""
+
+    def test_removes_old_copilot_dirs(self, tmp_path, monkeypatch):
+        """Directories matching copilot pattern older than threshold are removed."""
+        from backend.copilot.sdk.transcript import (
+            _STALE_PROJECT_DIR_SECONDS,
+            cleanup_stale_project_dirs,
+        )
+
+        projects_dir = tmp_path / "projects"
+        projects_dir.mkdir()
+        monkeypatch.setattr(
+            "backend.copilot.sdk.transcript._projects_base",
+            lambda: str(projects_dir),
+        )
+
+        # Create a stale dir
+        stale = projects_dir / "-tmp-copilot-old-session"
+        stale.mkdir()
+        # Set mtime to past the threshold
+        import time
+
+        old_time = time.time() - _STALE_PROJECT_DIR_SECONDS - 100
+        os.utime(stale, (old_time, old_time))
+
+        # Create a fresh dir
+        fresh = projects_dir / "-tmp-copilot-new-session"
+        fresh.mkdir()
+
+        removed = cleanup_stale_project_dirs()
+        assert removed == 1
+        assert not stale.exists()
+        assert fresh.exists()
+
+    def test_ignores_non_copilot_dirs(self, tmp_path, monkeypatch):
+        """Directories not matching copilot pattern are left alone."""
+        from backend.copilot.sdk.transcript import cleanup_stale_project_dirs
+
+        projects_dir = tmp_path / "projects"
+        projects_dir.mkdir()
+        monkeypatch.setattr(
+            "backend.copilot.sdk.transcript._projects_base",
+            lambda: str(projects_dir),
+        )
+
+        # Non-copilot dir that's old
+        import time
+
+        other = projects_dir / "some-other-project"
+        other.mkdir()
+        old_time = time.time() - 999999
+        os.utime(other, (old_time, old_time))
+
+        removed = cleanup_stale_project_dirs()
+        assert removed == 0
+        assert other.exists()
+
+    def test_ttl_boundary_not_removed(self, tmp_path, monkeypatch):
+        """A directory just under the TTL (1 s younger) should NOT be removed."""
+        from backend.copilot.sdk.transcript import (
+            _STALE_PROJECT_DIR_SECONDS,
+            cleanup_stale_project_dirs,
+        )
+
+        projects_dir = tmp_path / "projects"
+        projects_dir.mkdir()
+        monkeypatch.setattr(
+            "backend.copilot.sdk.transcript._projects_base",
+            lambda: str(projects_dir),
+        )
+
+        import time
+
+        # Dir 1 s younger than the TTL (age < threshold) — should survive
+        boundary = projects_dir / "-tmp-copilot-boundary"
+        boundary.mkdir()
+        boundary_time = time.time() - _STALE_PROJECT_DIR_SECONDS + 1
+        os.utime(boundary, (boundary_time, boundary_time))
+
+        removed = cleanup_stale_project_dirs()
+        assert removed == 0
+        assert boundary.exists()
+
+    def test_skips_non_directory_entries(self, tmp_path, monkeypatch):
+        """Regular files matching the copilot pattern are not removed."""
+        from backend.copilot.sdk.transcript import (
+            _STALE_PROJECT_DIR_SECONDS,
+            cleanup_stale_project_dirs,
+        )
+
+        projects_dir = tmp_path / "projects"
+        projects_dir.mkdir()
+        monkeypatch.setattr(
+            "backend.copilot.sdk.transcript._projects_base",
+            lambda: str(projects_dir),
+        )
+
+        import time
+
+        # Create a regular FILE (not a dir) with the copilot pattern name
+        stale_file = projects_dir / "-tmp-copilot-stale-file"
+        stale_file.write_text("not a dir")
+        old_time = time.time() - _STALE_PROJECT_DIR_SECONDS - 100
+        os.utime(stale_file, (old_time, old_time))
+
+        removed = cleanup_stale_project_dirs()
+        assert removed == 0
+        assert stale_file.exists()
+
+    def test_missing_base_dir_returns_zero(self, tmp_path, monkeypatch):
+        """If the projects base directory doesn't exist, return 0 gracefully."""
+        from backend.copilot.sdk.transcript import cleanup_stale_project_dirs
+
+        nonexistent = str(tmp_path / "does-not-exist" / "projects")
+        monkeypatch.setattr(
+            "backend.copilot.sdk.transcript._projects_base",
+            lambda: nonexistent,
+        )
+
+        removed = cleanup_stale_project_dirs()
+        assert removed == 0
+
+    def test_scoped_removes_only_target_dir(self, tmp_path, monkeypatch):
+        """When encoded_cwd is supplied only that directory is swept."""
+        import time
+
+        from backend.copilot.sdk.transcript import (
+            _STALE_PROJECT_DIR_SECONDS,
+            cleanup_stale_project_dirs,
+        )
+
+        projects_dir = tmp_path / "projects"
+        projects_dir.mkdir()
+        monkeypatch.setattr(
+            "backend.copilot.sdk.transcript._projects_base",
+            lambda: str(projects_dir),
+        )
+
+        old_time = time.time() - _STALE_PROJECT_DIR_SECONDS - 100
+
+        # Two stale copilot dirs
+        target = projects_dir / "-tmp-copilot-session-abc"
+        target.mkdir()
+        os.utime(target, (old_time, old_time))
+
+        other = projects_dir / "-tmp-copilot-session-xyz"
+        other.mkdir()
+        os.utime(other, (old_time, old_time))
+
+        # Only the target dir should be removed
+        removed = cleanup_stale_project_dirs(encoded_cwd="-tmp-copilot-session-abc")
+        assert removed == 1
+        assert not target.exists()
+        assert other.exists()  # untouched — not the current session
+
+    def test_scoped_fresh_dir_not_removed(self, tmp_path, monkeypatch):
+        """Scoped sweep leaves a fresh directory alone."""
+        from backend.copilot.sdk.transcript import cleanup_stale_project_dirs
+
+        projects_dir = tmp_path / "projects"
+        projects_dir.mkdir()
+        monkeypatch.setattr(
+            "backend.copilot.sdk.transcript._projects_base",
+            lambda: str(projects_dir),
+        )
+
+        fresh = projects_dir / "-tmp-copilot-session-new"
+        fresh.mkdir()
+        # mtime is now — well within TTL
+
+        removed = cleanup_stale_project_dirs(encoded_cwd="-tmp-copilot-session-new")
+        assert removed == 0
+        assert fresh.exists()
+
+    def test_scoped_non_copilot_dir_not_removed(self, tmp_path, monkeypatch):
+        """Scoped sweep refuses to remove a non-copilot directory."""
+        import time
+
+        from backend.copilot.sdk.transcript import (
+            _STALE_PROJECT_DIR_SECONDS,
+            cleanup_stale_project_dirs,
+        )
+
+        projects_dir = tmp_path / "projects"
+        projects_dir.mkdir()
+        monkeypatch.setattr(
+            "backend.copilot.sdk.transcript._projects_base",
+            lambda: str(projects_dir),
+        )
+
+        old_time = time.time() - _STALE_PROJECT_DIR_SECONDS - 100
+        non_copilot = projects_dir / "some-other-project"
+        non_copilot.mkdir()
+        os.utime(non_copilot, (old_time, old_time))
+
+        removed = cleanup_stale_project_dirs(encoded_cwd="some-other-project")
+        assert removed == 0
+        assert non_copilot.exists()
--- a/autogpt_platform/backend/backend/copilot/tools/add_understanding.py
+++ b/autogpt_platform/backend/backend/copilot/tools/add_understanding.py
@@ -22,11 +22,13 @@ class AddUnderstandingTool(BaseTool):

    @property
    def description(self) -> str:
-        return (
-            "Store user's business context, workflows, pain points, and automation goals. "
-            "Call whenever the user shares business info. Each call incrementally merges "
-            "with existing data — provide only the fields you have."
-        )
+        return """Capture and store information about the user's business context,
+workflows, pain points, and automation goals. Call this tool whenever the user
+shares information about their business. Each call incrementally adds to the
+existing understanding - you don't need to provide all fields at once.
+
+Use this to build a comprehensive profile that helps recommend better agents
+and automations for the user's specific needs."""

    @property
    def parameters(self) -> dict[str, Any]:
--- a/autogpt_platform/backend/backend/copilot/tools/agent_browser.py
+++ b/autogpt_platform/backend/backend/copilot/tools/agent_browser.py
@@ -408,11 +408,18 @@ class BrowserNavigateTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Navigate to a URL in a real browser. Returns accessibility tree with @ref IDs "
-            "for browser_act. Session persists (cookies/auth carry over). "
-            "For static pages, prefer web_fetch. "
-            "For SPAs, elements may load late — use browser_act with wait + browser_screenshot to verify. "
-            "For auth: navigate to login, fill creds with browser_act, then navigate to target."
+            "Navigate to a URL using a real browser. Returns an accessibility "
+            "tree snapshot listing the page's interactive elements with @ref IDs "
+            "(e.g. @e3) that can be used with browser_act. "
+            "Session persists — cookies and login state carry over between calls. "
+            "Use this (with browser_act) for multi-step interaction: login flows, "
+            "form filling, button clicks, or anything requiring page interaction. "
+            "For plain static pages, prefer web_fetch — no browser overhead. "
+            "For authenticated pages: navigate to the login page first, use browser_act "
+            "to fill credentials and submit, then navigate to the target page. "
+            "Note: for slow SPAs, the returned snapshot may reflect a partially-loaded "
+            "state. If elements seem missing, use browser_act with action='wait' and a "
+            "CSS selector or millisecond delay, then take a browser_screenshot to verify."
        )

    @property
@@ -422,13 +429,13 @@ class BrowserNavigateTool(BaseTool):
            "properties": {
                "url": {
                    "type": "string",
-                    "description": "HTTP/HTTPS URL to navigate to.",
+                    "description": "The HTTP/HTTPS URL to navigate to.",
                },
                "wait_for": {
                    "type": "string",
                    "enum": ["networkidle", "load", "domcontentloaded"],
                    "default": "networkidle",
-                    "description": "Navigation completion strategy (default: networkidle).",
+                    "description": "When to consider navigation complete. Use 'networkidle' for SPAs (default).",
                },
            },
            "required": ["url"],
@@ -547,12 +554,14 @@ class BrowserActTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Interact with the current browser page using @ref IDs from the snapshot. "
-            "Actions: click, dblclick, fill, type, scroll, hover, press, "
+            "Interact with the current browser page. Use @ref IDs from the "
+            "snapshot (e.g. '@e3') to target elements. Returns an updated snapshot. "
+            "Supported actions: click, dblclick, fill, type, scroll, hover, press, "
            "check, uncheck, select, wait, back, forward, reload. "
-            "fill clears field first; type appends. "
-            "wait accepts CSS selector or milliseconds (e.g. '1000'). "
-            "Returns updated snapshot."
+            "fill clears the field before typing; type appends without clearing. "
+            "wait accepts a CSS selector (waits for element) or milliseconds string (e.g. '1000'). "
+            "Example login flow: fill @e1 with email → fill @e2 with password → "
+            "click @e3 (submit) → browser_navigate to the target page."
        )

    @property
@@ -578,21 +587,30 @@ class BrowserActTool(BaseTool):
                        "forward",
                        "reload",
                    ],
-                    "description": "Action to perform.",
+                    "description": "The action to perform.",
                },
                "target": {
                    "type": "string",
-                    "description": "@ref ID (e.g. '@e3'), CSS selector, or text description.",
+                    "description": (
+                        "Element to target. Use @ref from snapshot (e.g. '@e3'), "
+                        "a CSS selector, or a text description. "
+                        "Required for: click, dblclick, fill, type, hover, check, uncheck, select. "
+                        "For wait: a CSS selector to wait for, or milliseconds as a string (e.g. '1000')."
+                    ),
                },
                "value": {
                    "type": "string",
-                    "description": "Text for fill/type, key for press (e.g. 'Enter'), option for select.",
+                    "description": (
+                        "For fill/type: the text to enter. "
+                        "For press: key name (e.g. 'Enter', 'Tab', 'Control+a'). "
+                        "For select: the option value to select."
+                    ),
                },
                "direction": {
                    "type": "string",
                    "enum": ["up", "down", "left", "right"],
                    "default": "down",
-                    "description": "Scroll direction (default: down).",
+                    "description": "For scroll: direction to scroll.",
                },
            },
            "required": ["action"],
@@ -739,10 +757,12 @@ class BrowserScreenshotTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Screenshot the current browser page and save to workspace. "
-            "annotate=true overlays @ref labels on elements. "
-            "IMPORTANT: After calling, you MUST immediately call read_workspace_file with the "
-            "returned file_id to display the image inline."
+            "Take a screenshot of the current browser page and save it to the workspace. "
+            "IMPORTANT: After calling this tool, immediately call read_workspace_file "
+            "with the returned file_id to display the image inline to the user — "
+            "the screenshot is not visible until you do this. "
+            "With annotate=true (default), @ref labels are overlaid on interactive "
+            "elements, making it easy to see which @ref ID maps to which element on screen."
        )

    @property
@@ -753,12 +773,12 @@ class BrowserScreenshotTool(BaseTool):
                "annotate": {
                    "type": "boolean",
                    "default": True,
-                    "description": "Overlay @ref labels (default: true).",
+                    "description": "Overlay @ref labels on interactive elements (default: true).",
                },
                "filename": {
                    "type": "string",
                    "default": "screenshot.png",
-                    "description": "Workspace filename (default: screenshot.png).",
+                    "description": "Filename to save in the workspace.",
                },
            },
        }
--- a/autogpt_platform/backend/backend/copilot/tools/agent_output.py
+++ b/autogpt_platform/backend/backend/copilot/tools/agent_output.py
@@ -108,12 +108,22 @@ class AgentOutputTool(BaseTool):

    @property
    def description(self) -> str:
-        return (
-            "Retrieve execution outputs from a library agent. "
-            "Identify by agent_name, library_agent_id, or store_slug. "
-            "Filter by execution_id or run_time. "
-            "Optionally wait for running executions."
-        )
+        return """Retrieve execution outputs from agents in the user's library.
+
+        Identify the agent using one of:
+        - agent_name: Fuzzy search in user's library
+        - library_agent_id: Exact library agent ID
+        - store_slug: Marketplace format 'username/agent-name'
+
+        Select which run to retrieve using:
+        - execution_id: Specific execution ID
+        - run_time: 'latest' (default), 'yesterday', 'last week', or ISO date 'YYYY-MM-DD'
+
+        Wait for completion (optional):
+        - wait_if_running: Max seconds to wait if execution is still running (0-300).
+          If the execution is running/queued, waits up to this many seconds for completion.
+          Returns current status on timeout. If already finished, returns immediately.
+        """

    @property
    def parameters(self) -> dict[str, Any]:
@@ -122,27 +132,32 @@ class AgentOutputTool(BaseTool):
            "properties": {
                "agent_name": {
                    "type": "string",
-                    "description": "Agent name (fuzzy match).",
+                    "description": "Agent name to search for in user's library (fuzzy match)",
                },
                "library_agent_id": {
                    "type": "string",
-                    "description": "Library agent ID.",
+                    "description": "Exact library agent ID",
                },
                "store_slug": {
                    "type": "string",
-                    "description": "Marketplace 'username/agent-slug'.",
+                    "description": "Marketplace identifier: 'username/agent-slug'",
                },
                "execution_id": {
                    "type": "string",
-                    "description": "Specific execution ID.",
+                    "description": "Specific execution ID to retrieve",
                },
                "run_time": {
                    "type": "string",
-                    "description": "Time filter: 'latest', today/yesterday/last week/last 7 days/last month/last 30 days, 'YYYY-MM-DD', or ISO datetime.",
+                    "description": (
+                        "Time filter: 'latest', 'yesterday', 'last week', or 'YYYY-MM-DD'"
+                    ),
                },
                "wait_if_running": {
                    "type": "integer",
-                    "description": "Max seconds to wait if still running (0-300). Returns current state on timeout.",
+                    "description": (
+                        "Max seconds to wait if execution is still running (0-300). "
+                        "If running, waits for completion. Returns current state on timeout."
+                    ),
                },
            },
            "required": [],
--- a/autogpt_platform/backend/backend/copilot/tools/bash_exec.py
+++ b/autogpt_platform/backend/backend/copilot/tools/bash_exec.py
@@ -41,9 +41,15 @@ class BashExecTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Execute a Bash command or script. Shares filesystem with SDK file tools. "
-            "Useful for scripts, data processing, and package installation. "
-            "Killed after timeout (default 30s, max 120s)."
+            "Execute a Bash command or script. "
+            "Full Bash scripting is supported (loops, conditionals, pipes, "
+            "functions, etc.). "
+            "The working directory is shared with the SDK Read/Write/Edit/Glob/Grep "
+            "tools — files created by either are immediately visible to both. "
+            "Execution is killed after the timeout (default 30s, max 120s). "
+            "Returns stdout and stderr. "
+            "Useful for file manipulation, data processing, running scripts, "
+            "and installing packages."
        )

    @property
@@ -53,11 +59,13 @@ class BashExecTool(BaseTool):
            "properties": {
                "command": {
                    "type": "string",
-                    "description": "Bash command or script.",
+                    "description": "Bash command or script to execute.",
                },
                "timeout": {
                    "type": "integer",
-                    "description": "Max seconds (default 30, max 120).",
+                    "description": (
+                        "Max execution time in seconds (default 30, max 120)."
+                    ),
                    "default": 30,
                },
            },
--- a/autogpt_platform/backend/backend/copilot/tools/continue_run_block.py
+++ b/autogpt_platform/backend/backend/copilot/tools/continue_run_block.py
@@ -30,7 +30,12 @@ class ContinueRunBlockTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Resume block execution after human review approval. Pass the review_id."
+        return (
+            "Continue executing a block after human review approval. "
+            "Use this after a run_block call returned review_required. "
+            "Pass the review_id from the review_required response. "
+            "The block will execute with the original pre-approved input data."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -39,7 +44,10 @@ class ContinueRunBlockTool(BaseTool):
            "properties": {
                "review_id": {
                    "type": "string",
-                    "description": "review_id from the review_required response.",
+                    "description": (
+                        "The review_id from a previous review_required response. "
+                        "This resumes execution with the pre-approved input data."
+                    ),
                },
            },
            "required": ["review_id"],
--- a/autogpt_platform/backend/backend/copilot/tools/create_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/create_agent.py
@@ -23,8 +23,12 @@ class CreateAgentTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Create a new agent from JSON (nodes + links). Validates, auto-fixes, and saves. "
-            "Before calling, search for existing agents with find_library_agent."
+            "Create a new agent workflow. Pass `agent_json` with the complete "
+            "agent graph JSON you generated using block schemas from find_block. "
+            "The tool validates, auto-fixes, and saves.\n\n"
+            "IMPORTANT: Before calling this tool, search for relevant existing agents "
+            "using find_library_agent that could be used as building blocks. "
+            "Pass their IDs in the library_agent_ids parameter."
        )

    @property
@@ -38,21 +42,34 @@ class CreateAgentTool(BaseTool):
            "properties": {
                "agent_json": {
                    "type": "object",
-                    "description": "Agent graph with 'nodes' and 'links' arrays.",
+                    "description": (
+                        "The agent JSON to validate and save. "
+                        "Must contain 'nodes' and 'links' arrays, and optionally "
+                        "'name' and 'description'."
+                    ),
                },
                "library_agent_ids": {
                    "type": "array",
                    "items": {"type": "string"},
-                    "description": "Library agent IDs as building blocks.",
+                    "description": (
+                        "List of library agent IDs to use as building blocks."
+                    ),
                },
                "save": {
                    "type": "boolean",
-                    "description": "Save the agent (default: true). False for preview.",
+                    "description": (
+                        "Whether to save the agent. Default is true. "
+                        "Set to false for preview only."
+                    ),
                    "default": True,
                },
                "folder_id": {
                    "type": "string",
-                    "description": "Folder ID to save into (default: root).",
+                    "description": (
+                        "Optional folder ID to save the agent into. "
+                        "If not provided, the agent is saved at root level. "
+                        "Use list_folders to find available folders."
+                    ),
                },
            },
            "required": ["agent_json"],
--- a/autogpt_platform/backend/backend/copilot/tools/customize_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/customize_agent.py
@@ -23,7 +23,9 @@ class CustomizeAgentTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Customize a marketplace/template agent. Validates, auto-fixes, and saves."
+            "Customize a marketplace or template agent. Pass `agent_json` "
+            "with the complete customized agent JSON. The tool validates, "
+            "auto-fixes, and saves."
        )

    @property
@@ -37,21 +39,32 @@ class CustomizeAgentTool(BaseTool):
            "properties": {
                "agent_json": {
                    "type": "object",
-                    "description": "Customized agent JSON with nodes and links.",
+                    "description": (
+                        "Complete customized agent JSON to validate and save. "
+                        "Optionally include 'name' and 'description'."
+                    ),
                },
                "library_agent_ids": {
                    "type": "array",
                    "items": {"type": "string"},
-                    "description": "Library agent IDs as building blocks.",
+                    "description": (
+                        "List of library agent IDs to use as building blocks."
+                    ),
                },
                "save": {
                    "type": "boolean",
-                    "description": "Save the agent (default: true). False for preview.",
+                    "description": (
+                        "Whether to save the customized agent. Default is true."
+                    ),
                    "default": True,
                },
                "folder_id": {
                    "type": "string",
-                    "description": "Folder ID to save into (default: root).",
+                    "description": (
+                        "Optional folder ID to save the agent into. "
+                        "If not provided, the agent is saved at root level. "
+                        "Use list_folders to find available folders."
+                    ),
                },
            },
            "required": ["agent_json"],
--- a/autogpt_platform/backend/backend/copilot/tools/edit_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/edit_agent.py
@@ -23,8 +23,12 @@ class EditAgentTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Edit an existing agent. Validates, auto-fixes, and saves. "
-            "Before calling, search for existing agents with find_library_agent."
+            "Edit an existing agent. Pass `agent_json` with the complete "
+            "updated agent JSON you generated. The tool validates, auto-fixes, "
+            "and saves.\n\n"
+            "IMPORTANT: Before calling this tool, if the changes involve adding new "
+            "functionality, search for relevant existing agents using find_library_agent "
+            "that could be used as building blocks."
        )

    @property
@@ -38,20 +42,33 @@ class EditAgentTool(BaseTool):
            "properties": {
                "agent_id": {
                    "type": "string",
-                    "description": "Graph ID or library agent ID to edit.",
+                    "description": (
+                        "The ID of the agent to edit. "
+                        "Can be a graph ID or library agent ID."
+                    ),
                },
                "agent_json": {
                    "type": "object",
-                    "description": "Updated agent JSON with nodes and links.",
+                    "description": (
+                        "Complete updated agent JSON to validate and save. "
+                        "Must contain 'nodes' and 'links'. "
+                        "Include 'name' and/or 'description' if they need "
+                        "to be updated."
+                    ),
                },
                "library_agent_ids": {
                    "type": "array",
                    "items": {"type": "string"},
-                    "description": "Library agent IDs as building blocks.",
+                    "description": (
+                        "List of library agent IDs to use as building blocks for the changes."
+                    ),
                },
                "save": {
                    "type": "boolean",
-                    "description": "Save changes (default: true). False for preview.",
+                    "description": (
+                        "Whether to save the changes. "
+                        "Default is true. Set to false for preview only."
+                    ),
                    "default": True,
                },
            },
--- a/autogpt_platform/backend/backend/copilot/tools/feature_requests.py
+++ b/autogpt_platform/backend/backend/copilot/tools/feature_requests.py
@@ -134,7 +134,11 @@ class SearchFeatureRequestsTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Search existing feature requests. Check before creating a new one."
+        return (
+            "Search existing feature requests to check if a similar request "
+            "already exists before creating a new one. Returns matching feature "
+            "requests with their ID, title, and description."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -230,9 +234,14 @@ class CreateFeatureRequestTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Create a feature request or add need to existing one. "
-            "Search first to avoid duplicates. Pass existing_issue_id to add to existing. "
-            "Never include PII (names, emails, phone numbers, company names) in title/description."
+            "Create a new feature request or add a customer need to an existing one. "
+            "Always search first with search_feature_requests to avoid duplicates. "
+            "If a matching request exists, pass its ID as existing_issue_id to add "
+            "the user's need to it instead of creating a duplicate. "
+            "IMPORTANT: Never include personally identifiable information (PII) in "
+            "the title or description — no names, emails, phone numbers, company "
+            "names, or other identifying details. Write titles and descriptions in "
+            "generic, feature-focused language."
        )

    @property
@@ -242,15 +251,28 @@ class CreateFeatureRequestTool(BaseTool):
            "properties": {
                "title": {
                    "type": "string",
-                    "description": "Feature request title. No PII.",
+                    "description": (
+                        "Title for the feature request. Must be generic and "
+                        "feature-focused — do not include any user names, emails, "
+                        "company names, or other PII."
+                    ),
                },
                "description": {
                    "type": "string",
-                    "description": "What the user wants and why. No PII.",
+                    "description": (
+                        "Detailed description of what the user wants and why. "
+                        "Must not contain any personally identifiable information "
+                        "(PII) — describe the feature need generically without "
+                        "referencing specific users, companies, or contact details."
+                    ),
                },
                "existing_issue_id": {
                    "type": "string",
-                    "description": "Linear issue ID to add need to (from search results).",
+                    "description": (
+                        "If adding a need to an existing feature request, "
+                        "provide its Linear issue ID (from search results). "
+                        "Omit to create a new feature request."
+                    ),
                },
            },
            "required": ["title", "description"],
--- a/autogpt_platform/backend/backend/copilot/tools/find_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/find_agent.py
@@ -18,7 +18,9 @@ class FindAgentTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Search marketplace agents by capability."
+        return (
+            "Discover agents from the marketplace based on capabilities and user needs."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -27,7 +29,7 @@ class FindAgentTool(BaseTool):
            "properties": {
                "query": {
                    "type": "string",
-                    "description": "Search keywords (single keywords work best).",
+                    "description": "Search query describing what the user wants to accomplish. Use single keywords for best results.",
                },
            },
            "required": ["query"],
--- a/autogpt_platform/backend/backend/copilot/tools/find_block.py
+++ b/autogpt_platform/backend/backend/copilot/tools/find_block.py
@@ -51,7 +51,14 @@ class FindBlockTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Search blocks by name or description. Returns block IDs for run_block. Always call this FIRST to get block IDs before using run_block."
+        return (
+            "Search for available blocks by name or description. "
+            "Blocks are reusable components that perform specific tasks like "
+            "sending emails, making API calls, processing text, etc. "
+            "IMPORTANT: Use this tool FIRST to get the block's 'id' before calling run_block. "
+            "The response includes each block's id, name, and description. "
+            "Call run_block with the block's id and **no inputs** to see its detailed inputs/outputs before executing it."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -60,11 +67,18 @@ class FindBlockTool(BaseTool):
            "properties": {
                "query": {
                    "type": "string",
-                    "description": "Search keywords (e.g. 'email', 'http', 'ai').",
+                    "description": (
+                        "Search query to find blocks by name or description. "
+                        "Use keywords like 'email', 'http', 'text', 'ai', etc."
+                    ),
                },
                "include_schemas": {
                    "type": "boolean",
-                    "description": "Include full input/output schemas (for agent JSON generation).",
+                    "description": (
+                        "If true, include full input_schema and output_schema "
+                        "for each block. Use when generating agent JSON that "
+                        "needs block schemas. Default is false."
+                    ),
                    "default": False,
                },
            },
--- a/autogpt_platform/backend/backend/copilot/tools/find_library_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/find_library_agent.py
@@ -19,8 +19,13 @@ class FindLibraryAgentTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Search user's library agents. Returns graph_id, schemas for sub-agent composition. "
-            "Omit query to list all."
+            "Search for or list agents in the user's library. Use this to find "
+            "agents the user has already added to their library, including agents "
+            "they created or added from the marketplace. "
+            "When creating agents with sub-agent composition, use this to get "
+            "the agent's graph_id, graph_version, input_schema, and output_schema "
+            "needed for AgentExecutorBlock nodes. "
+            "Omit the query to list all agents."
        )

    @property
@@ -30,7 +35,10 @@ class FindLibraryAgentTool(BaseTool):
            "properties": {
                "query": {
                    "type": "string",
-                    "description": "Search by name/description. Omit to list all.",
+                    "description": (
+                        "Search query to find agents by name or description. "
+                        "Omit to list all agents in the library."
+                    ),
                },
            },
            "required": [],
--- a/autogpt_platform/backend/backend/copilot/tools/fix_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/fix_agent.py
@@ -22,8 +22,20 @@ class FixAgentGraphTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Auto-fix common agent JSON issues (UUIDs, types, credentials, spacing, etc.). "
-            "Returns fixed JSON and list of fixes applied."
+            "Auto-fix common issues in an agent JSON graph. Applies fixes for:\n"
+            "- Missing or invalid UUIDs on nodes and links\n"
+            "- StoreValueBlock prerequisites for ConditionBlock\n"
+            "- Double curly brace escaping in prompt templates\n"
+            "- AddToList/AddToDictionary prerequisite blocks\n"
+            "- CodeExecutionBlock output field naming\n"
+            "- Missing credentials configuration\n"
+            "- Node X coordinate spacing (800+ units apart)\n"
+            "- AI model default parameters\n"
+            "- Link static properties based on input schema\n"
+            "- Type mismatches (inserts conversion blocks)\n\n"
+            "Returns the fixed agent JSON plus a list of fixes applied. "
+            "After fixing, the agent is re-validated. If still invalid, "
+            "the remaining errors are included in the response."
        )

    @property
--- a/autogpt_platform/backend/backend/copilot/tools/get_agent_building_guide.py
+++ b/autogpt_platform/backend/backend/copilot/tools/get_agent_building_guide.py
@@ -42,7 +42,12 @@ class GetAgentBuildingGuideTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Get the agent JSON building guide (nodes, links, AgentExecutorBlock, MCPToolBlock usage). Call before generating agent JSON."
+        return (
+            "Returns the complete guide for building agent JSON graphs, including "
+            "block IDs, link structure, AgentInputBlock, AgentOutputBlock, "
+            "AgentExecutorBlock (for sub-agent composition), and MCPToolBlock usage. "
+            "Call this before generating agent JSON to ensure correct structure."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
--- a/autogpt_platform/backend/backend/copilot/tools/get_doc_page.py
+++ b/autogpt_platform/backend/backend/copilot/tools/get_doc_page.py
@@ -25,7 +25,8 @@ class GetDocPageTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Read full documentation page content by path (from search_docs results)."
+            "Get the full content of a documentation page by its path. "
+            "Use this after search_docs to read the complete content of a relevant page."
        )

    @property
@@ -35,7 +36,10 @@ class GetDocPageTool(BaseTool):
            "properties": {
                "path": {
                    "type": "string",
-                    "description": "Doc file path (e.g. 'platform/block-sdk-guide.md').",
+                    "description": (
+                        "The path to the documentation file, as returned by search_docs. "
+                        "Example: 'platform/block-sdk-guide.md'"
+                    ),
                },
            },
            "required": ["path"],
--- a/autogpt_platform/backend/backend/copilot/tools/get_mcp_guide.py
+++ b/autogpt_platform/backend/backend/copilot/tools/get_mcp_guide.py
@@ -38,7 +38,11 @@ class GetMCPGuideTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Get MCP server URLs and auth guide."
+        return (
+            "Returns the MCP tool guide: known hosted server URLs (Notion, Linear, "
+            "Stripe, Intercom, Cloudflare, Atlassian) and authentication workflow. "
+            "Call before using run_mcp_tool if you need a server URL or auth info."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
--- a/autogpt_platform/backend/backend/copilot/tools/manage_folders.py
+++ b/autogpt_platform/backend/backend/copilot/tools/manage_folders.py
@@ -88,7 +88,10 @@ class CreateFolderTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Create a library folder. Use parent_id to nest inside another folder."
+        return (
+            "Create a new folder in the user's library to organize agents. "
+            "Optionally nest it inside an existing folder using parent_id."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -101,19 +104,22 @@ class CreateFolderTool(BaseTool):
            "properties": {
                "name": {
                    "type": "string",
-                    "description": "Folder name (max 100 chars).",
+                    "description": "Name for the new folder (max 100 chars).",
                },
                "parent_id": {
                    "type": "string",
-                    "description": "Parent folder ID (omit for root).",
+                    "description": (
+                        "ID of the parent folder to nest inside. "
+                        "Omit to create at root level."
+                    ),
                },
                "icon": {
                    "type": "string",
-                    "description": "Icon identifier.",
+                    "description": "Optional icon identifier for the folder.",
                },
                "color": {
                    "type": "string",
-                    "description": "Hex color (#RRGGBB).",
+                    "description": "Optional hex color code (#RRGGBB).",
                },
            },
            "required": ["name"],
@@ -169,8 +175,13 @@ class ListFoldersTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "List library folders. Omit parent_id for full tree. "
-            "Set include_agents=true when user asks about agents in folders."
+            "List the user's library folders. "
+            "Omit parent_id to get the full folder tree. "
+            "Provide parent_id to list only direct children of that folder. "
+            "Set include_agents=true to also return the agents inside each folder "
+            "and root-level agents not in any folder. Always set include_agents=true "
+            "when the user asks about agents, wants to see what's in their folders, "
+            "or mentions agents alongside folders."
        )

    @property
@@ -184,11 +195,17 @@ class ListFoldersTool(BaseTool):
            "properties": {
                "parent_id": {
                    "type": "string",
-                    "description": "List children of this folder (omit for full tree).",
+                    "description": (
+                        "List children of this folder. "
+                        "Omit to get the full folder tree."
+                    ),
                },
                "include_agents": {
                    "type": "boolean",
-                    "description": "Include agents in each folder (default: false).",
+                    "description": (
+                        "Whether to include the list of agents inside each folder. "
+                        "Defaults to false."
+                    ),
                },
            },
            "required": [],
@@ -340,7 +357,10 @@ class MoveFolderTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Move a folder. Set target_parent_id to null for root."
+        return (
+            "Move a folder to a different parent folder. "
+            "Set target_parent_id to null to move to root level."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -353,11 +373,14 @@ class MoveFolderTool(BaseTool):
            "properties": {
                "folder_id": {
                    "type": "string",
-                    "description": "Folder ID.",
+                    "description": "ID of the folder to move.",
                },
                "target_parent_id": {
                    "type": ["string", "null"],
-                    "description": "New parent folder ID (null for root).",
+                    "description": (
+                        "ID of the new parent folder. "
+                        "Use null to move to root level."
+                    ),
                },
            },
            "required": ["folder_id"],
@@ -410,7 +433,10 @@ class DeleteFolderTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Delete a folder. Agents inside move to root (not deleted)."
+        return (
+            "Delete a folder from the user's library. "
+            "Agents inside the folder are moved to root level (not deleted)."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -473,7 +499,10 @@ class MoveAgentsToFolderTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Move agents to a folder. Set folder_id to null for root."
+        return (
+            "Move one or more agents to a folder. "
+            "Set folder_id to null to move agents to root level."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -487,11 +516,13 @@ class MoveAgentsToFolderTool(BaseTool):
                "agent_ids": {
                    "type": "array",
                    "items": {"type": "string"},
-                    "description": "Library agent IDs to move.",
+                    "description": "List of library agent IDs to move.",
                },
                "folder_id": {
                    "type": ["string", "null"],
-                    "description": "Target folder ID (null for root).",
+                    "description": (
+                        "Target folder ID. Use null to move to root level."
+                    ),
                },
            },
            "required": ["agent_ids"],
--- a/autogpt_platform/backend/backend/copilot/tools/run_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/run_agent.py
@@ -104,11 +104,19 @@ class RunAgentTool(BaseTool):

    @property
    def description(self) -> str:
-        return (
-            "Run or schedule an agent. Automatically checks inputs and credentials. "
-            "Identify by username_agent_slug ('user/agent') or library_agent_id. "
-            "For scheduling, provide schedule_name + cron."
-        )
+        return """Run or schedule an agent from the marketplace or user's library.
+
+        The tool automatically handles the setup flow:
+        - Returns missing inputs if required fields are not provided
+        - Returns missing credentials if user needs to configure them
+        - Executes immediately if all requirements are met
+        - Schedules execution if cron expression is provided
+
+        Identify the agent using either:
+        - username_agent_slug: Marketplace format 'username/agent-name'
+        - library_agent_id: ID of an agent in the user's library
+
+        For scheduled execution, provide: schedule_name, cron, and optionally timezone."""

    @property
    def parameters(self) -> dict[str, Any]:
@@ -117,36 +125,40 @@ class RunAgentTool(BaseTool):
            "properties": {
                "username_agent_slug": {
                    "type": "string",
-                    "description": "Marketplace format 'username/agent-name'.",
+                    "description": "Agent identifier in format 'username/agent-name'",
                },
                "library_agent_id": {
                    "type": "string",
-                    "description": "Library agent ID.",
+                    "description": "Library agent ID from user's library",
                },
                "inputs": {
                    "type": "object",
-                    "description": "Input values for the agent.",
+                    "description": "Input values for the agent",
                    "additionalProperties": True,
                },
                "use_defaults": {
                    "type": "boolean",
-                    "description": "Run with default values (confirm with user first).",
+                    "description": "Set to true to run with default values (user must confirm)",
                },
                "schedule_name": {
                    "type": "string",
-                    "description": "Name for scheduled execution.",
+                    "description": "Name for scheduled execution (triggers scheduling mode)",
                },
                "cron": {
                    "type": "string",
-                    "description": "Cron expression (min hour day month weekday).",
+                    "description": "Cron expression (5 fields: min hour day month weekday)",
                },
                "timezone": {
                    "type": "string",
-                    "description": "IANA timezone (default: UTC).",
+                    "description": "IANA timezone for schedule (default: UTC)",
                },
                "wait_for_result": {
                    "type": "integer",
-                    "description": "Max seconds to wait for completion (0-300).",
+                    "description": (
+                        "Max seconds to wait for execution to complete (0-300). "
+                        "If >0, blocks until the execution finishes or times out. "
+                        "Returns execution outputs when complete."
+                    ),
                },
            },
            "required": [],
--- a/autogpt_platform/backend/backend/copilot/tools/run_block.py
+++ b/autogpt_platform/backend/backend/copilot/tools/run_block.py
@@ -45,10 +45,13 @@ class RunBlockTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Execute a block. IMPORTANT: Always get block_id from find_block first "
-            "— do NOT guess or fabricate IDs. "
-            "Call with empty input_data to see schema, then with data to execute. "
-            "If review_required, use continue_run_block."
+            "Execute a specific block with the provided input data. "
+            "IMPORTANT: You MUST call find_block first to get the block's 'id' - "
+            "do NOT guess or make up block IDs. "
+            "On first attempt (without input_data), returns detailed schema showing "
+            "required inputs and outputs. Then call again with proper input_data to execute. "
+            "If a block requires human review, use continue_run_block with the "
+            "review_id after the user approves."
        )

    @property
@@ -58,14 +61,28 @@ class RunBlockTool(BaseTool):
            "properties": {
                "block_id": {
                    "type": "string",
-                    "description": "Block ID from find_block results.",
+                    "description": (
+                        "The block's 'id' field from find_block results. "
+                        "NEVER guess this - always get it from find_block first."
+                    ),
+                },
+                "block_name": {
+                    "type": "string",
+                    "description": (
+                        "The block's human-readable name from find_block results. "
+                        "Used for display purposes in the UI."
+                    ),
                },
                "input_data": {
                    "type": "object",
-                    "description": "Input values. Use {} first to see schema.",
+                    "description": (
+                        "Input values for the block. "
+                        "First call with empty {} to see the block's schema, "
+                        "then call again with proper values to execute."
+                    ),
                },
            },
-            "required": ["block_id", "input_data"],
+            "required": ["block_id", "block_name", "input_data"],
        }

    @property
--- a/autogpt_platform/backend/backend/copilot/tools/run_mcp_tool.py
+++ b/autogpt_platform/backend/backend/copilot/tools/run_mcp_tool.py
@@ -57,9 +57,10 @@ class RunMCPToolTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Discover and execute MCP server tools. "
-            "Call with server_url only to list tools, then with tool_name + tool_arguments to execute. "
-            "Call get_mcp_guide first for server URLs and auth."
+            "Connect to an MCP (Model Context Protocol) server to discover and execute its tools. "
+            "Two-step: (1) call with server_url to list available tools, "
+            "(2) call again with server_url + tool_name + tool_arguments to execute. "
+            "Call get_mcp_guide for known server URLs and auth details."
        )

    @property
@@ -69,15 +70,24 @@ class RunMCPToolTool(BaseTool):
            "properties": {
                "server_url": {
                    "type": "string",
-                    "description": "MCP server URL (Streamable HTTP endpoint).",
+                    "description": (
+                        "URL of the MCP server (Streamable HTTP endpoint), "
+                        "e.g. https://mcp.example.com/mcp"
+                    ),
                },
                "tool_name": {
                    "type": "string",
-                    "description": "Tool to execute. Omit to discover available tools.",
+                    "description": (
+                        "Name of the MCP tool to execute. "
+                        "Omit on first call to discover available tools."
+                    ),
                },
                "tool_arguments": {
                    "type": "object",
-                    "description": "Arguments matching the tool's input schema.",
+                    "description": (
+                        "Arguments to pass to the selected tool. "
+                        "Must match the tool's input schema returned during discovery."
+                    ),
                },
            },
            "required": ["server_url"],
--- a/autogpt_platform/backend/backend/copilot/tools/search_docs.py
+++ b/autogpt_platform/backend/backend/copilot/tools/search_docs.py
@@ -38,7 +38,11 @@ class SearchDocsTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Search platform documentation by keyword. Use get_doc_page to read full results."
+        return (
+            "Search the AutoGPT platform documentation for information about "
+            "how to use the platform, build agents, configure blocks, and more. "
+            "Returns relevant documentation sections. Use get_doc_page to read full content."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -47,7 +51,10 @@ class SearchDocsTool(BaseTool):
            "properties": {
                "query": {
                    "type": "string",
-                    "description": "Documentation search query.",
+                    "description": (
+                        "Search query to find relevant documentation. "
+                        "Use natural language to describe what you're looking for."
+                    ),
                },
            },
            "required": ["query"],
--- a/autogpt_platform/backend/backend/copilot/tools/tool_schema_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/tool_schema_test.py
@@ -1,81 +0,0 @@
-"""Schema regression tests for all registered CoPilot tools.
-
-Validates that every tool in TOOL_REGISTRY produces a well-formed schema:
- description is non-empty
- all `required` fields exist in `properties`
- every property has a `type` and `description`
- total token budget does not regress past 8000 tokens
-"""
-
-import json
-
-import pytest
-import tiktoken
-
-from backend.copilot.tools import TOOL_REGISTRY
-
-_TOKEN_BUDGET = 8_000
-
-
-def _get_all_tool_schemas() -> list[tuple[str, object]]:
-    """Return (tool_name, openai_schema) pairs for every registered tool."""
-    return [(name, tool.as_openai_tool()) for name, tool in TOOL_REGISTRY.items()]
-
-
-_ALL_SCHEMAS = _get_all_tool_schemas()
-
-
-@pytest.mark.parametrize(
-    "tool_name,schema",
-    _ALL_SCHEMAS,
-    ids=[name for name, _ in _ALL_SCHEMAS],
-)
-class TestToolSchema:
-    """Validate schema invariants for every registered tool."""
-
-    def test_description_non_empty(self, tool_name: str, schema: dict) -> None:
-        desc = schema["function"].get("description", "")
-        assert desc, f"Tool '{tool_name}' has an empty description"
-
-    def test_required_fields_exist_in_properties(
-        self, tool_name: str, schema: dict
-    ) -> None:
-        params = schema["function"].get("parameters", {})
-        properties = params.get("properties", {})
-        required = params.get("required", [])
-        for field in required:
-            assert field in properties, (
-                f"Tool '{tool_name}': required field '{field}' "
-                f"not found in properties {list(properties.keys())}"
-            )
-
-    def test_every_property_has_type_and_description(
-        self, tool_name: str, schema: dict
-    ) -> None:
-        params = schema["function"].get("parameters", {})
-        properties = params.get("properties", {})
-        for prop_name, prop_def in properties.items():
-            assert (
-                "type" in prop_def
-            ), f"Tool '{tool_name}', property '{prop_name}' is missing 'type'"
-            assert (
-                "description" in prop_def
-            ), f"Tool '{tool_name}', property '{prop_name}' is missing 'description'"
-
-
-def test_total_schema_token_budget() -> None:
-    """Assert total tool schema size stays under the token budget.
-
-    This locks in the 34% token reduction from #12398 and prevents future
-    description bloat from eroding the gains. Budget is set to 8000 tokens.
-    Note: this measures tool JSON only (not the full system prompt); the actual
-    baseline for tool schemas alone is ~6470 tokens, giving ~19% headroom.
-    """
-    schemas = [tool.as_openai_tool() for tool in TOOL_REGISTRY.values()]
-    serialized = json.dumps(schemas)
-    enc = tiktoken.get_encoding("cl100k_base")
-    total_tokens = len(enc.encode(serialized))
-    assert total_tokens < _TOKEN_BUDGET, (
-        f"Tool schemas use {total_tokens} tokens, exceeding budget of {_TOKEN_BUDGET}. "
-        f"Description bloat detected — trim descriptions or raise the budget intentionally."
-    )
--- a/autogpt_platform/backend/backend/copilot/tools/validate_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/validate_agent.py
@@ -21,7 +21,19 @@ class ValidateAgentGraphTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Validate agent JSON for correctness (block_ids, links, types, schemas). On failure, use fix_agent_graph to auto-fix."
+        return (
+            "Validate an agent JSON graph for correctness. Checks:\n"
+            "- All block_ids reference real blocks\n"
+            "- All links reference valid source/sink nodes and fields\n"
+            "- Required input fields are wired or have defaults\n"
+            "- Data types are compatible across links\n"
+            "- Nested sink links use correct notation\n"
+            "- Prompt templates use proper curly brace escaping\n"
+            "- AgentExecutorBlock configurations are valid\n\n"
+            "Call this after generating agent JSON to verify correctness. "
+            "If validation fails, either fix issues manually based on the error "
+            "descriptions, or call fix_agent_graph to auto-fix common problems."
+        )

    @property
    def requires_auth(self) -> bool:
@@ -34,7 +46,11 @@ class ValidateAgentGraphTool(BaseTool):
            "properties": {
                "agent_json": {
                    "type": "object",
-                    "description": "Agent JSON with 'nodes' and 'links' arrays.",
+                    "description": (
+                        "The agent JSON to validate. Must contain 'nodes' and 'links' arrays. "
+                        "Each node needs: id (UUID), block_id, input_default, metadata. "
+                        "Each link needs: id (UUID), source_id, source_name, sink_id, sink_name."
+                    ),
                },
            },
            "required": ["agent_json"],
--- a/autogpt_platform/backend/backend/copilot/tools/web_fetch.py
+++ b/autogpt_platform/backend/backend/copilot/tools/web_fetch.py
@@ -59,7 +59,13 @@ class WebFetchTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Fetch a public web page. Public URLs only — internal addresses blocked. Returns readable text from HTML by default."
+        return (
+            "Fetch the content of a public web page by URL. "
+            "Returns readable text extracted from HTML by default. "
+            "Useful for reading documentation, articles, and API responses. "
+            "Only supports HTTP/HTTPS GET requests to public URLs "
+            "(private/internal network addresses are blocked)."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -68,11 +74,14 @@ class WebFetchTool(BaseTool):
            "properties": {
                "url": {
                    "type": "string",
-                    "description": "Public HTTP/HTTPS URL.",
+                    "description": "The public HTTP/HTTPS URL to fetch.",
                },
                "extract_text": {
                    "type": "boolean",
-                    "description": "Extract text from HTML (default: true).",
+                    "description": (
+                        "If true (default), extract readable text from HTML. "
+                        "If false, return raw content."
+                    ),
                    "default": True,
                },
            },
--- a/autogpt_platform/backend/backend/copilot/tools/workspace_files.py
+++ b/autogpt_platform/backend/backend/copilot/tools/workspace_files.py
@@ -2,6 +2,7 @@

 import base64
 import logging
+import mimetypes
 import os
 from typing import Any, Optional

@@ -10,7 +11,9 @@ from pydantic import BaseModel
 from backend.copilot.context import (
    E2B_WORKDIR,
    get_current_sandbox,
+    get_sdk_cwd,
    get_workspace_manager,
+    is_allowed_local_path,
    resolve_sandbox_path,
 )
 from backend.copilot.model import ChatSession
@@ -24,6 +27,10 @@ from .models import ErrorResponse, ResponseType, ToolResponseBase

 logger = logging.getLogger(__name__)

+# Sentinel file_id used when a tool-result file is read directly from the local
+# host filesystem (rather than from workspace storage).
+_LOCAL_TOOL_RESULT_FILE_ID = "local"
+

 async def _resolve_write_content(
    content_text: str | None,
@@ -275,6 +282,93 @@ class WorkspaceFileContentResponse(ToolResponseBase):
    content_base64: str


+_MAX_LOCAL_TOOL_RESULT_BYTES = 10 * 1024 * 1024  # 10 MB
+
+
+def _read_local_tool_result(
+    path: str,
+    char_offset: int,
+    char_length: Optional[int],
+    session_id: str,
+    sdk_cwd: str | None = None,
+) -> ToolResponseBase:
+    """Read an SDK tool-result file from local disk.
+
+    This is a fallback for when the model mistakenly calls
+    ``read_workspace_file`` with an SDK tool-result path that only exists on
+    the host filesystem, not in cloud workspace storage.
+
+    Defence-in-depth: validates *path* via :func:`is_allowed_local_path`
+    regardless of what the caller has already checked.
+    """
+    # TOCTOU: path validated then opened separately. Acceptable because
+    # the tool-results directory is server-controlled, not user-writable.
+    expanded = os.path.realpath(os.path.expanduser(path))
+    # Defence-in-depth: re-check with resolved path (caller checked raw path).
+    if not is_allowed_local_path(expanded, sdk_cwd or get_sdk_cwd()):
+        return ErrorResponse(
+            message=f"Path not allowed: {os.path.basename(path)}", session_id=session_id
+        )
+    try:
+        # The 10 MB cap (_MAX_LOCAL_TOOL_RESULT_BYTES) bounds memory usage.
+        # Pre-read size check prevents loading files far above the cap;
+        # the remaining TOCTOU gap is acceptable for server-controlled paths.
+        file_size = os.path.getsize(expanded)
+        if file_size > _MAX_LOCAL_TOOL_RESULT_BYTES:
+            return ErrorResponse(
+                message=(f"File too large: {os.path.basename(path)}"),
+                session_id=session_id,
+            )
+
+        # Detect binary files: try strict UTF-8 first, fall back to
+        # base64-encoding the raw bytes for binary content.
+        with open(expanded, "rb") as fh:
+            raw = fh.read()
+        try:
+            text_content = raw.decode("utf-8")
+        except UnicodeDecodeError:
+            # Binary file — return raw base64, ignore char_offset/char_length
+            return WorkspaceFileContentResponse(
+                file_id=_LOCAL_TOOL_RESULT_FILE_ID,
+                name=os.path.basename(path),
+                path=path,
+                mime_type=mimetypes.guess_type(path)[0] or "application/octet-stream",
+                content_base64=base64.b64encode(raw).decode("ascii"),
+                message=(
+                    f"Read {file_size:,} bytes (binary) from local tool-result "
+                    f"{os.path.basename(path)}"
+                ),
+                session_id=session_id,
+            )
+
+        end = (
+            char_offset + char_length if char_length is not None else len(text_content)
+        )
+        slice_text = text_content[char_offset:end]
+    except FileNotFoundError:
+        return ErrorResponse(
+            message=f"File not found: {os.path.basename(path)}", session_id=session_id
+        )
+    except Exception as exc:
+        return ErrorResponse(
+            message=f"Error reading file: {type(exc).__name__}", session_id=session_id
+        )
+
+    return WorkspaceFileContentResponse(
+        file_id=_LOCAL_TOOL_RESULT_FILE_ID,
+        name=os.path.basename(path),
+        path=path,
+        mime_type=mimetypes.guess_type(path)[0] or "text/plain",
+        content_base64=base64.b64encode(slice_text.encode("utf-8")).decode("ascii"),
+        message=(
+            f"Read chars {char_offset}\u2013{char_offset + len(slice_text)} "
+            f"of {len(text_content):,} chars from local tool-result "
+            f"{os.path.basename(path)}"
+        ),
+        session_id=session_id,
+    )
+
+
 class WorkspaceFileMetadataResponse(ToolResponseBase):
    """Response containing workspace file metadata and download URL (prevents context bloat)."""

@@ -321,7 +415,13 @@ class ListWorkspaceFilesTool(BaseTool):

    @property
    def description(self) -> str:
-        return "List persistent workspace files. For ephemeral session files, use SDK Glob/Read instead. Optionally filter by path prefix."
+        return (
+            "List files in the user's persistent workspace (cloud storage). "
+            "These files survive across sessions. "
+            "For ephemeral session files, use the SDK Read/Glob tools instead. "
+            "Returns file names, paths, sizes, and metadata. "
+            "Optionally filter by path prefix."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -330,17 +430,24 @@ class ListWorkspaceFilesTool(BaseTool):
            "properties": {
                "path_prefix": {
                    "type": "string",
-                    "description": "Filter by path prefix (e.g. '/documents/').",
+                    "description": (
+                        "Optional path prefix to filter files "
+                        "(e.g., '/documents/' to list only files in documents folder). "
+                        "By default, only files from the current session are listed."
+                    ),
                },
                "limit": {
                    "type": "integer",
-                    "description": "Max files to return (default 50, max 100).",
+                    "description": "Maximum number of files to return (default 50, max 100)",
                    "minimum": 1,
                    "maximum": 100,
                },
                "include_all_sessions": {
                    "type": "boolean",
-                    "description": "Include files from all sessions (default: false).",
+                    "description": (
+                        "If true, list files from all sessions. "
+                        "Default is false (only current session's files)."
+                    ),
                },
            },
            "required": [],
@@ -423,10 +530,18 @@ class ReadWorkspaceFileTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Read a file from persistent workspace. Specify file_id or path. "
-            "Small text/image files return inline; large/binary return metadata+URL. "
-            "Use save_to_path to copy to working dir for processing. "
-            "Use offset/length for paginated reads."
+            "Read a file from the user's persistent workspace (cloud storage). "
+            "These files survive across sessions. "
+            "For ephemeral session files, use the SDK Read tool instead. "
+            "Specify either file_id or path to identify the file. "
+            "For small text files, returns content directly. "
+            "For large or binary files, returns metadata and a download URL. "
+            "Use 'save_to_path' to copy the file to the working directory "
+            "(sandbox or ephemeral) for processing with bash_exec or file tools. "
+            "Use 'offset' and 'length' for paginated reads of large files "
+            "(e.g., persisted tool outputs). "
+            "Paths are scoped to the current session by default. "
+            "Use /sessions/<session_id>/... for cross-session access."
        )

    @property
@@ -436,30 +551,48 @@ class ReadWorkspaceFileTool(BaseTool):
            "properties": {
                "file_id": {
                    "type": "string",
-                    "description": "File ID from list_workspace_files.",
+                    "description": "The file's unique ID (from list_workspace_files)",
                },
                "path": {
                    "type": "string",
-                    "description": "Virtual file path (e.g. '/documents/report.pdf').",
+                    "description": (
+                        "The virtual file path (e.g., '/documents/report.pdf'). "
+                        "Scoped to current session by default."
+                    ),
                },
                "save_to_path": {
                    "type": "string",
-                    "description": "Copy file to this working directory path for processing.",
+                    "description": (
+                        "If provided, save the file to this path in the working "
+                        "directory (cloud sandbox when E2B is active, or "
+                        "ephemeral dir otherwise) so it can be processed with "
+                        "bash_exec or file tools. "
+                        "The file content is still returned in the response."
+                    ),
                },
                "force_download_url": {
                    "type": "boolean",
-                    "description": "Always return metadata+URL instead of inline content.",
+                    "description": (
+                        "If true, always return metadata+URL instead of inline content. "
+                        "Default is false (auto-selects based on file size/type)."
+                    ),
                },
                "offset": {
                    "type": "integer",
-                    "description": "Character offset for paginated reads (0-based).",
+                    "description": (
+                        "Character offset to start reading from (0-based). "
+                        "Use with 'length' for paginated reads of large files."
+                    ),
                },
                "length": {
                    "type": "integer",
-                    "description": "Max characters to return for paginated reads.",
+                    "description": (
+                        "Maximum number of characters to return. "
+                        "Defaults to full file. Use with 'offset' for paginated reads."
+                    ),
                },
            },
-            "required": [],  # At least one of file_id or path must be provided
+            "required": [],  # At least one of file_id or path must be given
        }

    @property
@@ -494,6 +627,14 @@ class ReadWorkspaceFileTool(BaseTool):
            manager = await get_workspace_manager(user_id, session_id)
            resolved = await _resolve_file(manager, file_id, path, session_id)
            if isinstance(resolved, ErrorResponse):
+                # Fallback: if the path is an SDK tool-result on local disk,
+                # read it directly instead of failing.  The model sometimes
+                # calls read_workspace_file for these paths by mistake.
+                sdk_cwd = get_sdk_cwd()
+                if path and is_allowed_local_path(path, sdk_cwd):
+                    return _read_local_tool_result(
+                        path, char_offset, char_length, session_id, sdk_cwd=sdk_cwd
+                    )
                return resolved
            target_file_id, file_info = resolved

@@ -614,9 +755,15 @@ class WriteWorkspaceFileTool(BaseTool):
    @property
    def description(self) -> str:
        return (
-            "Write a file to persistent workspace (survives across sessions). "
-            "Provide exactly one of: content (text), content_base64 (binary), "
-            f"or source_path (copy from working dir). Max {Config().max_file_size_mb}MB."
+            "Write or create a file in the user's persistent workspace (cloud storage). "
+            "These files survive across sessions. "
+            "For ephemeral session files, use the SDK Write tool instead. "
+            "Provide content as plain text via 'content', OR base64-encoded via "
+            "'content_base64', OR copy a file from the ephemeral working directory "
+            "via 'source_path'. Exactly one of these three is required. "
+            f"Maximum file size is {Config().max_file_size_mb}MB. "
+            "Files are saved to the current session's folder by default. "
+            "Use /sessions/<session_id>/... for cross-session access."
        )

    @property
@@ -626,31 +773,51 @@ class WriteWorkspaceFileTool(BaseTool):
            "properties": {
                "filename": {
                    "type": "string",
-                    "description": "Filename (e.g. 'report.pdf').",
+                    "description": "Name for the file (e.g., 'report.pdf')",
                },
                "content": {
                    "type": "string",
-                    "description": "Plain text content. Mutually exclusive with content_base64/source_path.",
+                    "description": (
+                        "Plain text content to write. Use this for text files "
+                        "(code, configs, documents, etc.). "
+                        "Mutually exclusive with content_base64 and source_path."
+                    ),
                },
                "content_base64": {
                    "type": "string",
-                    "description": "Base64-encoded binary content. Mutually exclusive with content/source_path.",
+                    "description": (
+                        "Base64-encoded file content. Use this for binary files "
+                        "(images, PDFs, etc.). "
+                        "Mutually exclusive with content and source_path."
+                    ),
                },
                "source_path": {
                    "type": "string",
-                    "description": "Working directory path to copy to workspace. Mutually exclusive with content/content_base64.",
+                    "description": (
+                        "Path to a file in the ephemeral working directory to "
+                        "copy to workspace (e.g., '/tmp/copilot-.../output.csv'). "
+                        "Use this to persist files created by bash_exec or SDK Write. "
+                        "Mutually exclusive with content and content_base64."
+                    ),
                },
                "path": {
                    "type": "string",
-                    "description": "Virtual path (e.g. '/documents/report.pdf'). Defaults to '/{filename}'.",
+                    "description": (
+                        "Optional virtual path where to save the file "
+                        "(e.g., '/documents/report.pdf'). "
+                        "Defaults to '/{filename}'. Scoped to current session."
+                    ),
                },
                "mime_type": {
                    "type": "string",
-                    "description": "MIME type. Auto-detected from filename if omitted.",
+                    "description": (
+                        "Optional MIME type of the file. "
+                        "Auto-detected from filename if not provided."
+                    ),
                },
                "overwrite": {
                    "type": "boolean",
-                    "description": "Overwrite if file exists (default: false).",
+                    "description": "Whether to overwrite if file exists at path (default: false)",
                },
            },
            "required": ["filename"],
@@ -777,7 +944,12 @@ class DeleteWorkspaceFileTool(BaseTool):

    @property
    def description(self) -> str:
-        return "Delete a file from persistent workspace. Specify file_id or path."
+        return (
+            "Delete a file from the user's persistent workspace (cloud storage). "
+            "Specify either file_id or path to identify the file. "
+            "Paths are scoped to the current session by default. "
+            "Use /sessions/<session_id>/... for cross-session access."
+        )

    @property
    def parameters(self) -> dict[str, Any]:
@@ -786,14 +958,17 @@ class DeleteWorkspaceFileTool(BaseTool):
            "properties": {
                "file_id": {
                    "type": "string",
-                    "description": "File ID from list_workspace_files.",
+                    "description": "The file's unique ID (from list_workspace_files)",
                },
                "path": {
                    "type": "string",
-                    "description": "Virtual file path.",
+                    "description": (
+                        "The virtual file path (e.g., '/documents/report.pdf'). "
+                        "Scoped to current session by default."
+                    ),
                },
            },
-            "required": [],  # At least one of file_id or path must be provided
+            "required": [],  # At least one must be provided
        }

    @property
--- a/autogpt_platform/backend/backend/copilot/tools/workspace_files_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/workspace_files_test.py
@@ -2,18 +2,25 @@

 import base64
 import os
+import shutil
+from unittest.mock import AsyncMock, patch

 import pytest

+from backend.copilot.context import SDK_PROJECTS_DIR, _current_project_dir
 from backend.copilot.tools._test_data import make_session, setup_test_data
+from backend.copilot.tools.models import ErrorResponse
 from backend.copilot.tools.workspace_files import (
+    _MAX_LOCAL_TOOL_RESULT_BYTES,
    DeleteWorkspaceFileTool,
    ListWorkspaceFilesTool,
    ReadWorkspaceFileTool,
    WorkspaceDeleteResponse,
+    WorkspaceFileContentResponse,
    WorkspaceFileListResponse,
    WorkspaceWriteResponse,
    WriteWorkspaceFileTool,
+    _read_local_tool_result,
    _resolve_write_content,
    _validate_ephemeral_path,
 )
@@ -325,3 +332,294 @@ async def test_write_workspace_file_source_path(setup_test_data):
    await delete_tool._execute(
        user_id=user.id, session=session, file_id=write_resp.file_id
    )
+
+
+# ---------------------------------------------------------------------------
+# _read_local_tool_result — local disk fallback for SDK tool-result files
+# ---------------------------------------------------------------------------
+
+_CONV_UUID = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+
+
+class TestReadLocalToolResult:
+    """Tests for _read_local_tool_result using real files under SDK_PROJECTS_DIR."""
+
+    def _make_tool_result(self, encoded: str, filename: str, content: bytes) -> str:
+        """Create <encoded>/<_CONV_UUID>/tool-results/<filename> and return its path."""
+        tool_dir = os.path.join(SDK_PROJECTS_DIR, encoded, _CONV_UUID, "tool-results")
+        os.makedirs(tool_dir, exist_ok=True)
+        filepath = os.path.join(tool_dir, filename)
+        with open(filepath, "wb") as f:
+            f.write(content)
+        return filepath
+
+    def _cleanup(self, encoded: str) -> None:
+        shutil.rmtree(os.path.join(SDK_PROJECTS_DIR, encoded), ignore_errors=True)
+
+    def test_read_text_file(self):
+        """Text file comes back base64-encoded with a text/plain MIME type."""
+        encoded = "-tmp-copilot-local-read-text"
+        path = self._make_tool_result(encoded, "output.txt", b"hello world")
+        token = _current_project_dir.set(encoded)
+        try:
+            result = _read_local_tool_result(path, 0, None, "s1")
+            assert isinstance(result, WorkspaceFileContentResponse)
+            decoded = base64.b64decode(result.content_base64).decode("utf-8")
+            assert decoded == "hello world"
+            assert "text/plain" in result.mime_type
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_read_text_with_offset(self):
+        """Offset 3 with length 4 on "ABCDEFGHIJ" returns the slice "DEFG"."""
+        encoded = "-tmp-copilot-local-read-offset"
+        path = self._make_tool_result(encoded, "data.txt", b"ABCDEFGHIJ")
+        token = _current_project_dir.set(encoded)
+        try:
+            result = _read_local_tool_result(path, 3, 4, "s1")
+            assert isinstance(result, WorkspaceFileContentResponse)
+            decoded = base64.b64decode(result.content_base64).decode("utf-8")
+            assert decoded == "DEFG"
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_read_binary_file(self):
+        """All 256 byte values round-trip via base64; message mentions "binary"."""
+        encoded = "-tmp-copilot-local-read-binary"
+        binary_data = bytes(range(256))
+        path = self._make_tool_result(encoded, "image.png", binary_data)
+        token = _current_project_dir.set(encoded)
+        try:
+            result = _read_local_tool_result(path, 0, None, "s1")
+            assert isinstance(result, WorkspaceFileContentResponse)
+            decoded = base64.b64decode(result.content_base64)
+            assert decoded == binary_data
+            assert "binary" in result.message
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_disallowed_path_rejected(self):
+        """/etc/passwd is refused with a "not allowed" ErrorResponse."""
+        result = _read_local_tool_result("/etc/passwd", 0, None, "s1")
+        assert isinstance(result, ErrorResponse)
+        assert "not allowed" in result.message.lower()
+
+    def test_file_not_found(self):
+        """Allowed path with no file on disk yields a "not found" ErrorResponse."""
+        encoded = "-tmp-copilot-local-read-missing"
+        tool_dir = os.path.join(SDK_PROJECTS_DIR, encoded, _CONV_UUID, "tool-results")
+        os.makedirs(tool_dir, exist_ok=True)
+        path = os.path.join(tool_dir, "nope.txt")
+        token = _current_project_dir.set(encoded)
+        try:
+            result = _read_local_tool_result(path, 0, None, "s1")
+            assert isinstance(result, ErrorResponse)
+            assert "not found" in result.message.lower()
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_file_too_large(self, monkeypatch):
+        """A reported size one byte over the limit yields a "too large" error."""
+        encoded = "-tmp-copilot-local-read-large"
+        # Real file is tiny; os.path.getsize is faked to exceed the limit by one.
+        path = self._make_tool_result(encoded, "big.txt", b"small")
+        token = _current_project_dir.set(encoded)
+        monkeypatch.setattr(
+            "os.path.getsize", lambda _: _MAX_LOCAL_TOOL_RESULT_BYTES + 1
+        )
+        try:
+            result = _read_local_tool_result(path, 0, None, "s1")
+            assert isinstance(result, ErrorResponse)
+            assert "too large" in result.message.lower()
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_offset_beyond_file_length(self):
+        """Offset past end-of-file returns empty content."""
+        encoded = "-tmp-copilot-local-read-past-eof"
+        path = self._make_tool_result(encoded, "short.txt", b"abc")
+        token = _current_project_dir.set(encoded)
+        try:
+            result = _read_local_tool_result(path, 999, 10, "s1")
+            assert isinstance(result, WorkspaceFileContentResponse)
+            decoded = base64.b64decode(result.content_base64).decode("utf-8")
+            assert decoded == ""
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_zero_length_read(self):
+        """Requesting zero characters returns empty content."""
+        encoded = "-tmp-copilot-local-read-zero-len"
+        path = self._make_tool_result(encoded, "data.txt", b"ABCDEF")
+        token = _current_project_dir.set(encoded)
+        try:
+            result = _read_local_tool_result(path, 2, 0, "s1")
+            assert isinstance(result, WorkspaceFileContentResponse)
+            decoded = base64.b64decode(result.content_base64).decode("utf-8")
+            assert decoded == ""
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_mime_type_from_json_extension(self):
+        """A .json filename yields application/json rather than text/plain."""
+        encoded = "-tmp-copilot-local-read-json"
+        path = self._make_tool_result(encoded, "result.json", b'{"key": "value"}')
+        token = _current_project_dir.set(encoded)
+        try:
+            result = _read_local_tool_result(path, 0, None, "s1")
+            assert isinstance(result, WorkspaceFileContentResponse)
+            assert result.mime_type == "application/json"
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_mime_type_from_png_extension(self):
+        """A .png filename with binary content yields the image/png MIME type."""
+        encoded = "-tmp-copilot-local-read-png-mime"
+        binary_data = bytes(range(256))
+        path = self._make_tool_result(encoded, "chart.png", binary_data)
+        token = _current_project_dir.set(encoded)
+        try:
+            result = _read_local_tool_result(path, 0, None, "s1")
+            assert isinstance(result, WorkspaceFileContentResponse)
+            assert result.mime_type == "image/png"
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_explicit_sdk_cwd_parameter(self):
+        """An explicit sdk_cwd still permits reads of files under SDK_PROJECTS_DIR."""
+        encoded = "-tmp-copilot-local-read-sdkcwd"
+        path = self._make_tool_result(encoded, "out.txt", b"content")
+        token = _current_project_dir.set(encoded)
+        try:
+            # The sdk_cwd given here is unrelated to the file's location; the
+            # read is expected to succeed because the path is under SDK_PROJECTS_DIR.
+            result = _read_local_tool_result(
+                path, 0, None, "s1", sdk_cwd="/tmp/copilot-test"
+            )
+            assert isinstance(result, WorkspaceFileContentResponse)
+            decoded = base64.b64decode(result.content_base64).decode("utf-8")
+            assert decoded == "content"
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+    def test_offset_with_no_length_reads_to_end(self):
+        """When char_length is None, read from offset to end of file."""
+        encoded = "-tmp-copilot-local-read-offset-noLen"
+        path = self._make_tool_result(encoded, "data.txt", b"0123456789")
+        token = _current_project_dir.set(encoded)
+        try:
+            result = _read_local_tool_result(path, 5, None, "s1")
+            assert isinstance(result, WorkspaceFileContentResponse)
+            decoded = base64.b64decode(result.content_base64).decode("utf-8")
+            assert decoded == "56789"
+        finally:
+            _current_project_dir.reset(token)
+            self._cleanup(encoded)
+
+
+# ---------------------------------------------------------------------------
+# ReadWorkspaceFileTool fallback to _read_local_tool_result
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_read_workspace_file_falls_back_to_local_tool_result(setup_test_data):
+    """When _resolve_file returns ErrorResponse for an allowed local path,
+    ReadWorkspaceFileTool should fall back to _read_local_tool_result."""
+    user = setup_test_data["user"]
+    session = make_session(user.id)
+
+    # Real tool-result file under SDK_PROJECTS_DIR for the fallback to read.
+    encoded = "-tmp-copilot-fallback-test"
+    conv_uuid = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
+    tool_dir = os.path.join(SDK_PROJECTS_DIR, encoded, conv_uuid, "tool-results")
+    os.makedirs(tool_dir, exist_ok=True)
+    filepath = os.path.join(tool_dir, "result.txt")
+    with open(filepath, "w") as f:
+        f.write("fallback content")
+
+    token = _current_project_dir.set(encoded)
+    try:
+        # Force _resolve_file to return an ErrorResponse (as if the file were
+        # missing from the workspace) so the fallback branch is exercised.
+        mock_resolve = AsyncMock(
+            return_value=ErrorResponse(
+                message="File not found at path: result.txt",
+                session_id=session.session_id,
+            )
+        )
+        with patch("backend.copilot.tools.workspace_files._resolve_file", mock_resolve):
+            read_tool = ReadWorkspaceFileTool()
+            result = await read_tool._execute(
+                user_id=user.id,
+                session=session,
+                path=filepath,
+            )
+
+        # The tool must return the on-disk content, not the mocked error.
+        assert isinstance(result, WorkspaceFileContentResponse), (
+            f"Expected fallback to local read, got {type(result).__name__}: "
+            f"{getattr(result, 'message', '')}"
+        )
+        decoded = base64.b64decode(result.content_base64).decode("utf-8")
+        assert decoded == "fallback content"
+        mock_resolve.assert_awaited_once()
+    finally:
+        _current_project_dir.reset(token)
+        shutil.rmtree(os.path.join(SDK_PROJECTS_DIR, encoded), ignore_errors=True)
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_read_workspace_file_no_fallback_when_resolve_succeeds(setup_test_data):
+    """A successful _resolve_file must bypass the local-disk fallback entirely."""
+    user = setup_test_data["user"]
+    session = make_session(user.id)
+
+    fake_file_id = "fake-file-id-001"
+    fake_content = b"workspace content"
+
+    # Minimal file_info stand-in: id, name, path, mime_type, size_bytes.
+    class _FakeFileInfo:
+        id = fake_file_id
+        name = "result.json"
+        path = "/result.json"
+        mime_type = "text/plain"
+        size_bytes = len(fake_content)
+
+    mock_resolve = AsyncMock(return_value=(fake_file_id, _FakeFileInfo()))
+
+    mock_manager = AsyncMock()
+    mock_manager.read_file_by_id = AsyncMock(return_value=fake_content)
+
+    with (
+        patch("backend.copilot.tools.workspace_files._resolve_file", mock_resolve),
+        patch(
+            "backend.copilot.tools.workspace_files.get_workspace_manager",
+            AsyncMock(return_value=mock_manager),
+        ),
+        patch(
+            "backend.copilot.tools.workspace_files._read_local_tool_result"
+        ) as patched_local,
+    ):
+        read_tool = ReadWorkspaceFileTool()
+        result = await read_tool._execute(
+            user_id=user.id,
+            session=session,
+            file_id=fake_file_id,
+        )
+
+    # The patched _read_local_tool_result must never have been called.
+    patched_local.assert_not_called()
+    # The response must carry fake_content, base64-encoded.
+    assert isinstance(result, WorkspaceFileContentResponse)
+    assert base64.b64decode(result.content_base64) == fake_content
Author	SHA1	Message	Date
Zamil Majdy	885459c6e1	fix(backend/copilot): respect CLAUDE_CONFIG_DIR in SDK_PROJECTS_DIR constant SDK_PROJECTS_DIR was hardcoded to ~/.claude/projects, ignoring the CLAUDE_CONFIG_DIR environment variable. This caused path validation mismatches in environments with custom Claude configurations. Now consistent with transcript.py's _projects_base() function.	2026-03-17 13:49:45 +07:00
Zamil Majdy	38ff768a65	fix(backend/copilot): update test mock to use get_workspace_manager The dev branch renamed get_manager to get_workspace_manager but the test was still patching the old name, causing an AttributeError.	2026-03-17 12:00:34 +07:00
Zamil Majdy	76dbf3bbec	Merge origin/dev into fix/copilot-tool-result-read Resolve import conflict in workspace_files.py: keep both get_workspace_manager (from dev) and get_sdk_cwd/is_allowed_local_path (from this PR).	2026-03-17 07:11:15 +07:00
Zamil Majdy	c0ade4be68	fix(copilot): patch mock at service module level after top-level import refactor The test was patching backend.copilot.sdk.transcript.cleanup_stale_project_dirs but service.py now imports it at module level, creating its own binding. Patch the symbol at the call site (service module) instead.	2026-03-16 06:27:43 +07:00
Zamil Majdy	24cbe738ff	fix(copilot): address review items — top-level import, path sanitization, E2B_WORKDIR constant, st_mtime comment, no-fallback test - Move `cleanup_stale_project_dirs` from deferred import inside `_cleanup_sdk_tool_results` to the top-level `from .transcript import (...)` block - Sanitize `FileNotFoundError` message in `_read_local_tool_result` to use `os.path.basename(path)` instead of leaking the full path - Replace hardcoded `/home/user` strings in `e2b_file_tools_test.py` with the `E2B_WORKDIR` constant - Add `st_mtime` write-once invariant comment to `cleanup_stale_project_dirs` explaining why mtime reliably signals session activity - Add test asserting the local-disk fallback is NOT invoked when `_resolve_file` succeeds	2026-03-16 06:15:59 +07:00
Zamil Majdy	49e031b54d	fix(copilot): use _LOCAL_TOOL_RESULT_FILE_ID constant for text path in _read_local_tool_result Replace remaining hardcoded "local" string with the named constant _LOCAL_TOOL_RESULT_FILE_ID in the text-file return path of _read_local_tool_result, completing the previous fix that only updated the binary-file return path.	2026-03-15 22:55:59 +07:00
Zamil Majdy	4b6c2a1323	fix(backend/copilot): scope stale project-dir sweep to current session and expose encode_cwd_for_cli Addresses multi-tenant safety concern: cleanup_stale_project_dirs now accepts an optional encoded_cwd parameter that limits the sweep to just the current session's directory instead of all ~/.claude/projects/ entries. Exposes encode_cwd_for_cli as a public function from context.py and passes the encoded cwd from _cleanup_sdk_tool_results. Adds three new tests covering scoped sweep behaviour.	2026-03-15 22:44:48 +07:00
Zamil Majdy	c854c1a485	fix(copilot): address review items — symlink check for edit_file, local constant, sort removal, and unit tests - Extend _check_sandbox_symlink_escape to _handle_edit_file for consistency - Define _LOCAL_TOOL_RESULT_FILE_ID constant, replacing magic string "local" - Replace sorted(Path.iterdir()) with plain iterdir() in cleanup_stale_project_dirs - Add TestCheckSandboxSymlinkEscape unit tests (7 cases) in e2b_file_tools_test.py - Add TestCleanupSdkToolResults unit tests (4 cases) in service_test.py covering rate-limiting and path rejection	2026-03-15 22:20:29 +07:00
Zamil Majdy	53a2c84796	fix(backend/copilot): add tests for local tool result reading and stale dir sweep	2026-03-15 04:10:05 +07:00
Zamil Majdy	3063ce22ac	fix(copilot): add inline comments for timeout rationale and sweep safety Address remaining PR review comments: - Document 2.0s timeout reasoning at call site (was 0.5s, caused frequent timeouts under load) - Document sleep(0) yield purpose after successful stash wait - Clarify multi-tenant safety of sweep in docstring (12h TTL + pattern match ensures active sessions are never affected)	2026-03-14 23:44:25 +07:00
Zamil Majdy	69db0815c3	fix(backend/copilot): add defence-in-depth realpath check in is_allowed_local_path Resolve project_dir via os.path.realpath and validate it stays within SDK_PROJECTS_DIR before checking the resolved path. Guards against potential future bugs in _encode_cwd_for_cli, matching the pattern already used in transcript.py.	2026-03-14 23:42:13 +07:00
Zamil Majdy	775ed85bba	fix(backend/copilot): sanitize error paths, add cleanup sweep, and harden file handling	2026-03-14 23:40:19 +07:00
Zamil Majdy	f07bb52ac3	fix: correct tool name guidance, UUID comment, and docstring path - Support both read_file (E2B) and Read (non-E2B) in prompt guidance - Fix UUID comment from "UUID-v4" to "UUID" (regex accepts all versions) - Update security_hooks docstring to include UUID segment in path	2026-03-14 22:28:12 +07:00
Zamil Majdy	84482071a8	Merge remote-tracking branch 'origin/dev' into fix/copilot-tool-result-read	2026-03-14 22:11:16 +07:00
Zamil Majdy	9ac01a0cf6	fix(backend/copilot): harden tool-result reads, add disk sweep, remove dead code - _read_local_tool_result: detect binary files (return raw base64 instead of corrupting with errors="replace"), add 10 MB size limit, move getsize inside try block, use consistent char units in messages - Add cleanup_stale_project_dirs() to sweep CLI project dirs older than 6h, preventing unbounded disk growth from per-turn directory creation - Add re.IGNORECASE to _UUID_RE for robust UUID matching - Add TOCTOU acknowledgment to _check_sandbox_symlink_escape docstring - Clarify transcript_path sanitization comment in security_hooks.py - Remove dead code: read_cli_session_file, cleanup_cli_project_dir, _cli_project_dir, _safe_glob_jsonl (no remaining callers after cleanup changes) - Add tests: TestReadLocalToolResult (6 cases), TestCleanupStaleProjectDirs (2 cases)	2026-03-14 22:09:59 +07:00
Zamil Majdy	71337c0514	Merge origin/dev into fix/copilot-tool-result-read Resolve conflict in transcript.py by accepting new functions from dev (_projects_base, _cli_project_dir, _safe_glob_jsonl, read_compacted_entries, read_cli_session_file, cleanup_cli_project_dir).	2026-03-14 10:21:54 +07:00
Zamil Majdy	b2808f223a	fix(backend/copilot): address review comments — text seek bug, symlink helper, cleanup simplification - Fix invalid fh.seek() on text-mode file in _read_local_tool_result by reading full content and slicing (sentry bot bug report) - Extract symlink escape check into _check_sandbox_symlink_escape helper - Remove over-engineered TTL sweep of project dirs; just clean tmp dir	2026-03-13 22:07:33 +07:00
Zamil Majdy	85101bfc5b	fix(backend/copilot): address third-bump review comments - Add defence-in-depth is_allowed_local_path check in _read_local_tool_result - Scope _sweep_stale_project_dirs to current session's encoded_dir only - Remove dead cleanup_cli_project_dir from transcript.py - Check readlink exit_code in e2b_file_tools symlink validation - Remove redundant try/except around shutil.rmtree(ignore_errors=True) - Add test for parts[1] != "tool-results" rejection path - Rename _SDK_PROJECTS_DIR to SDK_PROJECTS_DIR (public API) - Remove sleep(0) band-aid from wait_for_stash, add timeout justification - Extract _UUID_RE compiled constant for conversation UUID validation	2026-03-13 19:54:00 +07:00
Zamil Majdy	3334a4b4b5	Merge remote-tracking branch 'origin/dev' into fix/copilot-tool-result-read	2026-03-13 19:27:06 +07:00
Zamil Majdy	796e737d77	fix(backend/copilot): address reviewer comments on tool-result PR - Move local imports (time, _SDK_PROJECTS_DIR) to top-level in service.py - Add UUID format regex validation for path segments in context.py - Extract _latest_mtime helper to reduce nesting in _sweep_stale_project_dirs - Use mimetypes.guess_type() instead of hardcoded mime_type in workspace_files.py - Update test UUIDs to match the new strict UUID regex validation	2026-03-13 17:51:07 +07:00
Zamil Majdy	8d16f8052b	fix(backend/copilot): ensure stream lock release even if cleanup fails Wrap _cleanup_sdk_tool_results in try/finally so lock.release() is always called, preventing session deadlocks on cleanup exceptions.	2026-03-13 16:32:40 +07:00
Zamil Majdy	1f8ab0687c	fix(backend/copilot): offload sync cleanup to thread to avoid blocking event loop Move filesystem IO in _cleanup_sdk_tool_results (shutil.rmtree and _sweep_stale_project_dirs) to asyncio.to_thread so the async stream generator's finally block doesn't block the event loop during cleanup.	2026-03-13 16:20:09 +07:00
Zamil Majdy	035aba9cf1	fix(backend/copilot): address PR review — mtime staleness and symlink escape - Use max mtime across conv dir and immediate children (tool-results/) to avoid premature cleanup of active sessions whose directory mtime hasn't updated (addresses sentry bot review) - Replace normpath-based re-validation with readlink -f inside the E2B sandbox to properly detect symlink escapes after mkdir (addresses coderabbit review)	2026-03-13 16:04:22 +07:00
Zamil Majdy	e0128470a9	fix(backend/copilot): harden tool-result path validation and address review feedback - Tighten is_allowed_local_path to only allow UUID-nested tool-results paths (<encoded-cwd>/<uuid>/tool-results/<file>), rejecting the non-UUID pattern that isn't a real SDK flow - Add TTL-based cleanup (24h) for stale conversation UUID dirs under ~/.claude/projects/ to prevent disk leak (addresses sentry bot review) - Add path re-validation after mkdir in E2B write handler to prevent symlink escape - Increase wait_for_stash timeout from 0.5s to 2.0s and add post-timeout retry to reduce PostToolUse hook race condition output loss - Update all affected tests to use UUID-nested path pattern	2026-03-13 15:50:17 +07:00
Zamil Majdy	a4deae0f69	fix(backend/copilot): fix tool-result file read failures across turns Three bugs caused "file not found" errors when the model tried to read SDK tool-result files: 1. Path validation mismatch: is_allowed_local_path() expected tool-results directly under the project dir, but the SDK nests them under a conversation UUID subdirectory. Fixed to match any tool-results/ segment within the project dir. 2. Wrong tool fallback: when the model mistakenly called read_workspace_file (cloud storage) for SDK tool-result paths on local disk, it got "file not found". Added a fallback in ReadWorkspaceFileTool that detects allowed local paths and reads from disk instead. 3. Cross-turn cleanup: _cleanup_sdk_tool_results deleted the entire CLI project directory (including tool-results/) between turns. Subsequent turns referencing those paths via --resume transcript would fail. Removed the project dir cleanup — only the temp cwd is cleaned now. Also added system prompt guidance telling the model to use read_file (not read_workspace_file) for SDK tool-result paths.	2026-03-13 15:33:30 +07:00