fix: add explicit ValueError guard for stat output parsing

style: fix formatting and sync docs
- Fix Black formatting for is_text/is_binary checks
- Update llm.md to reflect binary file support in Claude Code block
2026-02-16 17:55:55 -05:00 · 2026-02-16 14:46:06 +00:00 · 2026-02-16 14:40:53 +00:00 · 2026-02-16 14:18:25 +00:00 · 2026-02-16 14:10:05 +00:00
6 changed files with 125 additions and 38 deletions
--- a/autogpt_platform/autogpt_libs/poetry.lock
+++ b/autogpt_platform/autogpt_libs/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.

 [[package]]
 name = "annotated-doc"
@@ -67,7 +67,7 @@ description = "Backport of asyncio.Runner, a context manager that controls event
 optional = false
 python-versions = "<3.11,>=3.8"
 groups = ["dev"]
-markers = "python_version == \"3.10\""
+markers = "python_version < \"3.11\""
 files = [
    {file = "backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5"},
    {file = "backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162"},
@@ -541,7 +541,7 @@ description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
 groups = ["main", "dev"]
-markers = "python_version == \"3.10\""
+markers = "python_version < \"3.11\""
 files = [
    {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"},
    {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"},
@@ -2342,30 +2342,30 @@ pyasn1 = ">=0.1.3"

 [[package]]
 name = "ruff"
-version = "0.15.1"
+version = "0.15.0"
 description = "An extremely fast Python linter and code formatter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 groups = ["dev"]
 files = [
-    {file = "ruff-0.15.1-py3-none-linux_armv6l.whl", hash = "sha256:b101ed7cf4615bda6ffe65bdb59f964e9f4a0d3f85cbf0e54f0ab76d7b90228a"},
-    {file = "ruff-0.15.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:939c995e9277e63ea632cc8d3fae17aa758526f49a9a850d2e7e758bfef46602"},
-    {file = "ruff-0.15.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1d83466455fdefe60b8d9c8df81d3c1bbb2115cede53549d3b522ce2bc703899"},
-    {file = "ruff-0.15.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9457e3c3291024866222b96108ab2d8265b477e5b1534c7ddb1810904858d16"},
-    {file = "ruff-0.15.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:92c92b003e9d4f7fbd33b1867bb15a1b785b1735069108dfc23821ba045b29bc"},
-    {file = "ruff-0.15.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fe5c41ab43e3a06778844c586251eb5a510f67125427625f9eb2b9526535779"},
-    {file = "ruff-0.15.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66a6dd6df4d80dc382c6484f8ce1bcceb55c32e9f27a8b94c32f6c7331bf14fb"},
-    {file = "ruff-0.15.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a4a42cbb8af0bda9bcd7606b064d7c0bc311a88d141d02f78920be6acb5aa83"},
-    {file = "ruff-0.15.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab064052c31dddada35079901592dfba2e05f5b1e43af3954aafcbc1096a5b2"},
-    {file = "ruff-0.15.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:5631c940fe9fe91f817a4c2ea4e81f47bee3ca4aa646134a24374f3c19ad9454"},
-    {file = "ruff-0.15.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:68138a4ba184b4691ccdc39f7795c66b3c68160c586519e7e8444cf5a53e1b4c"},
-    {file = "ruff-0.15.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:518f9af03bfc33c03bdb4cb63fabc935341bb7f54af500f92ac309ecfbba6330"},
-    {file = "ruff-0.15.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:da79f4d6a826caaea95de0237a67e33b81e6ec2e25fc7e1993a4015dffca7c61"},
-    {file = "ruff-0.15.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3dd86dccb83cd7d4dcfac303ffc277e6048600dfc22e38158afa208e8bf94a1f"},
-    {file = "ruff-0.15.1-py3-none-win32.whl", hash = "sha256:660975d9cb49b5d5278b12b03bb9951d554543a90b74ed5d366b20e2c57c2098"},
-    {file = "ruff-0.15.1-py3-none-win_amd64.whl", hash = "sha256:c820fef9dd5d4172a6570e5721704a96c6679b80cf7be41659ed439653f62336"},
-    {file = "ruff-0.15.1-py3-none-win_arm64.whl", hash = "sha256:5ff7d5f0f88567850f45081fac8f4ec212be8d0b963e385c3f7d0d2eb4899416"},
-    {file = "ruff-0.15.1.tar.gz", hash = "sha256:c590fe13fb57c97141ae975c03a1aedb3d3156030cabd740d6ff0b0d601e203f"},
+    {file = "ruff-0.15.0-py3-none-linux_armv6l.whl", hash = "sha256:aac4ebaa612a82b23d45964586f24ae9bc23ca101919f5590bdb368d74ad5455"},
+    {file = "ruff-0.15.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:dcd4be7cc75cfbbca24a98d04d0b9b36a270d0833241f776b788d59f4142b14d"},
+    {file = "ruff-0.15.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d747e3319b2bce179c7c1eaad3d884dc0a199b5f4d5187620530adf9105268ce"},
+    {file = "ruff-0.15.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:650bd9c56ae03102c51a5e4b554d74d825ff3abe4db22b90fd32d816c2e90621"},
+    {file = "ruff-0.15.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6664b7eac559e3048223a2da77769c2f92b43a6dfd4720cef42654299a599c9"},
+    {file = "ruff-0.15.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f811f97b0f092b35320d1556f3353bf238763420ade5d9e62ebd2b73f2ff179"},
+    {file = "ruff-0.15.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:761ec0a66680fab6454236635a39abaf14198818c8cdf691e036f4bc0f406b2d"},
+    {file = "ruff-0.15.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:940f11c2604d317e797b289f4f9f3fa5555ffe4fb574b55ed006c3d9b6f0eb78"},
+    {file = "ruff-0.15.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbca3d40558789126da91d7ef9a7c87772ee107033db7191edefa34e2c7f1b4"},
+    {file = "ruff-0.15.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9a121a96db1d75fa3eb39c4539e607f628920dd72ff1f7c5ee4f1b768ac62d6e"},
+    {file = "ruff-0.15.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5298d518e493061f2eabd4abd067c7e4fb89e2f63291c94332e35631c07c3662"},
+    {file = "ruff-0.15.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:afb6e603d6375ff0d6b0cee563fa21ab570fd15e65c852cb24922cef25050cf1"},
+    {file = "ruff-0.15.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:77e515f6b15f828b94dc17d2b4ace334c9ddb7d9468c54b2f9ed2b9c1593ef16"},
+    {file = "ruff-0.15.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6f6e80850a01eb13b3e42ee0ebdf6e4497151b48c35051aab51c101266d187a3"},
+    {file = "ruff-0.15.0-py3-none-win32.whl", hash = "sha256:238a717ef803e501b6d51e0bdd0d2c6e8513fe9eec14002445134d3907cd46c3"},
+    {file = "ruff-0.15.0-py3-none-win_amd64.whl", hash = "sha256:dd5e4d3301dc01de614da3cdffc33d4b1b96fb89e45721f1598e5532ccf78b18"},
+    {file = "ruff-0.15.0-py3-none-win_arm64.whl", hash = "sha256:c480d632cc0ca3f0727acac8b7d053542d9e114a462a145d0b00e7cd658c515a"},
+    {file = "ruff-0.15.0.tar.gz", hash = "sha256:6bdea47cdbea30d40f8f8d7d69c0854ba7c15420ec75a26f463290949d7f7e9a"},
 ]

 [[package]]
@@ -2564,7 +2564,7 @@ description = "A lil' TOML parser"
 optional = false
 python-versions = ">=3.8"
 groups = ["dev"]
-markers = "python_version == \"3.10\""
+markers = "python_version < \"3.11\""
 files = [
    {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
    {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
@@ -2912,4 +2912,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<4.0"
-content-hash = "271eb1d736439a1f23e8b8783ac623a6c5c76982e5e0333c1221de418654cc6a"
+content-hash = "9619cae908ad38fa2c48016a58bcf4241f6f5793aa0e6cc140276e91c433cbbb"
--- a/autogpt_platform/autogpt_libs/pyproject.toml
+++ b/autogpt_platform/autogpt_libs/pyproject.toml
@@ -27,7 +27,7 @@ pytest = "^8.4.1"
 pytest-asyncio = "^1.3.0"
 pytest-mock = "^3.15.1"
 pytest-cov = "^7.0.0"
-ruff = "^0.15.1"
+ruff = "^0.15.0"

 [build-system]
 requires = ["poetry-core"]
--- a/autogpt_platform/backend/backend/blocks/claude_code.py
+++ b/autogpt_platform/backend/backend/blocks/claude_code.py
@@ -187,9 +187,11 @@ class ClaudeCodeBlock(Block):
        )
        files: list[SandboxFileOutput] = SchemaField(
            description=(
-                "List of text files created/modified by Claude Code during this execution. "
+                "List of files created/modified by Claude Code during this execution. "
+                "Includes text files and binary files (images, PDFs, etc.). "
                "Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. "
-                "workspace_ref contains a workspace:// URI if the file was stored to workspace."
+                "workspace_ref contains a workspace:// URI for workspace storage. "
+                "For binary files, content contains a placeholder; use workspace_ref to access the file."
            )
        )
        conversation_history: str = SchemaField(
@@ -453,12 +455,14 @@ class ClaudeCodeBlock(Block):
                    new_conversation_history = turn_entry

            # Extract files created/modified during this run and store to workspace
+            # Include binary files (images, PDFs, etc.) - they'll be stored via
+            # store_media_file which handles virus scanning and workspace storage
            sandbox_files = await extract_and_store_sandbox_files(
                sandbox=sandbox,
                working_directory=working_directory,
                execution_context=execution_context,
                since_timestamp=start_timestamp,
-                text_only=True,
+                text_only=False,  # Extract both text and binary files
            )

            return (
--- a/autogpt_platform/backend/backend/util/sandbox_files.py
+++ b/autogpt_platform/backend/backend/util/sandbox_files.py
@@ -74,8 +74,51 @@ TEXT_EXTENSIONS = {
    ".tex",
    ".csv",
    ".log",
+    ".svg",  # SVG is XML-based text
 }

+# Binary file extensions we explicitly support extracting
+# These are common output formats that users expect to retrieve
+BINARY_EXTENSIONS = {
+    # Images
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".webp",
+    ".ico",
+    ".bmp",
+    ".tiff",
+    ".tif",
+    # Documents
+    ".pdf",
+    # Archives
+    ".zip",
+    ".tar",
+    ".gz",
+    ".7z",
+    # Audio
+    ".mp3",
+    ".wav",
+    ".ogg",
+    ".flac",
+    # Video
+    ".mp4",
+    ".webm",
+    ".mov",
+    ".avi",
+    # Fonts
+    ".woff",
+    ".woff2",
+    ".ttf",
+    ".otf",
+    ".eot",
+}
+
+# Maximum file size for binary extraction (50MB)
+# Prevents OOM from accidentally extracting huge files
+MAX_BINARY_FILE_SIZE = 50 * 1024 * 1024
+

 class SandboxFileOutput(BaseModel):
    """A file extracted from a sandbox and optionally stored in workspace."""
@@ -120,7 +163,8 @@ async def extract_sandbox_files(
        sandbox: The E2B sandbox instance
        working_directory: Directory to search for files
        since_timestamp: ISO timestamp - only return files modified after this time
-        text_only: If True, only extract text files (default). If False, extract all files.
+        text_only: If True, only extract text files. If False, also extract
+                   supported binary files (images, PDFs, etc.).

    Returns:
        List of ExtractedFile objects with path, content, and metadata
@@ -149,14 +193,53 @@ async def extract_sandbox_files(
            if not file_path:
                continue

-            # Check if it's a text file
-            is_text = any(file_path.endswith(ext) for ext in TEXT_EXTENSIONS)
+            # Check file type (case-insensitive for extensions)
+            file_path_lower = file_path.lower()
+            is_text = any(
+                file_path_lower.endswith(ext.lower()) for ext in TEXT_EXTENSIONS
+            )
+            is_binary = any(
+                file_path_lower.endswith(ext.lower()) for ext in BINARY_EXTENSIONS
+            )

-            # Skip non-text files if text_only mode
-            if text_only and not is_text:
-                continue
+            # Determine if we should extract this file
+            if text_only:
+                # Only extract text files
+                if not is_text:
+                    continue
+            else:
+                # Extract text files and supported binary files
+                if not is_text and not is_binary:
+                    continue

            try:
+                # For binary files, check size before reading to prevent OOM
+                if is_binary:
+                    stat_result = await sandbox.commands.run(
+                        f"stat -c %s {shlex.quote(file_path)} 2>/dev/null"
+                    )
+                    if stat_result.exit_code != 0 or not stat_result.stdout:
+                        logger.debug(
+                            f"Skipping {file_path}: could not determine file size"
+                        )
+                        continue
+
+                    try:
+                        file_size = int(stat_result.stdout.strip())
+                    except ValueError:
+                        logger.debug(
+                            f"Skipping {file_path}: unexpected stat output "
+                            f"{stat_result.stdout.strip()!r}"
+                        )
+                        continue
+
+                    if file_size > MAX_BINARY_FILE_SIZE:
+                        logger.info(
+                            f"Skipping {file_path}: size {file_size} bytes "
+                            f"exceeds limit {MAX_BINARY_FILE_SIZE}"
+                        )
+                        continue
+
                # Read file content as bytes
                content = await sandbox.files.read(file_path, format="bytes")
                if isinstance(content, str):
--- a/docs/integrations/block-integrations/claude_code.md
+++ b/docs/integrations/block-integrations/claude_code.md
@@ -16,7 +16,7 @@ When activated, the block:
   - Install dependencies (npm, pip, etc.)
   - Run terminal commands
   - Build and test applications
-5. Extracts all text files created/modified during execution
+5. Extracts text files and supported binary files (images, PDFs, etc.) created/modified during execution; binary files over the size limit are skipped
 6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks

 The block supports conversation continuation through three mechanisms:
@@ -42,7 +42,7 @@ The block supports conversation continuation through three mechanisms:
 | Output | Description |
 |--------|-------------|
 | Response | The output/response from Claude Code execution |
-| Files | List of text files created/modified during execution. Each file includes path, relative_path, name, and content fields |
+| Files | List of files (text and binary) created/modified during execution. Includes images, PDFs, and other supported formats. Each file has path, relative_path, name, content, and workspace_ref fields. Binary files are stored in workspace and accessible via workspace_ref |
 | Conversation History | Full conversation history including this turn. Use to restore context on a fresh sandbox |
 | Session ID | Session ID for this conversation. Pass back with sandbox_id to continue the conversation |
 | Sandbox ID | ID of the sandbox instance (null if disposed). Pass back with session_id to continue the conversation |
--- a/docs/integrations/block-integrations/llm.md
+++ b/docs/integrations/block-integrations/llm.md
@@ -535,7 +535,7 @@ When activated, the block:
 2. Installs the latest version of Claude Code in the sandbox
 3. Optionally runs setup commands to prepare the environment
 4. Executes your prompt using Claude Code, which can create/edit files, install dependencies, run terminal commands, and build applications
-5. Extracts all text files created/modified during execution
+5. Extracts text files and supported binary files (images, PDFs, etc.) created/modified during execution; binary files over the size limit are skipped
 6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks

 The block supports conversation continuation through three mechanisms:
@@ -563,7 +563,7 @@ The block supports conversation continuation through three mechanisms:
 |--------|-------------|------|
 | error | Error message if execution failed | str |
 | response | The output/response from Claude Code execution | str |
-| files | List of text files created/modified by Claude Code during this execution. Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI if the file was stored to workspace. | List[SandboxFileOutput] |
+| files | List of files created/modified by Claude Code during this execution. Includes text files and binary files (images, PDFs, etc.). Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI for workspace storage. For binary files, content contains a placeholder; use workspace_ref to access the file. | List[SandboxFileOutput] |
 | conversation_history | Full conversation history including this turn. Pass this to conversation_history input to continue on a fresh sandbox if the previous sandbox timed out. | str |
 | session_id | Session ID for this conversation. Pass this back along with sandbox_id to continue the conversation. | str |
 | sandbox_id | ID of the sandbox instance. Pass this back along with session_id to continue the conversation. This is None if dispose_sandbox was True (sandbox was disposed). | str |
Author	SHA1	Message	Date
Bentlybro	719c4ee1d1	fix: add explicit ValueError guard for stat output parsing	2026-02-16 14:46:06 +00:00
Bentlybro	411c399e03	style: fix formatting and sync docs - Fix Black formatting for is_text/is_binary checks - Update llm.md to reflect binary file support in Claude Code block	2026-02-16 14:40:53 +00:00
Bentlybro	6ac011e36c	fix: normalize extension case in sandbox file extraction Fixes bug where 'Dockerfile' in TEXT_EXTENSIONS wouldn't match after lowercasing file_path because the extension itself wasn't lowercased.	2026-02-16 14:18:25 +00:00
Bentlybro	5e554526e2	fix(backend): Extract binary files from ClaudeCodeBlock sandbox Enables binary file extraction (images, PDFs, etc.) for the Claude Code block by setting text_only=False in extract_and_store_sandbox_files. Changes: - sandbox_files.py: Add BINARY_EXTENSIONS set with supported formats - sandbox_files.py: Add MAX_BINARY_FILE_SIZE (50MB) limit to prevent OOM - sandbox_files.py: Add size check before reading binary files - sandbox_files.py: Add .svg to TEXT_EXTENSIONS (XML-based) - sandbox_files.py: Make extension matching case-insensitive - claude_code.py: Enable binary file extraction (text_only=False) - claude_code.py: Update output description to mention binary support - claude_code.md: Update docs to reflect binary file support Binary files are stored via store_media_file which handles: - Virus scanning via scan_content_safe() - Workspace storage (returns workspace:// URI in CoPilot) - Data URI fallback for graph execution Closes SECRT-1897	2026-02-16 14:10:05 +00:00