diff --git a/autogpt_platform/backend/backend/blocks/claude_code.py b/autogpt_platform/backend/backend/blocks/claude_code.py index 2e870f02b6..f4cf5650b1 100644 --- a/autogpt_platform/backend/backend/blocks/claude_code.py +++ b/autogpt_platform/backend/backend/blocks/claude_code.py @@ -187,9 +187,11 @@ class ClaudeCodeBlock(Block): ) files: list[SandboxFileOutput] = SchemaField( description=( - "List of text files created/modified by Claude Code during this execution. " + "List of files created/modified by Claude Code during this execution. " + "Includes text files and binary files (images, PDFs, etc.). " "Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. " - "workspace_ref contains a workspace:// URI if the file was stored to workspace." + "workspace_ref contains a workspace:// URI for workspace storage. " + "For binary files, content contains a placeholder; use workspace_ref to access the file." ) ) conversation_history: str = SchemaField( @@ -453,12 +455,14 @@ class ClaudeCodeBlock(Block): new_conversation_history = turn_entry # Extract files created/modified during this run and store to workspace + # Include binary files (images, PDFs, etc.) 
- they'll be stored via + # store_media_file which handles virus scanning and workspace storage sandbox_files = await extract_and_store_sandbox_files( sandbox=sandbox, working_directory=working_directory, execution_context=execution_context, since_timestamp=start_timestamp, - text_only=True, + text_only=False, # Extract both text and binary files ) return ( diff --git a/autogpt_platform/backend/backend/util/sandbox_files.py b/autogpt_platform/backend/backend/util/sandbox_files.py index 9db53ded14..a1ce5ab0f4 100644 --- a/autogpt_platform/backend/backend/util/sandbox_files.py +++ b/autogpt_platform/backend/backend/util/sandbox_files.py @@ -74,8 +74,51 @@ TEXT_EXTENSIONS = { ".tex", ".csv", ".log", + ".svg", # SVG is XML-based text } +# Binary file extensions we explicitly support extracting +# These are common output formats that users expect to retrieve +BINARY_EXTENSIONS = { + # Images + ".png", + ".jpg", + ".jpeg", + ".gif", + ".webp", + ".ico", + ".bmp", + ".tiff", + ".tif", + # Documents + ".pdf", + # Archives + ".zip", + ".tar", + ".gz", + ".7z", + # Audio + ".mp3", + ".wav", + ".ogg", + ".flac", + # Video + ".mp4", + ".webm", + ".mov", + ".avi", + # Fonts + ".woff", + ".woff2", + ".ttf", + ".otf", + ".eot", +} + +# Maximum file size for binary extraction (50MB) +# Prevents OOM from accidentally extracting huge files +MAX_BINARY_FILE_SIZE = 50 * 1024 * 1024 + class SandboxFileOutput(BaseModel): """A file extracted from a sandbox and optionally stored in workspace.""" @@ -120,7 +163,8 @@ async def extract_sandbox_files( sandbox: The E2B sandbox instance working_directory: Directory to search for files since_timestamp: ISO timestamp - only return files modified after this time - text_only: If True, only extract text files (default). If False, extract all files. + text_only: If True, only extract text files. If False, also extract + supported binary files (images, PDFs, etc.). 
Returns: List of ExtractedFile objects with path, content, and metadata @@ -149,14 +193,41 @@ async def extract_sandbox_files( if not file_path: continue - # Check if it's a text file - is_text = any(file_path.endswith(ext) for ext in TEXT_EXTENSIONS) + # Check file type (case-insensitive for extensions) + file_path_lower = file_path.lower() + is_text = any(file_path_lower.endswith(ext) for ext in TEXT_EXTENSIONS) + is_binary = any(file_path_lower.endswith(ext) for ext in BINARY_EXTENSIONS) - # Skip non-text files if text_only mode - if text_only and not is_text: - continue + # Determine if we should extract this file + if text_only: + # Only extract text files + if not is_text: + continue + else: + # Extract text files and supported binary files + if not is_text and not is_binary: + continue try: + # For binary files, check size before reading to prevent OOM + if is_binary: + stat_result = await sandbox.commands.run( + f"stat -c %s {shlex.quote(file_path)} 2>/dev/null" + ) + if stat_result.exit_code != 0 or not stat_result.stdout: + logger.debug( + f"Skipping {file_path}: could not determine file size" + ) + continue + + file_size = int(stat_result.stdout.strip()) + if file_size > MAX_BINARY_FILE_SIZE: + logger.info( + f"Skipping {file_path}: size {file_size} bytes " + f"exceeds limit {MAX_BINARY_FILE_SIZE}" + ) + continue + # Read file content as bytes content = await sandbox.files.read(file_path, format="bytes") if isinstance(content, str): diff --git a/docs/integrations/block-integrations/claude_code.md b/docs/integrations/block-integrations/claude_code.md index fea67cb494..78f9ccaf5d 100644 --- a/docs/integrations/block-integrations/claude_code.md +++ b/docs/integrations/block-integrations/claude_code.md @@ -16,7 +16,7 @@ When activated, the block: - Install dependencies (npm, pip, etc.) - Run terminal commands - Build and test applications -5. Extracts all text files created/modified during execution +5. 
Extracts files created/modified during execution: text files plus supported binary formats (images, PDFs, archives, etc.); binary files larger than 50 MB are skipped 6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks The block supports conversation continuation through three mechanisms: @@ -42,7 +42,7 @@ The block supports conversation continuation through three mechanisms: | Output | Description | |--------|-------------| | Response | The output/response from Claude Code execution | -| Files | List of text files created/modified during execution. Each file includes path, relative_path, name, and content fields | +| Files | List of files (text and supported binary formats such as images and PDFs) created/modified during execution. Each file has path, relative_path, name, content, and workspace_ref fields. For binary files, content holds a placeholder; the file itself is stored in the workspace and is accessible via workspace_ref | | Conversation History | Full conversation history including this turn. Use to restore context on a fresh sandbox | | Session ID | Session ID for this conversation. Pass back with sandbox_id to continue the conversation | | Sandbox ID | ID of the sandbox instance (null if disposed). Pass back with session_id to continue the conversation |