Files
AutoGPT/autogpt_platform/backend/backend/util/file_test.py
Nicholas Tindle 7668c17d9c feat(platform): add User Workspace for persistent CoPilot file storage (#11867)
Implements persistent User Workspace storage for CoPilot, enabling
blocks to save and retrieve files across sessions. Files are stored in
session-scoped virtual paths (`/sessions/{session_id}/`).

Fixes SECRT-1833

### Changes 🏗️

**Database & Storage:**
- Add `UserWorkspace` and `UserWorkspaceFile` Prisma models
- Implement `WorkspaceStorageBackend` abstraction (GCS for cloud, local
filesystem for self-hosted)
- Add `workspace_id` and `session_id` fields to `ExecutionContext`

**Backend API:**
- Add REST endpoints: `GET/POST /api/workspace/files`, `GET/DELETE
/api/workspace/files/{id}`, `GET /api/workspace/files/{id}/download`
- Add CoPilot tools: `list_workspace_files`, `read_workspace_file`,
`write_workspace_file`
- Integrate workspace storage into `store_media_file()` - returns
`workspace://file-id` references

**Block Updates:**
- Refactor all file-handling blocks to use unified `ExecutionContext`
parameter
- Update media-generating blocks to persist outputs to workspace
(AIImageGenerator, AIImageCustomizer, FluxKontext, TalkingHead, FAL
video, Bannerbear, etc.)

**Frontend:**
- Render `workspace://` image references in chat via proxy endpoint
- Add "AI cannot see this image" overlay indicator

**CoPilot Context Mapping:**
- Session = Agent (graph_id) = Run (graph_exec_id)
- Files scoped to `/sessions/{session_id}/`

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [ ] I have tested my changes according to the test plan:
  - [ ] Create CoPilot session, generate image with AIImageGeneratorBlock
  - [ ] Verify image returns `workspace://file-id` (not base64)
  - [ ] Verify image renders in chat with visibility indicator
  - [ ] Verify workspace files persist across sessions
  - [ ] Test list/read/write workspace files via CoPilot tools
  - [ ] Test local storage backend for self-hosted deployments

#### For configuration changes:
- [x] `.env.default` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my
changes
- [x] I have included a list of my configuration changes in the PR
description (under **Changes**)

🤖 Generated with [Claude Code](https://claude.ai/code)

<!-- CURSOR_SUMMARY -->
---

> [!NOTE]
> **Medium Risk**
> Introduces a new persistent file-storage surface area (DB tables,
storage backends, download API, and chat tools) and rewires
`store_media_file()`/block execution context across many blocks, so
regressions could impact file handling, access control, or storage
costs.
> 
> **Overview**
> Adds a **persistent per-user Workspace** (new
`UserWorkspace`/`UserWorkspaceFile` models plus `WorkspaceManager` +
`WorkspaceStorageBackend` with GCS/local implementations) and wires it
into the API via a new `/api/workspace/files/{file_id}/download` route
(including header-sanitized `Content-Disposition`) and shutdown
lifecycle hooks.
> 
> Extends `ExecutionContext` to carry execution identity +
`workspace_id`/`session_id`, updates executor tooling to clone
node-specific contexts, and updates `run_block` (CoPilot) to create a
session-scoped workspace and synthetic graph/run/node IDs.
> 
> Refactors `store_media_file()` to require `execution_context` +
`return_format` and to support `workspace://` references; migrates many
media/file-handling blocks and related tests to the new API and to
persist generated media as `workspace://...` (or fall back to data URIs
outside CoPilot), and adds CoPilot chat tools for
listing/reading/writing/deleting workspace files with safeguards against
context bloat.
> 
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
6abc70f793. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Reinier van der Leer <pwuts@agpt.co>
2026-01-29 05:49:47 +00:00

250 lines
9.9 KiB
Python

"""
Tests for cloud storage integration in file utilities.
"""
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from backend.data.execution import ExecutionContext
from backend.util.file import store_media_file
from backend.util.type import MediaFileType
def make_test_context(
    graph_exec_id: str = "test-exec-123",
    user_id: str = "test-user-123",
) -> ExecutionContext:
    """Build an ExecutionContext for tests.

    Only ``user_id`` and ``graph_exec_id`` are supplied; every other field of
    the context is left to whatever ExecutionContext itself defaults to.
    """
    context_fields = {
        "user_id": user_id,
        "graph_exec_id": graph_exec_id,
    }
    return ExecutionContext(**context_fields)
class TestFileCloudIntegration:
    """Test cases for cloud storage integration in file utilities.

    Each test patches the collaborators that ``store_media_file`` looks up in
    ``backend.util.file`` (cloud storage handler, content scanner, ``Path``,
    ...) so that no real cloud storage or filesystem access is needed.
    """

    @pytest.mark.asyncio
    async def test_store_media_file_cloud_path(self):
        """Test storing a file from cloud storage path.

        Expects the content to be retrieved through the cloud handler, passed
        to the scanner under the name ``source.txt``, written to the resolved
        target path, and the relative path returned.
        """
        graph_exec_id = "test-exec-123"
        cloud_path = "gcs://test-bucket/uploads/456/source.txt"
        cloud_content = b"cloud file content"

        with patch(
            "backend.util.file.get_cloud_storage_handler"
        ) as mock_handler_getter, patch(
            "backend.util.file.scan_content_safe"
        ) as mock_scan, patch(
            "backend.util.file.Path"
        ) as mock_path_class:
            # Mock cloud storage handler: sync helpers on a MagicMock, only
            # the awaited retrieve_file is an AsyncMock.
            mock_handler = MagicMock()
            mock_handler.is_cloud_path.return_value = True
            mock_handler.parse_cloud_path.return_value = (
                "gcs",
                "test-bucket/uploads/456/source.txt",
            )
            mock_handler.retrieve_file = AsyncMock(return_value=cloud_content)
            mock_handler_getter.return_value = mock_handler

            # Mock virus scanner
            mock_scan.return_value = None

            # Mock file system operations
            mock_base_path = MagicMock()
            mock_target_path = MagicMock()
            mock_resolved_path = MagicMock()

            mock_path_class.return_value = mock_base_path
            mock_base_path.mkdir = MagicMock()
            mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
            mock_target_path.resolve.return_value = mock_resolved_path
            mock_resolved_path.is_relative_to.return_value = True
            mock_resolved_path.write_bytes = MagicMock()
            mock_resolved_path.relative_to.return_value = Path("source.txt")

            # Configure the main Path mock to handle filename extraction
            # When Path(path_part) is called, it should return a mock with
            # .name = "source.txt"
            mock_path_for_filename = MagicMock()
            mock_path_for_filename.name = "source.txt"

            # The Path constructor should return different mocks for
            # different calls
            def path_constructor(*args, **kwargs):
                if len(args) == 1 and "source.txt" in str(args[0]):
                    return mock_path_for_filename
                else:
                    return mock_base_path

            # side_effect takes precedence over the return_value set above.
            mock_path_class.side_effect = path_constructor

            result = await store_media_file(
                file=MediaFileType(cloud_path),
                execution_context=make_test_context(graph_exec_id=graph_exec_id),
                return_format="for_local_processing",
            )

            # Verify cloud storage operations
            mock_handler.is_cloud_path.assert_called_once_with(cloud_path)
            mock_handler.parse_cloud_path.assert_called_once_with(cloud_path)
            mock_handler.retrieve_file.assert_called_once_with(
                cloud_path, user_id="test-user-123", graph_exec_id=graph_exec_id
            )

            # Verify virus scan
            mock_scan.assert_called_once_with(cloud_content, filename="source.txt")

            # Verify file operations
            mock_resolved_path.write_bytes.assert_called_once_with(cloud_content)

            # Result should be the relative path
            assert str(result) == "source.txt"

    @pytest.mark.asyncio
    async def test_store_media_file_cloud_path_return_content(self):
        """Test storing a file from cloud storage and returning content.

        With ``return_format="for_external_api"`` the result is expected to be
        a ``data:image/png;base64,...`` URI.
        """
        graph_exec_id = "test-exec-123"
        cloud_path = "gcs://test-bucket/uploads/456/image.png"
        # PNG signature followed by the start of the IHDR chunk. Single
        # backslashes so these are the real header bytes, not escaped text.
        cloud_content = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR"

        # Every Path(...) call made by the code under test yields this mock;
        # its .name is what filename extraction sees.
        mock_path_obj = MagicMock()
        mock_path_obj.name = "image.png"

        with patch(
            "backend.util.file.get_cloud_storage_handler"
        ) as mock_handler_getter, patch(
            "backend.util.file.scan_content_safe"
        ) as mock_scan, patch(
            "backend.util.file.get_mime_type"
        ) as mock_mime, patch(
            "backend.util.file.base64.b64encode"
        ) as mock_b64, patch(
            "backend.util.file.Path", return_value=mock_path_obj
        ):
            # Mock cloud storage handler
            mock_handler = MagicMock()
            mock_handler.is_cloud_path.return_value = True
            mock_handler.parse_cloud_path.return_value = (
                "gcs",
                "test-bucket/uploads/456/image.png",
            )
            mock_handler.retrieve_file = AsyncMock(return_value=cloud_content)
            mock_handler_getter.return_value = mock_handler

            # Mock other operations
            mock_scan.return_value = None
            mock_mime.return_value = "image/png"
            mock_b64.return_value.decode.return_value = "iVBORw0KGgoAAAANSUhEUgA="

            result = await store_media_file(
                file=MediaFileType(cloud_path),
                execution_context=make_test_context(graph_exec_id=graph_exec_id),
                return_format="for_external_api",
            )

            # Verify result is a data URI
            assert str(result).startswith("data:image/png;base64,")

    @pytest.mark.asyncio
    async def test_store_media_file_non_cloud_path(self):
        """Test that non-cloud paths are handled normally.

        A data URI input must be checked against the cloud handler but never
        retrieved from it; the decoded bytes are written to the target path.
        """
        graph_exec_id = "test-exec-123"
        data_uri = "data:text/plain;base64,SGVsbG8gd29ybGQ="

        with patch(
            "backend.util.file.get_cloud_storage_handler"
        ) as mock_handler_getter, patch(
            "backend.util.file.scan_content_safe"
        ) as mock_scan, patch(
            "backend.util.file.base64.b64decode"
        ) as mock_b64decode, patch(
            "backend.util.file.uuid.uuid4"
        ) as mock_uuid, patch(
            "backend.util.file.Path"
        ) as mock_path_class:
            # Mock cloud storage handler
            mock_handler = MagicMock()
            mock_handler.is_cloud_path.return_value = False
            mock_handler.retrieve_file = (
                AsyncMock()
            )  # Add this even though it won't be called
            mock_handler_getter.return_value = mock_handler

            # Mock other operations
            mock_scan.return_value = None
            mock_b64decode.return_value = b"Hello world"
            mock_uuid.return_value = "test-uuid-789"

            # Mock file system operations
            mock_base_path = MagicMock()
            mock_target_path = MagicMock()
            mock_resolved_path = MagicMock()

            mock_path_class.return_value = mock_base_path
            mock_base_path.mkdir = MagicMock()
            mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
            mock_target_path.resolve.return_value = mock_resolved_path
            mock_resolved_path.is_relative_to.return_value = True
            mock_resolved_path.write_bytes = MagicMock()
            mock_resolved_path.relative_to.return_value = Path("test-uuid-789.txt")

            await store_media_file(
                file=MediaFileType(data_uri),
                execution_context=make_test_context(graph_exec_id=graph_exec_id),
                return_format="for_local_processing",
            )

            # Verify cloud handler was checked but not used for retrieval
            mock_handler.is_cloud_path.assert_called_once_with(data_uri)
            mock_handler.retrieve_file.assert_not_called()

            # Verify normal data URI processing occurred
            mock_b64decode.assert_called_once()
            mock_resolved_path.write_bytes.assert_called_once_with(b"Hello world")

    @pytest.mark.asyncio
    async def test_store_media_file_cloud_retrieval_error(self):
        """Test error handling when cloud retrieval fails.

        A FileNotFoundError raised by the handler's ``retrieve_file`` must
        propagate out of ``store_media_file`` unchanged.
        """
        graph_exec_id = "test-exec-123"
        cloud_path = "gcs://test-bucket/nonexistent.txt"

        with patch(
            "backend.util.file.get_cloud_storage_handler"
        ) as mock_handler_getter:
            # Mock cloud storage handler to raise error. The handler itself is
            # a MagicMock so the synchronous is_cloud_path() returns a real
            # bool; a bare AsyncMock handler would hand back an un-awaited
            # coroutine there, which only passes the check by being truthy.
            mock_handler = MagicMock()
            mock_handler.is_cloud_path.return_value = True
            mock_handler.retrieve_file = AsyncMock(
                side_effect=FileNotFoundError("File not found in cloud storage")
            )
            mock_handler_getter.return_value = mock_handler

            with pytest.raises(
                FileNotFoundError, match="File not found in cloud storage"
            ):
                await store_media_file(
                    file=MediaFileType(cloud_path),
                    execution_context=make_test_context(graph_exec_id=graph_exec_id),
                    return_format="for_local_processing",
                )