mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
Implements persistent User Workspace storage for CoPilot, enabling
blocks to save and retrieve files across sessions. Files are stored in
session-scoped virtual paths (`/sessions/{session_id}/`).
Fixes SECRT-1833
### Changes 🏗️
**Database & Storage:**
- Add `UserWorkspace` and `UserWorkspaceFile` Prisma models
- Implement `WorkspaceStorageBackend` abstraction (GCS for cloud, local
filesystem for self-hosted)
- Add `workspace_id` and `session_id` fields to `ExecutionContext`
**Backend API:**
- Add REST endpoints: `GET/POST /api/workspace/files`, `GET/DELETE
/api/workspace/files/{id}`, `GET /api/workspace/files/{id}/download`
- Add CoPilot tools: `list_workspace_files`, `read_workspace_file`,
`write_workspace_file`
- Integrate workspace storage into `store_media_file()` - returns
`workspace://file-id` references
**Block Updates:**
- Refactor all file-handling blocks to use unified `ExecutionContext`
parameter
- Update media-generating blocks to persist outputs to workspace
(AIImageGenerator, AIImageCustomizer, FluxKontext, TalkingHead, FAL
video, Bannerbear, etc.)
**Frontend:**
- Render `workspace://` image references in chat via proxy endpoint
- Add "AI cannot see this image" overlay indicator
**CoPilot Context Mapping:**
- Session = Agent (graph_id) = Run (graph_exec_id)
- Files scoped to `/sessions/{session_id}/`
### Checklist 📋
#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [ ] I have tested my changes according to the test plan:
  - [ ] Create CoPilot session, generate image with AIImageGeneratorBlock
  - [ ] Verify image returns `workspace://file-id` (not base64)
  - [ ] Verify image renders in chat with visibility indicator
  - [ ] Verify workspace files persist across sessions
  - [ ] Test list/read/write workspace files via CoPilot tools
  - [ ] Test local storage backend for self-hosted deployments
#### For configuration changes:
- [x] `.env.default` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my
changes
- [x] I have included a list of my configuration changes in the PR
description (under **Changes**)
🤖 Generated with [Claude Code](https://claude.ai/code)
<!-- CURSOR_SUMMARY -->
---
> [!NOTE]
> **Medium Risk**
> Introduces a new persistent file-storage surface area (DB tables,
storage backends, download API, and chat tools) and rewires
`store_media_file()`/block execution context across many blocks, so
regressions could impact file handling, access control, or storage
costs.
>
> **Overview**
> Adds a **persistent per-user Workspace** (new
`UserWorkspace`/`UserWorkspaceFile` models plus `WorkspaceManager` +
`WorkspaceStorageBackend` with GCS/local implementations) and wires it
into the API via a new `/api/workspace/files/{file_id}/download` route
(including header-sanitized `Content-Disposition`) and shutdown
lifecycle hooks.
>
> Extends `ExecutionContext` to carry execution identity +
`workspace_id`/`session_id`, updates executor tooling to clone
node-specific contexts, and updates `run_block` (CoPilot) to create a
session-scoped workspace and synthetic graph/run/node IDs.
>
> Refactors `store_media_file()` to require `execution_context` +
`return_format` and to support `workspace://` references; migrates many
media/file-handling blocks and related tests to the new API and to
persist generated media as `workspace://...` (or fall back to data URIs
outside CoPilot), and adds CoPilot chat tools for
listing/reading/writing/deleting workspace files with safeguards against
context bloat.
>
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
6abc70f793. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
---------
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Reinier van der Leer <pwuts@agpt.co>
250 lines
9.9 KiB
Python
250 lines
9.9 KiB
Python
"""
|
|
Tests for cloud storage integration in file utilities.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from backend.data.execution import ExecutionContext
|
|
from backend.util.file import store_media_file
|
|
from backend.util.type import MediaFileType
|
|
|
|
|
|
def make_test_context(
    graph_exec_id: str = "test-exec-123",
    user_id: str = "test-user-123",
) -> ExecutionContext:
    """Build the ExecutionContext passed to ``store_media_file`` in these tests.

    Args:
        graph_exec_id: Graph execution ID to place on the context.
        user_id: User ID to place on the context.

    Returns:
        An ``ExecutionContext`` constructed from exactly these two fields.
    """
    context_kwargs = {"user_id": user_id, "graph_exec_id": graph_exec_id}
    return ExecutionContext(**context_kwargs)
|
|
|
|
|
|
class TestFileCloudIntegration:
    """Test cases for cloud storage integration in file utilities.

    Each test replaces ``backend.util.file.get_cloud_storage_handler`` with a
    mock so that ``store_media_file`` talks to a fake handler. The handler is
    always a ``MagicMock`` (its ``is_cloud_path`` / ``parse_cloud_path`` are
    configured with plain return values) and only ``retrieve_file`` is an
    ``AsyncMock``.
    """

    @pytest.mark.asyncio
    async def test_store_media_file_cloud_path(self):
        """Test storing a file from cloud storage path."""
        graph_exec_id = "test-exec-123"
        cloud_path = "gcs://test-bucket/uploads/456/source.txt"
        cloud_content = b"cloud file content"

        with patch(
            "backend.util.file.get_cloud_storage_handler"
        ) as mock_handler_getter, patch(
            "backend.util.file.scan_content_safe"
        ) as mock_scan, patch(
            "backend.util.file.Path"
        ) as mock_path_class:

            # Mock cloud storage handler
            mock_handler = MagicMock()
            mock_handler.is_cloud_path.return_value = True
            mock_handler.parse_cloud_path.return_value = (
                "gcs",
                "test-bucket/uploads/456/source.txt",
            )
            mock_handler.retrieve_file = AsyncMock(return_value=cloud_content)
            mock_handler_getter.return_value = mock_handler

            # Mock virus scanner
            mock_scan.return_value = None

            # Mock file system operations
            mock_base_path = MagicMock()
            mock_target_path = MagicMock()
            mock_resolved_path = MagicMock()

            mock_path_class.return_value = mock_base_path
            mock_base_path.mkdir = MagicMock()
            mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
            mock_target_path.resolve.return_value = mock_resolved_path
            mock_resolved_path.is_relative_to.return_value = True
            mock_resolved_path.write_bytes = MagicMock()
            # Real pathlib.Path: only backend.util.file.Path is patched.
            mock_resolved_path.relative_to.return_value = Path("source.txt")

            # Configure the main Path mock to handle filename extraction
            # When Path(path_part) is called, it should return a mock with .name = "source.txt"
            mock_path_for_filename = MagicMock()
            mock_path_for_filename.name = "source.txt"

            # The Path constructor should return different mocks for different calls
            def path_constructor(*args, **kwargs):
                if len(args) == 1 and "source.txt" in str(args[0]):
                    return mock_path_for_filename
                else:
                    return mock_base_path

            # side_effect takes precedence over the return_value set above.
            mock_path_class.side_effect = path_constructor

            result = await store_media_file(
                file=MediaFileType(cloud_path),
                execution_context=make_test_context(graph_exec_id=graph_exec_id),
                return_format="for_local_processing",
            )

            # Verify cloud storage operations
            mock_handler.is_cloud_path.assert_called_once_with(cloud_path)
            mock_handler.parse_cloud_path.assert_called_once_with(cloud_path)
            mock_handler.retrieve_file.assert_called_once_with(
                cloud_path, user_id="test-user-123", graph_exec_id=graph_exec_id
            )

            # Verify virus scan
            mock_scan.assert_called_once_with(cloud_content, filename="source.txt")

            # Verify file operations
            mock_resolved_path.write_bytes.assert_called_once_with(cloud_content)

            # Result should be the relative path
            assert str(result) == "source.txt"

    @pytest.mark.asyncio
    async def test_store_media_file_cloud_path_return_content(self):
        """Test storing a file from cloud storage and returning content.

        With ``return_format="for_external_api"`` the result is expected to be
        a ``data:image/png;base64,...`` URI. MIME detection and base64
        encoding are both mocked, so only the shape of the result is checked.
        """
        graph_exec_id = "test-exec-123"
        cloud_path = "gcs://test-bucket/uploads/456/image.png"
        # Real escape sequences, so these bytes are the PNG signature + IHDR tag.
        cloud_content = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR"  # PNG header

        # Every Path(...) call made inside backend.util.file during this test
        # returns this one mock; its attributes and operators (``/``,
        # ``resolve()``, ...) are auto-created MagicMocks.
        mock_path_obj = MagicMock()
        mock_path_obj.name = "image.png"

        with patch(
            "backend.util.file.get_cloud_storage_handler"
        ) as mock_handler_getter, patch(
            "backend.util.file.scan_content_safe"
        ) as mock_scan, patch(
            "backend.util.file.get_mime_type"
        ) as mock_mime, patch(
            "backend.util.file.base64.b64encode"
        ) as mock_b64, patch(
            "backend.util.file.Path", return_value=mock_path_obj
        ):

            # Mock cloud storage handler
            mock_handler = MagicMock()
            mock_handler.is_cloud_path.return_value = True
            mock_handler.parse_cloud_path.return_value = (
                "gcs",
                "test-bucket/uploads/456/image.png",
            )
            mock_handler.retrieve_file = AsyncMock(return_value=cloud_content)
            mock_handler_getter.return_value = mock_handler

            # Mock other operations
            mock_scan.return_value = None
            mock_mime.return_value = "image/png"
            mock_b64.return_value.decode.return_value = "iVBORw0KGgoAAAANSUhEUgA="

            result = await store_media_file(
                file=MediaFileType(cloud_path),
                execution_context=make_test_context(graph_exec_id=graph_exec_id),
                return_format="for_external_api",
            )

            # Verify result is a data URI
            assert str(result).startswith("data:image/png;base64,")

    @pytest.mark.asyncio
    async def test_store_media_file_non_cloud_path(self):
        """Test that non-cloud paths are handled normally."""
        graph_exec_id = "test-exec-123"
        data_uri = "data:text/plain;base64,SGVsbG8gd29ybGQ="

        with patch(
            "backend.util.file.get_cloud_storage_handler"
        ) as mock_handler_getter, patch(
            "backend.util.file.scan_content_safe"
        ) as mock_scan, patch(
            "backend.util.file.base64.b64decode"
        ) as mock_b64decode, patch(
            "backend.util.file.uuid.uuid4"
        ) as mock_uuid, patch(
            "backend.util.file.Path"
        ) as mock_path_class:

            # Mock cloud storage handler
            mock_handler = MagicMock()
            mock_handler.is_cloud_path.return_value = False
            mock_handler.retrieve_file = (
                AsyncMock()
            )  # Add this even though it won't be called
            mock_handler_getter.return_value = mock_handler

            # Mock other operations
            mock_scan.return_value = None
            mock_b64decode.return_value = b"Hello world"
            mock_uuid.return_value = "test-uuid-789"

            # Mock file system operations
            mock_base_path = MagicMock()
            mock_target_path = MagicMock()
            mock_resolved_path = MagicMock()

            mock_path_class.return_value = mock_base_path
            mock_base_path.mkdir = MagicMock()
            mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
            mock_target_path.resolve.return_value = mock_resolved_path
            mock_resolved_path.is_relative_to.return_value = True
            mock_resolved_path.write_bytes = MagicMock()
            # Real pathlib.Path: only backend.util.file.Path is patched.
            mock_resolved_path.relative_to.return_value = Path("test-uuid-789.txt")

            await store_media_file(
                file=MediaFileType(data_uri),
                execution_context=make_test_context(graph_exec_id=graph_exec_id),
                return_format="for_local_processing",
            )

            # Verify cloud handler was checked but not used for retrieval
            mock_handler.is_cloud_path.assert_called_once_with(data_uri)
            mock_handler.retrieve_file.assert_not_called()

            # Verify normal data URI processing occurred
            mock_b64decode.assert_called_once()
            mock_resolved_path.write_bytes.assert_called_once_with(b"Hello world")

    @pytest.mark.asyncio
    async def test_store_media_file_cloud_retrieval_error(self):
        """Test error handling when cloud retrieval fails.

        The ``FileNotFoundError`` raised by the handler's ``retrieve_file`` is
        expected to propagate out of ``store_media_file`` unchanged.
        """
        graph_exec_id = "test-exec-123"
        cloud_path = "gcs://test-bucket/nonexistent.txt"

        with patch(
            "backend.util.file.get_cloud_storage_handler"
        ) as mock_handler_getter:

            # Mock cloud storage handler to raise error.
            # The handler itself is a MagicMock, as in the other tests: on an
            # AsyncMock handler, is_cloud_path(...) would return a coroutine
            # object (always truthy, never awaited) instead of the configured
            # True. Only retrieve_file is an AsyncMock.
            mock_handler = MagicMock()
            mock_handler.is_cloud_path.return_value = True
            mock_handler.retrieve_file = AsyncMock(
                side_effect=FileNotFoundError("File not found in cloud storage")
            )
            mock_handler_getter.return_value = mock_handler

            with pytest.raises(
                FileNotFoundError, match="File not found in cloud storage"
            ):
                await store_media_file(
                    file=MediaFileType(cloud_path),
                    execution_context=make_test_context(graph_exec_id=graph_exec_id),
                    return_format="for_local_processing",
                )
|