Mirror of https://github.com/Significant-Gravitas/AutoGPT.git (synced 2026-04-30 03:00:41 -04:00)
Implements persistent User Workspace storage for CoPilot, enabling
blocks to save and retrieve files across sessions. Files are stored in
session-scoped virtual paths (`/sessions/{session_id}/`).
Fixes SECRT-1833
### Changes 🏗️
**Database & Storage:**
- Add `UserWorkspace` and `UserWorkspaceFile` Prisma models
- Implement `WorkspaceStorageBackend` abstraction (GCS for cloud, local filesystem for self-hosted); a rough interface sketch follows this list
- Add `workspace_id` and `session_id` fields to `ExecutionContext`
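
For orientation, here is a minimal sketch of what the storage abstraction could look like. This is illustrative only: the method names and signatures are assumptions, not the PR's actual interface.

```python
# Illustrative sketch only - the real WorkspaceStorageBackend interface
# in this PR may differ in method names and signatures.
from abc import ABC, abstractmethod


class WorkspaceStorageBackend(ABC):
    """Persists workspace file content under a virtual path."""

    @abstractmethod
    async def store(self, workspace_id: str, path: str, content: bytes) -> str:
        """Write content and return a storage key for the file."""

    @abstractmethod
    async def retrieve(self, workspace_id: str, path: str) -> bytes:
        """Read file content back by virtual path."""

    @abstractmethod
    async def delete(self, workspace_id: str, path: str) -> None:
        """Remove a stored file."""


class GCSWorkspaceStorage(WorkspaceStorageBackend):
    """Cloud deployments: objects stored in a GCS bucket."""
    # (implementation omitted)


class LocalWorkspaceStorage(WorkspaceStorageBackend):
    """Self-hosted deployments: files stored on the local filesystem."""
    # (implementation omitted)
```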
**Backend API:**
- Add REST endpoints: `GET/POST /api/workspace/files`, `GET/DELETE /api/workspace/files/{id}`, `GET /api/workspace/files/{id}/download` (a usage sketch follows this list)
- Add CoPilot tools: `list_workspace_files`, `read_workspace_file`, `write_workspace_file`
- Integrate workspace storage into `store_media_file()`, which now returns `workspace://file-id` references
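
A minimal client-side sketch of exercising the new endpoints. The paths come from this PR's description; the JSON response shape is an assumption.

```python
# Hedged example: endpoint paths are from this PR, but the response
# shapes (list of dicts, raw bytes for download) are assumptions.
import httpx


async def list_workspace_files(client: httpx.AsyncClient) -> list[dict]:
    resp = await client.get("/api/workspace/files")
    resp.raise_for_status()
    return resp.json()


async def download_workspace_file(client: httpx.AsyncClient, file_id: str) -> bytes:
    resp = await client.get(f"/api/workspace/files/{file_id}/download")
    resp.raise_for_status()
    return resp.content
```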
**Block Updates:**
- Refactor all file-handling blocks to use unified `ExecutionContext`
parameter
- Update media-generating blocks to persist outputs to workspace
(AIImageGenerator, AIImageCustomizer, FluxKontext, TalkingHead, FAL
video, Bannerbear, etc.)
**Frontend:**
- Render `workspace://` image references in chat via proxy endpoint
- Add "AI cannot see this image" overlay indicator
**CoPilot Context Mapping:**
- Session = Agent (`graph_id`) = Run (`graph_exec_id`): each CoPilot session maps to a single agent and a single run
- Files are scoped to `/sessions/{session_id}/` (see the path sketch below)
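
For illustration, the session scoping reduces to a path convention like the following; the helper name is hypothetical.

```python
# Hypothetical helper - the PR may implement this differently.
def session_file_path(session_id: str, filename: str) -> str:
    """Workspace files for a CoPilot session live under /sessions/{session_id}/."""
    return f"/sessions/{session_id}/{filename}"


# e.g. session_file_path("abc123", "chart.png") -> "/sessions/abc123/chart.png"
```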
### Checklist 📋
#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [ ] I have tested my changes according to the test plan:
- [ ] Create CoPilot session, generate image with AIImageGeneratorBlock
- [ ] Verify image returns `workspace://file-id` (not base64)
- [ ] Verify image renders in chat with visibility indicator
- [ ] Verify workspace files persist across sessions
- [ ] Test list/read/write workspace files via CoPilot tools
- [ ] Test local storage backend for self-hosted deployments
#### For configuration changes:
- [x] `.env.default` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my
changes
- [x] I have included a list of my configuration changes in the PR
description (under **Changes**)
🤖 Generated with [Claude Code](https://claude.ai/code)
<!-- CURSOR_SUMMARY -->
---
> [!NOTE]
> **Medium Risk**
> Introduces a new persistent file-storage surface area (DB tables,
storage backends, download API, and chat tools) and rewires
`store_media_file()`/block execution context across many blocks, so
regressions could impact file handling, access control, or storage
costs.
>
> **Overview**
> Adds a **persistent per-user Workspace** (new
`UserWorkspace`/`UserWorkspaceFile` models plus `WorkspaceManager` +
`WorkspaceStorageBackend` with GCS/local implementations) and wires it
into the API via a new `/api/workspace/files/{file_id}/download` route
(including header-sanitized `Content-Disposition`) and shutdown
lifecycle hooks.
>
> Extends `ExecutionContext` to carry execution identity +
`workspace_id`/`session_id`, updates executor tooling to clone
node-specific contexts, and updates `run_block` (CoPilot) to create a
session-scoped workspace and synthetic graph/run/node IDs.
>
> Refactors `store_media_file()` to require `execution_context` +
`return_format` and to support `workspace://` references; migrates many
media/file-handling blocks and related tests to the new API and to
persist generated media as `workspace://...` (or fall back to data URIs
outside CoPilot), and adds CoPilot chat tools for
listing/reading/writing/deleting workspace files with safeguards against
context bloat.
>
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
6abc70f793. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
---------
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Reinier van der Leer <pwuts@agpt.co>
190 lines
6.8 KiB
Python
from pathlib import Path

from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
    BlockSchemaInput,
    BlockSchemaOutput,
)
from backend.data.execution import ExecutionContext
from backend.data.model import ContributorDetails, SchemaField
from backend.util.file import get_exec_file_path, store_media_file
from backend.util.type import MediaFileType


class ReadSpreadsheetBlock(Block):
    class Input(BlockSchemaInput):
        contents: str | None = SchemaField(
            description="The contents of the CSV/spreadsheet data to read",
            placeholder="a, b, c\n1,2,3\n4,5,6",
            default=None,
            advanced=False,
        )
        file_input: MediaFileType | None = SchemaField(
            description="CSV or Excel file to read from (URL, data URI, or local path). Excel files are automatically converted to CSV",
            default=None,
            advanced=False,
        )
        delimiter: str = SchemaField(
            description="The delimiter used in the CSV/spreadsheet data",
            default=",",
        )
        quotechar: str = SchemaField(
            description="The character used to quote fields",
            default='"',
        )
        escapechar: str = SchemaField(
            description="The character used to escape the delimiter",
            default="\\",
        )
        has_header: bool = SchemaField(
            description="Whether the CSV file has a header row",
            default=True,
        )
        skip_rows: int = SchemaField(
            description="The number of rows to skip from the start of the file",
            default=0,
        )
        strip: bool = SchemaField(
            description="Whether to strip whitespace from the values",
            default=True,
        )
        skip_columns: list[str] = SchemaField(
            description="Column names (or string indices, when there is no header) to omit from each row",
            default_factory=list,
        )
        produce_singular_result: bool = SchemaField(
            description="If True, yield individual 'row' outputs only (can be slow). If False, yield a single 'rows' output with all the data",
            default=False,
        )

    class Output(BlockSchemaOutput):
        row: dict[str, str] = SchemaField(
            description="The data produced from each row in the spreadsheet"
        )
        rows: list[dict[str, str]] = SchemaField(
            description="All the data in the spreadsheet as a list of rows"
        )

    def __init__(self):
        super().__init__(
            id="acf7625e-d2cb-4941-bfeb-2819fc6fc015",
            input_schema=ReadSpreadsheetBlock.Input,
            output_schema=ReadSpreadsheetBlock.Output,
            description="Reads CSV and Excel files and outputs the data as a list of dictionaries and individual rows. Excel files are automatically converted to CSV format.",
            contributors=[ContributorDetails(name="Nicholas Tindle")],
            categories={BlockCategory.TEXT, BlockCategory.DATA},
            test_input=[
                {
                    "contents": "a, b, c\n1,2,3\n4,5,6",
                    "produce_singular_result": False,
                },
                {
                    "contents": "a, b, c\n1,2,3\n4,5,6",
                    "produce_singular_result": True,
                },
            ],
            test_output=[
                (
                    "rows",
                    [
                        {"a": "1", "b": "2", "c": "3"},
                        {"a": "4", "b": "5", "c": "6"},
                    ],
                ),
                ("row", {"a": "1", "b": "2", "c": "3"}),
                ("row", {"a": "4", "b": "5", "c": "6"}),
            ],
        )

    async def run(
        self, input_data: Input, *, execution_context: ExecutionContext, **_kwargs
    ) -> BlockOutput:
        import csv
        from io import StringIO

        # Determine data source - prefer file_input if provided, otherwise use contents
        if input_data.file_input:
            stored_file_path = await store_media_file(
                file=input_data.file_input,
                execution_context=execution_context,
                return_format="for_local_processing",
            )

            # Resolve the stored file to an absolute path in the execution's file area
            assert execution_context.graph_exec_id  # Validated by store_media_file
            file_path = get_exec_file_path(
                execution_context.graph_exec_id, stored_file_path
            )
            if not Path(file_path).exists():
                raise ValueError(f"File does not exist: {file_path}")

            # Check if file is an Excel file and convert it to CSV
            file_extension = Path(file_path).suffix.lower()

            if file_extension in [".xlsx", ".xls"]:
                try:
                    import pandas as pd

                    # Read the Excel file and serialize it to a CSV string in memory
                    df = pd.read_excel(file_path)
                    csv_buffer = StringIO()
                    df.to_csv(csv_buffer, index=False)
                    csv_content = csv_buffer.getvalue()
                except ImportError:
                    raise ValueError(
                        "pandas library is required to read Excel files. Please install it."
                    )
                except Exception as e:
                    raise ValueError(f"Unable to read Excel file: {e}")
            else:
                # Handle CSV/text files
                csv_content = Path(file_path).read_text(encoding="utf-8")
        elif input_data.contents:
            # Use direct string content
            csv_content = input_data.contents
        else:
            raise ValueError("Either 'contents' or 'file_input' must be provided")

        csv_file = StringIO(csv_content)
        reader = csv.reader(
            csv_file,
            delimiter=input_data.delimiter,
            quotechar=input_data.quotechar,
            escapechar=input_data.escapechar,
        )

        header = None
        if input_data.has_header:
            header = next(reader)
            if input_data.strip:
                header = [h.strip() for h in header]

        for _ in range(input_data.skip_rows):
            next(reader)

        def process_row(row):
            # Key each value by header name when available, otherwise by column
            # index; omit any column whose key appears in skip_columns.
            data = {}
            for i, value in enumerate(row):
                key = header[i] if (input_data.has_header and header) else str(i)
                if key in input_data.skip_columns:
                    continue
                data[key] = value.strip() if input_data.strip else value
            return data

        rows = [process_row(row) for row in reader]

        if input_data.produce_singular_result:
            for processed_row in rows:
                yield "row", processed_row
        else:
            yield "rows", rows