Files
AutoGPT/autogpt_platform/backend/backend/data/workspace.py
Otto e9ba7e51db fix(copilot): Route workspace through db_accessors, fix transcript upload (#12148)
## Summary

Fixes two bugs in the copilot executor:

### SECRT-2008: WorkspaceManager bypasses db_accessors
`backend/util/workspace.py` imported 6 workspace functions directly from
`backend/data/workspace.py`, which call `prisma()` directly. In the
copilot executor (no Prisma connection), these fail.

**Fix:** Replace direct imports with `workspace_db()` from
`db_accessors`, routing through the database_manager HTTP client when
Prisma is unavailable. Also:
- Register all 6 workspace functions in `DatabaseManager` and
`DatabaseManagerAsyncClient`
- Add `UniqueViolationError` to the service `EXCEPTION_MAPPING` so it's
properly re-raised over HTTP (needed for race-condition retry logic)

### SECRT-2009: Transcript upload asyncio.timeout error
`asyncio.create_task()` at line 696 of `service.py` creates an orphaned
background task in the executor's thread event loop.
`gcloud-aio-storage`'s `asyncio.timeout()` fails in this context.

**Fix:** Replace `create_task` with direct `await`. The upload runs
after streaming completes (all chunks already yielded), so no
user-facing latency impact. The function already has internal try/except
error handling.
2026-02-17 22:24:19 +00:00

338 lines
9.0 KiB
Python

"""
Database CRUD operations for User Workspace.
This module provides functions for managing user workspaces and workspace files.
"""
import logging
from datetime import datetime, timezone
from typing import Optional
import pydantic
from prisma.models import UserWorkspace, UserWorkspaceFile
from prisma.types import UserWorkspaceFileWhereInput
from backend.util.json import SafeJson
logger = logging.getLogger(__name__)
class Workspace(pydantic.BaseModel):
"""Pydantic model for UserWorkspace, safe for RPC transport."""
id: str
user_id: str
created_at: datetime
updated_at: datetime
@staticmethod
def from_db(workspace: "UserWorkspace") -> "Workspace":
return Workspace(
id=workspace.id,
user_id=workspace.userId,
created_at=workspace.createdAt,
updated_at=workspace.updatedAt,
)
class WorkspaceFile(pydantic.BaseModel):
"""Pydantic model for UserWorkspaceFile, safe for RPC transport."""
id: str
workspace_id: str
created_at: datetime
updated_at: datetime
name: str
path: str
storage_path: str
mime_type: str
size_bytes: int
checksum: Optional[str] = None
is_deleted: bool = False
deleted_at: Optional[datetime] = None
metadata: dict = pydantic.Field(default_factory=dict)
@staticmethod
def from_db(file: "UserWorkspaceFile") -> "WorkspaceFile":
return WorkspaceFile(
id=file.id,
workspace_id=file.workspaceId,
created_at=file.createdAt,
updated_at=file.updatedAt,
name=file.name,
path=file.path,
storage_path=file.storagePath,
mime_type=file.mimeType,
size_bytes=file.sizeBytes,
checksum=file.checksum,
is_deleted=file.isDeleted,
deleted_at=file.deletedAt,
metadata=file.metadata if isinstance(file.metadata, dict) else {},
)
async def get_or_create_workspace(user_id: str) -> Workspace:
"""
Get user's workspace, creating one if it doesn't exist.
Uses upsert to handle race conditions when multiple concurrent requests
attempt to create a workspace for the same user.
Args:
user_id: The user's ID
Returns:
Workspace instance
"""
workspace = await UserWorkspace.prisma().upsert(
where={"userId": user_id},
data={
"create": {"userId": user_id},
"update": {}, # No updates needed if exists
},
)
return Workspace.from_db(workspace)
async def get_workspace(user_id: str) -> Optional[Workspace]:
"""
Get user's workspace if it exists.
Args:
user_id: The user's ID
Returns:
Workspace instance or None
"""
workspace = await UserWorkspace.prisma().find_unique(where={"userId": user_id})
return Workspace.from_db(workspace) if workspace else None
async def create_workspace_file(
workspace_id: str,
file_id: str,
name: str,
path: str,
storage_path: str,
mime_type: str,
size_bytes: int,
checksum: Optional[str] = None,
metadata: Optional[dict] = None,
) -> WorkspaceFile:
"""
Create a new workspace file record.
Args:
workspace_id: The workspace ID
file_id: The file ID (same as used in storage path for consistency)
name: User-visible filename
path: Virtual path (e.g., "/documents/report.pdf")
storage_path: Actual storage path (GCS or local)
mime_type: MIME type of the file
size_bytes: File size in bytes
checksum: Optional SHA256 checksum
metadata: Optional additional metadata
Returns:
Created WorkspaceFile instance
"""
# Normalize path to start with /
if not path.startswith("/"):
path = f"/{path}"
file = await UserWorkspaceFile.prisma().create(
data={
"id": file_id,
"workspaceId": workspace_id,
"name": name,
"path": path,
"storagePath": storage_path,
"mimeType": mime_type,
"sizeBytes": size_bytes,
"checksum": checksum,
"metadata": SafeJson(metadata or {}),
}
)
logger.info(
f"Created workspace file {file.id} at path {path} "
f"in workspace {workspace_id}"
)
return WorkspaceFile.from_db(file)
async def get_workspace_file(
file_id: str,
workspace_id: str,
) -> Optional[WorkspaceFile]:
"""
Get a workspace file by ID.
Args:
file_id: The file ID
workspace_id: Workspace ID for scoping (required)
Returns:
WorkspaceFile instance or None
"""
where_clause: UserWorkspaceFileWhereInput = {
"id": file_id,
"isDeleted": False,
"workspaceId": workspace_id,
}
file = await UserWorkspaceFile.prisma().find_first(where=where_clause)
return WorkspaceFile.from_db(file) if file else None
async def get_workspace_file_by_path(
workspace_id: str,
path: str,
) -> Optional[WorkspaceFile]:
"""
Get a workspace file by its virtual path.
Args:
workspace_id: The workspace ID
path: Virtual path
Returns:
WorkspaceFile instance or None
"""
# Normalize path
if not path.startswith("/"):
path = f"/{path}"
file = await UserWorkspaceFile.prisma().find_first(
where={
"workspaceId": workspace_id,
"path": path,
"isDeleted": False,
}
)
return WorkspaceFile.from_db(file) if file else None
async def list_workspace_files(
workspace_id: str,
path_prefix: Optional[str] = None,
include_deleted: bool = False,
limit: Optional[int] = None,
offset: int = 0,
) -> list[WorkspaceFile]:
"""
List files in a workspace.
Args:
workspace_id: The workspace ID
path_prefix: Optional path prefix to filter (e.g., "/documents/")
include_deleted: Whether to include soft-deleted files
limit: Maximum number of files to return
offset: Number of files to skip
Returns:
List of WorkspaceFile instances
"""
where_clause: UserWorkspaceFileWhereInput = {"workspaceId": workspace_id}
if not include_deleted:
where_clause["isDeleted"] = False
if path_prefix:
# Normalize prefix
if not path_prefix.startswith("/"):
path_prefix = f"/{path_prefix}"
where_clause["path"] = {"startswith": path_prefix}
files = await UserWorkspaceFile.prisma().find_many(
where=where_clause,
order={"createdAt": "desc"},
take=limit,
skip=offset,
)
return [WorkspaceFile.from_db(f) for f in files]
async def count_workspace_files(
workspace_id: str,
path_prefix: Optional[str] = None,
include_deleted: bool = False,
) -> int:
"""
Count files in a workspace.
Args:
workspace_id: The workspace ID
path_prefix: Optional path prefix to filter (e.g., "/sessions/abc123/")
include_deleted: Whether to include soft-deleted files
Returns:
Number of files
"""
where_clause: UserWorkspaceFileWhereInput = {"workspaceId": workspace_id}
if not include_deleted:
where_clause["isDeleted"] = False
if path_prefix:
# Normalize prefix
if not path_prefix.startswith("/"):
path_prefix = f"/{path_prefix}"
where_clause["path"] = {"startswith": path_prefix}
return await UserWorkspaceFile.prisma().count(where=where_clause)
async def soft_delete_workspace_file(
file_id: str,
workspace_id: str,
) -> Optional[WorkspaceFile]:
"""
Soft-delete a workspace file.
The path is modified to include a deletion timestamp to free up the original
path for new files while preserving the record for potential recovery.
Args:
file_id: The file ID
workspace_id: Workspace ID for scoping (required)
Returns:
Updated WorkspaceFile instance or None if not found
"""
# First verify the file exists and belongs to workspace
file = await get_workspace_file(file_id, workspace_id)
if file is None:
return None
deleted_at = datetime.now(timezone.utc)
# Modify path to free up the unique constraint for new files at original path
# Format: {original_path}__deleted__{timestamp}
deleted_path = f"{file.path}__deleted__{int(deleted_at.timestamp())}"
updated = await UserWorkspaceFile.prisma().update(
where={"id": file_id},
data={
"isDeleted": True,
"deletedAt": deleted_at,
"path": deleted_path,
},
)
logger.info(f"Soft-deleted workspace file {file_id}")
return WorkspaceFile.from_db(updated) if updated else None
async def get_workspace_total_size(workspace_id: str) -> int:
"""
Get the total size of all files in a workspace.
Args:
workspace_id: The workspace ID
Returns:
Total size in bytes
"""
files = await list_workspace_files(workspace_id)
return sum(file.size_bytes for file in files)