Compare commits

2 Commits

Author SHA1 Message Date
Otto
bf55f7b648 fix: address review feedback
- Close Redis event bus connection in finally block (fixes connection leak)
- Handle race condition: re-check DB after subscribing to pubsub
- Handle REVIEW status (human-in-the-loop) as a stop-waiting state
- Handle TERMINATED explicitly in run_agent instead of falling into else-branch
- Include REVIEW in queryable statuses when waiting
2026-02-18 11:58:12 +00:00
Otto
aeb3e8a129 feat(copilot): add wait_for_result to run_agent and wait_if_running to agent_output
SECRT-2003: Allow CoPilot to wait for agent execution completion.

- Add execution_utils.py with Redis pubsub-based wait_for_execution()
- run_agent: new wait_for_result param (0-300s), returns outputs on completion
- agent_output: new wait_if_running param, waits for running executions
- Both tools handle timeout gracefully, returning current state
2026-02-17 18:10:14 +00:00
20 changed files with 464 additions and 238 deletions
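
For illustration, a hypothetical run_agent tool call using the new parameter from the feat commit (the agent slug and input values are placeholders, not taken from this changeset):

tool_args = {
    "username_agent_slug": "alice/daily-digest",  # hypothetical agent
    "inputs": {},
    "wait_for_result": 60,  # wait up to 60s for completion (0-300; 0 = start and return immediately)
}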

View File

@@ -164,23 +164,21 @@ class CoPilotExecutor(AppProcess):
self._cancel_thread, self.cancel_client, "[cleanup][cancel]"
)
# Clean up worker threads (closes per-loop workspace storage sessions)
# Shutdown executor
if self._executor:
from .processor import cleanup_worker
logger.info(f"[cleanup {pid}] Cleaning up workers...")
futures = []
for _ in range(self._executor._max_workers):
futures.append(self._executor.submit(cleanup_worker))
for f in futures:
try:
f.result(timeout=10)
except Exception as e:
logger.warning(f"[cleanup {pid}] Worker cleanup error: {e}")
logger.info(f"[cleanup {pid}] Shutting down executor...")
self._executor.shutdown(wait=False)
# Close async resources (workspace storage aiohttp session, etc.)
try:
from backend.util.workspace_storage import shutdown_workspace_storage
loop = asyncio.new_event_loop()
loop.run_until_complete(shutdown_workspace_storage())
loop.close()
except Exception as e:
logger.warning(f"[cleanup {pid}] Error closing workspace storage: {e}")
# Release any remaining locks
for task_id, lock in list(self._task_locks.items()):
try:
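
The replacement cleanup path closes async resources from synchronous shutdown code by spinning up a short-lived event loop. A generic sketch of that pattern, independent of this codebase (names are illustrative):

import asyncio

async def close_resources() -> None:
    ...  # e.g. close aiohttp sessions, flush buffers

def sync_cleanup() -> None:
    # Assumes no event loop is already running in this thread.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(close_resources())
    finally:
        loop.close()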

View File

@@ -60,18 +60,6 @@ def init_worker():
_tls.processor.on_executor_start()
def cleanup_worker():
"""Clean up the processor for the current worker thread.
Should be called before the worker thread's event loop is destroyed so
that event-loop-bound resources (e.g. ``aiohttp.ClientSession``) are
closed on the correct loop.
"""
processor: CoPilotProcessor | None = getattr(_tls, "processor", None)
if processor is not None:
processor.cleanup()
# ============ Processor Class ============ #
@@ -110,28 +98,6 @@ class CoPilotProcessor:
logger.info(f"[CoPilotExecutor] Worker {self.tid} started")
def cleanup(self):
"""Clean up event-loop-bound resources before the loop is destroyed.
Shuts down the workspace storage instance that belongs to this
worker's event loop, ensuring ``aiohttp.ClientSession.close()``
runs on the same loop that created the session.
"""
from backend.util.workspace_storage import shutdown_workspace_storage
try:
future = asyncio.run_coroutine_threadsafe(
shutdown_workspace_storage(), self.execution_loop
)
future.result(timeout=5)
except Exception as e:
logger.warning(f"[CoPilotExecutor] Worker {self.tid} cleanup error: {e}")
# Stop the event loop
self.execution_loop.call_soon_threadsafe(self.execution_loop.stop)
self.execution_thread.join(timeout=5)
logger.info(f"[CoPilotExecutor] Worker {self.tid} cleaned up")
@error_logged(swallow=False)
def execute(
self,

View File

@@ -693,15 +693,11 @@ async def stream_chat_completion_sdk(
await asyncio.sleep(0.5)
raw_transcript = read_transcript_file(captured_transcript.path)
if raw_transcript:
try:
async with asyncio.timeout(30):
await _upload_transcript_bg(
user_id, session_id, raw_transcript
)
except asyncio.TimeoutError:
logger.warning(
f"[SDK] Transcript upload timed out for {session_id}"
)
task = asyncio.create_task(
_upload_transcript_bg(user_id, session_id, raw_transcript)
)
_background_tasks.add(task)
task.add_done_callback(_background_tasks.discard)
else:
logger.debug("[SDK] Stop hook fired but transcript not usable")

View File

@@ -5,7 +5,7 @@ import re
from datetime import datetime, timedelta, timezone
from typing import Any
from pydantic import BaseModel, field_validator
from pydantic import BaseModel, Field, field_validator
from backend.api.features.library.model import LibraryAgent
from backend.copilot.model import ChatSession
@@ -13,6 +13,7 @@ from backend.data.db_accessors import execution_db, library_db
from backend.data.execution import ExecutionStatus, GraphExecution, GraphExecutionMeta
from .base import BaseTool
from .execution_utils import TERMINAL_STATUSES, wait_for_execution
from .models import (
AgentOutputResponse,
ErrorResponse,
@@ -33,6 +34,7 @@ class AgentOutputInput(BaseModel):
store_slug: str = ""
execution_id: str = ""
run_time: str = "latest"
wait_if_running: int = Field(default=0, ge=0, le=300)
@field_validator(
"agent_name",
@@ -116,6 +118,11 @@ class AgentOutputTool(BaseTool):
Select which run to retrieve using:
- execution_id: Specific execution ID
- run_time: 'latest' (default), 'yesterday', 'last week', or ISO date 'YYYY-MM-DD'
Wait for completion (optional):
- wait_if_running: Max seconds to wait if execution is still running (0-300).
If the execution is running/queued, waits up to this many seconds for completion.
Returns current status on timeout. If already finished, returns immediately.
"""
@property
@@ -145,6 +152,13 @@ class AgentOutputTool(BaseTool):
"Time filter: 'latest', 'yesterday', 'last week', or 'YYYY-MM-DD'"
),
},
"wait_if_running": {
"type": "integer",
"description": (
"Max seconds to wait if execution is still running (0-300). "
"If running, waits for completion. Returns current state on timeout."
),
},
},
"required": [],
}
@@ -224,10 +238,14 @@ class AgentOutputTool(BaseTool):
execution_id: str | None,
time_start: datetime | None,
time_end: datetime | None,
include_running: bool = False,
) -> tuple[GraphExecution | None, list[GraphExecutionMeta], str | None]:
"""
Fetch execution(s) based on filters.
Returns (single_execution, available_executions_meta, error_message).
Args:
include_running: If True, also look for running/queued executions (for waiting)
"""
exec_db = execution_db()
@@ -242,11 +260,23 @@ class AgentOutputTool(BaseTool):
return None, [], f"Execution '{execution_id}' not found"
return execution, [], None
# Get completed executions with time filters
# Determine which statuses to query
statuses = [ExecutionStatus.COMPLETED]
if include_running:
statuses.extend(
[
ExecutionStatus.RUNNING,
ExecutionStatus.QUEUED,
ExecutionStatus.INCOMPLETE,
ExecutionStatus.REVIEW,
]
)
# Get executions with time filters
executions = await exec_db.get_graph_executions(
graph_id=graph_id,
user_id=user_id,
statuses=[ExecutionStatus.COMPLETED],
statuses=statuses,
created_time_gte=time_start,
created_time_lte=time_end,
limit=10,
@@ -313,10 +343,33 @@ class AgentOutputTool(BaseTool):
for e in available_executions[:5]
]
message = f"Found execution outputs for agent '{agent.name}'"
# Build appropriate message based on execution status
if execution.status == ExecutionStatus.COMPLETED:
message = f"Found execution outputs for agent '{agent.name}'"
elif execution.status == ExecutionStatus.FAILED:
message = f"Execution for agent '{agent.name}' failed"
elif execution.status == ExecutionStatus.TERMINATED:
message = f"Execution for agent '{agent.name}' was terminated"
elif execution.status == ExecutionStatus.REVIEW:
message = (
f"Execution for agent '{agent.name}' is awaiting human review. "
"The user needs to approve it before it can continue."
)
elif execution.status in (
ExecutionStatus.RUNNING,
ExecutionStatus.QUEUED,
ExecutionStatus.INCOMPLETE,
):
message = (
f"Execution for agent '{agent.name}' is still {execution.status.value}. "
"Results may be incomplete. Use wait_if_running to wait for completion."
)
else:
message = f"Found execution for agent '{agent.name}' (status: {execution.status.value})"
if len(available_executions) > 1:
message += (
f". Showing latest of {len(available_executions)} matching executions."
f" Showing latest of {len(available_executions)} matching executions."
)
return AgentOutputResponse(
@@ -431,13 +484,17 @@ class AgentOutputTool(BaseTool):
# Parse time expression
time_start, time_end = parse_time_expression(input_data.run_time)
# Fetch execution(s)
# Check if we should wait for running executions
wait_timeout = input_data.wait_if_running
# Fetch execution(s) - include running if we're going to wait
execution, available_executions, exec_error = await self._get_execution(
user_id=user_id,
graph_id=agent.graph_id,
execution_id=input_data.execution_id or None,
time_start=time_start,
time_end=time_end,
include_running=wait_timeout > 0,
)
if exec_error:
@@ -446,4 +503,17 @@ class AgentOutputTool(BaseTool):
session_id=session_id,
)
# If we have an execution that's still running and we should wait
if execution and wait_timeout > 0 and execution.status not in TERMINAL_STATUSES:
logger.info(
f"Execution {execution.id} is {execution.status}, "
f"waiting up to {wait_timeout}s for completion"
)
execution = await wait_for_execution(
user_id=user_id,
graph_id=agent.graph_id,
execution_id=execution.id,
timeout_seconds=wait_timeout,
)
return self._build_response(agent, execution, available_executions, session_id)
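
For illustration, arguments a model might pass to agent_output with the new parameter (the agent name is a placeholder):

tool_args = {
    "agent_name": "market-research",  # hypothetical agent
    "run_time": "latest",
    "wait_if_running": 120,  # wait up to 120s if still running (max 300)
}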

View File

@@ -0,0 +1,169 @@
"""Shared utilities for execution waiting and status handling."""
import asyncio
import logging
from typing import Any
from backend.data.db_accessors import execution_db
from backend.data.execution import (
AsyncRedisExecutionEventBus,
ExecutionStatus,
GraphExecution,
GraphExecutionEvent,
)
logger = logging.getLogger(__name__)
# Terminal statuses that indicate execution is complete
TERMINAL_STATUSES = frozenset(
{
ExecutionStatus.COMPLETED,
ExecutionStatus.FAILED,
ExecutionStatus.TERMINATED,
}
)
# Statuses where execution is paused but not finished (e.g. human-in-the-loop)
PAUSED_STATUSES = frozenset(
{
ExecutionStatus.REVIEW,
}
)
# Statuses that mean "stop waiting" (terminal or paused)
STOP_WAITING_STATUSES = TERMINAL_STATUSES | PAUSED_STATUSES
async def wait_for_execution(
user_id: str,
graph_id: str,
execution_id: str,
timeout_seconds: int,
) -> GraphExecution | None:
"""
Wait for an execution to reach a terminal or paused status using Redis pubsub.
Handles the race condition between checking status and subscribing by
re-checking the DB after the subscription is established.
Args:
user_id: User ID
graph_id: Graph ID
execution_id: Execution ID to wait for
timeout_seconds: Max seconds to wait
Returns:
The execution with current status, or None if not found
"""
exec_db = execution_db()
# Quick check — maybe it's already done
execution = await exec_db.get_graph_execution(
user_id=user_id,
execution_id=execution_id,
include_node_executions=False,
)
if not execution:
return None
if execution.status in STOP_WAITING_STATUSES:
logger.debug(
f"Execution {execution_id} already in stop-waiting state: "
f"{execution.status}"
)
return execution
logger.info(
f"Waiting up to {timeout_seconds}s for execution {execution_id} "
f"(current status: {execution.status})"
)
event_bus = AsyncRedisExecutionEventBus()
channel_key = f"{user_id}/{graph_id}/{execution_id}"
try:
result = await asyncio.wait_for(
_subscribe_and_wait(
event_bus, channel_key, user_id, execution_id, exec_db
),
timeout=timeout_seconds,
)
return result
except asyncio.TimeoutError:
logger.info(f"Timeout waiting for execution {execution_id}")
except Exception as e:
logger.error(f"Error waiting for execution: {e}", exc_info=True)
finally:
await event_bus.close()
# Return current state on timeout/error
return await exec_db.get_graph_execution(
user_id=user_id,
execution_id=execution_id,
include_node_executions=False,
)
async def _subscribe_and_wait(
event_bus: AsyncRedisExecutionEventBus,
channel_key: str,
user_id: str,
execution_id: str,
exec_db: Any,
) -> GraphExecution | None:
"""
Subscribe to execution events and wait for a terminal/paused status.
To avoid the race condition where the execution completes between the
initial DB check and the Redis subscription, we:
1. Start listening (which subscribes internally)
2. Re-check the DB after subscription is active
3. If still running, wait for pubsub events
"""
listen_iter = event_bus.listen_events(channel_key).__aiter__()
# Start a background task to consume pubsub events
result_event: GraphExecutionEvent | None = None
done = asyncio.Event()
async def _consume():
nonlocal result_event
async for event in listen_iter:
if isinstance(event, GraphExecutionEvent):
logger.debug(f"Received execution update: {event.status}")
if event.status in STOP_WAITING_STATUSES:
result_event = event
done.set()
return
consume_task = asyncio.create_task(_consume())
# Give the subscription a moment to establish, then re-check DB
await asyncio.sleep(0.1)
execution = await exec_db.get_graph_execution(
user_id=user_id,
execution_id=execution_id,
include_node_executions=False,
)
if execution and execution.status in STOP_WAITING_STATUSES:
consume_task.cancel()
return execution
# Wait for the pubsub consumer to find a terminal event
await done.wait()
consume_task.cancel()
# Fetch full execution
return await exec_db.get_graph_execution(
user_id=user_id,
execution_id=execution_id,
include_node_executions=False,
)
def get_execution_outputs(execution: GraphExecution | None) -> dict[str, Any] | None:
"""Extract outputs from an execution, or return None."""
if execution is None:
return None
return execution.outputs
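
A usage sketch for this module, inside an async context (the IDs are placeholders): wait up to a minute, then branch on whether a terminal or paused status was reached.

async def fetch_with_wait() -> None:
    execution = await wait_for_execution(
        user_id="user-123",        # placeholder IDs
        graph_id="graph-abc",
        execution_id="exec-xyz",
        timeout_seconds=60,
    )
    if execution and execution.status in TERMINAL_STATUSES:
        outputs = get_execution_outputs(execution)  # dict of outputs, or None
    elif execution and execution.status in PAUSED_STATUSES:
        pass  # awaiting human review; surface that to the caller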

View File

@@ -33,6 +33,7 @@ query SearchFeatureRequests($term: String!, $filter: IssueFilter, $first: Int) {
id
identifier
title
description
}
}
}
@@ -204,6 +205,7 @@ class SearchFeatureRequestsTool(BaseTool):
id=node["id"],
identifier=node["identifier"],
title=node["title"],
description=node.get("description"),
)
for node in nodes
]
@@ -237,11 +239,7 @@ class CreateFeatureRequestTool(BaseTool):
"Create a new feature request or add a customer need to an existing one. "
"Always search first with search_feature_requests to avoid duplicates. "
"If a matching request exists, pass its ID as existing_issue_id to add "
"the user's need to it instead of creating a duplicate. "
"IMPORTANT: Never include personally identifiable information (PII) in "
"the title or description — no names, emails, phone numbers, company "
"names, or other identifying details. Write titles and descriptions in "
"generic, feature-focused language."
"the user's need to it instead of creating a duplicate."
)
@property
@@ -251,20 +249,11 @@ class CreateFeatureRequestTool(BaseTool):
"properties": {
"title": {
"type": "string",
"description": (
"Title for the feature request. Must be generic and "
"feature-focused — do not include any user names, emails, "
"company names, or other PII."
),
"description": "Title for the feature request.",
},
"description": {
"type": "string",
"description": (
"Detailed description of what the user wants and why. "
"Must not contain any personally identifiable information "
"(PII) — describe the feature need generically without "
"referencing specific users, companies, or contact details."
),
"description": "Detailed description of what the user wants and why.",
},
"existing_issue_id": {
"type": "string",

View File

@@ -117,11 +117,13 @@ class TestSearchFeatureRequestsTool:
"id": "id-1",
"identifier": "FR-1",
"title": "Dark mode",
"description": "Add dark mode support",
},
{
"id": "id-2",
"identifier": "FR-2",
"title": "Dark theme",
"description": None,
},
]
patcher, _ = _mock_linear_config(query_return=_search_response(nodes))

View File

@@ -486,6 +486,7 @@ class FeatureRequestInfo(BaseModel):
id: str
identifier: str
title: str
description: str | None = None
class FeatureRequestSearchResponse(ToolResponseBase):

View File

@@ -9,6 +9,7 @@ from backend.copilot.config import ChatConfig
from backend.copilot.model import ChatSession
from backend.copilot.tracking import track_agent_run_success, track_agent_scheduled
from backend.data.db_accessors import graph_db, library_db, user_db
from backend.data.execution import ExecutionStatus
from backend.data.graph import GraphModel
from backend.data.model import CredentialsMetaInput
from backend.executor import utils as execution_utils
@@ -24,6 +25,7 @@ from .helpers import get_inputs_from_schema
from .models import (
AgentDetails,
AgentDetailsResponse,
AgentOutputResponse,
ErrorResponse,
ExecutionOptions,
ExecutionStartedResponse,
@@ -33,6 +35,7 @@ from .models import (
ToolResponseBase,
UserReadiness,
)
from .execution_utils import get_execution_outputs, wait_for_execution
from .utils import (
build_missing_credentials_from_graph,
extract_credentials_from_schema,
@@ -66,6 +69,7 @@ class RunAgentInput(BaseModel):
schedule_name: str = ""
cron: str = ""
timezone: str = "UTC"
wait_for_result: int = Field(default=0, ge=0, le=300)
@field_validator(
"username_agent_slug",
@@ -147,6 +151,14 @@ class RunAgentTool(BaseTool):
"type": "string",
"description": "IANA timezone for schedule (default: UTC)",
},
"wait_for_result": {
"type": "integer",
"description": (
"Max seconds to wait for execution to complete (0-300). "
"If >0, blocks until the execution finishes or times out. "
"Returns execution outputs when complete."
),
},
},
"required": [],
}
@@ -341,6 +353,7 @@ class RunAgentTool(BaseTool):
graph=graph,
graph_credentials=graph_credentials,
inputs=params.inputs,
wait_for_result=params.wait_for_result,
)
except NotFoundError as e:
@@ -424,8 +437,9 @@ class RunAgentTool(BaseTool):
graph: GraphModel,
graph_credentials: dict[str, CredentialsMetaInput],
inputs: dict[str, Any],
wait_for_result: int = 0,
) -> ToolResponseBase:
"""Execute an agent immediately."""
"""Execute an agent immediately, optionally waiting for completion."""
session_id = session.session_id
# Check rate limits
@@ -462,6 +476,78 @@ class RunAgentTool(BaseTool):
)
library_agent_link = f"/library/agents/{library_agent.id}"
# If wait_for_result is requested, wait for execution to complete
if wait_for_result > 0:
logger.info(
f"Waiting up to {wait_for_result}s for execution {execution.id}"
)
completed = await wait_for_execution(
user_id=user_id,
graph_id=library_agent.graph_id,
execution_id=execution.id,
timeout_seconds=wait_for_result,
)
if completed and completed.status == ExecutionStatus.COMPLETED:
outputs = get_execution_outputs(completed)
return AgentOutputResponse(
message=(
f"Agent '{library_agent.name}' completed successfully. "
f"View at {library_agent_link}."
),
session_id=session_id,
execution_id=execution.id,
graph_id=library_agent.graph_id,
graph_name=library_agent.name,
outputs=outputs or {},
)
elif completed and completed.status == ExecutionStatus.FAILED:
return ErrorResponse(
message=(
f"Agent '{library_agent.name}' execution failed. "
f"View details at {library_agent_link}."
),
session_id=session_id,
)
elif completed and completed.status == ExecutionStatus.TERMINATED:
return ErrorResponse(
message=(
f"Agent '{library_agent.name}' execution was terminated. "
f"View details at {library_agent_link}."
),
session_id=session_id,
)
elif completed and completed.status == ExecutionStatus.REVIEW:
return ExecutionStartedResponse(
message=(
f"Agent '{library_agent.name}' is awaiting human review. "
f"Check at {library_agent_link}."
),
session_id=session_id,
execution_id=execution.id,
graph_id=library_agent.graph_id,
graph_name=library_agent.name,
library_agent_id=library_agent.id,
library_agent_link=library_agent_link,
)
else:
status = completed.status.value if completed else "unknown"
return ExecutionStartedResponse(
message=(
f"Agent '{library_agent.name}' is still {status} after "
f"{wait_for_result}s. Check results later at "
f"{library_agent_link}. "
f"Use view_agent_output with wait_if_running to check again."
),
session_id=session_id,
execution_id=execution.id,
graph_id=library_agent.graph_id,
graph_name=library_agent.name,
library_agent_id=library_agent.id,
library_agent_link=library_agent_link,
)
return ExecutionStartedResponse(
message=(
f"Agent '{library_agent.name}' execution started successfully. "

View File

@@ -93,15 +93,7 @@ from backend.data.user import (
get_user_notification_preference,
update_user_integrations,
)
from backend.data.workspace import (
count_workspace_files,
create_workspace_file,
get_or_create_workspace,
get_workspace_file,
get_workspace_file_by_path,
list_workspace_files,
soft_delete_workspace_file,
)
from backend.data.workspace import get_or_create_workspace
from backend.util.service import (
AppService,
AppServiceClient,
@@ -282,13 +274,7 @@ class DatabaseManager(AppService):
get_user_execution_summary_data = _(get_user_execution_summary_data)
# ============ Workspace ============ #
count_workspace_files = _(count_workspace_files)
create_workspace_file = _(create_workspace_file)
get_or_create_workspace = _(get_or_create_workspace)
get_workspace_file = _(get_workspace_file)
get_workspace_file_by_path = _(get_workspace_file_by_path)
list_workspace_files = _(list_workspace_files)
soft_delete_workspace_file = _(soft_delete_workspace_file)
# ============ Understanding ============ #
get_business_understanding = _(get_business_understanding)
@@ -452,13 +438,7 @@ class DatabaseManagerAsyncClient(AppServiceClient):
get_user_execution_summary_data = d.get_user_execution_summary_data
# ============ Workspace ============ #
count_workspace_files = d.count_workspace_files
create_workspace_file = d.create_workspace_file
get_or_create_workspace = d.get_or_create_workspace
get_workspace_file = d.get_workspace_file
get_workspace_file_by_path = d.get_workspace_file_by_path
list_workspace_files = d.list_workspace_files
soft_delete_workspace_file = d.soft_delete_workspace_file
# ============ Understanding ============ #
get_business_understanding = d.get_business_understanding

View File

@@ -164,23 +164,21 @@ async def create_workspace_file(
async def get_workspace_file(
file_id: str,
workspace_id: str,
workspace_id: Optional[str] = None,
) -> Optional[WorkspaceFile]:
"""
Get a workspace file by ID.
Args:
file_id: The file ID
workspace_id: Workspace ID for scoping (required)
workspace_id: Optional workspace ID for validation
Returns:
WorkspaceFile instance or None
"""
where_clause: UserWorkspaceFileWhereInput = {
"id": file_id,
"isDeleted": False,
"workspaceId": workspace_id,
}
where_clause: dict = {"id": file_id, "isDeleted": False}
if workspace_id:
where_clause["workspaceId"] = workspace_id
file = await UserWorkspaceFile.prisma().find_first(where=where_clause)
return WorkspaceFile.from_db(file) if file else None
@@ -270,7 +268,7 @@ async def count_workspace_files(
Returns:
Number of files
"""
where_clause: UserWorkspaceFileWhereInput = {"workspaceId": workspace_id}
where_clause: dict = {"workspaceId": workspace_id}
if not include_deleted:
where_clause["isDeleted"] = False
@@ -285,7 +283,7 @@ async def count_workspace_files(
async def soft_delete_workspace_file(
file_id: str,
workspace_id: str,
workspace_id: Optional[str] = None,
) -> Optional[WorkspaceFile]:
"""
Soft-delete a workspace file.
@@ -295,7 +293,7 @@ async def soft_delete_workspace_file(
Args:
file_id: The file ID
workspace_id: Workspace ID for scoping (required)
workspace_id: Optional workspace ID for validation
Returns:
Updated WorkspaceFile instance or None if not found

View File

@@ -28,7 +28,7 @@ from typing import (
import httpx
import uvicorn
from fastapi import FastAPI, Request, responses
from prisma.errors import DataError, UniqueViolationError
from prisma.errors import DataError
from pydantic import BaseModel, TypeAdapter, create_model
import backend.util.exceptions as exceptions
@@ -201,7 +201,6 @@ EXCEPTION_MAPPING = {
UnhealthyServiceError,
HTTPClientError,
HTTPServerError,
UniqueViolationError,
*[
ErrorType
for _, ErrorType in inspect.getmembers(exceptions)
@@ -417,9 +416,6 @@ class AppService(BaseAppService, ABC):
self.fastapi_app.add_exception_handler(
DataError, self._handle_internal_http_error(400)
)
self.fastapi_app.add_exception_handler(
UniqueViolationError, self._handle_internal_http_error(400)
)
self.fastapi_app.add_exception_handler(
Exception, self._handle_internal_http_error(500)
)
@@ -482,7 +478,6 @@ def get_service_client(
# Don't retry these specific exceptions that won't be fixed by retrying
ValueError, # Invalid input/parameters
DataError, # Prisma data integrity errors (foreign key, unique constraints)
UniqueViolationError, # Unique constraint violations
KeyError, # Missing required data
TypeError, # Wrong data types
AttributeError, # Missing attributes

View File

@@ -12,8 +12,15 @@ from typing import Optional
from prisma.errors import UniqueViolationError
from backend.data.db_accessors import workspace_db
from backend.data.workspace import WorkspaceFile
from backend.data.workspace import (
WorkspaceFile,
count_workspace_files,
create_workspace_file,
get_workspace_file,
get_workspace_file_by_path,
list_workspace_files,
soft_delete_workspace_file,
)
from backend.util.settings import Config
from backend.util.virus_scanner import scan_content_safe
from backend.util.workspace_storage import compute_file_checksum, get_workspace_storage
@@ -118,9 +125,8 @@ class WorkspaceManager:
Raises:
FileNotFoundError: If file doesn't exist
"""
db = workspace_db()
resolved_path = self._resolve_path(path)
file = await db.get_workspace_file_by_path(self.workspace_id, resolved_path)
file = await get_workspace_file_by_path(self.workspace_id, resolved_path)
if file is None:
raise FileNotFoundError(f"File not found at path: {resolved_path}")
@@ -140,8 +146,7 @@ class WorkspaceManager:
Raises:
FileNotFoundError: If file doesn't exist
"""
db = workspace_db()
file = await db.get_workspace_file(file_id, self.workspace_id)
file = await get_workspace_file(file_id, self.workspace_id)
if file is None:
raise FileNotFoundError(f"File not found: {file_id}")
@@ -199,10 +204,8 @@ class WorkspaceManager:
# For overwrite=True, we let the write proceed and handle via UniqueViolationError
# This ensures the new file is written to storage BEFORE the old one is deleted,
# preventing data loss if the new write fails
db = workspace_db()
if not overwrite:
existing = await db.get_workspace_file_by_path(self.workspace_id, path)
existing = await get_workspace_file_by_path(self.workspace_id, path)
if existing is not None:
raise ValueError(f"File already exists at path: {path}")
@@ -229,7 +232,7 @@ class WorkspaceManager:
# Create database record - handle race condition where another request
# created a file at the same path between our check and create
try:
file = await db.create_workspace_file(
file = await create_workspace_file(
workspace_id=self.workspace_id,
file_id=file_id,
name=filename,
@@ -243,12 +246,12 @@ class WorkspaceManager:
# Race condition: another request created a file at this path
if overwrite:
# Re-fetch and delete the conflicting file, then retry
existing = await db.get_workspace_file_by_path(self.workspace_id, path)
existing = await get_workspace_file_by_path(self.workspace_id, path)
if existing:
await self.delete_file(existing.id)
# Retry the create - if this also fails, clean up storage file
try:
file = await db.create_workspace_file(
file = await create_workspace_file(
workspace_id=self.workspace_id,
file_id=file_id,
name=filename,
@@ -311,9 +314,8 @@ class WorkspaceManager:
List of WorkspaceFile instances
"""
effective_path = self._get_effective_path(path, include_all_sessions)
db = workspace_db()
return await db.list_workspace_files(
return await list_workspace_files(
workspace_id=self.workspace_id,
path_prefix=effective_path,
limit=limit,
@@ -330,8 +332,7 @@ class WorkspaceManager:
Returns:
True if deleted, False if not found
"""
db = workspace_db()
file = await db.get_workspace_file(file_id, self.workspace_id)
file = await get_workspace_file(file_id, self.workspace_id)
if file is None:
return False
@@ -344,7 +345,7 @@ class WorkspaceManager:
# Continue with database soft-delete even if storage delete fails
# Soft-delete database record
result = await db.soft_delete_workspace_file(file_id, self.workspace_id)
result = await soft_delete_workspace_file(file_id, self.workspace_id)
return result is not None
async def get_download_url(self, file_id: str, expires_in: int = 3600) -> str:
@@ -361,8 +362,7 @@ class WorkspaceManager:
Raises:
FileNotFoundError: If file doesn't exist
"""
db = workspace_db()
file = await db.get_workspace_file(file_id, self.workspace_id)
file = await get_workspace_file(file_id, self.workspace_id)
if file is None:
raise FileNotFoundError(f"File not found: {file_id}")
@@ -379,8 +379,7 @@ class WorkspaceManager:
Returns:
WorkspaceFile instance or None
"""
db = workspace_db()
return await db.get_workspace_file(file_id, self.workspace_id)
return await get_workspace_file(file_id, self.workspace_id)
async def get_file_info_by_path(self, path: str) -> Optional[WorkspaceFile]:
"""
@@ -395,9 +394,8 @@ class WorkspaceManager:
Returns:
WorkspaceFile instance or None
"""
db = workspace_db()
resolved_path = self._resolve_path(path)
return await db.get_workspace_file_by_path(self.workspace_id, resolved_path)
return await get_workspace_file_by_path(self.workspace_id, resolved_path)
async def get_file_count(
self,
@@ -419,8 +417,7 @@ class WorkspaceManager:
Number of files
"""
effective_path = self._get_effective_path(path, include_all_sessions)
db = workspace_db()
return await db.count_workspace_files(
return await count_workspace_files(
self.workspace_id, path_prefix=effective_path
)

View File

@@ -93,14 +93,7 @@ class WorkspaceStorageBackend(ABC):
class GCSWorkspaceStorage(WorkspaceStorageBackend):
"""Google Cloud Storage implementation for workspace storage.
Each instance owns a single ``aiohttp.ClientSession`` and GCS async
client. Because ``ClientSession`` is bound to the event loop on which it
was created, callers that run on separate loops (e.g. copilot executor
worker threads) **must** obtain their own ``GCSWorkspaceStorage`` instance
via :func:`get_workspace_storage` which is event-loop-aware.
"""
"""Google Cloud Storage implementation for workspace storage."""
def __init__(self, bucket_name: str):
self.bucket_name = bucket_name
@@ -344,73 +337,60 @@ class LocalWorkspaceStorage(WorkspaceStorageBackend):
raise ValueError(f"Invalid storage path format: {storage_path}")
# ---------------------------------------------------------------------------
# Storage instance management
# ---------------------------------------------------------------------------
# ``aiohttp.ClientSession`` is bound to the event loop where it is created.
# The copilot executor runs each worker in its own thread with a dedicated
# event loop, so a single global ``GCSWorkspaceStorage`` instance would break.
#
# For **local storage** a single shared instance is fine (no async I/O).
# For **GCS storage** we keep one instance *per event loop* so every loop
# gets its own ``ClientSession``.
# ---------------------------------------------------------------------------
_local_storage: Optional[LocalWorkspaceStorage] = None
_gcs_storages: dict[int, GCSWorkspaceStorage] = {}
# Global storage backend instance
_workspace_storage: Optional[WorkspaceStorageBackend] = None
_storage_lock = asyncio.Lock()
async def get_workspace_storage() -> WorkspaceStorageBackend:
"""Return a workspace storage backend for the **current** event loop.
* Local storage → single shared instance (no event-loop affinity).
* GCS storage → one instance per event loop to avoid cross-loop
``aiohttp`` errors.
"""
global _local_storage
Get the workspace storage backend instance.
config = Config()
Uses GCS if media_gcs_bucket_name is configured, otherwise uses local storage.
"""
global _workspace_storage
# --- Local storage (shared) ---
if not config.media_gcs_bucket_name:
if _local_storage is None:
storage_dir = (
config.workspace_storage_dir if config.workspace_storage_dir else None
)
logger.info(f"Using local workspace storage: {storage_dir or 'default'}")
_local_storage = LocalWorkspaceStorage(storage_dir)
return _local_storage
if _workspace_storage is None:
async with _storage_lock:
if _workspace_storage is None:
config = Config()
# --- GCS storage (per event loop) ---
loop_id = id(asyncio.get_running_loop())
if loop_id not in _gcs_storages:
logger.info(
f"Creating GCS workspace storage for loop {loop_id}: "
f"{config.media_gcs_bucket_name}"
)
_gcs_storages[loop_id] = GCSWorkspaceStorage(config.media_gcs_bucket_name)
return _gcs_storages[loop_id]
if config.media_gcs_bucket_name:
logger.info(
f"Using GCS workspace storage: {config.media_gcs_bucket_name}"
)
_workspace_storage = GCSWorkspaceStorage(
config.media_gcs_bucket_name
)
else:
storage_dir = (
config.workspace_storage_dir
if config.workspace_storage_dir
else None
)
logger.info(
f"Using local workspace storage: {storage_dir or 'default'}"
)
_workspace_storage = LocalWorkspaceStorage(storage_dir)
return _workspace_storage
async def shutdown_workspace_storage() -> None:
"""Shut down workspace storage for the **current** event loop.
Closes the ``aiohttp`` session owned by the current loop's GCS instance.
Each worker thread should call this on its own loop before the loop is
destroyed. The REST API lifespan hook calls it for the main server loop.
"""
global _local_storage
Properly shutdown the global workspace storage backend.
loop_id = id(asyncio.get_running_loop())
storage = _gcs_storages.pop(loop_id, None)
if storage is not None:
await storage.close()
Closes aiohttp sessions and other resources for GCS backend.
Should be called during application shutdown.
"""
global _workspace_storage
# Clear local storage only when the last GCS instance is gone
# (i.e. full shutdown, not just a single worker stopping).
if not _gcs_storages:
_local_storage = None
if _workspace_storage is not None:
async with _storage_lock:
if _workspace_storage is not None:
if isinstance(_workspace_storage, GCSWorkspaceStorage):
await _workspace_storage.close()
_workspace_storage = None
def compute_file_checksum(content: bytes) -> str:
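
The singleton side of this change uses lock-guarded double-checked initialization. A generic sketch of the pattern (the Client class is illustrative, not from this codebase):

import asyncio
from typing import Optional

class Client:
    async def close(self) -> None: ...

_instance: Optional[Client] = None
_lock = asyncio.Lock()

async def get_client() -> Client:
    global _instance
    if _instance is None:            # fast path without the lock
        async with _lock:
            if _instance is None:    # re-check once the lock is held
                _instance = Client()
    return _instance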

View File

@@ -69,11 +69,12 @@ test.describe("Marketplace Creator Page Basic Functionality", () => {
await marketplacePage.getFirstCreatorProfile(page);
await firstCreatorProfile.click();
await page.waitForURL("**/marketplace/creator/**");
await page.waitForLoadState("networkidle").catch(() => {});
const firstAgent = page
.locator('[data-testid="store-card"]:visible')
.first();
await firstAgent.waitFor({ state: "visible", timeout: 15000 });
await firstAgent.waitFor({ state: "visible", timeout: 30000 });
await firstAgent.click();
await page.waitForURL("**/marketplace/agent/**");

View File

@@ -115,11 +115,18 @@ test.describe("Marketplace Basic Functionality", () => {
const searchTerm = page.getByText("DummyInput").first();
await isVisible(searchTerm);
await expect
.poll(() => marketplacePage.getSearchResultsCount(page), {
timeout: 15000,
})
.toBeGreaterThan(0);
await page.waitForLoadState("networkidle").catch(() => {});
await page
.waitForFunction(
() =>
document.querySelectorAll('[data-testid="store-card"]').length > 0,
{ timeout: 15000 },
)
.catch(() => console.log("No search results appeared within timeout"));
const results = await marketplacePage.getSearchResultsCount(page);
expect(results).toBeGreaterThan(0);
console.log("Complete search flow works correctly test passed ✅");
});
@@ -128,9 +135,7 @@ test.describe("Marketplace Basic Functionality", () => {
});
test.describe("Marketplace Edge Cases", () => {
test("Search for non-existent item renders search page correctly", async ({
page,
}) => {
test("Search for non-existent item shows no results", async ({ page }) => {
const marketplacePage = new MarketplacePage(page);
await marketplacePage.goto(page);
@@ -146,18 +151,9 @@ test.describe("Marketplace Edge Cases", () => {
const searchTerm = page.getByText("xyznonexistentitemxyz123");
await isVisible(searchTerm);
// The search page should render either results or a "No results found" message
await page.waitForLoadState("networkidle").catch(() => {});
const hasResults =
(await page.locator('[data-testid="store-card"]').count()) > 0;
const hasNoResultsMsg = await page
.getByText("No results found")
.isVisible()
.catch(() => false);
expect(hasResults || hasNoResultsMsg).toBe(true);
const results = await marketplacePage.getSearchResultsCount(page);
expect(results).toBe(0);
console.log(
"Search for non-existent item renders search page correctly test passed ✅",
);
console.log("Search for non-existent item shows no results test passed ✅");
});
});

View File

@@ -125,8 +125,16 @@ export class BuildPage extends BasePage {
`[data-id="block-card-${blockCardId}"]`,
);
await blockCard.waitFor({ state: "visible", timeout: 10000 });
await blockCard.click();
try {
// Wait for the block card to be visible with a reasonable timeout
await blockCard.waitFor({ state: "visible", timeout: 10000 });
await blockCard.click();
} catch (error) {
console.log(
`Block ${block.name} (display: ${displayName}) returned from the API but not found in block list`,
);
console.log(`Error: ${error}`);
}
}
async hasBlock(_block: Block) {

View File

@@ -65,7 +65,7 @@ export class LoginPage {
await this.page.waitForLoadState("load", { timeout: 10_000 });
console.log("➡️ Navigating to /marketplace ...");
await this.page.goto("/marketplace", { timeout: 20_000 });
await this.page.goto("/marketplace", { timeout: 10_000 });
console.log("✅ Login process complete");
// If Wallet popover auto-opens, close it to avoid blocking account menu interactions

View File

@@ -9,12 +9,7 @@ export class MarketplacePage extends BasePage {
async goto(page: Page) {
await page.goto("/marketplace");
await page
.locator(
'[data-testid="store-card"], [data-testid="featured-store-card"]',
)
.first()
.waitFor({ state: "visible", timeout: 20000 });
await page.waitForLoadState("networkidle").catch(() => {});
}
async getMarketplaceTitle(page: Page) {
@@ -116,7 +111,7 @@ export class MarketplacePage extends BasePage {
async getFirstFeaturedAgent(page: Page) {
const { getId } = getSelectors(page);
const card = getId("featured-store-card").first();
await card.waitFor({ state: "visible", timeout: 15000 });
await card.waitFor({ state: "visible", timeout: 30000 });
return card;
}
@@ -124,14 +119,14 @@ export class MarketplacePage extends BasePage {
const card = this.page
.locator('[data-testid="store-card"]:visible')
.first();
await card.waitFor({ state: "visible", timeout: 15000 });
await card.waitFor({ state: "visible", timeout: 30000 });
return card;
}
async getFirstCreatorProfile(page: Page) {
const { getId } = getSelectors(page);
const card = getId("creator-card").first();
await card.waitFor({ state: "visible", timeout: 15000 });
await card.waitFor({ state: "visible", timeout: 30000 });
return card;
}

View File

@@ -45,9 +45,8 @@ export async function isEnabled(el: Locator) {
}
export async function hasMinCount(el: Locator, minCount: number) {
await expect
.poll(async () => await el.count(), { timeout: 10000 })
.toBeGreaterThanOrEqual(minCount);
const count = await el.count();
expect(count).toBeGreaterThanOrEqual(minCount);
}
export async function matchesUrl(page: Page, pattern: RegExp) {