Compare commits

2 Commits

Author SHA1 Message Date
Otto
bf55f7b648 fix: address review feedback
- Close Redis event bus connection in finally block (fixes connection leak)
- Handle race condition: re-check DB after subscribing to pubsub
- Handle REVIEW status (human-in-the-loop) as a stop-waiting state
- Handle TERMINATED explicitly in run_agent instead of falling into else-branch
- Include REVIEW in queryable statuses when waiting
2026-02-18 11:58:12 +00:00
Otto
aeb3e8a129 feat(copilot): add wait_for_result to run_agent and wait_if_running to agent_output
SECRT-2003: Allow CoPilot to wait for agent execution completion.

- Add execution_utils.py with Redis pubsub-based wait_for_execution()
- run_agent: new wait_for_result param (0-300s), returns outputs on completion
- agent_output: new wait_if_running param, waits for running executions
- Both tools handle timeout gracefully, returning current state
2026-02-17 18:10:14 +00:00
20 changed files with 464 additions and 238 deletions
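
For illustration, a hypothetical run_agent tool call using the new parameter from the feat commit (the agent slug and input values are placeholders, not taken from this changeset):

tool_args = {
    "username_agent_slug": "alice/daily-digest",  # hypothetical agent
    "inputs": {},
    "wait_for_result": 60,  # wait up to 60s for completion (0-300; 0 = start and return immediately)
}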

View File

@@ -164,23 +164,21 @@ class CoPilotExecutor(AppProcess):
self._cancel_thread, self.cancel_client, "[cleanup][cancel]"
)
# Clean up worker threads (closes per-loop workspace storage sessions)
# Shutdown executor
if self._executor:
from .processor import cleanup_worker
logger.info(f"[cleanup {pid}] Cleaning up workers...")
futures = []
for _ in range(self._executor._max_workers):
futures.append(self._executor.submit(cleanup_worker))
for f in futures:
try:
f.result(timeout=10)
except Exception as e:
logger.warning(f"[cleanup {pid}] Worker cleanup error: {e}")
logger.info(f"[cleanup {pid}] Shutting down executor...")
self._executor.shutdown(wait=False)
# Close async resources (workspace storage aiohttp session, etc.)
try:
from backend.util.workspace_storage import shutdown_workspace_storage
loop = asyncio.new_event_loop()
loop.run_until_complete(shutdown_workspace_storage())
loop.close()
except Exception as e:
logger.warning(f"[cleanup {pid}] Error closing workspace storage: {e}")
# Release any remaining locks
for task_id, lock in list(self._task_locks.items()):
try:
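
The replacement cleanup path closes async resources from synchronous shutdown code by spinning up a short-lived event loop. A generic sketch of that pattern, independent of this codebase (names are illustrative):

import asyncio

async def close_resources() -> None:
    ...  # e.g. close aiohttp sessions, flush buffers

def sync_cleanup() -> None:
    # Assumes no event loop is already running in this thread.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(close_resources())
    finally:
        loop.close()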

View File

@@ -60,18 +60,6 @@ def init_worker():
_tls.processor.on_executor_start()
def cleanup_worker():
"""Clean up the processor for the current worker thread.
Should be called before the worker thread's event loop is destroyed so
that event-loop-bound resources (e.g. ``aiohttp.ClientSession``) are
closed on the correct loop.
"""
processor: CoPilotProcessor | None = getattr(_tls, "processor", None)
if processor is not None:
processor.cleanup()
# ============ Processor Class ============ #
@@ -110,28 +98,6 @@ class CoPilotProcessor:
logger.info(f"[CoPilotExecutor] Worker {self.tid} started")
def cleanup(self):
"""Clean up event-loop-bound resources before the loop is destroyed.
Shuts down the workspace storage instance that belongs to this
worker's event loop, ensuring ``aiohttp.ClientSession.close()``
runs on the same loop that created the session.
"""
from backend.util.workspace_storage import shutdown_workspace_storage
try:
future = asyncio.run_coroutine_threadsafe(
shutdown_workspace_storage(), self.execution_loop
)
future.result(timeout=5)
except Exception as e:
logger.warning(f"[CoPilotExecutor] Worker {self.tid} cleanup error: {e}")
# Stop the event loop
self.execution_loop.call_soon_threadsafe(self.execution_loop.stop)
self.execution_thread.join(timeout=5)
logger.info(f"[CoPilotExecutor] Worker {self.tid} cleaned up")
@error_logged(swallow=False)
def execute(
self,

View File

@@ -693,15 +693,11 @@ async def stream_chat_completion_sdk(
await asyncio.sleep(0.5)
raw_transcript = read_transcript_file(captured_transcript.path)
if raw_transcript:
try:
async with asyncio.timeout(30):
await _upload_transcript_bg(
user_id, session_id, raw_transcript
)
except asyncio.TimeoutError:
logger.warning(
f"[SDK] Transcript upload timed out for {session_id}"
)
task = asyncio.create_task(
_upload_transcript_bg(user_id, session_id, raw_transcript)
)
_background_tasks.add(task)
task.add_done_callback(_background_tasks.discard)
else:
logger.debug("[SDK] Stop hook fired but transcript not usable")

View File

@@ -5,7 +5,7 @@ import re
from datetime import datetime, timedelta, timezone
from typing import Any
from pydantic import BaseModel, field_validator
from pydantic import BaseModel, Field, field_validator
from backend.api.features.library.model import LibraryAgent
from backend.copilot.model import ChatSession
@@ -13,6 +13,7 @@ from backend.data.db_accessors import execution_db, library_db
from backend.data.execution import ExecutionStatus, GraphExecution, GraphExecutionMeta
from .base import BaseTool
from .execution_utils import TERMINAL_STATUSES, wait_for_execution
from .models import (
AgentOutputResponse,
ErrorResponse,
@@ -33,6 +34,7 @@ class AgentOutputInput(BaseModel):
store_slug: str = ""
execution_id: str = ""
run_time: str = "latest"
wait_if_running: int = Field(default=0, ge=0, le=300)
@field_validator(
"agent_name",
@@ -116,6 +118,11 @@ class AgentOutputTool(BaseTool):
Select which run to retrieve using:
- execution_id: Specific execution ID
- run_time: 'latest' (default), 'yesterday', 'last week', or ISO date 'YYYY-MM-DD'
Wait for completion (optional):
- wait_if_running: Max seconds to wait if execution is still running (0-300).
If the execution is running/queued, waits up to this many seconds for completion.
Returns current status on timeout. If already finished, returns immediately.
"""
@property
@@ -145,6 +152,13 @@ class AgentOutputTool(BaseTool):
"Time filter: 'latest', 'yesterday', 'last week', or 'YYYY-MM-DD'"
),
},
"wait_if_running": {
"type": "integer",
"description": (
"Max seconds to wait if execution is still running (0-300). "
"If running, waits for completion. Returns current state on timeout."
),
},
},
"required": [],
}
@@ -224,10 +238,14 @@ class AgentOutputTool(BaseTool):
execution_id: str | None,
time_start: datetime | None,
time_end: datetime | None,
include_running: bool = False,
) -> tuple[GraphExecution | None, list[GraphExecutionMeta], str | None]:
"""
Fetch execution(s) based on filters.
Returns (single_execution, available_executions_meta, error_message).
Args:
include_running: If True, also look for running/queued executions (for waiting)
"""
exec_db = execution_db()
@@ -242,11 +260,23 @@ class AgentOutputTool(BaseTool):
return None, [], f"Execution '{execution_id}' not found"
return execution, [], None
# Get completed executions with time filters
# Determine which statuses to query
statuses = [ExecutionStatus.COMPLETED]
if include_running:
statuses.extend(
[
ExecutionStatus.RUNNING,
ExecutionStatus.QUEUED,
ExecutionStatus.INCOMPLETE,
ExecutionStatus.REVIEW,
]
)
# Get executions with time filters
executions = await exec_db.get_graph_executions(
graph_id=graph_id,
user_id=user_id,
statuses=[ExecutionStatus.COMPLETED],
statuses=statuses,
created_time_gte=time_start,
created_time_lte=time_end,
limit=10,
@@ -313,10 +343,33 @@ class AgentOutputTool(BaseTool):
for e in available_executions[:5]
]
message = f"Found execution outputs for agent '{agent.name}'"
# Build appropriate message based on execution status
if execution.status == ExecutionStatus.COMPLETED:
message = f"Found execution outputs for agent '{agent.name}'"
elif execution.status == ExecutionStatus.FAILED:
message = f"Execution for agent '{agent.name}' failed"
elif execution.status == ExecutionStatus.TERMINATED:
message = f"Execution for agent '{agent.name}' was terminated"
elif execution.status == ExecutionStatus.REVIEW:
message = (
f"Execution for agent '{agent.name}' is awaiting human review. "
"The user needs to approve it before it can continue."
)
elif execution.status in (
ExecutionStatus.RUNNING,
ExecutionStatus.QUEUED,
ExecutionStatus.INCOMPLETE,
):
message = (
f"Execution for agent '{agent.name}' is still {execution.status.value}. "
"Results may be incomplete. Use wait_if_running to wait for completion."
)
else:
message = f"Found execution for agent '{agent.name}' (status: {execution.status.value})"
if len(available_executions) > 1:
message += (
f". Showing latest of {len(available_executions)} matching executions."
f" Showing latest of {len(available_executions)} matching executions."
)
return AgentOutputResponse(
@@ -431,13 +484,17 @@ class AgentOutputTool(BaseTool):
# Parse time expression
time_start, time_end = parse_time_expression(input_data.run_time)
# Fetch execution(s)
# Check if we should wait for running executions
wait_timeout = input_data.wait_if_running
# Fetch execution(s) - include running if we're going to wait
execution, available_executions, exec_error = await self._get_execution(
user_id=user_id,
graph_id=agent.graph_id,
execution_id=input_data.execution_id or None,
time_start=time_start,
time_end=time_end,
include_running=wait_timeout > 0,
)
if exec_error:
@@ -446,4 +503,17 @@ class AgentOutputTool(BaseTool):
session_id=session_id,
)
# If we have an execution that's still running and we should wait
if execution and wait_timeout > 0 and execution.status not in TERMINAL_STATUSES:
logger.info(
f"Execution {execution.id} is {execution.status}, "
f"waiting up to {wait_timeout}s for completion"
)
execution = await wait_for_execution(
user_id=user_id,
graph_id=agent.graph_id,
execution_id=execution.id,
timeout_seconds=wait_timeout,
)
return self._build_response(agent, execution, available_executions, session_id)
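
For illustration, arguments a model might pass to agent_output with the new parameter (the agent name is a placeholder):

tool_args = {
    "agent_name": "market-research",  # hypothetical agent
    "run_time": "latest",
    "wait_if_running": 120,  # wait up to 120s if still running (max 300)
}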

View File

@@ -0,0 +1,169 @@
"""Shared utilities for execution waiting and status handling."""
import asyncio
import logging
from typing import Any
from backend.data.db_accessors import execution_db
from backend.data.execution import (
AsyncRedisExecutionEventBus,
ExecutionStatus,
GraphExecution,
GraphExecutionEvent,
)
logger = logging.getLogger(__name__)
# Terminal statuses that indicate execution is complete
TERMINAL_STATUSES = frozenset(
{
ExecutionStatus.COMPLETED,
ExecutionStatus.FAILED,
ExecutionStatus.TERMINATED,
}
)
# Statuses where execution is paused but not finished (e.g. human-in-the-loop)
PAUSED_STATUSES = frozenset(
{
ExecutionStatus.REVIEW,
}
)
# Statuses that mean "stop waiting" (terminal or paused)
STOP_WAITING_STATUSES = TERMINAL_STATUSES | PAUSED_STATUSES
async def wait_for_execution(
user_id: str,
graph_id: str,
execution_id: str,
timeout_seconds: int,
) -> GraphExecution | None:
"""
Wait for an execution to reach a terminal or paused status using Redis pubsub.
Handles the race condition between checking status and subscribing by
re-checking the DB after the subscription is established.
Args:
user_id: User ID
graph_id: Graph ID
execution_id: Execution ID to wait for
timeout_seconds: Max seconds to wait
Returns:
The execution with current status, or None if not found
"""
exec_db = execution_db()
# Quick check — maybe it's already done
execution = await exec_db.get_graph_execution(
user_id=user_id,
execution_id=execution_id,
include_node_executions=False,
)
if not execution:
return None
if execution.status in STOP_WAITING_STATUSES:
logger.debug(
f"Execution {execution_id} already in stop-waiting state: "
f"{execution.status}"
)
return execution
logger.info(
f"Waiting up to {timeout_seconds}s for execution {execution_id} "
f"(current status: {execution.status})"
)
event_bus = AsyncRedisExecutionEventBus()
channel_key = f"{user_id}/{graph_id}/{execution_id}"
try:
result = await asyncio.wait_for(
_subscribe_and_wait(
event_bus, channel_key, user_id, execution_id, exec_db
),
timeout=timeout_seconds,
)
return result
except asyncio.TimeoutError:
logger.info(f"Timeout waiting for execution {execution_id}")
except Exception as e:
logger.error(f"Error waiting for execution: {e}", exc_info=True)
finally:
await event_bus.close()
# Return current state on timeout/error
return await exec_db.get_graph_execution(
user_id=user_id,
execution_id=execution_id,
include_node_executions=False,
)
async def _subscribe_and_wait(
event_bus: AsyncRedisExecutionEventBus,
channel_key: str,
user_id: str,
execution_id: str,
exec_db: Any,
) -> GraphExecution | None:
"""
Subscribe to execution events and wait for a terminal/paused status.
To avoid the race condition where the execution completes between the
initial DB check and the Redis subscription, we:
1. Start listening (which subscribes internally)
2. Re-check the DB after subscription is active
3. If still running, wait for pubsub events
"""
listen_iter = event_bus.listen_events(channel_key).__aiter__()
# Start a background task to consume pubsub events
result_event: GraphExecutionEvent | None = None
done = asyncio.Event()
async def _consume():
nonlocal result_event
async for event in listen_iter:
if isinstance(event, GraphExecutionEvent):
logger.debug(f"Received execution update: {event.status}")
if event.status in STOP_WAITING_STATUSES:
result_event = event
done.set()
return
consume_task = asyncio.create_task(_consume())
# Give the subscription a moment to establish, then re-check DB
await asyncio.sleep(0.1)
execution = await exec_db.get_graph_execution(
user_id=user_id,
execution_id=execution_id,
include_node_executions=False,
)
if execution and execution.status in STOP_WAITING_STATUSES:
consume_task.cancel()
return execution
# Wait for the pubsub consumer to find a terminal event
await done.wait()
consume_task.cancel()
# Fetch full execution
return await exec_db.get_graph_execution(
user_id=user_id,
execution_id=execution_id,
include_node_executions=False,
)
def get_execution_outputs(execution: GraphExecution | None) -> dict[str, Any] | None:
"""Extract outputs from an execution, or return None."""
if execution is None:
return None
return execution.outputs
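
A usage sketch for this module, inside an async context (the IDs are placeholders): wait up to a minute, then branch on whether a terminal or paused status was reached.

async def fetch_with_wait() -> None:
    execution = await wait_for_execution(
        user_id="user-123",        # placeholder IDs
        graph_id="graph-abc",
        execution_id="exec-xyz",
        timeout_seconds=60,
    )
    if execution and execution.status in TERMINAL_STATUSES:
        outputs = get_execution_outputs(execution)  # dict of outputs, or None
    elif execution and execution.status in PAUSED_STATUSES:
        pass  # awaiting human review; surface that to the caller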

View File

@@ -33,6 +33,7 @@ query SearchFeatureRequests($term: String!, $filter: IssueFilter, $first: Int) {
id
identifier
title
description
}
}
}
@@ -204,6 +205,7 @@ class SearchFeatureRequestsTool(BaseTool):
id=node["id"],
identifier=node["identifier"],
title=node["title"],
description=node.get("description"),
)
for node in nodes
]
@@ -237,11 +239,7 @@ class CreateFeatureRequestTool(BaseTool):
"Create a new feature request or add a customer need to an existing one. "
"Always search first with search_feature_requests to avoid duplicates. "
"If a matching request exists, pass its ID as existing_issue_id to add "
"the user's need to it instead of creating a duplicate. "
"IMPORTANT: Never include personally identifiable information (PII) in "
"the title or description — no names, emails, phone numbers, company "
"names, or other identifying details. Write titles and descriptions in "
"generic, feature-focused language."
"the user's need to it instead of creating a duplicate."
)
@property
@@ -251,20 +249,11 @@ class CreateFeatureRequestTool(BaseTool):
"properties": {
"title": {
"type": "string",
"description": (
"Title for the feature request. Must be generic and "
"feature-focused — do not include any user names, emails, "
"company names, or other PII."
),
"description": "Title for the feature request.",
},
"description": {
"type": "string",
"description": (
"Detailed description of what the user wants and why. "
"Must not contain any personally identifiable information "
"(PII) — describe the feature need generically without "
"referencing specific users, companies, or contact details."
),
"description": "Detailed description of what the user wants and why.",
},
"existing_issue_id": {
"type": "string",

View File

@@ -117,11 +117,13 @@ class TestSearchFeatureRequestsTool:
"id": "id-1",
"identifier": "FR-1",
"title": "Dark mode",
"description": "Add dark mode support",
},
{
"id": "id-2",
"identifier": "FR-2",
"title": "Dark theme",
"description": None,
},
]
patcher, _ = _mock_linear_config(query_return=_search_response(nodes))

View File

@@ -486,6 +486,7 @@ class FeatureRequestInfo(BaseModel):
id: str
identifier: str
title: str
description: str | None = None
class FeatureRequestSearchResponse(ToolResponseBase):

View File

@@ -9,6 +9,7 @@ from backend.copilot.config import ChatConfig
from backend.copilot.model import ChatSession
from backend.copilot.tracking import track_agent_run_success, track_agent_scheduled
from backend.data.db_accessors import graph_db, library_db, user_db
from backend.data.execution import ExecutionStatus
from backend.data.graph import GraphModel
from backend.data.model import CredentialsMetaInput
from backend.executor import utils as execution_utils
@@ -24,6 +25,7 @@ from .helpers import get_inputs_from_schema
from .models import (
AgentDetails,
AgentDetailsResponse,
AgentOutputResponse,
ErrorResponse,
ExecutionOptions,
ExecutionStartedResponse,
@@ -33,6 +35,7 @@ from .models import (
ToolResponseBase,
UserReadiness,
)
from .execution_utils import get_execution_outputs, wait_for_execution
from .utils import (
build_missing_credentials_from_graph,
extract_credentials_from_schema,
@@ -66,6 +69,7 @@ class RunAgentInput(BaseModel):
schedule_name: str = ""
cron: str = ""
timezone: str = "UTC"
wait_for_result: int = Field(default=0, ge=0, le=300)
@field_validator(
"username_agent_slug",
@@ -147,6 +151,14 @@ class RunAgentTool(BaseTool):
"type": "string",
"description": "IANA timezone for schedule (default: UTC)",
},
"wait_for_result": {
"type": "integer",
"description": (
"Max seconds to wait for execution to complete (0-300). "
"If >0, blocks until the execution finishes or times out. "
"Returns execution outputs when complete."
),
},
},
"required": [],
}
@@ -341,6 +353,7 @@ class RunAgentTool(BaseTool):
graph=graph,
graph_credentials=graph_credentials,
inputs=params.inputs,
wait_for_result=params.wait_for_result,
)
except NotFoundError as e:
@@ -424,8 +437,9 @@ class RunAgentTool(BaseTool):
graph: GraphModel,
graph_credentials: dict[str, CredentialsMetaInput],
inputs: dict[str, Any],
wait_for_result: int = 0,
) -> ToolResponseBase:
"""Execute an agent immediately."""
"""Execute an agent immediately, optionally waiting for completion."""
session_id = session.session_id
# Check rate limits
@@ -462,6 +476,78 @@ class RunAgentTool(BaseTool):
)
library_agent_link = f"/library/agents/{library_agent.id}"
# If wait_for_result is requested, wait for execution to complete
if wait_for_result > 0:
logger.info(
f"Waiting up to {wait_for_result}s for execution {execution.id}"
)
completed = await wait_for_execution(
user_id=user_id,
graph_id=library_agent.graph_id,
execution_id=execution.id,
timeout_seconds=wait_for_result,
)
if completed and completed.status == ExecutionStatus.COMPLETED:
outputs = get_execution_outputs(completed)
return AgentOutputResponse(
message=(
f"Agent '{library_agent.name}' completed successfully. "
f"View at {library_agent_link}."
),
session_id=session_id,
execution_id=execution.id,
graph_id=library_agent.graph_id,
graph_name=library_agent.name,
outputs=outputs or {},
)
elif completed and completed.status == ExecutionStatus.FAILED:
return ErrorResponse(
message=(
f"Agent '{library_agent.name}' execution failed. "
f"View details at {library_agent_link}."
),
session_id=session_id,
)
elif completed and completed.status == ExecutionStatus.TERMINATED:
return ErrorResponse(
message=(
f"Agent '{library_agent.name}' execution was terminated. "
f"View details at {library_agent_link}."
),
session_id=session_id,
)
elif completed and completed.status == ExecutionStatus.REVIEW:
return ExecutionStartedResponse(
message=(
f"Agent '{library_agent.name}' is awaiting human review. "
f"Check at {library_agent_link}."
),
session_id=session_id,
execution_id=execution.id,
graph_id=library_agent.graph_id,
graph_name=library_agent.name,
library_agent_id=library_agent.id,
library_agent_link=library_agent_link,
)
else:
status = completed.status.value if completed else "unknown"
return ExecutionStartedResponse(
message=(
f"Agent '{library_agent.name}' is still {status} after "
f"{wait_for_result}s. Check results later at "
f"{library_agent_link}. "
f"Use view_agent_output with wait_if_running to check again."
),
session_id=session_id,
execution_id=execution.id,
graph_id=library_agent.graph_id,
graph_name=library_agent.name,
library_agent_id=library_agent.id,
library_agent_link=library_agent_link,
)
return ExecutionStartedResponse(
message=(
f"Agent '{library_agent.name}' execution started successfully. "

View File

@@ -93,15 +93,7 @@ from backend.data.user import (
get_user_notification_preference,
update_user_integrations,
)
from backend.data.workspace import (
count_workspace_files,
create_workspace_file,
get_or_create_workspace,
get_workspace_file,
get_workspace_file_by_path,
list_workspace_files,
soft_delete_workspace_file,
)
from backend.data.workspace import get_or_create_workspace
from backend.util.service import (
AppService,
AppServiceClient,
@@ -282,13 +274,7 @@ class DatabaseManager(AppService):
get_user_execution_summary_data = _(get_user_execution_summary_data)
# ============ Workspace ============ #
count_workspace_files = _(count_workspace_files)
create_workspace_file = _(create_workspace_file)
get_or_create_workspace = _(get_or_create_workspace)
get_workspace_file = _(get_workspace_file)
get_workspace_file_by_path = _(get_workspace_file_by_path)
list_workspace_files = _(list_workspace_files)
soft_delete_workspace_file = _(soft_delete_workspace_file)
# ============ Understanding ============ #
get_business_understanding = _(get_business_understanding)
@@ -452,13 +438,7 @@ class DatabaseManagerAsyncClient(AppServiceClient):
get_user_execution_summary_data = d.get_user_execution_summary_data
# ============ Workspace ============ #
count_workspace_files = d.count_workspace_files
create_workspace_file = d.create_workspace_file
get_or_create_workspace = d.get_or_create_workspace
get_workspace_file = d.get_workspace_file
get_workspace_file_by_path = d.get_workspace_file_by_path
list_workspace_files = d.list_workspace_files
soft_delete_workspace_file = d.soft_delete_workspace_file
# ============ Understanding ============ #
get_business_understanding = d.get_business_understanding

View File

@@ -164,23 +164,21 @@ async def create_workspace_file(
async def get_workspace_file(
file_id: str,
workspace_id: str,
workspace_id: Optional[str] = None,
) -> Optional[WorkspaceFile]:
"""
Get a workspace file by ID.
Args:
file_id: The file ID
workspace_id: Workspace ID for scoping (required)
workspace_id: Optional workspace ID for validation
Returns:
WorkspaceFile instance or None
"""
where_clause: UserWorkspaceFileWhereInput = {
"id": file_id,
"isDeleted": False,
"workspaceId": workspace_id,
}
where_clause: dict = {"id": file_id, "isDeleted": False}
if workspace_id:
where_clause["workspaceId"] = workspace_id
file = await UserWorkspaceFile.prisma().find_first(where=where_clause)
return WorkspaceFile.from_db(file) if file else None
@@ -270,7 +268,7 @@ async def count_workspace_files(
Returns:
Number of files
"""
where_clause: UserWorkspaceFileWhereInput = {"workspaceId": workspace_id}
where_clause: dict = {"workspaceId": workspace_id}
if not include_deleted:
where_clause["isDeleted"] = False
@@ -285,7 +283,7 @@ async def count_workspace_files(
async def soft_delete_workspace_file(
file_id: str,
workspace_id: str,
workspace_id: Optional[str] = None,
) -> Optional[WorkspaceFile]:
"""
Soft-delete a workspace file.
@@ -295,7 +293,7 @@ async def soft_delete_workspace_file(
Args:
file_id: The file ID
workspace_id: Workspace ID for scoping (required)
workspace_id: Optional workspace ID for validation
Returns:
Updated WorkspaceFile instance or None if not found

View File

@@ -28,7 +28,7 @@ from typing import (
import httpx
import uvicorn
from fastapi import FastAPI, Request, responses
from prisma.errors import DataError, UniqueViolationError
from prisma.errors import DataError
from pydantic import BaseModel, TypeAdapter, create_model
import backend.util.exceptions as exceptions
@@ -201,7 +201,6 @@ EXCEPTION_MAPPING = {
UnhealthyServiceError,
HTTPClientError,
HTTPServerError,
UniqueViolationError,
*[
ErrorType
for _, ErrorType in inspect.getmembers(exceptions)
@@ -417,9 +416,6 @@ class AppService(BaseAppService, ABC):
self.fastapi_app.add_exception_handler(
DataError, self._handle_internal_http_error(400)
)
self.fastapi_app.add_exception_handler(
UniqueViolationError, self._handle_internal_http_error(400)
)
self.fastapi_app.add_exception_handler(
Exception, self._handle_internal_http_error(500)
)
@@ -482,7 +478,6 @@ def get_service_client(
# Don't retry these specific exceptions that won't be fixed by retrying
ValueError, # Invalid input/parameters
DataError, # Prisma data integrity errors (foreign key, unique constraints)
UniqueViolationError, # Unique constraint violations
KeyError, # Missing required data
TypeError, # Wrong data types
AttributeError, # Missing attributes

View File

@@ -12,8 +12,15 @@ from typing import Optional
from prisma.errors import UniqueViolationError
from backend.data.db_accessors import workspace_db
from backend.data.workspace import WorkspaceFile
from backend.data.workspace import (
WorkspaceFile,
count_workspace_files,
create_workspace_file,
get_workspace_file,
get_workspace_file_by_path,
list_workspace_files,
soft_delete_workspace_file,
)
from backend.util.settings import Config
from backend.util.virus_scanner import scan_content_safe
from backend.util.workspace_storage import compute_file_checksum, get_workspace_storage
@@ -118,9 +125,8 @@ class WorkspaceManager:
Raises:
FileNotFoundError: If file doesn't exist
"""
db = workspace_db()
resolved_path = self._resolve_path(path)
file = await db.get_workspace_file_by_path(self.workspace_id, resolved_path)
file = await get_workspace_file_by_path(self.workspace_id, resolved_path)
if file is None:
raise FileNotFoundError(f"File not found at path: {resolved_path}")
@@ -140,8 +146,7 @@ class WorkspaceManager:
Raises:
FileNotFoundError: If file doesn't exist
"""
db = workspace_db()
file = await db.get_workspace_file(file_id, self.workspace_id)
file = await get_workspace_file(file_id, self.workspace_id)
if file is None:
raise FileNotFoundError(f"File not found: {file_id}")
@@ -199,10 +204,8 @@ class WorkspaceManager:
# For overwrite=True, we let the write proceed and handle via UniqueViolationError
# This ensures the new file is written to storage BEFORE the old one is deleted,
# preventing data loss if the new write fails
db = workspace_db()
if not overwrite:
existing = await db.get_workspace_file_by_path(self.workspace_id, path)
existing = await get_workspace_file_by_path(self.workspace_id, path)
if existing is not None:
raise ValueError(f"File already exists at path: {path}")
@@ -229,7 +232,7 @@ class WorkspaceManager:
# Create database record - handle race condition where another request
# created a file at the same path between our check and create
try:
file = await db.create_workspace_file(
file = await create_workspace_file(
workspace_id=self.workspace_id,
file_id=file_id,
name=filename,
@@ -243,12 +246,12 @@ class WorkspaceManager:
# Race condition: another request created a file at this path
if overwrite:
# Re-fetch and delete the conflicting file, then retry
existing = await db.get_workspace_file_by_path(self.workspace_id, path)
existing = await get_workspace_file_by_path(self.workspace_id, path)
if existing:
await self.delete_file(existing.id)
# Retry the create - if this also fails, clean up storage file
try:
file = await db.create_workspace_file(
file = await create_workspace_file(
workspace_id=self.workspace_id,
file_id=file_id,
name=filename,
@@ -311,9 +314,8 @@ class WorkspaceManager:
List of WorkspaceFile instances
"""
effective_path = self._get_effective_path(path, include_all_sessions)
db = workspace_db()
return await db.list_workspace_files(
return await list_workspace_files(
workspace_id=self.workspace_id,
path_prefix=effective_path,
limit=limit,
@@ -330,8 +332,7 @@ class WorkspaceManager:
Returns:
True if deleted, False if not found
"""
db = workspace_db()
file = await db.get_workspace_file(file_id, self.workspace_id)
file = await get_workspace_file(file_id, self.workspace_id)
if file is None:
return False
@@ -344,7 +345,7 @@ class WorkspaceManager:
# Continue with database soft-delete even if storage delete fails
# Soft-delete database record
result = await db.soft_delete_workspace_file(file_id, self.workspace_id)
result = await soft_delete_workspace_file(file_id, self.workspace_id)
return result is not None
async def get_download_url(self, file_id: str, expires_in: int = 3600) -> str:
@@ -361,8 +362,7 @@ class WorkspaceManager:
Raises:
FileNotFoundError: If file doesn't exist
"""
db = workspace_db()
file = await db.get_workspace_file(file_id, self.workspace_id)
file = await get_workspace_file(file_id, self.workspace_id)
if file is None:
raise FileNotFoundError(f"File not found: {file_id}")
@@ -379,8 +379,7 @@ class WorkspaceManager:
Returns:
WorkspaceFile instance or None
"""
db = workspace_db()
return await db.get_workspace_file(file_id, self.workspace_id)
return await get_workspace_file(file_id, self.workspace_id)
async def get_file_info_by_path(self, path: str) -> Optional[WorkspaceFile]:
"""
@@ -395,9 +394,8 @@ class WorkspaceManager:
Returns:
WorkspaceFile instance or None
"""
db = workspace_db()
resolved_path = self._resolve_path(path)
return await db.get_workspace_file_by_path(self.workspace_id, resolved_path)
return await get_workspace_file_by_path(self.workspace_id, resolved_path)
async def get_file_count(
self,
@@ -419,8 +417,7 @@ class WorkspaceManager:
Number of files
"""
effective_path = self._get_effective_path(path, include_all_sessions)
db = workspace_db()
return await db.count_workspace_files(
return await count_workspace_files(
self.workspace_id, path_prefix=effective_path
)

View File

@@ -93,14 +93,7 @@ class WorkspaceStorageBackend(ABC):
class GCSWorkspaceStorage(WorkspaceStorageBackend):
"""Google Cloud Storage implementation for workspace storage.
Each instance owns a single ``aiohttp.ClientSession`` and GCS async
client. Because ``ClientSession`` is bound to the event loop on which it
was created, callers that run on separate loops (e.g. copilot executor
worker threads) **must** obtain their own ``GCSWorkspaceStorage`` instance
via :func:`get_workspace_storage` which is event-loop-aware.
"""
"""Google Cloud Storage implementation for workspace storage."""
def __init__(self, bucket_name: str):
self.bucket_name = bucket_name
@@ -344,73 +337,60 @@ class LocalWorkspaceStorage(WorkspaceStorageBackend):
raise ValueError(f"Invalid storage path format: {storage_path}")
# ---------------------------------------------------------------------------
# Storage instance management
# ---------------------------------------------------------------------------
# ``aiohttp.ClientSession`` is bound to the event loop where it is created.
# The copilot executor runs each worker in its own thread with a dedicated
# event loop, so a single global ``GCSWorkspaceStorage`` instance would break.
#
# For **local storage** a single shared instance is fine (no async I/O).
# For **GCS storage** we keep one instance *per event loop* so every loop
# gets its own ``ClientSession``.
# ---------------------------------------------------------------------------
_local_storage: Optional[LocalWorkspaceStorage] = None
_gcs_storages: dict[int, GCSWorkspaceStorage] = {}
# Global storage backend instance
_workspace_storage: Optional[WorkspaceStorageBackend] = None
_storage_lock = asyncio.Lock()
async def get_workspace_storage() -> WorkspaceStorageBackend:
"""Return a workspace storage backend for the **current** event loop.
* Local storage → single shared instance (no event-loop affinity).
* GCS storage → one instance per event loop to avoid cross-loop
``aiohttp`` errors.
"""
global _local_storage
Get the workspace storage backend instance.
config = Config()
Uses GCS if media_gcs_bucket_name is configured, otherwise uses local storage.
"""
global _workspace_storage
# --- Local storage (shared) ---
if not config.media_gcs_bucket_name:
if _local_storage is None:
storage_dir = (
config.workspace_storage_dir if config.workspace_storage_dir else None
)
logger.info(f"Using local workspace storage: {storage_dir or 'default'}")
_local_storage = LocalWorkspaceStorage(storage_dir)
return _local_storage
if _workspace_storage is None:
async with _storage_lock:
if _workspace_storage is None:
config = Config()
# --- GCS storage (per event loop) ---
loop_id = id(asyncio.get_running_loop())
if loop_id not in _gcs_storages:
logger.info(
f"Creating GCS workspace storage for loop {loop_id}: "
f"{config.media_gcs_bucket_name}"
)
_gcs_storages[loop_id] = GCSWorkspaceStorage(config.media_gcs_bucket_name)
return _gcs_storages[loop_id]
if config.media_gcs_bucket_name:
logger.info(
f"Using GCS workspace storage: {config.media_gcs_bucket_name}"
)
_workspace_storage = GCSWorkspaceStorage(
config.media_gcs_bucket_name
)
else:
storage_dir = (
config.workspace_storage_dir
if config.workspace_storage_dir
else None
)
logger.info(
f"Using local workspace storage: {storage_dir or 'default'}"
)
_workspace_storage = LocalWorkspaceStorage(storage_dir)
return _workspace_storage
async def shutdown_workspace_storage() -> None:
"""Shut down workspace storage for the **current** event loop.
Closes the ``aiohttp`` session owned by the current loop's GCS instance.
Each worker thread should call this on its own loop before the loop is
destroyed. The REST API lifespan hook calls it for the main server loop.
"""
global _local_storage
Properly shutdown the global workspace storage backend.
loop_id = id(asyncio.get_running_loop())
storage = _gcs_storages.pop(loop_id, None)
if storage is not None:
await storage.close()
Closes aiohttp sessions and other resources for GCS backend.
Should be called during application shutdown.
"""
global _workspace_storage
# Clear local storage only when the last GCS instance is gone
# (i.e. full shutdown, not just a single worker stopping).
if not _gcs_storages:
_local_storage = None
if _workspace_storage is not None:
async with _storage_lock:
if _workspace_storage is not None:
if isinstance(_workspace_storage, GCSWorkspaceStorage):
await _workspace_storage.close()
_workspace_storage = None
def compute_file_checksum(content: bytes) -> str:
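
The singleton side of this change uses lock-guarded double-checked initialization. A generic sketch of the pattern (the Client class is illustrative, not from this codebase):

import asyncio
from typing import Optional

class Client:
    async def close(self) -> None: ...

_instance: Optional[Client] = None
_lock = asyncio.Lock()

async def get_client() -> Client:
    global _instance
    if _instance is None:            # fast path without the lock
        async with _lock:
            if _instance is None:    # re-check once the lock is held
                _instance = Client()
    return _instance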

View File

@@ -69,11 +69,12 @@ test.describe("Marketplace Creator Page Basic Functionality", () => {
await marketplacePage.getFirstCreatorProfile(page);
await firstCreatorProfile.click();
await page.waitForURL("**/marketplace/creator/**");
await page.waitForLoadState("networkidle").catch(() => {});
const firstAgent = page
.locator('[data-testid="store-card"]:visible')
.first();
await firstAgent.waitFor({ state: "visible", timeout: 15000 });
await firstAgent.waitFor({ state: "visible", timeout: 30000 });
await firstAgent.click();
await page.waitForURL("**/marketplace/agent/**");

View File

@@ -115,11 +115,18 @@ test.describe("Marketplace Basic Functionality", () => {
const searchTerm = page.getByText("DummyInput").first();
await isVisible(searchTerm);
await expect
.poll(() => marketplacePage.getSearchResultsCount(page), {
timeout: 15000,
})
.toBeGreaterThan(0);
await page.waitForLoadState("networkidle").catch(() => {});
await page
.waitForFunction(
() =>
document.querySelectorAll('[data-testid="store-card"]').length > 0,
{ timeout: 15000 },
)
.catch(() => console.log("No search results appeared within timeout"));
const results = await marketplacePage.getSearchResultsCount(page);
expect(results).toBeGreaterThan(0);
console.log("Complete search flow works correctly test passed ✅");
});
@@ -128,9 +135,7 @@ test.describe("Marketplace Basic Functionality", () => {
});
test.describe("Marketplace Edge Cases", () => {
test("Search for non-existent item renders search page correctly", async ({
page,
}) => {
test("Search for non-existent item shows no results", async ({ page }) => {
const marketplacePage = new MarketplacePage(page);
await marketplacePage.goto(page);
@@ -146,18 +151,9 @@ test.describe("Marketplace Edge Cases", () => {
const searchTerm = page.getByText("xyznonexistentitemxyz123");
await isVisible(searchTerm);
// The search page should render either results or a "No results found" message
await page.waitForLoadState("networkidle").catch(() => {});
const hasResults =
(await page.locator('[data-testid="store-card"]').count()) > 0;
const hasNoResultsMsg = await page
.getByText("No results found")
.isVisible()
.catch(() => false);
expect(hasResults || hasNoResultsMsg).toBe(true);
const results = await marketplacePage.getSearchResultsCount(page);
expect(results).toBe(0);
console.log(
"Search for non-existent item renders search page correctly test passed ✅",
);
console.log("Search for non-existent item shows no results test passed ✅");
});
});

View File

@@ -125,8 +125,16 @@ export class BuildPage extends BasePage {
`[data-id="block-card-${blockCardId}"]`,
);
await blockCard.waitFor({ state: "visible", timeout: 10000 });
await blockCard.click();
try {
// Wait for the block card to be visible with a reasonable timeout
await blockCard.waitFor({ state: "visible", timeout: 10000 });
await blockCard.click();
} catch (error) {
console.log(
`Block ${block.name} (display: ${displayName}) returned from the API but not found in block list`,
);
console.log(`Error: ${error}`);
}
}
async hasBlock(_block: Block) {

View File

@@ -65,7 +65,7 @@ export class LoginPage {
await this.page.waitForLoadState("load", { timeout: 10_000 });
console.log("➡️ Navigating to /marketplace ...");
await this.page.goto("/marketplace", { timeout: 20_000 });
await this.page.goto("/marketplace", { timeout: 10_000 });
console.log("✅ Login process complete");
// If Wallet popover auto-opens, close it to avoid blocking account menu interactions

View File

@@ -9,12 +9,7 @@ export class MarketplacePage extends BasePage {
async goto(page: Page) {
await page.goto("/marketplace");
await page
.locator(
'[data-testid="store-card"], [data-testid="featured-store-card"]',
)
.first()
.waitFor({ state: "visible", timeout: 20000 });
await page.waitForLoadState("networkidle").catch(() => {});
}
async getMarketplaceTitle(page: Page) {
@@ -116,7 +111,7 @@ export class MarketplacePage extends BasePage {
async getFirstFeaturedAgent(page: Page) {
const { getId } = getSelectors(page);
const card = getId("featured-store-card").first();
await card.waitFor({ state: "visible", timeout: 15000 });
await card.waitFor({ state: "visible", timeout: 30000 });
return card;
}
@@ -124,14 +119,14 @@ export class MarketplacePage extends BasePage {
const card = this.page
.locator('[data-testid="store-card"]:visible')
.first();
await card.waitFor({ state: "visible", timeout: 15000 });
await card.waitFor({ state: "visible", timeout: 30000 });
return card;
}
async getFirstCreatorProfile(page: Page) {
const { getId } = getSelectors(page);
const card = getId("creator-card").first();
await card.waitFor({ state: "visible", timeout: 15000 });
await card.waitFor({ state: "visible", timeout: 30000 });
return card;
}

View File

@@ -45,9 +45,8 @@ export async function isEnabled(el: Locator) {
}
export async function hasMinCount(el: Locator, minCount: number) {
await expect
.poll(async () => await el.count(), { timeout: 10000 })
.toBeGreaterThanOrEqual(minCount);
const count = await el.count();
expect(count).toBeGreaterThanOrEqual(minCount);
}
export async function matchesUrl(page: Page, pattern: RegExp) {