feat(backend): improve stop graph execution reliability (#10293)

## Summary - Enhanced graph execution cancellation and cleanup mechanisms - Improved error handling and logging for graph execution lifecycle - Added timeout handling for graph termination with proper status updates - Exposed a new API for stopping graph based on only graph_id or user_id - Refactored logging metadata structure for better error tracking ## Key Changes ### Backend - **Graph Execution Management**: Enhanced `stop_graph_execution` with timeout-based waiting and proper status transitions - **Execution Cleanup**: Added proper cancellation waiting with timeout handling in executor manager - **Logging Improvements**: Centralized `LogMetadata` class and improved error logging consistency - **API Enhancements**: Added bulk graph execution stopping functionality - **Error Handling**: Better exception handling and status management for failed/cancelled executions ### Frontend - **Status Safety**: Added null safety checks for status chips to prevent runtime errors - **Execution Control**: Simplified stop execution request handling ## Test Plan - [x] Verify graph execution can be properly stopped and reaches terminal state - [x] Test timeout scenarios for stuck executions - [x] Validate proper cleanup of running node executions when graph is cancelled - [x] Check frontend status chips handle undefined statuses gracefully - [x] Test bulk execution stopping functionality 🤖 Generated with [Claude Code](https://claude.ai/code) --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-04-08 03:00:28 -04:00 · 2025-07-02 14:21:26 -07:00
parent f394a0eabb
commit f1cc2afbda
10 changed files with 257 additions and 182 deletions
--- a/autogpt_platform/backend/backend/blocks/agent.py
+++ b/autogpt_platform/backend/backend/blocks/agent.py
@@ -15,9 +15,9 @@ from backend.data.block import (
 )
 from backend.data.execution import ExecutionStatus
 from backend.data.model import SchemaField
-from backend.util import json
+from backend.util import json, retry

-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)


 class AgentExecutorBlock(Block):
@@ -77,27 +77,42 @@ class AgentExecutorBlock(Block):
            use_db_query=False,
        )

+        logger = execution_utils.LogMetadata(
+            logger=_logger,
+            user_id=input_data.user_id,
+            graph_eid=graph_exec.id,
+            graph_id=input_data.graph_id,
+            node_eid="*",
+            node_id="*",
+            block_name=self.name,
+        )
+
        try:
            async for name, data in self._run(
                graph_id=input_data.graph_id,
                graph_version=input_data.graph_version,
                graph_exec_id=graph_exec.id,
                user_id=input_data.user_id,
+                logger=logger,
            ):
                yield name, data
        except asyncio.CancelledError:
-            logger.warning(
-                f"Execution of graph {input_data.graph_id} version {input_data.graph_version} was cancelled."
+            await self._stop(
+                graph_exec_id=graph_exec.id,
+                user_id=input_data.user_id,
+                logger=logger,
            )
-            await execution_utils.stop_graph_execution(
-                graph_exec.id, use_db_query=False
+            logger.warning(
+                f"Execution of graph {input_data.graph_id}v{input_data.graph_version} was cancelled."
            )
        except Exception as e:
-            logger.error(
-                f"Execution of graph {input_data.graph_id} version {input_data.graph_version} failed: {e}, stopping execution."
+            await self._stop(
+                graph_exec_id=graph_exec.id,
+                user_id=input_data.user_id,
+                logger=logger,
            )
-            await execution_utils.stop_graph_execution(
-                graph_exec.id, use_db_query=False
+            logger.error(
+                f"Execution of graph {input_data.graph_id}v{input_data.graph_version} failed: {e}, execution is stopped."
            )
            raise

@@ -107,6 +122,7 @@ class AgentExecutorBlock(Block):
        graph_version: int,
        graph_exec_id: str,
        user_id: str,
+        logger,
    ) -> BlockOutput:

        from backend.data.execution import ExecutionEventType
@@ -159,3 +175,25 @@ class AgentExecutorBlock(Block):
                    f"Execution {log_id} produced {output_name}: {output_data}"
                )
                yield output_name, output_data
+
+    @retry.func_retry
+    async def _stop(
+        self,
+        graph_exec_id: str,
+        user_id: str,
+        logger,
+    ) -> None:
+        from backend.executor import utils as execution_utils
+
+        log_id = f"Graph exec-id: {graph_exec_id}"
+        logger.info(f"Stopping execution of {log_id}")
+
+        try:
+            await execution_utils.stop_graph_execution(
+                graph_exec_id=graph_exec_id,
+                user_id=user_id,
+                use_db_query=False,
+            )
+            logger.info(f"Execution {log_id} stopped successfully.")
+        except Exception as e:
+            logger.error(f"Failed to stop execution {log_id}: {e}")
--- a/autogpt_platform/backend/backend/blocks/basic.py
+++ b/autogpt_platform/backend/backend/blocks/basic.py
@@ -529,7 +529,7 @@ class CreateListBlock(Block):
                cur_size, cur_tokens = cur_size + 1, cur_tokens + tokens

        # Yield final chunk if any
-        if chunk:
+        if chunk or not input_data.values:
            yield "list", chunk


--- a/autogpt_platform/backend/backend/data/execution.py
+++ b/autogpt_platform/backend/backend/data/execution.py
@@ -347,6 +347,7 @@ class NodeExecutionResult(BaseModel):


 async def get_graph_executions(
+    graph_exec_id: str | None = None,
    graph_id: str | None = None,
    user_id: str | None = None,
    statuses: list[ExecutionStatus] | None = None,
@@ -357,6 +358,8 @@ async def get_graph_executions(
    where_filter: AgentGraphExecutionWhereInput = {
        "isDeleted": False,
    }
+    if graph_exec_id:
+        where_filter["id"] = graph_exec_id
    if user_id:
        where_filter["userId"] = user_id
    if graph_id:
--- a/autogpt_platform/backend/backend/executor/database.py
+++ b/autogpt_platform/backend/backend/executor/database.py
@@ -204,6 +204,7 @@ class DatabaseManagerAsyncClient(AppServiceClient):
    create_graph_execution = d.create_graph_execution
    get_latest_node_execution = d.get_latest_node_execution
    get_graph = d.get_graph
+    get_graph_execution_meta = d.get_graph_execution_meta
    get_node = d.get_node
    get_node_execution = d.get_node_execution
    get_node_executions = d.get_node_executions
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -24,7 +24,7 @@ from backend.data.notifications import (
    NotificationType,
 )
 from backend.data.rabbitmq import SyncRabbitMQ
-from backend.executor.utils import create_execution_queue_config
+from backend.executor.utils import LogMetadata, create_execution_queue_config
 from backend.notifications.notifications import queue_notification
 from backend.util.exceptions import InsufficientBalanceError

@@ -98,35 +98,6 @@ utilization_gauge = Gauge(
 )


-class LogMetadata(TruncatedLogger):
-    def __init__(
-        self,
-        user_id: str,
-        graph_eid: str,
-        graph_id: str,
-        node_eid: str,
-        node_id: str,
-        block_name: str,
-        max_length: int = 1000,
-    ):
-        metadata = {
-            "component": "ExecutionManager",
-            "user_id": user_id,
-            "graph_eid": graph_eid,
-            "graph_id": graph_id,
-            "node_eid": node_eid,
-            "node_id": node_id,
-            "block_name": block_name,
-        }
-        prefix = f"[ExecutionManager|uid:{user_id}|gid:{graph_id}|nid:{node_id}]|geid:{graph_eid}|neid:{node_eid}|{block_name}]"
-        super().__init__(
-            _logger,
-            max_length=max_length,
-            prefix=prefix,
-            metadata=metadata,
-        )
-
-
 T = TypeVar("T")


@@ -158,6 +129,7 @@ async def execute_node(
    node_block = node.block

    log_metadata = LogMetadata(
+        logger=_logger,
        user_id=user_id,
        graph_eid=graph_exec_id,
        graph_id=graph_id,
@@ -429,6 +401,7 @@ class Executor:
        nodes_input_masks: Optional[dict[str, dict[str, JsonValue]]] = None,
    ) -> NodeExecutionStats:
        log_metadata = LogMetadata(
+            logger=_logger,
            user_id=node_exec.user_id,
            graph_eid=node_exec.graph_exec_id,
            graph_id=node_exec.graph_id,
@@ -534,6 +507,7 @@ class Executor:
        cls, graph_exec: GraphExecutionEntry, cancel: threading.Event
    ):
        log_metadata = LogMetadata(
+            logger=_logger,
            user_id=graph_exec.user_id,
            graph_eid=graph_exec.graph_exec_id,
            graph_id=graph_exec.graph_id,
@@ -861,6 +835,7 @@ class Executor:
                f"Failed graph execution {graph_exec.graph_exec_id}: {error}"
            )
        finally:
+            # Cancel and wait for all node executions to complete
            for node_id, inflight_exec in running_node_execution.items():
                if inflight_exec.is_done():
                    continue
@@ -873,6 +848,28 @@ class Executor:
                log_metadata.info(f"Stopping node evaluation {node_id}")
                inflight_eval.cancel()

+            for node_id, inflight_exec in running_node_execution.items():
+                if inflight_exec.is_done():
+                    continue
+                try:
+                    inflight_exec.wait_for_cancellation(timeout=60.0)
+                except TimeoutError:
+                    log_metadata.exception(
+                        f"Node execution #{node_id} did not stop in time, "
+                        "it may be stuck or taking too long."
+                    )
+
+            for node_id, inflight_eval in running_node_evaluation.items():
+                if inflight_eval.done():
+                    continue
+                try:
+                    inflight_eval.result(timeout=60.0)
+                except TimeoutError:
+                    log_metadata.exception(
+                        f"Node evaluation #{node_id} did not stop in time, "
+                        "it may be stuck or taking too long."
+                    )
+
            if execution_status in [ExecutionStatus.TERMINATED, ExecutionStatus.FAILED]:
                inflight_executions = db_client.get_node_executions(
                    graph_exec.graph_exec_id,
--- a/autogpt_platform/backend/backend/executor/utils.py
+++ b/autogpt_platform/backend/backend/executor/utils.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import time
 from collections import defaultdict
 from concurrent.futures import Future
 from typing import TYPE_CHECKING, Any, Callable, Optional, cast
@@ -7,6 +8,8 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, cast
 from autogpt_libs.utils.cache import thread_cached
 from pydantic import BaseModel, JsonValue

+from backend.data import execution as execution_db
+from backend.data import graph as graph_db
 from backend.data.block import (
    Block,
    BlockData,
@@ -23,12 +26,8 @@ from backend.data.execution import (
    GraphExecutionStats,
    GraphExecutionWithNodes,
    RedisExecutionEventBus,
-    create_graph_execution,
-    get_node_executions,
-    update_graph_execution_stats,
-    update_node_execution_status_batch,
 )
-from backend.data.graph import GraphModel, Node, get_graph
+from backend.data.graph import GraphModel, Node
 from backend.data.model import CredentialsMetaInput
 from backend.data.rabbitmq import (
    AsyncRabbitMQ,
@@ -55,6 +54,36 @@ logger = TruncatedLogger(logging.getLogger(__name__), prefix="[GraphExecutorUtil
 # ============ Resource Helpers ============ #


+class LogMetadata(TruncatedLogger):
+    def __init__(
+        self,
+        logger: logging.Logger,
+        user_id: str,
+        graph_eid: str,
+        graph_id: str,
+        node_eid: str,
+        node_id: str,
+        block_name: str,
+        max_length: int = 1000,
+    ):
+        metadata = {
+            "component": "ExecutionManager",
+            "user_id": user_id,
+            "graph_eid": graph_eid,
+            "graph_id": graph_id,
+            "node_eid": node_eid,
+            "node_id": node_id,
+            "block_name": block_name,
+        }
+        prefix = f"[ExecutionManager|uid:{user_id}|gid:{graph_id}|nid:{node_id}]|geid:{graph_eid}|neid:{node_eid}|{block_name}]"
+        super().__init__(
+            logger,
+            max_length=max_length,
+            prefix=prefix,
+            metadata=metadata,
+        )
+
+
@thread_cached
 def get_execution_event_bus() -> RedisExecutionEventBus:
    return RedisExecutionEventBus()
@@ -653,8 +682,10 @@ def create_execution_queue_config() -> RabbitMQConfig:


 async def stop_graph_execution(
+    user_id: str,
    graph_exec_id: str,
    use_db_query: bool = True,
+    wait_timeout: float = 60.0,
 ):
    """
    Mechanism:
@@ -664,66 +695,56 @@ async def stop_graph_execution(
    3. Update execution statuses in DB and set `error` outputs to `"TERMINATED"`.
    """
    queue_client = await get_async_execution_queue()
+    db = execution_db if use_db_query else get_db_async_client()
    await queue_client.publish_message(
        routing_key="",
        message=CancelExecutionEvent(graph_exec_id=graph_exec_id).model_dump_json(),
        exchange=GRAPH_EXECUTION_CANCEL_EXCHANGE,
    )

-    # Update the status of the graph execution
-    if use_db_query:
-        graph_execution = await update_graph_execution_stats(
-            graph_exec_id,
-            ExecutionStatus.TERMINATED,
-        )
-    else:
-        graph_execution = await get_db_async_client().update_graph_execution_stats(
-            graph_exec_id,
-            ExecutionStatus.TERMINATED,
+    if not wait_timeout:
+        return
+
+    start_time = time.time()
+    while time.time() - start_time < wait_timeout:
+        graph_exec = await db.get_graph_execution_meta(
+            execution_id=graph_exec_id, user_id=user_id
        )

-    if graph_execution:
-        await get_async_execution_event_bus().publish(graph_execution)
-    else:
-        raise NotFoundError(
-            f"Graph execution #{graph_exec_id} not found for termination."
-        )
+        if not graph_exec:
+            raise NotFoundError(f"Graph execution #{graph_exec_id} not found.")

-    # Update the status of the node executions
-    if use_db_query:
-        node_executions = await get_node_executions(
-            graph_exec_id=graph_exec_id,
-            statuses=[
-                ExecutionStatus.QUEUED,
-                ExecutionStatus.RUNNING,
-                ExecutionStatus.INCOMPLETE,
-            ],
-        )
-        await update_node_execution_status_batch(
-            [v.node_exec_id for v in node_executions],
+        if graph_exec.status in [
            ExecutionStatus.TERMINATED,
-        )
-    else:
-        node_executions = await get_db_async_client().get_node_executions(
-            graph_exec_id=graph_exec_id,
-            statuses=[
-                ExecutionStatus.QUEUED,
-                ExecutionStatus.RUNNING,
-                ExecutionStatus.INCOMPLETE,
-            ],
-        )
-        await get_db_async_client().update_node_execution_status_batch(
-            [v.node_exec_id for v in node_executions],
-            ExecutionStatus.TERMINATED,
-        )
+            ExecutionStatus.COMPLETED,
+            ExecutionStatus.FAILED,
+        ]:
+            # If graph execution is terminated/completed/failed, cancellation is complete
+            return

-    await asyncio.gather(
-        *[
-            get_async_execution_event_bus().publish(
-                v.model_copy(update={"status": ExecutionStatus.TERMINATED})
+        elif graph_exec.status in [
+            ExecutionStatus.QUEUED,
+            ExecutionStatus.INCOMPLETE,
+        ]:
+            # If the graph is still on the queue, we can prevent them from being executed
+            # by setting the status to TERMINATED.
+            node_execs = await db.get_node_executions(
+                graph_exec_id=graph_exec_id,
+                statuses=[ExecutionStatus.QUEUED, ExecutionStatus.INCOMPLETE],
            )
-            for v in node_executions
-        ]
+            await db.update_node_execution_status_batch(
+                [node_exec.node_exec_id for node_exec in node_execs],
+                ExecutionStatus.TERMINATED,
+            )
+            await db.update_graph_execution_stats(
+                graph_exec_id=graph_exec_id,
+                status=ExecutionStatus.TERMINATED,
+            )
+
+        await asyncio.sleep(1.0)
+
+    raise TimeoutError(
+        f"Timed out waiting for graph execution #{graph_exec_id} to terminate."
    )


@@ -753,22 +774,16 @@ async def add_graph_execution(
        GraphExecutionEntry: The entry for the graph execution.
    Raises:
        ValueError: If the graph is not found or if there are validation errors.
-    """  # noqa
-    if use_db_query:
-        graph: GraphModel | None = await get_graph(
-            graph_id=graph_id,
-            user_id=user_id,
-            version=graph_version,
-            include_subgraphs=True,
-        )
-    else:
-        graph: GraphModel | None = await get_db_async_client().get_graph(
-            graph_id=graph_id,
-            user_id=user_id,
-            version=graph_version,
-            include_subgraphs=True,
-        )
+    """
+    gdb = graph_db if use_db_query else get_db_async_client()
+    edb = execution_db if use_db_query else get_db_async_client()

+    graph: GraphModel | None = await gdb.get_graph(
+        graph_id=graph_id,
+        user_id=user_id,
+        version=graph_version,
+        include_subgraphs=True,
+    )
    if not graph:
        raise NotFoundError(f"Graph #{graph_id} not found.")

@@ -787,22 +802,13 @@ async def add_graph_execution(
        nodes_input_masks=nodes_input_masks,
    )

-    if use_db_query:
-        graph_exec = await create_graph_execution(
-            user_id=user_id,
-            graph_id=graph_id,
-            graph_version=graph.version,
-            starting_nodes_input=starting_nodes_input,
-            preset_id=preset_id,
-        )
-    else:
-        graph_exec = await get_db_async_client().create_graph_execution(
-            user_id=user_id,
-            graph_id=graph_id,
-            graph_version=graph.version,
-            starting_nodes_input=starting_nodes_input,
-            preset_id=preset_id,
-        )
+    graph_exec = await edb.create_graph_execution(
+        user_id=user_id,
+        graph_id=graph_id,
+        graph_version=graph.version,
+        starting_nodes_input=starting_nodes_input,
+        preset_id=preset_id,
+    )

    try:
        queue = await get_async_execution_queue()
@@ -821,28 +827,15 @@ async def add_graph_execution(
        return graph_exec
    except Exception as e:
        logger.error(f"Unable to publish graph #{graph_id} exec #{graph_exec.id}: {e}")
-
-        if use_db_query:
-            await update_node_execution_status_batch(
-                [node_exec.node_exec_id for node_exec in graph_exec.node_executions],
-                ExecutionStatus.FAILED,
-            )
-            await update_graph_execution_stats(
-                graph_exec_id=graph_exec.id,
-                status=ExecutionStatus.FAILED,
-                stats=GraphExecutionStats(error=str(e)),
-            )
-        else:
-            await get_db_async_client().update_node_execution_status_batch(
-                [node_exec.node_exec_id for node_exec in graph_exec.node_executions],
-                ExecutionStatus.FAILED,
-            )
-            await get_db_async_client().update_graph_execution_stats(
-                graph_exec_id=graph_exec.id,
-                status=ExecutionStatus.FAILED,
-                stats=GraphExecutionStats(error=str(e)),
-            )
-
+        await edb.update_node_execution_status_batch(
+            [node_exec.node_exec_id for node_exec in graph_exec.node_executions],
+            ExecutionStatus.FAILED,
+        )
+        await edb.update_graph_execution_stats(
+            graph_exec_id=graph_exec.id,
+            status=ExecutionStatus.FAILED,
+            stats=GraphExecutionStats(error=str(e)),
+        )
        raise


@@ -897,14 +890,10 @@ class NodeExecutionProgress:
        try:
            self.tasks[exec_id].result(wait_time)
        except TimeoutError:
-            print(
-                ">>>>>>>  -- Timeout, after waiting for",
-                wait_time,
-                "seconds for node_id",
-                exec_id,
-            )
            pass
-
+        except Exception as e:
+            logger.error(f"Task for exec ID {exec_id} failed with error: {str(e)}")
+            pass
        return self.is_done(0)

    def stop(self) -> list[str]:
@@ -921,6 +910,25 @@ class NodeExecutionProgress:
            cancelled_ids.append(task_id)
        return cancelled_ids

+    def wait_for_cancellation(self, timeout: float = 5.0):
+        """
+        Wait for all cancelled tasks to complete cancellation.
+
+        Args:
+            timeout: Maximum time to wait for cancellation in seconds
+        """
+        start_time = time.time()
+
+        while time.time() - start_time < timeout:
+            # Check if all tasks are done (either completed or cancelled)
+            if all(task.done() for task in self.tasks.values()):
+                return True
+            time.sleep(0.1)  # Small delay to avoid busy waiting
+
+        raise TimeoutError(
+            f"Timeout waiting for cancellation of tasks: {list(self.tasks.keys())}"
+        )
+
    def _pop_done_task(self, exec_id: str) -> bool:
        task = self.tasks.get(exec_id)
        if not task:
@@ -933,8 +941,10 @@ class NodeExecutionProgress:
            return False

        if task := self.tasks.pop(exec_id):
-            self.on_done_task(exec_id, task.result())
-
+            try:
+                self.on_done_task(exec_id, task.result())
+            except Exception as e:
+                logger.error(f"Task for exec ID {exec_id} failed with error: {str(e)}")
        return True

    def _next_exec(self) -> str | None:
--- a/autogpt_platform/backend/backend/server/routers/v1.py
+++ b/autogpt_platform/backend/backend/server/routers/v1.py
@@ -669,24 +669,57 @@ async def execute_graph(
 )
 async def stop_graph_run(
    graph_id: str, graph_exec_id: str, user_id: Annotated[str, Depends(get_user_id)]
-) -> execution_db.GraphExecution:
-    if not await execution_db.get_graph_execution_meta(
-        user_id=user_id, execution_id=graph_exec_id
-    ):
-        raise HTTPException(404, detail=f"Agent execution #{graph_exec_id} not found")
-
-    await execution_utils.stop_graph_execution(graph_exec_id)
-
-    # Retrieve & return canceled graph execution in its final state
-    result = await execution_db.get_graph_execution(
-        execution_id=graph_exec_id, user_id=user_id
+) -> execution_db.GraphExecutionMeta:
+    res = await _stop_graph_run(
+        user_id=user_id,
+        graph_id=graph_id,
+        graph_exec_id=graph_exec_id,
    )
-    if not result:
+    if not res:
        raise HTTPException(
-            500,
-            detail=f"Could not fetch graph execution #{graph_exec_id} after stopping",
+            status_code=HTTP_404_NOT_FOUND,
+            detail=f"Graph execution #{graph_exec_id} not found.",
        )
-    return result
+    return res[0]
+
+
+@v1_router.post(
+    path="/executions",
+    summary="Stop graph executions",
+    tags=["graphs"],
+    dependencies=[Depends(auth_middleware)],
+)
+async def stop_graph_runs(
+    graph_id: str, graph_exec_id: str, user_id: Annotated[str, Depends(get_user_id)]
+) -> list[execution_db.GraphExecutionMeta]:
+    return await _stop_graph_run(
+        user_id=user_id,
+        graph_id=graph_id,
+        graph_exec_id=graph_exec_id,
+    )
+
+
+async def _stop_graph_run(
+    user_id: str,
+    graph_id: Optional[str] = None,
+    graph_exec_id: Optional[str] = None,
+) -> list[execution_db.GraphExecutionMeta]:
+    graph_execs = await execution_db.get_graph_executions(
+        user_id=user_id,
+        graph_id=graph_id,
+        graph_exec_id=graph_exec_id,
+        statuses=[
+            execution_db.ExecutionStatus.INCOMPLETE,
+            execution_db.ExecutionStatus.QUEUED,
+            execution_db.ExecutionStatus.RUNNING,
+        ],
+    )
+    stopped_execs = [
+        execution_utils.stop_graph_execution(graph_exec_id=exec.id, user_id=user_id)
+        for exec in graph_execs
+    ]
+    await asyncio.gather(*stopped_execs)
+    return graph_execs


@v1_router.get(
--- a/autogpt_platform/frontend/src/components/agents/agent-run-status-chip.tsx
+++ b/autogpt_platform/frontend/src/components/agents/agent-run-status-chip.tsx
@@ -57,9 +57,9 @@ export default function AgentRunStatusChip({
  return (
    <Badge
      variant="secondary"
-      className={`text-xs font-medium ${statusStyles[statusData[status].variant]} rounded-[45px] px-[9px] py-[3px]`}
+      className={`text-xs font-medium ${statusStyles[statusData[status]?.variant]} rounded-[45px] px-[9px] py-[3px]`}
    >
-      {statusData[status].label}
+      {statusData[status]?.label}
    </Badge>
  );
 }
--- a/autogpt_platform/frontend/src/components/agents/agent-status-chip.tsx
+++ b/autogpt_platform/frontend/src/components/agents/agent-status-chip.tsx
@@ -32,7 +32,7 @@ export default function AgentStatusChip({
  return (
    <Badge
      variant="secondary"
-      className={`text-xs font-medium ${statusStyles[statusData[status].variant]} rounded-[45px] px-[9px] py-[3px]`}
+      className={`text-xs font-medium ${statusStyles[statusData[status]?.variant]} rounded-[45px] px-[9px] py-[3px]`}
    >
      {statusData[status].label}
    </Badge>
--- a/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx
+++ b/autogpt_platform/frontend/src/hooks/useAgentGraph.tsx
@@ -673,14 +673,7 @@ export default function useAgentGraph(
      savedAgent &&
      saveRunRequest.activeExecutionID
    ) {
-      setSaveRunRequest({
-        request: "stop",
-        state: "stopping",
-        activeExecutionID: saveRunRequest.activeExecutionID,
-      });
-      api
-        .stopGraphExecution(savedAgent.id, saveRunRequest.activeExecutionID)
-        .then(() => setSaveRunRequest({ request: "none", state: "none" }));
+      api.stopGraphExecution(savedAgent.id, saveRunRequest.activeExecutionID);
    }
  }, [
    api,