feat(backend): Remove RPC service from Agent Executor (#9804)

Currently, execution tasks are not properly distributed between executors because we need to send the execution request to the execution server. The execution manager now accepts execution requests from the message queue. Thus, we can remove the synchronous RPC system from this service, let the system focus on executing the agent, and not dedicate any process to the HTTP API interface. This will also reduce the risk of the execution service being too busy to accept new execution requests. ### Changes 🏗️ * Remove the RPC system in Agent Executor * Allow the cancellation of an execution that is still waiting in the queue (by preventing it from being executed). * Make a unified helper for adding an execution request to the system and move other execution-related helper functions into `executor/utils.py`. * Remove non-db connections (redis / rabbitmq) in Database Manager and let clients manage these themselves. ### Checklist 📋 #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: - [x] Existing CI, some agent runs
2026-04-08 03:00:28 -04:00 · 2025-04-11 21:03:47 +02:00
parent f7ca5ac1ba
commit bb92226f5d
13 changed files with 669 additions and 598 deletions
--- a/autogpt_platform/backend/backend/blocks/agent.py
+++ b/autogpt_platform/backend/backend/blocks/agent.py
@@ -1,8 +1,6 @@
 import logging
 from typing import Any

-from autogpt_libs.utils.cache import thread_cached
-
 from backend.data.block import (
    Block,
    BlockCategory,
@@ -19,21 +17,6 @@ from backend.util import json
 logger = logging.getLogger(__name__)


-@thread_cached
-def get_executor_manager_client():
-    from backend.executor import ExecutionManager
-    from backend.util.service import get_service_client
-
-    return get_service_client(ExecutionManager)
-
-
-@thread_cached
-def get_event_bus():
-    from backend.data.execution import RedisExecutionEventBus
-
-    return RedisExecutionEventBus()
-
-
 class AgentExecutorBlock(Block):
    class Input(BlockSchema):
        user_id: str = SchemaField(description="User ID")
@@ -76,11 +59,11 @@ class AgentExecutorBlock(Block):

    def run(self, input_data: Input, **kwargs) -> BlockOutput:
        from backend.data.execution import ExecutionEventType
+        from backend.executor import utils as execution_utils

-        executor_manager = get_executor_manager_client()
-        event_bus = get_event_bus()
+        event_bus = execution_utils.get_execution_event_bus()

-        graph_exec = executor_manager.add_execution(
+        graph_exec = execution_utils.add_graph_execution(
            graph_id=input_data.graph_id,
            graph_version=input_data.graph_version,
            user_id=input_data.user_id,
--- a/autogpt_platform/backend/backend/data/execution.py
+++ b/autogpt_platform/backend/backend/data/execution.py
@@ -34,11 +34,10 @@ from pydantic import BaseModel
 from pydantic.fields import Field

 from backend.server.v2.store.exceptions import DatabaseError
-from backend.util import mock
 from backend.util import type as type_utils
 from backend.util.settings import Config

-from .block import BlockData, BlockInput, BlockType, CompletedBlockOutput, get_block
+from .block import BlockInput, BlockType, CompletedBlockOutput, get_block
 from .db import BaseDbModel
 from .includes import (
    EXECUTION_RESULT_INCLUDE,
@@ -203,6 +202,26 @@ class GraphExecutionWithNodes(GraphExecution):
            node_executions=node_executions,
        )

+    def to_graph_execution_entry(self):
+        return GraphExecutionEntry(
+            user_id=self.user_id,
+            graph_id=self.graph_id,
+            graph_version=self.graph_version or 0,
+            graph_exec_id=self.id,
+            start_node_execs=[
+                NodeExecutionEntry(
+                    user_id=self.user_id,
+                    graph_exec_id=node_exec.graph_exec_id,
+                    graph_id=node_exec.graph_id,
+                    node_exec_id=node_exec.node_exec_id,
+                    node_id=node_exec.node_id,
+                    block_id=node_exec.block_id,
+                    data=node_exec.input_data,
+                )
+                for node_exec in self.node_executions
+            ],
+        )
+

 class NodeExecutionResult(BaseModel):
    user_id: str
@@ -469,19 +488,27 @@ async def upsert_execution_output(
    )


-async def update_graph_execution_start_time(graph_exec_id: str) -> GraphExecution:
-    res = await AgentGraphExecution.prisma().update(
-        where={"id": graph_exec_id},
+async def update_graph_execution_start_time(
+    graph_exec_id: str,
+) -> GraphExecution | None:
+    count = await AgentGraphExecution.prisma().update_many(
+        where={
+            "id": graph_exec_id,
+            "executionStatus": ExecutionStatus.QUEUED,
+        },
        data={
            "executionStatus": ExecutionStatus.RUNNING,
            "startedAt": datetime.now(tz=timezone.utc),
        },
+    )
+    if count == 0:
+        return None
+
+    res = await AgentGraphExecution.prisma().find_unique(
+        where={"id": graph_exec_id},
        include=GRAPH_EXECUTION_INCLUDE,
    )
-    if not res:
-        raise ValueError(f"Graph execution #{graph_exec_id} not found")
-
-    return GraphExecution.from_db(res)
+    return GraphExecution.from_db(res) if res else None


 async def update_graph_execution_stats(
@@ -717,144 +744,6 @@ class ExecutionQueue(Generic[T]):
        return self.queue.empty()


-# ------------------- Execution Utilities -------------------- #
-
-
-LIST_SPLIT = "_$_"
-DICT_SPLIT = "_#_"
-OBJC_SPLIT = "_@_"
-
-
-def parse_execution_output(output: BlockData, name: str) -> Any | None:
-    """
-    Extracts partial output data by name from a given BlockData.
-
-    The function supports extracting data from lists, dictionaries, and objects
-    using specific naming conventions:
-    - For lists: <output_name>_$_<index>
-    - For dictionaries: <output_name>_#_<key>
-    - For objects: <output_name>_@_<attribute>
-
-    Args:
-        output (BlockData): A tuple containing the output name and data.
-        name (str): The name used to extract specific data from the output.
-
-    Returns:
-        Any | None: The extracted data if found, otherwise None.
-
-    Examples:
-        >>> output = ("result", [10, 20, 30])
-        >>> parse_execution_output(output, "result_$_1")
-        20
-
-        >>> output = ("config", {"key1": "value1", "key2": "value2"})
-        >>> parse_execution_output(output, "config_#_key1")
-        'value1'
-
-        >>> class Sample:
-        ...     attr1 = "value1"
-        ...     attr2 = "value2"
-        >>> output = ("object", Sample())
-        >>> parse_execution_output(output, "object_@_attr1")
-        'value1'
-    """
-    output_name, output_data = output
-
-    if name == output_name:
-        return output_data
-
-    if name.startswith(f"{output_name}{LIST_SPLIT}"):
-        index = int(name.split(LIST_SPLIT)[1])
-        if not isinstance(output_data, list) or len(output_data) <= index:
-            return None
-        return output_data[int(name.split(LIST_SPLIT)[1])]
-
-    if name.startswith(f"{output_name}{DICT_SPLIT}"):
-        index = name.split(DICT_SPLIT)[1]
-        if not isinstance(output_data, dict) or index not in output_data:
-            return None
-        return output_data[index]
-
-    if name.startswith(f"{output_name}{OBJC_SPLIT}"):
-        index = name.split(OBJC_SPLIT)[1]
-        if isinstance(output_data, object) and hasattr(output_data, index):
-            return getattr(output_data, index)
-        return None
-
-    return None
-
-
-def merge_execution_input(data: BlockInput) -> BlockInput:
-    """
-    Merges dynamic input pins into a single list, dictionary, or object based on naming patterns.
-
-    This function processes input keys that follow specific patterns to merge them into a unified structure:
-    - `<input_name>_$_<index>` for list inputs.
-    - `<input_name>_#_<index>` for dictionary inputs.
-    - `<input_name>_@_<index>` for object inputs.
-
-    Args:
-        data (BlockInput): A dictionary containing input keys and their corresponding values.
-
-    Returns:
-        BlockInput: A dictionary with merged inputs.
-
-    Raises:
-        ValueError: If a list index is not an integer.
-
-    Examples:
-        >>> data = {
-        ...     "list_$_0": "a",
-        ...     "list_$_1": "b",
-        ...     "dict_#_key1": "value1",
-        ...     "dict_#_key2": "value2",
-        ...     "object_@_attr1": "value1",
-        ...     "object_@_attr2": "value2"
-        ... }
-        >>> merge_execution_input(data)
-        {
-            "list": ["a", "b"],
-            "dict": {"key1": "value1", "key2": "value2"},
-            "object": <MockObject attr1="value1" attr2="value2">
-        }
-    """
-
-    # Merge all input with <input_name>_$_<index> into a single list.
-    items = list(data.items())
-
-    for key, value in items:
-        if LIST_SPLIT not in key:
-            continue
-        name, index = key.split(LIST_SPLIT)
-        if not index.isdigit():
-            raise ValueError(f"Invalid key: {key}, #{index} index must be an integer.")
-
-        data[name] = data.get(name, [])
-        if int(index) >= len(data[name]):
-            # Pad list with empty string on missing indices.
-            data[name].extend([""] * (int(index) - len(data[name]) + 1))
-        data[name][int(index)] = value
-
-    # Merge all input with <input_name>_#_<index> into a single dict.
-    for key, value in items:
-        if DICT_SPLIT not in key:
-            continue
-        name, index = key.split(DICT_SPLIT)
-        data[name] = data.get(name, {})
-        data[name][index] = value
-
-    # Merge all input with <input_name>_@_<index> into a single object.
-    for key, value in items:
-        if OBJC_SPLIT not in key:
-            continue
-        name, index = key.split(OBJC_SPLIT)
-        if name not in data or not isinstance(data[name], object):
-            data[name] = mock.MockObject()
-        setattr(data[name], index, value)
-
-    return data
-
-
 # --------------------- Event Bus --------------------- #


--- a/autogpt_platform/backend/backend/executor/database.py
+++ b/autogpt_platform/backend/backend/executor/database.py
@@ -1,11 +1,8 @@
 import logging

-from backend.data import db, redis
+from backend.data import db
 from backend.data.credit import UsageTransactionMetadata, get_user_credit_model
 from backend.data.execution import (
-    GraphExecution,
-    NodeExecutionResult,
-    RedisExecutionEventBus,
    create_graph_execution,
    get_graph_execution,
    get_incomplete_node_executions,
@@ -42,7 +39,7 @@ from backend.data.user import (
    update_user_integrations,
    update_user_metadata,
 )
-from backend.util.service import AppService, expose, exposed_run_and_wait
+from backend.util.service import AppService, exposed_run_and_wait
 from backend.util.settings import Config

 config = Config()
@@ -57,21 +54,14 @@ async def _spend_credits(


 class DatabaseManager(AppService):
-    def __init__(self):
-        super().__init__()
-        self.execution_event_bus = RedisExecutionEventBus()

    def run_service(self) -> None:
        logger.info(f"[{self.service_name}] ⏳ Connecting to Database...")
        self.run_and_wait(db.connect())
-        logger.info(f"[{self.service_name}] ⏳ Connecting to Redis...")
-        redis.connect()
        super().run_service()

    def cleanup(self):
        super().cleanup()
-        logger.info(f"[{self.service_name}] ⏳ Disconnecting Redis...")
-        redis.disconnect()
        logger.info(f"[{self.service_name}] ⏳ Disconnecting Database...")
        self.run_and_wait(db.disconnect())

@@ -79,12 +69,6 @@ class DatabaseManager(AppService):
    def get_port(cls) -> int:
        return config.database_api_port

-    @expose
-    def send_execution_update(
-        self, execution_result: GraphExecution | NodeExecutionResult
-    ):
-        self.execution_event_bus.publish(execution_result)
-
    # Executions
    get_graph_execution = exposed_run_and_wait(get_graph_execution)
    create_graph_execution = exposed_run_and_wait(create_graph_execution)
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -9,11 +9,10 @@ import time
 from concurrent.futures import Future, ProcessPoolExecutor
 from contextlib import contextmanager
 from multiprocessing.pool import AsyncResult, Pool
-from typing import TYPE_CHECKING, Any, Generator, Optional, TypeVar, cast
+from typing import TYPE_CHECKING, Any, Generator, TypeVar, cast

 from pika.adapters.blocking_connection import BlockingChannel
 from pika.spec import Basic
-from pydantic import BaseModel
 from redis.lock import Lock as RedisLock

 from backend.blocks.io import AgentOutputBlock
@@ -24,13 +23,6 @@ from backend.data.notifications import (
    NotificationEventDTO,
    NotificationType,
 )
-from backend.data.rabbitmq import (
-    Exchange,
-    ExchangeType,
-    Queue,
-    RabbitMQConfig,
-    SyncRabbitMQ,
-)
 from backend.util.exceptions import InsufficientBalanceError

 if TYPE_CHECKING:
@@ -41,43 +33,36 @@ from autogpt_libs.utils.cache import thread_cached

 from backend.blocks.agent import AgentExecutorBlock
 from backend.data import redis
-from backend.data.block import (
-    Block,
-    BlockData,
-    BlockInput,
-    BlockSchema,
-    BlockType,
-    get_block,
-)
+from backend.data.block import BlockData, BlockInput, BlockSchema, get_block
 from backend.data.execution import (
    ExecutionQueue,
    ExecutionStatus,
+    GraphExecution,
    GraphExecutionEntry,
    NodeExecutionEntry,
    NodeExecutionResult,
-    merge_execution_input,
-    parse_execution_output,
 )
-from backend.data.graph import GraphModel, Link, Node
+from backend.data.graph import Link, Node
 from backend.executor.utils import (
+    GRAPH_EXECUTION_CANCEL_QUEUE_NAME,
+    GRAPH_EXECUTION_QUEUE_NAME,
+    CancelExecutionEvent,
    UsageTransactionMetadata,
    block_usage_cost,
    execution_usage_cost,
+    get_execution_event_bus,
+    get_execution_queue,
+    parse_execution_output,
+    validate_exec,
 )
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.util import json
 from backend.util.decorator import error_logged, time_measured
 from backend.util.file import clean_exec_files
 from backend.util.logging import configure_logging
-from backend.util.process import set_service_name
-from backend.util.service import (
-    AppService,
-    close_service_client,
-    expose,
-    get_service_client,
-)
+from backend.util.process import AppProcess, set_service_name
+from backend.util.service import close_service_client, get_service_client
 from backend.util.settings import Settings
-from backend.util.type import convert

 logger = logging.getLogger(__name__)
 settings = Settings()
@@ -163,7 +148,7 @@ def execute_node(
    def update_execution_status(status: ExecutionStatus) -> NodeExecutionResult:
        """Sets status and fetches+broadcasts the latest state of the node execution"""
        exec_update = db_client.update_node_execution_status(node_exec_id, status)
-        db_client.send_execution_update(exec_update)
+        send_execution_update(exec_update)
        return exec_update

    node = db_client.get_node(node_id)
@@ -299,7 +284,7 @@ def _enqueue_next_nodes(
        exec_update = db_client.update_node_execution_status(
            node_exec_id, ExecutionStatus.QUEUED, data
        )
-        db_client.send_execution_update(exec_update)
+        send_execution_update(exec_update)
        return NodeExecutionEntry(
            user_id=user_id,
            graph_exec_id=graph_exec_id,
@@ -411,105 +396,6 @@ def _enqueue_next_nodes(
    ]


-def validate_exec(
-    node: Node,
-    data: BlockInput,
-    resolve_input: bool = True,
-) -> tuple[BlockInput | None, str]:
-    """
-    Validate the input data for a node execution.
-
-    Args:
-        node: The node to execute.
-        data: The input data for the node execution.
-        resolve_input: Whether to resolve dynamic pins into dict/list/object.
-
-    Returns:
-        A tuple of the validated data and the block name.
-        If the data is invalid, the first element will be None, and the second element
-        will be an error message.
-        If the data is valid, the first element will be the resolved input data, and
-        the second element will be the block name.
-    """
-    node_block: Block | None = get_block(node.block_id)
-    if not node_block:
-        return None, f"Block for {node.block_id} not found."
-    schema = node_block.input_schema
-
-    # Convert non-matching data types to the expected input schema.
-    for name, data_type in schema.__annotations__.items():
-        if (value := data.get(name)) and (type(value) is not data_type):
-            data[name] = convert(value, data_type)
-
-    # Input data (without default values) should contain all required fields.
-    error_prefix = f"Input data missing or mismatch for `{node_block.name}`:"
-    if missing_links := schema.get_missing_links(data, node.input_links):
-        return None, f"{error_prefix} unpopulated links {missing_links}"
-
-    # Merge input data with default values and resolve dynamic dict/list/object pins.
-    input_default = schema.get_input_defaults(node.input_default)
-    data = {**input_default, **data}
-    if resolve_input:
-        data = merge_execution_input(data)
-
-    # Input data post-merge should contain all required fields from the schema.
-    if missing_input := schema.get_missing_input(data):
-        return None, f"{error_prefix} missing input {missing_input}"
-
-    # Last validation: Validate the input values against the schema.
-    if error := schema.get_mismatch_error(data):
-        error_message = f"{error_prefix} {error}"
-        logger.error(error_message)
-        return None, error_message
-
-    return data, node_block.name
-
-
-GRAPH_EXECUTION_EXCHANGE = Exchange(
-    name="graph_execution",
-    type=ExchangeType.DIRECT,
-    durable=True,
-    auto_delete=False,
-)
-GRAPH_EXECUTION_QUEUE_NAME = "graph_execution_queue"
-GRAPH_EXECUTION_ROUTING_KEY = "graph_execution.run"
-
-GRAPH_EXECUTION_CANCEL_EXCHANGE = Exchange(
-    name="graph_execution_cancel",
-    type=ExchangeType.FANOUT,
-    durable=True,
-    auto_delete=True,
-)
-GRAPH_EXECUTION_CANCEL_QUEUE_NAME = "graph_execution_cancel_queue"
-
-
-def create_execution_config() -> RabbitMQConfig:
-    """
-    Define two exchanges and queues:
-    - 'graph_execution' (DIRECT) for run tasks.
-    - 'graph_execution_cancel' (FANOUT) for cancel requests.
-    """
-    run_queue = Queue(
-        name=GRAPH_EXECUTION_QUEUE_NAME,
-        exchange=GRAPH_EXECUTION_EXCHANGE,
-        routing_key=GRAPH_EXECUTION_ROUTING_KEY,
-        durable=True,
-        auto_delete=False,
-    )
-    cancel_queue = Queue(
-        name=GRAPH_EXECUTION_CANCEL_QUEUE_NAME,
-        exchange=GRAPH_EXECUTION_CANCEL_EXCHANGE,
-        routing_key="",  # not used for FANOUT
-        durable=True,
-        auto_delete=False,
-    )
-    return RabbitMQConfig(
-        vhost="/",
-        exchanges=[GRAPH_EXECUTION_EXCHANGE, GRAPH_EXECUTION_CANCEL_EXCHANGE],
-        queues=[run_queue, cancel_queue],
-    )
-
-
 class Executor:
    """
    This class contains event handlers for the process pool executor events.
@@ -689,7 +575,13 @@ class Executor:
        exec_meta = cls.db_client.update_graph_execution_start_time(
            graph_exec.graph_exec_id
        )
-        cls.db_client.send_execution_update(exec_meta)
+        if exec_meta is None:
+            logger.warning(
+                f"Skipped graph execution {graph_exec.graph_exec_id}, the graph execution is not found or not currently in the QUEUED state."
+            )
+            return
+
+        send_execution_update(exec_meta)
        timing_info, (exec_stats, status, error) = cls._on_graph_execution(
            graph_exec, cancel, log_metadata
        )
@@ -702,7 +594,7 @@ class Executor:
            status=status,
            stats=exec_stats,
        ):
-            cls.db_client.send_execution_update(graph_exec_result)
+            send_execution_update(graph_exec_result)

        cls._handle_agent_run_notif(graph_exec, exec_stats)

@@ -815,7 +707,7 @@ class Executor:
                        status=execution_status,
                        stats=execution_stats,
                    ):
-                        cls.db_client.send_execution_update(_graph_exec)
+                        send_execution_update(_graph_exec)
                    else:
                        logger.error(
                            "Callback for "
@@ -866,7 +758,7 @@ class Executor:
                    exec_update = cls.db_client.update_node_execution_status(
                        node_exec_id, execution_status
                    )
-                    cls.db_client.send_execution_update(exec_update)
+                    send_execution_update(exec_update)

                    cls._handle_low_balance_notif(
                        graph_exec.user_id,
@@ -983,32 +875,25 @@ class Executor:
        )


-class CancelExecutionEvent(BaseModel):
-    graph_exec_id: str
-
-
-class ExecutionManager(AppService):
+class ExecutionManager(AppProcess):
    def __init__(self):
        super().__init__()
        self.pool_size = settings.config.num_graph_workers
-        self.rabbit_config = create_execution_config()
-        self.rabbitmq_service = SyncRabbitMQ(self.rabbit_config)
        self.running = True
-        self.active_graph_runs: dict[str, tuple[Future, threading.Event, int]] = {}
+        self.active_graph_runs: dict[str, tuple[Future, threading.Event]] = {}

    @classmethod
    def get_port(cls) -> int:
        return settings.config.execution_manager_port

-    def run_service(self):
-        from backend.integrations.credentials_store import IntegrationCredentialsStore
-
-        self.credentials_store = IntegrationCredentialsStore()
-
-        logger.info(f"[{self.service_name}] ⏳ Connecting to RabbitMQ...")
-        self.rabbitmq_service.connect()
-        channel = self.rabbitmq_service.get_channel()
+    def run(self):
+        while True:
+            try:
+                self._run()
+            except Exception:
+                logger.exception(f"[{self.service_name}] error in graph executor loop")

+    def _run(self):
        logger.info(f"[{self.service_name}] ⏳ Spawn max-{self.pool_size} workers...")
        self.executor = ProcessPoolExecutor(
            max_workers=self.pool_size,
@@ -1020,6 +905,8 @@ class ExecutionManager(AppService):

        logger.info(f"[{self.service_name}] Ready to consume messages...")
        while True:
+            channel = get_execution_queue().get_channel()
+
            # cancel graph execution requests
            method_frame, _, body = channel.basic_get(
                queue=GRAPH_EXECUTION_CANCEL_QUEUE_NAME,
@@ -1036,7 +923,7 @@ class ExecutionManager(AppService):
            if method_frame:
                self._handle_run_message(channel, method_frame, body)
            else:
-                time.sleep(0.1)
+                time.sleep(0.2)

    def _handle_cancel_message(self, body: bytes):
        try:
@@ -1053,7 +940,7 @@ class ExecutionManager(AppService):
                )
                return

-            _, cancel_event, _ = self.active_graph_runs[graph_exec_id]
+            _, cancel_event = self.active_graph_runs[graph_exec_id]
            logger.info(f"[{self.service_name}] Received cancel for {graph_exec_id}")
            if not cancel_event.is_set():
                cancel_event.set()
@@ -1091,21 +978,19 @@ class ExecutionManager(AppService):
        future = self.executor.submit(
            Executor.on_graph_execution, graph_exec_entry, cancel_event
        )
-        self.active_graph_runs[graph_exec_id] = (future, cancel_event, delivery_tag)
+        self.active_graph_runs[graph_exec_id] = (future, cancel_event)

        def _on_run_done(f: Future):
            logger.info(f"[{self.service_name}] Run completed for {graph_exec_id}")
-            info = self.active_graph_runs.pop(graph_exec_id, None)
-            if not info:
-                return
-            _, _, delivery_tag = info
-            if future.exception():
-                logger.error(
-                    f"[{self.service_name}] Execution for {graph_exec_id} failed: {future.exception()}"
-                )
-                channel.basic_nack(delivery_tag, requeue=False)
-            else:
+            try:
                channel.basic_ack(delivery_tag)
+                self.active_graph_runs.pop(graph_exec_id, None)
+                if f.exception():
+                    logger.error(
+                        f"[{self.service_name}] Execution for {graph_exec_id} failed: {f.exception()}"
+                    )
+            except Exception as e:
+                logger.error(f"[{self.service_name}] Error acknowledging message: {e}")

        future.add_done_callback(_on_run_done)

@@ -1121,186 +1006,10 @@ class ExecutionManager(AppService):
        logger.info(f"[{self.service_name}] ⏳ Disconnecting Redis...")
        redis.disconnect()

-        logger.info(f"[{self.service_name}] ⏳ Disconnecting RabbitMQ...")
-        self.rabbitmq_service.disconnect()
-
-        logger.info(f"[{self.service_name}] ⏳ Shutting down graph executor pool...")
-        self.executor.shutdown(cancel_futures=True)
-
-        logger.info(f"[{self.service_name}] ⏳ Disconnecting Redis...")
-        redis.disconnect()
-
    @property
    def db_client(self) -> "DatabaseManager":
        return get_db_client()

-    @expose
-    def add_execution(
-        self,
-        graph_id: str,
-        data: BlockInput,
-        user_id: str,
-        graph_version: Optional[int] = None,
-        preset_id: str | None = None,
-    ) -> GraphExecutionEntry:
-        graph: GraphModel | None = self.db_client.get_graph(
-            graph_id=graph_id, user_id=user_id, version=graph_version
-        )
-        if not graph:
-            raise ValueError(f"Graph #{graph_id} not found.")
-
-        graph.validate_graph(for_run=True)
-        self._validate_node_input_credentials(graph, user_id)
-
-        nodes_input = []
-        for node in graph.starting_nodes:
-            input_data = {}
-            block = node.block
-
-            # Note block should never be executed.
-            if block.block_type == BlockType.NOTE:
-                continue
-
-            # Extract request input data, and assign it to the input pin.
-            if block.block_type == BlockType.INPUT:
-                input_name = node.input_default.get("name")
-                if input_name and input_name in data:
-                    input_data = {"value": data[input_name]}
-
-            # Extract webhook payload, and assign it to the input pin
-            webhook_payload_key = f"webhook_{node.webhook_id}_payload"
-            if (
-                block.block_type in (BlockType.WEBHOOK, BlockType.WEBHOOK_MANUAL)
-                and node.webhook_id
-            ):
-                if webhook_payload_key not in data:
-                    raise ValueError(
-                        f"Node {block.name} #{node.id} webhook payload is missing"
-                    )
-                input_data = {"payload": data[webhook_payload_key]}
-
-            input_data, error = validate_exec(node, input_data)
-            if input_data is None:
-                raise ValueError(error)
-            else:
-                nodes_input.append((node.id, input_data))
-
-        if not nodes_input:
-            raise ValueError(
-                "No starting nodes found for the graph, make sure an AgentInput or blocks with no inbound links are present as starting nodes."
-            )
-
-        graph_exec = self.db_client.create_graph_execution(
-            graph_id=graph_id,
-            graph_version=graph.version,
-            nodes_input=nodes_input,
-            user_id=user_id,
-            preset_id=preset_id,
-        )
-        self.db_client.send_execution_update(graph_exec)
-
-        graph_exec_entry = GraphExecutionEntry(
-            user_id=user_id,
-            graph_id=graph_id,
-            graph_version=graph_version or 0,
-            graph_exec_id=graph_exec.id,
-            start_node_execs=[
-                NodeExecutionEntry(
-                    user_id=user_id,
-                    graph_exec_id=node_exec.graph_exec_id,
-                    graph_id=node_exec.graph_id,
-                    node_exec_id=node_exec.node_exec_id,
-                    node_id=node_exec.node_id,
-                    block_id=node_exec.block_id,
-                    data=node_exec.input_data,
-                )
-                for node_exec in graph_exec.node_executions
-            ],
-        )
-        self.rabbitmq_service.publish_message(
-            routing_key=GRAPH_EXECUTION_ROUTING_KEY,
-            message=graph_exec_entry.model_dump_json(),
-            exchange=GRAPH_EXECUTION_EXCHANGE,
-        )
-        return graph_exec_entry
-
-    @expose
-    def cancel_execution(self, graph_exec_id: str) -> None:
-        """
-        Mechanism:
-        1. Set the cancel event
-        2. Graph executor's cancel handler thread detects the event, terminates workers,
-           reinitializes worker pool, and returns.
-        3. Update execution statuses in DB and set `error` outputs to `"TERMINATED"`.
-        """
-        self.rabbitmq_service.publish_message(
-            routing_key="",
-            message=CancelExecutionEvent(graph_exec_id=graph_exec_id).model_dump_json(),
-            exchange=GRAPH_EXECUTION_CANCEL_EXCHANGE,
-        )
-
-        # Update the status of the graph & node executions
-        self.db_client.update_graph_execution_stats(
-            graph_exec_id,
-            ExecutionStatus.TERMINATED,
-        )
-        node_execs = self.db_client.get_node_execution_results(
-            graph_exec_id=graph_exec_id,
-            statuses=[
-                ExecutionStatus.QUEUED,
-                ExecutionStatus.RUNNING,
-                ExecutionStatus.INCOMPLETE,
-            ],
-        )
-        self.db_client.update_node_execution_status_batch(
-            [node_exec.node_exec_id for node_exec in node_execs],
-            ExecutionStatus.TERMINATED,
-        )
-        for node_exec in node_execs:
-            node_exec.status = ExecutionStatus.TERMINATED
-            self.db_client.send_execution_update(node_exec)
-
-    def _validate_node_input_credentials(self, graph: GraphModel, user_id: str):
-        """Checks all credentials for all nodes of the graph"""
-
-        for node in graph.nodes:
-            block = node.block
-
-            # Find any fields of type CredentialsMetaInput
-            credentials_fields = cast(
-                type[BlockSchema], block.input_schema
-            ).get_credentials_fields()
-            if not credentials_fields:
-                continue
-
-            for field_name, credentials_meta_type in credentials_fields.items():
-                credentials_meta = credentials_meta_type.model_validate(
-                    node.input_default[field_name]
-                )
-                # Fetch the corresponding Credentials and perform sanity checks
-                credentials = self.credentials_store.get_creds_by_id(
-                    user_id, credentials_meta.id
-                )
-                if not credentials:
-                    raise ValueError(
-                        f"Unknown credentials #{credentials_meta.id} "
-                        f"for node #{node.id} input '{field_name}'"
-                    )
-                if (
-                    credentials.provider != credentials_meta.provider
-                    or credentials.type != credentials_meta.type
-                ):
-                    logger.warning(
-                        f"Invalid credentials #{credentials.id} for node #{node.id}: "
-                        "type/provider mismatch: "
-                        f"{credentials_meta.type}<>{credentials.type};"
-                        f"{credentials_meta.provider}<>{credentials.provider}"
-                    )
-                    raise ValueError(
-                        f"Invalid credentials #{credentials.id} for node #{node.id}: "
-                        "type/provider mismatch"
-                    )
-

 # ------- UTILITIES ------- #

@@ -1319,6 +1028,10 @@ def get_notification_service() -> "NotificationManager":
    return get_service_client(NotificationManager)


+def send_execution_update(entry: GraphExecution | NodeExecutionResult):
+    """Publish `entry` on the execution event bus and return what `publish()` returns."""
+    return get_execution_event_bus().publish(entry)
+
+
@contextmanager
 def synchronized(key: str, timeout: int = 60):
    lock: RedisLock = redis.get_redis().lock(f"lock:{key}", timeout=timeout)
--- a/autogpt_platform/backend/backend/executor/scheduler.py
+++ b/autogpt_platform/backend/backend/executor/scheduler.py
@@ -16,7 +16,7 @@ from pydantic import BaseModel
 from sqlalchemy import MetaData, create_engine

 from backend.data.block import BlockInput
-from backend.executor.manager import ExecutionManager
+from backend.executor import utils as execution_utils
 from backend.notifications.notifications import NotificationManager
 from backend.util.service import AppService, expose, get_service_client
 from backend.util.settings import Config
@@ -57,11 +57,6 @@ def job_listener(event):
        log(f"Job {event.job_id} completed successfully.")


-@thread_cached
-def get_execution_client() -> ExecutionManager:
-    return get_service_client(ExecutionManager)
-
-
@thread_cached
 def get_notification_client():
    from backend.notifications import NotificationManager
@@ -73,7 +68,7 @@ def execute_graph(**kwargs):
    args = ExecutionJobArgs(**kwargs)
    try:
        log(f"Executing recurring job for graph #{args.graph_id}")
-        get_execution_client().add_execution(
+        execution_utils.add_graph_execution(
            graph_id=args.graph_id,
            data=args.input_data,
            user_id=args.user_id,
@@ -164,11 +159,6 @@ class Scheduler(AppService):
    def db_pool_size(cls) -> int:
        return config.scheduler_db_pool_size

-    @property
-    @thread_cached
-    def execution_client(self) -> ExecutionManager:
-        return get_service_client(ExecutionManager)
-
    @property
    @thread_cached
    def notification_client(self) -> NotificationManager:
--- a/autogpt_platform/backend/backend/executor/utils.py
+++ b/autogpt_platform/backend/backend/executor/utils.py
@@ -1,11 +1,70 @@
+import logging
+from typing import TYPE_CHECKING, Any, cast
+
+from autogpt_libs.utils.cache import thread_cached
 from pydantic import BaseModel

-from backend.data.block import Block, BlockInput
+from backend.data.block import (
+    Block,
+    BlockData,
+    BlockInput,
+    BlockSchema,
+    BlockType,
+    get_block,
+)
 from backend.data.block_cost_config import BLOCK_COSTS
 from backend.data.cost import BlockCostType
+from backend.data.execution import GraphExecutionEntry, RedisExecutionEventBus
+from backend.data.graph import GraphModel, Node
+from backend.data.rabbitmq import (
+    Exchange,
+    ExchangeType,
+    Queue,
+    RabbitMQConfig,
+    SyncRabbitMQ,
+)
+from backend.util.mock import MockObject
+from backend.util.service import get_service_client
 from backend.util.settings import Config
+from backend.util.type import convert
+
+if TYPE_CHECKING:
+    from backend.executor import DatabaseManager
+    from backend.integrations.credentials_store import IntegrationCredentialsStore

 config = Config()
+logger = logging.getLogger(__name__)
+
+# ============ Resource Helpers ============ #
+
+
+@thread_cached
+def get_execution_event_bus() -> RedisExecutionEventBus:
+    """Return a `RedisExecutionEventBus` (presumably reused per thread via `thread_cached`)."""
+    return RedisExecutionEventBus()
+
+
+@thread_cached
+def get_execution_queue() -> SyncRabbitMQ:
+    """Build a `SyncRabbitMQ` client from the execution queue config, connect it, and return it."""
+    queue_client = SyncRabbitMQ(create_execution_queue_config())
+    queue_client.connect()
+    return queue_client
+
+
+@thread_cached
+def get_integration_credentials_store() -> "IntegrationCredentialsStore":
+    """Return an `IntegrationCredentialsStore` (presumably reused per thread via `thread_cached`)."""
+    # Imported at call time: at module level this name is only imported under
+    # TYPE_CHECKING — presumably to avoid an import cycle (TODO confirm).
+    from backend.integrations.credentials_store import IntegrationCredentialsStore
+
+    return IntegrationCredentialsStore()
+
+
+@thread_cached
+def get_db_client() -> "DatabaseManager":
+    """Return the service client obtained from `get_service_client(DatabaseManager)`."""
+    # Imported at call time: at module level this name is only imported under
+    # TYPE_CHECKING — presumably to avoid an import cycle (TODO confirm).
+    from backend.executor import DatabaseManager
+
+    return get_service_client(DatabaseManager)
+
+
+# ============ Execution Cost Helpers ============ #


 class UsageTransactionMetadata(BaseModel):
@@ -95,3 +154,398 @@ def _is_cost_filter_match(cost_filter: BlockInput, input_data: BlockInput) -> bo
        or (input_data.get(k) and _is_cost_filter_match(v, input_data[k]))
        for k, v in cost_filter.items()
    )
+
+
+# ============ Execution Input Helpers ============ #
+
+# Delimiters that mark a dynamic pin name:
+#   "<name>_$_<index>"     -> item of a list
+#   "<name>_#_<key>"       -> entry of a dictionary
+#   "<name>_@_<attribute>" -> attribute of an object
+LIST_SPLIT = "_$_"
+DICT_SPLIT = "_#_"
+OBJC_SPLIT = "_@_"
+
+
+def parse_execution_output(output: BlockData, name: str) -> Any | None:
+    """
+    Extracts partial output data by name from a given BlockData.
+
+    The function supports extracting data from lists, dictionaries, and objects
+    using specific naming conventions:
+    - For lists: <output_name>_$_<index>
+    - For dictionaries: <output_name>_#_<key>
+    - For objects: <output_name>_@_<attribute>
+
+    Args:
+        output (BlockData): A tuple containing the output name and data.
+        name (str): The name used to extract specific data from the output.
+
+    Returns:
+        Any | None: The extracted data if found, otherwise None. A list index
+        that is not an integer, is negative, or is out of range yields None.
+
+    Examples:
+        >>> output = ("result", [10, 20, 30])
+        >>> parse_execution_output(output, "result_$_1")
+        20
+
+        >>> parse_execution_output(output, "result_$_oops") is None
+        True
+
+        >>> output = ("config", {"key1": "value1", "key2": "value2"})
+        >>> parse_execution_output(output, "config_#_key1")
+        'value1'
+
+        >>> class Sample:
+        ...     attr1 = "value1"
+        ...     attr2 = "value2"
+        >>> output = ("object", Sample())
+        >>> parse_execution_output(output, "object_@_attr1")
+        'value1'
+    """
+    output_name, output_data = output
+
+    if name == output_name:
+        return output_data
+
+    if name.startswith(f"{output_name}{LIST_SPLIT}"):
+        # A malformed index means "nothing to extract", not a crash.
+        try:
+            index = int(name.split(LIST_SPLIT)[1])
+        except ValueError:
+            return None
+        # Reject negative indices too: they would silently address the list
+        # from its end (or raise IndexError) instead of naming a real pin.
+        if not isinstance(output_data, list) or not 0 <= index < len(output_data):
+            return None
+        return output_data[index]
+
+    if name.startswith(f"{output_name}{DICT_SPLIT}"):
+        key = name.split(DICT_SPLIT)[1]
+        if not isinstance(output_data, dict) or key not in output_data:
+            return None
+        return output_data[key]
+
+    if name.startswith(f"{output_name}{OBJC_SPLIT}"):
+        attribute = name.split(OBJC_SPLIT)[1]
+        # Every value is an `object`, so `hasattr` alone decides the outcome.
+        if hasattr(output_data, attribute):
+            return getattr(output_data, attribute)
+        return None
+
+    return None
+
+
+def validate_exec(
+    node: Node,
+    data: BlockInput,
+    resolve_input: bool = True,
+) -> tuple[BlockInput | None, str]:
+    """
+    Validate the input data for a node execution.
+
+    The checks run in a fixed order: type conversion, link completeness,
+    merge with defaults (plus optional dynamic-pin resolution), required-input
+    completeness, and finally schema mismatch validation.
+
+    Note: the type conversion step writes converted values back into the
+    `data` dict passed by the caller before `data` is rebound to a new dict.
+
+    Args:
+        node: The node to execute.
+        data: The input data for the node execution.
+        resolve_input: Whether to resolve dynamic pins into dict/list/object.
+
+    Returns:
+        A tuple of the validated data and the block name.
+        If the data is invalid, the first element will be None, and the second element
+        will be an error message.
+        If the data is valid, the first element will be the resolved input data, and
+        the second element will be the block name.
+    """
+    node_block: Block | None = get_block(node.block_id)
+    if not node_block:
+        return None, f"Block for {node.block_id} not found."
+    schema = node_block.input_schema
+
+    # Convert non-matching data types to the expected input schema.
+    # Falsy values (None, 0, "", empty containers) are left untouched because
+    # the walrus expression is tested for truthiness first.
+    for name, data_type in schema.__annotations__.items():
+        if (value := data.get(name)) and (type(value) is not data_type):
+            data[name] = convert(value, data_type)
+
+    # Input data (without default values) should contain all required fields.
+    error_prefix = f"Input data missing or mismatch for `{node_block.name}`:"
+    if missing_links := schema.get_missing_links(data, node.input_links):
+        return None, f"{error_prefix} unpopulated links {missing_links}"
+
+    # Merge input data with default values and resolve dynamic dict/list/object pins.
+    # Explicitly supplied values win over defaults on key collisions.
+    input_default = schema.get_input_defaults(node.input_default)
+    data = {**input_default, **data}
+    if resolve_input:
+        data = merge_execution_input(data)
+
+    # Input data post-merge should contain all required fields from the schema.
+    if missing_input := schema.get_missing_input(data):
+        return None, f"{error_prefix} missing input {missing_input}"
+
+    # Last validation: Validate the input values against the schema.
+    # Only this failure is logged; the earlier ones are just returned.
+    if error := schema.get_mismatch_error(data):
+        error_message = f"{error_prefix} {error}"
+        logger.error(error_message)
+        return None, error_message
+
+    return data, node_block.name
+
+
+def merge_execution_input(data: BlockInput) -> BlockInput:
+    """
+    Merges dynamic input pins into a single list, dictionary, or object based on naming patterns.
+
+    This function processes input keys that follow specific patterns to merge them into a unified structure:
+    - `<input_name>_$_<index>` for list inputs.
+    - `<input_name>_#_<key>` for dictionary inputs.
+    - `<input_name>_@_<attribute>` for object inputs.
+
+    The merge happens in place: `data` itself is modified and returned, and the
+    original dynamic-pin keys are kept alongside the merged entries. A container
+    that already exists under `<input_name>` is extended in place as well.
+
+    Args:
+        data (BlockInput): A dictionary containing input keys and their corresponding values.
+
+    Returns:
+        BlockInput: The same dictionary, with merged inputs added.
+
+    Raises:
+        ValueError: If a list index is not an integer, or if a key contains a
+            delimiter more than once (the two-way unpacking of `split` fails).
+
+    Examples:
+        (the original `list_$_0`-style keys also remain in the result; they are
+        omitted below for brevity)
+
+        >>> data = {
+        ...     "list_$_0": "a",
+        ...     "list_$_1": "b",
+        ...     "dict_#_key1": "value1",
+        ...     "dict_#_key2": "value2",
+        ...     "object_@_attr1": "value1",
+        ...     "object_@_attr2": "value2"
+        ... }
+        >>> merge_execution_input(data)
+        {
+            "list": ["a", "b"],
+            "dict": {"key1": "value1", "key2": "value2"},
+            "object": <MockObject attr1="value1" attr2="value2">
+        }
+    """
+
+    # Merge all input with <input_name>_$_<index> into a single list.
+    # Snapshot the items first: the loops below add keys to `data`.
+    items = list(data.items())
+
+    for key, value in items:
+        if LIST_SPLIT not in key:
+            continue
+        name, index = key.split(LIST_SPLIT)
+        if not index.isdigit():
+            raise ValueError(f"Invalid key: {key}, #{index} index must be an integer.")
+
+        data[name] = data.get(name, [])
+        if int(index) >= len(data[name]):
+            # Pad list with empty string on missing indices.
+            data[name].extend([""] * (int(index) - len(data[name]) + 1))
+        data[name][int(index)] = value
+
+    # Merge all input with <input_name>_#_<index> into a single dict.
+    for key, value in items:
+        if DICT_SPLIT not in key:
+            continue
+        name, index = key.split(DICT_SPLIT)
+        data[name] = data.get(name, {})
+        data[name][index] = value
+
+    # Merge all input with <input_name>_@_<index> into a single object.
+    for key, value in items:
+        if OBJC_SPLIT not in key:
+            continue
+        name, index = key.split(OBJC_SPLIT)
+        # NOTE(review): `isinstance(x, object)` is always True, so only the
+        # `name not in data` half can trigger; an existing non-MockObject value
+        # is kept and receives the attribute — confirm this is intended.
+        if name not in data or not isinstance(data[name], object):
+            data[name] = MockObject()
+        setattr(data[name], index, value)
+
+    return data
+
+
+def _validate_node_input_credentials(graph: GraphModel, user_id: str):
+    """
+    Checks all credentials for all nodes of the graph.
+
+    For every credentials field declared by a node's block input schema, the
+    credentials referenced in the node's `input_default` must be present, must
+    exist in the integration credentials store for `user_id`, and must match
+    the referenced provider and type.
+
+    Args:
+        graph (GraphModel): The graph whose nodes are checked.
+        user_id (str): The ID of the user whose stored credentials are looked up.
+
+    Raises:
+        ValueError: If a credentials input is not set on the node, refers to
+            unknown credentials, or mismatches the stored provider/type.
+    """
+
+    for node in graph.nodes:
+        block = node.block
+
+        # Find any fields of type CredentialsMetaInput
+        credentials_fields = cast(
+            type[BlockSchema], block.input_schema
+        ).get_credentials_fields()
+        if not credentials_fields:
+            continue
+
+        for field_name, credentials_meta_type in credentials_fields.items():
+            # Report an unset credentials input as a validation error like the
+            # other failures here, rather than leaking a bare KeyError.
+            if field_name not in node.input_default:
+                raise ValueError(
+                    f"Credentials absent for node #{node.id} input '{field_name}'"
+                )
+            credentials_meta = credentials_meta_type.model_validate(
+                node.input_default[field_name]
+            )
+            # Fetch the corresponding Credentials and perform sanity checks
+            credentials = get_integration_credentials_store().get_creds_by_id(
+                user_id, credentials_meta.id
+            )
+            if not credentials:
+                raise ValueError(
+                    f"Unknown credentials #{credentials_meta.id} "
+                    f"for node #{node.id} input '{field_name}'"
+                )
+            if (
+                credentials.provider != credentials_meta.provider
+                or credentials.type != credentials_meta.type
+            ):
+                # The detailed mismatch goes to the log only; the raised error
+                # stays generic.
+                logger.warning(
+                    f"Invalid credentials #{credentials.id} for node #{node.id}: "
+                    "type/provider mismatch: "
+                    f"{credentials_meta.type}<>{credentials.type};"
+                    f"{credentials_meta.provider}<>{credentials.provider}"
+                )
+                raise ValueError(
+                    f"Invalid credentials #{credentials.id} for node #{node.id}: "
+                    "type/provider mismatch"
+                )
+
+
+def construct_node_execution_input(
+    graph: GraphModel,
+    user_id: str,
+    data: BlockInput,
+) -> list[tuple[str, BlockInput]]:
+    """
+    Build the validated input for every starting node of a graph run.
+
+    The graph and its node credentials are validated first. Then, for each
+    starting node (note blocks are skipped), the matching request input or
+    webhook payload is picked out of `data` and passed through `validate_exec`.
+
+    Args:
+        graph (GraphModel): The graph model to execute.
+        user_id (str): The ID of the user executing the graph.
+        data (BlockInput): The input data for the graph execution.
+
+    Returns:
+        list[tuple[str, BlockInput]]: One `(node ID, input data)` tuple per
+            executable starting node.
+
+    Raises:
+        ValueError: If a webhook payload is missing, a node's input fails
+            validation, or no starting node produced any input.
+    """
+    graph.validate_graph(for_run=True)
+    _validate_node_input_credentials(graph, user_id)
+
+    webhook_block_types = (BlockType.WEBHOOK, BlockType.WEBHOOK_MANUAL)
+    prepared = []
+
+    for node in graph.starting_nodes:
+        block = node.block
+
+        # Note block should never be executed.
+        if block.block_type == BlockType.NOTE:
+            continue
+
+        pin_values = {}
+
+        # Agent input blocks take their value from the request data, by name.
+        if block.block_type == BlockType.INPUT:
+            input_name = node.input_default.get("name")
+            if input_name and input_name in data:
+                pin_values = {"value": data[input_name]}
+
+        # Webhook blocks bound to a webhook take the delivered payload.
+        if block.block_type in webhook_block_types and node.webhook_id:
+            payload_key = f"webhook_{node.webhook_id}_payload"
+            if payload_key not in data:
+                raise ValueError(
+                    f"Node {block.name} #{node.id} webhook payload is missing"
+                )
+            pin_values = {"payload": data[payload_key]}
+
+        validated, error = validate_exec(node, pin_values)
+        if validated is None:
+            raise ValueError(error)
+        prepared.append((node.id, validated))
+
+    if prepared:
+        return prepared
+
+    raise ValueError(
+        "No starting nodes found for the graph, make sure an AgentInput or blocks with no inbound links are present as starting nodes."
+    )
+
+
+# ============ Execution Queue Helpers ============ #
+
+
+class CancelExecutionEvent(BaseModel):
+    """Message payload that names the graph execution to cancel."""
+
+    graph_exec_id: str
+
+
+# Direct exchange, queue name and routing key for "run this graph" requests.
+GRAPH_EXECUTION_EXCHANGE = Exchange(
+    name="graph_execution",
+    type=ExchangeType.DIRECT,
+    durable=True,
+    auto_delete=False,
+)
+GRAPH_EXECUTION_QUEUE_NAME = "graph_execution_queue"
+GRAPH_EXECUTION_ROUTING_KEY = "graph_execution.run"
+
+# Fanout exchange and queue name for cancel requests.
+# NOTE(review): this exchange is declared durable but also `auto_delete=True`,
+# while the queue bound to it in `create_execution_queue_config` uses
+# `auto_delete=False` — confirm the combination is intended.
+GRAPH_EXECUTION_CANCEL_EXCHANGE = Exchange(
+    name="graph_execution_cancel",
+    type=ExchangeType.FANOUT,
+    durable=True,
+    auto_delete=True,
+)
+GRAPH_EXECUTION_CANCEL_QUEUE_NAME = "graph_execution_cancel_queue"
+
+
+def create_execution_queue_config() -> RabbitMQConfig:
+    """
+    Build the RabbitMQ configuration used for graph execution messaging.
+
+    It declares two exchanges, each with one durable queue:
+    - 'graph_execution' (DIRECT) for run tasks.
+    - 'graph_execution_cancel' (FANOUT) for cancel requests.
+    """
+    exchanges = [GRAPH_EXECUTION_EXCHANGE, GRAPH_EXECUTION_CANCEL_EXCHANGE]
+    queues = [
+        Queue(
+            name=GRAPH_EXECUTION_QUEUE_NAME,
+            exchange=GRAPH_EXECUTION_EXCHANGE,
+            routing_key=GRAPH_EXECUTION_ROUTING_KEY,
+            durable=True,
+            auto_delete=False,
+        ),
+        Queue(
+            name=GRAPH_EXECUTION_CANCEL_QUEUE_NAME,
+            exchange=GRAPH_EXECUTION_CANCEL_EXCHANGE,
+            routing_key="",  # not used for FANOUT
+            durable=True,
+            auto_delete=False,
+        ),
+    ]
+    return RabbitMQConfig(vhost="/", exchanges=exchanges, queues=queues)
+
+
+def add_graph_execution(
+    graph_id: str,
+    data: BlockInput,
+    user_id: str,
+    graph_version: int | None = None,
+    preset_id: str | None = None,
+) -> GraphExecutionEntry:
+    """
+    Create a graph execution, announce it, and enqueue it for execution.
+
+    The graph is loaded through the database client, its starting-node input
+    is validated, and a graph execution record is created. The new execution
+    is published on the execution event bus and its entry is then sent to the
+    run queue.
+
+    Args:
+        graph_id (str): The ID of the graph to execute.
+        data (BlockInput): The input data for the graph execution.
+        user_id (str): The ID of the user executing the graph.
+        graph_version (int | None): The version of the graph to execute. Defaults to None.
+        preset_id (str | None): The ID of the preset to use. Defaults to None.
+    Returns:
+        GraphExecutionEntry: The entry for the graph execution.
+    Raises:
+        ValueError: If the graph is not found or if there are validation errors.
+    """
+    db_client = get_db_client()
+
+    graph: GraphModel | None = db_client.get_graph(
+        graph_id=graph_id, user_id=user_id, version=graph_version
+    )
+    if graph is None or not graph:
+        raise ValueError(f"Graph #{graph_id} not found.")
+
+    nodes_input = construct_node_execution_input(graph, user_id, data)
+    graph_exec = db_client.create_graph_execution(
+        graph_id=graph_id,
+        graph_version=graph.version,
+        nodes_input=nodes_input,
+        user_id=user_id,
+        preset_id=preset_id,
+    )
+
+    # Announce the new execution before handing it to the executors.
+    get_execution_event_bus().publish(graph_exec)
+
+    entry = graph_exec.to_graph_execution_entry()
+    get_execution_queue().publish_message(
+        routing_key=GRAPH_EXECUTION_ROUTING_KEY,
+        message=entry.model_dump_json(),
+        exchange=GRAPH_EXECUTION_EXCHANGE,
+    )
+    return entry
--- a/autogpt_platform/backend/backend/server/external/routes/v1.py
+++ b/autogpt_platform/backend/backend/server/external/routes/v1.py
@@ -2,7 +2,6 @@ import logging
 from collections import defaultdict
 from typing import Annotated, Any, Dict, List, Optional, Sequence

-from autogpt_libs.utils.cache import thread_cached
 from fastapi import APIRouter, Body, Depends, HTTPException
 from prisma.enums import AgentExecutionStatus, APIKeyPermission
 from typing_extensions import TypedDict
@@ -13,17 +12,10 @@ from backend.data import graph as graph_db
 from backend.data.api_key import APIKey
 from backend.data.block import BlockInput, CompletedBlockOutput
 from backend.data.execution import NodeExecutionResult
-from backend.executor import ExecutionManager
 from backend.server.external.middleware import require_permission
-from backend.util.service import get_service_client
+from backend.server.routers import v1 as internal_api_routes
 from backend.util.settings import Settings

-
-@thread_cached
-def execution_manager_client() -> ExecutionManager:
-    return get_service_client(ExecutionManager)
-
-
 settings = Settings()
 logger = logging.getLogger(__name__)

@@ -98,18 +90,18 @@ def execute_graph_block(
    path="/graphs/{graph_id}/execute/{graph_version}",
    tags=["graphs"],
 )
-def execute_graph(
+async def execute_graph(
    graph_id: str,
    graph_version: int,
    node_input: Annotated[dict[str, Any], Body(..., embed=True, default_factory=dict)],
    api_key: APIKey = Depends(require_permission(APIKeyPermission.EXECUTE_GRAPH)),
 ) -> dict[str, Any]:
    try:
-        graph_exec = execution_manager_client().add_execution(
-            graph_id,
-            graph_version=graph_version,
-            data=node_input,
+        graph_exec = await internal_api_routes.execute_graph(
+            graph_id=graph_id,
+            node_input=node_input,
            user_id=api_key.user_id,
+            graph_version=graph_version,
        )
        return {"id": graph_exec.graph_exec_id}
    except Exception as e:
--- a/autogpt_platform/backend/backend/server/integrations/router.py
+++ b/autogpt_platform/backend/backend/server/integrations/router.py
@@ -1,3 +1,4 @@
+import asyncio
 import logging
 from typing import TYPE_CHECKING, Annotated, Literal

@@ -14,13 +15,12 @@ from backend.data.integrations import (
    wait_for_webhook_event,
 )
 from backend.data.model import Credentials, CredentialsType, OAuth2Credentials
-from backend.executor.manager import ExecutionManager
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.integrations.oauth import HANDLERS_BY_NAME
 from backend.integrations.providers import ProviderName
 from backend.integrations.webhooks import get_webhook_manager
+from backend.server.routers import v1 as internal_api_routes
 from backend.util.exceptions import NeedConfirmation, NotFoundError
-from backend.util.service import get_service_client
 from backend.util.settings import Settings

 if TYPE_CHECKING:
@@ -309,19 +309,22 @@ async def webhook_ingress_generic(
    if not webhook.attached_nodes:
        return

-    executor = get_service_client(ExecutionManager)
+    executions = []
    for node in webhook.attached_nodes:
        logger.debug(f"Webhook-attached node: {node}")
        if not node.is_triggered_by_event_type(event_type):
            logger.debug(f"Node #{node.id} doesn't trigger on event {event_type}")
            continue
        logger.debug(f"Executing graph #{node.graph_id} node #{node.id}")
-        executor.add_execution(
-            graph_id=node.graph_id,
-            graph_version=node.graph_version,
-            data={f"webhook_{webhook_id}_payload": payload},
-            user_id=webhook.user_id,
+        executions.append(
+            internal_api_routes.execute_graph(
+                graph_id=node.graph_id,
+                graph_version=node.graph_version,
+                node_input={f"webhook_{webhook_id}_payload": payload},
+                user_id=webhook.user_id,
+            )
        )
+    asyncio.gather(*executions)


@router.post("/webhooks/{webhook_id}/ping")
--- a/autogpt_platform/backend/backend/server/rest_api.py
+++ b/autogpt_platform/backend/backend/server/rest_api.py
@@ -17,7 +17,6 @@ import backend.data.block
 import backend.data.db
 import backend.data.graph
 import backend.data.user
-import backend.server.integrations.router
 import backend.server.routers.postmark.postmark
 import backend.server.routers.v1
 import backend.server.v2.admin.store_admin_routes
@@ -156,7 +155,7 @@ class AgentServer(backend.util.service.AppProcess):
        graph_version: Optional[int] = None,
        node_input: Optional[dict[str, Any]] = None,
    ):
-        return backend.server.routers.v1.execute_graph(
+        return await backend.server.routers.v1.execute_graph(
            user_id=user_id,
            graph_id=graph_id,
            graph_version=graph_version,
@@ -275,7 +274,9 @@ class AgentServer(backend.util.service.AppProcess):
        provider: ProviderName,
        credentials: Credentials,
    ) -> Credentials:
-        return backend.server.integrations.router.create_credentials(
+        from backend.server.integrations.router import create_credentials
+
+        return create_credentials(
            user_id=user_id, provider=provider, credentials=credentials
        )

--- a/autogpt_platform/backend/backend/server/routers/v1.py
+++ b/autogpt_platform/backend/backend/server/routers/v1.py
@@ -2,7 +2,7 @@ import asyncio
 import logging
 from collections import defaultdict
 from datetime import datetime
-from typing import TYPE_CHECKING, Annotated, Any, Sequence
+from typing import TYPE_CHECKING, Annotated, Any, Coroutine, Sequence

 import pydantic
 import stripe
@@ -13,7 +13,6 @@ from fastapi import APIRouter, Body, Depends, HTTPException, Request, Response
 from starlette.status import HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND
 from typing_extensions import Optional, TypedDict

-import backend.data.block
 import backend.server.integrations.router
 import backend.server.routers.analytics
 import backend.server.v2.library.db as library_db
@@ -31,7 +30,7 @@ from backend.data.api_key import (
    suspend_api_key,
    update_api_key_permissions,
 )
-from backend.data.block import BlockInput, CompletedBlockOutput
+from backend.data.block import BlockInput, CompletedBlockOutput, get_block, get_blocks
 from backend.data.credit import (
    AutoTopUpConfig,
    RefundRequest,
@@ -41,6 +40,7 @@ from backend.data.credit import (
    get_user_credit_model,
    set_auto_top_up,
 )
+from backend.data.execution import AsyncRedisExecutionEventBus
 from backend.data.notifications import NotificationPreference, NotificationPreferenceDTO
 from backend.data.onboarding import (
    UserOnboardingUpdate,
@@ -49,13 +49,16 @@ from backend.data.onboarding import (
    onboarding_enabled,
    update_user_onboarding,
 )
+from backend.data.rabbitmq import AsyncRabbitMQ
 from backend.data.user import (
    get_or_create_user,
    get_user_notification_preference,
    update_user_email,
    update_user_notification_preference,
 )
-from backend.executor import ExecutionManager, Scheduler, scheduler
+from backend.executor import Scheduler, scheduler
+from backend.executor import utils as execution_utils
+from backend.executor.utils import create_execution_queue_config
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.integrations.webhooks.graph_lifecycle_hooks import (
    on_graph_activate,
@@ -79,13 +82,23 @@ if TYPE_CHECKING:


@thread_cached
-def execution_manager_client() -> ExecutionManager:
-    return get_service_client(ExecutionManager)
+def execution_scheduler_client() -> Scheduler:
+    return get_service_client(Scheduler)


@thread_cached
-def execution_scheduler_client() -> Scheduler:
-    return get_service_client(Scheduler)
+def execution_queue_client() -> "asyncio.Future[AsyncRabbitMQ]":
+    """
+    Return an awaitable that resolves to a connected `AsyncRabbitMQ` client.
+
+    Assuming `thread_cached` stores the returned object and hands the same one
+    back on later calls (TODO confirm), that object must be safe to await more
+    than once. A bare coroutine object can only be awaited a single time; a
+    second `await` raises `RuntimeError`. The connection coroutine is therefore
+    wrapped in a future, which can be awaited repeatedly and keeps resolving
+    to the same client.
+
+    Intended to be called from within a running event loop. If connecting
+    fails, the future holds that exception and re-raises it on every await.
+
+    Returns:
+        A future whose result is the connected `AsyncRabbitMQ` client.
+    """
+
+    async def connect() -> AsyncRabbitMQ:
+        client = AsyncRabbitMQ(create_execution_queue_config())
+        await client.connect()
+        return client
+
+    connecting: Coroutine[None, None, AsyncRabbitMQ] = connect()
+    return asyncio.ensure_future(connecting)
+
+
@thread_cached
+def execution_event_bus() -> AsyncRedisExecutionEventBus:
+    """Return an `AsyncRedisExecutionEventBus` (presumably reused per thread via `thread_cached`)."""
+    return AsyncRedisExecutionEventBus()


 settings = Settings()
@@ -206,7 +219,7 @@ async def is_onboarding_enabled():

@v1_router.get(path="/blocks", tags=["blocks"], dependencies=[Depends(auth_middleware)])
 def get_graph_blocks() -> Sequence[dict[Any, Any]]:
-    blocks = [block() for block in backend.data.block.get_blocks().values()]
+    blocks = [block() for block in get_blocks().values()]
    costs = get_block_costs()
    return [
        {**b.to_dict(), "costs": costs.get(b.id, [])} for b in blocks if not b.disabled
@@ -219,7 +232,7 @@ def get_graph_blocks() -> Sequence[dict[Any, Any]]:
    dependencies=[Depends(auth_middleware)],
 )
 def execute_graph_block(block_id: str, data: BlockInput) -> CompletedBlockOutput:
-    obj = backend.data.block.get_block(block_id)
+    obj = get_block(block_id)
    if not obj:
        raise HTTPException(status_code=404, detail=f"Block #{block_id} not found.")

@@ -308,7 +321,7 @@ async def configure_user_auto_top_up(
    dependencies=[Depends(auth_middleware)],
 )
 async def get_user_auto_top_up(
-    user_id: Annotated[str, Depends(get_user_id)]
+    user_id: Annotated[str, Depends(get_user_id)],
 ) -> AutoTopUpConfig:
    return await get_auto_top_up(user_id)

@@ -375,7 +388,7 @@ async def get_credit_history(

@v1_router.get(path="/credits/refunds", dependencies=[Depends(auth_middleware)])
 async def get_refund_requests(
-    user_id: Annotated[str, Depends(get_user_id)]
+    user_id: Annotated[str, Depends(get_user_id)],
 ) -> list[RefundRequest]:
    return await _user_credit_model.get_refund_requests(user_id)

@@ -391,7 +404,7 @@ class DeleteGraphResponse(TypedDict):

@v1_router.get(path="/graphs", tags=["graphs"], dependencies=[Depends(auth_middleware)])
 async def get_graphs(
-    user_id: Annotated[str, Depends(get_user_id)]
+    user_id: Annotated[str, Depends(get_user_id)],
 ) -> Sequence[graph_db.GraphModel]:
    return await graph_db.get_graphs(filter_by="active", user_id=user_id)

@@ -580,16 +593,35 @@ async def set_graph_active_version(
    tags=["graphs"],
    dependencies=[Depends(auth_middleware)],
 )
-def execute_graph(
+async def execute_graph(
    graph_id: str,
    node_input: Annotated[dict[str, Any], Body(..., default_factory=dict)],
    user_id: Annotated[str, Depends(get_user_id)],
    graph_version: Optional[int] = None,
+    preset_id: Optional[str] = None,
 ) -> ExecuteGraphResponse:
-    graph_exec = execution_manager_client().add_execution(
-        graph_id, node_input, user_id=user_id, graph_version=graph_version
+    graph: graph_db.GraphModel | None = await graph_db.get_graph(
+        graph_id=graph_id, user_id=user_id, version=graph_version
    )
-    return ExecuteGraphResponse(graph_exec_id=graph_exec.graph_exec_id)
+    if not graph:
+        raise ValueError(f"Graph #{graph_id} not found.")
+
+    graph_exec = await execution_db.create_graph_execution(
+        graph_id=graph_id,
+        graph_version=graph.version,
+        nodes_input=execution_utils.construct_node_execution_input(
+            graph, user_id, node_input
+        ),
+        user_id=user_id,
+        preset_id=preset_id,
+    )
+    execution_utils.get_execution_event_bus().publish(graph_exec)
+    execution_utils.get_execution_queue().publish_message(
+        routing_key=execution_utils.GRAPH_EXECUTION_ROUTING_KEY,
+        message=graph_exec.to_graph_execution_entry().model_dump_json(),
+        exchange=execution_utils.GRAPH_EXECUTION_EXCHANGE,
+    )
+    return ExecuteGraphResponse(graph_exec_id=graph_exec.id)


@v1_router.post(
@@ -605,9 +637,7 @@ async def stop_graph_run(
    ):
        raise HTTPException(404, detail=f"Agent execution #{graph_exec_id} not found")

-    await asyncio.to_thread(
-        lambda: execution_manager_client().cancel_execution(graph_exec_id)
-    )
+    await _cancel_execution(graph_exec_id)

    # Retrieve & return canceled graph execution in its final state
    result = await execution_db.get_graph_execution(
@@ -621,6 +651,49 @@ async def stop_graph_run(
    return result


+async def _cancel_execution(graph_exec_id: str):
+    """
+    Request cancellation of a graph execution and mark it as terminated.
+
+    Steps:
+    1. Publish a `CancelExecutionEvent` for `graph_exec_id` to the cancel
+       exchange. The graph executor's cancel handler is expected to pick it
+       up and stop the run (that side is not visible here).
+    2. Set the graph execution status to TERMINATED in the DB.
+    3. Set every QUEUED / RUNNING / INCOMPLETE node execution of the run to
+       TERMINATED in one batch, and publish each updated node execution on
+       the execution event bus.
+    """
+    status = execution_db.ExecutionStatus
+
+    mq_client = await execution_queue_client()
+    cancel_event = execution_utils.CancelExecutionEvent(graph_exec_id=graph_exec_id)
+    await mq_client.publish_message(
+        routing_key="",
+        message=cancel_event.model_dump_json(),
+        exchange=execution_utils.GRAPH_EXECUTION_CANCEL_EXCHANGE,
+    )
+
+    # Update the status of the graph & node executions
+    await execution_db.update_graph_execution_stats(graph_exec_id, status.TERMINATED)
+
+    unfinished = await execution_db.get_node_execution_results(
+        graph_exec_id=graph_exec_id,
+        statuses=[status.QUEUED, status.RUNNING, status.INCOMPLETE],
+    )
+    # Work on copies carrying the new status; these are what gets published.
+    terminated_execs = [
+        node_exec.model_copy(update={"status": status.TERMINATED})
+        for node_exec in unfinished
+    ]
+
+    terminated_ids = [node_exec.node_exec_id for node_exec in terminated_execs]
+    await execution_db.update_node_execution_status_batch(
+        terminated_ids,
+        status.TERMINATED,
+    )
+
+    publications = [
+        execution_event_bus().publish(node_exec) for node_exec in terminated_execs
+    ]
+    await asyncio.gather(*publications)
+
+
@v1_router.get(
    path="/executions",
    tags=["graphs"],
@@ -792,7 +865,7 @@ async def create_api_key(
    dependencies=[Depends(auth_middleware)],
 )
 async def get_api_keys(
-    user_id: Annotated[str, Depends(get_user_id)]
+    user_id: Annotated[str, Depends(get_user_id)],
 ) -> list[APIKeyWithoutHash]:
    """List all API keys for the user"""
    try:
--- a/autogpt_platform/backend/backend/server/v2/library/routes/presets.py
+++ b/autogpt_platform/backend/backend/server/v2/library/routes/presets.py
@@ -2,25 +2,16 @@ import logging
 from typing import Annotated, Any

 import autogpt_libs.auth as autogpt_auth_lib
-import autogpt_libs.utils.cache
 from fastapi import APIRouter, Body, Depends, HTTPException, status

-import backend.executor
 import backend.server.v2.library.db as db
 import backend.server.v2.library.model as models
-import backend.util.service

 logger = logging.getLogger(__name__)

 router = APIRouter()


-@autogpt_libs.utils.cache.thread_cached
-def execution_manager_client() -> backend.executor.ExecutionManager:
-    """Return a cached instance of ExecutionManager client."""
-    return backend.util.service.get_service_client(backend.executor.ExecutionManager)
-
-
@router.get(
    "/presets",
    summary="List presets",
@@ -216,6 +207,8 @@ async def execute_preset(
        HTTPException: If the preset is not found or an error occurs while executing the preset.
    """
    try:
+        from backend.server.routers import v1 as internal_api_routes
+
        preset = await db.get_preset(user_id, preset_id)
        if not preset:
            raise HTTPException(
@@ -226,10 +219,10 @@ async def execute_preset(
        # Merge input overrides with preset inputs
        merged_node_input = preset.inputs | node_input

-        execution = execution_manager_client().add_execution(
+        execution = await internal_api_routes.execute_graph(
            graph_id=graph_id,
+            node_input=merged_node_input,
            graph_version=graph_version,
-            data=merged_node_input,
            user_id=user_id,
            preset_id=preset_id,
        )
--- a/autogpt_platform/backend/test/executor/test_execution_functions.py
+++ b/autogpt_platform/backend/test/executor/test_execution_functions.py
@@ -1,4 +1,4 @@
-from backend.data.execution import merge_execution_input, parse_execution_output
+from backend.executor.utils import merge_execution_input, parse_execution_output


 def test_parse_execution_output():
--- a/autogpt_platform/backend/test/test_data_creator.py
+++ b/autogpt_platform/backend/test/test_data_creator.py
@@ -10,16 +10,12 @@ from prisma.types import (
    AgentGraphCreateInput,
    AgentNodeCreateInput,
    AgentNodeLinkCreateInput,
-    AgentPresetCreateInput,
    AnalyticsDetailsCreateInput,
    AnalyticsMetricsCreateInput,
    APIKeyCreateInput,
    CreditTransactionCreateInput,
-    LibraryAgentCreateInput,
    ProfileCreateInput,
-    StoreListingCreateInput,
    StoreListingReviewCreateInput,
-    StoreListingVersionCreateInput,
    UserCreateInput,
 )