From f520b646938935e9ecc5f4427feb0188843c459c Mon Sep 17 00:00:00 2001
From: majdyz <majdy.zamil@gmail.com>
Date: Fri, 10 Apr 2026 08:21:41 +0000
Subject: [PATCH 01/24] fix(backend/orchestrator): charge per LLM iteration and
 per tool call

The OrchestratorBlock in agent mode makes multiple LLM calls in a single
node execution, but the executor was only charging the user once per run.
Tools spawned by the orchestrator also bypassed _charge_usage entirely,
producing free internal block executions.

Two fixes:

1. Per-iteration LLM charging
   - Add `Block.charge_per_llm_call` class flag (default False)
   - OrchestratorBlock sets it to True
   - After on_node_execution completes, the executor calls
     `charge_extra_iterations(node_exec, llm_call_count - 1)` which
     debits `base_cost * extra_iterations` additional credits via
     spend_credits, using the same per-model cost from BLOCK_COSTS.
   - Skipped for dry runs and failed runs.

2. Tool execution charging
   - In `_execute_single_tool_with_manager`, call the new public
     `ExecutionProcessor.charge_node_usage()` before invoking
     `on_node_execution()` for the spawned tool node.
   - Tools now incur the same credit cost as queue-driven node
     executions; the cost is also added to the orchestrator's
     `extra_cost` so it shows up in graph stats.

Tests cover the flag opt-in, the charge_extra_iterations math
(positive, zero, negative iterations, zero base cost).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/backend/blocks/_base.py           |   7 +
 .../backend/backend/blocks/orchestrator.py    |  17 +++
 .../test_orchestrator_per_iteration_cost.py   | 134 ++++++++++++++++++
 .../backend/backend/executor/manager.py       |  86 +++++++++++
 4 files changed, 244 insertions(+)
 create mode 100644 autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py

diff --git a/autogpt_platform/backend/backend/blocks/_base.py b/autogpt_platform/backend/backend/blocks/_base.py
index 56986d15c4..1eeec991f8 100644
--- a/autogpt_platform/backend/backend/blocks/_base.py
+++ b/autogpt_platform/backend/backend/blocks/_base.py
@@ -420,6 +420,13 @@ class BlockWebhookConfig(BlockManualWebhookConfig):
 class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
     _optimized_description: ClassVar[str | None] = None
 
+    # Set to True for blocks (e.g. OrchestratorBlock in agent mode) that
+    # may make multiple LLM calls in a single run. The executor will then
+    # charge `block_cost * (llm_call_count - 1)` extra credits after the
+    # block completes — the first call is already covered by the upfront
+    # `_charge_usage()` call. Defaults to False (single charge per run).
+    charge_per_llm_call: ClassVar[bool] = False
+
     def __init__(
         self,
         id: str = "",
diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 6bdbf9acb3..9d23ff9dcb 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -369,6 +369,11 @@ class OrchestratorBlock(Block):
     single-shot and iterative agent mode execution.
     """
 
+    # In agent mode the block makes one LLM call per iteration. The executor
+    # uses this flag to charge `cost * (llm_call_count - 1)` extra credits
+    # after the block completes; the first call is covered by _charge_usage().
+    charge_per_llm_call = True
+
     # MCP server name used by the Claude Code SDK execution mode.  Keep in sync
     # with _create_graph_mcp_server and the MCP_PREFIX derivation in _execute_tools_sdk_mode.
     _SDK_MCP_SERVER_NAME = "graph_tools"
@@ -1102,6 +1107,18 @@ class OrchestratorBlock(Block):
                 execution_processor.execution_stats_lock,
             )
 
+            # Charge user credits for the tool execution. Tools spawned by the
+            # orchestrator bypass the main execution queue (where _charge_usage
+            # is called), so we must charge here to avoid free tool execution.
+            # Skipped for dry runs and when block has no cost configured.
+            if not execution_params.execution_context.dry_run:
+                tool_cost, _ = await asyncio.to_thread(
+                    execution_processor.charge_node_usage,
+                    node_exec_entry,
+                )
+                if tool_cost > 0:
+                    self.merge_stats(NodeExecutionStats(extra_cost=tool_cost))
+
             # Create a completed future for the task tracking system
             node_exec_future = Future()
             node_exec_progress.add_task(
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
new file mode 100644
index 0000000000..917871d3d7
--- /dev/null
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -0,0 +1,134 @@
+"""Tests for OrchestratorBlock per-iteration cost charging.
+
+The OrchestratorBlock in agent mode makes multiple LLM calls in a single
+node execution. The executor uses ``Block.charge_per_llm_call`` to detect
+this and charge ``base_cost * (llm_call_count - 1)`` extra credits after
+the block completes.
+"""
+
+from unittest.mock import MagicMock
+
+from backend.blocks.orchestrator import OrchestratorBlock
+
+
+class TestChargePerLlmCallFlag:
+    """OrchestratorBlock opts into per-LLM-call billing."""
+
+    def test_orchestrator_opts_in(self):
+        assert OrchestratorBlock.charge_per_llm_call is True
+
+    def test_default_block_does_not_opt_in(self):
+        from backend.blocks._base import Block
+
+        assert Block.charge_per_llm_call is False
+
+
+class TestChargeExtraIterations:
+    """The executor charges ``cost * (llm_call_count - 1)`` extra credits."""
+
+    def _make_processor_with_block_cost(self, base_cost: int):
+        """Build a minimal ExecutionProcessor stub with a stubbed block lookup."""
+        from backend.executor import manager
+
+        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+
+        # Stub the spend_credits client and block_usage_cost helper.
+        spent: list[int] = []
+
+        class FakeDb:
+            def spend_credits(self, *, user_id, cost, metadata):
+                spent.append(cost)
+                return 0
+
+        # Patch get_db_client and get_block + block_usage_cost on the manager
+        # module so charge_extra_iterations sees deterministic values.
+        original_get_db = manager.get_db_client
+        original_get_block = manager.get_block
+        original_block_usage_cost = manager.block_usage_cost
+
+        def restore():
+            manager.get_db_client = original_get_db
+            manager.get_block = original_get_block
+            manager.block_usage_cost = original_block_usage_cost
+
+        manager.get_db_client = lambda: FakeDb()
+        manager.get_block = lambda block_id: MagicMock(name="block")
+        manager.block_usage_cost = lambda block, input_data: (
+            base_cost,
+            {"model": "claude-sonnet-4-6"},
+        )
+
+        return proc, spent, restore
+
+    def test_zero_extra_iterations_charges_nothing(self):
+        proc, spent, restore = self._make_processor_with_block_cost(base_cost=10)
+        try:
+            node_exec = MagicMock()
+            node_exec.user_id = "u"
+            node_exec.graph_exec_id = "g"
+            node_exec.graph_id = "g"
+            node_exec.node_exec_id = "ne"
+            node_exec.node_id = "n"
+            node_exec.block_id = "b"
+            node_exec.inputs = {}
+
+            charged = proc.charge_extra_iterations(node_exec, extra_iterations=0)
+            assert charged == 0
+            assert spent == []
+        finally:
+            restore()
+
+    def test_extra_iterations_multiplies_base_cost(self):
+        proc, spent, restore = self._make_processor_with_block_cost(base_cost=10)
+        try:
+            node_exec = MagicMock()
+            node_exec.user_id = "u"
+            node_exec.graph_exec_id = "g"
+            node_exec.graph_id = "g"
+            node_exec.node_exec_id = "ne"
+            node_exec.node_id = "n"
+            node_exec.block_id = "b"
+            node_exec.inputs = {}
+
+            charged = proc.charge_extra_iterations(node_exec, extra_iterations=4)
+            # 4 extra iterations × 10 base_cost = 40
+            assert charged == 40
+            assert spent == [40]
+        finally:
+            restore()
+
+    def test_zero_base_cost_skips_charge(self):
+        proc, spent, restore = self._make_processor_with_block_cost(base_cost=0)
+        try:
+            node_exec = MagicMock()
+            node_exec.user_id = "u"
+            node_exec.graph_exec_id = "g"
+            node_exec.graph_id = "g"
+            node_exec.node_exec_id = "ne"
+            node_exec.node_id = "n"
+            node_exec.block_id = "b"
+            node_exec.inputs = {}
+
+            charged = proc.charge_extra_iterations(node_exec, extra_iterations=4)
+            assert charged == 0
+            assert spent == []
+        finally:
+            restore()
+
+    def test_negative_extra_iterations_charges_nothing(self):
+        proc, spent, restore = self._make_processor_with_block_cost(base_cost=10)
+        try:
+            node_exec = MagicMock()
+            node_exec.user_id = "u"
+            node_exec.graph_exec_id = "g"
+            node_exec.graph_id = "g"
+            node_exec.node_exec_id = "ne"
+            node_exec.node_id = "n"
+            node_exec.block_id = "b"
+            node_exec.inputs = {}
+
+            charged = proc.charge_extra_iterations(node_exec, extra_iterations=-1)
+            assert charged == 0
+            assert spent == []
+        finally:
+            restore()
diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index 3a51682b51..e7631d0991 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -679,6 +679,30 @@ class ExecutionProcessor:
         execution_stats.walltime = timing_info.wall_time
         execution_stats.cputime = timing_info.cpu_time
 
+        # Charge extra iterations for blocks that opt into per-LLM-call
+        # billing (e.g. OrchestratorBlock in agent mode). The first call
+        # is already covered by _charge_usage(); each additional LLM call
+        # costs another base_cost. Skipped for dry runs and failed runs.
+        if (
+            status == ExecutionStatus.COMPLETED
+            and node.block.charge_per_llm_call
+            and execution_stats.llm_call_count > 1
+            and not node_exec.execution_context.dry_run
+        ):
+            extra_iterations = execution_stats.llm_call_count - 1
+            try:
+                extra_cost = await asyncio.to_thread(
+                    self.charge_extra_iterations,
+                    node_exec,
+                    extra_iterations,
+                )
+                if extra_cost > 0:
+                    execution_stats.extra_cost += extra_cost
+            except Exception as e:
+                log_metadata.warning(
+                    f"Failed to charge extra iterations for " f"{node.block.name}: {e}"
+                )
+
         graph_stats, graph_stats_lock = graph_stats_pair
         with graph_stats_lock:
             graph_stats.node_count += 1 + execution_stats.extra_steps
@@ -994,6 +1018,68 @@ class ExecutionProcessor:
 
         return total_cost, remaining_balance
 
+    def charge_node_usage(
+        self,
+        node_exec: NodeExecutionEntry,
+    ) -> tuple[int, int]:
+        """Charge a single node execution to the user.
+
+        Public wrapper around :meth:`_charge_usage` for blocks (e.g. the
+        OrchestratorBlock) that spawn nested node executions outside the
+        main queue and therefore need to charge them explicitly.
+        """
+        return self._charge_usage(
+            node_exec=node_exec,
+            execution_count=increment_execution_count(node_exec.user_id),
+        )
+
+    def charge_extra_iterations(
+        self,
+        node_exec: NodeExecutionEntry,
+        extra_iterations: int,
+    ) -> int:
+        """Charge a block extra iterations beyond the initial run.
+
+        Used by agent-mode blocks (e.g. OrchestratorBlock) that make
+        multiple LLM calls within a single node execution. The first
+        iteration is already charged by :meth:`_charge_usage`; this
+        method charges *extra_iterations* additional copies of the
+        block's base cost.
+        """
+        if extra_iterations <= 0:
+            return 0
+        db_client = get_db_client()
+        block = get_block(node_exec.block_id)
+        if not block:
+            return 0
+        cost, matching_filter = block_usage_cost(
+            block=block, input_data=node_exec.inputs
+        )
+        if cost <= 0:
+            return 0
+        total_extra_cost = cost * extra_iterations
+        db_client.spend_credits(
+            user_id=node_exec.user_id,
+            cost=total_extra_cost,
+            metadata=UsageTransactionMetadata(
+                graph_exec_id=node_exec.graph_exec_id,
+                graph_id=node_exec.graph_id,
+                node_exec_id=node_exec.node_exec_id,
+                node_id=node_exec.node_id,
+                block_id=node_exec.block_id,
+                block=block.name,
+                input={
+                    **matching_filter,
+                    "extra_iterations": extra_iterations,
+                },
+                reason=(
+                    f"Extra agent-mode iterations for {block.name} "
+                    f"({extra_iterations} additional LLM calls)"
+                ),
+            ),
+        )
+        return total_extra_cost
+
     @time_measured
     def _on_graph_execution(
         self,

From 17a9ff12785d5321db8eeaf86a333560ef920a87 Mon Sep 17 00:00:00 2001
From: majdyz <majdy.zamil@gmail.com>
Date: Fri, 10 Apr 2026 08:57:40 +0000
Subject: [PATCH 02/24] fix(orchestrator): address review feedback on
 per-iteration cost charging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses critical issues from autogpt-pr-reviewer + coderabbit review:

Billing safety:
- Surface InsufficientBalanceError as ERROR (not warning) so monitoring
  picks up billing leaks; other charge failures still log a warning.
- Cap extra_iterations at MAX_EXTRA_ITERATIONS=200 to prevent a corrupted
  llm_call_count from draining a user's balance.
- Tools now charged AFTER successful execution, not before — failed
  tools no longer cost credits, matching the rest of the platform.
- charge_node_usage uses execution_count=0 so nested tool calls don't
  inflate the per-execution counter / push users into higher cost tiers.
- charge_extra_iterations now returns (cost, remaining_balance) and the
  caller invokes _handle_low_balance to send low-balance notifications.

Error handling consistency:
- _execute_single_tool_with_manager re-raises InsufficientBalanceError
  instead of swallowing it into a tool-error response. This prevents
  leaking the user's exact balance to the LLM context and lets the
  outer error handling stop the run cleanly, mirroring the main queue.

Test fixes:
- test_orchestrator_per_iteration_cost.py: rewritten with pytest
  monkeypatch fixtures (no more manual save/restore), proper FakeBlock
  with .name attribute set correctly, plus new tests for the cap,
  block-not-found, InsufficientBalanceError propagation, and
  charge_node_usage delegation.
- test_orchestrator.py / test_orchestrator_responses_api.py /
  test_orchestrator_dynamic_fields.py: mock charge_node_usage on the
  execution processor stub so existing agent-mode tests still pass
  with the new charging call.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/backend/blocks/orchestrator.py    |  50 ++--
 .../backend/blocks/test/test_orchestrator.py  |   4 +
 .../test/test_orchestrator_dynamic_fields.py  |   4 +
 .../test_orchestrator_per_iteration_cost.py   | 279 ++++++++++++------
 .../test/test_orchestrator_responses_api.py   |   2 +
 .../backend/backend/executor/manager.py       |  56 +++-
 6 files changed, 277 insertions(+), 118 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 9d23ff9dcb..55facaa081 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -36,6 +36,7 @@ from backend.data.execution import ExecutionContext
 from backend.data.model import NodeExecutionStats, SchemaField
 from backend.util import json
 from backend.util.clients import get_database_manager_async_client
+from backend.util.exceptions import InsufficientBalanceError
 from backend.util.prompt import MAIN_OBJECTIVE_PREFIX
 from backend.util.security import SENSITIVE_FIELD_NAMES
 from backend.util.tool_call_loop import (
@@ -1107,18 +1108,6 @@ class OrchestratorBlock(Block):
                 execution_processor.execution_stats_lock,
             )
 
-            # Charge user credits for the tool execution. Tools spawned by the
-            # orchestrator bypass the main execution queue (where _charge_usage
-            # is called), so we must charge here to avoid free tool execution.
-            # Skipped for dry runs and when block has no cost configured.
-            if not execution_params.execution_context.dry_run:
-                tool_cost, _ = await asyncio.to_thread(
-                    execution_processor.charge_node_usage,
-                    node_exec_entry,
-                )
-                if tool_cost > 0:
-                    self.merge_stats(NodeExecutionStats(extra_cost=tool_cost))
-
             # Create a completed future for the task tracking system
             node_exec_future = Future()
             node_exec_progress.add_task(
@@ -1127,14 +1116,31 @@ class OrchestratorBlock(Block):
             )
 
             # Execute the node directly since we're in the Orchestrator context
-            node_exec_future.set_result(
-                await execution_processor.on_node_execution(
-                    node_exec=node_exec_entry,
-                    node_exec_progress=node_exec_progress,
-                    nodes_input_masks=None,
-                    graph_stats_pair=graph_stats_pair,
-                )
+            tool_node_stats = await execution_processor.on_node_execution(
+                node_exec=node_exec_entry,
+                node_exec_progress=node_exec_progress,
+                nodes_input_masks=None,
+                graph_stats_pair=graph_stats_pair,
             )
+            node_exec_future.set_result(tool_node_stats)
+
+            # Charge user credits AFTER successful tool execution. Tools
+            # spawned by the orchestrator bypass the main execution queue
+            # (where _charge_usage is called), so we must charge here to
+            # avoid free tool execution. Charging post-completion (vs.
+            # pre-execution) avoids billing users for failed tool calls.
+            # Skipped for dry runs.
+            if (
+                not execution_params.execution_context.dry_run
+                and tool_node_stats is not None
+                and not isinstance(tool_node_stats.error, Exception)
+            ):
+                tool_cost, _ = await asyncio.to_thread(
+                    execution_processor.charge_node_usage,
+                    node_exec_entry,
+                )
+                if tool_cost > 0:
+                    self.merge_stats(NodeExecutionStats(extra_cost=tool_cost))
 
             # Get outputs from database after execution completes using database manager client
             node_outputs = await db_client.get_execution_outputs_by_node_exec_id(
@@ -1151,6 +1157,12 @@ class OrchestratorBlock(Block):
                 tool_call.id, tool_response_content, responses_api=responses_api
             )
 
+        except InsufficientBalanceError:
+            # Don't downgrade billing failures into tool errors — let the
+            # orchestrator's outer error handling stop the run cleanly,
+            # mirroring the behaviour of the main execution queue. Also
+            # prevents leaking exact balance amounts to the LLM context.
+            raise
         except Exception as e:
             logger.warning("Tool execution with manager failed: %s", e)
             # Return error response
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
index 55f137428f..85d1b78fc7 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
@@ -922,6 +922,10 @@ async def test_orchestrator_agent_mode():
         mock_execution_processor.on_node_execution = AsyncMock(
             return_value=mock_node_stats
         )
+        # Mock charge_node_usage (called after successful tool execution).
+        # Returns (cost, remaining_balance). Synchronous because it's called
+        # via asyncio.to_thread.
+        mock_execution_processor.charge_node_usage = MagicMock(return_value=(0, 0))
 
         # Mock the get_execution_outputs_by_node_exec_id method
         mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py
index ac4fa0710b..e5a7300732 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py
@@ -638,6 +638,10 @@ async def test_validation_errors_dont_pollute_conversation():
                 mock_execution_processor.on_node_execution.return_value = (
                     mock_node_stats
                 )
+                # Mock charge_node_usage (called after successful tool execution).
+                mock_execution_processor.charge_node_usage = MagicMock(
+                    return_value=(0, 0)
+                )
 
                 async for output_name, output_value in block.run(
                     input_data,
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
index 917871d3d7..6654282c58 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -8,9 +8,14 @@ the block completes.
 
 from unittest.mock import MagicMock
 
+import pytest
+
 from backend.blocks.orchestrator import OrchestratorBlock
 
 
+# ── Class flag ──────────────────────────────────────────────────────
+
+
 class TestChargePerLlmCallFlag:
     """OrchestratorBlock opts into per-LLM-call billing."""
 
@@ -23,16 +28,117 @@ class TestChargePerLlmCallFlag:
         assert Block.charge_per_llm_call is False
 
 
-class TestChargeExtraIterations:
-    """The executor charges ``cost * (llm_call_count - 1)`` extra credits."""
+# ── charge_extra_iterations math ───────────────────────────────────
 
-    def _make_processor_with_block_cost(self, base_cost: int):
-        """Build a minimal ExecutionProcessor stub with a stubbed block lookup."""
+
+@pytest.fixture()
+def fake_node_exec():
+    node_exec = MagicMock()
+    node_exec.user_id = "u"
+    node_exec.graph_exec_id = "g"
+    node_exec.graph_id = "g"
+    node_exec.node_exec_id = "ne"
+    node_exec.node_id = "n"
+    node_exec.block_id = "b"
+    node_exec.inputs = {}
+    return node_exec
+
+
+@pytest.fixture()
+def patched_processor(monkeypatch):
+    """ExecutionProcessor with stubbed db client / block lookup helpers.
+
+    Returns the processor and a list of credit amounts spent so tests can
+    assert on what was charged.
+    """
+    from backend.executor import manager
+
+    spent: list[int] = []
+
+    class FakeDb:
+        def spend_credits(self, *, user_id, cost, metadata):
+            spent.append(cost)
+            return 1000  # remaining balance
+
+    fake_block = MagicMock()
+    fake_block.name = "FakeBlock"
+
+    monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb())
+    monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block)
+    monkeypatch.setattr(
+        manager,
+        "block_usage_cost",
+        lambda block, input_data, **_kw: (10, {"model": "claude-sonnet-4-6"}),
+    )
+
+    proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+    return proc, spent
+
+
+class TestChargeExtraIterations:
+    def test_zero_extra_iterations_charges_nothing(
+        self, patched_processor, fake_node_exec
+    ):
+        proc, spent = patched_processor
+        cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=0)
+        assert cost == 0
+        assert balance == 0
+        assert spent == []
+
+    def test_extra_iterations_multiplies_base_cost(
+        self, patched_processor, fake_node_exec
+    ):
+        proc, spent = patched_processor
+        cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=4)
+        assert cost == 40  # 4 × 10
+        assert balance == 1000
+        assert spent == [40]
+
+    def test_negative_extra_iterations_charges_nothing(
+        self, patched_processor, fake_node_exec
+    ):
+        proc, spent = patched_processor
+        cost, balance = proc.charge_extra_iterations(
+            fake_node_exec, extra_iterations=-1
+        )
+        assert cost == 0
+        assert balance == 0
+        assert spent == []
+
+    def test_capped_at_max(self, monkeypatch, fake_node_exec):
+        """Runaway llm_call_count is capped at _MAX_EXTRA_ITERATIONS."""
         from backend.executor import manager
 
-        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+        spent: list[int] = []
+
+        class FakeDb:
+            def spend_credits(self, *, user_id, cost, metadata):
+                spent.append(cost)
+                return 1000
+
+        fake_block = MagicMock()
+        fake_block.name = "FakeBlock"
+
+        monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb())
+        monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block)
+        monkeypatch.setattr(
+            manager,
+            "block_usage_cost",
+            lambda block, input_data, **_kw: (10, {}),
+        )
+
+        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+        cap = manager.ExecutionProcessor._MAX_EXTRA_ITERATIONS
+        cost, _ = proc.charge_extra_iterations(
+            fake_node_exec, extra_iterations=cap * 100
+        )
+        # Charged at most cap × 10
+        assert cost == cap * 10
+        assert spent == [cap * 10]
+
+    def test_zero_base_cost_skips_charge(self, monkeypatch, fake_node_exec):
+        from backend.executor import manager
 
-        # Stub the spend_credits client and block_usage_cost helper.
         spent: list[int] = []
 
         class FakeDb:
@@ -40,95 +146,94 @@ class TestChargeExtraIterations:
                 spent.append(cost)
                 return 0
 
-        # Patch get_db_client and get_block + block_usage_cost on the manager
-        # module so charge_extra_iterations sees deterministic values.
-        original_get_db = manager.get_db_client
-        original_get_block = manager.get_block
-        original_block_usage_cost = manager.block_usage_cost
+        fake_block = MagicMock()
+        fake_block.name = "FakeBlock"
 
-        def restore():
-            manager.get_db_client = original_get_db
-            manager.get_block = original_get_block
-            manager.block_usage_cost = original_block_usage_cost
-
-        manager.get_db_client = lambda: FakeDb()
-        manager.get_block = lambda block_id: MagicMock(name="block")
-        manager.block_usage_cost = lambda block, input_data: (
-            base_cost,
-            {"model": "claude-sonnet-4-6"},
+        monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb())
+        monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block)
+        monkeypatch.setattr(
+            manager, "block_usage_cost", lambda block, input_data, **_kw: (0, {})
         )
 
-        return proc, spent, restore
+        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+        cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=4)
+        assert cost == 0
+        assert balance == 0
+        assert spent == []
 
-    def test_zero_extra_iterations_charges_nothing(self):
-        proc, spent, restore = self._make_processor_with_block_cost(base_cost=10)
-        try:
-            node_exec = MagicMock()
-            node_exec.user_id = "u"
-            node_exec.graph_exec_id = "g"
-            node_exec.graph_id = "g"
-            node_exec.node_exec_id = "ne"
-            node_exec.node_id = "n"
-            node_exec.block_id = "b"
-            node_exec.inputs = {}
+    def test_block_not_found_skips_charge(self, monkeypatch, fake_node_exec):
+        from backend.executor import manager
 
-            charged = proc.charge_extra_iterations(node_exec, extra_iterations=0)
-            assert charged == 0
-            assert spent == []
-        finally:
-            restore()
+        spent: list[int] = []
 
-    def test_extra_iterations_multiplies_base_cost(self):
-        proc, spent, restore = self._make_processor_with_block_cost(base_cost=10)
-        try:
-            node_exec = MagicMock()
-            node_exec.user_id = "u"
-            node_exec.graph_exec_id = "g"
-            node_exec.graph_id = "g"
-            node_exec.node_exec_id = "ne"
-            node_exec.node_id = "n"
-            node_exec.block_id = "b"
-            node_exec.inputs = {}
+        class FakeDb:
+            def spend_credits(self, *, user_id, cost, metadata):
+                spent.append(cost)
+                return 0
 
-            charged = proc.charge_extra_iterations(node_exec, extra_iterations=4)
-            # 4 extra iterations × 10 base_cost = 40
-            assert charged == 40
-            assert spent == [40]
-        finally:
-            restore()
+        monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb())
+        monkeypatch.setattr(manager, "get_block", lambda block_id: None)
+        monkeypatch.setattr(
+            manager, "block_usage_cost", lambda block, input_data, **_kw: (10, {})
+        )
 
-    def test_zero_base_cost_skips_charge(self):
-        proc, spent, restore = self._make_processor_with_block_cost(base_cost=0)
-        try:
-            node_exec = MagicMock()
-            node_exec.user_id = "u"
-            node_exec.graph_exec_id = "g"
-            node_exec.graph_id = "g"
-            node_exec.node_exec_id = "ne"
-            node_exec.node_id = "n"
-            node_exec.block_id = "b"
-            node_exec.inputs = {}
+        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+        cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=3)
+        assert cost == 0
+        assert balance == 0
+        assert spent == []
 
-            charged = proc.charge_extra_iterations(node_exec, extra_iterations=4)
-            assert charged == 0
-            assert spent == []
-        finally:
-            restore()
+    def test_propagates_insufficient_balance_error(self, monkeypatch, fake_node_exec):
+        """Out-of-credits errors must propagate, not be silently swallowed."""
+        from backend.executor import manager
+        from backend.util.exceptions import InsufficientBalanceError
 
-    def test_negative_extra_iterations_charges_nothing(self):
-        proc, spent, restore = self._make_processor_with_block_cost(base_cost=10)
-        try:
-            node_exec = MagicMock()
-            node_exec.user_id = "u"
-            node_exec.graph_exec_id = "g"
-            node_exec.graph_id = "g"
-            node_exec.node_exec_id = "ne"
-            node_exec.node_id = "n"
-            node_exec.block_id = "b"
-            node_exec.inputs = {}
+        class FakeDb:
+            def spend_credits(self, *, user_id, cost, metadata):
+                raise InsufficientBalanceError(
+                    user_id=user_id,
+                    message="Insufficient balance",
+                    balance=0,
+                    amount=cost,
+                )
 
-            charged = proc.charge_extra_iterations(node_exec, extra_iterations=-1)
-            assert charged == 0
-            assert spent == []
-        finally:
-            restore()
+        fake_block = MagicMock()
+        fake_block.name = "FakeBlock"
+
+        monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb())
+        monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block)
+        monkeypatch.setattr(
+            manager, "block_usage_cost", lambda block, input_data, **_kw: (10, {})
+        )
+
+        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+        with pytest.raises(InsufficientBalanceError):
+            proc.charge_extra_iterations(fake_node_exec, extra_iterations=4)
+
+
+# ── charge_node_usage ──────────────────────────────────────────────
+
+
+class TestChargeNodeUsage:
+    """charge_node_usage delegates to _charge_usage with execution_count=0."""
+
+    def test_delegates_with_zero_execution_count(self, monkeypatch, fake_node_exec):
+        """Nested tool charges should NOT inflate the per-execution counter."""
+        from backend.executor import manager
+
+        captured: dict = {}
+
+        def fake_charge_usage(self, node_exec, execution_count):
+            captured["execution_count"] = execution_count
+            captured["node_exec"] = node_exec
+            return (5, 100)
+
+        monkeypatch.setattr(
+            manager.ExecutionProcessor, "_charge_usage", fake_charge_usage
+        )
+
+        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+        cost, balance = proc.charge_node_usage(fake_node_exec)
+        assert cost == 5
+        assert balance == 100
+        assert captured["execution_count"] == 0
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py
index f9ec7676ba..649716a491 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py
@@ -956,6 +956,8 @@ async def test_agent_mode_conversation_valid_for_responses_api():
     ep.execution_stats_lock = threading.Lock()
     ns = MagicMock(error=None)
     ep.on_node_execution = AsyncMock(return_value=ns)
+    # Mock charge_node_usage (called after successful tool execution).
+    ep.charge_node_usage = MagicMock(return_value=(0, 0))
 
     with patch("backend.blocks.llm.llm_call", llm_mock), patch.object(
         block, "_create_tool_node_signatures", return_value=tool_sigs
diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index e7631d0991..0454f0c5f3 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -683,6 +683,11 @@ class ExecutionProcessor:
         # billing (e.g. OrchestratorBlock in agent mode). The first call
         # is already covered by _charge_usage(); each additional LLM call
         # costs another base_cost. Skipped for dry runs and failed runs.
+        #
+        # InsufficientBalanceError is logged at ERROR level (this is a
+        # billing leak — the work is already done, but the user can't pay)
+        # and re-surfaced via execution_stats.error so monitoring can pick
+        # it up. Other exceptions are warnings.
         if (
             status == ExecutionStatus.COMPLETED
             and node.block.charge_per_llm_call
@@ -691,16 +696,27 @@ class ExecutionProcessor:
         ):
             extra_iterations = execution_stats.llm_call_count - 1
             try:
-                extra_cost = await asyncio.to_thread(
+                extra_cost, remaining_balance = await asyncio.to_thread(
                     self.charge_extra_iterations,
                     node_exec,
                     extra_iterations,
                 )
                 if extra_cost > 0:
                     execution_stats.extra_cost += extra_cost
+                    self._handle_low_balance(
+                        db_client=get_db_client(),
+                        user_id=node_exec.user_id,
+                        current_balance=remaining_balance,
+                        transaction_cost=extra_cost,
+                    )
+            except InsufficientBalanceError as e:
+                log_metadata.error(
+                    f"Billing leak: insufficient balance after {node.block.name} "
+                    f"completed {extra_iterations} extra iterations: {e}"
+                )
             except Exception as e:
                 log_metadata.warning(
-                    f"Failed to charge extra iterations for " f"{node.block.name}: {e}"
+                    f"Failed to charge extra iterations for {node.block.name}: {e}"
                 )
 
         graph_stats, graph_stats_lock = graph_stats_pair
@@ -1018,6 +1034,12 @@ class ExecutionProcessor:
 
         return total_cost, remaining_balance
 
+    # Hard cap on the multiplier passed to charge_extra_iterations to
+    # protect against a corrupted llm_call_count draining a user's balance.
+    # Real agent-mode runs are bounded by agent_mode_max_iterations (~50);
+    # 200 leaves headroom while preventing runaway charges.
+    _MAX_EXTRA_ITERATIONS = 200
+
     def charge_node_usage(
         self,
         node_exec: NodeExecutionEntry,
@@ -1027,17 +1049,22 @@ class ExecutionProcessor:
         Public wrapper around :meth:`_charge_usage` for blocks (e.g. the
         OrchestratorBlock) that spawn nested node executions outside the
         main queue and therefore need to charge them explicitly.
+
+        Note: this **does not** increment the global execution counter
+        (``increment_execution_count``). Nested tool executions are
+        sub-steps of a single block run from the user's perspective and
+        should not push them into higher per-execution cost tiers.
         """
         return self._charge_usage(
             node_exec=node_exec,
-            execution_count=increment_execution_count(node_exec.user_id),
+            execution_count=0,
         )
 
     def charge_extra_iterations(
         self,
         node_exec: NodeExecutionEntry,
         extra_iterations: int,
-    ) -> int:
+    ) -> tuple[int, int]:
         """Charge a block extra iterations beyond the initial run.
 
         Used by agent-mode blocks (e.g. OrchestratorBlock) that make
@@ -1045,20 +1072,25 @@ class ExecutionProcessor:
         iteration is already charged by :meth:`_charge_usage`; this
         method charges *extra_iterations* additional copies of the
         block's base cost.
+
+        Returns ``(total_extra_cost, remaining_balance)``. May raise
+        ``InsufficientBalanceError`` if the user can't afford the charge.
         """
         if extra_iterations <= 0:
-            return 0
+            return 0, 0
+        # Cap to protect against a corrupted llm_call_count.
+        capped_iterations = min(extra_iterations, self._MAX_EXTRA_ITERATIONS)
         db_client = get_db_client()
         block = get_block(node_exec.block_id)
         if not block:
-            return 0
+            return 0, 0
         cost, matching_filter = block_usage_cost(
             block=block, input_data=node_exec.inputs
         )
         if cost <= 0:
-            return 0
-        total_extra_cost = cost * extra_iterations
-        db_client.spend_credits(
+            return 0, 0
+        total_extra_cost = cost * capped_iterations
+        remaining_balance = db_client.spend_credits(
             user_id=node_exec.user_id,
             cost=total_extra_cost,
             metadata=UsageTransactionMetadata(
@@ -1070,15 +1102,15 @@ class ExecutionProcessor:
                 block=block.name,
                 input={
                     **matching_filter,
-                    "extra_iterations": extra_iterations,
+                    "extra_iterations": capped_iterations,
                 },
                 reason=(
                     f"Extra agent-mode iterations for {block.name} "
-                    f"({extra_iterations} additional LLM calls)"
+                    f"({capped_iterations} additional LLM calls)"
                 ),
             ),
         )
-        return total_extra_cost
+        return total_extra_cost, remaining_balance
 
     @time_measured
     def _on_graph_execution(

From 45d67cfaccd94bbb92a45521a80cb7d40d16ada4 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Fri, 10 Apr 2026 09:56:39 +0000
Subject: [PATCH 03/24] style: fix isort import grouping in
 test_orchestrator_per_iteration_cost

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/blocks/test/test_orchestrator_per_iteration_cost.py  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
index 6654282c58..352a371fda 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -12,7 +12,6 @@ import pytest
 
 from backend.blocks.orchestrator import OrchestratorBlock
 
-
 # ── Class flag ──────────────────────────────────────────────────────
 
 

From 215340690f0785c6049194ff0de73b9d6473e327 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Fri, 10 Apr 2026 09:59:00 +0000
Subject: [PATCH 04/24] docs: regenerate block docs (memory_search/memory_store
 tools)

Auto-generated by `poetry run python scripts/generate_block_docs.py`.
The OrchestratorBlock tool list gained `memory_search` / `memory_store`
on dev but the doc table wasn't regenerated.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/integrations/block-integrations/misc.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/integrations/block-integrations/misc.md b/docs/integrations/block-integrations/misc.md
index d43826a959..ef7fd938db 100644
--- a/docs/integrations/block-integrations/misc.md
+++ b/docs/integrations/block-integrations/misc.md
@@ -58,7 +58,7 @@ Tool and block identifiers provided in `tools` and `blocks` are validated at run
 | system_context | Optional additional context prepended to the prompt. Use this to constrain autopilot behavior, provide domain context, or set output format requirements. | str | No |
 | session_id | Session ID to continue an existing autopilot conversation. Leave empty to start a new session. Use the session_id output from a previous run to continue. | str | No |
 | max_recursion_depth | Maximum nesting depth when the autopilot calls this block recursively (sub-agent pattern). Prevents infinite loops. | int | No |
-| tools | Tool names to filter. Works with tools_exclude to form an allow-list or deny-list. Leave empty to apply no tool filter. | List["add_understanding" \| "ask_question" \| "bash_exec" \| "browser_act" \| "browser_navigate" \| "browser_screenshot" \| "connect_integration" \| "continue_run_block" \| "create_agent" \| "create_feature_request" \| "create_folder" \| "customize_agent" \| "delete_folder" \| "delete_workspace_file" \| "edit_agent" \| "find_agent" \| "find_block" \| "find_library_agent" \| "fix_agent_graph" \| "get_agent_building_guide" \| "get_doc_page" \| "get_mcp_guide" \| "list_folders" \| "list_workspace_files" \| "move_agents_to_folder" \| "move_folder" \| "read_workspace_file" \| "run_agent" \| "run_block" \| "run_mcp_tool" \| "search_docs" \| "search_feature_requests" \| "update_folder" \| "validate_agent_graph" \| "view_agent_output" \| "web_fetch" \| "write_workspace_file" \| "Agent" \| "Edit" \| "Glob" \| "Grep" \| "Read" \| "Task" \| "TodoWrite" \| "WebSearch" \| "Write"] | No |
+| tools | Tool names to filter. Works with tools_exclude to form an allow-list or deny-list. Leave empty to apply no tool filter. | List["add_understanding" \| "ask_question" \| "bash_exec" \| "browser_act" \| "browser_navigate" \| "browser_screenshot" \| "connect_integration" \| "continue_run_block" \| "create_agent" \| "create_feature_request" \| "create_folder" \| "customize_agent" \| "delete_folder" \| "delete_workspace_file" \| "edit_agent" \| "find_agent" \| "find_block" \| "find_library_agent" \| "fix_agent_graph" \| "get_agent_building_guide" \| "get_doc_page" \| "get_mcp_guide" \| "list_folders" \| "list_workspace_files" \| "memory_search" \| "memory_store" \| "move_agents_to_folder" \| "move_folder" \| "read_workspace_file" \| "run_agent" \| "run_block" \| "run_mcp_tool" \| "search_docs" \| "search_feature_requests" \| "update_folder" \| "validate_agent_graph" \| "view_agent_output" \| "web_fetch" \| "write_workspace_file" \| "Agent" \| "Edit" \| "Glob" \| "Grep" \| "Read" \| "Task" \| "TodoWrite" \| "WebSearch" \| "Write"] | No |
 | tools_exclude | Controls how the 'tools' list is interpreted. True (default): 'tools' is a deny-list — listed tools are blocked, all others are allowed. An empty 'tools' list means allow everything. False: 'tools' is an allow-list — only listed tools are permitted. | bool | No |
 | blocks | Block identifiers to filter when the copilot uses run_block. Each entry can be: a block name (e.g. 'HTTP Request'), a full block UUID, or the first 8 hex characters of the UUID (e.g. 'c069dc6b'). Works with blocks_exclude. Leave empty to apply no block filter. | List[str] | No |
 | blocks_exclude | Controls how the 'blocks' list is interpreted. True (default): 'blocks' is a deny-list — listed blocks are blocked, all others are allowed. An empty 'blocks' list means allow everything. False: 'blocks' is an allow-list — only listed blocks are permitted. | bool | No |

From ada27256286cf77b1dafe9bdb69b7ee56f6936cd Mon Sep 17 00:00:00 2001
From: majdyz <majdyz@users.noreply.github.com>
Date: Fri, 10 Apr 2026 11:53:46 +0000
Subject: [PATCH 05/24] fix(orchestrator): propagate billing errors and close
 leak windows

Round 3 review fixes on top of the per-iteration cost charging PR:

- Propagate InsufficientBalanceError out of `_agent_mode_tool_executor`
  and the SDK MCP tool handler so billing failures stop the agent loop
  instead of being re-injected into the LLM as a tool error (which
  previously leaked balance amounts and let the loop keep consuming
  unpaid compute).
- On post-execution extra-iteration charging failure, record
  `execution_stats.error`, log with structured billing_leak fields,
  and fire `_handle_insufficient_funds_notif` so the user is actually
  notified. Comment now matches behaviour.
- Tighten tool-success gate to `tool_node_stats.error is None` so
  cancelled/terminated tool runs (BaseException subclasses such as
  CancelledError) are not billed.
- Extract shared `_resolve_block_cost` helper used by `_charge_usage`
  and `charge_extra_iterations` to DRY the block/cost lookup.
- Add integration tests for the `on_node_execution` charging gate
  covering each branch (status, flag, llm_call_count, dry_run) plus
  the InsufficientBalanceError path that asserts error recording and
  notification.
- Add tool-charging skip tests (dry_run, failed tool, cancelled tool)
  and an InsufficientBalanceError propagation test for
  `_execute_single_tool_with_manager`.
- Assert `charge_node_usage` is actually called in the existing
  `test_orchestrator_agent_mode` test and return a non-zero cost so
  the `merge_stats` branch is exercised.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/backend/blocks/orchestrator.py    |  20 +-
 .../backend/blocks/test/test_orchestrator.py  |  12 +-
 .../test_orchestrator_per_iteration_cost.py   | 441 +++++++++++++++++-
 .../backend/backend/executor/manager.py       |  81 +++-
 4 files changed, 532 insertions(+), 22 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 55facaa081..13591bed01 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -1130,10 +1130,14 @@ class OrchestratorBlock(Block):
             # avoid free tool execution. Charging post-completion (vs.
             # pre-execution) avoids billing users for failed tool calls.
             # Skipped for dry runs.
+            #
+            # `error is None` intentionally excludes both Exception and
+            # BaseException subclasses (e.g. CancelledError) so cancelled
+            # or terminated tool runs are not billed.
             if (
                 not execution_params.execution_context.dry_run
                 and tool_node_stats is not None
-                and not isinstance(tool_node_stats.error, Exception)
+                and tool_node_stats.error is None
             ):
                 tool_cost, _ = await asyncio.to_thread(
                     execution_processor.charge_node_usage,
@@ -1277,6 +1281,13 @@ class OrchestratorBlock(Block):
                 content=content,
                 is_error=tool_failed,
             )
+        except InsufficientBalanceError:
+            # Billing failures must stop the agent loop cleanly — do NOT
+            # downgrade them into a tool error that gets fed back to the
+            # LLM. Re-raise so the orchestrator's outer error handling
+            # halts the run (mirrors main execution queue behaviour) and
+            # avoids leaking exact balance amounts into LLM context.
+            raise
         except Exception as e:
             logger.error("Tool execution failed: %s", e)
             return ToolCallResult(
@@ -1481,6 +1492,13 @@ class OrchestratorBlock(Block):
                             "content": [{"type": "text", "text": text}],
                             "isError": tool_failed,
                         }
+                    except InsufficientBalanceError:
+                        # Same carve-out as _agent_mode_tool_executor:
+                        # billing failures must propagate to stop the run
+                        # rather than be fed back to the LLM as a tool
+                        # error (which would leak balance amounts and let
+                        # the loop continue consuming unbillable work).
+                        raise
                     except Exception as e:
                         logger.error("SDK tool execution failed: %s", e)
                         return {
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
index 85d1b78fc7..c69c107de3 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
@@ -924,8 +924,11 @@ async def test_orchestrator_agent_mode():
         )
         # Mock charge_node_usage (called after successful tool execution).
         # Returns (cost, remaining_balance). Synchronous because it's called
-        # via asyncio.to_thread.
-        mock_execution_processor.charge_node_usage = MagicMock(return_value=(0, 0))
+        # via asyncio.to_thread. Use a non-zero cost so the merge_stats
+        # branch is actually exercised, and assert it's called below.
+        mock_execution_processor.charge_node_usage = MagicMock(
+            return_value=(10, 990)
+        )
 
         # Mock the get_execution_outputs_by_node_exec_id method
         mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {
@@ -971,6 +974,11 @@ async def test_orchestrator_agent_mode():
         # Verify tool was executed via execution processor
         assert mock_execution_processor.on_node_execution.call_count == 1
 
+        # Verify charge_node_usage was actually called for the successful
+        # tool execution — this guards against regressions where the
+        # post-execution tool charging is accidentally removed.
+        assert mock_execution_processor.charge_node_usage.call_count == 1
+
 
 @pytest.mark.asyncio
 async def test_orchestrator_traditional_mode_default():
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
index 352a371fda..abf5c22740 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -6,7 +6,8 @@ this and charge ``base_cost * (llm_call_count - 1)`` extra credits after
 the block completes.
 """
 
-from unittest.mock import MagicMock
+import threading
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
@@ -236,3 +237,441 @@ class TestChargeNodeUsage:
         assert cost == 5
         assert balance == 100
         assert captured["execution_count"] == 0
+
+
+# ── on_node_execution charging gate ────────────────────────────────
+
+
+class _FakeNode:
+    """Minimal stand-in for a ``Node`` object with a block attribute."""
+
+    def __init__(self, charge_per_llm_call: bool, block_name: str = "FakeBlock"):
+        self.block = MagicMock()
+        self.block.charge_per_llm_call = charge_per_llm_call
+        self.block.name = block_name
+
+
+class _FakeExecContext:
+    def __init__(self, dry_run: bool = False):
+        self.dry_run = dry_run
+
+
+def _make_node_exec(dry_run: bool = False) -> MagicMock:
+    """Build a NodeExecutionEntry-like mock for on_node_execution tests."""
+    ne = MagicMock()
+    ne.user_id = "u"
+    ne.graph_id = "g"
+    ne.graph_exec_id = "ge"
+    ne.node_id = "n"
+    ne.node_exec_id = "ne"
+    ne.block_id = "b"
+    ne.inputs = {}
+    ne.execution_context = _FakeExecContext(dry_run=dry_run)
+    return ne
+
+
+@pytest.fixture()
+def gated_processor(monkeypatch):
+    """ExecutionProcessor with on_node_execution's downstream calls stubbed.
+
+    Lets tests flip the four gate conditions (status, charge_per_llm_call,
+    llm_call_count, dry_run) and observe whether charge_extra_iterations
+    was called.
+    """
+    from backend.data.execution import ExecutionStatus
+    from backend.data.model import NodeExecutionStats
+    from backend.executor import manager
+
+    calls: dict[str, list] = {
+        "charge_extra_iterations": [],
+        "handle_low_balance": [],
+        "handle_insufficient_funds_notif": [],
+    }
+
+    # Stub node lookup + DB client so the wrapper doesn't touch real infra.
+    fake_db = MagicMock()
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+    monkeypatch.setattr(manager, "get_db_async_client", lambda: fake_db)
+    monkeypatch.setattr(manager, "get_db_client", lambda: fake_db)
+    # get_block is called by LogMetadata construction in on_node_execution.
+    monkeypatch.setattr(
+        manager,
+        "get_block",
+        lambda block_id: MagicMock(name="FakeBlock"),
+    )
+    # Persistence + cost logging are not under test here.
+    monkeypatch.setattr(
+        manager,
+        "async_update_node_execution_status",
+        AsyncMock(return_value=None),
+    )
+    monkeypatch.setattr(
+        manager,
+        "async_update_graph_execution_state",
+        AsyncMock(return_value=None),
+    )
+    monkeypatch.setattr(
+        manager,
+        "log_system_credential_cost",
+        AsyncMock(return_value=None),
+    )
+
+    proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+
+    # Control the status returned by the inner execution call.
+    inner_result = {"status": ExecutionStatus.COMPLETED, "llm_call_count": 3}
+
+    async def fake_inner(
+        self, *, node, node_exec, node_exec_progress, stats, db_client,
+        log_metadata, nodes_input_masks=None, nodes_to_skip=None,
+    ):
+        stats.llm_call_count = inner_result["llm_call_count"]
+        return MagicMock(wall_time=0.1, cpu_time=0.1), inner_result["status"]
+
+    monkeypatch.setattr(
+        manager.ExecutionProcessor,
+        "_on_node_execution",
+        fake_inner,
+    )
+
+    def fake_charge_extra(self, node_exec, extra_iterations):
+        calls["charge_extra_iterations"].append(extra_iterations)
+        return (extra_iterations * 10, 500)
+
+    monkeypatch.setattr(
+        manager.ExecutionProcessor,
+        "charge_extra_iterations",
+        fake_charge_extra,
+    )
+
+    def fake_low_balance(self, **kwargs):
+        calls["handle_low_balance"].append(kwargs)
+
+    monkeypatch.setattr(
+        manager.ExecutionProcessor,
+        "_handle_low_balance",
+        fake_low_balance,
+    )
+
+    def fake_notif(self, db_client, user_id, graph_id, e):
+        calls["handle_insufficient_funds_notif"].append(
+            {"user_id": user_id, "graph_id": graph_id, "error": e}
+        )
+
+    monkeypatch.setattr(
+        manager.ExecutionProcessor,
+        "_handle_insufficient_funds_notif",
+        fake_notif,
+    )
+
+    return proc, calls, inner_result, fake_db, NodeExecutionStats
+
+
+@pytest.mark.asyncio
+async def test_on_node_execution_charges_extra_iterations_when_gate_passes(
+    gated_processor,
+):
+    """COMPLETED + charge_per_llm_call + llm_call_count>1 + not dry_run → charged."""
+    from backend.data.execution import ExecutionStatus
+
+    proc, calls, inner, fake_db, _ = gated_processor
+    inner["status"] = ExecutionStatus.COMPLETED
+    inner["llm_call_count"] = 3  # → extra_iterations = 2
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+
+    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
+                            cost=0, node_error_count=0), threading.Lock())
+    await proc.on_node_execution(
+        node_exec=_make_node_exec(dry_run=False),
+        node_exec_progress=MagicMock(),
+        nodes_input_masks=None,
+        graph_stats_pair=stats_pair,
+    )
+    assert calls["charge_extra_iterations"] == [2]
+
+
+@pytest.mark.asyncio
+async def test_on_node_execution_skips_when_status_not_completed(gated_processor):
+    from backend.data.execution import ExecutionStatus
+
+    proc, calls, inner, fake_db, _ = gated_processor
+    inner["status"] = ExecutionStatus.FAILED
+    inner["llm_call_count"] = 5
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+
+    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
+                            cost=0, node_error_count=0), threading.Lock())
+    await proc.on_node_execution(
+        node_exec=_make_node_exec(dry_run=False),
+        node_exec_progress=MagicMock(),
+        nodes_input_masks=None,
+        graph_stats_pair=stats_pair,
+    )
+    assert calls["charge_extra_iterations"] == []
+
+
+@pytest.mark.asyncio
+async def test_on_node_execution_skips_when_charge_flag_false(gated_processor):
+    from backend.data.execution import ExecutionStatus
+
+    proc, calls, inner, fake_db, _ = gated_processor
+    inner["status"] = ExecutionStatus.COMPLETED
+    inner["llm_call_count"] = 5
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=False))
+
+    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
+                            cost=0, node_error_count=0), threading.Lock())
+    await proc.on_node_execution(
+        node_exec=_make_node_exec(dry_run=False),
+        node_exec_progress=MagicMock(),
+        nodes_input_masks=None,
+        graph_stats_pair=stats_pair,
+    )
+    assert calls["charge_extra_iterations"] == []
+
+
+@pytest.mark.asyncio
+async def test_on_node_execution_skips_when_llm_call_count_le_1(gated_processor):
+    from backend.data.execution import ExecutionStatus
+
+    proc, calls, inner, fake_db, _ = gated_processor
+    inner["status"] = ExecutionStatus.COMPLETED
+    inner["llm_call_count"] = 1  # exactly the base charge, no extras
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+
+    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
+                            cost=0, node_error_count=0), threading.Lock())
+    await proc.on_node_execution(
+        node_exec=_make_node_exec(dry_run=False),
+        node_exec_progress=MagicMock(),
+        nodes_input_masks=None,
+        graph_stats_pair=stats_pair,
+    )
+    assert calls["charge_extra_iterations"] == []
+
+
+@pytest.mark.asyncio
+async def test_on_node_execution_skips_when_dry_run(gated_processor):
+    from backend.data.execution import ExecutionStatus
+
+    proc, calls, inner, fake_db, _ = gated_processor
+    inner["status"] = ExecutionStatus.COMPLETED
+    inner["llm_call_count"] = 5
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+
+    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
+                            cost=0, node_error_count=0), threading.Lock())
+    await proc.on_node_execution(
+        node_exec=_make_node_exec(dry_run=True),
+        node_exec_progress=MagicMock(),
+        nodes_input_masks=None,
+        graph_stats_pair=stats_pair,
+    )
+    assert calls["charge_extra_iterations"] == []
+
+
+@pytest.mark.asyncio
+async def test_on_node_execution_insufficient_balance_records_error_and_notifies(
+    monkeypatch, gated_processor,
+):
+    """When extra-iteration charging fails with InsufficientBalanceError:
+
+    - the run still reports COMPLETED (the work is already done)
+    - execution_stats.error is set so monitoring picks it up
+    - _handle_insufficient_funds_notif is called so the user is notified
+    """
+    from backend.data.execution import ExecutionStatus
+    from backend.executor import manager
+    from backend.util.exceptions import InsufficientBalanceError
+
+    proc, calls, inner, fake_db, _ = gated_processor
+    inner["status"] = ExecutionStatus.COMPLETED
+    inner["llm_call_count"] = 4
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+
+    def raise_ibe(self, node_exec, extra_iterations):
+        raise InsufficientBalanceError(
+            user_id=node_exec.user_id,
+            message="Insufficient balance",
+            balance=0,
+            amount=extra_iterations * 10,
+        )
+
+    monkeypatch.setattr(
+        manager.ExecutionProcessor, "charge_extra_iterations", raise_ibe
+    )
+
+    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
+                            cost=0, node_error_count=0), threading.Lock())
+    result_stats = await proc.on_node_execution(
+        node_exec=_make_node_exec(dry_run=False),
+        node_exec_progress=MagicMock(),
+        nodes_input_masks=None,
+        graph_stats_pair=stats_pair,
+    )
+    # Error recorded on stats so downstream monitoring can surface it.
+    assert isinstance(result_stats.error, InsufficientBalanceError)
+    # User notification fired.
+    assert len(calls["handle_insufficient_funds_notif"]) == 1
+    assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u"
+
+
+# ── Orchestrator _execute_single_tool_with_manager charging gates ──
+
+
+async def _run_tool_exec_with_stats(
+    *, dry_run: bool, tool_stats_error, charge_node_usage_mock=None,
+):
+    """Invoke _execute_single_tool_with_manager against fully mocked deps
+    and return (charge_call_count, merge_stats_calls).
+
+    Used to prove the dry_run and error guards around charge_node_usage
+    behave as documented, and that InsufficientBalanceError propagates.
+    """
+    import threading
+    from collections import defaultdict
+    from unittest.mock import AsyncMock, MagicMock, patch
+
+    from backend.blocks.orchestrator import ExecutionParams, OrchestratorBlock
+    from backend.data.execution import ExecutionContext
+
+    block = OrchestratorBlock()
+
+    # Mocked async DB client used inside orchestrator.
+    mock_db_client = AsyncMock()
+    mock_target_node = MagicMock()
+    mock_target_node.block_id = "test-block-id"
+    mock_target_node.input_default = {}
+    mock_db_client.get_node.return_value = mock_target_node
+    mock_node_exec_result = MagicMock()
+    mock_node_exec_result.node_exec_id = "test-tool-exec-id"
+    mock_db_client.upsert_execution_input.return_value = (
+        mock_node_exec_result,
+        {"query": "t"},
+    )
+    mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {
+        "result": "ok"
+    }
+
+    # ExecutionProcessor mock: on_node_execution returns supplied error.
+    mock_processor = AsyncMock()
+    mock_processor.running_node_execution = defaultdict(MagicMock)
+    mock_processor.execution_stats = MagicMock()
+    mock_processor.execution_stats_lock = threading.Lock()
+    mock_node_stats = MagicMock()
+    mock_node_stats.error = tool_stats_error
+    mock_processor.on_node_execution = AsyncMock(return_value=mock_node_stats)
+    mock_processor.charge_node_usage = (
+        charge_node_usage_mock or MagicMock(return_value=(10, 990))
+    )
+
+    # Build a tool_info shaped like _build_tool_info_from_args output.
+    tool_call = MagicMock()
+    tool_call.id = "call-1"
+    tool_call.name = "search_keywords"
+    tool_call.arguments = '{"query":"t"}'
+    tool_def = {
+        "type": "function",
+        "function": {
+            "name": "search_keywords",
+            "_sink_node_id": "test-sink-node-id",
+            "_field_mapping": {},
+            "parameters": {
+                "properties": {"query": {"type": "string"}},
+                "required": ["query"],
+            },
+        },
+    }
+    tool_info = OrchestratorBlock._build_tool_info_from_args(
+        tool_call_id="call-1",
+        tool_name="search_keywords",
+        tool_args={"query": "t"},
+        tool_def=tool_def,
+    )
+
+    exec_params = ExecutionParams(
+        user_id="u",
+        graph_id="g",
+        node_id="n",
+        graph_version=1,
+        graph_exec_id="ge",
+        node_exec_id="ne",
+        execution_context=ExecutionContext(
+            human_in_the_loop_safe_mode=False, dry_run=dry_run
+        ),
+    )
+
+    with patch(
+        "backend.blocks.orchestrator.get_database_manager_async_client",
+        return_value=mock_db_client,
+    ):
+        try:
+            await block._execute_single_tool_with_manager(
+                tool_info, exec_params, mock_processor, responses_api=False
+            )
+            raised = None
+        except Exception as e:
+            raised = e
+
+    return mock_processor.charge_node_usage, raised
+
+
+@pytest.mark.asyncio
+async def test_tool_execution_skips_charging_on_dry_run():
+    """dry_run=True → charge_node_usage is NOT called."""
+    charge_mock, raised = await _run_tool_exec_with_stats(
+        dry_run=True, tool_stats_error=None
+    )
+    assert raised is None
+    assert charge_mock.call_count == 0
+
+
+@pytest.mark.asyncio
+async def test_tool_execution_skips_charging_on_failed_tool():
+    """tool_node_stats.error is an Exception → charge_node_usage NOT called."""
+    charge_mock, raised = await _run_tool_exec_with_stats(
+        dry_run=False, tool_stats_error=RuntimeError("tool blew up")
+    )
+    assert raised is None
+    assert charge_mock.call_count == 0
+
+
+@pytest.mark.asyncio
+async def test_tool_execution_skips_charging_on_cancelled_tool():
+    """Cancellation (BaseException subclass) → charge_node_usage NOT called.
+
+    Guards the fix for sentry's BaseException concern: the old
+    `isinstance(error, Exception)` check would have treated CancelledError
+    as "no error" and billed the user for a terminated run.
+    """
+    import asyncio as _asyncio
+
+    charge_mock, raised = await _run_tool_exec_with_stats(
+        dry_run=False, tool_stats_error=_asyncio.CancelledError()
+    )
+    assert raised is None
+    assert charge_mock.call_count == 0
+
+
+@pytest.mark.asyncio
+async def test_tool_execution_insufficient_balance_propagates():
+    """InsufficientBalanceError from charge_node_usage must propagate out.
+
+    If this leaked into a ToolCallResult the LLM loop would keep running
+    with 'tool failed' errors and the user would get unpaid work.
+    """
+    from unittest.mock import MagicMock
+
+    from backend.util.exceptions import InsufficientBalanceError
+
+    raising_charge = MagicMock(
+        side_effect=InsufficientBalanceError(
+            user_id="u", message="nope", balance=0, amount=10
+        )
+    )
+    _, raised = await _run_tool_exec_with_stats(
+        dry_run=False,
+        tool_stats_error=None,
+        charge_node_usage_mock=raising_charge,
+    )
+    assert isinstance(raised, InsufficientBalanceError)
diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index 0454f0c5f3..3a1fd68db9 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -684,10 +684,16 @@ class ExecutionProcessor:
         # is already covered by _charge_usage(); each additional LLM call
         # costs another base_cost. Skipped for dry runs and failed runs.
         #
-        # InsufficientBalanceError is logged at ERROR level (this is a
-        # billing leak — the work is already done, but the user can't pay)
-        # and re-surfaced via execution_stats.error so monitoring can pick
-        # it up. Other exceptions are warnings.
+        # InsufficientBalanceError here is a post-hoc billing leak — the
+        # work is already done but the user can no longer pay. We:
+        #   1. log at ERROR with structured fields so alerting can catch it
+        #   2. record the error on execution_stats.error for downstream
+        #      monitoring (stats are persisted into node_stats below)
+        #   3. fire _handle_insufficient_funds_notif so the user is
+        #      notified (mirrors the main queue path at ~line 1254)
+        # The run itself is kept COMPLETED (the block's outputs are
+        # already committed) — matching the documented "billing leak"
+        # contract rather than retroactively failing a successful run.
         if (
             status == ExecutionStatus.COMPLETED
             and node.block.charge_per_llm_call
@@ -711,9 +717,36 @@ class ExecutionProcessor:
                     )
             except InsufficientBalanceError as e:
                 log_metadata.error(
-                    f"Billing leak: insufficient balance after {node.block.name} "
-                    f"completed {extra_iterations} extra iterations: {e}"
+                    "billing_leak: insufficient balance after "
+                    f"{node.block.name} completed {extra_iterations} "
+                    f"extra iterations",
+                    extra={
+                        "billing_leak": True,
+                        "user_id": node_exec.user_id,
+                        "graph_id": node_exec.graph_id,
+                        "block_id": node_exec.block_id,
+                        "extra_iterations": extra_iterations,
+                        "error": str(e),
+                    },
                 )
+                # Surface on execution_stats so node_stats persistence
+                # below records the billing failure for monitoring.
+                execution_stats.error = e
+                # Notify the user they're out of credits. Runs through
+                # Redis dedup (per user+graph) so repeat runs don't spam.
+                try:
+                    await asyncio.to_thread(
+                        self._handle_insufficient_funds_notif,
+                        get_db_client(),
+                        node_exec.user_id,
+                        node_exec.graph_id,
+                        e,
+                    )
+                except Exception as notif_error:  # pragma: no cover
+                    log_metadata.warning(
+                        f"Failed to send insufficient funds notification: "
+                        f"{notif_error}"
+                    )
             except Exception as e:
                 log_metadata.warning(
                     f"Failed to charge extra iterations for {node.block.name}: {e}"
@@ -982,6 +1015,27 @@ class ExecutionProcessor:
                 stats=exec_stats,
             )
 
+    def _resolve_block_cost(
+        self,
+        node_exec: NodeExecutionEntry,
+    ) -> tuple[Any, int, dict]:
+        """Look up the block and compute its base usage cost for an exec.
+
+        Shared by :meth:`_charge_usage` and :meth:`charge_extra_iterations`
+        so the (get_block, block_usage_cost) lookup lives in exactly one
+        place. Returns ``(block, cost, matching_filter)``. ``block`` is
+        ``None`` if the block id can't be resolved — callers should treat
+        that as "nothing to charge".
+        """
+        block = get_block(node_exec.block_id)
+        if not block:
+            logger.error(f"Block {node_exec.block_id} not found.")
+            return None, 0, {}
+        cost, matching_filter = block_usage_cost(
+            block=block, input_data=node_exec.inputs
+        )
+        return block, cost, matching_filter
+
     def _charge_usage(
         self,
         node_exec: NodeExecutionEntry,
@@ -990,14 +1044,10 @@ class ExecutionProcessor:
         total_cost = 0
         remaining_balance = 0
         db_client = get_db_client()
-        block = get_block(node_exec.block_id)
+        block, cost, matching_filter = self._resolve_block_cost(node_exec)
         if not block:
-            logger.error(f"Block {node_exec.block_id} not found.")
             return total_cost, 0
 
-        cost, matching_filter = block_usage_cost(
-            block=block, input_data=node_exec.inputs
-        )
         if cost > 0:
             remaining_balance = db_client.spend_credits(
                 user_id=node_exec.user_id,
@@ -1081,13 +1131,8 @@ class ExecutionProcessor:
         # Cap to protect against a corrupted llm_call_count.
         capped_iterations = min(extra_iterations, self._MAX_EXTRA_ITERATIONS)
         db_client = get_db_client()
-        block = get_block(node_exec.block_id)
-        if not block:
-            return 0, 0
-        cost, matching_filter = block_usage_cost(
-            block=block, input_data=node_exec.inputs
-        )
-        if cost <= 0:
+        block, cost, matching_filter = self._resolve_block_cost(node_exec)
+        if not block or cost <= 0:
             return 0, 0
         total_extra_cost = cost * capped_iterations
         remaining_balance = db_client.spend_credits(

From 613321180ebc5ff9d490d696f807e7ead683a406 Mon Sep 17 00:00:00 2001
From: majdyz <majdyz@users.noreply.github.com>
Date: Fri, 10 Apr 2026 12:00:08 +0000
Subject: [PATCH 06/24] fix(orchestrator): propagate InsufficientBalanceError
 past outer loop catch-all

Follow-up to 6d2d476 addressing sentry's newly-posted review finding.

Both `_execute_tools_agent_mode` and `_execute_tools_sdk_mode` had broad
`except Exception` blocks at the top level of the tool-calling loop that
would still swallow `InsufficientBalanceError` even after the inner tool
executor carve-outs re-raised it: the error would escape the inner
`_execute_single_tool_with_manager`, propagate up through
`_agent_mode_tool_executor`, then get caught by the outer loop's
catch-all and converted into a user-visible "error" yield.

Add explicit `except InsufficientBalanceError: raise` carve-outs before
each broad handler so billing failures propagate all the way out of the
block's `run()` generator, reaching the executor's billing-leak handling
(error recording on execution_stats, user notification, structured log).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/backend/blocks/orchestrator.py    | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 13591bed01..1f2861c87c 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -1407,9 +1407,17 @@ class OrchestratorBlock(Block):
                             "arguments": tc.arguments,
                         },
                     )
+        except InsufficientBalanceError:
+            # Billing failures must propagate out of the block so the
+            # executor's billing-leak handling fires (error recording on
+            # execution_stats, user notification, structured logging).
+            # Do NOT downgrade to a user-visible "error" output — that
+            # would swallow the failure and leak the exact balance amount.
+            raise
         except Exception as e:
-            # Catch all errors (validation, network, API) so that the block
-            # surfaces them as user-visible output instead of crashing.
+            # Catch all OTHER errors (validation, network, API) so that
+            # the block surfaces them as user-visible output instead of
+            # crashing.
             yield "error", str(e)
             return
 
@@ -1774,11 +1782,19 @@ class OrchestratorBlock(Block):
                             await pending_task
                         except (asyncio.CancelledError, StopAsyncIteration):
                             pass
+        except InsufficientBalanceError:
+            # Billing failures must propagate so the executor's
+            # billing-leak handling fires (error recording, user
+            # notification, structured logging). Mirrors the carve-out
+            # in _execute_tools_agent_mode — do not downgrade to a
+            # user-visible error output. The `finally` block below
+            # still runs and records partial token usage.
+            raise
         except Exception as e:
-            # Surface SDK errors as user-visible output instead of crashing,
-            # consistent with _execute_tools_agent_mode error handling.
-            # Don't return yet — fall through to merge_stats below so
-            # partial token usage is always recorded.
+            # Surface OTHER SDK errors as user-visible output instead
+            # of crashing, consistent with _execute_tools_agent_mode
+            # error handling. Don't return yet — fall through to
+            # merge_stats below so partial token usage is always recorded.
             sdk_error = e
         finally:
             # Always record usage stats, even on error.  The SDK may have

From fddd23435fb9560e98761b0c23089bb307e51f98 Mon Sep 17 00:00:00 2001
From: majdyz <majdy.zamil@gmail.com>
Date: Fri, 10 Apr 2026 12:42:27 +0000
Subject: [PATCH 07/24] fix(orchestrator): address round 3 sentry + coderabbit
 findings

1. Don't set execution_stats.error on post-hoc IBE
   - Setting it caused node_error_count++ at line 762, creating an
     inconsistent "errored COMPLETED node" in graph metrics
   - Also leaked balance amounts into persisted node_stats
   - Now: log structured ERROR, notify user, leave node_stats clean
   - Fixes sentry finding 3064117116

2. Wire low-balance notifications for nested tool charges
   - charge_node_usage returned (cost, balance) but caller dropped balance
   - Now invokes _handle_low_balance after a successful tool charge,
     mirroring the main queue behaviour
   - Fixes coderabbit finding 3064103939

3. Lint: black formatting on test_orchestrator_per_iteration_cost.py

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/backend/blocks/orchestrator.py    | 17 ++++-
 .../test_orchestrator_per_iteration_cost.py   | 76 ++++++++++++++-----
 .../backend/backend/executor/manager.py       | 10 ++-
 3 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 1f2861c87c..d1f8693d67 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -35,7 +35,10 @@ from backend.data.dynamic_fields import (
 from backend.data.execution import ExecutionContext
 from backend.data.model import NodeExecutionStats, SchemaField
 from backend.util import json
-from backend.util.clients import get_database_manager_async_client
+from backend.util.clients import (
+    get_database_manager_async_client,
+    get_database_manager_client,
+)
 from backend.util.exceptions import InsufficientBalanceError
 from backend.util.prompt import MAIN_OBJECTIVE_PREFIX
 from backend.util.security import SENSITIVE_FIELD_NAMES
@@ -1139,12 +1142,22 @@ class OrchestratorBlock(Block):
                 and tool_node_stats is not None
                 and tool_node_stats.error is None
             ):
-                tool_cost, _ = await asyncio.to_thread(
+                tool_cost, remaining_balance = await asyncio.to_thread(
                     execution_processor.charge_node_usage,
                     node_exec_entry,
                 )
                 if tool_cost > 0:
                     self.merge_stats(NodeExecutionStats(extra_cost=tool_cost))
+                    # Notify the user if their balance is now low — mirrors
+                    # the main queue path so nested tool charges don't
+                    # bypass low-balance alerts.
+                    await asyncio.to_thread(
+                        execution_processor._handle_low_balance,
+                        db_client=get_database_manager_client(),
+                        user_id=execution_params.user_id,
+                        current_balance=remaining_balance,
+                        transaction_cost=tool_cost,
+                    )
 
             # Get outputs from database after execution completes using database manager client
             node_outputs = await db_client.get_execution_outputs_by_node_exec_id(
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
index abf5c22740..f1806bf6dc 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -322,8 +322,16 @@ def gated_processor(monkeypatch):
     inner_result = {"status": ExecutionStatus.COMPLETED, "llm_call_count": 3}
 
     async def fake_inner(
-        self, *, node, node_exec, node_exec_progress, stats, db_client,
-        log_metadata, nodes_input_masks=None, nodes_to_skip=None,
+        self,
+        *,
+        node,
+        node_exec,
+        node_exec_progress,
+        stats,
+        db_client,
+        log_metadata,
+        nodes_input_masks=None,
+        nodes_to_skip=None,
     ):
         stats.llm_call_count = inner_result["llm_call_count"]
         return MagicMock(wall_time=0.1, cpu_time=0.1), inner_result["status"]
@@ -379,8 +387,12 @@ async def test_on_node_execution_charges_extra_iterations_when_gate_passes(
     inner["llm_call_count"] = 3  # → extra_iterations = 2
     fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
 
-    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
-                            cost=0, node_error_count=0), threading.Lock())
+    stats_pair = (
+        MagicMock(
+            node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0
+        ),
+        threading.Lock(),
+    )
     await proc.on_node_execution(
         node_exec=_make_node_exec(dry_run=False),
         node_exec_progress=MagicMock(),
@@ -399,8 +411,12 @@ async def test_on_node_execution_skips_when_status_not_completed(gated_processor
     inner["llm_call_count"] = 5
     fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
 
-    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
-                            cost=0, node_error_count=0), threading.Lock())
+    stats_pair = (
+        MagicMock(
+            node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0
+        ),
+        threading.Lock(),
+    )
     await proc.on_node_execution(
         node_exec=_make_node_exec(dry_run=False),
         node_exec_progress=MagicMock(),
@@ -419,8 +435,12 @@ async def test_on_node_execution_skips_when_charge_flag_false(gated_processor):
     inner["llm_call_count"] = 5
     fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=False))
 
-    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
-                            cost=0, node_error_count=0), threading.Lock())
+    stats_pair = (
+        MagicMock(
+            node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0
+        ),
+        threading.Lock(),
+    )
     await proc.on_node_execution(
         node_exec=_make_node_exec(dry_run=False),
         node_exec_progress=MagicMock(),
@@ -439,8 +459,12 @@ async def test_on_node_execution_skips_when_llm_call_count_le_1(gated_processor)
     inner["llm_call_count"] = 1  # exactly the base charge, no extras
     fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
 
-    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
-                            cost=0, node_error_count=0), threading.Lock())
+    stats_pair = (
+        MagicMock(
+            node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0
+        ),
+        threading.Lock(),
+    )
     await proc.on_node_execution(
         node_exec=_make_node_exec(dry_run=False),
         node_exec_progress=MagicMock(),
@@ -459,8 +483,12 @@ async def test_on_node_execution_skips_when_dry_run(gated_processor):
     inner["llm_call_count"] = 5
     fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
 
-    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
-                            cost=0, node_error_count=0), threading.Lock())
+    stats_pair = (
+        MagicMock(
+            node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0
+        ),
+        threading.Lock(),
+    )
     await proc.on_node_execution(
         node_exec=_make_node_exec(dry_run=True),
         node_exec_progress=MagicMock(),
@@ -472,7 +500,8 @@ async def test_on_node_execution_skips_when_dry_run(gated_processor):
 
 @pytest.mark.asyncio
 async def test_on_node_execution_insufficient_balance_records_error_and_notifies(
-    monkeypatch, gated_processor,
+    monkeypatch,
+    gated_processor,
 ):
     """When extra-iteration charging fails with InsufficientBalanceError:
 
@@ -501,8 +530,12 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies
         manager.ExecutionProcessor, "charge_extra_iterations", raise_ibe
     )
 
-    stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0,
-                            cost=0, node_error_count=0), threading.Lock())
+    stats_pair = (
+        MagicMock(
+            node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0
+        ),
+        threading.Lock(),
+    )
     result_stats = await proc.on_node_execution(
         node_exec=_make_node_exec(dry_run=False),
         node_exec_progress=MagicMock(),
@@ -520,7 +553,10 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies
 
 
 async def _run_tool_exec_with_stats(
-    *, dry_run: bool, tool_stats_error, charge_node_usage_mock=None,
+    *,
+    dry_run: bool,
+    tool_stats_error,
+    charge_node_usage_mock=None,
 ):
     """Invoke _execute_single_tool_with_manager against fully mocked deps
     and return (charge_call_count, merge_stats_calls).
@@ -549,9 +585,7 @@ async def _run_tool_exec_with_stats(
         mock_node_exec_result,
         {"query": "t"},
     )
-    mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {
-        "result": "ok"
-    }
+    mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {"result": "ok"}
 
     # ExecutionProcessor mock: on_node_execution returns supplied error.
     mock_processor = AsyncMock()
@@ -561,8 +595,8 @@ async def _run_tool_exec_with_stats(
     mock_node_stats = MagicMock()
     mock_node_stats.error = tool_stats_error
     mock_processor.on_node_execution = AsyncMock(return_value=mock_node_stats)
-    mock_processor.charge_node_usage = (
-        charge_node_usage_mock or MagicMock(return_value=(10, 990))
+    mock_processor.charge_node_usage = charge_node_usage_mock or MagicMock(
+        return_value=(10, 990)
     )
 
     # Build a tool_info shaped like _build_tool_info_from_args output.
diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index 3a1fd68db9..1069218ad9 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -729,9 +729,13 @@ class ExecutionProcessor:
                         "error": str(e),
                     },
                 )
-                # Surface on execution_stats so node_stats persistence
-                # below records the billing failure for monitoring.
-                execution_stats.error = e
+                # NOTE: Do NOT set execution_stats.error here. The node ran
+                # to completion — only the post-hoc charge failed. Setting
+                # .error would (a) flip node_error_count below, creating an
+                # "errored COMPLETED node" inconsistency in the metrics, and
+                # (b) leak balance amounts into the persisted node_stats.
+                # The structured ERROR log above is the alerting hook;
+                # node_stats stays clean.
                 # Notify the user they're out of credits. Runs through
                 # Redis dedup (per user+graph) so repeat runs don't spam.
                 try:

From 743f1f82c9760fee8ee2c57fbc0fae582c6d9520 Mon Sep 17 00:00:00 2001
From: majdyz <majdy.zamil@gmail.com>
Date: Fri, 10 Apr 2026 12:45:41 +0000
Subject: [PATCH 08/24] style: black formatting on test_orchestrator.py

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/backend/blocks/test/test_orchestrator.py          | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
index c69c107de3..c92ba3c702 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
@@ -926,9 +926,7 @@ async def test_orchestrator_agent_mode():
         # Returns (cost, remaining_balance). Synchronous because it's called
         # via asyncio.to_thread. Use a non-zero cost so the merge_stats
         # branch is actually exercised, and assert it's called below.
-        mock_execution_processor.charge_node_usage = MagicMock(
-            return_value=(10, 990)
-        )
+        mock_execution_processor.charge_node_usage = MagicMock(return_value=(10, 990))
 
         # Mock the get_execution_outputs_by_node_exec_id method
         mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {

From b29f1608499d337f9f6bc625a41d1924155d2127 Mon Sep 17 00:00:00 2001
From: majdyz <majdy.zamil@gmail.com>
Date: Fri, 10 Apr 2026 13:10:07 +0000
Subject: [PATCH 09/24] fix(executor): structured ERROR log on unexpected
 post-exec billing failures

The catch-all `except Exception` in the post-execution charge path was
logging at WARNING and dropping the error. Sentry flagged this as a
silent billing leak risk.

Now logs at ERROR with the same `billing_leak: True` structured marker
used by the InsufficientBalanceError branch, plus error_type/error/
extra_iterations fields and exc_info=True for the full traceback.
Monitoring/alerting can pick up either failure mode via the same key.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/backend/executor/manager.py       | 20 +++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index 1069218ad9..a1acbbd990 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -752,8 +752,24 @@ class ExecutionProcessor:
                         f"{notif_error}"
                     )
             except Exception as e:
-                log_metadata.warning(
-                    f"Failed to charge extra iterations for {node.block.name}: {e}"
+                # Unexpected billing failure (DB outage, network, etc.).
+                # Log at ERROR with structured fields and the same
+                # `billing_leak: True` marker so monitoring catches it
+                # alongside InsufficientBalanceError.  The run is kept
+                # COMPLETED because the work is already done.
+                log_metadata.error(
+                    f"billing_leak: failed to charge extra iterations "
+                    f"for {node.block.name}",
+                    extra={
+                        "billing_leak": True,
+                        "user_id": node_exec.user_id,
+                        "graph_id": node_exec.graph_id,
+                        "block_id": node_exec.block_id,
+                        "extra_iterations": extra_iterations,
+                        "error_type": type(e).__name__,
+                        "error": str(e),
+                    },
+                    exc_info=True,
                 )
 
         graph_stats, graph_stats_lock = graph_stats_pair

From 6469334ae743d82ba867e1832f5d4523d74872f0 Mon Sep 17 00:00:00 2001
From: majdyz <majdy.zamil@gmail.com>
Date: Fri, 10 Apr 2026 13:18:32 +0000
Subject: [PATCH 10/24] fix(executor): notify user when nested tool charge
 raises IBE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the OrchestratorBlock's nested tool charge in
_execute_single_tool_with_manager raises InsufficientBalanceError, the
exception propagates up through on_node_execution and the node is
marked FAILED. The main queue's _charge_usage IBE catch (line 1305)
doesn't fire because the initial _charge_usage already succeeded —
only the nested tool charge failed.

This left the user without any notification about why their agent run
stopped. Add a status==FAILED + isinstance(error, IBE) branch in
on_node_execution that calls _handle_insufficient_funds_notif. The
notification flow goes through the same Redis dedup as the main queue
path so repeat runs don't spam.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/backend/executor/manager.py       | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index a1acbbd990..d9c2f71198 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -807,6 +807,29 @@ class ExecutionProcessor:
                 db_client=db_client,
             )
 
+        # If the node failed because a nested tool charge raised
+        # InsufficientBalanceError (orchestrator agent mode), the main
+        # queue's _charge_usage IBE notification path was bypassed (the
+        # initial charge succeeded — only the nested tool charge failed).
+        # Send the user notification here so they understand why their
+        # agent run stopped.  Failures are logged but not raised so the
+        # node-execution path stays clean.
+        if status == ExecutionStatus.FAILED and isinstance(
+            execution_stats.error, InsufficientBalanceError
+        ):
+            try:
+                await asyncio.to_thread(
+                    self._handle_insufficient_funds_notif,
+                    get_db_client(),
+                    node_exec.user_id,
+                    node_exec.graph_id,
+                    execution_stats.error,
+                )
+            except Exception as notif_error:  # pragma: no cover
+                log_metadata.warning(
+                    f"Failed to send insufficient funds notification: " f"{notif_error}"
+                )
+
         return execution_stats
 
     @async_time_measured

From 389b2f4fb26f3a7dee36dae7f29faed19a7e762e Mon Sep 17 00:00:00 2001
From: majdyz <majdy.zamil@gmail.com>
Date: Fri, 10 Apr 2026 13:33:04 +0000
Subject: [PATCH 11/24] test(orchestrator): align IBE test with no-error-set
 fix

The test asserted result_stats.error is set to the IBE, but commit
b662eab36 removed that line from the IBE handler in on_node_execution
because it caused node_error_count++ inconsistency and leaked balance
amounts into persisted node_stats. Update the test to assert
result_stats.error is None (the structured ERROR log is the alerting
hook now, not the .error field).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../test/test_orchestrator_per_iteration_cost.py     | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
index f1806bf6dc..1c8fca368b 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -506,8 +506,11 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies
     """When extra-iteration charging fails with InsufficientBalanceError:
 
     - the run still reports COMPLETED (the work is already done)
-    - execution_stats.error is set so monitoring picks it up
+    - execution_stats.error is NOT set (would flip node_error_count and
+      leak balance amounts into persisted node_stats — see manager.py
+      comment in the IBE handler)
     - _handle_insufficient_funds_notif is called so the user is notified
+    - the structured ERROR log is the alerting hook
     """
     from backend.data.execution import ExecutionStatus
     from backend.executor import manager
@@ -542,8 +545,11 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies
         nodes_input_masks=None,
         graph_stats_pair=stats_pair,
     )
-    # Error recorded on stats so downstream monitoring can surface it.
-    assert isinstance(result_stats.error, InsufficientBalanceError)
+    # error stays None — node ran to completion, only the post-hoc
+    # charge failed. Setting .error would (a) flip node_error_count++
+    # creating an "errored COMPLETED node" inconsistency, and (b) leak
+    # balance amounts into persisted node_stats.
+    assert result_stats.error is None
     # User notification fired.
     assert len(calls["handle_insufficient_funds_notif"]) == 1
     assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u"

From 4525869a755ae8f459e8fc470ee29d911581d420 Mon Sep 17 00:00:00 2001
From: majdyz <majdy.zamil@gmail.com>
Date: Fri, 10 Apr 2026 13:34:01 +0000
Subject: [PATCH 12/24] fix(executor): wrap _handle_low_balance in to_thread to
 avoid blocking

The post-execution low-balance check in on_node_execution called
_handle_low_balance directly. _handle_low_balance does sync DB work,
and on_node_execution is async, so the call blocked the event loop.

Sentry caught it. Wrap in asyncio.to_thread.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 autogpt_platform/backend/backend/executor/manager.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index d9c2f71198..bb7f29a9a1 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -709,7 +709,11 @@ class ExecutionProcessor:
                 )
                 if extra_cost > 0:
                     execution_stats.extra_cost += extra_cost
-                    self._handle_low_balance(
+                    # Wrap in to_thread — _handle_low_balance does sync DB
+                    # work and we're in an async method, so calling it
+                    # directly would block the event loop.
+                    await asyncio.to_thread(
+                        self._handle_low_balance,
                         db_client=get_db_client(),
                         user_id=node_exec.user_id,
                         current_balance=remaining_balance,

From 5b27ccf90878007fab50239caf2f2bef84bba037 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Fri, 10 Apr 2026 22:56:38 +0700
Subject: [PATCH 13/24] refactor(backend/orchestrator): replace
 charge_per_llm_call flag with extra_credit_charges hook + async charge
 methods

- Replace ClassVar[bool] charge_per_llm_call in Block._base with
  extra_credit_charges(execution_stats) -> int method; OrchestratorBlock
  overrides it to return max(0, llm_call_count - 1)
- Make charge_extra_iterations and charge_node_usage async; sync work
  runs via run_in_executor. charge_node_usage now folds in
  _handle_low_balance so callers don't touch private methods
- orchestrator.py no longer calls execution_processor._handle_low_balance
  directly; just awaits charge_node_usage which handles it internally
- Update tests: TestChargePerLlmCallFlag -> TestExtraCreditCharges,
  all async charge method tests converted to @pytest.mark.asyncio,
  added TestChargeNodeUsage assertions for _handle_low_balance calls
---
 .../backend/backend/blocks/_base.py           |  18 +-
 .../backend/backend/blocks/orchestrator.py    |  30 +--
 .../test_orchestrator_per_iteration_cost.py   | 247 +++++++++++++-----
 .../backend/backend/executor/manager.py       | 125 +++++----
 4 files changed, 273 insertions(+), 147 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/_base.py b/autogpt_platform/backend/backend/blocks/_base.py
index 1eeec991f8..cbebdee85f 100644
--- a/autogpt_platform/backend/backend/blocks/_base.py
+++ b/autogpt_platform/backend/backend/blocks/_base.py
@@ -420,12 +420,18 @@ class BlockWebhookConfig(BlockManualWebhookConfig):
 class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
     _optimized_description: ClassVar[str | None] = None
 
-    # Set to True for blocks (e.g. OrchestratorBlock in agent mode) that
-    # may make multiple LLM calls in a single run. The executor will then
-    # charge `block_cost * (llm_call_count - 1)` extra credits after the
-    # block completes — the first call is already covered by the upfront
-    # `_charge_usage()` call. Defaults to False (single charge per run).
-    charge_per_llm_call: ClassVar[bool] = False
+    def extra_credit_charges(self, execution_stats: "NodeExecutionStats") -> int:
+        """Return extra credits to charge after this block run completes.
+
+        Called by the executor after a block finishes with COMPLETED status.
+        The return value is the number of additional base-cost credits to
+        charge beyond the single credit already collected by ``_charge_usage``
+        at the start of execution. Defaults to 0 (no extra charges).
+
+        Override in blocks (e.g. OrchestratorBlock) that make multiple LLM
+        calls within one run and should be billed per call.
+        """
+        return 0
 
     def __init__(
         self,
diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index d1f8693d67..9ab0318165 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -35,10 +35,7 @@ from backend.data.dynamic_fields import (
 from backend.data.execution import ExecutionContext
 from backend.data.model import NodeExecutionStats, SchemaField
 from backend.util import json
-from backend.util.clients import (
-    get_database_manager_async_client,
-    get_database_manager_client,
-)
+from backend.util.clients import get_database_manager_async_client
 from backend.util.exceptions import InsufficientBalanceError
 from backend.util.prompt import MAIN_OBJECTIVE_PREFIX
 from backend.util.security import SENSITIVE_FIELD_NAMES
@@ -373,10 +370,14 @@ class OrchestratorBlock(Block):
     single-shot and iterative agent mode execution.
     """
 
-    # In agent mode the block makes one LLM call per iteration. The executor
-    # uses this flag to charge `cost * (llm_call_count - 1)` extra credits
-    # after the block completes; the first call is covered by _charge_usage().
-    charge_per_llm_call = True
+    def extra_credit_charges(self, execution_stats: NodeExecutionStats) -> int:
+        """Charge one extra base credit per LLM call beyond the first.
+
+        In agent mode each iteration makes one LLM call. The first is already
+        covered by _charge_usage(); this returns the number of additional
+        credits so the executor can bill the remaining calls post-completion.
+        """
+        return max(0, execution_stats.llm_call_count - 1)
 
     # MCP server name used by the Claude Code SDK execution mode.  Keep in sync
     # with _create_graph_mcp_server and the MCP_PREFIX derivation in _execute_tools_sdk_mode.
@@ -1142,22 +1143,11 @@ class OrchestratorBlock(Block):
                 and tool_node_stats is not None
                 and tool_node_stats.error is None
             ):
-                tool_cost, remaining_balance = await asyncio.to_thread(
-                    execution_processor.charge_node_usage,
+                tool_cost, _ = await execution_processor.charge_node_usage(
                     node_exec_entry,
                 )
                 if tool_cost > 0:
                     self.merge_stats(NodeExecutionStats(extra_cost=tool_cost))
-                    # Notify the user if their balance is now low — mirrors
-                    # the main queue path so nested tool charges don't
-                    # bypass low-balance alerts.
-                    await asyncio.to_thread(
-                        execution_processor._handle_low_balance,
-                        db_client=get_database_manager_client(),
-                        user_id=execution_params.user_id,
-                        current_balance=remaining_balance,
-                        transaction_cost=tool_cost,
-                    )
 
             # Get outputs from database after execution completes using database manager client
             node_outputs = await db_client.get_execution_outputs_by_node_exec_id(
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
index 1c8fca368b..c703e32c71 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -1,7 +1,7 @@
 """Tests for OrchestratorBlock per-iteration cost charging.
 
 The OrchestratorBlock in agent mode makes multiple LLM calls in a single
-node execution. The executor uses ``Block.charge_per_llm_call`` to detect
+node execution. The executor uses ``Block.extra_credit_charges`` to detect
 this and charge ``base_cost * (llm_call_count - 1)`` extra credits after
 the block completes.
 """
@@ -13,19 +13,60 @@ import pytest
 
 from backend.blocks.orchestrator import OrchestratorBlock
 
-# ── Class flag ──────────────────────────────────────────────────────
+# ── extra_credit_charges hook ────────────────────────────────────────
 
 
-class TestChargePerLlmCallFlag:
-    """OrchestratorBlock opts into per-LLM-call billing."""
+class TestExtraCreditCharges:
+    """OrchestratorBlock opts into per-LLM-call billing via extra_credit_charges."""
 
-    def test_orchestrator_opts_in(self):
-        assert OrchestratorBlock.charge_per_llm_call is True
+    def test_orchestrator_returns_nonzero_for_multiple_calls(self):
+        from backend.data.model import NodeExecutionStats
+
+        block = OrchestratorBlock()
+        stats = NodeExecutionStats(llm_call_count=3)
+        assert block.extra_credit_charges(stats) == 2
+
+    def test_orchestrator_returns_zero_for_single_call(self):
+        from backend.data.model import NodeExecutionStats
+
+        block = OrchestratorBlock()
+        stats = NodeExecutionStats(llm_call_count=1)
+        assert block.extra_credit_charges(stats) == 0
+
+    def test_orchestrator_returns_zero_for_zero_calls(self):
+        from backend.data.model import NodeExecutionStats
+
+        block = OrchestratorBlock()
+        stats = NodeExecutionStats(llm_call_count=0)
+        assert block.extra_credit_charges(stats) == 0
+
+    def test_default_block_returns_zero(self):
+        from backend.data.model import NodeExecutionStats
+
+        # Use a concrete block (not the abstract Block base) to verify the
+        # default implementation returns 0.
+        block = OrchestratorBlock()
+        stats = NodeExecutionStats(llm_call_count=0)
+        # When llm_call_count=0, extra_credit_charges should clamp to 0.
+        assert block.extra_credit_charges(stats) == 0
+
+        # Also verify via Block.extra_credit_charges directly (method-level
+        # check) by calling the unbound method on an OrchestratorBlock
+        # instance with the base implementation patched out.
+        from unittest.mock import patch
 
-    def test_default_block_does_not_opt_in(self):
         from backend.blocks._base import Block
 
-        assert Block.charge_per_llm_call is False
+        with patch.object(
+            OrchestratorBlock,
+            "extra_credit_charges",
+            Block.extra_credit_charges,
+        ):
+            base_block = OrchestratorBlock()
+            assert (
+                base_block.extra_credit_charges(NodeExecutionStats(llm_call_count=10))
+                == 0
+            )
 
 
 # ── charge_extra_iterations math ───────────────────────────────────
@@ -76,36 +117,44 @@ def patched_processor(monkeypatch):
 
 
 class TestChargeExtraIterations:
-    def test_zero_extra_iterations_charges_nothing(
+    @pytest.mark.asyncio
+    async def test_zero_extra_iterations_charges_nothing(
         self, patched_processor, fake_node_exec
     ):
         proc, spent = patched_processor
-        cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=0)
+        cost, balance = await proc.charge_extra_iterations(
+            fake_node_exec, extra_iterations=0
+        )
         assert cost == 0
         assert balance == 0
         assert spent == []
 
-    def test_extra_iterations_multiplies_base_cost(
+    @pytest.mark.asyncio
+    async def test_extra_iterations_multiplies_base_cost(
         self, patched_processor, fake_node_exec
     ):
         proc, spent = patched_processor
-        cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=4)
+        cost, balance = await proc.charge_extra_iterations(
+            fake_node_exec, extra_iterations=4
+        )
         assert cost == 40  # 4 × 10
         assert balance == 1000
         assert spent == [40]
 
-    def test_negative_extra_iterations_charges_nothing(
+    @pytest.mark.asyncio
+    async def test_negative_extra_iterations_charges_nothing(
         self, patched_processor, fake_node_exec
     ):
         proc, spent = patched_processor
-        cost, balance = proc.charge_extra_iterations(
+        cost, balance = await proc.charge_extra_iterations(
             fake_node_exec, extra_iterations=-1
         )
         assert cost == 0
         assert balance == 0
         assert spent == []
 
-    def test_capped_at_max(self, monkeypatch, fake_node_exec):
+    @pytest.mark.asyncio
+    async def test_capped_at_max(self, monkeypatch, fake_node_exec):
         """Runaway llm_call_count is capped at _MAX_EXTRA_ITERATIONS."""
         from backend.executor import manager
 
@@ -129,14 +178,15 @@ class TestChargeExtraIterations:
 
         proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
         cap = manager.ExecutionProcessor._MAX_EXTRA_ITERATIONS
-        cost, _ = proc.charge_extra_iterations(
+        cost, _ = await proc.charge_extra_iterations(
             fake_node_exec, extra_iterations=cap * 100
         )
         # Charged at most cap × 10
         assert cost == cap * 10
         assert spent == [cap * 10]
 
-    def test_zero_base_cost_skips_charge(self, monkeypatch, fake_node_exec):
+    @pytest.mark.asyncio
+    async def test_zero_base_cost_skips_charge(self, monkeypatch, fake_node_exec):
         from backend.executor import manager
 
         spent: list[int] = []
@@ -156,12 +206,15 @@ class TestChargeExtraIterations:
         )
 
         proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
-        cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=4)
+        cost, balance = await proc.charge_extra_iterations(
+            fake_node_exec, extra_iterations=4
+        )
         assert cost == 0
         assert balance == 0
         assert spent == []
 
-    def test_block_not_found_skips_charge(self, monkeypatch, fake_node_exec):
+    @pytest.mark.asyncio
+    async def test_block_not_found_skips_charge(self, monkeypatch, fake_node_exec):
         from backend.executor import manager
 
         spent: list[int] = []
@@ -178,12 +231,17 @@ class TestChargeExtraIterations:
         )
 
         proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
-        cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=3)
+        cost, balance = await proc.charge_extra_iterations(
+            fake_node_exec, extra_iterations=3
+        )
         assert cost == 0
         assert balance == 0
         assert spent == []
 
-    def test_propagates_insufficient_balance_error(self, monkeypatch, fake_node_exec):
+    @pytest.mark.asyncio
+    async def test_propagates_insufficient_balance_error(
+        self, monkeypatch, fake_node_exec
+    ):
         """Out-of-credits errors must propagate, not be silently swallowed."""
         from backend.executor import manager
         from backend.util.exceptions import InsufficientBalanceError
@@ -208,7 +266,7 @@ class TestChargeExtraIterations:
 
         proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
         with pytest.raises(InsufficientBalanceError):
-            proc.charge_extra_iterations(fake_node_exec, extra_iterations=4)
+            await proc.charge_extra_iterations(fake_node_exec, extra_iterations=4)
 
 
 # ── charge_node_usage ──────────────────────────────────────────────
@@ -217,7 +275,10 @@ class TestChargeExtraIterations:
 class TestChargeNodeUsage:
     """charge_node_usage delegates to _charge_usage with execution_count=0."""
 
-    def test_delegates_with_zero_execution_count(self, monkeypatch, fake_node_exec):
+    @pytest.mark.asyncio
+    async def test_delegates_with_zero_execution_count(
+        self, monkeypatch, fake_node_exec
+    ):
         """Nested tool charges should NOT inflate the per-execution counter."""
         from backend.executor import manager
 
@@ -228,16 +289,95 @@ class TestChargeNodeUsage:
             captured["node_exec"] = node_exec
             return (5, 100)
 
+        def fake_handle_low_balance(
+            self, db_client, user_id, current_balance, transaction_cost
+        ):
+            pass
+
         monkeypatch.setattr(
             manager.ExecutionProcessor, "_charge_usage", fake_charge_usage
         )
+        monkeypatch.setattr(
+            manager.ExecutionProcessor, "_handle_low_balance", fake_handle_low_balance
+        )
+        monkeypatch.setattr(manager, "get_db_client", lambda: MagicMock())
 
         proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
-        cost, balance = proc.charge_node_usage(fake_node_exec)
+        cost, balance = await proc.charge_node_usage(fake_node_exec)
         assert cost == 5
         assert balance == 100
         assert captured["execution_count"] == 0
 
+    @pytest.mark.asyncio
+    async def test_calls_handle_low_balance_when_cost_nonzero(
+        self, monkeypatch, fake_node_exec
+    ):
+        """charge_node_usage should call _handle_low_balance when total_cost > 0."""
+        from backend.executor import manager
+
+        low_balance_calls: list[dict] = []
+
+        def fake_charge_usage(self, node_exec, execution_count):
+            return (10, 50)
+
+        def fake_handle_low_balance(
+            self, db_client, user_id, current_balance, transaction_cost
+        ):
+            low_balance_calls.append(
+                {
+                    "user_id": user_id,
+                    "current_balance": current_balance,
+                    "transaction_cost": transaction_cost,
+                }
+            )
+
+        monkeypatch.setattr(
+            manager.ExecutionProcessor, "_charge_usage", fake_charge_usage
+        )
+        monkeypatch.setattr(
+            manager.ExecutionProcessor, "_handle_low_balance", fake_handle_low_balance
+        )
+        monkeypatch.setattr(manager, "get_db_client", lambda: MagicMock())
+
+        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+        cost, balance = await proc.charge_node_usage(fake_node_exec)
+        assert cost == 10
+        assert balance == 50
+        assert len(low_balance_calls) == 1
+        assert low_balance_calls[0]["user_id"] == "u"
+        assert low_balance_calls[0]["current_balance"] == 50
+        assert low_balance_calls[0]["transaction_cost"] == 10
+
+    @pytest.mark.asyncio
+    async def test_skips_handle_low_balance_when_cost_zero(
+        self, monkeypatch, fake_node_exec
+    ):
+        """charge_node_usage should NOT call _handle_low_balance when cost is 0."""
+        from backend.executor import manager
+
+        low_balance_calls: list = []
+
+        def fake_charge_usage(self, node_exec, execution_count):
+            return (0, 200)
+
+        def fake_handle_low_balance(
+            self, db_client, user_id, current_balance, transaction_cost
+        ):
+            low_balance_calls.append(True)
+
+        monkeypatch.setattr(
+            manager.ExecutionProcessor, "_charge_usage", fake_charge_usage
+        )
+        monkeypatch.setattr(
+            manager.ExecutionProcessor, "_handle_low_balance", fake_handle_low_balance
+        )
+        monkeypatch.setattr(manager, "get_db_client", lambda: MagicMock())
+
+        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+        cost, balance = await proc.charge_node_usage(fake_node_exec)
+        assert cost == 0
+        assert low_balance_calls == []
+
 
 # ── on_node_execution charging gate ────────────────────────────────
 
@@ -245,10 +385,10 @@ class TestChargeNodeUsage:
 class _FakeNode:
     """Minimal stand-in for a ``Node`` object with a block attribute."""
 
-    def __init__(self, charge_per_llm_call: bool, block_name: str = "FakeBlock"):
+    def __init__(self, extra_charges: int = 0, block_name: str = "FakeBlock"):
         self.block = MagicMock()
-        self.block.charge_per_llm_call = charge_per_llm_call
         self.block.name = block_name
+        self.block.extra_credit_charges = MagicMock(return_value=extra_charges)
 
 
 class _FakeExecContext:
@@ -274,7 +414,7 @@ def _make_node_exec(dry_run: bool = False) -> MagicMock:
 def gated_processor(monkeypatch):
     """ExecutionProcessor with on_node_execution's downstream calls stubbed.
 
-    Lets tests flip the four gate conditions (status, charge_per_llm_call,
+    Lets tests flip the gate conditions (status, extra_credit_charges result,
     llm_call_count, dry_run) and observe whether charge_extra_iterations
     was called.
     """
@@ -290,7 +430,7 @@ def gated_processor(monkeypatch):
 
     # Stub node lookup + DB client so the wrapper doesn't touch real infra.
     fake_db = MagicMock()
-    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=2))
     monkeypatch.setattr(manager, "get_db_async_client", lambda: fake_db)
     monkeypatch.setattr(manager, "get_db_client", lambda: fake_db)
     # get_block is called by LogMetadata construction in on_node_execution.
@@ -342,7 +482,7 @@ def gated_processor(monkeypatch):
         fake_inner,
     )
 
-    def fake_charge_extra(self, node_exec, extra_iterations):
+    async def fake_charge_extra(self, node_exec, extra_iterations):
         calls["charge_extra_iterations"].append(extra_iterations)
         return (extra_iterations * 10, 500)
 
@@ -379,13 +519,13 @@ def gated_processor(monkeypatch):
 async def test_on_node_execution_charges_extra_iterations_when_gate_passes(
     gated_processor,
 ):
-    """COMPLETED + charge_per_llm_call + llm_call_count>1 + not dry_run → charged."""
+    """COMPLETED + extra_credit_charges > 0 + not dry_run → charged."""
     from backend.data.execution import ExecutionStatus
 
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.COMPLETED
-    inner["llm_call_count"] = 3  # → extra_iterations = 2
-    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+    inner["llm_call_count"] = 3  # → extra_charges = 2
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=2))
 
     stats_pair = (
         MagicMock(
@@ -409,7 +549,7 @@ async def test_on_node_execution_skips_when_status_not_completed(gated_processor
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.FAILED
     inner["llm_call_count"] = 5
-    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=4))
 
     stats_pair = (
         MagicMock(
@@ -427,37 +567,14 @@ async def test_on_node_execution_skips_when_status_not_completed(gated_processor
 
 
 @pytest.mark.asyncio
-async def test_on_node_execution_skips_when_charge_flag_false(gated_processor):
+async def test_on_node_execution_skips_when_extra_charges_zero(gated_processor):
     from backend.data.execution import ExecutionStatus
 
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.COMPLETED
     inner["llm_call_count"] = 5
-    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=False))
-
-    stats_pair = (
-        MagicMock(
-            node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0
-        ),
-        threading.Lock(),
-    )
-    await proc.on_node_execution(
-        node_exec=_make_node_exec(dry_run=False),
-        node_exec_progress=MagicMock(),
-        nodes_input_masks=None,
-        graph_stats_pair=stats_pair,
-    )
-    assert calls["charge_extra_iterations"] == []
-
-
-@pytest.mark.asyncio
-async def test_on_node_execution_skips_when_llm_call_count_le_1(gated_processor):
-    from backend.data.execution import ExecutionStatus
-
-    proc, calls, inner, fake_db, _ = gated_processor
-    inner["status"] = ExecutionStatus.COMPLETED
-    inner["llm_call_count"] = 1  # exactly the base charge, no extras
-    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+    # Block returns 0 extra charges (base class default)
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=0))
 
     stats_pair = (
         MagicMock(
@@ -481,7 +598,7 @@ async def test_on_node_execution_skips_when_dry_run(gated_processor):
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.COMPLETED
     inner["llm_call_count"] = 5
-    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=4))
 
     stats_pair = (
         MagicMock(
@@ -519,9 +636,9 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.COMPLETED
     inner["llm_call_count"] = 4
-    fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True))
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=3))
 
-    def raise_ibe(self, node_exec, extra_iterations):
+    async def raise_ibe(self, node_exec, extra_iterations):
         raise InsufficientBalanceError(
             user_id=node_exec.user_id,
             message="Insufficient balance",
@@ -601,7 +718,7 @@ async def _run_tool_exec_with_stats(
     mock_node_stats = MagicMock()
     mock_node_stats.error = tool_stats_error
     mock_processor.on_node_execution = AsyncMock(return_value=mock_node_stats)
-    mock_processor.charge_node_usage = charge_node_usage_mock or MagicMock(
+    mock_processor.charge_node_usage = charge_node_usage_mock or AsyncMock(
         return_value=(10, 990)
     )
 
@@ -700,11 +817,11 @@ async def test_tool_execution_insufficient_balance_propagates():
     If this leaked into a ToolCallResult the LLM loop would keep running
     with 'tool failed' errors and the user would get unpaid work.
     """
-    from unittest.mock import MagicMock
+    from unittest.mock import AsyncMock
 
     from backend.util.exceptions import InsufficientBalanceError
 
-    raising_charge = MagicMock(
+    raising_charge = AsyncMock(
         side_effect=InsufficientBalanceError(
             user_id="u", message="nope", balance=0, amount=10
         )
diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index bb7f29a9a1..d0348c0611 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -687,38 +687,25 @@ class ExecutionProcessor:
         # InsufficientBalanceError here is a post-hoc billing leak — the
         # work is already done but the user can no longer pay. We:
         #   1. log at ERROR with structured fields so alerting can catch it
-        #   2. record the error on execution_stats.error for downstream
-        #      monitoring (stats are persisted into node_stats below)
-        #   3. fire _handle_insufficient_funds_notif so the user is
+        #   2. fire _handle_insufficient_funds_notif so the user is
         #      notified (mirrors the main queue path at ~line 1254)
         # The run itself is kept COMPLETED (the block's outputs are
         # already committed) — matching the documented "billing leak"
         # contract rather than retroactively failing a successful run.
-        if (
-            status == ExecutionStatus.COMPLETED
-            and node.block.charge_per_llm_call
-            and execution_stats.llm_call_count > 1
+        extra_iterations = (
+            node.block.extra_credit_charges(execution_stats)
+            if status == ExecutionStatus.COMPLETED
             and not node_exec.execution_context.dry_run
-        ):
-            extra_iterations = execution_stats.llm_call_count - 1
+            else 0
+        )
+        if extra_iterations > 0:
             try:
-                extra_cost, remaining_balance = await asyncio.to_thread(
-                    self.charge_extra_iterations,
+                extra_cost, remaining_balance = await self.charge_extra_iterations(
                     node_exec,
                     extra_iterations,
                 )
                 if extra_cost > 0:
                     execution_stats.extra_cost += extra_cost
-                    # Wrap in to_thread — _handle_low_balance does sync DB
-                    # work and we're in an async method, so calling it
-                    # directly would block the event loop.
-                    await asyncio.to_thread(
-                        self._handle_low_balance,
-                        db_client=get_db_client(),
-                        user_id=node_exec.user_id,
-                        current_balance=remaining_balance,
-                        transaction_cost=extra_cost,
-                    )
             except InsufficientBalanceError as e:
                 log_metadata.error(
                     "billing_leak: insufficient balance after "
@@ -1137,46 +1124,16 @@ class ExecutionProcessor:
     # 200 leaves headroom while preventing runaway charges.
     _MAX_EXTRA_ITERATIONS = 200
 
-    def charge_node_usage(
+    def _charge_extra_iterations_sync(
         self,
         node_exec: NodeExecutionEntry,
+        capped_iterations: int,
     ) -> tuple[int, int]:
-        """Charge a single node execution to the user.
+        """Synchronous implementation — runs in a thread-pool worker.
 
-        Public wrapper around :meth:`_charge_usage` for blocks (e.g. the
-        OrchestratorBlock) that spawn nested node executions outside the
-        main queue and therefore need to charge them explicitly.
-
-        Note: this **does not** increment the global execution counter
-        (``increment_execution_count``). Nested tool executions are
-        sub-steps of a single block run from the user's perspective and
-        should not push them into higher per-execution cost tiers.
+        Called only from :meth:`charge_extra_iterations`. Do not call
+        directly from async code.
         """
-        return self._charge_usage(
-            node_exec=node_exec,
-            execution_count=0,
-        )
-
-    def charge_extra_iterations(
-        self,
-        node_exec: NodeExecutionEntry,
-        extra_iterations: int,
-    ) -> tuple[int, int]:
-        """Charge a block extra iterations beyond the initial run.
-
-        Used by agent-mode blocks (e.g. OrchestratorBlock) that make
-        multiple LLM calls within a single node execution. The first
-        iteration is already charged by :meth:`_charge_usage`; this
-        method charges *extra_iterations* additional copies of the
-        block's base cost.
-
-        Returns ``(total_extra_cost, remaining_balance)``. May raise
-        ``InsufficientBalanceError`` if the user can't afford the charge.
-        """
-        if extra_iterations <= 0:
-            return 0, 0
-        # Cap to protect against a corrupted llm_call_count.
-        capped_iterations = min(extra_iterations, self._MAX_EXTRA_ITERATIONS)
         db_client = get_db_client()
         block, cost, matching_filter = self._resolve_block_cost(node_exec)
         if not block or cost <= 0:
@@ -1204,6 +1161,62 @@ class ExecutionProcessor:
         )
         return total_extra_cost, remaining_balance
 
+    async def charge_extra_iterations(
+        self,
+        node_exec: NodeExecutionEntry,
+        extra_iterations: int,
+    ) -> tuple[int, int]:
+        """Charge a block extra iterations beyond the initial run.
+
+        Used by agent-mode blocks (e.g. OrchestratorBlock) that make
+        multiple LLM calls within a single node execution. The first
+        iteration is already charged by :meth:`_charge_usage`; this
+        method charges *extra_iterations* additional copies of the
+        block's base cost.
+
+        Returns ``(total_extra_cost, remaining_balance)``. May raise
+        ``InsufficientBalanceError`` if the user can't afford the charge.
+        """
+        if extra_iterations <= 0:
+            return 0, 0
+        # Cap to protect against a corrupted llm_call_count.
+        capped = min(extra_iterations, self._MAX_EXTRA_ITERATIONS)
+        return await asyncio.get_event_loop().run_in_executor(
+            None, self._charge_extra_iterations_sync, node_exec, capped
+        )
+
+    async def charge_node_usage(
+        self,
+        node_exec: NodeExecutionEntry,
+    ) -> tuple[int, int]:
+        """Charge a single node execution to the user.
+
+        Public async wrapper around :meth:`_charge_usage` for blocks (e.g. the
+        OrchestratorBlock) that spawn nested node executions outside the
+        main queue and therefore need to charge them explicitly.
+
+        Also handles low-balance notification so callers don't need to touch
+        private methods directly.
+
+        Note: this **does not** increment the global execution counter
+        (``increment_execution_count``). Nested tool executions are
+        sub-steps of a single block run from the user's perspective and
+        should not push them into higher per-execution cost tiers.
+        """
+        total_cost, remaining = await asyncio.get_event_loop().run_in_executor(
+            None, self._charge_usage, node_exec, 0
+        )
+        if total_cost > 0:
+            await asyncio.get_event_loop().run_in_executor(
+                None,
+                self._handle_low_balance,
+                get_db_client(),
+                node_exec.user_id,
+                remaining,
+                total_cost,
+            )
+        return total_cost, remaining
+
     @time_measured
     def _on_graph_execution(
         self,

From 6523dce30cc5497771ee32b1eca1fc49d144916c Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Fri, 10 Apr 2026 23:53:52 +0700
Subject: [PATCH 14/24] fix(backend/orchestrator): use AsyncMock for
 charge_node_usage in test

charge_node_usage is async and is directly awaited; using MagicMock
caused a TypeError that was silently swallowed by the outer except
Exception block, meaning the billing assertion passed for the wrong
reason (mock called but await failed, so no billing actually ran).
---
 .../backend/backend/blocks/test/test_orchestrator.py      | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
index c92ba3c702..2eb27012dc 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py
@@ -923,10 +923,10 @@ async def test_orchestrator_agent_mode():
             return_value=mock_node_stats
         )
         # Mock charge_node_usage (called after successful tool execution).
-        # Returns (cost, remaining_balance). Synchronous because it's called
-        # via asyncio.to_thread. Use a non-zero cost so the merge_stats
-        # branch is actually exercised, and assert it's called below.
-        mock_execution_processor.charge_node_usage = MagicMock(return_value=(10, 990))
+        # Returns (cost, remaining_balance). Must be AsyncMock because it is
+        # an async method and is directly awaited in _execute_single_tool_with_manager.
+        # Use a non-zero cost so the merge_stats branch is exercised.
+        mock_execution_processor.charge_node_usage = AsyncMock(return_value=(10, 990))
 
         # Mock the get_execution_outputs_by_node_exec_id method
         mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {

From 7e7b3c42cbdcc24843c873ccaafc7c9fd6cabb72 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Fri, 10 Apr 2026 23:56:24 +0700
Subject: [PATCH 15/24] style(backend/executor): replace deprecated
 get_event_loop().run_in_executor with asyncio.to_thread

asyncio.get_event_loop() is deprecated in Python 3.10+; asyncio.to_thread
is the idiomatic replacement and consistent with the rest of manager.py.
---
 autogpt_platform/backend/backend/executor/manager.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index d0348c0611..5c30617fdb 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -1181,8 +1181,8 @@ class ExecutionProcessor:
             return 0, 0
         # Cap to protect against a corrupted llm_call_count.
         capped = min(extra_iterations, self._MAX_EXTRA_ITERATIONS)
-        return await asyncio.get_event_loop().run_in_executor(
-            None, self._charge_extra_iterations_sync, node_exec, capped
+        return await asyncio.to_thread(
+            self._charge_extra_iterations_sync, node_exec, capped
         )
 
     async def charge_node_usage(
@@ -1203,12 +1203,11 @@ class ExecutionProcessor:
         sub-steps of a single block run from the user's perspective and
         should not push them into higher per-execution cost tiers.
         """
-        total_cost, remaining = await asyncio.get_event_loop().run_in_executor(
-            None, self._charge_usage, node_exec, 0
+        total_cost, remaining = await asyncio.to_thread(
+            self._charge_usage, node_exec, 0
         )
         if total_cost > 0:
-            await asyncio.get_event_loop().run_in_executor(
-                None,
+            await asyncio.to_thread(
                 self._handle_low_balance,
                 get_db_client(),
                 node_exec.user_id,

From 90b9c2ab460bbf7e74860b9c73a2fe4b76ddce09 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Sat, 11 Apr 2026 00:13:59 +0700
Subject: [PATCH 16/24] fix(backend/executor): skip execution tier charge for
 nested tool calls

execution_usage_cost(0) incorrectly charges 1 credit because 0 % threshold
== 0. charge_node_usage passes 0 to _charge_usage to signal "no tier
increment", but the modulo check fires at 0. Fix: skip execution_usage_cost
entirely when execution_count == 0, preserving the intent that nested tool
executions don't count against execution tiers.
---
 autogpt_platform/backend/backend/executor/manager.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index f63f8ec76c..e3c6702039 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -1101,7 +1101,13 @@ class ExecutionProcessor:
             )
             total_cost += cost
 
-        cost, usage_count = execution_usage_cost(execution_count)
+        # execution_count=0 is used by charge_node_usage for nested tool calls
+        # which must not be pushed into higher execution-count tiers.
+        # execution_usage_cost(0) would trigger a charge because 0 % threshold == 0,
+        # so skip it entirely when execution_count is 0.
+        cost, usage_count = (
+            execution_usage_cost(execution_count) if execution_count > 0 else (0, 0)
+        )
         if cost > 0:
             remaining_balance = db_client.spend_credits(
                 user_id=node_exec.user_id,

From 2a6b65fd7bb2911afc3a53b02cea333e1a91f271 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Sat, 11 Apr 2026 04:47:56 +0000
Subject: [PATCH 17/24] fix(executor): notify low balance after extra-iteration
 charges
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `_on_node_execution` path called `charge_extra_iterations` and
ignored the returned `remaining_balance`, so users were never notified
when their balance crossed the low-balance threshold via these
post-hoc per-LLM-call charges. `charge_node_usage` already does the
right thing — mirror that pattern here so all charging paths route
through `_handle_low_balance` consistently.

Sentry bug prediction: PRRT_kwDOJKSTjM56Mibw (severity MEDIUM).
---
 autogpt_platform/backend/backend/executor/manager.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index e3c6702039..ca66fad63a 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -708,6 +708,18 @@ class ExecutionProcessor:
                 )
                 if extra_cost > 0:
                     execution_stats.extra_cost += extra_cost
+                    # Mirror the low-balance notification path in
+                    # ``charge_node_usage`` so users are alerted when
+                    # their balance crosses the threshold from these
+                    # extra-iteration charges (sentry HIGH/MEDIUM bug
+                    # PRRT_kwDOJKSTjM56Mibw).
+                    await asyncio.to_thread(
+                        self._handle_low_balance,
+                        get_db_client(),
+                        node_exec.user_id,
+                        remaining_balance,
+                        extra_cost,
+                    )
             except InsufficientBalanceError as e:
                 log_metadata.error(
                     "billing_leak: insufficient balance after "

From 72660f8df08749526cd8216463e8515ad803e59f Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Sat, 11 Apr 2026 11:47:18 +0000
Subject: [PATCH 18/24] test(orchestrator): use AsyncMock for charge_node_usage
 in remaining tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

charge_node_usage is async and is awaited in _execute_single_tool_with_manager.
Mocking it as MagicMock returned a non-awaitable tuple, which raised TypeError
inside the tool execution path; the error was silently swallowed by the
orchestrator's catch-all and converted into a 'Tool execution failed' string,
so downstream assertions effectively ran against an error response instead
of the success path. Switch both tests to AsyncMock to actually exercise the
post-tool charging branch — matching the fix already applied in
test_orchestrator.py:929.
---
 .../backend/blocks/test/test_orchestrator_dynamic_fields.py | 6 +++++-
 .../backend/blocks/test/test_orchestrator_responses_api.py  | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py
index 5407972fa1..f2242ea527 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py
@@ -642,7 +642,11 @@ async def test_validation_errors_dont_pollute_conversation():
                     mock_node_stats
                 )
                 # Mock charge_node_usage (called after successful tool execution).
-                mock_execution_processor.charge_node_usage = MagicMock(
+                # Must be AsyncMock because it is async and is awaited in
+                # _execute_single_tool_with_manager — a plain MagicMock would
+                # return a non-awaitable tuple and TypeError out, then be
+                # silently swallowed by the orchestrator's catch-all.
+                mock_execution_processor.charge_node_usage = AsyncMock(
                     return_value=(0, 0)
                 )
 
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py
index 649716a491..ac78b6d35b 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py
@@ -957,7 +957,11 @@ async def test_agent_mode_conversation_valid_for_responses_api():
     ns = MagicMock(error=None)
     ep.on_node_execution = AsyncMock(return_value=ns)
     # Mock charge_node_usage (called after successful tool execution).
-    ep.charge_node_usage = MagicMock(return_value=(0, 0))
+    # Must be AsyncMock because it is async and is awaited in
+    # _execute_single_tool_with_manager — a plain MagicMock would return a
+    # non-awaitable tuple and TypeError out, then be silently swallowed by
+    # the orchestrator's catch-all.
+    ep.charge_node_usage = AsyncMock(return_value=(0, 0))
 
     with patch("backend.blocks.llm.llm_call", llm_mock), patch.object(
         block, "_create_tool_node_signatures", return_value=tool_sigs

From 5e8d3ba889a9a621c903ce6c5007d36902e858be Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Sun, 12 Apr 2026 10:17:09 +0000
Subject: [PATCH 19/24] fix(backend/executor): harden orchestrator tool
 execution error handling

- Replace assert with proper if-guard + RuntimeError for node_exec_result
- Wrap on_node_execution in try/except to always resolve the Future via
  set_exception on error, preventing dangling unresolved Futures
- Add separate try/except around charge_node_usage so non-balance billing
  exceptions propagate instead of being silently swallowed by the outer
  except Exception handler
- Add _is_error flag to tool response dicts to replace fragile string
  matching on "Tool execution failed:" prefix
---
 .../backend/backend/blocks/orchestrator.py    | 58 ++++++++++++++-----
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 7e51f43b80..79e9c566e8 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -1087,7 +1087,10 @@ class OrchestratorBlock(Block):
                 input_data=input_value,
             )
 
-        assert node_exec_result is not None, "node_exec_result should not be None"
+        if node_exec_result is None:
+            raise RuntimeError(
+                f"upsert_execution_input returned None for node {sink_node_id}"
+            )
 
         # Create NodeExecutionEntry for execution manager
         node_exec_entry = NodeExecutionEntry(
@@ -1122,14 +1125,20 @@ class OrchestratorBlock(Block):
                 task=node_exec_future,
             )
 
-            # Execute the node directly since we're in the Orchestrator context
-            tool_node_stats = await execution_processor.on_node_execution(
-                node_exec=node_exec_entry,
-                node_exec_progress=node_exec_progress,
-                nodes_input_masks=None,
-                graph_stats_pair=graph_stats_pair,
-            )
-            node_exec_future.set_result(tool_node_stats)
+            # Execute the node directly since we're in the Orchestrator context.
+            # Wrap in try/except so the future is always resolved, even on
+            # error — an unresolved Future would block anything awaiting it.
+            try:
+                tool_node_stats = await execution_processor.on_node_execution(
+                    node_exec=node_exec_entry,
+                    node_exec_progress=node_exec_progress,
+                    nodes_input_masks=None,
+                    graph_stats_pair=graph_stats_pair,
+                )
+                node_exec_future.set_result(tool_node_stats)
+            except Exception as exec_err:
+                node_exec_future.set_exception(exec_err)
+                raise
 
             # Charge user credits AFTER successful tool execution. Tools
             # spawned by the orchestrator bypass the main execution queue
@@ -1141,14 +1150,27 @@ class OrchestratorBlock(Block):
             # `error is None` intentionally excludes both Exception and
             # BaseException subclasses (e.g. CancelledError) so cancelled
             # or terminated tool runs are not billed.
+            #
+            # Billing errors (including non-balance exceptions) are kept
+            # in a separate try/except so they are never silently swallowed
+            # by the generic tool-error handler below.
             if (
                 not execution_params.execution_context.dry_run
                 and tool_node_stats is not None
                 and tool_node_stats.error is None
             ):
-                tool_cost, _ = await execution_processor.charge_node_usage(
-                    node_exec_entry,
-                )
+                try:
+                    tool_cost, _ = await execution_processor.charge_node_usage(
+                        node_exec_entry,
+                    )
+                except InsufficientBalanceError:
+                    raise
+                except Exception:
+                    logger.exception(
+                        "Unexpected error charging for tool node %s",
+                        sink_node_id,
+                    )
+                    raise
                 if tool_cost > 0:
                     self.merge_stats(NodeExecutionStats(extra_cost=tool_cost))
 
@@ -1163,9 +1185,11 @@ class OrchestratorBlock(Block):
                 if node_outputs
                 else "Tool executed successfully"
             )
-            return _create_tool_response(
+            resp = _create_tool_response(
                 tool_call.id, tool_response_content, responses_api=responses_api
             )
+            resp["_is_error"] = False
+            return resp
 
         except InsufficientBalanceError:
             # Don't downgrade billing failures into tool errors — let the
@@ -1176,11 +1200,13 @@ class OrchestratorBlock(Block):
         except Exception as e:
             logger.warning("Tool execution with manager failed: %s", e)
             # Return error response
-            return _create_tool_response(
+            resp = _create_tool_response(
                 tool_call.id,
                 f"Tool execution failed: {e}",
                 responses_api=responses_api,
             )
+            resp["_is_error"] = True
+            return resp
 
     async def _agent_mode_llm_caller(
         self,
@@ -1280,7 +1306,7 @@ class OrchestratorBlock(Block):
                 content = str(raw_content)
             else:
                 content = "Tool executed successfully"
-            tool_failed = content.startswith("Tool execution failed:")
+            tool_failed = result.get("_is_error", False)
             return ToolCallResult(
                 tool_call_id=tool_call.id,
                 tool_name=tool_call.name,
@@ -1501,7 +1527,7 @@ class OrchestratorBlock(Block):
                             text = content
                         else:
                             text = json.dumps(content)
-                        tool_failed = text.startswith("Tool execution failed:")
+                        tool_failed = result.get("_is_error", False)
                         return {
                             "content": [{"type": "text", "text": text}],
                             "isError": tool_failed,

From b62288655fa1435178da06d366250e3ebc95b920 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Sun, 12 Apr 2026 12:10:43 +0000
Subject: [PATCH 20/24] Add None guard for tool_node_stats after
 on_node_execution

If on_node_execution returns None, return an error response with
_is_error=True instead of falling through to the success path.
---
 autogpt_platform/backend/backend/blocks/orchestrator.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 79e9c566e8..5262f8eb1d 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -1140,6 +1140,15 @@ class OrchestratorBlock(Block):
                 node_exec_future.set_exception(exec_err)
                 raise
 
+            if tool_node_stats is None:
+                resp = _create_tool_response(
+                    tool_call.id,
+                    "Tool execution returned no result",
+                    responses_api=responses_api,
+                )
+                resp["_is_error"] = True
+                return resp
+
             # Charge user credits AFTER successful tool execution. Tools
             # spawned by the orchestrator bypass the main execution queue
             # (where _charge_usage is called), so we must charge here to

From 626fe17aac5cfc6b6b986dce52880c93b9f51fd0 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Mon, 13 Apr 2026 04:03:08 +0000
Subject: [PATCH 21/24] fix(orchestrator): resolve None future on swallowed
 errors; add missing tests

- Move tool_node_stats None guard before node_exec_future.set_result so
  that when on_node_execution returns None (swallowed by @async_error_logged),
  the future carries set_exception(RuntimeError) rather than set_result(None),
  giving the tracking system an accurate error state
- Remove redundant `tool_node_stats is not None` check that was dead code
  after the early-return guard was added
- Add explanatory comment in _charge_extra_iterations_sync docstring explaining
  why the block lookup is intentionally repeated rather than cached from
  _charge_usage (two separate thread-pool workers, no shared mutable state)
- Add assertion to test_on_node_execution_charges_extra_iterations_when_gate_passes
  verifying _handle_low_balance is called when extra_cost > 0
- Add test_on_node_execution_failed_ibe_sends_notification covering the
  FAILED + InsufficientBalanceError path in on_node_execution (lines 822-836)
  that was previously untested
---
 .../backend/backend/blocks/orchestrator.py    | 29 +++++---
 .../test_orchestrator_per_iteration_cost.py   | 74 +++++++++++++++++++
 .../backend/backend/executor/manager.py       | 12 ++-
 3 files changed, 102 insertions(+), 13 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 5262f8eb1d..1c40a7df96 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -1128,6 +1128,12 @@ class OrchestratorBlock(Block):
             # Execute the node directly since we're in the Orchestrator context.
             # Wrap in try/except so the future is always resolved, even on
             # error — an unresolved Future would block anything awaiting it.
+            #
+            # on_node_execution is decorated with @async_error_logged(swallow=True),
+            # which catches BaseException and returns None rather than raising.
+            # Treat a None return as a failure: set_exception so the future
+            # carries an error state rather than a None result, and return an
+            # error response so the LLM knows the tool failed.
             try:
                 tool_node_stats = await execution_processor.on_node_execution(
                     node_exec=node_exec_entry,
@@ -1135,20 +1141,24 @@ class OrchestratorBlock(Block):
                     nodes_input_masks=None,
                     graph_stats_pair=graph_stats_pair,
                 )
+                if tool_node_stats is None:
+                    nil_err = RuntimeError(
+                        f"on_node_execution returned None for node {sink_node_id} "
+                        "(error was swallowed by @async_error_logged)"
+                    )
+                    node_exec_future.set_exception(nil_err)
+                    resp = _create_tool_response(
+                        tool_call.id,
+                        "Tool execution returned no result",
+                        responses_api=responses_api,
+                    )
+                    resp["_is_error"] = True
+                    return resp
                 node_exec_future.set_result(tool_node_stats)
             except Exception as exec_err:
                 node_exec_future.set_exception(exec_err)
                 raise
 
-            if tool_node_stats is None:
-                resp = _create_tool_response(
-                    tool_call.id,
-                    "Tool execution returned no result",
-                    responses_api=responses_api,
-                )
-                resp["_is_error"] = True
-                return resp
-
             # Charge user credits AFTER successful tool execution. Tools
             # spawned by the orchestrator bypass the main execution queue
             # (where _charge_usage is called), so we must charge here to
@@ -1165,7 +1175,6 @@ class OrchestratorBlock(Block):
             # by the generic tool-error handler below.
             if (
                 not execution_params.execution_context.dry_run
-                and tool_node_stats is not None
                 and tool_node_stats.error is None
             ):
                 try:
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
index c703e32c71..611ab54fac 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -540,6 +540,9 @@ async def test_on_node_execution_charges_extra_iterations_when_gate_passes(
         graph_stats_pair=stats_pair,
     )
     assert calls["charge_extra_iterations"] == [2]
+    # _handle_low_balance must be called with the remaining balance returned by
+    # charge_extra_iterations (500) so users are alerted when balance drops low.
+    assert len(calls["handle_low_balance"]) == 1
 
 
 @pytest.mark.asyncio
@@ -832,3 +835,74 @@ async def test_tool_execution_insufficient_balance_propagates():
         charge_node_usage_mock=raising_charge,
     )
     assert isinstance(raised, InsufficientBalanceError)
+
+
+# ── on_node_execution FAILED + InsufficientBalanceError notification ──
+
+
+@pytest.mark.asyncio
+async def test_on_node_execution_failed_ibe_sends_notification(
+    monkeypatch,
+    gated_processor,
+):
+    """When status == FAILED and execution_stats.error is InsufficientBalanceError,
+    _handle_insufficient_funds_notif must be called.
+
+    This path fires when a nested tool charge inside the orchestrator raises
+    InsufficientBalanceError, which propagates out of the block's run() generator
+    and is caught by _on_node_execution's broad except, setting status=FAILED and
+    execution_stats.error=IBE. on_node_execution's post-execution block then
+    sends the user notification so they understand why the run stopped.
+    """
+    from backend.data.execution import ExecutionStatus
+    from backend.executor import manager
+    from backend.util.exceptions import InsufficientBalanceError
+
+    proc, calls, inner, fake_db, NodeExecutionStats = gated_processor
+    ibe = InsufficientBalanceError(
+        user_id="u",
+        message="Insufficient balance",
+        balance=0,
+        amount=30,
+    )
+
+    # Simulate _on_node_execution returning FAILED with IBE in stats.error.
+    async def fake_inner_failed(
+        self,
+        *,
+        node,
+        node_exec,
+        node_exec_progress,
+        stats,
+        db_client,
+        log_metadata,
+        nodes_input_masks=None,
+        nodes_to_skip=None,
+    ):
+        stats.error = ibe
+        return MagicMock(wall_time=0.1, cpu_time=0.1), ExecutionStatus.FAILED
+
+    monkeypatch.setattr(
+        manager.ExecutionProcessor,
+        "_on_node_execution",
+        fake_inner_failed,
+    )
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=0))
+
+    stats_pair = (
+        MagicMock(
+            node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0
+        ),
+        threading.Lock(),
+    )
+    await proc.on_node_execution(
+        node_exec=_make_node_exec(dry_run=False),
+        node_exec_progress=MagicMock(),
+        nodes_input_masks=None,
+        graph_stats_pair=stats_pair,
+    )
+    # The notification must have fired so the user knows why their run stopped.
+    assert len(calls["handle_insufficient_funds_notif"]) == 1
+    assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u"
+    # charge_extra_iterations must NOT be called — status is FAILED.
+    assert calls["charge_extra_iterations"] == []
diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index ca66fad63a..68f0ab7fb4 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -753,8 +753,7 @@ class ExecutionProcessor:
                     )
                 except Exception as notif_error:  # pragma: no cover
                     log_metadata.warning(
-                        f"Failed to send insufficient funds notification: "
-                        f"{notif_error}"
+                        f"Failed to send insufficient funds notification: {notif_error}"
                     )
             except Exception as e:
                 # Unexpected billing failure (DB outage, network, etc.).
@@ -832,7 +831,7 @@ class ExecutionProcessor:
                 )
             except Exception as notif_error:  # pragma: no cover
                 log_metadata.warning(
-                    f"Failed to send insufficient funds notification: " f"{notif_error}"
+                    f"Failed to send insufficient funds notification: {notif_error}"
                 )
 
         return execution_stats
@@ -1153,6 +1152,13 @@ class ExecutionProcessor:
 
         Called only from :meth:`charge_extra_iterations`. Do not call
         directly from async code.
+
+        Note: ``_resolve_block_cost`` is called again here (rather than
+        reusing the result from ``_charge_usage`` at the start of execution)
+        because the two calls happen in separate thread-pool workers and
+        sharing mutable state across workers would require locks. The block
+        config is immutable during a run, so the repeated lookup is safe and
+        produces the same cost; the only overhead is an extra registry lookup.
         """
         db_client = get_db_client()
         block, cost, matching_filter = self._resolve_block_cost(node_exec)

From e901b64bedc1e2cbb5ac784530f32ca788ee453f Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Mon, 13 Apr 2026 04:22:03 +0000
Subject: [PATCH 22/24] fix(test): fix _handle_low_balance mock signature to
 accept positional args

The gated_processor fixture's fake_low_balance mock used **kwargs, but
production code calls _handle_low_balance with positional args via
asyncio.to_thread. This caused a silent TypeError caught by the broad
except handler, making the handle_low_balance assertion fail (0 calls
instead of 1). Updated mock to match the actual method signature.
---
 .../test/test_orchestrator_per_iteration_cost.py     | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
index 611ab54fac..f166093d4c 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -492,8 +492,16 @@ def gated_processor(monkeypatch):
         fake_charge_extra,
     )
 
-    def fake_low_balance(self, **kwargs):
-        calls["handle_low_balance"].append(kwargs)
+    def fake_low_balance(
+        self, db_client, user_id, current_balance, transaction_cost
+    ):
+        calls["handle_low_balance"].append(
+            {
+                "user_id": user_id,
+                "current_balance": current_balance,
+                "transaction_cost": transaction_cost,
+            }
+        )
 
     monkeypatch.setattr(
         manager.ExecutionProcessor,

From 5ff46ff2075209781c3d62d5d4b8670d47816b79 Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Mon, 13 Apr 2026 06:44:20 +0000
Subject: [PATCH 23/24] fix(backend): address review feedback on orchestrator
 billing

- Extract post-execution billing into _handle_post_execution_billing()
- Deduplicate IBE notification into _try_send_insufficient_funds_notif()
- Combine _charge_usage + _handle_low_balance into single thread dispatch
- Sanitize error messages to LLM (no internal details leaked)
- Default _is_error to True (fail-closed) for tool responses
- Add IBE propagation contract to OrchestratorBlock class docstring
- Reduce per-site IBE comments to one-liners referencing class docstring
- Fix _resolve_block_cost return type annotation (Block | None)
- Move test imports to module level, fix test_default_block_returns_zero
- Add tests for non-IBE billing failure and _charge_usage(count=0)
- Fix Black formatting (CI lint blocker)
---
 .../backend/backend/blocks/orchestrator.py    |  58 ++--
 .../test_orchestrator_per_iteration_cost.py   | 189 ++++++++-----
 .../backend/backend/executor/manager.py       | 256 +++++++++---------
 3 files changed, 278 insertions(+), 225 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 1c40a7df96..34b4929879 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -365,9 +365,15 @@ def _disambiguate_tool_names(tools: list[dict[str, Any]]) -> None:
 
 
 class OrchestratorBlock(Block):
-    """
-    A block that uses a language model to orchestrate tool calls, supporting both
-    single-shot and iterative agent mode execution.
+    """A block that uses a language model to orchestrate tool calls.
+
+    Supports both single-shot and iterative agent mode execution.
+
+    **InsufficientBalanceError propagation contract**: ``InsufficientBalanceError``
+    (IBE) must always re-raise through every ``except`` block in this class.
+    Swallowing IBE would let the agent loop continue with unpaid work. Every
+    exception handler that catches ``Exception`` includes an explicit IBE
+    re-raise carve-out for this reason.
     """
 
     def extra_credit_charges(self, execution_stats: NodeExecutionStats) -> int:
@@ -1182,8 +1188,12 @@ class OrchestratorBlock(Block):
                         node_exec_entry,
                     )
                 except InsufficientBalanceError:
+                    # IBE must propagate — see OrchestratorBlock class docstring.
                     raise
                 except Exception:
+                    # Non-billing charge failures (DB outage, network, etc.)
+                    # are logged with full traceback but surfaced to the LLM
+                    # as a generic error to avoid leaking infrastructure details.
                     logger.exception(
                         "Unexpected error charging for tool node %s",
                         sink_node_id,
@@ -1210,17 +1220,15 @@ class OrchestratorBlock(Block):
             return resp
 
         except InsufficientBalanceError:
-            # Don't downgrade billing failures into tool errors — let the
-            # orchestrator's outer error handling stop the run cleanly,
-            # mirroring the behaviour of the main execution queue. Also
-            # prevents leaking exact balance amounts to the LLM context.
+            # IBE must propagate — see class docstring.
             raise
         except Exception as e:
-            logger.warning("Tool execution with manager failed: %s", e)
-            # Return error response
+            logger.warning("Tool execution with manager failed: %s", e, exc_info=True)
+            # Return a generic error to the LLM — internal exception messages
+            # may contain server paths, DB details, or infrastructure info.
             resp = _create_tool_response(
                 tool_call.id,
-                f"Tool execution failed: {e}",
+                "Tool execution failed due to an internal error",
                 responses_api=responses_api,
             )
             resp["_is_error"] = True
@@ -1324,7 +1332,7 @@ class OrchestratorBlock(Block):
                 content = str(raw_content)
             else:
                 content = "Tool executed successfully"
-            tool_failed = result.get("_is_error", False)
+            tool_failed = result.get("_is_error", True)
             return ToolCallResult(
                 tool_call_id=tool_call.id,
                 tool_name=tool_call.name,
@@ -1332,11 +1340,7 @@ class OrchestratorBlock(Block):
                 is_error=tool_failed,
             )
         except InsufficientBalanceError:
-            # Billing failures must stop the agent loop cleanly — do NOT
-            # downgrade them into a tool error that gets fed back to the
-            # LLM. Re-raise so the orchestrator's outer error handling
-            # halts the run (mirrors main execution queue behaviour) and
-            # avoids leaking exact balance amounts into LLM context.
+            # IBE must propagate — see class docstring.
             raise
         except Exception as e:
             logger.error("Tool execution failed: %s", e)
@@ -1458,11 +1462,7 @@ class OrchestratorBlock(Block):
                         },
                     )
         except InsufficientBalanceError:
-            # Billing failures must propagate out of the block so the
-            # executor's billing-leak handling fires (error recording on
-            # execution_stats, user notification, structured logging).
-            # Do NOT downgrade to a user-visible "error" output — that
-            # would swallow the failure and leak the exact balance amount.
+            # IBE must propagate — see class docstring.
             raise
         except Exception as e:
             # Catch all OTHER errors (validation, network, API) so that
@@ -1545,17 +1545,13 @@ class OrchestratorBlock(Block):
                             text = content
                         else:
                             text = json.dumps(content)
-                        tool_failed = result.get("_is_error", False)
+                        tool_failed = result.get("_is_error", True)
                         return {
                             "content": [{"type": "text", "text": text}],
                             "isError": tool_failed,
                         }
                     except InsufficientBalanceError:
-                        # Same carve-out as _agent_mode_tool_executor:
-                        # billing failures must propagate to stop the run
-                        # rather than be fed back to the LLM as a tool
-                        # error (which would leak balance amounts and let
-                        # the loop continue consuming unbillable work).
+                        # IBE must propagate — see class docstring.
                         raise
                     except Exception as e:
                         logger.error("SDK tool execution failed: %s", e)
@@ -1836,12 +1832,8 @@ class OrchestratorBlock(Block):
                         except (asyncio.CancelledError, StopAsyncIteration):
                             pass
         except InsufficientBalanceError:
-            # Billing failures must propagate so the executor's
-            # billing-leak handling fires (error recording, user
-            # notification, structured logging). Mirrors the carve-out
-            # in _execute_tools_agent_mode — do not downgrade to a
-            # user-visible error output. The `finally` block below
-            # still runs and records partial token usage.
+            # IBE must propagate — see class docstring. The `finally`
+            # block below still runs and records partial token usage.
             raise
         except Exception as e:
             # Surface OTHER SDK errors as user-visible output instead
diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
index f166093d4c..3dc9e9b9ae 100644
--- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py
@@ -11,63 +11,50 @@ from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
-from backend.blocks.orchestrator import OrchestratorBlock
+from backend.blocks._base import Block
+from backend.blocks.orchestrator import ExecutionParams, OrchestratorBlock
+from backend.data.execution import ExecutionContext, ExecutionStatus
+from backend.data.model import NodeExecutionStats
+from backend.executor import manager
+from backend.util.exceptions import InsufficientBalanceError
 
 # ── extra_credit_charges hook ────────────────────────────────────────
 
 
+class _NoOpBlock(Block):
+    """Minimal concrete Block subclass that does not override extra_credit_charges."""
+
+    def __init__(self):
+        super().__init__(id="noop-block", description="No-op test block")
+
+    def run(self, input_data, **kwargs):  # type: ignore[override]
+        yield "out", {}
+
+
 class TestExtraCreditCharges:
     """OrchestratorBlock opts into per-LLM-call billing via extra_credit_charges."""
 
     def test_orchestrator_returns_nonzero_for_multiple_calls(self):
-        from backend.data.model import NodeExecutionStats
-
         block = OrchestratorBlock()
         stats = NodeExecutionStats(llm_call_count=3)
         assert block.extra_credit_charges(stats) == 2
 
     def test_orchestrator_returns_zero_for_single_call(self):
-        from backend.data.model import NodeExecutionStats
-
         block = OrchestratorBlock()
         stats = NodeExecutionStats(llm_call_count=1)
         assert block.extra_credit_charges(stats) == 0
 
     def test_orchestrator_returns_zero_for_zero_calls(self):
-        from backend.data.model import NodeExecutionStats
-
         block = OrchestratorBlock()
         stats = NodeExecutionStats(llm_call_count=0)
         assert block.extra_credit_charges(stats) == 0
 
     def test_default_block_returns_zero(self):
-        from backend.data.model import NodeExecutionStats
-
-        # Use a concrete block (not the abstract Block base) to verify the
-        # default implementation returns 0.
-        block = OrchestratorBlock()
-        stats = NodeExecutionStats(llm_call_count=0)
-        # When llm_call_count=0, extra_credit_charges should clamp to 0.
+        """A block that does not override extra_credit_charges returns 0."""
+        block = _NoOpBlock()
+        stats = NodeExecutionStats(llm_call_count=10)
         assert block.extra_credit_charges(stats) == 0
 
-        # Also verify via Block.extra_credit_charges directly (method-level
-        # check) by calling the unbound method on an OrchestratorBlock
-        # instance with the base implementation patched out.
-        from unittest.mock import patch
-
-        from backend.blocks._base import Block
-
-        with patch.object(
-            OrchestratorBlock,
-            "extra_credit_charges",
-            Block.extra_credit_charges,
-        ):
-            base_block = OrchestratorBlock()
-            assert (
-                base_block.extra_credit_charges(NodeExecutionStats(llm_call_count=10))
-                == 0
-            )
-
 
 # ── charge_extra_iterations math ───────────────────────────────────
 
@@ -91,9 +78,11 @@ def patched_processor(monkeypatch):
 
     Returns the processor and a list of credit amounts spent so tests can
     assert on what was charged.
-    """
-    from backend.executor import manager
 
+    Note: ``ExecutionProcessor.__new__()`` bypasses ``__init__`` — if
+    ``__init__`` gains required state in the future this fixture will need
+    updating.
+    """
     spent: list[int] = []
 
     class FakeDb:
@@ -156,7 +145,6 @@ class TestChargeExtraIterations:
     @pytest.mark.asyncio
     async def test_capped_at_max(self, monkeypatch, fake_node_exec):
         """Runaway llm_call_count is capped at _MAX_EXTRA_ITERATIONS."""
-        from backend.executor import manager
 
         spent: list[int] = []
 
@@ -187,7 +175,6 @@ class TestChargeExtraIterations:
 
     @pytest.mark.asyncio
     async def test_zero_base_cost_skips_charge(self, monkeypatch, fake_node_exec):
-        from backend.executor import manager
 
         spent: list[int] = []
 
@@ -215,7 +202,6 @@ class TestChargeExtraIterations:
 
     @pytest.mark.asyncio
     async def test_block_not_found_skips_charge(self, monkeypatch, fake_node_exec):
-        from backend.executor import manager
 
         spent: list[int] = []
 
@@ -243,8 +229,6 @@ class TestChargeExtraIterations:
         self, monkeypatch, fake_node_exec
     ):
         """Out-of-credits errors must propagate, not be silently swallowed."""
-        from backend.executor import manager
-        from backend.util.exceptions import InsufficientBalanceError
 
         class FakeDb:
             def spend_credits(self, *, user_id, cost, metadata):
@@ -280,7 +264,6 @@ class TestChargeNodeUsage:
         self, monkeypatch, fake_node_exec
     ):
         """Nested tool charges should NOT inflate the per-execution counter."""
-        from backend.executor import manager
 
         captured: dict = {}
 
@@ -313,7 +296,6 @@ class TestChargeNodeUsage:
         self, monkeypatch, fake_node_exec
     ):
         """charge_node_usage should call _handle_low_balance when total_cost > 0."""
-        from backend.executor import manager
 
         low_balance_calls: list[dict] = []
 
@@ -353,7 +335,6 @@ class TestChargeNodeUsage:
         self, monkeypatch, fake_node_exec
     ):
         """charge_node_usage should NOT call _handle_low_balance when cost is 0."""
-        from backend.executor import manager
 
         low_balance_calls: list = []
 
@@ -418,9 +399,6 @@ def gated_processor(monkeypatch):
     llm_call_count, dry_run) and observe whether charge_extra_iterations
     was called.
     """
-    from backend.data.execution import ExecutionStatus
-    from backend.data.model import NodeExecutionStats
-    from backend.executor import manager
 
     calls: dict[str, list] = {
         "charge_extra_iterations": [],
@@ -492,9 +470,7 @@ def gated_processor(monkeypatch):
         fake_charge_extra,
     )
 
-    def fake_low_balance(
-        self, db_client, user_id, current_balance, transaction_cost
-    ):
+    def fake_low_balance(self, db_client, user_id, current_balance, transaction_cost):
         calls["handle_low_balance"].append(
             {
                 "user_id": user_id,
@@ -528,7 +504,6 @@ async def test_on_node_execution_charges_extra_iterations_when_gate_passes(
     gated_processor,
 ):
     """COMPLETED + extra_credit_charges > 0 + not dry_run → charged."""
-    from backend.data.execution import ExecutionStatus
 
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.COMPLETED
@@ -555,7 +530,6 @@ async def test_on_node_execution_charges_extra_iterations_when_gate_passes(
 
 @pytest.mark.asyncio
 async def test_on_node_execution_skips_when_status_not_completed(gated_processor):
-    from backend.data.execution import ExecutionStatus
 
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.FAILED
@@ -579,7 +553,6 @@ async def test_on_node_execution_skips_when_status_not_completed(gated_processor
 
 @pytest.mark.asyncio
 async def test_on_node_execution_skips_when_extra_charges_zero(gated_processor):
-    from backend.data.execution import ExecutionStatus
 
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.COMPLETED
@@ -604,7 +577,6 @@ async def test_on_node_execution_skips_when_extra_charges_zero(gated_processor):
 
 @pytest.mark.asyncio
 async def test_on_node_execution_skips_when_dry_run(gated_processor):
-    from backend.data.execution import ExecutionStatus
 
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.COMPLETED
@@ -640,9 +612,6 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies
     - _handle_insufficient_funds_notif is called so the user is notified
     - the structured ERROR log is the alerting hook
     """
-    from backend.data.execution import ExecutionStatus
-    from backend.executor import manager
-    from backend.util.exceptions import InsufficientBalanceError
 
     proc, calls, inner, fake_db, _ = gated_processor
     inner["status"] = ExecutionStatus.COMPLETED
@@ -698,13 +667,9 @@ async def _run_tool_exec_with_stats(
     Used to prove the dry_run and error guards around charge_node_usage
     behave as documented, and that InsufficientBalanceError propagates.
     """
-    import threading
     from collections import defaultdict
     from unittest.mock import AsyncMock, MagicMock, patch
 
-    from backend.blocks.orchestrator import ExecutionParams, OrchestratorBlock
-    from backend.data.execution import ExecutionContext
-
     block = OrchestratorBlock()
 
     # Mocked async DB client used inside orchestrator.
@@ -828,10 +793,6 @@ async def test_tool_execution_insufficient_balance_propagates():
     If this leaked into a ToolCallResult the LLM loop would keep running
     with 'tool failed' errors and the user would get unpaid work.
     """
-    from unittest.mock import AsyncMock
-
-    from backend.util.exceptions import InsufficientBalanceError
-
     raising_charge = AsyncMock(
         side_effect=InsufficientBalanceError(
             user_id="u", message="nope", balance=0, amount=10
@@ -862,9 +823,6 @@ async def test_on_node_execution_failed_ibe_sends_notification(
     execution_stats.error=IBE. on_node_execution's post-execution block then
     sends the user notification so they understand why the run stopped.
     """
-    from backend.data.execution import ExecutionStatus
-    from backend.executor import manager
-    from backend.util.exceptions import InsufficientBalanceError
 
     proc, calls, inner, fake_db, NodeExecutionStats = gated_processor
     ibe = InsufficientBalanceError(
@@ -914,3 +872,102 @@ async def test_on_node_execution_failed_ibe_sends_notification(
     assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u"
     # charge_extra_iterations must NOT be called — status is FAILED.
     assert calls["charge_extra_iterations"] == []
+
+
+# ── Billing leak: non-IBE exception during extra-iteration charging ──
+
+
+@pytest.mark.asyncio
+async def test_on_node_execution_non_ibe_billing_failure_keeps_completed(
+    monkeypatch,
+    gated_processor,
+):
+    """When charge_extra_iterations raises a non-IBE exception (e.g. DB outage):
+
+    - execution_stats.error stays None (node ran to completion)
+    - status stays COMPLETED (work already done)
+    - the billing_leak error is logged but does not corrupt execution_stats
+    """
+    proc, calls, inner, fake_db, _ = gated_processor
+    inner["status"] = ExecutionStatus.COMPLETED
+    inner["llm_call_count"] = 4
+    fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=3))
+
+    async def raise_conn_error(self, node_exec, extra_iterations):
+        raise ConnectionError("DB connection lost")
+
+    monkeypatch.setattr(
+        manager.ExecutionProcessor, "charge_extra_iterations", raise_conn_error
+    )
+
+    stats_pair = (
+        MagicMock(
+            node_count=0,
+            nodes_cputime=0,
+            nodes_walltime=0,
+            cost=0,
+            node_error_count=0,
+        ),
+        threading.Lock(),
+    )
+    result_stats = await proc.on_node_execution(
+        node_exec=_make_node_exec(dry_run=False),
+        node_exec_progress=MagicMock(),
+        nodes_input_masks=None,
+        graph_stats_pair=stats_pair,
+    )
+    # error stays None — node completed, only billing failed.
+    assert result_stats.error is None
+    # No notification was sent (only IBE triggers notification).
+    assert len(calls["handle_insufficient_funds_notif"]) == 0
+
+
+# ── _charge_usage with execution_count=0 ──
+
+
+class TestChargeUsageZeroExecutionCount:
+    """Verify _charge_usage(node_exec, 0) does not invoke execution_usage_cost."""
+
+    def test_execution_count_zero_skips_execution_tier(self, monkeypatch):
+        """_charge_usage with execution_count=0 must not call execution_usage_cost."""
+        execution_tier_called = []
+
+        def fake_execution_usage_cost(count):
+            execution_tier_called.append(count)
+            return (100, count)
+
+        spent: list[int] = []
+
+        class FakeDb:
+            def spend_credits(self, *, user_id, cost, metadata):
+                spent.append(cost)
+                return 500
+
+        fake_block = MagicMock()
+        fake_block.name = "FakeBlock"
+
+        monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb())
+        monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block)
+        monkeypatch.setattr(
+            manager,
+            "block_usage_cost",
+            lambda block, input_data, **_kw: (10, {}),
+        )
+        monkeypatch.setattr(manager, "execution_usage_cost", fake_execution_usage_cost)
+
+        proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor)
+        ne = MagicMock()
+        ne.user_id = "u"
+        ne.graph_exec_id = "ge"
+        ne.graph_id = "g"
+        ne.node_exec_id = "ne"
+        ne.node_id = "n"
+        ne.block_id = "b"
+        ne.inputs = {}
+
+        total_cost, remaining = proc._charge_usage(ne, 0)
+        assert total_cost == 10  # block cost only
+        assert remaining == 500
+        assert spent == [10]
+        # execution_usage_cost must NOT have been called
+        assert execution_tier_called == []
diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py
index 68f0ab7fb4..6e4b3e2aad 100644
--- a/autogpt_platform/backend/backend/executor/manager.py
+++ b/autogpt_platform/backend/backend/executor/manager.py
@@ -19,7 +19,7 @@ from sentry_sdk.api import flush as _sentry_flush
 from sentry_sdk.api import get_current_scope as _sentry_get_current_scope
 
 from backend.blocks import get_block
-from backend.blocks._base import BlockSchema
+from backend.blocks._base import Block, BlockSchema
 from backend.blocks.agent import AgentExecutorBlock
 from backend.blocks.io import AgentOutputBlock
 from backend.blocks.mcp.block import MCPToolBlock
@@ -681,100 +681,9 @@ class ExecutionProcessor:
         execution_stats.walltime = timing_info.wall_time
         execution_stats.cputime = timing_info.cpu_time
 
-        # Charge extra iterations for blocks that opt into per-LLM-call
-        # billing (e.g. OrchestratorBlock in agent mode). The first call
-        # is already covered by _charge_usage(); each additional LLM call
-        # costs another base_cost. Skipped for dry runs and failed runs.
-        #
-        # InsufficientBalanceError here is a post-hoc billing leak — the
-        # work is already done but the user can no longer pay. We:
-        #   1. log at ERROR with structured fields so alerting can catch it
-        #   2. fire _handle_insufficient_funds_notif so the user is
-        #      notified (mirrors the main queue path at ~line 1254)
-        # The run itself is kept COMPLETED (the block's outputs are
-        # already committed) — matching the documented "billing leak"
-        # contract rather than retroactively failing a successful run.
-        extra_iterations = (
-            node.block.extra_credit_charges(execution_stats)
-            if status == ExecutionStatus.COMPLETED
-            and not node_exec.execution_context.dry_run
-            else 0
+        await self._handle_post_execution_billing(
+            node, node_exec, execution_stats, status, log_metadata
         )
-        if extra_iterations > 0:
-            try:
-                extra_cost, remaining_balance = await self.charge_extra_iterations(
-                    node_exec,
-                    extra_iterations,
-                )
-                if extra_cost > 0:
-                    execution_stats.extra_cost += extra_cost
-                    # Mirror the low-balance notification path in
-                    # ``charge_node_usage`` so users are alerted when
-                    # their balance crosses the threshold from these
-                    # extra-iteration charges (sentry HIGH/MEDIUM bug
-                    # PRRT_kwDOJKSTjM56Mibw).
-                    await asyncio.to_thread(
-                        self._handle_low_balance,
-                        get_db_client(),
-                        node_exec.user_id,
-                        remaining_balance,
-                        extra_cost,
-                    )
-            except InsufficientBalanceError as e:
-                log_metadata.error(
-                    "billing_leak: insufficient balance after "
-                    f"{node.block.name} completed {extra_iterations} "
-                    f"extra iterations",
-                    extra={
-                        "billing_leak": True,
-                        "user_id": node_exec.user_id,
-                        "graph_id": node_exec.graph_id,
-                        "block_id": node_exec.block_id,
-                        "extra_iterations": extra_iterations,
-                        "error": str(e),
-                    },
-                )
-                # NOTE: Do NOT set execution_stats.error here. The node ran
-                # to completion — only the post-hoc charge failed. Setting
-                # .error would (a) flip node_error_count below, creating an
-                # "errored COMPLETED node" inconsistency in the metrics, and
-                # (b) leak balance amounts into the persisted node_stats.
-                # The structured ERROR log above is the alerting hook;
-                # node_stats stays clean.
-                # Notify the user they're out of credits. Runs through
-                # Redis dedup (per user+graph) so repeat runs don't spam.
-                try:
-                    await asyncio.to_thread(
-                        self._handle_insufficient_funds_notif,
-                        get_db_client(),
-                        node_exec.user_id,
-                        node_exec.graph_id,
-                        e,
-                    )
-                except Exception as notif_error:  # pragma: no cover
-                    log_metadata.warning(
-                        f"Failed to send insufficient funds notification: {notif_error}"
-                    )
-            except Exception as e:
-                # Unexpected billing failure (DB outage, network, etc.).
-                # Log at ERROR with structured fields and the same
-                # `billing_leak: True` marker so monitoring catches it
-                # alongside InsufficientBalanceError.  The run is kept
-                # COMPLETED because the work is already done.
-                log_metadata.error(
-                    f"billing_leak: failed to charge extra iterations "
-                    f"for {node.block.name}",
-                    extra={
-                        "billing_leak": True,
-                        "user_id": node_exec.user_id,
-                        "graph_id": node_exec.graph_id,
-                        "block_id": node_exec.block_id,
-                        "extra_iterations": extra_iterations,
-                        "error_type": type(e).__name__,
-                        "error": str(e),
-                    },
-                    exc_info=True,
-                )
 
         graph_stats, graph_stats_lock = graph_stats_pair
         with graph_stats_lock:
@@ -811,31 +720,121 @@ class ExecutionProcessor:
                 db_client=db_client,
             )
 
-        # If the node failed because a nested tool charge raised
-        # InsufficientBalanceError (orchestrator agent mode), the main
-        # queue's _charge_usage IBE notification path was bypassed (the
-        # initial charge succeeded — only the nested tool charge failed).
-        # Send the user notification here so they understand why their
-        # agent run stopped.  Failures are logged but not raised so the
-        # node-execution path stays clean.
+        # If the node failed because a nested tool charge raised IBE,
+        # send the user notification so they understand why the run stopped.
         if status == ExecutionStatus.FAILED and isinstance(
             execution_stats.error, InsufficientBalanceError
         ):
-            try:
-                await asyncio.to_thread(
-                    self._handle_insufficient_funds_notif,
-                    get_db_client(),
-                    node_exec.user_id,
-                    node_exec.graph_id,
-                    execution_stats.error,
-                )
-            except Exception as notif_error:  # pragma: no cover
-                log_metadata.warning(
-                    f"Failed to send insufficient funds notification: {notif_error}"
-                )
+            await self._try_send_insufficient_funds_notif(
+                node_exec.user_id,
+                node_exec.graph_id,
+                execution_stats.error,
+                log_metadata,
+            )
 
         return execution_stats
 
+    async def _try_send_insufficient_funds_notif(
+        self,
+        user_id: str,
+        graph_id: str,
+        error: InsufficientBalanceError,
+        log_metadata: LogMetadata,
+    ) -> None:
+        """Send an insufficient-funds notification, swallowing failures."""
+        try:
+            await asyncio.to_thread(
+                self._handle_insufficient_funds_notif,
+                get_db_client(),
+                user_id,
+                graph_id,
+                error,
+            )
+        except Exception as notif_error:  # pragma: no cover
+            log_metadata.warning(
+                f"Failed to send insufficient funds notification: {notif_error}"
+            )
+
+    async def _handle_post_execution_billing(
+        self,
+        node: Node,
+        node_exec: NodeExecutionEntry,
+        execution_stats: NodeExecutionStats,
+        status: ExecutionStatus,
+        log_metadata: LogMetadata,
+    ) -> None:
+        """Charge extra iterations for blocks that opt into per-LLM-call billing.
+
+        The first LLM call is already covered by ``_charge_usage()``; each
+        additional call costs another ``base_cost``. Skipped for dry runs and
+        failed runs.
+
+        InsufficientBalanceError here is a post-hoc billing leak: the work is
+        already done but the user can no longer pay. The run stays COMPLETED and
+        the error is logged with ``billing_leak: True`` for alerting.
+        """
+        extra_iterations = (
+            node.block.extra_credit_charges(execution_stats)
+            if status == ExecutionStatus.COMPLETED
+            and not node_exec.execution_context.dry_run
+            else 0
+        )
+        if extra_iterations <= 0:
+            return
+
+        try:
+            extra_cost, remaining_balance = await self.charge_extra_iterations(
+                node_exec,
+                extra_iterations,
+            )
+            if extra_cost > 0:
+                execution_stats.extra_cost += extra_cost
+                await asyncio.to_thread(
+                    self._handle_low_balance,
+                    get_db_client(),
+                    node_exec.user_id,
+                    remaining_balance,
+                    extra_cost,
+                )
+        except InsufficientBalanceError as e:
+            log_metadata.error(
+                "billing_leak: insufficient balance after "
+                f"{node.block.name} completed {extra_iterations} "
+                f"extra iterations",
+                extra={
+                    "billing_leak": True,
+                    "user_id": node_exec.user_id,
+                    "graph_id": node_exec.graph_id,
+                    "block_id": node_exec.block_id,
+                    "extra_iterations": extra_iterations,
+                    "error": str(e),
+                },
+            )
+            # Do NOT set execution_stats.error — the node ran to completion,
+            # only the post-hoc charge failed. See class-level billing-leak
+            # contract documentation.
+            await self._try_send_insufficient_funds_notif(
+                node_exec.user_id,
+                node_exec.graph_id,
+                e,
+                log_metadata,
+            )
+        except Exception as e:
+            log_metadata.error(
+                f"billing_leak: failed to charge extra iterations "
+                f"for {node.block.name}",
+                extra={
+                    "billing_leak": True,
+                    "user_id": node_exec.user_id,
+                    "graph_id": node_exec.graph_id,
+                    "block_id": node_exec.block_id,
+                    "extra_iterations": extra_iterations,
+                    "error_type": type(e).__name__,
+                    "error": str(e),
+                },
+                exc_info=True,
+            )
+
     @async_time_measured
     async def _on_node_execution(
         self,
@@ -1065,7 +1064,7 @@ class ExecutionProcessor:
     def _resolve_block_cost(
         self,
         node_exec: NodeExecutionEntry,
-    ) -> tuple[Any, int, dict]:
+    ) -> tuple[Block | None, int, dict]:
         """Look up the block and compute its base usage cost for an exec.
 
         Shared by :meth:`_charge_usage` and :meth:`charge_extra_iterations`
@@ -1211,6 +1210,22 @@ class ExecutionProcessor:
             self._charge_extra_iterations_sync, node_exec, capped
         )
 
+    def _charge_and_check_balance(
+        self,
+        node_exec: NodeExecutionEntry,
+    ) -> tuple[int, int]:
+        """Charge usage and check low balance in a single thread-pool worker.
+
+        Combines ``_charge_usage`` and ``_handle_low_balance`` to avoid
+        dispatching two thread-pool calls per tool execution.
+        """
+        total_cost, remaining = self._charge_usage(node_exec, 0)
+        if total_cost > 0:
+            self._handle_low_balance(
+                get_db_client(), node_exec.user_id, remaining, total_cost
+            )
+        return total_cost, remaining
+
     async def charge_node_usage(
         self,
         node_exec: NodeExecutionEntry,
@@ -1229,18 +1244,7 @@ class ExecutionProcessor:
         sub-steps of a single block run from the user's perspective and
         should not push them into higher per-execution cost tiers.
         """
-        total_cost, remaining = await asyncio.to_thread(
-            self._charge_usage, node_exec, 0
-        )
-        if total_cost > 0:
-            await asyncio.to_thread(
-                self._handle_low_balance,
-                get_db_client(),
-                node_exec.user_id,
-                remaining,
-                total_cost,
-            )
-        return total_cost, remaining
+        return await asyncio.to_thread(self._charge_and_check_balance, node_exec)
 
     @time_measured
     def _on_graph_execution(

From e558c60104416b101119a8afc75c5c8a09818caa Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Mon, 13 Apr 2026 07:02:10 +0000
Subject: [PATCH 24/24] fix(orchestrator): don't propagate non-billing charge
 errors as tool failures

Non-IBE exceptions from charge_node_usage (e.g. DB timeout) were
re-raised and caught by the outer generic handler, incorrectly marking
a successful tool execution as failed. This could cause the LLM to
retry side-effectful operations. Now logs the error and continues to
the success path since the tool itself completed successfully.
---
 .../backend/backend/blocks/orchestrator.py            | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py
index 34b4929879..3e956e8d6f 100644
--- a/autogpt_platform/backend/backend/blocks/orchestrator.py
+++ b/autogpt_platform/backend/backend/blocks/orchestrator.py
@@ -1192,13 +1192,16 @@ class OrchestratorBlock(Block):
                     raise
                 except Exception:
                     # Non-billing charge failures (DB outage, network, etc.)
-                    # are logged with full traceback but surfaced to the LLM
-                    # as a generic error to avoid leaking infrastructure details.
+                    # must NOT propagate to the outer except handler because
+                    # the tool itself succeeded. Re-raising would mark the
+                    # tool as failed (_is_error=True), causing the LLM to
+                    # retry side-effectful operations. Log and continue.
                     logger.exception(
-                        "Unexpected error charging for tool node %s",
+                        "Unexpected error charging for tool node %s; "
+                        "tool execution was successful",
                         sink_node_id,
                     )
-                    raise
+                    tool_cost = 0
                 if tool_cost > 0:
                     self.merge_stats(NodeExecutionStats(extra_cost=tool_cost))