From f520b646938935e9ecc5f4427feb0188843c459c Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 08:21:41 +0000 Subject: [PATCH 01/24] fix(backend/orchestrator): charge per LLM iteration and per tool call The OrchestratorBlock in agent mode makes multiple LLM calls in a single node execution, but the executor was only charging the user once per run. Tools spawned by the orchestrator also bypassed _charge_usage entirely, producing free internal block executions. Two fixes: 1. Per-iteration LLM charging - Add `Block.charge_per_llm_call` class flag (default False) - OrchestratorBlock sets it to True - After on_node_execution completes, the executor calls `charge_extra_iterations(node_exec, llm_call_count - 1)` which debits `base_cost * extra_iterations` additional credits via spend_credits, using the same per-model cost from BLOCK_COSTS. - Skipped for dry runs and failed runs. 2. Tool execution charging - In `_execute_single_tool_with_manager`, call the new public `ExecutionProcessor.charge_node_usage()` before invoking `on_node_execution()` for the spawned tool node. - Tools now incur the same credit cost as queue-driven node executions; the cost is also added to the orchestrator's `extra_cost` so it shows up in graph stats. Tests cover the flag opt-in, the charge_extra_iterations math (positive, zero, negative iterations, zero base cost). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/blocks/_base.py | 7 + .../backend/backend/blocks/orchestrator.py | 17 +++ .../test_orchestrator_per_iteration_cost.py | 134 ++++++++++++++++++ .../backend/backend/executor/manager.py | 86 +++++++++++ 4 files changed, 244 insertions(+) create mode 100644 autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py diff --git a/autogpt_platform/backend/backend/blocks/_base.py b/autogpt_platform/backend/backend/blocks/_base.py index 56986d15c4..1eeec991f8 100644 --- a/autogpt_platform/backend/backend/blocks/_base.py +++ b/autogpt_platform/backend/backend/blocks/_base.py @@ -420,6 +420,13 @@ class BlockWebhookConfig(BlockManualWebhookConfig): class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]): _optimized_description: ClassVar[str | None] = None + # Set to True for blocks (e.g. OrchestratorBlock in agent mode) that + # may make multiple LLM calls in a single run. The executor will then + # charge `block_cost * (llm_call_count - 1)` extra credits after the + # block completes — the first call is already covered by the upfront + # `_charge_usage()` call. Defaults to False (single charge per run). + charge_per_llm_call: ClassVar[bool] = False + def __init__( self, id: str = "", diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 6bdbf9acb3..9d23ff9dcb 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -369,6 +369,11 @@ class OrchestratorBlock(Block): single-shot and iterative agent mode execution. """ + # In agent mode the block makes one LLM call per iteration. The executor + # uses this flag to charge `cost * (llm_call_count - 1)` extra credits + # after the block completes; the first call is covered by _charge_usage(). + charge_per_llm_call = True + # MCP server name used by the Claude Code SDK execution mode. Keep in sync # with _create_graph_mcp_server and the MCP_PREFIX derivation in _execute_tools_sdk_mode. _SDK_MCP_SERVER_NAME = "graph_tools" @@ -1102,6 +1107,18 @@ class OrchestratorBlock(Block): execution_processor.execution_stats_lock, ) + # Charge user credits for the tool execution. Tools spawned by the + # orchestrator bypass the main execution queue (where _charge_usage + # is called), so we must charge here to avoid free tool execution. + # Skipped for dry runs and when block has no cost configured. + if not execution_params.execution_context.dry_run: + tool_cost, _ = await asyncio.to_thread( + execution_processor.charge_node_usage, + node_exec_entry, + ) + if tool_cost > 0: + self.merge_stats(NodeExecutionStats(extra_cost=tool_cost)) + # Create a completed future for the task tracking system node_exec_future = Future() node_exec_progress.add_task( diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py new file mode 100644 index 0000000000..917871d3d7 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -0,0 +1,134 @@ +"""Tests for OrchestratorBlock per-iteration cost charging. + +The OrchestratorBlock in agent mode makes multiple LLM calls in a single +node execution. The executor uses ``Block.charge_per_llm_call`` to detect +this and charge ``base_cost * (llm_call_count - 1)`` extra credits after +the block completes. +""" + +from unittest.mock import MagicMock + +from backend.blocks.orchestrator import OrchestratorBlock + + +class TestChargePerLlmCallFlag: + """OrchestratorBlock opts into per-LLM-call billing.""" + + def test_orchestrator_opts_in(self): + assert OrchestratorBlock.charge_per_llm_call is True + + def test_default_block_does_not_opt_in(self): + from backend.blocks._base import Block + + assert Block.charge_per_llm_call is False + + +class TestChargeExtraIterations: + """The executor charges ``cost * (llm_call_count - 1)`` extra credits.""" + + def _make_processor_with_block_cost(self, base_cost: int): + """Build a minimal ExecutionProcessor stub with a stubbed block lookup.""" + from backend.executor import manager + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + + # Stub the spend_credits client and block_usage_cost helper. + spent: list[int] = [] + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 0 + + # Patch get_db_client and get_block + block_usage_cost on the manager + # module so charge_extra_iterations sees deterministic values. + original_get_db = manager.get_db_client + original_get_block = manager.get_block + original_block_usage_cost = manager.block_usage_cost + + def restore(): + manager.get_db_client = original_get_db + manager.get_block = original_get_block + manager.block_usage_cost = original_block_usage_cost + + manager.get_db_client = lambda: FakeDb() + manager.get_block = lambda block_id: MagicMock(name="block") + manager.block_usage_cost = lambda block, input_data: ( + base_cost, + {"model": "claude-sonnet-4-6"}, + ) + + return proc, spent, restore + + def test_zero_extra_iterations_charges_nothing(self): + proc, spent, restore = self._make_processor_with_block_cost(base_cost=10) + try: + node_exec = MagicMock() + node_exec.user_id = "u" + node_exec.graph_exec_id = "g" + node_exec.graph_id = "g" + node_exec.node_exec_id = "ne" + node_exec.node_id = "n" + node_exec.block_id = "b" + node_exec.inputs = {} + + charged = proc.charge_extra_iterations(node_exec, extra_iterations=0) + assert charged == 0 + assert spent == [] + finally: + restore() + + def test_extra_iterations_multiplies_base_cost(self): + proc, spent, restore = self._make_processor_with_block_cost(base_cost=10) + try: + node_exec = MagicMock() + node_exec.user_id = "u" + node_exec.graph_exec_id = "g" + node_exec.graph_id = "g" + node_exec.node_exec_id = "ne" + node_exec.node_id = "n" + node_exec.block_id = "b" + node_exec.inputs = {} + + charged = proc.charge_extra_iterations(node_exec, extra_iterations=4) + # 4 extra iterations × 10 base_cost = 40 + assert charged == 40 + assert spent == [40] + finally: + restore() + + def test_zero_base_cost_skips_charge(self): + proc, spent, restore = self._make_processor_with_block_cost(base_cost=0) + try: + node_exec = MagicMock() + node_exec.user_id = "u" + node_exec.graph_exec_id = "g" + node_exec.graph_id = "g" + node_exec.node_exec_id = "ne" + node_exec.node_id = "n" + node_exec.block_id = "b" + node_exec.inputs = {} + + charged = proc.charge_extra_iterations(node_exec, extra_iterations=4) + assert charged == 0 + assert spent == [] + finally: + restore() + + def test_negative_extra_iterations_charges_nothing(self): + proc, spent, restore = self._make_processor_with_block_cost(base_cost=10) + try: + node_exec = MagicMock() + node_exec.user_id = "u" + node_exec.graph_exec_id = "g" + node_exec.graph_id = "g" + node_exec.node_exec_id = "ne" + node_exec.node_id = "n" + node_exec.block_id = "b" + node_exec.inputs = {} + + charged = proc.charge_extra_iterations(node_exec, extra_iterations=-1) + assert charged == 0 + assert spent == [] + finally: + restore() diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index 3a51682b51..e7631d0991 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -679,6 +679,30 @@ class ExecutionProcessor: execution_stats.walltime = timing_info.wall_time execution_stats.cputime = timing_info.cpu_time + # Charge extra iterations for blocks that opt into per-LLM-call + # billing (e.g. OrchestratorBlock in agent mode). The first call + # is already covered by _charge_usage(); each additional LLM call + # costs another base_cost. Skipped for dry runs and failed runs. + if ( + status == ExecutionStatus.COMPLETED + and node.block.charge_per_llm_call + and execution_stats.llm_call_count > 1 + and not node_exec.execution_context.dry_run + ): + extra_iterations = execution_stats.llm_call_count - 1 + try: + extra_cost = await asyncio.to_thread( + self.charge_extra_iterations, + node_exec, + extra_iterations, + ) + if extra_cost > 0: + execution_stats.extra_cost += extra_cost + except Exception as e: + log_metadata.warning( + f"Failed to charge extra iterations for " f"{node.block.name}: {e}" + ) + graph_stats, graph_stats_lock = graph_stats_pair with graph_stats_lock: graph_stats.node_count += 1 + execution_stats.extra_steps @@ -994,6 +1018,68 @@ class ExecutionProcessor: return total_cost, remaining_balance + def charge_node_usage( + self, + node_exec: NodeExecutionEntry, + ) -> tuple[int, int]: + """Charge a single node execution to the user. + + Public wrapper around :meth:`_charge_usage` for blocks (e.g. the + OrchestratorBlock) that spawn nested node executions outside the + main queue and therefore need to charge them explicitly. + """ + return self._charge_usage( + node_exec=node_exec, + execution_count=increment_execution_count(node_exec.user_id), + ) + + def charge_extra_iterations( + self, + node_exec: NodeExecutionEntry, + extra_iterations: int, + ) -> int: + """Charge a block extra iterations beyond the initial run. + + Used by agent-mode blocks (e.g. OrchestratorBlock) that make + multiple LLM calls within a single node execution. The first + iteration is already charged by :meth:`_charge_usage`; this + method charges *extra_iterations* additional copies of the + block's base cost. + """ + if extra_iterations <= 0: + return 0 + db_client = get_db_client() + block = get_block(node_exec.block_id) + if not block: + return 0 + cost, matching_filter = block_usage_cost( + block=block, input_data=node_exec.inputs + ) + if cost <= 0: + return 0 + total_extra_cost = cost * extra_iterations + db_client.spend_credits( + user_id=node_exec.user_id, + cost=total_extra_cost, + metadata=UsageTransactionMetadata( + graph_exec_id=node_exec.graph_exec_id, + graph_id=node_exec.graph_id, + node_exec_id=node_exec.node_exec_id, + node_id=node_exec.node_id, + block_id=node_exec.block_id, + block=block.name, + input={ + **matching_filter, + "extra_iterations": extra_iterations, + }, + reason=( + f"Extra agent-mode iterations for {block.name} " + f"({extra_iterations} additional LLM calls)" + ), + ), + ) + return total_extra_cost + @time_measured def _on_graph_execution( self, From 17a9ff12785d5321db8eeaf86a333560ef920a87 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 08:57:40 +0000 Subject: [PATCH 02/24] fix(orchestrator): address review feedback on per-iteration cost charging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses critical issues from autogpt-pr-reviewer + coderabbit review: Billing safety: - Surface InsufficientBalanceError as ERROR (not warning) so monitoring picks up billing leaks; other charge failures still log a warning. - Cap extra_iterations at MAX_EXTRA_ITERATIONS=200 to prevent a corrupted llm_call_count from draining a user's balance. - Tools now charged AFTER successful execution, not before — failed tools no longer cost credits, matching the rest of the platform. - charge_node_usage uses execution_count=0 so nested tool calls don't inflate the per-execution counter / push users into higher cost tiers. - charge_extra_iterations now returns (cost, remaining_balance) and the caller invokes _handle_low_balance to send low-balance notifications. Error handling consistency: - _execute_single_tool_with_manager re-raises InsufficientBalanceError instead of swallowing it into a tool-error response. This prevents leaking the user's exact balance to the LLM context and lets the outer error handling stop the run cleanly, mirroring the main queue. Test fixes: - test_orchestrator_per_iteration_cost.py: rewritten with pytest monkeypatch fixtures (no more manual save/restore), proper FakeBlock with .name attribute set correctly, plus new tests for the cap, block-not-found, InsufficientBalanceError propagation, and charge_node_usage delegation. - test_orchestrator.py / test_orchestrator_responses_api.py / test_orchestrator_dynamic_fields.py: mock charge_node_usage on the execution processor stub so existing agent-mode tests still pass with the new charging call. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/blocks/orchestrator.py | 50 ++-- .../backend/blocks/test/test_orchestrator.py | 4 + .../test/test_orchestrator_dynamic_fields.py | 4 + .../test_orchestrator_per_iteration_cost.py | 279 ++++++++++++------ .../test/test_orchestrator_responses_api.py | 2 + .../backend/backend/executor/manager.py | 56 +++- 6 files changed, 277 insertions(+), 118 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 9d23ff9dcb..55facaa081 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -36,6 +36,7 @@ from backend.data.execution import ExecutionContext from backend.data.model import NodeExecutionStats, SchemaField from backend.util import json from backend.util.clients import get_database_manager_async_client +from backend.util.exceptions import InsufficientBalanceError from backend.util.prompt import MAIN_OBJECTIVE_PREFIX from backend.util.security import SENSITIVE_FIELD_NAMES from backend.util.tool_call_loop import ( @@ -1107,18 +1108,6 @@ class OrchestratorBlock(Block): execution_processor.execution_stats_lock, ) - # Charge user credits for the tool execution. Tools spawned by the - # orchestrator bypass the main execution queue (where _charge_usage - # is called), so we must charge here to avoid free tool execution. - # Skipped for dry runs and when block has no cost configured. - if not execution_params.execution_context.dry_run: - tool_cost, _ = await asyncio.to_thread( - execution_processor.charge_node_usage, - node_exec_entry, - ) - if tool_cost > 0: - self.merge_stats(NodeExecutionStats(extra_cost=tool_cost)) - # Create a completed future for the task tracking system node_exec_future = Future() node_exec_progress.add_task( @@ -1127,14 +1116,31 @@ class OrchestratorBlock(Block): ) # Execute the node directly since we're in the Orchestrator context - node_exec_future.set_result( - await execution_processor.on_node_execution( - node_exec=node_exec_entry, - node_exec_progress=node_exec_progress, - nodes_input_masks=None, - graph_stats_pair=graph_stats_pair, - ) + tool_node_stats = await execution_processor.on_node_execution( + node_exec=node_exec_entry, + node_exec_progress=node_exec_progress, + nodes_input_masks=None, + graph_stats_pair=graph_stats_pair, ) + node_exec_future.set_result(tool_node_stats) + + # Charge user credits AFTER successful tool execution. Tools + # spawned by the orchestrator bypass the main execution queue + # (where _charge_usage is called), so we must charge here to + # avoid free tool execution. Charging post-completion (vs. + # pre-execution) avoids billing users for failed tool calls. + # Skipped for dry runs. + if ( + not execution_params.execution_context.dry_run + and tool_node_stats is not None + and not isinstance(tool_node_stats.error, Exception) + ): + tool_cost, _ = await asyncio.to_thread( + execution_processor.charge_node_usage, + node_exec_entry, + ) + if tool_cost > 0: + self.merge_stats(NodeExecutionStats(extra_cost=tool_cost)) # Get outputs from database after execution completes using database manager client node_outputs = await db_client.get_execution_outputs_by_node_exec_id( @@ -1151,6 +1157,12 @@ class OrchestratorBlock(Block): tool_call.id, tool_response_content, responses_api=responses_api ) + except InsufficientBalanceError: + # Don't downgrade billing failures into tool errors — let the + # orchestrator's outer error handling stop the run cleanly, + # mirroring the behaviour of the main execution queue. Also + # prevents leaking exact balance amounts to the LLM context. + raise except Exception as e: logger.warning("Tool execution with manager failed: %s", e) # Return error response diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py index 55f137428f..85d1b78fc7 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py @@ -922,6 +922,10 @@ async def test_orchestrator_agent_mode(): mock_execution_processor.on_node_execution = AsyncMock( return_value=mock_node_stats ) + # Mock charge_node_usage (called after successful tool execution). + # Returns (cost, remaining_balance). Synchronous because it's called + # via asyncio.to_thread. + mock_execution_processor.charge_node_usage = MagicMock(return_value=(0, 0)) # Mock the get_execution_outputs_by_node_exec_id method mock_db_client.get_execution_outputs_by_node_exec_id.return_value = { diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py index ac4fa0710b..e5a7300732 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py @@ -638,6 +638,10 @@ async def test_validation_errors_dont_pollute_conversation(): mock_execution_processor.on_node_execution.return_value = ( mock_node_stats ) + # Mock charge_node_usage (called after successful tool execution). + mock_execution_processor.charge_node_usage = MagicMock( + return_value=(0, 0) + ) async for output_name, output_value in block.run( input_data, diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py index 917871d3d7..6654282c58 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -8,9 +8,14 @@ the block completes. from unittest.mock import MagicMock +import pytest + from backend.blocks.orchestrator import OrchestratorBlock +# ── Class flag ────────────────────────────────────────────────────── + + class TestChargePerLlmCallFlag: """OrchestratorBlock opts into per-LLM-call billing.""" @@ -23,16 +28,117 @@ class TestChargePerLlmCallFlag: assert Block.charge_per_llm_call is False -class TestChargeExtraIterations: - """The executor charges ``cost * (llm_call_count - 1)`` extra credits.""" +# ── charge_extra_iterations math ─────────────────────────────────── - def _make_processor_with_block_cost(self, base_cost: int): - """Build a minimal ExecutionProcessor stub with a stubbed block lookup.""" + +@pytest.fixture() +def fake_node_exec(): + node_exec = MagicMock() + node_exec.user_id = "u" + node_exec.graph_exec_id = "g" + node_exec.graph_id = "g" + node_exec.node_exec_id = "ne" + node_exec.node_id = "n" + node_exec.block_id = "b" + node_exec.inputs = {} + return node_exec + + +@pytest.fixture() +def patched_processor(monkeypatch): + """ExecutionProcessor with stubbed db client / block lookup helpers. + + Returns the processor and a list of credit amounts spent so tests can + assert on what was charged. + """ + from backend.executor import manager + + spent: list[int] = [] + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 1000 # remaining balance + + fake_block = MagicMock() + fake_block.name = "FakeBlock" + + monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + manager, + "block_usage_cost", + lambda block, input_data, **_kw: (10, {"model": "claude-sonnet-4-6"}), + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + return proc, spent + + +class TestChargeExtraIterations: + def test_zero_extra_iterations_charges_nothing( + self, patched_processor, fake_node_exec + ): + proc, spent = patched_processor + cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=0) + assert cost == 0 + assert balance == 0 + assert spent == [] + + def test_extra_iterations_multiplies_base_cost( + self, patched_processor, fake_node_exec + ): + proc, spent = patched_processor + cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=4) + assert cost == 40 # 4 × 10 + assert balance == 1000 + assert spent == [40] + + def test_negative_extra_iterations_charges_nothing( + self, patched_processor, fake_node_exec + ): + proc, spent = patched_processor + cost, balance = proc.charge_extra_iterations( + fake_node_exec, extra_iterations=-1 + ) + assert cost == 0 + assert balance == 0 + assert spent == [] + + def test_capped_at_max(self, monkeypatch, fake_node_exec): + """Runaway llm_call_count is capped at _MAX_EXTRA_ITERATIONS.""" from backend.executor import manager - proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + spent: list[int] = [] + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 1000 + + fake_block = MagicMock() + fake_block.name = "FakeBlock" + + monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + manager, + "block_usage_cost", + lambda block, input_data, **_kw: (10, {}), + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cap = manager.ExecutionProcessor._MAX_EXTRA_ITERATIONS + cost, _ = proc.charge_extra_iterations( + fake_node_exec, extra_iterations=cap * 100 + ) + # Charged at most cap × 10 + assert cost == cap * 10 + assert spent == [cap * 10] + + def test_zero_base_cost_skips_charge(self, monkeypatch, fake_node_exec): + from backend.executor import manager - # Stub the spend_credits client and block_usage_cost helper. spent: list[int] = [] class FakeDb: @@ -40,95 +146,94 @@ class TestChargeExtraIterations: spent.append(cost) return 0 - # Patch get_db_client and get_block + block_usage_cost on the manager - # module so charge_extra_iterations sees deterministic values. - original_get_db = manager.get_db_client - original_get_block = manager.get_block - original_block_usage_cost = manager.block_usage_cost + fake_block = MagicMock() + fake_block.name = "FakeBlock" - def restore(): - manager.get_db_client = original_get_db - manager.get_block = original_get_block - manager.block_usage_cost = original_block_usage_cost - - manager.get_db_client = lambda: FakeDb() - manager.get_block = lambda block_id: MagicMock(name="block") - manager.block_usage_cost = lambda block, input_data: ( - base_cost, - {"model": "claude-sonnet-4-6"}, + monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + manager, "block_usage_cost", lambda block, input_data, **_kw: (0, {}) ) - return proc, spent, restore + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=4) + assert cost == 0 + assert balance == 0 + assert spent == [] - def test_zero_extra_iterations_charges_nothing(self): - proc, spent, restore = self._make_processor_with_block_cost(base_cost=10) - try: - node_exec = MagicMock() - node_exec.user_id = "u" - node_exec.graph_exec_id = "g" - node_exec.graph_id = "g" - node_exec.node_exec_id = "ne" - node_exec.node_id = "n" - node_exec.block_id = "b" - node_exec.inputs = {} + def test_block_not_found_skips_charge(self, monkeypatch, fake_node_exec): + from backend.executor import manager - charged = proc.charge_extra_iterations(node_exec, extra_iterations=0) - assert charged == 0 - assert spent == [] - finally: - restore() + spent: list[int] = [] - def test_extra_iterations_multiplies_base_cost(self): - proc, spent, restore = self._make_processor_with_block_cost(base_cost=10) - try: - node_exec = MagicMock() - node_exec.user_id = "u" - node_exec.graph_exec_id = "g" - node_exec.graph_id = "g" - node_exec.node_exec_id = "ne" - node_exec.node_id = "n" - node_exec.block_id = "b" - node_exec.inputs = {} + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 0 - charged = proc.charge_extra_iterations(node_exec, extra_iterations=4) - # 4 extra iterations × 10 base_cost = 40 - assert charged == 40 - assert spent == [40] - finally: - restore() + monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(manager, "get_block", lambda block_id: None) + monkeypatch.setattr( + manager, "block_usage_cost", lambda block, input_data, **_kw: (10, {}) + ) - def test_zero_base_cost_skips_charge(self): - proc, spent, restore = self._make_processor_with_block_cost(base_cost=0) - try: - node_exec = MagicMock() - node_exec.user_id = "u" - node_exec.graph_exec_id = "g" - node_exec.graph_id = "g" - node_exec.node_exec_id = "ne" - node_exec.node_id = "n" - node_exec.block_id = "b" - node_exec.inputs = {} + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=3) + assert cost == 0 + assert balance == 0 + assert spent == [] - charged = proc.charge_extra_iterations(node_exec, extra_iterations=4) - assert charged == 0 - assert spent == [] - finally: - restore() + def test_propagates_insufficient_balance_error(self, monkeypatch, fake_node_exec): + """Out-of-credits errors must propagate, not be silently swallowed.""" + from backend.executor import manager + from backend.util.exceptions import InsufficientBalanceError - def test_negative_extra_iterations_charges_nothing(self): - proc, spent, restore = self._make_processor_with_block_cost(base_cost=10) - try: - node_exec = MagicMock() - node_exec.user_id = "u" - node_exec.graph_exec_id = "g" - node_exec.graph_id = "g" - node_exec.node_exec_id = "ne" - node_exec.node_id = "n" - node_exec.block_id = "b" - node_exec.inputs = {} + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + raise InsufficientBalanceError( + user_id=user_id, + message="Insufficient balance", + balance=0, + amount=cost, + ) - charged = proc.charge_extra_iterations(node_exec, extra_iterations=-1) - assert charged == 0 - assert spent == [] - finally: - restore() + fake_block = MagicMock() + fake_block.name = "FakeBlock" + + monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + manager, "block_usage_cost", lambda block, input_data, **_kw: (10, {}) + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + with pytest.raises(InsufficientBalanceError): + proc.charge_extra_iterations(fake_node_exec, extra_iterations=4) + + +# ── charge_node_usage ────────────────────────────────────────────── + + +class TestChargeNodeUsage: + """charge_node_usage delegates to _charge_usage with execution_count=0.""" + + def test_delegates_with_zero_execution_count(self, monkeypatch, fake_node_exec): + """Nested tool charges should NOT inflate the per-execution counter.""" + from backend.executor import manager + + captured: dict = {} + + def fake_charge_usage(self, node_exec, execution_count): + captured["execution_count"] = execution_count + captured["node_exec"] = node_exec + return (5, 100) + + monkeypatch.setattr( + manager.ExecutionProcessor, "_charge_usage", fake_charge_usage + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = proc.charge_node_usage(fake_node_exec) + assert cost == 5 + assert balance == 100 + assert captured["execution_count"] == 0 diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py index f9ec7676ba..649716a491 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py @@ -956,6 +956,8 @@ async def test_agent_mode_conversation_valid_for_responses_api(): ep.execution_stats_lock = threading.Lock() ns = MagicMock(error=None) ep.on_node_execution = AsyncMock(return_value=ns) + # Mock charge_node_usage (called after successful tool execution). + ep.charge_node_usage = MagicMock(return_value=(0, 0)) with patch("backend.blocks.llm.llm_call", llm_mock), patch.object( block, "_create_tool_node_signatures", return_value=tool_sigs diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index e7631d0991..0454f0c5f3 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -683,6 +683,11 @@ class ExecutionProcessor: # billing (e.g. OrchestratorBlock in agent mode). The first call # is already covered by _charge_usage(); each additional LLM call # costs another base_cost. Skipped for dry runs and failed runs. + # + # InsufficientBalanceError is logged at ERROR level (this is a + # billing leak — the work is already done, but the user can't pay) + # and re-surfaced via execution_stats.error so monitoring can pick + # it up. Other exceptions are warnings. if ( status == ExecutionStatus.COMPLETED and node.block.charge_per_llm_call @@ -691,16 +696,27 @@ class ExecutionProcessor: ): extra_iterations = execution_stats.llm_call_count - 1 try: - extra_cost = await asyncio.to_thread( + extra_cost, remaining_balance = await asyncio.to_thread( self.charge_extra_iterations, node_exec, extra_iterations, ) if extra_cost > 0: execution_stats.extra_cost += extra_cost + self._handle_low_balance( + db_client=get_db_client(), + user_id=node_exec.user_id, + current_balance=remaining_balance, + transaction_cost=extra_cost, + ) + except InsufficientBalanceError as e: + log_metadata.error( + f"Billing leak: insufficient balance after {node.block.name} " + f"completed {extra_iterations} extra iterations: {e}" + ) except Exception as e: log_metadata.warning( - f"Failed to charge extra iterations for " f"{node.block.name}: {e}" + f"Failed to charge extra iterations for {node.block.name}: {e}" ) graph_stats, graph_stats_lock = graph_stats_pair @@ -1018,6 +1034,12 @@ class ExecutionProcessor: return total_cost, remaining_balance + # Hard cap on the multiplier passed to charge_extra_iterations to + # protect against a corrupted llm_call_count draining a user's balance. + # Real agent-mode runs are bounded by agent_mode_max_iterations (~50); + # 200 leaves headroom while preventing runaway charges. + _MAX_EXTRA_ITERATIONS = 200 + def charge_node_usage( self, node_exec: NodeExecutionEntry, @@ -1027,17 +1049,22 @@ class ExecutionProcessor: Public wrapper around :meth:`_charge_usage` for blocks (e.g. the OrchestratorBlock) that spawn nested node executions outside the main queue and therefore need to charge them explicitly. + + Note: this **does not** increment the global execution counter + (``increment_execution_count``). Nested tool executions are + sub-steps of a single block run from the user's perspective and + should not push them into higher per-execution cost tiers. """ return self._charge_usage( node_exec=node_exec, - execution_count=increment_execution_count(node_exec.user_id), + execution_count=0, ) def charge_extra_iterations( self, node_exec: NodeExecutionEntry, extra_iterations: int, - ) -> int: + ) -> tuple[int, int]: """Charge a block extra iterations beyond the initial run. Used by agent-mode blocks (e.g. OrchestratorBlock) that make @@ -1045,20 +1072,25 @@ class ExecutionProcessor: iteration is already charged by :meth:`_charge_usage`; this method charges *extra_iterations* additional copies of the block's base cost. + + Returns ``(total_extra_cost, remaining_balance)``. May raise + ``InsufficientBalanceError`` if the user can't afford the charge. """ if extra_iterations <= 0: - return 0 + return 0, 0 + # Cap to protect against a corrupted llm_call_count. + capped_iterations = min(extra_iterations, self._MAX_EXTRA_ITERATIONS) db_client = get_db_client() block = get_block(node_exec.block_id) if not block: - return 0 + return 0, 0 cost, matching_filter = block_usage_cost( block=block, input_data=node_exec.inputs ) if cost <= 0: - return 0 - total_extra_cost = cost * extra_iterations - db_client.spend_credits( + return 0, 0 + total_extra_cost = cost * capped_iterations + remaining_balance = db_client.spend_credits( user_id=node_exec.user_id, cost=total_extra_cost, metadata=UsageTransactionMetadata( @@ -1070,15 +1102,15 @@ class ExecutionProcessor: block=block.name, input={ **matching_filter, - "extra_iterations": extra_iterations, + "extra_iterations": capped_iterations, }, reason=( f"Extra agent-mode iterations for {block.name} " - f"({extra_iterations} additional LLM calls)" + f"({capped_iterations} additional LLM calls)" ), ), ) - return total_extra_cost + return total_extra_cost, remaining_balance @time_measured def _on_graph_execution( From 45d67cfaccd94bbb92a45521a80cb7d40d16ada4 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 09:56:39 +0000 Subject: [PATCH 03/24] style: fix isort import grouping in test_orchestrator_per_iteration_cost Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/blocks/test/test_orchestrator_per_iteration_cost.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py index 6654282c58..352a371fda 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -12,7 +12,6 @@ import pytest from backend.blocks.orchestrator import OrchestratorBlock - # ── Class flag ────────────────────────────────────────────────────── From 215340690f0785c6049194ff0de73b9d6473e327 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 09:59:00 +0000 Subject: [PATCH 04/24] docs: regenerate block docs (memory_search/memory_store tools) Auto-generated by `poetry run python scripts/generate_block_docs.py`. The OrchestratorBlock tool list gained `memory_search` / `memory_store` on dev but the doc table wasn't regenerated. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/integrations/block-integrations/misc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/block-integrations/misc.md b/docs/integrations/block-integrations/misc.md index d43826a959..ef7fd938db 100644 --- a/docs/integrations/block-integrations/misc.md +++ b/docs/integrations/block-integrations/misc.md @@ -58,7 +58,7 @@ Tool and block identifiers provided in `tools` and `blocks` are validated at run | system_context | Optional additional context prepended to the prompt. Use this to constrain autopilot behavior, provide domain context, or set output format requirements. | str | No | | session_id | Session ID to continue an existing autopilot conversation. Leave empty to start a new session. Use the session_id output from a previous run to continue. | str | No | | max_recursion_depth | Maximum nesting depth when the autopilot calls this block recursively (sub-agent pattern). Prevents infinite loops. | int | No | -| tools | Tool names to filter. Works with tools_exclude to form an allow-list or deny-list. Leave empty to apply no tool filter. | List["add_understanding" \| "ask_question" \| "bash_exec" \| "browser_act" \| "browser_navigate" \| "browser_screenshot" \| "connect_integration" \| "continue_run_block" \| "create_agent" \| "create_feature_request" \| "create_folder" \| "customize_agent" \| "delete_folder" \| "delete_workspace_file" \| "edit_agent" \| "find_agent" \| "find_block" \| "find_library_agent" \| "fix_agent_graph" \| "get_agent_building_guide" \| "get_doc_page" \| "get_mcp_guide" \| "list_folders" \| "list_workspace_files" \| "move_agents_to_folder" \| "move_folder" \| "read_workspace_file" \| "run_agent" \| "run_block" \| "run_mcp_tool" \| "search_docs" \| "search_feature_requests" \| "update_folder" \| "validate_agent_graph" \| "view_agent_output" \| "web_fetch" \| "write_workspace_file" \| "Agent" \| "Edit" \| "Glob" \| "Grep" \| "Read" \| "Task" \| "TodoWrite" \| "WebSearch" \| "Write"] | No | +| tools | Tool names to filter. Works with tools_exclude to form an allow-list or deny-list. Leave empty to apply no tool filter. | List["add_understanding" \| "ask_question" \| "bash_exec" \| "browser_act" \| "browser_navigate" \| "browser_screenshot" \| "connect_integration" \| "continue_run_block" \| "create_agent" \| "create_feature_request" \| "create_folder" \| "customize_agent" \| "delete_folder" \| "delete_workspace_file" \| "edit_agent" \| "find_agent" \| "find_block" \| "find_library_agent" \| "fix_agent_graph" \| "get_agent_building_guide" \| "get_doc_page" \| "get_mcp_guide" \| "list_folders" \| "list_workspace_files" \| "memory_search" \| "memory_store" \| "move_agents_to_folder" \| "move_folder" \| "read_workspace_file" \| "run_agent" \| "run_block" \| "run_mcp_tool" \| "search_docs" \| "search_feature_requests" \| "update_folder" \| "validate_agent_graph" \| "view_agent_output" \| "web_fetch" \| "write_workspace_file" \| "Agent" \| "Edit" \| "Glob" \| "Grep" \| "Read" \| "Task" \| "TodoWrite" \| "WebSearch" \| "Write"] | No | | tools_exclude | Controls how the 'tools' list is interpreted. True (default): 'tools' is a deny-list — listed tools are blocked, all others are allowed. An empty 'tools' list means allow everything. False: 'tools' is an allow-list — only listed tools are permitted. | bool | No | | blocks | Block identifiers to filter when the copilot uses run_block. Each entry can be: a block name (e.g. 'HTTP Request'), a full block UUID, or the first 8 hex characters of the UUID (e.g. 'c069dc6b'). Works with blocks_exclude. Leave empty to apply no block filter. | List[str] | No | | blocks_exclude | Controls how the 'blocks' list is interpreted. True (default): 'blocks' is a deny-list — listed blocks are blocked, all others are allowed. An empty 'blocks' list means allow everything. False: 'blocks' is an allow-list — only listed blocks are permitted. | bool | No | From ada27256286cf77b1dafe9bdb69b7ee56f6936cd Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 11:53:46 +0000 Subject: [PATCH 05/24] fix(orchestrator): propagate billing errors and close leak windows Round 3 review fixes on top of the per-iteration cost charging PR: - Propagate InsufficientBalanceError out of `_agent_mode_tool_executor` and the SDK MCP tool handler so billing failures stop the agent loop instead of being re-injected into the LLM as a tool error (which previously leaked balance amounts and let the loop keep consuming unpaid compute). - On post-execution extra-iteration charging failure, record `execution_stats.error`, log with structured billing_leak fields, and fire `_handle_insufficient_funds_notif` so the user is actually notified. Comment now matches behaviour. - Tighten tool-success gate to `tool_node_stats.error is None` so cancelled/terminated tool runs (BaseException subclasses such as CancelledError) are not billed. - Extract shared `_resolve_block_cost` helper used by `_charge_usage` and `charge_extra_iterations` to DRY the block/cost lookup. - Add integration tests for the `on_node_execution` charging gate covering each branch (status, flag, llm_call_count, dry_run) plus the InsufficientBalanceError path that asserts error recording and notification. - Add tool-charging skip tests (dry_run, failed tool, cancelled tool) and an InsufficientBalanceError propagation test for `_execute_single_tool_with_manager`. - Assert `charge_node_usage` is actually called in the existing `test_orchestrator_agent_mode` test and return a non-zero cost so the `merge_stats` branch is exercised. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/blocks/orchestrator.py | 20 +- .../backend/blocks/test/test_orchestrator.py | 12 +- .../test_orchestrator_per_iteration_cost.py | 441 +++++++++++++++++- .../backend/backend/executor/manager.py | 81 +++- 4 files changed, 532 insertions(+), 22 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 55facaa081..13591bed01 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -1130,10 +1130,14 @@ class OrchestratorBlock(Block): # avoid free tool execution. Charging post-completion (vs. # pre-execution) avoids billing users for failed tool calls. # Skipped for dry runs. + # + # `error is None` intentionally excludes both Exception and + # BaseException subclasses (e.g. CancelledError) so cancelled + # or terminated tool runs are not billed. if ( not execution_params.execution_context.dry_run and tool_node_stats is not None - and not isinstance(tool_node_stats.error, Exception) + and tool_node_stats.error is None ): tool_cost, _ = await asyncio.to_thread( execution_processor.charge_node_usage, @@ -1277,6 +1281,13 @@ class OrchestratorBlock(Block): content=content, is_error=tool_failed, ) + except InsufficientBalanceError: + # Billing failures must stop the agent loop cleanly — do NOT + # downgrade them into a tool error that gets fed back to the + # LLM. Re-raise so the orchestrator's outer error handling + # halts the run (mirrors main execution queue behaviour) and + # avoids leaking exact balance amounts into LLM context. + raise except Exception as e: logger.error("Tool execution failed: %s", e) return ToolCallResult( @@ -1481,6 +1492,13 @@ class OrchestratorBlock(Block): "content": [{"type": "text", "text": text}], "isError": tool_failed, } + except InsufficientBalanceError: + # Same carve-out as _agent_mode_tool_executor: + # billing failures must propagate to stop the run + # rather than be fed back to the LLM as a tool + # error (which would leak balance amounts and let + # the loop continue consuming unbillable work). + raise except Exception as e: logger.error("SDK tool execution failed: %s", e) return { diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py index 85d1b78fc7..c69c107de3 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py @@ -924,8 +924,11 @@ async def test_orchestrator_agent_mode(): ) # Mock charge_node_usage (called after successful tool execution). # Returns (cost, remaining_balance). Synchronous because it's called - # via asyncio.to_thread. - mock_execution_processor.charge_node_usage = MagicMock(return_value=(0, 0)) + # via asyncio.to_thread. Use a non-zero cost so the merge_stats + # branch is actually exercised, and assert it's called below. + mock_execution_processor.charge_node_usage = MagicMock( + return_value=(10, 990) + ) # Mock the get_execution_outputs_by_node_exec_id method mock_db_client.get_execution_outputs_by_node_exec_id.return_value = { @@ -971,6 +974,11 @@ async def test_orchestrator_agent_mode(): # Verify tool was executed via execution processor assert mock_execution_processor.on_node_execution.call_count == 1 + # Verify charge_node_usage was actually called for the successful + # tool execution — this guards against regressions where the + # post-execution tool charging is accidentally removed. + assert mock_execution_processor.charge_node_usage.call_count == 1 + @pytest.mark.asyncio async def test_orchestrator_traditional_mode_default(): diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py index 352a371fda..abf5c22740 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -6,7 +6,8 @@ this and charge ``base_cost * (llm_call_count - 1)`` extra credits after the block completes. """ -from unittest.mock import MagicMock +import threading +from unittest.mock import AsyncMock, MagicMock import pytest @@ -236,3 +237,441 @@ class TestChargeNodeUsage: assert cost == 5 assert balance == 100 assert captured["execution_count"] == 0 + + +# ── on_node_execution charging gate ──────────────────────────────── + + +class _FakeNode: + """Minimal stand-in for a ``Node`` object with a block attribute.""" + + def __init__(self, charge_per_llm_call: bool, block_name: str = "FakeBlock"): + self.block = MagicMock() + self.block.charge_per_llm_call = charge_per_llm_call + self.block.name = block_name + + +class _FakeExecContext: + def __init__(self, dry_run: bool = False): + self.dry_run = dry_run + + +def _make_node_exec(dry_run: bool = False) -> MagicMock: + """Build a NodeExecutionEntry-like mock for on_node_execution tests.""" + ne = MagicMock() + ne.user_id = "u" + ne.graph_id = "g" + ne.graph_exec_id = "ge" + ne.node_id = "n" + ne.node_exec_id = "ne" + ne.block_id = "b" + ne.inputs = {} + ne.execution_context = _FakeExecContext(dry_run=dry_run) + return ne + + +@pytest.fixture() +def gated_processor(monkeypatch): + """ExecutionProcessor with on_node_execution's downstream calls stubbed. + + Lets tests flip the four gate conditions (status, charge_per_llm_call, + llm_call_count, dry_run) and observe whether charge_extra_iterations + was called. + """ + from backend.data.execution import ExecutionStatus + from backend.data.model import NodeExecutionStats + from backend.executor import manager + + calls: dict[str, list] = { + "charge_extra_iterations": [], + "handle_low_balance": [], + "handle_insufficient_funds_notif": [], + } + + # Stub node lookup + DB client so the wrapper doesn't touch real infra. + fake_db = MagicMock() + fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + monkeypatch.setattr(manager, "get_db_async_client", lambda: fake_db) + monkeypatch.setattr(manager, "get_db_client", lambda: fake_db) + # get_block is called by LogMetadata construction in on_node_execution. + monkeypatch.setattr( + manager, + "get_block", + lambda block_id: MagicMock(name="FakeBlock"), + ) + # Persistence + cost logging are not under test here. + monkeypatch.setattr( + manager, + "async_update_node_execution_status", + AsyncMock(return_value=None), + ) + monkeypatch.setattr( + manager, + "async_update_graph_execution_state", + AsyncMock(return_value=None), + ) + monkeypatch.setattr( + manager, + "log_system_credential_cost", + AsyncMock(return_value=None), + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + + # Control the status returned by the inner execution call. + inner_result = {"status": ExecutionStatus.COMPLETED, "llm_call_count": 3} + + async def fake_inner( + self, *, node, node_exec, node_exec_progress, stats, db_client, + log_metadata, nodes_input_masks=None, nodes_to_skip=None, + ): + stats.llm_call_count = inner_result["llm_call_count"] + return MagicMock(wall_time=0.1, cpu_time=0.1), inner_result["status"] + + monkeypatch.setattr( + manager.ExecutionProcessor, + "_on_node_execution", + fake_inner, + ) + + def fake_charge_extra(self, node_exec, extra_iterations): + calls["charge_extra_iterations"].append(extra_iterations) + return (extra_iterations * 10, 500) + + monkeypatch.setattr( + manager.ExecutionProcessor, + "charge_extra_iterations", + fake_charge_extra, + ) + + def fake_low_balance(self, **kwargs): + calls["handle_low_balance"].append(kwargs) + + monkeypatch.setattr( + manager.ExecutionProcessor, + "_handle_low_balance", + fake_low_balance, + ) + + def fake_notif(self, db_client, user_id, graph_id, e): + calls["handle_insufficient_funds_notif"].append( + {"user_id": user_id, "graph_id": graph_id, "error": e} + ) + + monkeypatch.setattr( + manager.ExecutionProcessor, + "_handle_insufficient_funds_notif", + fake_notif, + ) + + return proc, calls, inner_result, fake_db, NodeExecutionStats + + +@pytest.mark.asyncio +async def test_on_node_execution_charges_extra_iterations_when_gate_passes( + gated_processor, +): + """COMPLETED + charge_per_llm_call + llm_call_count>1 + not dry_run → charged.""" + from backend.data.execution import ExecutionStatus + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 3 # → extra_iterations = 2 + fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + + stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, + cost=0, node_error_count=0), threading.Lock()) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + assert calls["charge_extra_iterations"] == [2] + + +@pytest.mark.asyncio +async def test_on_node_execution_skips_when_status_not_completed(gated_processor): + from backend.data.execution import ExecutionStatus + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.FAILED + inner["llm_call_count"] = 5 + fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + + stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, + cost=0, node_error_count=0), threading.Lock()) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + assert calls["charge_extra_iterations"] == [] + + +@pytest.mark.asyncio +async def test_on_node_execution_skips_when_charge_flag_false(gated_processor): + from backend.data.execution import ExecutionStatus + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 5 + fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=False)) + + stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, + cost=0, node_error_count=0), threading.Lock()) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + assert calls["charge_extra_iterations"] == [] + + +@pytest.mark.asyncio +async def test_on_node_execution_skips_when_llm_call_count_le_1(gated_processor): + from backend.data.execution import ExecutionStatus + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 1 # exactly the base charge, no extras + fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + + stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, + cost=0, node_error_count=0), threading.Lock()) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + assert calls["charge_extra_iterations"] == [] + + +@pytest.mark.asyncio +async def test_on_node_execution_skips_when_dry_run(gated_processor): + from backend.data.execution import ExecutionStatus + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 5 + fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + + stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, + cost=0, node_error_count=0), threading.Lock()) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=True), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + assert calls["charge_extra_iterations"] == [] + + +@pytest.mark.asyncio +async def test_on_node_execution_insufficient_balance_records_error_and_notifies( + monkeypatch, gated_processor, +): + """When extra-iteration charging fails with InsufficientBalanceError: + + - the run still reports COMPLETED (the work is already done) + - execution_stats.error is set so monitoring picks it up + - _handle_insufficient_funds_notif is called so the user is notified + """ + from backend.data.execution import ExecutionStatus + from backend.executor import manager + from backend.util.exceptions import InsufficientBalanceError + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 4 + fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + + def raise_ibe(self, node_exec, extra_iterations): + raise InsufficientBalanceError( + user_id=node_exec.user_id, + message="Insufficient balance", + balance=0, + amount=extra_iterations * 10, + ) + + monkeypatch.setattr( + manager.ExecutionProcessor, "charge_extra_iterations", raise_ibe + ) + + stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, + cost=0, node_error_count=0), threading.Lock()) + result_stats = await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + # Error recorded on stats so downstream monitoring can surface it. + assert isinstance(result_stats.error, InsufficientBalanceError) + # User notification fired. + assert len(calls["handle_insufficient_funds_notif"]) == 1 + assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u" + + +# ── Orchestrator _execute_single_tool_with_manager charging gates ── + + +async def _run_tool_exec_with_stats( + *, dry_run: bool, tool_stats_error, charge_node_usage_mock=None, +): + """Invoke _execute_single_tool_with_manager against fully mocked deps + and return (charge_call_count, merge_stats_calls). + + Used to prove the dry_run and error guards around charge_node_usage + behave as documented, and that InsufficientBalanceError propagates. + """ + import threading + from collections import defaultdict + from unittest.mock import AsyncMock, MagicMock, patch + + from backend.blocks.orchestrator import ExecutionParams, OrchestratorBlock + from backend.data.execution import ExecutionContext + + block = OrchestratorBlock() + + # Mocked async DB client used inside orchestrator. + mock_db_client = AsyncMock() + mock_target_node = MagicMock() + mock_target_node.block_id = "test-block-id" + mock_target_node.input_default = {} + mock_db_client.get_node.return_value = mock_target_node + mock_node_exec_result = MagicMock() + mock_node_exec_result.node_exec_id = "test-tool-exec-id" + mock_db_client.upsert_execution_input.return_value = ( + mock_node_exec_result, + {"query": "t"}, + ) + mock_db_client.get_execution_outputs_by_node_exec_id.return_value = { + "result": "ok" + } + + # ExecutionProcessor mock: on_node_execution returns supplied error. + mock_processor = AsyncMock() + mock_processor.running_node_execution = defaultdict(MagicMock) + mock_processor.execution_stats = MagicMock() + mock_processor.execution_stats_lock = threading.Lock() + mock_node_stats = MagicMock() + mock_node_stats.error = tool_stats_error + mock_processor.on_node_execution = AsyncMock(return_value=mock_node_stats) + mock_processor.charge_node_usage = ( + charge_node_usage_mock or MagicMock(return_value=(10, 990)) + ) + + # Build a tool_info shaped like _build_tool_info_from_args output. + tool_call = MagicMock() + tool_call.id = "call-1" + tool_call.name = "search_keywords" + tool_call.arguments = '{"query":"t"}' + tool_def = { + "type": "function", + "function": { + "name": "search_keywords", + "_sink_node_id": "test-sink-node-id", + "_field_mapping": {}, + "parameters": { + "properties": {"query": {"type": "string"}}, + "required": ["query"], + }, + }, + } + tool_info = OrchestratorBlock._build_tool_info_from_args( + tool_call_id="call-1", + tool_name="search_keywords", + tool_args={"query": "t"}, + tool_def=tool_def, + ) + + exec_params = ExecutionParams( + user_id="u", + graph_id="g", + node_id="n", + graph_version=1, + graph_exec_id="ge", + node_exec_id="ne", + execution_context=ExecutionContext( + human_in_the_loop_safe_mode=False, dry_run=dry_run + ), + ) + + with patch( + "backend.blocks.orchestrator.get_database_manager_async_client", + return_value=mock_db_client, + ): + try: + await block._execute_single_tool_with_manager( + tool_info, exec_params, mock_processor, responses_api=False + ) + raised = None + except Exception as e: + raised = e + + return mock_processor.charge_node_usage, raised + + +@pytest.mark.asyncio +async def test_tool_execution_skips_charging_on_dry_run(): + """dry_run=True → charge_node_usage is NOT called.""" + charge_mock, raised = await _run_tool_exec_with_stats( + dry_run=True, tool_stats_error=None + ) + assert raised is None + assert charge_mock.call_count == 0 + + +@pytest.mark.asyncio +async def test_tool_execution_skips_charging_on_failed_tool(): + """tool_node_stats.error is an Exception → charge_node_usage NOT called.""" + charge_mock, raised = await _run_tool_exec_with_stats( + dry_run=False, tool_stats_error=RuntimeError("tool blew up") + ) + assert raised is None + assert charge_mock.call_count == 0 + + +@pytest.mark.asyncio +async def test_tool_execution_skips_charging_on_cancelled_tool(): + """Cancellation (BaseException subclass) → charge_node_usage NOT called. + + Guards the fix for sentry's BaseException concern: the old + `isinstance(error, Exception)` check would have treated CancelledError + as "no error" and billed the user for a terminated run. + """ + import asyncio as _asyncio + + charge_mock, raised = await _run_tool_exec_with_stats( + dry_run=False, tool_stats_error=_asyncio.CancelledError() + ) + assert raised is None + assert charge_mock.call_count == 0 + + +@pytest.mark.asyncio +async def test_tool_execution_insufficient_balance_propagates(): + """InsufficientBalanceError from charge_node_usage must propagate out. + + If this leaked into a ToolCallResult the LLM loop would keep running + with 'tool failed' errors and the user would get unpaid work. + """ + from unittest.mock import MagicMock + + from backend.util.exceptions import InsufficientBalanceError + + raising_charge = MagicMock( + side_effect=InsufficientBalanceError( + user_id="u", message="nope", balance=0, amount=10 + ) + ) + _, raised = await _run_tool_exec_with_stats( + dry_run=False, + tool_stats_error=None, + charge_node_usage_mock=raising_charge, + ) + assert isinstance(raised, InsufficientBalanceError) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index 0454f0c5f3..3a1fd68db9 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -684,10 +684,16 @@ class ExecutionProcessor: # is already covered by _charge_usage(); each additional LLM call # costs another base_cost. Skipped for dry runs and failed runs. # - # InsufficientBalanceError is logged at ERROR level (this is a - # billing leak — the work is already done, but the user can't pay) - # and re-surfaced via execution_stats.error so monitoring can pick - # it up. Other exceptions are warnings. + # InsufficientBalanceError here is a post-hoc billing leak — the + # work is already done but the user can no longer pay. We: + # 1. log at ERROR with structured fields so alerting can catch it + # 2. record the error on execution_stats.error for downstream + # monitoring (stats are persisted into node_stats below) + # 3. fire _handle_insufficient_funds_notif so the user is + # notified (mirrors the main queue path at ~line 1254) + # The run itself is kept COMPLETED (the block's outputs are + # already committed) — matching the documented "billing leak" + # contract rather than retroactively failing a successful run. if ( status == ExecutionStatus.COMPLETED and node.block.charge_per_llm_call @@ -711,9 +717,36 @@ class ExecutionProcessor: ) except InsufficientBalanceError as e: log_metadata.error( - f"Billing leak: insufficient balance after {node.block.name} " - f"completed {extra_iterations} extra iterations: {e}" + "billing_leak: insufficient balance after " + f"{node.block.name} completed {extra_iterations} " + f"extra iterations", + extra={ + "billing_leak": True, + "user_id": node_exec.user_id, + "graph_id": node_exec.graph_id, + "block_id": node_exec.block_id, + "extra_iterations": extra_iterations, + "error": str(e), + }, ) + # Surface on execution_stats so node_stats persistence + # below records the billing failure for monitoring. + execution_stats.error = e + # Notify the user they're out of credits. Runs through + # Redis dedup (per user+graph) so repeat runs don't spam. + try: + await asyncio.to_thread( + self._handle_insufficient_funds_notif, + get_db_client(), + node_exec.user_id, + node_exec.graph_id, + e, + ) + except Exception as notif_error: # pragma: no cover + log_metadata.warning( + f"Failed to send insufficient funds notification: " + f"{notif_error}" + ) except Exception as e: log_metadata.warning( f"Failed to charge extra iterations for {node.block.name}: {e}" @@ -982,6 +1015,27 @@ class ExecutionProcessor: stats=exec_stats, ) + def _resolve_block_cost( + self, + node_exec: NodeExecutionEntry, + ) -> tuple[Any, int, dict]: + """Look up the block and compute its base usage cost for an exec. + + Shared by :meth:`_charge_usage` and :meth:`charge_extra_iterations` + so the (get_block, block_usage_cost) lookup lives in exactly one + place. Returns ``(block, cost, matching_filter)``. ``block`` is + ``None`` if the block id can't be resolved — callers should treat + that as "nothing to charge". + """ + block = get_block(node_exec.block_id) + if not block: + logger.error(f"Block {node_exec.block_id} not found.") + return None, 0, {} + cost, matching_filter = block_usage_cost( + block=block, input_data=node_exec.inputs + ) + return block, cost, matching_filter + def _charge_usage( self, node_exec: NodeExecutionEntry, @@ -990,14 +1044,10 @@ class ExecutionProcessor: total_cost = 0 remaining_balance = 0 db_client = get_db_client() - block = get_block(node_exec.block_id) + block, cost, matching_filter = self._resolve_block_cost(node_exec) if not block: - logger.error(f"Block {node_exec.block_id} not found.") return total_cost, 0 - cost, matching_filter = block_usage_cost( - block=block, input_data=node_exec.inputs - ) if cost > 0: remaining_balance = db_client.spend_credits( user_id=node_exec.user_id, @@ -1081,13 +1131,8 @@ class ExecutionProcessor: # Cap to protect against a corrupted llm_call_count. capped_iterations = min(extra_iterations, self._MAX_EXTRA_ITERATIONS) db_client = get_db_client() - block = get_block(node_exec.block_id) - if not block: - return 0, 0 - cost, matching_filter = block_usage_cost( - block=block, input_data=node_exec.inputs - ) - if cost <= 0: + block, cost, matching_filter = self._resolve_block_cost(node_exec) + if not block or cost <= 0: return 0, 0 total_extra_cost = cost * capped_iterations remaining_balance = db_client.spend_credits( From 613321180ebc5ff9d490d696f807e7ead683a406 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 12:00:08 +0000 Subject: [PATCH 06/24] fix(orchestrator): propagate InsufficientBalanceError past outer loop catch-all Follow-up to 6d2d476 addressing sentry's newly-posted review finding. Both `_execute_tools_agent_mode` and `_execute_tools_sdk_mode` had broad `except Exception` blocks at the top level of the tool-calling loop that would still swallow `InsufficientBalanceError` even after the inner tool executor carve-outs re-raised it: the error would escape the inner `_execute_single_tool_with_manager`, propagate up through `_agent_mode_tool_executor`, then get caught by the outer loop's catch-all and converted into a user-visible "error" yield. Add explicit `except InsufficientBalanceError: raise` carve-outs before each broad handler so billing failures propagate all the way out of the block's `run()` generator, reaching the executor's billing-leak handling (error recording on execution_stats, user notification, structured log). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/blocks/orchestrator.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 13591bed01..1f2861c87c 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -1407,9 +1407,17 @@ class OrchestratorBlock(Block): "arguments": tc.arguments, }, ) + except InsufficientBalanceError: + # Billing failures must propagate out of the block so the + # executor's billing-leak handling fires (error recording on + # execution_stats, user notification, structured logging). + # Do NOT downgrade to a user-visible "error" output — that + # would swallow the failure and leak the exact balance amount. + raise except Exception as e: - # Catch all errors (validation, network, API) so that the block - # surfaces them as user-visible output instead of crashing. + # Catch all OTHER errors (validation, network, API) so that + # the block surfaces them as user-visible output instead of + # crashing. yield "error", str(e) return @@ -1774,11 +1782,19 @@ class OrchestratorBlock(Block): await pending_task except (asyncio.CancelledError, StopAsyncIteration): pass + except InsufficientBalanceError: + # Billing failures must propagate so the executor's + # billing-leak handling fires (error recording, user + # notification, structured logging). Mirrors the carve-out + # in _execute_tools_agent_mode — do not downgrade to a + # user-visible error output. The `finally` block below + # still runs and records partial token usage. + raise except Exception as e: - # Surface SDK errors as user-visible output instead of crashing, - # consistent with _execute_tools_agent_mode error handling. - # Don't return yet — fall through to merge_stats below so - # partial token usage is always recorded. + # Surface OTHER SDK errors as user-visible output instead + # of crashing, consistent with _execute_tools_agent_mode + # error handling. Don't return yet — fall through to + # merge_stats below so partial token usage is always recorded. sdk_error = e finally: # Always record usage stats, even on error. The SDK may have From fddd23435fb9560e98761b0c23089bb307e51f98 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 12:42:27 +0000 Subject: [PATCH 07/24] fix(orchestrator): address round 3 sentry + coderabbit findings 1. Don't set execution_stats.error on post-hoc IBE - Setting it caused node_error_count++ at line 762, creating an inconsistent "errored COMPLETED node" in graph metrics - Also leaked balance amounts into persisted node_stats - Now: log structured ERROR, notify user, leave node_stats clean - Fixes sentry finding 3064117116 2. Wire low-balance notifications for nested tool charges - charge_node_usage returned (cost, balance) but caller dropped balance - Now invokes _handle_low_balance after a successful tool charge, mirroring the main queue behaviour - Fixes coderabbit finding 3064103939 3. Lint: black formatting on test_orchestrator_per_iteration_cost.py Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/blocks/orchestrator.py | 17 ++++- .../test_orchestrator_per_iteration_cost.py | 76 ++++++++++++++----- .../backend/backend/executor/manager.py | 10 ++- 3 files changed, 77 insertions(+), 26 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 1f2861c87c..d1f8693d67 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -35,7 +35,10 @@ from backend.data.dynamic_fields import ( from backend.data.execution import ExecutionContext from backend.data.model import NodeExecutionStats, SchemaField from backend.util import json -from backend.util.clients import get_database_manager_async_client +from backend.util.clients import ( + get_database_manager_async_client, + get_database_manager_client, +) from backend.util.exceptions import InsufficientBalanceError from backend.util.prompt import MAIN_OBJECTIVE_PREFIX from backend.util.security import SENSITIVE_FIELD_NAMES @@ -1139,12 +1142,22 @@ class OrchestratorBlock(Block): and tool_node_stats is not None and tool_node_stats.error is None ): - tool_cost, _ = await asyncio.to_thread( + tool_cost, remaining_balance = await asyncio.to_thread( execution_processor.charge_node_usage, node_exec_entry, ) if tool_cost > 0: self.merge_stats(NodeExecutionStats(extra_cost=tool_cost)) + # Notify the user if their balance is now low — mirrors + # the main queue path so nested tool charges don't + # bypass low-balance alerts. + await asyncio.to_thread( + execution_processor._handle_low_balance, + db_client=get_database_manager_client(), + user_id=execution_params.user_id, + current_balance=remaining_balance, + transaction_cost=tool_cost, + ) # Get outputs from database after execution completes using database manager client node_outputs = await db_client.get_execution_outputs_by_node_exec_id( diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py index abf5c22740..f1806bf6dc 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -322,8 +322,16 @@ def gated_processor(monkeypatch): inner_result = {"status": ExecutionStatus.COMPLETED, "llm_call_count": 3} async def fake_inner( - self, *, node, node_exec, node_exec_progress, stats, db_client, - log_metadata, nodes_input_masks=None, nodes_to_skip=None, + self, + *, + node, + node_exec, + node_exec_progress, + stats, + db_client, + log_metadata, + nodes_input_masks=None, + nodes_to_skip=None, ): stats.llm_call_count = inner_result["llm_call_count"] return MagicMock(wall_time=0.1, cpu_time=0.1), inner_result["status"] @@ -379,8 +387,12 @@ async def test_on_node_execution_charges_extra_iterations_when_gate_passes( inner["llm_call_count"] = 3 # → extra_iterations = 2 fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) - stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, - cost=0, node_error_count=0), threading.Lock()) + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) await proc.on_node_execution( node_exec=_make_node_exec(dry_run=False), node_exec_progress=MagicMock(), @@ -399,8 +411,12 @@ async def test_on_node_execution_skips_when_status_not_completed(gated_processor inner["llm_call_count"] = 5 fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) - stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, - cost=0, node_error_count=0), threading.Lock()) + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) await proc.on_node_execution( node_exec=_make_node_exec(dry_run=False), node_exec_progress=MagicMock(), @@ -419,8 +435,12 @@ async def test_on_node_execution_skips_when_charge_flag_false(gated_processor): inner["llm_call_count"] = 5 fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=False)) - stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, - cost=0, node_error_count=0), threading.Lock()) + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) await proc.on_node_execution( node_exec=_make_node_exec(dry_run=False), node_exec_progress=MagicMock(), @@ -439,8 +459,12 @@ async def test_on_node_execution_skips_when_llm_call_count_le_1(gated_processor) inner["llm_call_count"] = 1 # exactly the base charge, no extras fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) - stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, - cost=0, node_error_count=0), threading.Lock()) + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) await proc.on_node_execution( node_exec=_make_node_exec(dry_run=False), node_exec_progress=MagicMock(), @@ -459,8 +483,12 @@ async def test_on_node_execution_skips_when_dry_run(gated_processor): inner["llm_call_count"] = 5 fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) - stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, - cost=0, node_error_count=0), threading.Lock()) + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) await proc.on_node_execution( node_exec=_make_node_exec(dry_run=True), node_exec_progress=MagicMock(), @@ -472,7 +500,8 @@ async def test_on_node_execution_skips_when_dry_run(gated_processor): @pytest.mark.asyncio async def test_on_node_execution_insufficient_balance_records_error_and_notifies( - monkeypatch, gated_processor, + monkeypatch, + gated_processor, ): """When extra-iteration charging fails with InsufficientBalanceError: @@ -501,8 +530,12 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies manager.ExecutionProcessor, "charge_extra_iterations", raise_ibe ) - stats_pair = (MagicMock(node_count=0, nodes_cputime=0, nodes_walltime=0, - cost=0, node_error_count=0), threading.Lock()) + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) result_stats = await proc.on_node_execution( node_exec=_make_node_exec(dry_run=False), node_exec_progress=MagicMock(), @@ -520,7 +553,10 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies async def _run_tool_exec_with_stats( - *, dry_run: bool, tool_stats_error, charge_node_usage_mock=None, + *, + dry_run: bool, + tool_stats_error, + charge_node_usage_mock=None, ): """Invoke _execute_single_tool_with_manager against fully mocked deps and return (charge_call_count, merge_stats_calls). @@ -549,9 +585,7 @@ async def _run_tool_exec_with_stats( mock_node_exec_result, {"query": "t"}, ) - mock_db_client.get_execution_outputs_by_node_exec_id.return_value = { - "result": "ok" - } + mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {"result": "ok"} # ExecutionProcessor mock: on_node_execution returns supplied error. mock_processor = AsyncMock() @@ -561,8 +595,8 @@ async def _run_tool_exec_with_stats( mock_node_stats = MagicMock() mock_node_stats.error = tool_stats_error mock_processor.on_node_execution = AsyncMock(return_value=mock_node_stats) - mock_processor.charge_node_usage = ( - charge_node_usage_mock or MagicMock(return_value=(10, 990)) + mock_processor.charge_node_usage = charge_node_usage_mock or MagicMock( + return_value=(10, 990) ) # Build a tool_info shaped like _build_tool_info_from_args output. diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index 3a1fd68db9..1069218ad9 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -729,9 +729,13 @@ class ExecutionProcessor: "error": str(e), }, ) - # Surface on execution_stats so node_stats persistence - # below records the billing failure for monitoring. - execution_stats.error = e + # NOTE: Do NOT set execution_stats.error here. The node ran + # to completion — only the post-hoc charge failed. Setting + # .error would (a) flip node_error_count below, creating an + # "errored COMPLETED node" inconsistency in the metrics, and + # (b) leak balance amounts into the persisted node_stats. + # The structured ERROR log above is the alerting hook; + # node_stats stays clean. # Notify the user they're out of credits. Runs through # Redis dedup (per user+graph) so repeat runs don't spam. try: From 743f1f82c9760fee8ee2c57fbc0fae582c6d9520 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 12:45:41 +0000 Subject: [PATCH 08/24] style: black formatting on test_orchestrator.py Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/blocks/test/test_orchestrator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py index c69c107de3..c92ba3c702 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py @@ -926,9 +926,7 @@ async def test_orchestrator_agent_mode(): # Returns (cost, remaining_balance). Synchronous because it's called # via asyncio.to_thread. Use a non-zero cost so the merge_stats # branch is actually exercised, and assert it's called below. - mock_execution_processor.charge_node_usage = MagicMock( - return_value=(10, 990) - ) + mock_execution_processor.charge_node_usage = MagicMock(return_value=(10, 990)) # Mock the get_execution_outputs_by_node_exec_id method mock_db_client.get_execution_outputs_by_node_exec_id.return_value = { From b29f1608499d337f9f6bc625a41d1924155d2127 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 13:10:07 +0000 Subject: [PATCH 09/24] fix(executor): structured ERROR log on unexpected post-exec billing failures The catch-all `except Exception` in the post-execution charge path was logging at WARNING and dropping the error. Sentry flagged this as a silent billing leak risk. Now logs at ERROR with the same `billing_leak: True` structured marker used by the InsufficientBalanceError branch, plus error_type/error/ extra_iterations fields and exc_info=True for the full traceback. Monitoring/alerting can pick up either failure mode via the same key. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/executor/manager.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index 1069218ad9..a1acbbd990 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -752,8 +752,24 @@ class ExecutionProcessor: f"{notif_error}" ) except Exception as e: - log_metadata.warning( - f"Failed to charge extra iterations for {node.block.name}: {e}" + # Unexpected billing failure (DB outage, network, etc.). + # Log at ERROR with structured fields and the same + # `billing_leak: True` marker so monitoring catches it + # alongside InsufficientBalanceError. The run is kept + # COMPLETED because the work is already done. + log_metadata.error( + f"billing_leak: failed to charge extra iterations " + f"for {node.block.name}", + extra={ + "billing_leak": True, + "user_id": node_exec.user_id, + "graph_id": node_exec.graph_id, + "block_id": node_exec.block_id, + "extra_iterations": extra_iterations, + "error_type": type(e).__name__, + "error": str(e), + }, + exc_info=True, ) graph_stats, graph_stats_lock = graph_stats_pair From 6469334ae743d82ba867e1832f5d4523d74872f0 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 13:18:32 +0000 Subject: [PATCH 10/24] fix(executor): notify user when nested tool charge raises IBE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the OrchestratorBlock's nested tool charge in _execute_single_tool_with_manager raises InsufficientBalanceError, the exception propagates up through on_node_execution and the node is marked FAILED. The main queue's _charge_usage IBE catch (line 1305) doesn't fire because the initial _charge_usage already succeeded — only the nested tool charge failed. This left the user without any notification about why their agent run stopped. Add a status==FAILED + isinstance(error, IBE) branch in on_node_execution that calls _handle_insufficient_funds_notif. The notification flow goes through the same Redis dedup as the main queue path so repeat runs don't spam. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/executor/manager.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index a1acbbd990..d9c2f71198 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -807,6 +807,29 @@ class ExecutionProcessor: db_client=db_client, ) + # If the node failed because a nested tool charge raised + # InsufficientBalanceError (orchestrator agent mode), the main + # queue's _charge_usage IBE notification path was bypassed (the + # initial charge succeeded — only the nested tool charge failed). + # Send the user notification here so they understand why their + # agent run stopped. Failures are logged but not raised so the + # node-execution path stays clean. + if status == ExecutionStatus.FAILED and isinstance( + execution_stats.error, InsufficientBalanceError + ): + try: + await asyncio.to_thread( + self._handle_insufficient_funds_notif, + get_db_client(), + node_exec.user_id, + node_exec.graph_id, + execution_stats.error, + ) + except Exception as notif_error: # pragma: no cover + log_metadata.warning( + f"Failed to send insufficient funds notification: " f"{notif_error}" + ) + return execution_stats @async_time_measured From 389b2f4fb26f3a7dee36dae7f29faed19a7e762e Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 13:33:04 +0000 Subject: [PATCH 11/24] test(orchestrator): align IBE test with no-error-set fix The test asserted result_stats.error is set to the IBE, but commit b662eab36 removed that line from the IBE handler in on_node_execution because it caused node_error_count++ inconsistency and leaked balance amounts into persisted node_stats. Update the test to assert result_stats.error is None (the structured ERROR log is the alerting hook now, not the .error field). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../test/test_orchestrator_per_iteration_cost.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py index f1806bf6dc..1c8fca368b 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -506,8 +506,11 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies """When extra-iteration charging fails with InsufficientBalanceError: - the run still reports COMPLETED (the work is already done) - - execution_stats.error is set so monitoring picks it up + - execution_stats.error is NOT set (would flip node_error_count and + leak balance amounts into persisted node_stats — see manager.py + comment in the IBE handler) - _handle_insufficient_funds_notif is called so the user is notified + - the structured ERROR log is the alerting hook """ from backend.data.execution import ExecutionStatus from backend.executor import manager @@ -542,8 +545,11 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies nodes_input_masks=None, graph_stats_pair=stats_pair, ) - # Error recorded on stats so downstream monitoring can surface it. - assert isinstance(result_stats.error, InsufficientBalanceError) + # error stays None — node ran to completion, only the post-hoc + # charge failed. Setting .error would (a) flip node_error_count++ + # creating an "errored COMPLETED node" inconsistency, and (b) leak + # balance amounts into persisted node_stats. + assert result_stats.error is None # User notification fired. assert len(calls["handle_insufficient_funds_notif"]) == 1 assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u" From 4525869a755ae8f459e8fc470ee29d911581d420 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 13:34:01 +0000 Subject: [PATCH 12/24] fix(executor): wrap _handle_low_balance in to_thread to avoid blocking The post-execution low-balance check in on_node_execution called _handle_low_balance directly. _handle_low_balance does sync DB work, and on_node_execution is async, so the call blocked the event loop. Sentry caught it. Wrap in asyncio.to_thread. Co-Authored-By: Claude Opus 4.6 (1M context) --- autogpt_platform/backend/backend/executor/manager.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index d9c2f71198..bb7f29a9a1 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -709,7 +709,11 @@ class ExecutionProcessor: ) if extra_cost > 0: execution_stats.extra_cost += extra_cost - self._handle_low_balance( + # Wrap in to_thread — _handle_low_balance does sync DB + # work and we're in an async method, so calling it + # directly would block the event loop. + await asyncio.to_thread( + self._handle_low_balance, db_client=get_db_client(), user_id=node_exec.user_id, current_balance=remaining_balance, From 5b27ccf90878007fab50239caf2f2bef84bba037 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 22:56:38 +0700 Subject: [PATCH 13/24] refactor(backend/orchestrator): replace charge_per_llm_call flag with extra_credit_charges hook + async charge methods - Replace ClassVar[bool] charge_per_llm_call in Block._base with extra_credit_charges(execution_stats) -> int method; OrchestratorBlock overrides it to return max(0, llm_call_count - 1) - Make charge_extra_iterations and charge_node_usage async; sync work runs via run_in_executor. charge_node_usage now folds in _handle_low_balance so callers don't touch private methods - orchestrator.py no longer calls execution_processor._handle_low_balance directly; just awaits charge_node_usage which handles it internally - Update tests: TestChargePerLlmCallFlag -> TestExtraCreditCharges, all async charge method tests converted to @pytest.mark.asyncio, added TestChargeNodeUsage assertions for _handle_low_balance calls --- .../backend/backend/blocks/_base.py | 18 +- .../backend/backend/blocks/orchestrator.py | 30 +-- .../test_orchestrator_per_iteration_cost.py | 247 +++++++++++++----- .../backend/backend/executor/manager.py | 125 +++++---- 4 files changed, 273 insertions(+), 147 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/_base.py b/autogpt_platform/backend/backend/blocks/_base.py index 1eeec991f8..cbebdee85f 100644 --- a/autogpt_platform/backend/backend/blocks/_base.py +++ b/autogpt_platform/backend/backend/blocks/_base.py @@ -420,12 +420,18 @@ class BlockWebhookConfig(BlockManualWebhookConfig): class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]): _optimized_description: ClassVar[str | None] = None - # Set to True for blocks (e.g. OrchestratorBlock in agent mode) that - # may make multiple LLM calls in a single run. The executor will then - # charge `block_cost * (llm_call_count - 1)` extra credits after the - # block completes — the first call is already covered by the upfront - # `_charge_usage()` call. Defaults to False (single charge per run). - charge_per_llm_call: ClassVar[bool] = False + def extra_credit_charges(self, execution_stats: "NodeExecutionStats") -> int: + """Return extra credits to charge after this block run completes. + + Called by the executor after a block finishes with COMPLETED status. + The return value is the number of additional base-cost credits to + charge beyond the single credit already collected by ``_charge_usage`` + at the start of execution. Defaults to 0 (no extra charges). + + Override in blocks (e.g. OrchestratorBlock) that make multiple LLM + calls within one run and should be billed per call. + """ + return 0 def __init__( self, diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index d1f8693d67..9ab0318165 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -35,10 +35,7 @@ from backend.data.dynamic_fields import ( from backend.data.execution import ExecutionContext from backend.data.model import NodeExecutionStats, SchemaField from backend.util import json -from backend.util.clients import ( - get_database_manager_async_client, - get_database_manager_client, -) +from backend.util.clients import get_database_manager_async_client from backend.util.exceptions import InsufficientBalanceError from backend.util.prompt import MAIN_OBJECTIVE_PREFIX from backend.util.security import SENSITIVE_FIELD_NAMES @@ -373,10 +370,14 @@ class OrchestratorBlock(Block): single-shot and iterative agent mode execution. """ - # In agent mode the block makes one LLM call per iteration. The executor - # uses this flag to charge `cost * (llm_call_count - 1)` extra credits - # after the block completes; the first call is covered by _charge_usage(). - charge_per_llm_call = True + def extra_credit_charges(self, execution_stats: NodeExecutionStats) -> int: + """Charge one extra base credit per LLM call beyond the first. + + In agent mode each iteration makes one LLM call. The first is already + covered by _charge_usage(); this returns the number of additional + credits so the executor can bill the remaining calls post-completion. + """ + return max(0, execution_stats.llm_call_count - 1) # MCP server name used by the Claude Code SDK execution mode. Keep in sync # with _create_graph_mcp_server and the MCP_PREFIX derivation in _execute_tools_sdk_mode. @@ -1142,22 +1143,11 @@ class OrchestratorBlock(Block): and tool_node_stats is not None and tool_node_stats.error is None ): - tool_cost, remaining_balance = await asyncio.to_thread( - execution_processor.charge_node_usage, + tool_cost, _ = await execution_processor.charge_node_usage( node_exec_entry, ) if tool_cost > 0: self.merge_stats(NodeExecutionStats(extra_cost=tool_cost)) - # Notify the user if their balance is now low — mirrors - # the main queue path so nested tool charges don't - # bypass low-balance alerts. - await asyncio.to_thread( - execution_processor._handle_low_balance, - db_client=get_database_manager_client(), - user_id=execution_params.user_id, - current_balance=remaining_balance, - transaction_cost=tool_cost, - ) # Get outputs from database after execution completes using database manager client node_outputs = await db_client.get_execution_outputs_by_node_exec_id( diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py index 1c8fca368b..c703e32c71 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -1,7 +1,7 @@ """Tests for OrchestratorBlock per-iteration cost charging. The OrchestratorBlock in agent mode makes multiple LLM calls in a single -node execution. The executor uses ``Block.charge_per_llm_call`` to detect +node execution. The executor uses ``Block.extra_credit_charges`` to detect this and charge ``base_cost * (llm_call_count - 1)`` extra credits after the block completes. """ @@ -13,19 +13,60 @@ import pytest from backend.blocks.orchestrator import OrchestratorBlock -# ── Class flag ────────────────────────────────────────────────────── +# ── extra_credit_charges hook ──────────────────────────────────────── -class TestChargePerLlmCallFlag: - """OrchestratorBlock opts into per-LLM-call billing.""" +class TestExtraCreditCharges: + """OrchestratorBlock opts into per-LLM-call billing via extra_credit_charges.""" - def test_orchestrator_opts_in(self): - assert OrchestratorBlock.charge_per_llm_call is True + def test_orchestrator_returns_nonzero_for_multiple_calls(self): + from backend.data.model import NodeExecutionStats + + block = OrchestratorBlock() + stats = NodeExecutionStats(llm_call_count=3) + assert block.extra_credit_charges(stats) == 2 + + def test_orchestrator_returns_zero_for_single_call(self): + from backend.data.model import NodeExecutionStats + + block = OrchestratorBlock() + stats = NodeExecutionStats(llm_call_count=1) + assert block.extra_credit_charges(stats) == 0 + + def test_orchestrator_returns_zero_for_zero_calls(self): + from backend.data.model import NodeExecutionStats + + block = OrchestratorBlock() + stats = NodeExecutionStats(llm_call_count=0) + assert block.extra_credit_charges(stats) == 0 + + def test_default_block_returns_zero(self): + from backend.data.model import NodeExecutionStats + + # Use a concrete block (not the abstract Block base) to verify the + # default implementation returns 0. + block = OrchestratorBlock() + stats = NodeExecutionStats(llm_call_count=0) + # When llm_call_count=0, extra_credit_charges should clamp to 0. + assert block.extra_credit_charges(stats) == 0 + + # Also verify via Block.extra_credit_charges directly (method-level + # check) by calling the unbound method on an OrchestratorBlock + # instance with the base implementation patched out. + from unittest.mock import patch - def test_default_block_does_not_opt_in(self): from backend.blocks._base import Block - assert Block.charge_per_llm_call is False + with patch.object( + OrchestratorBlock, + "extra_credit_charges", + Block.extra_credit_charges, + ): + base_block = OrchestratorBlock() + assert ( + base_block.extra_credit_charges(NodeExecutionStats(llm_call_count=10)) + == 0 + ) # ── charge_extra_iterations math ─────────────────────────────────── @@ -76,36 +117,44 @@ def patched_processor(monkeypatch): class TestChargeExtraIterations: - def test_zero_extra_iterations_charges_nothing( + @pytest.mark.asyncio + async def test_zero_extra_iterations_charges_nothing( self, patched_processor, fake_node_exec ): proc, spent = patched_processor - cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=0) + cost, balance = await proc.charge_extra_iterations( + fake_node_exec, extra_iterations=0 + ) assert cost == 0 assert balance == 0 assert spent == [] - def test_extra_iterations_multiplies_base_cost( + @pytest.mark.asyncio + async def test_extra_iterations_multiplies_base_cost( self, patched_processor, fake_node_exec ): proc, spent = patched_processor - cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=4) + cost, balance = await proc.charge_extra_iterations( + fake_node_exec, extra_iterations=4 + ) assert cost == 40 # 4 × 10 assert balance == 1000 assert spent == [40] - def test_negative_extra_iterations_charges_nothing( + @pytest.mark.asyncio + async def test_negative_extra_iterations_charges_nothing( self, patched_processor, fake_node_exec ): proc, spent = patched_processor - cost, balance = proc.charge_extra_iterations( + cost, balance = await proc.charge_extra_iterations( fake_node_exec, extra_iterations=-1 ) assert cost == 0 assert balance == 0 assert spent == [] - def test_capped_at_max(self, monkeypatch, fake_node_exec): + @pytest.mark.asyncio + async def test_capped_at_max(self, monkeypatch, fake_node_exec): """Runaway llm_call_count is capped at _MAX_EXTRA_ITERATIONS.""" from backend.executor import manager @@ -129,14 +178,15 @@ class TestChargeExtraIterations: proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) cap = manager.ExecutionProcessor._MAX_EXTRA_ITERATIONS - cost, _ = proc.charge_extra_iterations( + cost, _ = await proc.charge_extra_iterations( fake_node_exec, extra_iterations=cap * 100 ) # Charged at most cap × 10 assert cost == cap * 10 assert spent == [cap * 10] - def test_zero_base_cost_skips_charge(self, monkeypatch, fake_node_exec): + @pytest.mark.asyncio + async def test_zero_base_cost_skips_charge(self, monkeypatch, fake_node_exec): from backend.executor import manager spent: list[int] = [] @@ -156,12 +206,15 @@ class TestChargeExtraIterations: ) proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) - cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=4) + cost, balance = await proc.charge_extra_iterations( + fake_node_exec, extra_iterations=4 + ) assert cost == 0 assert balance == 0 assert spent == [] - def test_block_not_found_skips_charge(self, monkeypatch, fake_node_exec): + @pytest.mark.asyncio + async def test_block_not_found_skips_charge(self, monkeypatch, fake_node_exec): from backend.executor import manager spent: list[int] = [] @@ -178,12 +231,17 @@ class TestChargeExtraIterations: ) proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) - cost, balance = proc.charge_extra_iterations(fake_node_exec, extra_iterations=3) + cost, balance = await proc.charge_extra_iterations( + fake_node_exec, extra_iterations=3 + ) assert cost == 0 assert balance == 0 assert spent == [] - def test_propagates_insufficient_balance_error(self, monkeypatch, fake_node_exec): + @pytest.mark.asyncio + async def test_propagates_insufficient_balance_error( + self, monkeypatch, fake_node_exec + ): """Out-of-credits errors must propagate, not be silently swallowed.""" from backend.executor import manager from backend.util.exceptions import InsufficientBalanceError @@ -208,7 +266,7 @@ class TestChargeExtraIterations: proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) with pytest.raises(InsufficientBalanceError): - proc.charge_extra_iterations(fake_node_exec, extra_iterations=4) + await proc.charge_extra_iterations(fake_node_exec, extra_iterations=4) # ── charge_node_usage ────────────────────────────────────────────── @@ -217,7 +275,10 @@ class TestChargeExtraIterations: class TestChargeNodeUsage: """charge_node_usage delegates to _charge_usage with execution_count=0.""" - def test_delegates_with_zero_execution_count(self, monkeypatch, fake_node_exec): + @pytest.mark.asyncio + async def test_delegates_with_zero_execution_count( + self, monkeypatch, fake_node_exec + ): """Nested tool charges should NOT inflate the per-execution counter.""" from backend.executor import manager @@ -228,16 +289,95 @@ class TestChargeNodeUsage: captured["node_exec"] = node_exec return (5, 100) + def fake_handle_low_balance( + self, db_client, user_id, current_balance, transaction_cost + ): + pass + monkeypatch.setattr( manager.ExecutionProcessor, "_charge_usage", fake_charge_usage ) + monkeypatch.setattr( + manager.ExecutionProcessor, "_handle_low_balance", fake_handle_low_balance + ) + monkeypatch.setattr(manager, "get_db_client", lambda: MagicMock()) proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) - cost, balance = proc.charge_node_usage(fake_node_exec) + cost, balance = await proc.charge_node_usage(fake_node_exec) assert cost == 5 assert balance == 100 assert captured["execution_count"] == 0 + @pytest.mark.asyncio + async def test_calls_handle_low_balance_when_cost_nonzero( + self, monkeypatch, fake_node_exec + ): + """charge_node_usage should call _handle_low_balance when total_cost > 0.""" + from backend.executor import manager + + low_balance_calls: list[dict] = [] + + def fake_charge_usage(self, node_exec, execution_count): + return (10, 50) + + def fake_handle_low_balance( + self, db_client, user_id, current_balance, transaction_cost + ): + low_balance_calls.append( + { + "user_id": user_id, + "current_balance": current_balance, + "transaction_cost": transaction_cost, + } + ) + + monkeypatch.setattr( + manager.ExecutionProcessor, "_charge_usage", fake_charge_usage + ) + monkeypatch.setattr( + manager.ExecutionProcessor, "_handle_low_balance", fake_handle_low_balance + ) + monkeypatch.setattr(manager, "get_db_client", lambda: MagicMock()) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = await proc.charge_node_usage(fake_node_exec) + assert cost == 10 + assert balance == 50 + assert len(low_balance_calls) == 1 + assert low_balance_calls[0]["user_id"] == "u" + assert low_balance_calls[0]["current_balance"] == 50 + assert low_balance_calls[0]["transaction_cost"] == 10 + + @pytest.mark.asyncio + async def test_skips_handle_low_balance_when_cost_zero( + self, monkeypatch, fake_node_exec + ): + """charge_node_usage should NOT call _handle_low_balance when cost is 0.""" + from backend.executor import manager + + low_balance_calls: list = [] + + def fake_charge_usage(self, node_exec, execution_count): + return (0, 200) + + def fake_handle_low_balance( + self, db_client, user_id, current_balance, transaction_cost + ): + low_balance_calls.append(True) + + monkeypatch.setattr( + manager.ExecutionProcessor, "_charge_usage", fake_charge_usage + ) + monkeypatch.setattr( + manager.ExecutionProcessor, "_handle_low_balance", fake_handle_low_balance + ) + monkeypatch.setattr(manager, "get_db_client", lambda: MagicMock()) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = await proc.charge_node_usage(fake_node_exec) + assert cost == 0 + assert low_balance_calls == [] + # ── on_node_execution charging gate ──────────────────────────────── @@ -245,10 +385,10 @@ class TestChargeNodeUsage: class _FakeNode: """Minimal stand-in for a ``Node`` object with a block attribute.""" - def __init__(self, charge_per_llm_call: bool, block_name: str = "FakeBlock"): + def __init__(self, extra_charges: int = 0, block_name: str = "FakeBlock"): self.block = MagicMock() - self.block.charge_per_llm_call = charge_per_llm_call self.block.name = block_name + self.block.extra_credit_charges = MagicMock(return_value=extra_charges) class _FakeExecContext: @@ -274,7 +414,7 @@ def _make_node_exec(dry_run: bool = False) -> MagicMock: def gated_processor(monkeypatch): """ExecutionProcessor with on_node_execution's downstream calls stubbed. - Lets tests flip the four gate conditions (status, charge_per_llm_call, + Lets tests flip the gate conditions (status, extra_credit_charges result, llm_call_count, dry_run) and observe whether charge_extra_iterations was called. """ @@ -290,7 +430,7 @@ def gated_processor(monkeypatch): # Stub node lookup + DB client so the wrapper doesn't touch real infra. fake_db = MagicMock() - fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=2)) monkeypatch.setattr(manager, "get_db_async_client", lambda: fake_db) monkeypatch.setattr(manager, "get_db_client", lambda: fake_db) # get_block is called by LogMetadata construction in on_node_execution. @@ -342,7 +482,7 @@ def gated_processor(monkeypatch): fake_inner, ) - def fake_charge_extra(self, node_exec, extra_iterations): + async def fake_charge_extra(self, node_exec, extra_iterations): calls["charge_extra_iterations"].append(extra_iterations) return (extra_iterations * 10, 500) @@ -379,13 +519,13 @@ def gated_processor(monkeypatch): async def test_on_node_execution_charges_extra_iterations_when_gate_passes( gated_processor, ): - """COMPLETED + charge_per_llm_call + llm_call_count>1 + not dry_run → charged.""" + """COMPLETED + extra_credit_charges > 0 + not dry_run → charged.""" from backend.data.execution import ExecutionStatus proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.COMPLETED - inner["llm_call_count"] = 3 # → extra_iterations = 2 - fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + inner["llm_call_count"] = 3 # → extra_charges = 2 + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=2)) stats_pair = ( MagicMock( @@ -409,7 +549,7 @@ async def test_on_node_execution_skips_when_status_not_completed(gated_processor proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.FAILED inner["llm_call_count"] = 5 - fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=4)) stats_pair = ( MagicMock( @@ -427,37 +567,14 @@ async def test_on_node_execution_skips_when_status_not_completed(gated_processor @pytest.mark.asyncio -async def test_on_node_execution_skips_when_charge_flag_false(gated_processor): +async def test_on_node_execution_skips_when_extra_charges_zero(gated_processor): from backend.data.execution import ExecutionStatus proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.COMPLETED inner["llm_call_count"] = 5 - fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=False)) - - stats_pair = ( - MagicMock( - node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 - ), - threading.Lock(), - ) - await proc.on_node_execution( - node_exec=_make_node_exec(dry_run=False), - node_exec_progress=MagicMock(), - nodes_input_masks=None, - graph_stats_pair=stats_pair, - ) - assert calls["charge_extra_iterations"] == [] - - -@pytest.mark.asyncio -async def test_on_node_execution_skips_when_llm_call_count_le_1(gated_processor): - from backend.data.execution import ExecutionStatus - - proc, calls, inner, fake_db, _ = gated_processor - inner["status"] = ExecutionStatus.COMPLETED - inner["llm_call_count"] = 1 # exactly the base charge, no extras - fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + # Block returns 0 extra charges (base class default) + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=0)) stats_pair = ( MagicMock( @@ -481,7 +598,7 @@ async def test_on_node_execution_skips_when_dry_run(gated_processor): proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.COMPLETED inner["llm_call_count"] = 5 - fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=4)) stats_pair = ( MagicMock( @@ -519,9 +636,9 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.COMPLETED inner["llm_call_count"] = 4 - fake_db.get_node = AsyncMock(return_value=_FakeNode(charge_per_llm_call=True)) + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=3)) - def raise_ibe(self, node_exec, extra_iterations): + async def raise_ibe(self, node_exec, extra_iterations): raise InsufficientBalanceError( user_id=node_exec.user_id, message="Insufficient balance", @@ -601,7 +718,7 @@ async def _run_tool_exec_with_stats( mock_node_stats = MagicMock() mock_node_stats.error = tool_stats_error mock_processor.on_node_execution = AsyncMock(return_value=mock_node_stats) - mock_processor.charge_node_usage = charge_node_usage_mock or MagicMock( + mock_processor.charge_node_usage = charge_node_usage_mock or AsyncMock( return_value=(10, 990) ) @@ -700,11 +817,11 @@ async def test_tool_execution_insufficient_balance_propagates(): If this leaked into a ToolCallResult the LLM loop would keep running with 'tool failed' errors and the user would get unpaid work. """ - from unittest.mock import MagicMock + from unittest.mock import AsyncMock from backend.util.exceptions import InsufficientBalanceError - raising_charge = MagicMock( + raising_charge = AsyncMock( side_effect=InsufficientBalanceError( user_id="u", message="nope", balance=0, amount=10 ) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index bb7f29a9a1..d0348c0611 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -687,38 +687,25 @@ class ExecutionProcessor: # InsufficientBalanceError here is a post-hoc billing leak — the # work is already done but the user can no longer pay. We: # 1. log at ERROR with structured fields so alerting can catch it - # 2. record the error on execution_stats.error for downstream - # monitoring (stats are persisted into node_stats below) - # 3. fire _handle_insufficient_funds_notif so the user is + # 2. fire _handle_insufficient_funds_notif so the user is # notified (mirrors the main queue path at ~line 1254) # The run itself is kept COMPLETED (the block's outputs are # already committed) — matching the documented "billing leak" # contract rather than retroactively failing a successful run. - if ( - status == ExecutionStatus.COMPLETED - and node.block.charge_per_llm_call - and execution_stats.llm_call_count > 1 + extra_iterations = ( + node.block.extra_credit_charges(execution_stats) + if status == ExecutionStatus.COMPLETED and not node_exec.execution_context.dry_run - ): - extra_iterations = execution_stats.llm_call_count - 1 + else 0 + ) + if extra_iterations > 0: try: - extra_cost, remaining_balance = await asyncio.to_thread( - self.charge_extra_iterations, + extra_cost, remaining_balance = await self.charge_extra_iterations( node_exec, extra_iterations, ) if extra_cost > 0: execution_stats.extra_cost += extra_cost - # Wrap in to_thread — _handle_low_balance does sync DB - # work and we're in an async method, so calling it - # directly would block the event loop. - await asyncio.to_thread( - self._handle_low_balance, - db_client=get_db_client(), - user_id=node_exec.user_id, - current_balance=remaining_balance, - transaction_cost=extra_cost, - ) except InsufficientBalanceError as e: log_metadata.error( "billing_leak: insufficient balance after " @@ -1137,46 +1124,16 @@ class ExecutionProcessor: # 200 leaves headroom while preventing runaway charges. _MAX_EXTRA_ITERATIONS = 200 - def charge_node_usage( + def _charge_extra_iterations_sync( self, node_exec: NodeExecutionEntry, + capped_iterations: int, ) -> tuple[int, int]: - """Charge a single node execution to the user. + """Synchronous implementation — runs in a thread-pool worker. - Public wrapper around :meth:`_charge_usage` for blocks (e.g. the - OrchestratorBlock) that spawn nested node executions outside the - main queue and therefore need to charge them explicitly. - - Note: this **does not** increment the global execution counter - (``increment_execution_count``). Nested tool executions are - sub-steps of a single block run from the user's perspective and - should not push them into higher per-execution cost tiers. + Called only from :meth:`charge_extra_iterations`. Do not call + directly from async code. """ - return self._charge_usage( - node_exec=node_exec, - execution_count=0, - ) - - def charge_extra_iterations( - self, - node_exec: NodeExecutionEntry, - extra_iterations: int, - ) -> tuple[int, int]: - """Charge a block extra iterations beyond the initial run. - - Used by agent-mode blocks (e.g. OrchestratorBlock) that make - multiple LLM calls within a single node execution. The first - iteration is already charged by :meth:`_charge_usage`; this - method charges *extra_iterations* additional copies of the - block's base cost. - - Returns ``(total_extra_cost, remaining_balance)``. May raise - ``InsufficientBalanceError`` if the user can't afford the charge. - """ - if extra_iterations <= 0: - return 0, 0 - # Cap to protect against a corrupted llm_call_count. - capped_iterations = min(extra_iterations, self._MAX_EXTRA_ITERATIONS) db_client = get_db_client() block, cost, matching_filter = self._resolve_block_cost(node_exec) if not block or cost <= 0: @@ -1204,6 +1161,62 @@ class ExecutionProcessor: ) return total_extra_cost, remaining_balance + async def charge_extra_iterations( + self, + node_exec: NodeExecutionEntry, + extra_iterations: int, + ) -> tuple[int, int]: + """Charge a block extra iterations beyond the initial run. + + Used by agent-mode blocks (e.g. OrchestratorBlock) that make + multiple LLM calls within a single node execution. The first + iteration is already charged by :meth:`_charge_usage`; this + method charges *extra_iterations* additional copies of the + block's base cost. + + Returns ``(total_extra_cost, remaining_balance)``. May raise + ``InsufficientBalanceError`` if the user can't afford the charge. + """ + if extra_iterations <= 0: + return 0, 0 + # Cap to protect against a corrupted llm_call_count. + capped = min(extra_iterations, self._MAX_EXTRA_ITERATIONS) + return await asyncio.get_event_loop().run_in_executor( + None, self._charge_extra_iterations_sync, node_exec, capped + ) + + async def charge_node_usage( + self, + node_exec: NodeExecutionEntry, + ) -> tuple[int, int]: + """Charge a single node execution to the user. + + Public async wrapper around :meth:`_charge_usage` for blocks (e.g. the + OrchestratorBlock) that spawn nested node executions outside the + main queue and therefore need to charge them explicitly. + + Also handles low-balance notification so callers don't need to touch + private methods directly. + + Note: this **does not** increment the global execution counter + (``increment_execution_count``). Nested tool executions are + sub-steps of a single block run from the user's perspective and + should not push them into higher per-execution cost tiers. + """ + total_cost, remaining = await asyncio.get_event_loop().run_in_executor( + None, self._charge_usage, node_exec, 0 + ) + if total_cost > 0: + await asyncio.get_event_loop().run_in_executor( + None, + self._handle_low_balance, + get_db_client(), + node_exec.user_id, + remaining, + total_cost, + ) + return total_cost, remaining + @time_measured def _on_graph_execution( self, From 6523dce30cc5497771ee32b1eca1fc49d144916c Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 23:53:52 +0700 Subject: [PATCH 14/24] fix(backend/orchestrator): use AsyncMock for charge_node_usage in test charge_node_usage is async and is directly awaited; using MagicMock caused a TypeError that was silently swallowed by the outer except Exception block, meaning the billing assertion passed for the wrong reason (mock called but await failed, so no billing actually ran). --- .../backend/backend/blocks/test/test_orchestrator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py index c92ba3c702..2eb27012dc 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py @@ -923,10 +923,10 @@ async def test_orchestrator_agent_mode(): return_value=mock_node_stats ) # Mock charge_node_usage (called after successful tool execution). - # Returns (cost, remaining_balance). Synchronous because it's called - # via asyncio.to_thread. Use a non-zero cost so the merge_stats - # branch is actually exercised, and assert it's called below. - mock_execution_processor.charge_node_usage = MagicMock(return_value=(10, 990)) + # Returns (cost, remaining_balance). Must be AsyncMock because it is + # an async method and is directly awaited in _execute_single_tool_with_manager. + # Use a non-zero cost so the merge_stats branch is exercised. + mock_execution_processor.charge_node_usage = AsyncMock(return_value=(10, 990)) # Mock the get_execution_outputs_by_node_exec_id method mock_db_client.get_execution_outputs_by_node_exec_id.return_value = { From 7e7b3c42cbdcc24843c873ccaafc7c9fd6cabb72 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 23:56:24 +0700 Subject: [PATCH 15/24] style(backend/executor): replace deprecated get_event_loop().run_in_executor with asyncio.to_thread asyncio.get_event_loop() is deprecated in Python 3.10+; asyncio.to_thread is the idiomatic replacement and consistent with the rest of manager.py. --- autogpt_platform/backend/backend/executor/manager.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index d0348c0611..5c30617fdb 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -1181,8 +1181,8 @@ class ExecutionProcessor: return 0, 0 # Cap to protect against a corrupted llm_call_count. capped = min(extra_iterations, self._MAX_EXTRA_ITERATIONS) - return await asyncio.get_event_loop().run_in_executor( - None, self._charge_extra_iterations_sync, node_exec, capped + return await asyncio.to_thread( + self._charge_extra_iterations_sync, node_exec, capped ) async def charge_node_usage( @@ -1203,12 +1203,11 @@ class ExecutionProcessor: sub-steps of a single block run from the user's perspective and should not push them into higher per-execution cost tiers. """ - total_cost, remaining = await asyncio.get_event_loop().run_in_executor( - None, self._charge_usage, node_exec, 0 + total_cost, remaining = await asyncio.to_thread( + self._charge_usage, node_exec, 0 ) if total_cost > 0: - await asyncio.get_event_loop().run_in_executor( - None, + await asyncio.to_thread( self._handle_low_balance, get_db_client(), node_exec.user_id, From 90b9c2ab460bbf7e74860b9c73a2fe4b76ddce09 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 00:13:59 +0700 Subject: [PATCH 16/24] fix(backend/executor): skip execution tier charge for nested tool calls execution_usage_cost(0) incorrectly charges 1 credit because 0 % threshold == 0. charge_node_usage passes 0 to _charge_usage to signal "no tier increment", but the modulo check fires at 0. Fix: skip execution_usage_cost entirely when execution_count == 0, preserving the intent that nested tool executions don't count against execution tiers. --- autogpt_platform/backend/backend/executor/manager.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index f63f8ec76c..e3c6702039 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -1101,7 +1101,13 @@ class ExecutionProcessor: ) total_cost += cost - cost, usage_count = execution_usage_cost(execution_count) + # execution_count=0 is used by charge_node_usage for nested tool calls + # which must not be pushed into higher execution-count tiers. + # execution_usage_cost(0) would trigger a charge because 0 % threshold == 0, + # so skip it entirely when execution_count is 0. + cost, usage_count = ( + execution_usage_cost(execution_count) if execution_count > 0 else (0, 0) + ) if cost > 0: remaining_balance = db_client.spend_credits( user_id=node_exec.user_id, From 2a6b65fd7bb2911afc3a53b02cea333e1a91f271 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 04:47:56 +0000 Subject: [PATCH 17/24] fix(executor): notify low balance after extra-iteration charges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `_on_node_execution` path called `charge_extra_iterations` and ignored the returned `remaining_balance`, so users were never notified when their balance crossed the low-balance threshold via these post-hoc per-LLM-call charges. `charge_node_usage` already does the right thing — mirror that pattern here so all charging paths route through `_handle_low_balance` consistently. Sentry bug prediction: PRRT_kwDOJKSTjM56Mibw (severity MEDIUM). --- autogpt_platform/backend/backend/executor/manager.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index e3c6702039..ca66fad63a 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -708,6 +708,18 @@ class ExecutionProcessor: ) if extra_cost > 0: execution_stats.extra_cost += extra_cost + # Mirror the low-balance notification path in + # ``charge_node_usage`` so users are alerted when + # their balance crosses the threshold from these + # extra-iteration charges (sentry HIGH/MEDIUM bug + # PRRT_kwDOJKSTjM56Mibw). + await asyncio.to_thread( + self._handle_low_balance, + get_db_client(), + node_exec.user_id, + remaining_balance, + extra_cost, + ) except InsufficientBalanceError as e: log_metadata.error( "billing_leak: insufficient balance after " From 72660f8df08749526cd8216463e8515ad803e59f Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 11:47:18 +0000 Subject: [PATCH 18/24] test(orchestrator): use AsyncMock for charge_node_usage in remaining tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit charge_node_usage is async and is awaited in _execute_single_tool_with_manager. Mocking it as MagicMock returned a non-awaitable tuple, which raised TypeError inside the tool execution path; the error was silently swallowed by the orchestrator's catch-all and converted into a 'Tool execution failed' string, so downstream assertions effectively ran against an error response instead of the success path. Switch both tests to AsyncMock to actually exercise the post-tool charging branch — matching the fix already applied in test_orchestrator.py:929. --- .../backend/blocks/test/test_orchestrator_dynamic_fields.py | 6 +++++- .../backend/blocks/test/test_orchestrator_responses_api.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py index 5407972fa1..f2242ea527 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py @@ -642,7 +642,11 @@ async def test_validation_errors_dont_pollute_conversation(): mock_node_stats ) # Mock charge_node_usage (called after successful tool execution). - mock_execution_processor.charge_node_usage = MagicMock( + # Must be AsyncMock because it is async and is awaited in + # _execute_single_tool_with_manager — a plain MagicMock would + # return a non-awaitable tuple and TypeError out, then be + # silently swallowed by the orchestrator's catch-all. + mock_execution_processor.charge_node_usage = AsyncMock( return_value=(0, 0) ) diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py index 649716a491..ac78b6d35b 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py @@ -957,7 +957,11 @@ async def test_agent_mode_conversation_valid_for_responses_api(): ns = MagicMock(error=None) ep.on_node_execution = AsyncMock(return_value=ns) # Mock charge_node_usage (called after successful tool execution). - ep.charge_node_usage = MagicMock(return_value=(0, 0)) + # Must be AsyncMock because it is async and is awaited in + # _execute_single_tool_with_manager — a plain MagicMock would return a + # non-awaitable tuple and TypeError out, then be silently swallowed by + # the orchestrator's catch-all. + ep.charge_node_usage = AsyncMock(return_value=(0, 0)) with patch("backend.blocks.llm.llm_call", llm_mock), patch.object( block, "_create_tool_node_signatures", return_value=tool_sigs From 5e8d3ba889a9a621c903ce6c5007d36902e858be Mon Sep 17 00:00:00 2001 From: majdyz Date: Sun, 12 Apr 2026 10:17:09 +0000 Subject: [PATCH 19/24] fix(backend/executor): harden orchestrator tool execution error handling - Replace assert with proper if-guard + RuntimeError for node_exec_result - Wrap on_node_execution in try/except to always resolve the Future via set_exception on error, preventing dangling unresolved Futures - Add separate try/except around charge_node_usage so non-balance billing exceptions propagate instead of being silently swallowed by the outer except Exception handler - Add _is_error flag to tool response dicts to replace fragile string matching on "Tool execution failed:" prefix --- .../backend/backend/blocks/orchestrator.py | 58 ++++++++++++++----- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 7e51f43b80..79e9c566e8 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -1087,7 +1087,10 @@ class OrchestratorBlock(Block): input_data=input_value, ) - assert node_exec_result is not None, "node_exec_result should not be None" + if node_exec_result is None: + raise RuntimeError( + f"upsert_execution_input returned None for node {sink_node_id}" + ) # Create NodeExecutionEntry for execution manager node_exec_entry = NodeExecutionEntry( @@ -1122,14 +1125,20 @@ class OrchestratorBlock(Block): task=node_exec_future, ) - # Execute the node directly since we're in the Orchestrator context - tool_node_stats = await execution_processor.on_node_execution( - node_exec=node_exec_entry, - node_exec_progress=node_exec_progress, - nodes_input_masks=None, - graph_stats_pair=graph_stats_pair, - ) - node_exec_future.set_result(tool_node_stats) + # Execute the node directly since we're in the Orchestrator context. + # Wrap in try/except so the future is always resolved, even on + # error — an unresolved Future would block anything awaiting it. + try: + tool_node_stats = await execution_processor.on_node_execution( + node_exec=node_exec_entry, + node_exec_progress=node_exec_progress, + nodes_input_masks=None, + graph_stats_pair=graph_stats_pair, + ) + node_exec_future.set_result(tool_node_stats) + except Exception as exec_err: + node_exec_future.set_exception(exec_err) + raise # Charge user credits AFTER successful tool execution. Tools # spawned by the orchestrator bypass the main execution queue @@ -1141,14 +1150,27 @@ class OrchestratorBlock(Block): # `error is None` intentionally excludes both Exception and # BaseException subclasses (e.g. CancelledError) so cancelled # or terminated tool runs are not billed. + # + # Billing errors (including non-balance exceptions) are kept + # in a separate try/except so they are never silently swallowed + # by the generic tool-error handler below. if ( not execution_params.execution_context.dry_run and tool_node_stats is not None and tool_node_stats.error is None ): - tool_cost, _ = await execution_processor.charge_node_usage( - node_exec_entry, - ) + try: + tool_cost, _ = await execution_processor.charge_node_usage( + node_exec_entry, + ) + except InsufficientBalanceError: + raise + except Exception: + logger.exception( + "Unexpected error charging for tool node %s", + sink_node_id, + ) + raise if tool_cost > 0: self.merge_stats(NodeExecutionStats(extra_cost=tool_cost)) @@ -1163,9 +1185,11 @@ class OrchestratorBlock(Block): if node_outputs else "Tool executed successfully" ) - return _create_tool_response( + resp = _create_tool_response( tool_call.id, tool_response_content, responses_api=responses_api ) + resp["_is_error"] = False + return resp except InsufficientBalanceError: # Don't downgrade billing failures into tool errors — let the @@ -1176,11 +1200,13 @@ class OrchestratorBlock(Block): except Exception as e: logger.warning("Tool execution with manager failed: %s", e) # Return error response - return _create_tool_response( + resp = _create_tool_response( tool_call.id, f"Tool execution failed: {e}", responses_api=responses_api, ) + resp["_is_error"] = True + return resp async def _agent_mode_llm_caller( self, @@ -1280,7 +1306,7 @@ class OrchestratorBlock(Block): content = str(raw_content) else: content = "Tool executed successfully" - tool_failed = content.startswith("Tool execution failed:") + tool_failed = result.get("_is_error", False) return ToolCallResult( tool_call_id=tool_call.id, tool_name=tool_call.name, @@ -1501,7 +1527,7 @@ class OrchestratorBlock(Block): text = content else: text = json.dumps(content) - tool_failed = text.startswith("Tool execution failed:") + tool_failed = result.get("_is_error", False) return { "content": [{"type": "text", "text": text}], "isError": tool_failed, From b62288655fa1435178da06d366250e3ebc95b920 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sun, 12 Apr 2026 12:10:43 +0000 Subject: [PATCH 20/24] Add None guard for tool_node_stats after on_node_execution If on_node_execution returns None, return an error response with _is_error=True instead of falling through to the success path. --- autogpt_platform/backend/backend/blocks/orchestrator.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 79e9c566e8..5262f8eb1d 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -1140,6 +1140,15 @@ class OrchestratorBlock(Block): node_exec_future.set_exception(exec_err) raise + if tool_node_stats is None: + resp = _create_tool_response( + tool_call.id, + "Tool execution returned no result", + responses_api=responses_api, + ) + resp["_is_error"] = True + return resp + # Charge user credits AFTER successful tool execution. Tools # spawned by the orchestrator bypass the main execution queue # (where _charge_usage is called), so we must charge here to From 626fe17aac5cfc6b6b986dce52880c93b9f51fd0 Mon Sep 17 00:00:00 2001 From: majdyz Date: Mon, 13 Apr 2026 04:03:08 +0000 Subject: [PATCH 21/24] fix(orchestrator): resolve None future on swallowed errors; add missing tests - Move tool_node_stats None guard before node_exec_future.set_result so that when on_node_execution returns None (swallowed by @async_error_logged), the future carries set_exception(RuntimeError) rather than set_result(None), giving the tracking system an accurate error state - Remove redundant `tool_node_stats is not None` check that was dead code after the early-return guard was added - Add explanatory comment in _charge_extra_iterations_sync docstring explaining why the block lookup is intentionally repeated rather than cached from _charge_usage (two separate thread-pool workers, no shared mutable state) - Add assertion to test_on_node_execution_charges_extra_iterations_when_gate_passes verifying _handle_low_balance is called when extra_cost > 0 - Add test_on_node_execution_failed_ibe_sends_notification covering the FAILED + InsufficientBalanceError path in on_node_execution (lines 822-836) that was previously untested --- .../backend/backend/blocks/orchestrator.py | 29 +++++--- .../test_orchestrator_per_iteration_cost.py | 74 +++++++++++++++++++ .../backend/backend/executor/manager.py | 12 ++- 3 files changed, 102 insertions(+), 13 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 5262f8eb1d..1c40a7df96 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -1128,6 +1128,12 @@ class OrchestratorBlock(Block): # Execute the node directly since we're in the Orchestrator context. # Wrap in try/except so the future is always resolved, even on # error — an unresolved Future would block anything awaiting it. + # + # on_node_execution is decorated with @async_error_logged(swallow=True), + # which catches BaseException and returns None rather than raising. + # Treat a None return as a failure: set_exception so the future + # carries an error state rather than a None result, and return an + # error response so the LLM knows the tool failed. try: tool_node_stats = await execution_processor.on_node_execution( node_exec=node_exec_entry, @@ -1135,20 +1141,24 @@ class OrchestratorBlock(Block): nodes_input_masks=None, graph_stats_pair=graph_stats_pair, ) + if tool_node_stats is None: + nil_err = RuntimeError( + f"on_node_execution returned None for node {sink_node_id} " + "(error was swallowed by @async_error_logged)" + ) + node_exec_future.set_exception(nil_err) + resp = _create_tool_response( + tool_call.id, + "Tool execution returned no result", + responses_api=responses_api, + ) + resp["_is_error"] = True + return resp node_exec_future.set_result(tool_node_stats) except Exception as exec_err: node_exec_future.set_exception(exec_err) raise - if tool_node_stats is None: - resp = _create_tool_response( - tool_call.id, - "Tool execution returned no result", - responses_api=responses_api, - ) - resp["_is_error"] = True - return resp - # Charge user credits AFTER successful tool execution. Tools # spawned by the orchestrator bypass the main execution queue # (where _charge_usage is called), so we must charge here to @@ -1165,7 +1175,6 @@ class OrchestratorBlock(Block): # by the generic tool-error handler below. if ( not execution_params.execution_context.dry_run - and tool_node_stats is not None and tool_node_stats.error is None ): try: diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py index c703e32c71..611ab54fac 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -540,6 +540,9 @@ async def test_on_node_execution_charges_extra_iterations_when_gate_passes( graph_stats_pair=stats_pair, ) assert calls["charge_extra_iterations"] == [2] + # _handle_low_balance must be called with the remaining balance returned by + # charge_extra_iterations (500) so users are alerted when balance drops low. + assert len(calls["handle_low_balance"]) == 1 @pytest.mark.asyncio @@ -832,3 +835,74 @@ async def test_tool_execution_insufficient_balance_propagates(): charge_node_usage_mock=raising_charge, ) assert isinstance(raised, InsufficientBalanceError) + + +# ── on_node_execution FAILED + InsufficientBalanceError notification ── + + +@pytest.mark.asyncio +async def test_on_node_execution_failed_ibe_sends_notification( + monkeypatch, + gated_processor, +): + """When status == FAILED and execution_stats.error is InsufficientBalanceError, + _handle_insufficient_funds_notif must be called. + + This path fires when a nested tool charge inside the orchestrator raises + InsufficientBalanceError, which propagates out of the block's run() generator + and is caught by _on_node_execution's broad except, setting status=FAILED and + execution_stats.error=IBE. on_node_execution's post-execution block then + sends the user notification so they understand why the run stopped. + """ + from backend.data.execution import ExecutionStatus + from backend.executor import manager + from backend.util.exceptions import InsufficientBalanceError + + proc, calls, inner, fake_db, NodeExecutionStats = gated_processor + ibe = InsufficientBalanceError( + user_id="u", + message="Insufficient balance", + balance=0, + amount=30, + ) + + # Simulate _on_node_execution returning FAILED with IBE in stats.error. + async def fake_inner_failed( + self, + *, + node, + node_exec, + node_exec_progress, + stats, + db_client, + log_metadata, + nodes_input_masks=None, + nodes_to_skip=None, + ): + stats.error = ibe + return MagicMock(wall_time=0.1, cpu_time=0.1), ExecutionStatus.FAILED + + monkeypatch.setattr( + manager.ExecutionProcessor, + "_on_node_execution", + fake_inner_failed, + ) + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=0)) + + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + # The notification must have fired so the user knows why their run stopped. + assert len(calls["handle_insufficient_funds_notif"]) == 1 + assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u" + # charge_extra_iterations must NOT be called — status is FAILED. + assert calls["charge_extra_iterations"] == [] diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index ca66fad63a..68f0ab7fb4 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -753,8 +753,7 @@ class ExecutionProcessor: ) except Exception as notif_error: # pragma: no cover log_metadata.warning( - f"Failed to send insufficient funds notification: " - f"{notif_error}" + f"Failed to send insufficient funds notification: {notif_error}" ) except Exception as e: # Unexpected billing failure (DB outage, network, etc.). @@ -832,7 +831,7 @@ class ExecutionProcessor: ) except Exception as notif_error: # pragma: no cover log_metadata.warning( - f"Failed to send insufficient funds notification: " f"{notif_error}" + f"Failed to send insufficient funds notification: {notif_error}" ) return execution_stats @@ -1153,6 +1152,13 @@ class ExecutionProcessor: Called only from :meth:`charge_extra_iterations`. Do not call directly from async code. + + Note: ``_resolve_block_cost`` is called again here (rather than + reusing the result from ``_charge_usage`` at the start of execution) + because the two calls happen in separate thread-pool workers and + sharing mutable state across workers would require locks. The block + config is immutable during a run, so the repeated lookup is safe and + produces the same cost; the only overhead is an extra registry lookup. """ db_client = get_db_client() block, cost, matching_filter = self._resolve_block_cost(node_exec) From e901b64bedc1e2cbb5ac784530f32ca788ee453f Mon Sep 17 00:00:00 2001 From: majdyz Date: Mon, 13 Apr 2026 04:22:03 +0000 Subject: [PATCH 22/24] fix(test): fix _handle_low_balance mock signature to accept positional args The gated_processor fixture's fake_low_balance mock used **kwargs, but production code calls _handle_low_balance with positional args via asyncio.to_thread. This caused a silent TypeError caught by the broad except handler, making the handle_low_balance assertion fail (0 calls instead of 1). Updated mock to match the actual method signature. --- .../test/test_orchestrator_per_iteration_cost.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py index 611ab54fac..f166093d4c 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -492,8 +492,16 @@ def gated_processor(monkeypatch): fake_charge_extra, ) - def fake_low_balance(self, **kwargs): - calls["handle_low_balance"].append(kwargs) + def fake_low_balance( + self, db_client, user_id, current_balance, transaction_cost + ): + calls["handle_low_balance"].append( + { + "user_id": user_id, + "current_balance": current_balance, + "transaction_cost": transaction_cost, + } + ) monkeypatch.setattr( manager.ExecutionProcessor, From 5ff46ff2075209781c3d62d5d4b8670d47816b79 Mon Sep 17 00:00:00 2001 From: majdyz Date: Mon, 13 Apr 2026 06:44:20 +0000 Subject: [PATCH 23/24] fix(backend): address review feedback on orchestrator billing - Extract post-execution billing into _handle_post_execution_billing() - Deduplicate IBE notification into _try_send_insufficient_funds_notif() - Combine _charge_usage + _handle_low_balance into single thread dispatch - Sanitize error messages to LLM (no internal details leaked) - Default _is_error to True (fail-closed) for tool responses - Add IBE propagation contract to OrchestratorBlock class docstring - Reduce per-site IBE comments to one-liners referencing class docstring - Fix _resolve_block_cost return type annotation (Block | None) - Move test imports to module level, fix test_default_block_returns_zero - Add tests for non-IBE billing failure and _charge_usage(count=0) - Fix Black formatting (CI lint blocker) --- .../backend/backend/blocks/orchestrator.py | 58 ++-- .../test_orchestrator_per_iteration_cost.py | 189 ++++++++----- .../backend/backend/executor/manager.py | 256 +++++++++--------- 3 files changed, 278 insertions(+), 225 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 1c40a7df96..34b4929879 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -365,9 +365,15 @@ def _disambiguate_tool_names(tools: list[dict[str, Any]]) -> None: class OrchestratorBlock(Block): - """ - A block that uses a language model to orchestrate tool calls, supporting both - single-shot and iterative agent mode execution. + """A block that uses a language model to orchestrate tool calls. + + Supports both single-shot and iterative agent mode execution. + + **InsufficientBalanceError propagation contract**: ``InsufficientBalanceError`` + (IBE) must always re-raise through every ``except`` block in this class. + Swallowing IBE would let the agent loop continue with unpaid work. Every + exception handler that catches ``Exception`` includes an explicit IBE + re-raise carve-out for this reason. """ def extra_credit_charges(self, execution_stats: NodeExecutionStats) -> int: @@ -1182,8 +1188,12 @@ class OrchestratorBlock(Block): node_exec_entry, ) except InsufficientBalanceError: + # IBE must propagate — see OrchestratorBlock class docstring. raise except Exception: + # Non-billing charge failures (DB outage, network, etc.) + # are logged with full traceback but surfaced to the LLM + # as a generic error to avoid leaking infrastructure details. logger.exception( "Unexpected error charging for tool node %s", sink_node_id, @@ -1210,17 +1220,15 @@ class OrchestratorBlock(Block): return resp except InsufficientBalanceError: - # Don't downgrade billing failures into tool errors — let the - # orchestrator's outer error handling stop the run cleanly, - # mirroring the behaviour of the main execution queue. Also - # prevents leaking exact balance amounts to the LLM context. + # IBE must propagate — see class docstring. raise except Exception as e: - logger.warning("Tool execution with manager failed: %s", e) - # Return error response + logger.warning("Tool execution with manager failed: %s", e, exc_info=True) + # Return a generic error to the LLM — internal exception messages + # may contain server paths, DB details, or infrastructure info. resp = _create_tool_response( tool_call.id, - f"Tool execution failed: {e}", + "Tool execution failed due to an internal error", responses_api=responses_api, ) resp["_is_error"] = True @@ -1324,7 +1332,7 @@ class OrchestratorBlock(Block): content = str(raw_content) else: content = "Tool executed successfully" - tool_failed = result.get("_is_error", False) + tool_failed = result.get("_is_error", True) return ToolCallResult( tool_call_id=tool_call.id, tool_name=tool_call.name, @@ -1332,11 +1340,7 @@ class OrchestratorBlock(Block): is_error=tool_failed, ) except InsufficientBalanceError: - # Billing failures must stop the agent loop cleanly — do NOT - # downgrade them into a tool error that gets fed back to the - # LLM. Re-raise so the orchestrator's outer error handling - # halts the run (mirrors main execution queue behaviour) and - # avoids leaking exact balance amounts into LLM context. + # IBE must propagate — see class docstring. raise except Exception as e: logger.error("Tool execution failed: %s", e) @@ -1458,11 +1462,7 @@ class OrchestratorBlock(Block): }, ) except InsufficientBalanceError: - # Billing failures must propagate out of the block so the - # executor's billing-leak handling fires (error recording on - # execution_stats, user notification, structured logging). - # Do NOT downgrade to a user-visible "error" output — that - # would swallow the failure and leak the exact balance amount. + # IBE must propagate — see class docstring. raise except Exception as e: # Catch all OTHER errors (validation, network, API) so that @@ -1545,17 +1545,13 @@ class OrchestratorBlock(Block): text = content else: text = json.dumps(content) - tool_failed = result.get("_is_error", False) + tool_failed = result.get("_is_error", True) return { "content": [{"type": "text", "text": text}], "isError": tool_failed, } except InsufficientBalanceError: - # Same carve-out as _agent_mode_tool_executor: - # billing failures must propagate to stop the run - # rather than be fed back to the LLM as a tool - # error (which would leak balance amounts and let - # the loop continue consuming unbillable work). + # IBE must propagate — see class docstring. raise except Exception as e: logger.error("SDK tool execution failed: %s", e) @@ -1836,12 +1832,8 @@ class OrchestratorBlock(Block): except (asyncio.CancelledError, StopAsyncIteration): pass except InsufficientBalanceError: - # Billing failures must propagate so the executor's - # billing-leak handling fires (error recording, user - # notification, structured logging). Mirrors the carve-out - # in _execute_tools_agent_mode — do not downgrade to a - # user-visible error output. The `finally` block below - # still runs and records partial token usage. + # IBE must propagate — see class docstring. The `finally` + # block below still runs and records partial token usage. raise except Exception as e: # Surface OTHER SDK errors as user-visible output instead diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py index f166093d4c..3dc9e9b9ae 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -11,63 +11,50 @@ from unittest.mock import AsyncMock, MagicMock import pytest -from backend.blocks.orchestrator import OrchestratorBlock +from backend.blocks._base import Block +from backend.blocks.orchestrator import ExecutionParams, OrchestratorBlock +from backend.data.execution import ExecutionContext, ExecutionStatus +from backend.data.model import NodeExecutionStats +from backend.executor import manager +from backend.util.exceptions import InsufficientBalanceError # ── extra_credit_charges hook ──────────────────────────────────────── +class _NoOpBlock(Block): + """Minimal concrete Block subclass that does not override extra_credit_charges.""" + + def __init__(self): + super().__init__(id="noop-block", description="No-op test block") + + def run(self, input_data, **kwargs): # type: ignore[override] + yield "out", {} + + class TestExtraCreditCharges: """OrchestratorBlock opts into per-LLM-call billing via extra_credit_charges.""" def test_orchestrator_returns_nonzero_for_multiple_calls(self): - from backend.data.model import NodeExecutionStats - block = OrchestratorBlock() stats = NodeExecutionStats(llm_call_count=3) assert block.extra_credit_charges(stats) == 2 def test_orchestrator_returns_zero_for_single_call(self): - from backend.data.model import NodeExecutionStats - block = OrchestratorBlock() stats = NodeExecutionStats(llm_call_count=1) assert block.extra_credit_charges(stats) == 0 def test_orchestrator_returns_zero_for_zero_calls(self): - from backend.data.model import NodeExecutionStats - block = OrchestratorBlock() stats = NodeExecutionStats(llm_call_count=0) assert block.extra_credit_charges(stats) == 0 def test_default_block_returns_zero(self): - from backend.data.model import NodeExecutionStats - - # Use a concrete block (not the abstract Block base) to verify the - # default implementation returns 0. - block = OrchestratorBlock() - stats = NodeExecutionStats(llm_call_count=0) - # When llm_call_count=0, extra_credit_charges should clamp to 0. + """A block that does not override extra_credit_charges returns 0.""" + block = _NoOpBlock() + stats = NodeExecutionStats(llm_call_count=10) assert block.extra_credit_charges(stats) == 0 - # Also verify via Block.extra_credit_charges directly (method-level - # check) by calling the unbound method on an OrchestratorBlock - # instance with the base implementation patched out. - from unittest.mock import patch - - from backend.blocks._base import Block - - with patch.object( - OrchestratorBlock, - "extra_credit_charges", - Block.extra_credit_charges, - ): - base_block = OrchestratorBlock() - assert ( - base_block.extra_credit_charges(NodeExecutionStats(llm_call_count=10)) - == 0 - ) - # ── charge_extra_iterations math ─────────────────────────────────── @@ -91,9 +78,11 @@ def patched_processor(monkeypatch): Returns the processor and a list of credit amounts spent so tests can assert on what was charged. - """ - from backend.executor import manager + Note: ``ExecutionProcessor.__new__()`` bypasses ``__init__`` — if + ``__init__`` gains required state in the future this fixture will need + updating. + """ spent: list[int] = [] class FakeDb: @@ -156,7 +145,6 @@ class TestChargeExtraIterations: @pytest.mark.asyncio async def test_capped_at_max(self, monkeypatch, fake_node_exec): """Runaway llm_call_count is capped at _MAX_EXTRA_ITERATIONS.""" - from backend.executor import manager spent: list[int] = [] @@ -187,7 +175,6 @@ class TestChargeExtraIterations: @pytest.mark.asyncio async def test_zero_base_cost_skips_charge(self, monkeypatch, fake_node_exec): - from backend.executor import manager spent: list[int] = [] @@ -215,7 +202,6 @@ class TestChargeExtraIterations: @pytest.mark.asyncio async def test_block_not_found_skips_charge(self, monkeypatch, fake_node_exec): - from backend.executor import manager spent: list[int] = [] @@ -243,8 +229,6 @@ class TestChargeExtraIterations: self, monkeypatch, fake_node_exec ): """Out-of-credits errors must propagate, not be silently swallowed.""" - from backend.executor import manager - from backend.util.exceptions import InsufficientBalanceError class FakeDb: def spend_credits(self, *, user_id, cost, metadata): @@ -280,7 +264,6 @@ class TestChargeNodeUsage: self, monkeypatch, fake_node_exec ): """Nested tool charges should NOT inflate the per-execution counter.""" - from backend.executor import manager captured: dict = {} @@ -313,7 +296,6 @@ class TestChargeNodeUsage: self, monkeypatch, fake_node_exec ): """charge_node_usage should call _handle_low_balance when total_cost > 0.""" - from backend.executor import manager low_balance_calls: list[dict] = [] @@ -353,7 +335,6 @@ class TestChargeNodeUsage: self, monkeypatch, fake_node_exec ): """charge_node_usage should NOT call _handle_low_balance when cost is 0.""" - from backend.executor import manager low_balance_calls: list = [] @@ -418,9 +399,6 @@ def gated_processor(monkeypatch): llm_call_count, dry_run) and observe whether charge_extra_iterations was called. """ - from backend.data.execution import ExecutionStatus - from backend.data.model import NodeExecutionStats - from backend.executor import manager calls: dict[str, list] = { "charge_extra_iterations": [], @@ -492,9 +470,7 @@ def gated_processor(monkeypatch): fake_charge_extra, ) - def fake_low_balance( - self, db_client, user_id, current_balance, transaction_cost - ): + def fake_low_balance(self, db_client, user_id, current_balance, transaction_cost): calls["handle_low_balance"].append( { "user_id": user_id, @@ -528,7 +504,6 @@ async def test_on_node_execution_charges_extra_iterations_when_gate_passes( gated_processor, ): """COMPLETED + extra_credit_charges > 0 + not dry_run → charged.""" - from backend.data.execution import ExecutionStatus proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.COMPLETED @@ -555,7 +530,6 @@ async def test_on_node_execution_charges_extra_iterations_when_gate_passes( @pytest.mark.asyncio async def test_on_node_execution_skips_when_status_not_completed(gated_processor): - from backend.data.execution import ExecutionStatus proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.FAILED @@ -579,7 +553,6 @@ async def test_on_node_execution_skips_when_status_not_completed(gated_processor @pytest.mark.asyncio async def test_on_node_execution_skips_when_extra_charges_zero(gated_processor): - from backend.data.execution import ExecutionStatus proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.COMPLETED @@ -604,7 +577,6 @@ async def test_on_node_execution_skips_when_extra_charges_zero(gated_processor): @pytest.mark.asyncio async def test_on_node_execution_skips_when_dry_run(gated_processor): - from backend.data.execution import ExecutionStatus proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.COMPLETED @@ -640,9 +612,6 @@ async def test_on_node_execution_insufficient_balance_records_error_and_notifies - _handle_insufficient_funds_notif is called so the user is notified - the structured ERROR log is the alerting hook """ - from backend.data.execution import ExecutionStatus - from backend.executor import manager - from backend.util.exceptions import InsufficientBalanceError proc, calls, inner, fake_db, _ = gated_processor inner["status"] = ExecutionStatus.COMPLETED @@ -698,13 +667,9 @@ async def _run_tool_exec_with_stats( Used to prove the dry_run and error guards around charge_node_usage behave as documented, and that InsufficientBalanceError propagates. """ - import threading from collections import defaultdict from unittest.mock import AsyncMock, MagicMock, patch - from backend.blocks.orchestrator import ExecutionParams, OrchestratorBlock - from backend.data.execution import ExecutionContext - block = OrchestratorBlock() # Mocked async DB client used inside orchestrator. @@ -828,10 +793,6 @@ async def test_tool_execution_insufficient_balance_propagates(): If this leaked into a ToolCallResult the LLM loop would keep running with 'tool failed' errors and the user would get unpaid work. """ - from unittest.mock import AsyncMock - - from backend.util.exceptions import InsufficientBalanceError - raising_charge = AsyncMock( side_effect=InsufficientBalanceError( user_id="u", message="nope", balance=0, amount=10 @@ -862,9 +823,6 @@ async def test_on_node_execution_failed_ibe_sends_notification( execution_stats.error=IBE. on_node_execution's post-execution block then sends the user notification so they understand why the run stopped. """ - from backend.data.execution import ExecutionStatus - from backend.executor import manager - from backend.util.exceptions import InsufficientBalanceError proc, calls, inner, fake_db, NodeExecutionStats = gated_processor ibe = InsufficientBalanceError( @@ -914,3 +872,102 @@ async def test_on_node_execution_failed_ibe_sends_notification( assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u" # charge_extra_iterations must NOT be called — status is FAILED. assert calls["charge_extra_iterations"] == [] + + +# ── Billing leak: non-IBE exception during extra-iteration charging ── + + +@pytest.mark.asyncio +async def test_on_node_execution_non_ibe_billing_failure_keeps_completed( + monkeypatch, + gated_processor, +): + """When charge_extra_iterations raises a non-IBE exception (e.g. DB outage): + + - execution_stats.error stays None (node ran to completion) + - status stays COMPLETED (work already done) + - the billing_leak error is logged but does not corrupt execution_stats + """ + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 4 + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=3)) + + async def raise_conn_error(self, node_exec, extra_iterations): + raise ConnectionError("DB connection lost") + + monkeypatch.setattr( + manager.ExecutionProcessor, "charge_extra_iterations", raise_conn_error + ) + + stats_pair = ( + MagicMock( + node_count=0, + nodes_cputime=0, + nodes_walltime=0, + cost=0, + node_error_count=0, + ), + threading.Lock(), + ) + result_stats = await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + # error stays None — node completed, only billing failed. + assert result_stats.error is None + # No notification was sent (only IBE triggers notification). + assert len(calls["handle_insufficient_funds_notif"]) == 0 + + +# ── _charge_usage with execution_count=0 ── + + +class TestChargeUsageZeroExecutionCount: + """Verify _charge_usage(node_exec, 0) does not invoke execution_usage_cost.""" + + def test_execution_count_zero_skips_execution_tier(self, monkeypatch): + """_charge_usage with execution_count=0 must not call execution_usage_cost.""" + execution_tier_called = [] + + def fake_execution_usage_cost(count): + execution_tier_called.append(count) + return (100, count) + + spent: list[int] = [] + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 500 + + fake_block = MagicMock() + fake_block.name = "FakeBlock" + + monkeypatch.setattr(manager, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(manager, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + manager, + "block_usage_cost", + lambda block, input_data, **_kw: (10, {}), + ) + monkeypatch.setattr(manager, "execution_usage_cost", fake_execution_usage_cost) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + ne = MagicMock() + ne.user_id = "u" + ne.graph_exec_id = "ge" + ne.graph_id = "g" + ne.node_exec_id = "ne" + ne.node_id = "n" + ne.block_id = "b" + ne.inputs = {} + + total_cost, remaining = proc._charge_usage(ne, 0) + assert total_cost == 10 # block cost only + assert remaining == 500 + assert spent == [10] + # execution_usage_cost must NOT have been called + assert execution_tier_called == [] diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index 68f0ab7fb4..6e4b3e2aad 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -19,7 +19,7 @@ from sentry_sdk.api import flush as _sentry_flush from sentry_sdk.api import get_current_scope as _sentry_get_current_scope from backend.blocks import get_block -from backend.blocks._base import BlockSchema +from backend.blocks._base import Block, BlockSchema from backend.blocks.agent import AgentExecutorBlock from backend.blocks.io import AgentOutputBlock from backend.blocks.mcp.block import MCPToolBlock @@ -681,100 +681,9 @@ class ExecutionProcessor: execution_stats.walltime = timing_info.wall_time execution_stats.cputime = timing_info.cpu_time - # Charge extra iterations for blocks that opt into per-LLM-call - # billing (e.g. OrchestratorBlock in agent mode). The first call - # is already covered by _charge_usage(); each additional LLM call - # costs another base_cost. Skipped for dry runs and failed runs. - # - # InsufficientBalanceError here is a post-hoc billing leak — the - # work is already done but the user can no longer pay. We: - # 1. log at ERROR with structured fields so alerting can catch it - # 2. fire _handle_insufficient_funds_notif so the user is - # notified (mirrors the main queue path at ~line 1254) - # The run itself is kept COMPLETED (the block's outputs are - # already committed) — matching the documented "billing leak" - # contract rather than retroactively failing a successful run. - extra_iterations = ( - node.block.extra_credit_charges(execution_stats) - if status == ExecutionStatus.COMPLETED - and not node_exec.execution_context.dry_run - else 0 + await self._handle_post_execution_billing( + node, node_exec, execution_stats, status, log_metadata ) - if extra_iterations > 0: - try: - extra_cost, remaining_balance = await self.charge_extra_iterations( - node_exec, - extra_iterations, - ) - if extra_cost > 0: - execution_stats.extra_cost += extra_cost - # Mirror the low-balance notification path in - # ``charge_node_usage`` so users are alerted when - # their balance crosses the threshold from these - # extra-iteration charges (sentry HIGH/MEDIUM bug - # PRRT_kwDOJKSTjM56Mibw). - await asyncio.to_thread( - self._handle_low_balance, - get_db_client(), - node_exec.user_id, - remaining_balance, - extra_cost, - ) - except InsufficientBalanceError as e: - log_metadata.error( - "billing_leak: insufficient balance after " - f"{node.block.name} completed {extra_iterations} " - f"extra iterations", - extra={ - "billing_leak": True, - "user_id": node_exec.user_id, - "graph_id": node_exec.graph_id, - "block_id": node_exec.block_id, - "extra_iterations": extra_iterations, - "error": str(e), - }, - ) - # NOTE: Do NOT set execution_stats.error here. The node ran - # to completion — only the post-hoc charge failed. Setting - # .error would (a) flip node_error_count below, creating an - # "errored COMPLETED node" inconsistency in the metrics, and - # (b) leak balance amounts into the persisted node_stats. - # The structured ERROR log above is the alerting hook; - # node_stats stays clean. - # Notify the user they're out of credits. Runs through - # Redis dedup (per user+graph) so repeat runs don't spam. - try: - await asyncio.to_thread( - self._handle_insufficient_funds_notif, - get_db_client(), - node_exec.user_id, - node_exec.graph_id, - e, - ) - except Exception as notif_error: # pragma: no cover - log_metadata.warning( - f"Failed to send insufficient funds notification: {notif_error}" - ) - except Exception as e: - # Unexpected billing failure (DB outage, network, etc.). - # Log at ERROR with structured fields and the same - # `billing_leak: True` marker so monitoring catches it - # alongside InsufficientBalanceError. The run is kept - # COMPLETED because the work is already done. - log_metadata.error( - f"billing_leak: failed to charge extra iterations " - f"for {node.block.name}", - extra={ - "billing_leak": True, - "user_id": node_exec.user_id, - "graph_id": node_exec.graph_id, - "block_id": node_exec.block_id, - "extra_iterations": extra_iterations, - "error_type": type(e).__name__, - "error": str(e), - }, - exc_info=True, - ) graph_stats, graph_stats_lock = graph_stats_pair with graph_stats_lock: @@ -811,31 +720,121 @@ class ExecutionProcessor: db_client=db_client, ) - # If the node failed because a nested tool charge raised - # InsufficientBalanceError (orchestrator agent mode), the main - # queue's _charge_usage IBE notification path was bypassed (the - # initial charge succeeded — only the nested tool charge failed). - # Send the user notification here so they understand why their - # agent run stopped. Failures are logged but not raised so the - # node-execution path stays clean. + # If the node failed because a nested tool charge raised IBE, + # send the user notification so they understand why the run stopped. if status == ExecutionStatus.FAILED and isinstance( execution_stats.error, InsufficientBalanceError ): - try: - await asyncio.to_thread( - self._handle_insufficient_funds_notif, - get_db_client(), - node_exec.user_id, - node_exec.graph_id, - execution_stats.error, - ) - except Exception as notif_error: # pragma: no cover - log_metadata.warning( - f"Failed to send insufficient funds notification: {notif_error}" - ) + await self._try_send_insufficient_funds_notif( + node_exec.user_id, + node_exec.graph_id, + execution_stats.error, + log_metadata, + ) return execution_stats + async def _try_send_insufficient_funds_notif( + self, + user_id: str, + graph_id: str, + error: InsufficientBalanceError, + log_metadata: LogMetadata, + ) -> None: + """Send an insufficient-funds notification, swallowing failures.""" + try: + await asyncio.to_thread( + self._handle_insufficient_funds_notif, + get_db_client(), + user_id, + graph_id, + error, + ) + except Exception as notif_error: # pragma: no cover + log_metadata.warning( + f"Failed to send insufficient funds notification: {notif_error}" + ) + + async def _handle_post_execution_billing( + self, + node: Node, + node_exec: NodeExecutionEntry, + execution_stats: NodeExecutionStats, + status: ExecutionStatus, + log_metadata: LogMetadata, + ) -> None: + """Charge extra iterations for blocks that opt into per-LLM-call billing. + + The first LLM call is already covered by ``_charge_usage()``; each + additional call costs another ``base_cost``. Skipped for dry runs and + failed runs. + + InsufficientBalanceError here is a post-hoc billing leak: the work is + already done but the user can no longer pay. The run stays COMPLETED and + the error is logged with ``billing_leak: True`` for alerting. + """ + extra_iterations = ( + node.block.extra_credit_charges(execution_stats) + if status == ExecutionStatus.COMPLETED + and not node_exec.execution_context.dry_run + else 0 + ) + if extra_iterations <= 0: + return + + try: + extra_cost, remaining_balance = await self.charge_extra_iterations( + node_exec, + extra_iterations, + ) + if extra_cost > 0: + execution_stats.extra_cost += extra_cost + await asyncio.to_thread( + self._handle_low_balance, + get_db_client(), + node_exec.user_id, + remaining_balance, + extra_cost, + ) + except InsufficientBalanceError as e: + log_metadata.error( + "billing_leak: insufficient balance after " + f"{node.block.name} completed {extra_iterations} " + f"extra iterations", + extra={ + "billing_leak": True, + "user_id": node_exec.user_id, + "graph_id": node_exec.graph_id, + "block_id": node_exec.block_id, + "extra_iterations": extra_iterations, + "error": str(e), + }, + ) + # Do NOT set execution_stats.error — the node ran to completion, + # only the post-hoc charge failed. See class-level billing-leak + # contract documentation. + await self._try_send_insufficient_funds_notif( + node_exec.user_id, + node_exec.graph_id, + e, + log_metadata, + ) + except Exception as e: + log_metadata.error( + f"billing_leak: failed to charge extra iterations " + f"for {node.block.name}", + extra={ + "billing_leak": True, + "user_id": node_exec.user_id, + "graph_id": node_exec.graph_id, + "block_id": node_exec.block_id, + "extra_iterations": extra_iterations, + "error_type": type(e).__name__, + "error": str(e), + }, + exc_info=True, + ) + @async_time_measured async def _on_node_execution( self, @@ -1065,7 +1064,7 @@ class ExecutionProcessor: def _resolve_block_cost( self, node_exec: NodeExecutionEntry, - ) -> tuple[Any, int, dict]: + ) -> tuple[Block | None, int, dict]: """Look up the block and compute its base usage cost for an exec. Shared by :meth:`_charge_usage` and :meth:`charge_extra_iterations` @@ -1211,6 +1210,22 @@ class ExecutionProcessor: self._charge_extra_iterations_sync, node_exec, capped ) + def _charge_and_check_balance( + self, + node_exec: NodeExecutionEntry, + ) -> tuple[int, int]: + """Charge usage and check low balance in a single thread-pool worker. + + Combines ``_charge_usage`` and ``_handle_low_balance`` to avoid + dispatching two thread-pool calls per tool execution. + """ + total_cost, remaining = self._charge_usage(node_exec, 0) + if total_cost > 0: + self._handle_low_balance( + get_db_client(), node_exec.user_id, remaining, total_cost + ) + return total_cost, remaining + async def charge_node_usage( self, node_exec: NodeExecutionEntry, @@ -1229,18 +1244,7 @@ class ExecutionProcessor: sub-steps of a single block run from the user's perspective and should not push them into higher per-execution cost tiers. """ - total_cost, remaining = await asyncio.to_thread( - self._charge_usage, node_exec, 0 - ) - if total_cost > 0: - await asyncio.to_thread( - self._handle_low_balance, - get_db_client(), - node_exec.user_id, - remaining, - total_cost, - ) - return total_cost, remaining + return await asyncio.to_thread(self._charge_and_check_balance, node_exec) @time_measured def _on_graph_execution( From e558c60104416b101119a8afc75c5c8a09818caa Mon Sep 17 00:00:00 2001 From: majdyz Date: Mon, 13 Apr 2026 07:02:10 +0000 Subject: [PATCH 24/24] fix(orchestrator): don't propagate non-billing charge errors as tool failures Non-IBE exceptions from charge_node_usage (e.g. DB timeout) were re-raised and caught by the outer generic handler, incorrectly marking a successful tool execution as failed. This could cause the LLM to retry side-effectful operations. Now logs the error and continues to the success path since the tool itself completed successfully. --- .../backend/backend/blocks/orchestrator.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 34b4929879..3e956e8d6f 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -1192,13 +1192,16 @@ class OrchestratorBlock(Block): raise except Exception: # Non-billing charge failures (DB outage, network, etc.) - # are logged with full traceback but surfaced to the LLM - # as a generic error to avoid leaking infrastructure details. + # must NOT propagate to the outer except handler because + # the tool itself succeeded. Re-raising would mark the + # tool as failed (_is_error=True), causing the LLM to + # retry side-effectful operations. Log and continue. logger.exception( - "Unexpected error charging for tool node %s", + "Unexpected error charging for tool node %s; " + "tool execution was successful", sink_node_id, ) - raise + tool_cost = 0 if tool_cost > 0: self.merge_stats(NodeExecutionStats(extra_cost=tool_cost))