Compare commits

...

6 Commits

Author SHA1 Message Date
Otto
5e883459e0 Fix activity status wording for mid-execution credit failures
Change 'couldn't start' to 'couldn't complete' since we now also match
mid-execution credit exhaustion where some steps may have already run.
2026-03-31 14:26:08 +00:00
Otto
c9cb953274 Address review: None score, mid-execution credits, generic log, Exception test
- Return correctness_score=None instead of 0.0 so frontend skips the
  misleading red failure bar (it checks typeof === 'number')
- Replace dead 'insufficientbalanceerror' pattern with 'insufficient
  balance of' to catch mid-execution credit failures from credit.py
- Make log message generic (no hardcoded 'credit exhaustion')
- Handle None score in manager.py log formatting
- Add test for Exception object flowing through str() conversion
- Update ActivityStatusResponse type to allow None score
2026-03-31 14:00:48 +00:00
Otto
9ad68b4e1e Assert LLM block is not called in short-circuit path
Add mock for AIStructuredResponseGeneratorBlock in the integration test
to explicitly verify the LLM is never invoked when credit exhaustion
triggers the early exit.
2026-03-31 08:29:34 +00:00
Otto
b672e20b0d Fix formatting: apply black style to test file
2026-03-31 08:04:46 +00:00
Otto
a21748b1a8 Address CodeRabbit review: tighten matcher and sanitize logs
- Tighten _is_credit_exhaustion() to only match the exact internal
  preflight message and InsufficientBalanceError class name, avoiding
  false positives from broad 'insufficient balance' matching
- Remove raw error from INFO log to avoid leaking user IDs/balance data
- Update tests to reflect tighter matching
2026-03-31 07:59:43 +00:00
Otto
63f6b5122a Skip LLM execution analysis for credit exhaustion failures
Add early-exit in generate_activity_status_for_execution() that detects
credit/balance exhaustion errors and returns a static ActivityStatusResponse
without making an LLM call.

48% of all execution failures (1,472/3,048 in the last 24h) are credit
exhaustion, each triggering a paid LLM call to conclude 'no credits.'
This skips the LLM entirely for these known failures, saving ~$1-2/day.

Co-authored-by: Bently <github@bentlybro.com>
2026-03-31 07:04:34 +00:00
3 changed files with 244 additions and 58 deletions

View File

@@ -168,7 +168,7 @@ class ActivityStatusResponse(TypedDict):
"""Type definition for structured activity status response."""
activity_status: str
correctness_score: float
correctness_score: float | None
def _truncate_uuid(uuid_str: str) -> str:
@@ -178,6 +178,45 @@ def _truncate_uuid(uuid_str: str) -> str:
return uuid_str.split("-")[0] if "-" in uuid_str else uuid_str[:8]
_CREDIT_EXHAUSTION_MESSAGES = (
"you have no credits left to run an agent.",
"insufficient balance of",
)
def _is_credit_exhaustion(error_str: str) -> bool:
"""Check if the error indicates credit/balance exhaustion."""
error_lower = error_str.lower()
return any(message in error_lower for message in _CREDIT_EXHAUSTION_MESSAGES)
def _check_obvious_failure(
execution_stats: GraphExecutionStats,
execution_status: ExecutionStatus | None,
) -> ActivityStatusResponse | None:
"""
Check if the execution failed for an obvious, deterministic reason
that doesn't require LLM analysis.
Returns a static ActivityStatusResponse if matched, None otherwise.
"""
if execution_status != ExecutionStatus.FAILED:
return None
error_str = str(execution_stats.error) if execution_stats.error else ""
if _is_credit_exhaustion(error_str):
return {
"activity_status": (
"This run couldn't complete because your account has run out of credits. "
"Please top up your credits to continue using this agent."
),
"correctness_score": None,
}
return None
async def generate_activity_status_for_execution(
graph_exec_id: str,
graph_id: str,
@@ -237,6 +276,14 @@ async def generate_activity_status_for_execution(
"correctness_score": execution_stats.correctness_score,
}
# Check for obvious failures that don't need LLM analysis
obvious_result = _check_obvious_failure(execution_stats, execution_status)
if obvious_result is not None:
logger.info(
f"Skipping LLM analysis for {graph_exec_id}: " "obvious failure detected"
)
return obvious_result
# Check if we have OpenAI API key
try:
settings = Settings()

View File

@@ -12,6 +12,8 @@ from backend.data.execution import ExecutionStatus, NodeExecutionResult
from backend.data.model import GraphExecutionStats
from backend.executor.activity_status_generator import (
_build_execution_summary,
_check_obvious_failure,
_is_credit_exhaustion,
generate_activity_status_for_execution,
)
@@ -379,8 +381,9 @@ class TestLLMCall:
from backend.blocks.llm import AIStructuredResponseGeneratorBlock
from backend.data.model import APIKeyCredentials
with patch("backend.blocks.llm.llm_call") as mock_llm_call, patch(
"backend.blocks.llm.secrets.token_hex", return_value="test123"
with (
patch("backend.blocks.llm.llm_call") as mock_llm_call,
patch("backend.blocks.llm.secrets.token_hex", return_value="test123"),
):
mock_llm_call.return_value = LLMResponse(
raw_response={},
@@ -442,8 +445,9 @@ class TestLLMCall:
from backend.blocks.llm import AIStructuredResponseGeneratorBlock
from backend.data.model import APIKeyCredentials
with patch("backend.blocks.llm.llm_call") as mock_llm_call, patch(
"backend.blocks.llm.secrets.token_hex", return_value="test123"
with (
patch("backend.blocks.llm.llm_call") as mock_llm_call,
patch("backend.blocks.llm.secrets.token_hex", return_value="test123"),
):
# Return invalid JSON that will fail validation (missing required field)
mock_llm_call.return_value = LLMResponse(
@@ -515,17 +519,21 @@ class TestGenerateActivityStatusForExecution:
mock_graph.links = []
mock_db_client.get_graph.return_value = mock_graph
with patch(
"backend.executor.activity_status_generator.get_block"
) as mock_get_block, patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings, patch(
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
) as mock_structured_block, patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
with (
patch(
"backend.executor.activity_status_generator.get_block"
) as mock_get_block,
patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings,
patch(
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
) as mock_structured_block,
patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
),
):
mock_get_block.side_effect = lambda block_id: mock_blocks.get(block_id)
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
@@ -533,10 +541,13 @@ class TestGenerateActivityStatusForExecution:
mock_instance = mock_structured_block.return_value
async def mock_run(*args, **kwargs):
yield "response", {
"activity_status": "I analyzed your data and provided the requested insights.",
"correctness_score": 0.85,
}
yield (
"response",
{
"activity_status": "I analyzed your data and provided the requested insights.",
"correctness_score": 0.85,
},
)
mock_instance.run = mock_run
@@ -586,11 +597,14 @@ class TestGenerateActivityStatusForExecution:
"""Test activity status generation with no API key."""
mock_db_client = AsyncMock()
with patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings, patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
with (
patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings,
patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
),
):
mock_settings.return_value.secrets.openai_internal_api_key = ""
@@ -612,11 +626,14 @@ class TestGenerateActivityStatusForExecution:
mock_db_client = AsyncMock()
mock_db_client.get_node_executions.side_effect = Exception("Database error")
with patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings, patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
with (
patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings,
patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
),
):
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
@@ -641,17 +658,21 @@ class TestGenerateActivityStatusForExecution:
mock_db_client.get_graph_metadata.return_value = None # No metadata
mock_db_client.get_graph.return_value = None # No graph
with patch(
"backend.executor.activity_status_generator.get_block"
) as mock_get_block, patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings, patch(
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
) as mock_structured_block, patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
with (
patch(
"backend.executor.activity_status_generator.get_block"
) as mock_get_block,
patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings,
patch(
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
) as mock_structured_block,
patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
),
):
mock_get_block.side_effect = lambda block_id: mock_blocks.get(block_id)
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
@@ -659,10 +680,13 @@ class TestGenerateActivityStatusForExecution:
mock_instance = mock_structured_block.return_value
async def mock_run(*args, **kwargs):
yield "response", {
"activity_status": "Agent completed execution.",
"correctness_score": 0.8,
}
yield (
"response",
{
"activity_status": "Agent completed execution.",
"correctness_score": 0.8,
},
)
mock_instance.run = mock_run
@@ -704,17 +728,21 @@ class TestIntegration:
expected_activity = "I processed user input but failed during final output generation due to system error."
with patch(
"backend.executor.activity_status_generator.get_block"
) as mock_get_block, patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings, patch(
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
) as mock_structured_block, patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
with (
patch(
"backend.executor.activity_status_generator.get_block"
) as mock_get_block,
patch(
"backend.executor.activity_status_generator.Settings"
) as mock_settings,
patch(
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
) as mock_structured_block,
patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
),
):
mock_get_block.side_effect = lambda block_id: mock_blocks.get(block_id)
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
@@ -722,10 +750,13 @@ class TestIntegration:
mock_instance = mock_structured_block.return_value
async def mock_run(*args, **kwargs):
yield "response", {
"activity_status": expected_activity,
"correctness_score": 0.3, # Low score since there was a failure
}
yield (
"response",
{
"activity_status": expected_activity,
"correctness_score": 0.3, # Low score since there was a failure
},
)
mock_instance.run = mock_run
@@ -774,3 +805,109 @@ class TestIntegration:
mock_db_client.get_node_executions.assert_not_called()
mock_db_client.get_graph_metadata.assert_not_called()
mock_db_client.get_graph.assert_not_called()
class TestObviousFailureDetection:
"""Tests for obvious failure detection that skips LLM analysis."""
def test_credit_exhaustion_detected(self):
"""Credit exhaustion errors should be detected."""
assert _is_credit_exhaustion("You have no credits left to run an agent.")
assert _is_credit_exhaustion(
"Insufficient balance of $0, where this will cost $1"
)
def test_credit_exhaustion_case_insensitive(self):
"""Detection should be case-insensitive."""
assert _is_credit_exhaustion("YOU HAVE NO CREDITS LEFT TO RUN AN AGENT.")
assert _is_credit_exhaustion("INSUFFICIENT BALANCE OF $0")
def test_non_credit_errors_not_matched(self):
"""Non-credit errors should not match."""
assert not _is_credit_exhaustion("Connection timeout")
assert not _is_credit_exhaustion("API rate limit exceeded")
assert not _is_credit_exhaustion("Invalid credentials")
assert not _is_credit_exhaustion("")
assert not _is_credit_exhaustion("Insufficient balance") # No trailing "of"
def test_partial_word_no_false_positive(self):
"""Similar words like 'credential' should not match 'credit'."""
assert not _is_credit_exhaustion("Invalid credential provided")
assert not _is_credit_exhaustion("Credential expired")
def test_check_obvious_failure_credit_exhaustion(self):
"""Credit exhaustion should return static response."""
stats = GraphExecutionStats(error="You have no credits left to run an agent.")
result = _check_obvious_failure(stats, ExecutionStatus.FAILED)
assert result is not None
assert result["correctness_score"] is None
assert "credits" in result["activity_status"].lower()
def test_check_obvious_failure_non_failed_status(self):
"""Non-FAILED status should always return None."""
stats = GraphExecutionStats(error="Some error")
assert _check_obvious_failure(stats, ExecutionStatus.COMPLETED) is None
assert _check_obvious_failure(stats, ExecutionStatus.TERMINATED) is None
assert _check_obvious_failure(stats, None) is None
def test_check_obvious_failure_unknown_error(self):
"""Unknown errors should return None (fall through to LLM)."""
stats = GraphExecutionStats(error="Some unexpected error occurred")
result = _check_obvious_failure(stats, ExecutionStatus.FAILED)
assert result is None
def test_check_obvious_failure_no_error(self):
"""FAILED status with no error string should return None."""
stats = GraphExecutionStats(error=None)
result = _check_obvious_failure(stats, ExecutionStatus.FAILED)
assert result is None
def test_check_obvious_failure_with_exception_object(self):
"""Exception objects (not just strings) should be handled via str()."""
error = Exception("You have no credits left to run an agent.")
stats = GraphExecutionStats(error=error)
result = _check_obvious_failure(stats, ExecutionStatus.FAILED)
assert result is not None
assert result["correctness_score"] is None
assert "credits" in result["activity_status"].lower()
@pytest.mark.asyncio
async def test_generate_skips_llm_for_credit_exhaustion(self):
"""Full integration: credit exhaustion should skip LLM and DB calls."""
stats = GraphExecutionStats(
error="You have no credits left to run an agent.",
node_count=0,
node_error_count=0,
)
mock_db_client = AsyncMock()
with (
patch(
"backend.executor.activity_status_generator.is_feature_enabled",
return_value=True,
),
patch(
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
) as mock_structured_block,
):
result = await generate_activity_status_for_execution(
graph_exec_id="test_exec",
graph_id="test_graph",
graph_version=1,
execution_stats=stats,
db_client=mock_db_client,
user_id="test_user",
execution_status=ExecutionStatus.FAILED,
skip_feature_flag=False,
)
assert result is not None
assert result["correctness_score"] is None
assert "credits" in result["activity_status"].lower()
# Verify NO database or LLM calls were made
mock_db_client.get_node_executions.assert_not_called()
mock_db_client.get_graph_metadata.assert_not_called()
mock_db_client.get_graph.assert_not_called()
mock_structured_block.assert_not_called()

View File

@@ -880,9 +880,11 @@ class ExecutionProcessor:
if activity_response is not None:
exec_stats.activity_status = activity_response["activity_status"]
exec_stats.correctness_score = activity_response["correctness_score"]
score = activity_response["correctness_score"]
score_str = f"{score:.2f}" if score is not None else "N/A"
log_metadata.info(
f"Generated activity status: {activity_response['activity_status']} "
f"(correctness: {activity_response['correctness_score']:.2f})"
f"(correctness: {score_str})"
)
else:
log_metadata.debug(