mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
Compare commits
6 Commits
ci/gate-e2
...
otto/secrt
| Author | SHA1 | Date | |
|---|---|---|---|
| | 5e883459e0 | | |
| | c9cb953274 | | |
| | 9ad68b4e1e | | |
| | b672e20b0d | | |
| | a21748b1a8 | | |
| | 63f6b5122a | | |
@@ -168,7 +168,7 @@ class ActivityStatusResponse(TypedDict):
|
||||
"""Type definition for structured activity status response."""
|
||||
|
||||
activity_status: str
|
||||
correctness_score: float
|
||||
correctness_score: float | None
|
||||
|
||||
|
||||
def _truncate_uuid(uuid_str: str) -> str:
|
||||
@@ -178,6 +178,45 @@ def _truncate_uuid(uuid_str: str) -> str:
|
||||
return uuid_str.split("-")[0] if "-" in uuid_str else uuid_str[:8]
|
||||
|
||||
|
||||
# Lower-cased substrings that identify a credit/balance exhaustion error.
# They are matched against the lower-cased error text, so every entry here
# must itself be lower-case.
_CREDIT_EXHAUSTION_MESSAGES = (
    "you have no credits left to run an agent.",
    "insufficient balance of",
)


def _is_credit_exhaustion(error_str: str) -> bool:
    """Check if the error indicates credit/balance exhaustion.

    The comparison is a case-insensitive substring match against the
    entries of ``_CREDIT_EXHAUSTION_MESSAGES``.

    Args:
        error_str: The error text to inspect. An empty string never matches.

    Returns:
        True if any known credit-exhaustion message occurs in ``error_str``,
        False otherwise.
    """
    error_lower = error_str.lower()
    return any(message in error_lower for message in _CREDIT_EXHAUSTION_MESSAGES)
|
||||
|
||||
|
||||
def _check_obvious_failure(
    execution_stats: GraphExecutionStats,
    execution_status: ExecutionStatus | None,
) -> ActivityStatusResponse | None:
    """
    Check if the execution failed for an obvious, deterministic reason
    that doesn't require LLM analysis.

    Args:
        execution_stats: Stats of the execution; only its ``error`` attribute
            is read, and it is converted with ``str()`` before matching.
        execution_status: Final status of the execution. Anything other than
            ``ExecutionStatus.FAILED`` (including ``None``) yields ``None``.

    Returns a static ActivityStatusResponse if matched, None otherwise.
    """
    # Only failed executions can be "obvious failures".
    if execution_status != ExecutionStatus.FAILED:
        return None

    # A missing/falsy error is treated as an empty string, which matches nothing.
    error_str = str(execution_stats.error) if execution_stats.error else ""

    if _is_credit_exhaustion(error_str):
        return {
            "activity_status": (
                "This run couldn't complete because your account has run out of credits. "
                "Please top up your credits to continue using this agent."
            ),
            # No score is assigned on this path.
            "correctness_score": None,
        }

    return None
|
||||
|
||||
|
||||
async def generate_activity_status_for_execution(
|
||||
graph_exec_id: str,
|
||||
graph_id: str,
|
||||
@@ -237,6 +276,14 @@ async def generate_activity_status_for_execution(
|
||||
"correctness_score": execution_stats.correctness_score,
|
||||
}
|
||||
|
||||
# Check for obvious failures that don't need LLM analysis
|
||||
obvious_result = _check_obvious_failure(execution_stats, execution_status)
|
||||
if obvious_result is not None:
|
||||
logger.info(
|
||||
f"Skipping LLM analysis for {graph_exec_id}: " "obvious failure detected"
|
||||
)
|
||||
return obvious_result
|
||||
|
||||
# Check if we have OpenAI API key
|
||||
try:
|
||||
settings = Settings()
|
||||
|
||||
@@ -12,6 +12,8 @@ from backend.data.execution import ExecutionStatus, NodeExecutionResult
|
||||
from backend.data.model import GraphExecutionStats
|
||||
from backend.executor.activity_status_generator import (
|
||||
_build_execution_summary,
|
||||
_check_obvious_failure,
|
||||
_is_credit_exhaustion,
|
||||
generate_activity_status_for_execution,
|
||||
)
|
||||
|
||||
@@ -379,8 +381,9 @@ class TestLLMCall:
|
||||
from backend.blocks.llm import AIStructuredResponseGeneratorBlock
|
||||
from backend.data.model import APIKeyCredentials
|
||||
|
||||
with patch("backend.blocks.llm.llm_call") as mock_llm_call, patch(
|
||||
"backend.blocks.llm.secrets.token_hex", return_value="test123"
|
||||
with (
|
||||
patch("backend.blocks.llm.llm_call") as mock_llm_call,
|
||||
patch("backend.blocks.llm.secrets.token_hex", return_value="test123"),
|
||||
):
|
||||
mock_llm_call.return_value = LLMResponse(
|
||||
raw_response={},
|
||||
@@ -442,8 +445,9 @@ class TestLLMCall:
|
||||
from backend.blocks.llm import AIStructuredResponseGeneratorBlock
|
||||
from backend.data.model import APIKeyCredentials
|
||||
|
||||
with patch("backend.blocks.llm.llm_call") as mock_llm_call, patch(
|
||||
"backend.blocks.llm.secrets.token_hex", return_value="test123"
|
||||
with (
|
||||
patch("backend.blocks.llm.llm_call") as mock_llm_call,
|
||||
patch("backend.blocks.llm.secrets.token_hex", return_value="test123"),
|
||||
):
|
||||
# Return invalid JSON that will fail validation (missing required field)
|
||||
mock_llm_call.return_value = LLMResponse(
|
||||
@@ -515,17 +519,21 @@ class TestGenerateActivityStatusForExecution:
|
||||
mock_graph.links = []
|
||||
mock_db_client.get_graph.return_value = mock_graph
|
||||
|
||||
with patch(
|
||||
"backend.executor.activity_status_generator.get_block"
|
||||
) as mock_get_block, patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings, patch(
|
||||
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
|
||||
) as mock_structured_block, patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
with (
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.get_block"
|
||||
) as mock_get_block,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
|
||||
) as mock_structured_block,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
),
|
||||
):
|
||||
|
||||
mock_get_block.side_effect = lambda block_id: mock_blocks.get(block_id)
|
||||
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
|
||||
|
||||
@@ -533,10 +541,13 @@ class TestGenerateActivityStatusForExecution:
|
||||
mock_instance = mock_structured_block.return_value
|
||||
|
||||
async def mock_run(*args, **kwargs):
|
||||
yield "response", {
|
||||
"activity_status": "I analyzed your data and provided the requested insights.",
|
||||
"correctness_score": 0.85,
|
||||
}
|
||||
yield (
|
||||
"response",
|
||||
{
|
||||
"activity_status": "I analyzed your data and provided the requested insights.",
|
||||
"correctness_score": 0.85,
|
||||
},
|
||||
)
|
||||
|
||||
mock_instance.run = mock_run
|
||||
|
||||
@@ -586,11 +597,14 @@ class TestGenerateActivityStatusForExecution:
|
||||
"""Test activity status generation with no API key."""
|
||||
mock_db_client = AsyncMock()
|
||||
|
||||
with patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings, patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
with (
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
),
|
||||
):
|
||||
mock_settings.return_value.secrets.openai_internal_api_key = ""
|
||||
|
||||
@@ -612,11 +626,14 @@ class TestGenerateActivityStatusForExecution:
|
||||
mock_db_client = AsyncMock()
|
||||
mock_db_client.get_node_executions.side_effect = Exception("Database error")
|
||||
|
||||
with patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings, patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
with (
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
),
|
||||
):
|
||||
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
|
||||
|
||||
@@ -641,17 +658,21 @@ class TestGenerateActivityStatusForExecution:
|
||||
mock_db_client.get_graph_metadata.return_value = None # No metadata
|
||||
mock_db_client.get_graph.return_value = None # No graph
|
||||
|
||||
with patch(
|
||||
"backend.executor.activity_status_generator.get_block"
|
||||
) as mock_get_block, patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings, patch(
|
||||
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
|
||||
) as mock_structured_block, patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
with (
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.get_block"
|
||||
) as mock_get_block,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
|
||||
) as mock_structured_block,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
),
|
||||
):
|
||||
|
||||
mock_get_block.side_effect = lambda block_id: mock_blocks.get(block_id)
|
||||
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
|
||||
|
||||
@@ -659,10 +680,13 @@ class TestGenerateActivityStatusForExecution:
|
||||
mock_instance = mock_structured_block.return_value
|
||||
|
||||
async def mock_run(*args, **kwargs):
|
||||
yield "response", {
|
||||
"activity_status": "Agent completed execution.",
|
||||
"correctness_score": 0.8,
|
||||
}
|
||||
yield (
|
||||
"response",
|
||||
{
|
||||
"activity_status": "Agent completed execution.",
|
||||
"correctness_score": 0.8,
|
||||
},
|
||||
)
|
||||
|
||||
mock_instance.run = mock_run
|
||||
|
||||
@@ -704,17 +728,21 @@ class TestIntegration:
|
||||
|
||||
expected_activity = "I processed user input but failed during final output generation due to system error."
|
||||
|
||||
with patch(
|
||||
"backend.executor.activity_status_generator.get_block"
|
||||
) as mock_get_block, patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings, patch(
|
||||
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
|
||||
) as mock_structured_block, patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
with (
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.get_block"
|
||||
) as mock_get_block,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.Settings"
|
||||
) as mock_settings,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
|
||||
) as mock_structured_block,
|
||||
patch(
|
||||
"backend.executor.activity_status_generator.is_feature_enabled",
|
||||
return_value=True,
|
||||
),
|
||||
):
|
||||
|
||||
mock_get_block.side_effect = lambda block_id: mock_blocks.get(block_id)
|
||||
mock_settings.return_value.secrets.openai_internal_api_key = "test_key"
|
||||
|
||||
@@ -722,10 +750,13 @@ class TestIntegration:
|
||||
mock_instance = mock_structured_block.return_value
|
||||
|
||||
async def mock_run(*args, **kwargs):
|
||||
yield "response", {
|
||||
"activity_status": expected_activity,
|
||||
"correctness_score": 0.3, # Low score since there was a failure
|
||||
}
|
||||
yield (
|
||||
"response",
|
||||
{
|
||||
"activity_status": expected_activity,
|
||||
"correctness_score": 0.3, # Low score since there was a failure
|
||||
},
|
||||
)
|
||||
|
||||
mock_instance.run = mock_run
|
||||
|
||||
@@ -774,3 +805,109 @@ class TestIntegration:
|
||||
mock_db_client.get_node_executions.assert_not_called()
|
||||
mock_db_client.get_graph_metadata.assert_not_called()
|
||||
mock_db_client.get_graph.assert_not_called()
|
||||
|
||||
|
||||
class TestObviousFailureDetection:
    """Tests for obvious failure detection that skips LLM analysis."""

    def test_credit_exhaustion_detected(self):
        """Credit exhaustion errors should be detected."""
        assert _is_credit_exhaustion("You have no credits left to run an agent.")
        assert _is_credit_exhaustion(
            "Insufficient balance of $0, where this will cost $1"
        )

    def test_credit_exhaustion_case_insensitive(self):
        """Detection should be case-insensitive."""
        assert _is_credit_exhaustion("YOU HAVE NO CREDITS LEFT TO RUN AN AGENT.")
        assert _is_credit_exhaustion("INSUFFICIENT BALANCE OF $0")

    def test_non_credit_errors_not_matched(self):
        """Non-credit errors should not match."""
        assert not _is_credit_exhaustion("Connection timeout")
        assert not _is_credit_exhaustion("API rate limit exceeded")
        assert not _is_credit_exhaustion("Invalid credentials")
        assert not _is_credit_exhaustion("")
        assert not _is_credit_exhaustion("Insufficient balance")  # No trailing "of"

    def test_partial_word_no_false_positive(self):
        """Similar words like 'credential' should not match 'credit'."""
        assert not _is_credit_exhaustion("Invalid credential provided")
        assert not _is_credit_exhaustion("Credential expired")

    def test_check_obvious_failure_credit_exhaustion(self):
        """Credit exhaustion should return static response."""
        stats = GraphExecutionStats(error="You have no credits left to run an agent.")
        result = _check_obvious_failure(stats, ExecutionStatus.FAILED)

        assert result is not None
        assert result["correctness_score"] is None
        assert "credits" in result["activity_status"].lower()

    def test_check_obvious_failure_non_failed_status(self):
        """Non-FAILED status should always return None."""
        stats = GraphExecutionStats(error="Some error")
        assert _check_obvious_failure(stats, ExecutionStatus.COMPLETED) is None
        assert _check_obvious_failure(stats, ExecutionStatus.TERMINATED) is None
        assert _check_obvious_failure(stats, None) is None

    def test_check_obvious_failure_unknown_error(self):
        """Unknown errors should return None (fall through to LLM)."""
        stats = GraphExecutionStats(error="Some unexpected error occurred")
        result = _check_obvious_failure(stats, ExecutionStatus.FAILED)
        assert result is None

    def test_check_obvious_failure_no_error(self):
        """FAILED status with no error string should return None."""
        stats = GraphExecutionStats(error=None)
        result = _check_obvious_failure(stats, ExecutionStatus.FAILED)
        assert result is None

    def test_check_obvious_failure_with_exception_object(self):
        """Exception objects (not just strings) should be handled via str()."""
        error = Exception("You have no credits left to run an agent.")
        stats = GraphExecutionStats(error=error)
        result = _check_obvious_failure(stats, ExecutionStatus.FAILED)
        assert result is not None
        assert result["correctness_score"] is None
        assert "credits" in result["activity_status"].lower()

    @pytest.mark.asyncio
    async def test_generate_skips_llm_for_credit_exhaustion(self):
        """Full integration: credit exhaustion should skip LLM and DB calls."""
        stats = GraphExecutionStats(
            error="You have no credits left to run an agent.",
            node_count=0,
            node_error_count=0,
        )
        mock_db_client = AsyncMock()

        # Settings is deliberately not patched: the test expects the function
        # to return before any API-key lookup or LLM block construction.
        with (
            patch(
                "backend.executor.activity_status_generator.is_feature_enabled",
                return_value=True,
            ),
            patch(
                "backend.executor.activity_status_generator.AIStructuredResponseGeneratorBlock"
            ) as mock_structured_block,
        ):
            result = await generate_activity_status_for_execution(
                graph_exec_id="test_exec",
                graph_id="test_graph",
                graph_version=1,
                execution_stats=stats,
                db_client=mock_db_client,
                user_id="test_user",
                execution_status=ExecutionStatus.FAILED,
                skip_feature_flag=False,
            )

            assert result is not None
            assert result["correctness_score"] is None
            assert "credits" in result["activity_status"].lower()

            # Verify NO database or LLM calls were made
            mock_db_client.get_node_executions.assert_not_called()
            mock_db_client.get_graph_metadata.assert_not_called()
            mock_db_client.get_graph.assert_not_called()
            mock_structured_block.assert_not_called()
|
||||
|
||||
@@ -880,9 +880,11 @@ class ExecutionProcessor:
|
||||
if activity_response is not None:
|
||||
exec_stats.activity_status = activity_response["activity_status"]
|
||||
exec_stats.correctness_score = activity_response["correctness_score"]
|
||||
score = activity_response["correctness_score"]
|
||||
score_str = f"{score:.2f}" if score is not None else "N/A"
|
||||
log_metadata.info(
|
||||
f"Generated activity status: {activity_response['activity_status']} "
|
||||
f"(correctness: {activity_response['correctness_score']:.2f})"
|
||||
f"(correctness: {score_str})"
|
||||
)
|
||||
else:
|
||||
log_metadata.debug(
|
||||
|
||||
Reference in New Issue
Block a user