test: add comprehensive e2e tests for all SmartDecisionMaker failure modes

Add test suites covering 17 identified failure modes:

1. Concurrency tests (test_smart_decision_maker_concurrency.py):
   - Conversation history race conditions
   - Concurrent execution state sharing
   - Pending tool call race conditions
   - Thread safety of cleanup function

2. Agent mode tests (test_smart_decision_maker_agent_mode.py):
   - Silent tool failures in agent mode
   - Unbounded iteration scenarios
   - Credential expiration mid-execution
   - Tool signature cache invalidation
   - Conversation growth management

3. Error handling tests (test_smart_decision_maker_error_handling.py):
   - JSON deserialization errors (malformed LLM responses)
   - Database transaction inconsistency
   - Missing null checks after DB calls
   - Error message context loss
   - Validation retry mechanism

4. Data integrity tests (test_smart_decision_maker_data_integrity.py):
   - Field name collision detection
   - Unhandled field mapping keys
   - Silent value loss in output routing
   - Tool call matching logic
   - Output emit key generation

5. Dynamic fields tests (test_dynamic_fields_edge_cases.py):
   - Type validation in dynamic field merging
   - Dynamic field path validation
   - Nested field extraction
   - Edge cases in merge_execution_input

6. Conversation tests (test_smart_decision_maker_conversation.py):
   - Conversation corruption in error paths
   - Tool response format validation
   - Conversation history preservation
   - Orphaned tool output handling

These tests document current buggy behavior and will help catch
regressions when fixes are implemented.
This commit is contained in:
Claude
2026-01-11 18:45:52 +00:00
parent 00207eb4c9
commit 3f29f71dd6
6 changed files with 4163 additions and 0 deletions

View File

@@ -0,0 +1,916 @@
"""
Tests for SmartDecisionMaker agent mode specific failure modes.
Covers failure modes:
2. Silent Tool Failures in Agent Mode
3. Unbounded Agent Mode Iterations
10. Unbounded Agent Iterations
12. Stale Credentials in Agent Mode
13. Tool Signature Cache Invalidation
"""
import asyncio
import json
import threading
from collections import defaultdict
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch
import pytest
from backend.blocks.smart_decision_maker import (
SmartDecisionMakerBlock,
ExecutionParams,
ToolInfo,
)
class TestSilentToolFailuresInAgentMode:
    """
    Failure Mode #2: Silent Tool Failures in Agent Mode.

    Documents that a tool which fails while the block runs in agent mode has
    its error folded into a tool response, after which execution carries on
    without surfacing the failure.
    """

    @staticmethod
    def _llm_reply(text, tool_calls, prompt_tokens, completion_tokens, raw_content):
        """Return a MagicMock carrying the LLM response attributes these tests set."""
        reply = MagicMock()
        reply.response = text
        reply.tool_calls = tool_calls
        reply.prompt_tokens = prompt_tokens
        reply.completion_tokens = completion_tokens
        reply.reasoning = None
        reply.raw_response = {"role": "assistant", "content": raw_content}
        return reply

    @classmethod
    def _tool_call_reply(cls, call_id, tool_name, arguments):
        """Return a mocked reply that requests exactly one call to ``tool_name``."""
        call = MagicMock()
        call.id = call_id
        call.function.name = tool_name
        call.function.arguments = json.dumps(arguments)
        return cls._llm_reply(
            None, [call], 50, 25, [{"type": "tool_use", "id": call_id}]
        )

    @staticmethod
    def _single_field_signature(tool_name, field, json_type):
        """Return a one-tool signature list with a single required field."""
        return [
            {
                "type": "function",
                "function": {
                    "name": tool_name,
                    "_sink_node_id": "sink-node",
                    "_field_mapping": {field: field},
                    "parameters": {
                        "properties": {field: {"type": json_type}},
                        "required": [field],
                    },
                },
            }
        ]

    @staticmethod
    async def _drive_block(
        block, fake_llm, signatures, db_client, prompt, max_iterations, timeout=None
    ):
        """
        Run ``block`` with the LLM call, signature builder and DB client patched.

        Returns the yielded outputs keyed by name. When ``timeout`` is given the
        run is wrapped in ``asyncio.timeout`` and may raise ``TimeoutError``.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        async def drain(block_input, context, processor):
            collected = {}
            async for name, value in block.run(
                block_input,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=context,
                execution_processor=processor,
            ):
                collected[name] = value
            return collected

        with (
            patch("backend.blocks.llm.llm_call", side_effect=fake_llm),
            patch.object(
                block, "_create_tool_node_signatures", return_value=signatures
            ),
            patch(
                "backend.blocks.smart_decision_maker.get_database_manager_async_client",
                return_value=db_client,
            ),
        ):
            context = ExecutionContext(safe_mode=False)
            processor = AsyncMock()
            processor.running_node_execution = defaultdict(MagicMock)
            processor.execution_stats = MagicMock()
            processor.execution_stats_lock = threading.Lock()
            block_input = SmartDecisionMakerBlock.Input(
                prompt=prompt,
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=max_iterations,
            )
            if timeout is None:
                return await drain(block_input, context, processor)
            async with asyncio.timeout(timeout):
                return await drain(block_input, context, processor)

    @pytest.mark.asyncio
    async def test_tool_execution_failure_converted_to_response(self):
        """
        A failing tool execution does not abort the run; the block still finishes.
        """
        block = SmartDecisionMakerBlock()

        # Turn one asks for the tool; turn two wraps up after seeing the error.
        tool_turn = self._tool_call_reply(
            "call_1", "failing_tool", {"param": "value"}
        )
        closing_turn = self._llm_reply(
            "I encountered an error", [], 30, 15, "I encountered an error"
        )
        llm_call_count = 0

        async def mock_llm_call(**kwargs):
            nonlocal llm_call_count
            llm_call_count += 1
            return tool_turn if llm_call_count == 1 else closing_turn

        # Database client whose node lookup always raises.
        failing_db = AsyncMock()
        failing_db.get_node.side_effect = Exception("Database connection failed!")

        outputs = await self._drive_block(
            block,
            mock_llm_call,
            self._single_field_signature("failing_tool", "param", "string"),
            failing_db,
            prompt="Do something",
            max_iterations=5,
        )

        # The execution completed (didn't crash).
        assert "finished" in outputs or "conversations" in outputs
        # BUG: the tool failure was silent - the user doesn't know what happened.
        # The error was just logged and converted to a tool response.

    @pytest.mark.asyncio
    async def test_tool_failure_causes_infinite_retry_loop(self):
        """
        The LLM keeps requesting the same failing tool in unbounded mode.

        If the tool fails but the LLM does not realise it, it may keep trying.
        """
        block = SmartDecisionMakerBlock()
        call_count = 0
        max_calls = 10  # Limit for test

        async def mock_llm_call(**kwargs):
            nonlocal call_count
            call_count += 1
            if call_count >= max_calls:
                # Eventually finish so the test itself cannot loop forever.
                return self._llm_reply("Giving up", [], 10, 5, "Giving up")
            return self._tool_call_reply(
                f"call_{call_count}", "persistent_tool", {"retry": call_count}
            )

        failing_db = AsyncMock()
        failing_db.get_node.side_effect = Exception("Always fails!")

        try:
            # -1 selects infinite mode; the timeout guards against a real hang.
            await self._drive_block(
                block,
                mock_llm_call,
                self._single_field_signature("persistent_tool", "retry", "integer"),
                failing_db,
                prompt="Keep trying",
                max_iterations=-1,
                timeout=5,
            )
        except asyncio.TimeoutError:
            pass  # Expected if we hit infinite loop

        # Document that many calls were made before we gave up.
        assert call_count >= max_calls - 1, \
            f"Expected many retries, got {call_count}"
class TestUnboundedAgentIterations:
    """
    Failure Modes #3 and #10: Unbounded Agent Mode Iterations.

    With max_iterations = -1 the agent can run forever, consuming unlimited
    tokens and compute resources.
    """

    @staticmethod
    def _llm_reply(text, tool_calls, prompt_tokens, completion_tokens, raw_content):
        """Return a MagicMock carrying the LLM response attributes these tests set."""
        reply = MagicMock()
        reply.response = text
        reply.tool_calls = tool_calls
        reply.prompt_tokens = prompt_tokens
        reply.completion_tokens = completion_tokens
        reply.reasoning = None
        reply.raw_response = {"role": "assistant", "content": raw_content}
        return reply

    @classmethod
    def _tool_call_reply(cls, call_id, tool_name, arguments):
        """Return a mocked reply that requests exactly one call to ``tool_name``."""
        call = MagicMock()
        call.id = call_id
        call.function.name = tool_name
        call.function.arguments = json.dumps(arguments)
        return cls._llm_reply(
            None, [call], 50, 25, [{"type": "tool_use", "id": call_id}]
        )

    @staticmethod
    def _working_db_client(upserted_input, execution_outputs):
        """Return an AsyncMock DB client whose node and execution calls succeed."""
        client = AsyncMock()
        node = MagicMock()
        node.block_id = "test-block"
        client.get_node.return_value = node
        exec_result = MagicMock()
        exec_result.node_exec_id = "exec-id"
        client.upsert_execution_input.return_value = (exec_result, upserted_input)
        client.get_execution_outputs_by_node_exec_id.return_value = execution_outputs
        return client

    @staticmethod
    async def _drive_block(
        block, fake_llm, signatures, db_client, prompt, max_iterations, timeout=None
    ):
        """
        Run ``block`` with the LLM call, signature builder and DB client patched.

        Returns the yielded outputs keyed by name. When ``timeout`` is given the
        run is wrapped in ``asyncio.timeout`` and may raise ``TimeoutError``.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        async def drain(block_input, context, processor):
            collected = {}
            async for name, value in block.run(
                block_input,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=context,
                execution_processor=processor,
            ):
                collected[name] = value
            return collected

        with (
            patch("backend.blocks.llm.llm_call", side_effect=fake_llm),
            patch.object(
                block, "_create_tool_node_signatures", return_value=signatures
            ),
            patch(
                "backend.blocks.smart_decision_maker.get_database_manager_async_client",
                return_value=db_client,
            ),
        ):
            context = ExecutionContext(safe_mode=False)
            processor = AsyncMock()
            processor.running_node_execution = defaultdict(MagicMock)
            processor.execution_stats = MagicMock()
            processor.execution_stats_lock = threading.Lock()
            processor.on_node_execution = AsyncMock(
                return_value=MagicMock(error=None)
            )
            block_input = SmartDecisionMakerBlock.Input(
                prompt=prompt,
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=max_iterations,
            )
            if timeout is None:
                return await drain(block_input, context, processor)
            async with asyncio.timeout(timeout):
                return await drain(block_input, context, processor)

    @pytest.mark.asyncio
    async def test_infinite_mode_requires_llm_to_stop(self):
        """
        Infinite mode (-1) only stops once the LLM stops making tool calls.
        """
        block = SmartDecisionMakerBlock()
        iterations = 0
        max_test_iterations = 20

        async def mock_llm_call(**kwargs):
            nonlocal iterations
            iterations += 1
            if iterations >= max_test_iterations:
                # Stop here so the test itself cannot loop forever.
                return self._llm_reply("Finally done", [], 10, 5, "Done")
            # Otherwise keep asking for the tool.
            return self._tool_call_reply(
                f"call_{iterations}", "counter_tool", {"count": iterations}
            )

        signatures = [
            {
                "type": "function",
                "function": {
                    "name": "counter_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {"count": "count"},
                    "parameters": {
                        "properties": {"count": {"type": "integer"}},
                        "required": ["count"],
                    },
                },
            }
        ]

        await self._drive_block(
            block,
            mock_llm_call,
            signatures,
            self._working_db_client({"count": 1}, {"result": "ok"}),
            prompt="Count forever",
            max_iterations=-1,  # INFINITE MODE
            timeout=10,
        )

        # We ran many iterations before stopping.
        assert iterations == max_test_iterations
        # BUG: No built-in safeguard against runaway iterations

    @pytest.mark.asyncio
    async def test_max_iterations_limit_enforced(self):
        """
        A positive max_iterations value caps the number of LLM calls.
        """
        block = SmartDecisionMakerBlock()
        iterations = 0

        async def mock_llm_call(**kwargs):
            nonlocal iterations
            iterations += 1
            # Always request the tool; never finish voluntarily.
            return self._tool_call_reply(f"call_{iterations}", "endless_tool", {})

        signatures = [
            {
                "type": "function",
                "function": {
                    "name": "endless_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {},
                    "parameters": {"properties": {}, "required": []},
                },
            }
        ]

        MAX_ITERATIONS = 3
        outputs = await self._drive_block(
            block,
            mock_llm_call,
            signatures,
            self._working_db_client({}, {}),
            prompt="Run forever",
            max_iterations=MAX_ITERATIONS,
        )

        # Should have stopped at max iterations.
        assert iterations == MAX_ITERATIONS
        assert "finished" in outputs
        assert "limit reached" in outputs["finished"].lower()
class TestStaleCredentialsInAgentMode:
    """
    Failure Mode #12: Stale Credentials in Agent Mode.

    Credentials are validated once at start but can expire during
    long-running agent mode executions.
    """

    # Both tests expose the same parameterless tool.
    _TEST_TOOL_SIGNATURES = [
        {
            "type": "function",
            "function": {
                "name": "test_tool",
                "_sink_node_id": "sink",
                "_field_mapping": {},
                "parameters": {"properties": {}, "required": []},
            },
        }
    ]

    @staticmethod
    def _llm_reply(text, tool_calls, prompt_tokens, completion_tokens, raw_content):
        """Return a MagicMock carrying the LLM response attributes these tests set."""
        reply = MagicMock()
        reply.response = text
        reply.tool_calls = tool_calls
        reply.prompt_tokens = prompt_tokens
        reply.completion_tokens = completion_tokens
        reply.reasoning = None
        reply.raw_response = {"role": "assistant", "content": raw_content}
        return reply

    @classmethod
    def _tool_call_reply(cls, call_id):
        """Return a mocked reply that requests one call to ``test_tool``."""
        call = MagicMock()
        call.id = call_id
        call.function.name = "test_tool"
        call.function.arguments = json.dumps({})
        return cls._llm_reply(
            None, [call], 50, 25, [{"type": "tool_use", "id": call_id}]
        )

    @classmethod
    async def _drive_block(cls, block, fake_llm, max_iterations):
        """
        Run ``block`` with the LLM call, signature builder and DB client patched.

        The DB client is an AsyncMock whose node lookup, input upsert and output
        fetch all succeed. Returns the yielded outputs keyed by name.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        db_client = AsyncMock()
        node = MagicMock()
        node.block_id = "test-block"
        db_client.get_node.return_value = node
        exec_result = MagicMock()
        exec_result.node_exec_id = "exec-id"
        db_client.upsert_execution_input.return_value = (exec_result, {})
        db_client.get_execution_outputs_by_node_exec_id.return_value = {}

        with (
            patch("backend.blocks.llm.llm_call", side_effect=fake_llm),
            patch.object(
                block,
                "_create_tool_node_signatures",
                return_value=cls._TEST_TOOL_SIGNATURES,
            ),
            patch(
                "backend.blocks.smart_decision_maker.get_database_manager_async_client",
                return_value=db_client,
            ),
        ):
            context = ExecutionContext(safe_mode=False)
            processor = AsyncMock()
            processor.running_node_execution = defaultdict(MagicMock)
            processor.execution_stats = MagicMock()
            processor.execution_stats_lock = threading.Lock()
            processor.on_node_execution = AsyncMock(
                return_value=MagicMock(error=None)
            )
            block_input = SmartDecisionMakerBlock.Input(
                prompt="Test credentials",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=max_iterations,
            )
            outputs = {}
            async for name, value in block.run(
                block_input,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=context,
                execution_processor=processor,
            ):
                outputs[name] = value
            return outputs

    @pytest.mark.asyncio
    async def test_credentials_not_revalidated_between_iterations(self):
        """
        Every LLM call in an agent-mode run receives the same credentials.

        The previous assertion compared two counters that were incremented
        together inside the mock, so it could never fail. This version records
        what each call actually received.
        """
        block = SmartDecisionMakerBlock()
        seen_credentials = []
        iteration = 0

        async def mock_llm_call(**kwargs):
            nonlocal iteration
            iteration += 1
            # NOTE(review): assumes llm_call receives ``credentials`` as a
            # keyword argument; if it does not, every entry is None and the
            # identity check below is trivially true - confirm against the block.
            seen_credentials.append(kwargs.get("credentials"))
            if iteration >= 3:
                return self._llm_reply("Done", [], 10, 5, "Done")
            return self._tool_call_reply(f"call_{iteration}")

        await self._drive_block(block, mock_llm_call, max_iterations=5)

        # The mock only finishes on its third call, so the loop must have
        # gone around several times.
        assert iteration >= 3
        assert len(seen_credentials) == iteration
        # Credentials were handed to each LLM call but never refreshed.
        # If they expired mid-execution, we'd get auth errors.
        first = seen_credentials[0]
        assert all(creds is first for creds in seen_credentials)

    @pytest.mark.asyncio
    async def test_credential_expiration_mid_execution(self):
        """
        An auth failure raised by the LLM call mid-run surfaces as an error output.
        """
        block = SmartDecisionMakerBlock()
        iteration = 0

        async def mock_llm_call_with_expiration(**kwargs):
            nonlocal iteration
            iteration += 1
            if iteration >= 3:
                # Simulate credential expiration.
                raise Exception("401 Unauthorized: API key expired")
            return self._tool_call_reply(f"call_{iteration}")

        outputs = await self._drive_block(
            block, mock_llm_call_with_expiration, max_iterations=10
        )

        # Should have an error output.
        assert "error" in outputs
        error_text = outputs["error"].lower()
        assert "expired" in error_text or "unauthorized" in error_text
class TestToolSignatureCacheInvalidation:
    """
    Failure Mode #13: Tool Signature Cache Invalidation.

    Tool signatures are created once at the start of run() but the
    graph could change during agent mode execution.
    """

    @pytest.mark.asyncio
    async def test_signatures_created_once_at_start(self):
        """
        Tool signatures are built a single time even across several iterations.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()
        signature_creation_count = 0
        iteration = 0

        async def counting_create_signatures(node_id):
            """Stand-in signature builder that counts how often it is awaited."""
            nonlocal signature_creation_count
            signature_creation_count += 1
            return [
                {
                    "type": "function",
                    "function": {
                        "name": "tool_v1",
                        "_sink_node_id": "sink",
                        "_field_mapping": {},
                        "parameters": {"properties": {}, "required": []},
                    },
                }
            ]

        def reply(text, tool_calls, prompt_tokens, completion_tokens, raw_content):
            """Build a MagicMock with the LLM response attributes set below."""
            resp = MagicMock()
            resp.response = text
            resp.tool_calls = tool_calls
            resp.prompt_tokens = prompt_tokens
            resp.completion_tokens = completion_tokens
            resp.reasoning = None
            resp.raw_response = {"role": "assistant", "content": raw_content}
            return resp

        async def mock_llm_call(**kwargs):
            nonlocal iteration
            iteration += 1
            if iteration >= 3:
                return reply("Done", [], 10, 5, "Done")
            tool_call = MagicMock()
            tool_call.id = f"call_{iteration}"
            tool_call.function.name = "tool_v1"
            tool_call.function.arguments = json.dumps({})
            return reply(
                None,
                [tool_call],
                50,
                25,
                [{"type": "tool_use", "id": f"call_{iteration}"}],
            )

        mock_db_client = AsyncMock()
        mock_node = MagicMock()
        mock_node.block_id = "test-block"
        mock_db_client.get_node.return_value = mock_node
        mock_exec_result = MagicMock()
        mock_exec_result.node_exec_id = "exec-id"
        mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
        mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}

        with (
            patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call),
            patch.object(
                block,
                "_create_tool_node_signatures",
                side_effect=counting_create_signatures,
            ),
            patch(
                "backend.blocks.smart_decision_maker.get_database_manager_async_client",
                return_value=mock_db_client,
            ),
        ):
            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = AsyncMock()
            mock_execution_processor.running_node_execution = defaultdict(MagicMock)
            mock_execution_processor.execution_stats = MagicMock()
            mock_execution_processor.execution_stats_lock = threading.Lock()
            mock_execution_processor.on_node_execution = AsyncMock(
                return_value=MagicMock(error=None)
            )
            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test signatures",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=5,
            )
            outputs = {}
            async for name, value in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=mock_execution_context,
                execution_processor=mock_execution_processor,
            ):
                outputs[name] = value

        # Signatures were only created once, even though we had multiple iterations.
        assert signature_creation_count == 1
        assert iteration >= 3  # We had multiple iterations

    @pytest.mark.asyncio
    async def test_stale_signatures_cause_tool_mismatch(self):
        """
        Placeholder documenting the stale-signature scenario; asserts nothing.

        The potential issue:
          1. Agent starts with tool_v1
          2. User modifies graph, tool becomes tool_v2
          3. Agent still thinks tool_v1 exists
          4. LLM calls tool_v1, but it no longer exists

        Since signatures are created once at start and never refreshed, any
        changes to the graph during execution won't be reflected.

        This is more of a documentation test - the actual fix would require
        either:
          a) Refreshing signatures periodically
          b) Locking the graph during execution
          c) Checking tool existence before each call
        """
class TestAgentModeConversationManagement:
    """Tests for conversation management in agent mode."""

    @pytest.mark.asyncio
    async def test_conversation_grows_with_iterations(self):
        """
        The prompt handed to the LLM gets longer on every agent-mode iteration.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()
        iteration = 0
        prompt_sizes = []

        def reply(text, tool_calls, prompt_tokens, completion_tokens, raw_content):
            """Build a MagicMock with the LLM response attributes set below."""
            resp = MagicMock()
            resp.response = text
            resp.tool_calls = tool_calls
            resp.prompt_tokens = prompt_tokens
            resp.completion_tokens = completion_tokens
            resp.reasoning = None
            resp.raw_response = {"role": "assistant", "content": raw_content}
            return resp

        async def mock_llm_call(**kwargs):
            nonlocal iteration
            iteration += 1
            # Record how long the conversation is at the moment of each call.
            prompt_sizes.append(len(kwargs.get("prompt", [])))
            if iteration >= 3:
                return reply("Done", [], 10, 5, "Done")
            tool_call = MagicMock()
            tool_call.id = f"call_{iteration}"
            tool_call.function.name = "test_tool"
            tool_call.function.arguments = json.dumps({})
            return reply(
                None,
                [tool_call],
                50,
                25,
                [{"type": "tool_use", "id": f"call_{iteration}"}],
            )

        tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {},
                    "parameters": {"properties": {}, "required": []},
                },
            }
        ]

        db_client = AsyncMock()
        node = MagicMock()
        node.block_id = "test-block"
        db_client.get_node.return_value = node
        exec_result = MagicMock()
        exec_result.node_exec_id = "exec-id"
        db_client.upsert_execution_input.return_value = (exec_result, {})
        db_client.get_execution_outputs_by_node_exec_id.return_value = {
            "result": "ok"
        }

        with (
            patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call),
            patch.object(
                block, "_create_tool_node_signatures", return_value=tool_signatures
            ),
            patch(
                "backend.blocks.smart_decision_maker.get_database_manager_async_client",
                return_value=db_client,
            ),
        ):
            execution_context = ExecutionContext(safe_mode=False)
            execution_processor = AsyncMock()
            execution_processor.running_node_execution = defaultdict(MagicMock)
            execution_processor.execution_stats = MagicMock()
            execution_processor.execution_stats_lock = threading.Lock()
            execution_processor.on_node_execution = AsyncMock(
                return_value=MagicMock(error=None)
            )
            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test conversation",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=5,
            )
            outputs = {}
            async for name, value in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=execution_context,
                execution_processor=execution_processor,
            ):
                outputs[name] = value

        # Conversation should grow with each iteration.
        # Each iteration adds: assistant message + tool response.
        assert len(prompt_sizes) == 3
        for earlier, later in zip(prompt_sizes, prompt_sizes[1:]):
            assert later > earlier, \
                f"Conversation should grow: {prompt_sizes}"

View File

@@ -0,0 +1,525 @@
"""
Tests for SmartDecisionMaker concurrency issues and race conditions.
Covers failure modes:
1. Conversation History Race Condition
4. Concurrent Execution State Sharing
7. Race in Pending Tool Calls
11. Race in Pending Tool Call Retrieval
14. Concurrent State Sharing
"""
import asyncio
import json
import threading
from collections import Counter
from concurrent.futures import ThreadPoolExecutor
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch
import pytest
from backend.blocks.smart_decision_maker import (
SmartDecisionMakerBlock,
get_pending_tool_calls,
_create_tool_response,
_get_tool_requests,
_get_tool_responses,
)
class TestConversationHistoryRaceCondition:
    """
    Failure Mode #1: Conversation History Race Condition.

    When multiple executions share conversation history, concurrent
    modifications can cause data loss or corruption.
    """

    def test_get_pending_tool_calls_with_concurrent_modification(self):
        """
        Read pending tool calls while another thread mutates the history.

        The pending counts themselves may legitimately differ between reads;
        what is asserted is that every read either produced a count or a
        recorded error, so nothing is lost silently.
        """
        reader_count = 3
        reads_per_reader = 100

        # Shared conversation history: one assistant turn with three tool uses.
        conversation_history = [
            {
                "role": "assistant",
                "content": [
                    {"type": "tool_use", "id": "toolu_1"},
                    {"type": "tool_use", "id": "toolu_2"},
                    {"type": "tool_use", "id": "toolu_3"},
                ],
            }
        ]
        results = []
        errors = []

        def reader_thread():
            """Repeatedly read pending calls."""
            for _ in range(reads_per_reader):
                try:
                    pending = get_pending_tool_calls(conversation_history)
                    results.append(len(pending))
                except Exception as e:
                    errors.append(str(e))

        def writer_thread():
            """Modify conversation while readers are active."""
            for i in range(50):
                # Add a tool response ...
                conversation_history.append({
                    "role": "user",
                    "content": [
                        {"type": "tool_result", "tool_use_id": f"toolu_{(i % 3) + 1}"}
                    ],
                })
                # ... and take it away again.
                if len(conversation_history) > 1:
                    conversation_history.pop()

        threads = [
            threading.Thread(target=reader_thread) for _ in range(reader_count)
        ]
        threads.append(threading.Thread(target=writer_thread))
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # The issue: results may be inconsistent due to race conditions.
        # When fixed, all results should be consistent.
        assert len(results) > 0, "Should have some results"
        # Each read lands in exactly one of the two lists.
        assert len(results) + len(errors) == reader_count * reads_per_reader

    def test_prompt_list_mutation_race(self):
        """
        Append to and extend a shared prompt list while a reader iterates it.

        Iterating a list that is being appended to does not raise in CPython
        (unlike dicts, which raise "changed size during iteration"), so the
        reader is expected to collect no errors and no message may be lost.
        """
        messages_per_writer = 100
        prompt = []
        errors = []

        def appender():
            for i in range(messages_per_writer):
                prompt.append({"role": "user", "content": f"msg_{i}"})

        def extender():
            for i in range(messages_per_writer):
                prompt.extend([{"role": "assistant", "content": f"resp_{i}"}])

        def reader():
            for _ in range(100):
                try:
                    # Iterate while the other threads modify the list.
                    _ = [p for p in prompt if p.get("role") == "user"]
                except RuntimeError as e:
                    errors.append(str(e))

        threads = [
            threading.Thread(target=appender),
            threading.Thread(target=extender),
            threading.Thread(target=reader),
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert not errors, f"Reader raised during concurrent mutation: {errors}"
        # Both writers must have landed every message.
        roles = Counter(message["role"] for message in prompt)
        assert roles["user"] == messages_per_writer
        assert roles["assistant"] == messages_per_writer
        assert len(prompt) == 2 * messages_per_writer

    @pytest.mark.asyncio
    async def test_concurrent_block_runs_share_state(self):
        """
        Concurrent runs on one block instance must each get their own response.

        This is Failure Mode #14: Concurrent State Sharing.

        The LLM call is patched once for all runs. Patching inside each task
        would have the tasks overwrite and restore the same module attribute
        in interleaved order, so the test itself could cause the contamination
        it is looking for. A ContextVar identifies the run instead: each task
        started by asyncio.gather has its own context copy.
        """
        import contextvars

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()
        run_count = 5
        current_run = contextvars.ContextVar("current_run")
        # Appended to from coroutines on a single event loop; no lock needed.
        all_outputs = []

        async def fake_llm_call(*args, **kwargs):
            """Return a response tagged with the id of the calling run."""
            run_id = current_run.get()
            mock_response = MagicMock()
            mock_response.response = f"Response for run {run_id}"
            mock_response.tool_calls = []  # No tool calls, just finish
            mock_response.prompt_tokens = 50
            mock_response.completion_tokens = 25
            mock_response.reasoning = None
            mock_response.raw_response = {
                "role": "assistant",
                "content": f"Run {run_id}",
            }
            return mock_response

        async def run_block(run_id: int):
            """Run the block with a unique run_id."""
            current_run.set(run_id)
            input_data = SmartDecisionMakerBlock.Input(
                prompt=f"Prompt for run {run_id}",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
            )
            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = MagicMock()
            outputs = {}
            async for output_name, output_data in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id=f"graph-{run_id}",
                node_id=f"node-{run_id}",
                graph_exec_id=f"exec-{run_id}",
                node_exec_id=f"node-exec-{run_id}",
                user_id=f"user-{run_id}",
                graph_version=1,
                execution_context=mock_execution_context,
                execution_processor=mock_execution_processor,
            ):
                outputs[output_name] = output_data
            all_outputs.append((run_id, outputs))

        with patch("backend.blocks.llm.llm_call", side_effect=fake_llm_call):
            with patch.object(block, "_create_tool_node_signatures", return_value=[]):
                # Run multiple concurrent executions.
                await asyncio.gather(*(run_block(i) for i in range(run_count)))

        # Every run must have reported back exactly once.
        assert sorted(run_id for run_id, _ in all_outputs) == list(range(run_count))
        # Verify each run got its own response (no cross-contamination).
        # NOTE(review): assumes the block emits "finished" when the LLM makes
        # no tool calls; the original skipped the check when it was absent.
        for run_id, outputs in all_outputs:
            assert outputs.get("finished") == f"Response for run {run_id}", \
                f"Run {run_id} may have received contaminated response: {outputs}"
class TestPendingToolCallRace:
    """
    Tests for Failure Mode #7 and #11: Race in Pending Tool Calls

    get_pending_tool_calls can race with modifications to the conversation
    history, which may surface as StopIteration or as incorrect counts.
    """

    @staticmethod
    def _assistant_requests(*call_ids):
        """Build an assistant message with one tool_use entry per call id."""
        return {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": call_id} for call_id in call_ids],
        }

    @staticmethod
    def _user_result(call_id):
        """Build a user message carrying a single tool_result for call_id."""
        return {
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": call_id}],
        }

    def test_pending_tool_calls_counter_accuracy(self):
        """Three requested calls with one answered leave two pending."""
        history = [
            self._assistant_requests("call_1", "call_2", "call_3"),
            self._user_result("call_1"),
        ]

        pending = get_pending_tool_calls(history)

        # call_2 and call_3 are still waiting, each exactly once.
        assert len(pending) == 2
        for outstanding in ("call_2", "call_3"):
            assert outstanding in pending
        for outstanding in ("call_2", "call_3"):
            assert pending[outstanding] == 1

    def test_pending_tool_calls_duplicate_responses(self):
        """A call answered twice must not be reported as pending."""
        history = [
            self._assistant_requests("call_1"),
            # The same call is answered twice.
            self._user_result("call_1"),
            self._user_result("call_1"),
        ]

        pending = get_pending_tool_calls(history)

        # One request minus two responses gives -1; either the key is
        # absent or its count is not positive.
        assert "call_1" not in pending or pending.get("call_1", 0) <= 0

    def test_empty_conversation_no_pending(self):
        """An empty or missing conversation yields no pending calls."""
        for conversation in ([], None):
            assert get_pending_tool_calls(conversation) == {}

    def test_next_iter_on_empty_dict_raises_stop_iteration(self):
        """
        Document the StopIteration vulnerability.

        If pending_tool_calls becomes empty between the truthiness check and
        the next(iter(...)) call, StopIteration is raised.
        """
        pending = {}

        # Pattern used in smart_decision_maker.py:1019:
        #     if pending_tool_calls and ...:
        #         first_call_id = next(iter(pending_tool_calls.keys()))
        with pytest.raises(StopIteration):
            next(iter(pending.keys()))

        # The safe variant supplies a default:
        #     first_call_id = next(iter(pending_tool_calls.keys()), None)
        assert next(iter(pending.keys()), None) is None
class TestToolRequestResponseParsing:
    """Edge cases for extracting tool request and response ids from messages."""

    def test_get_tool_requests_openai_format(self):
        """OpenAI-style requests are read from the ``tool_calls`` list."""
        call_ids = ["call_abc123", "call_def456"]
        entry = {
            "role": "assistant",
            "tool_calls": [{"id": call_id} for call_id in call_ids],
        }

        assert _get_tool_requests(entry) == ["call_abc123", "call_def456"]

    def test_get_tool_requests_anthropic_format(self):
        """Anthropic-style requests are the ``tool_use`` items in ``content``."""
        content = [
            {"type": "tool_use", "id": "toolu_abc123"},
            {"type": "text", "text": "Let me call this tool"},
            {"type": "tool_use", "id": "toolu_def456"},
        ]
        entry = {"role": "assistant", "content": content}

        assert _get_tool_requests(entry) == ["toolu_abc123", "toolu_def456"]

    def test_get_tool_requests_non_assistant_role(self):
        """Messages from roles other than assistant carry no tool requests."""
        user_entry = {"role": "user", "tool_calls": [{"id": "call_123"}]}

        assert _get_tool_requests(user_entry) == []

    def test_get_tool_responses_openai_format(self):
        """OpenAI-style responses expose their id through ``tool_call_id``."""
        entry = {
            "role": "tool",
            "tool_call_id": "call_abc123",
            "content": "Result",
        }

        assert _get_tool_responses(entry) == ["call_abc123"]

    def test_get_tool_responses_anthropic_format(self):
        """Anthropic-style responses are the ``tool_result`` items in ``content``."""
        use_ids = ["toolu_abc123", "toolu_def456"]
        entry = {
            "role": "user",
            "content": [
                {"type": "tool_result", "tool_use_id": use_id} for use_id in use_ids
            ],
        }

        assert _get_tool_responses(entry) == ["toolu_abc123", "toolu_def456"]

    def test_get_tool_responses_mixed_content(self):
        """Only ``tool_result`` items count when content mixes several types."""
        content = [
            {"type": "text", "text": "Here are the results"},
            {"type": "tool_result", "tool_use_id": "toolu_123"},
            {"type": "image", "url": "http://example.com/img.png"},
        ]
        entry = {"role": "user", "content": content}

        assert _get_tool_responses(entry) == ["toolu_123"]
class TestConcurrentToolSignatureCreation:
    """Concurrent creation of tool signatures for one node."""

    @pytest.mark.asyncio
    async def test_concurrent_signature_creation_same_node(self):
        """
        Build the signature for the same node ten times concurrently and
        check that every result has the same function name and the same
        set of parameter properties.
        """
        block = SmartDecisionMakerBlock()

        # Input schema stub: empty JSON schema, string type for every field.
        input_schema = Mock()
        input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )
        input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        mock_node = Mock()
        mock_node.id = "test-node"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = input_schema

        mock_links = [
            Mock(sink_name=field, sink_id="test-node", source_id="source")
            for field in ("field1", "field2")
        ]

        # Launch all signature builds at once.
        results = await asyncio.gather(
            *(
                block._create_block_function_signature(mock_node, mock_links)
                for _ in range(10)
            )
        )

        # Every later result is compared against the first one.
        baseline = results[0]
        for i, result in enumerate(results[1:], 1):
            assert result["function"]["name"] == baseline["function"]["name"], \
                f"Result {i} has different name"
            result_props = set(result["function"]["parameters"]["properties"].keys())
            baseline_props = set(baseline["function"]["parameters"]["properties"].keys())
            assert result_props == baseline_props, \
                f"Result {i} has different properties"
class TestThreadSafetyOfCleanup:
    """Tests for thread safety of cleanup function."""

    def test_cleanup_is_thread_safe(self):
        """
        Test that SmartDecisionMakerBlock.cleanup gives stable results
        when called from many threads at once.

        Each input is cleaned by three threads, 100 times per thread. Every
        thread must report back, and the only value it may have seen is the
        one a single-threaded call produces for the same input.
        """
        test_inputs = [
            "Max Keyword Difficulty",
            "Search Volume (Monthly)",
            "CPC ($)",
            "Target URL",
        ]
        threads_per_input = 3

        # Single-threaded baseline, computed before any worker starts.
        expected = {
            input_str: SmartDecisionMakerBlock.cleanup(input_str)
            for input_str in test_inputs
        }

        results = {}
        lock = threading.Lock()

        def worker(input_str: str, thread_id: int):
            for _ in range(100):
                result = SmartDecisionMakerBlock.cleanup(input_str)
                with lock:
                    results.setdefault((thread_id, input_str), set()).add(result)

        threads = []
        for i, input_str in enumerate(test_inputs):
            for j in range(threads_per_input):
                thread_id = i * threads_per_input + j
                threads.append(
                    threading.Thread(target=worker, args=(input_str, thread_id))
                )

        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # An exception inside a worker only ends that thread; without this
        # check a crashing cleanup would leave no entry and the loop below
        # would pass without checking anything.
        assert len(results) == len(threads), \
            f"Expected results from {len(threads)} threads, got {len(results)}"

        # Each input should produce exactly one output, equal to the baseline.
        for (thread_id, input_str), values in results.items():
            assert values == {expected[input_str]}, \
                f"Non-deterministic cleanup for {thread_id}_{input_str}: {values}"
class TestAsyncConcurrencyPatterns:
    """Tests for async concurrency patterns in the block."""

    @pytest.mark.asyncio
    async def test_multiple_async_runs_isolation(self):
        """
        Test that multiple async runs are properly isolated.

        Several coroutines run the same block instance concurrently, each
        with its own mocked LLM response. Every run must emit a "finished"
        output containing its own run id.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()
        run_count = 5

        async def single_run(run_id: int):
            """Run the block once and return (run_id, outputs)."""
            mock_response = MagicMock()
            mock_response.response = f"Unique response {run_id}"
            mock_response.tool_calls = []
            mock_response.prompt_tokens = 10
            mock_response.completion_tokens = 5
            mock_response.reasoning = None
            mock_response.raw_response = {"role": "assistant", "content": f"Run {run_id}"}

            # Stagger the starts slightly to increase the chance of interleaving.
            await asyncio.sleep(0.001 * (run_id % 3))

            with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
                mock_llm.return_value = mock_response
                with patch.object(block, "_create_tool_node_signatures", return_value=[]):
                    input_data = SmartDecisionMakerBlock.Input(
                        prompt=f"Prompt {run_id}",
                        model=llm_module.DEFAULT_LLM_MODEL,
                        credentials=llm_module.TEST_CREDENTIALS_INPUT,
                        agent_mode_max_iterations=0,
                    )

                    outputs = {}
                    async for name, value in block.run(
                        input_data,
                        credentials=llm_module.TEST_CREDENTIALS,
                        graph_id=f"g{run_id}",
                        node_id=f"n{run_id}",
                        graph_exec_id=f"e{run_id}",
                        node_exec_id=f"ne{run_id}",
                        user_id=f"u{run_id}",
                        graph_version=1,
                        execution_context=ExecutionContext(safe_mode=False),
                        execution_processor=MagicMock(),
                    ):
                        outputs[name] = value

            return run_id, outputs

        # Run all concurrently; gather() yields one (run_id, outputs) per run.
        results = await asyncio.gather(*(single_run(i) for i in range(run_count)))

        # Verify isolation.
        for run_id, outputs in results:
            # A missing "finished" output must fail the test rather than
            # skip the isolation check for that run.
            assert "finished" in outputs, \
                f"Run {run_id} produced no 'finished' output: {outputs}"
            assert str(run_id) in outputs["finished"], \
                f"Run {run_id} got wrong response: {outputs['finished']}"

View File

@@ -0,0 +1,667 @@
"""
Tests for SmartDecisionMaker conversation handling and corruption scenarios.
Covers failure modes:
6. Conversation Corruption in Error Paths
And related conversation management issues.
"""
import json
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch
import pytest
from backend.blocks.smart_decision_maker import (
SmartDecisionMakerBlock,
get_pending_tool_calls,
_create_tool_response,
_combine_tool_responses,
_convert_raw_response_to_dict,
_get_tool_requests,
_get_tool_responses,
)
class TestConversationCorruptionInErrorPaths:
    """
    Tests for Failure Mode #6: Conversation Corruption in Error Paths

    On a logic error (orphaned tool output) the code appends the output as a
    "user" message instead of using the proper tool response format, which
    violates the LLM conversation structure.
    """

    @pytest.mark.asyncio
    async def test_orphaned_tool_output_creates_user_message(self):
        """
        Run the block with a tool output but no pending tool calls.

        NOTE: this test only checks that the run completes; it makes no
        assertion about how the orphaned output ends up in the conversation.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        # The LLM answers directly, without requesting any tool.
        llm_reply = MagicMock(
            response="No tools needed",
            tool_calls=[],
            prompt_tokens=50,
            completion_tokens=25,
            reasoning=None,
            raw_response={"role": "assistant", "content": "No tools needed"},
        )

        with patch(
            "backend.blocks.llm.llm_call", new_callable=AsyncMock
        ) as mock_llm, patch.object(
            block, "_create_tool_node_signatures", return_value=[]
        ):
            mock_llm.return_value = llm_reply

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
                # Orphaned: there is an output, but the empty history means
                # no tool call is waiting for it.
                last_tool_output={"result": "orphaned data"},
                conversation_history=[],
            )

            outputs = {
                name: value
                async for name, value in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=ExecutionContext(safe_mode=False),
                    execution_processor=MagicMock(),
                )
            }

        # The orphaned output is logged as an error but may still be added
        # as a user message. That is the BUG being documented: orphaned
        # outputs should not be added to the conversation.

    def test_create_tool_response_anthropic_format(self):
        """A ``toolu_`` id produces an Anthropic-style tool_result message."""
        response = _create_tool_response("toolu_abc123", {"result": "success"})

        assert response["role"] == "user"
        assert response["type"] == "message"
        assert isinstance(response["content"], list)

        first_item = response["content"][0]
        assert first_item["type"] == "tool_result"
        assert first_item["tool_use_id"] == "toolu_abc123"

    def test_create_tool_response_openai_format(self):
        """A ``call_`` id produces an OpenAI-style tool message."""
        response = _create_tool_response("call_abc123", {"result": "success"})

        assert response["role"] == "tool"
        assert response["tool_call_id"] == "call_abc123"
        assert "content" in response

    def test_tool_response_with_string_content(self):
        """String output is used as the content unchanged."""
        response = _create_tool_response("call_123", "Simple string result")

        assert response["content"] == "Simple string result"

    def test_tool_response_with_complex_content(self):
        """Structured output is stored as JSON that parses back to the input."""
        payload = {
            "nested": {"key": "value"},
            "list": [1, 2, 3],
            "null": None,
        }

        response = _create_tool_response("call_123", payload)

        # The content is expected to be a JSON string.
        assert json.loads(response["content"]) == payload
class TestCombineToolResponses:
    """Tests for merging several tool responses into conversation messages."""

    def test_combine_single_response_unchanged(self):
        """A list with one response comes back equal to the input."""
        single = {
            "role": "user",
            "type": "message",
            "content": [{"type": "tool_result", "tool_use_id": "123"}],
        }
        responses = [single]

        assert _combine_tool_responses(responses) == responses

    def test_combine_multiple_anthropic_responses(self):
        """Two Anthropic responses are merged into one user message."""
        responses = [
            {
                "role": "user",
                "type": "message",
                "content": [
                    {"type": "tool_result", "tool_use_id": use_id, "content": text}
                ],
            }
            for use_id, text in (("123", "a"), ("456", "b"))
        ]

        combined = _combine_tool_responses(responses)

        # One message holding both tool results.
        assert len(combined) == 1
        merged = combined[0]
        assert merged["role"] == "user"
        assert len(merged["content"]) == 2

    def test_combine_mixed_responses(self):
        """An Anthropic and an OpenAI response stay as two messages."""
        anthropic_response = {
            "role": "user",
            "type": "message",
            "content": [{"type": "tool_result", "tool_use_id": "123"}],
        }
        openai_response = {
            "role": "tool",
            "tool_call_id": "call_456",
            "content": "openai result",
        }

        combined = _combine_tool_responses([anthropic_response, openai_response])

        # The Anthropic response is combined, the OpenAI one is kept separate.
        assert len(combined) == 2

    def test_combine_empty_list(self):
        """An empty input gives an empty result."""
        assert _combine_tool_responses([]) == []
class TestConversationHistoryValidation:
    """Tests for pending tool call detection over a conversation history."""

    @staticmethod
    def _tool_use_message(*call_ids):
        """Assistant message with one Anthropic tool_use item per call id."""
        return {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": call_id} for call_id in call_ids],
        }

    @staticmethod
    def _tool_result_message(call_id):
        """User message with a single Anthropic tool_result for call_id."""
        return {
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": call_id}],
        }

    def test_pending_tool_calls_basic(self):
        """Two requested calls without responses are both pending."""
        history = [self._tool_use_message("call_1", "call_2")]

        pending = get_pending_tool_calls(history)

        assert len(pending) == 2
        assert "call_1" in pending
        assert "call_2" in pending

    def test_pending_tool_calls_with_responses(self):
        """Answering one of two calls leaves only the other pending."""
        history = [
            self._tool_use_message("call_1", "call_2"),
            self._tool_result_message("call_1"),
        ]

        pending = get_pending_tool_calls(history)

        assert len(pending) == 1
        assert "call_2" in pending
        assert "call_1" not in pending

    def test_pending_tool_calls_all_responded(self):
        """Nothing is pending once every call has a response."""
        history = [
            self._tool_use_message("call_1"),
            self._tool_result_message("call_1"),
        ]

        assert len(get_pending_tool_calls(history)) == 0

    def test_pending_tool_calls_openai_format(self):
        """OpenAI-style tool_calls / tool messages are counted the same way."""
        assistant_message = {
            "role": "assistant",
            "tool_calls": [{"id": "call_1"}, {"id": "call_2"}],
        }
        tool_message = {
            "role": "tool",
            "tool_call_id": "call_1",
            "content": "result",
        }

        pending = get_pending_tool_calls([assistant_message, tool_message])

        assert len(pending) == 1
        assert "call_2" in pending
class TestConversationUpdateBehavior:
    """Tests for what the block emits after a single LLM call."""

    @staticmethod
    async def _collect_outputs(block, input_data, llm_module, execution_context):
        """Run the block with the fixed test ids and return its outputs as a dict."""
        return {
            name: value
            async for name, value in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=execution_context,
                execution_processor=MagicMock(),
            )
        }

    @pytest.mark.asyncio
    async def test_conversation_includes_assistant_response(self):
        """A plain answer without tool calls is emitted as ``finished``."""
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        llm_reply = MagicMock(
            response="Final answer",
            tool_calls=[],
            prompt_tokens=50,
            completion_tokens=25,
            reasoning=None,
            raw_response={"role": "assistant", "content": "Final answer"},
        )

        with patch(
            "backend.blocks.llm.llm_call", new_callable=AsyncMock
        ) as mock_llm, patch.object(
            block, "_create_tool_node_signatures", return_value=[]
        ):
            mock_llm.return_value = llm_reply

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
            )
            outputs = await self._collect_outputs(
                block, input_data, llm_module, ExecutionContext(safe_mode=False)
            )

        # Without tool calls there is no conversations output, only finished.
        assert "finished" in outputs
        assert outputs["finished"] == "Final answer"

    @pytest.mark.asyncio
    async def test_conversation_with_tool_calls(self):
        """A reply with a tool call yields a conversation containing an assistant message."""
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        tool_call = MagicMock()
        tool_call.function.name = "test_tool"
        tool_call.function.arguments = json.dumps({"param": "value"})

        llm_reply = MagicMock(
            response=None,
            tool_calls=[tool_call],
            prompt_tokens=50,
            completion_tokens=25,
            reasoning="I'll use the test tool",
            raw_response={
                "role": "assistant",
                "content": None,
                "tool_calls": [{"id": "call_1"}],
            },
        )

        tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {"param": "param"},
                    "parameters": {
                        "properties": {"param": {"type": "string"}},
                        "required": ["param"],
                    },
                },
            }
        ]

        with patch(
            "backend.blocks.llm.llm_call", new_callable=AsyncMock
        ) as mock_llm, patch.object(
            block, "_create_tool_node_signatures", return_value=tool_signatures
        ):
            mock_llm.return_value = llm_reply

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
            )
            outputs = await self._collect_outputs(
                block, input_data, llm_module, ExecutionContext(safe_mode=False)
            )

        # A conversations output must be present ...
        assert "conversations" in outputs

        # ... and it must contain the assistant message.
        roles = [msg.get("role") for msg in outputs["conversations"]]
        assert "assistant" in roles
class TestConversationHistoryPreservation:
    """Tests that earlier conversation history reaches the LLM call."""

    @pytest.mark.asyncio
    async def test_existing_history_preserved(self):
        """The prompt sent to the LLM is at least as long as the given history."""
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        existing_history = [
            {"role": "user", "content": "Previous message 1"},
            {"role": "assistant", "content": "Previous response 1"},
            {"role": "user", "content": "Previous message 2"},
        ]

        llm_reply = MagicMock(
            response="New response",
            tool_calls=[],
            prompt_tokens=50,
            completion_tokens=25,
            reasoning=None,
            raw_response={"role": "assistant", "content": "New response"},
        )

        captured_prompt = []

        async def capture_llm_call(**kwargs):
            # Record the prompt the block hands to the LLM.
            captured_prompt.extend(kwargs.get("prompt", []))
            return llm_reply

        with patch(
            "backend.blocks.llm.llm_call", side_effect=capture_llm_call
        ), patch.object(block, "_create_tool_node_signatures", return_value=[]):
            input_data = SmartDecisionMakerBlock.Input(
                prompt="New message",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
                conversation_history=existing_history,
            )

            run = block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=ExecutionContext(safe_mode=False),
                execution_processor=MagicMock(),
            )
            # Drain the generator; the outputs themselves are not needed here.
            async for _ in run:
                pass

        # The existing history should be part of the prompt.
        assert len(captured_prompt) >= len(existing_history)
class TestRawResponseConversion:
    """Tests for converting a raw LLM response into a message dict."""

    def test_string_response(self):
        """A plain string becomes an assistant message with that content."""
        converted = _convert_raw_response_to_dict("Hello world")

        assert converted == {"role": "assistant", "content": "Hello world"}

    def test_dict_response(self):
        """A dict comes back equal to the input, extra keys included."""
        message = {"role": "assistant", "content": "test", "extra": "data"}

        assert _convert_raw_response_to_dict(message) == message

    def test_object_response(self):
        """Any other object is converted through json.to_dict."""
        raw_object = MagicMock()
        converted_dict = {"role": "assistant", "content": "converted"}

        with patch(
            "backend.blocks.smart_decision_maker.json.to_dict",
            return_value=converted_dict,
        ) as mock_to_dict:
            result = _convert_raw_response_to_dict(raw_object)

            mock_to_dict.assert_called_once_with(raw_object)
            assert result["role"] == "assistant"
class TestConversationMessageStructure:
    """Tests for correct conversation message structure."""

    def test_system_message_not_duplicated(self):
        """
        Placeholder for checking that system messages are not duplicated.

        TODO: this test only builds its input; it does not run the block or
        assert anything yet.
        """
        from backend.util.prompt import MAIN_OBJECTIVE_PREFIX

        # History that already contains a system message.
        system_message = {
            "role": "system",
            "content": f"{MAIN_OBJECTIVE_PREFIX}Existing system prompt",
        }
        existing_history = [system_message]

        # The block should not add another system message; verifying that
        # needs a check of the prompt passed to the LLM.

    def test_user_message_not_duplicated(self):
        """
        Placeholder for checking that user messages are not duplicated.

        TODO: this test only builds its input; it does not run the block or
        assert anything yet.
        """
        from backend.util.prompt import MAIN_OBJECTIVE_PREFIX

        # History that already contains a user message with the prefix.
        user_message = {
            "role": "user",
            "content": f"{MAIN_OBJECTIVE_PREFIX}Existing user prompt",
        }
        existing_history = [user_message]

        # The block should not add another user message with the same
        # prefix; verifying that needs a check of the prompt passed to the LLM.

    def test_tool_response_after_tool_call(self):
        """A tool result that follows its tool use leaves nothing pending."""
        tool_use = {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": "call_1"}],
        }
        tool_result = {
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": "call_1"}],
        }
        valid_history = [tool_use, tool_result]

        pending = get_pending_tool_calls(valid_history)

        assert len(pending) == 0

    def test_orphaned_tool_response_detected(self):
        """A tool result without a matching tool call is not reported as pending."""
        orphan_result = {
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": "orphan_call"}],
        }
        invalid_history = [orphan_result]

        pending = get_pending_tool_calls(invalid_history)

        # The orphan would have a count of -1, and non-positive counts are
        # expected to be left out of the result.
        assert "orphan_call" not in pending
class TestValidationErrorInConversation:
    """Tests for validation error handling in conversation."""

    @pytest.mark.asyncio
    async def test_validation_error_feedback_not_in_final_conversation(self):
        """
        Run the block through one failed and one successful LLM attempt.

        The first mocked reply calls the tool with a parameter it does not
        define; every later reply is a plain answer. Error feedback from the
        failed attempt should only feed the retry prompt and not be kept in
        the final conversation. The test itself asserts only that the block
        ends with a ``finished`` output.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        def make_reply(text, tool_calls):
            """Mocked LLM reply whose raw content mirrors its response text."""
            return MagicMock(
                response=text,
                tool_calls=tool_calls,
                prompt_tokens=50,
                completion_tokens=25,
                reasoning=None,
                raw_response={"role": "assistant", "content": text},
            )

        attempts = 0

        async def mock_llm_call(**kwargs):
            nonlocal attempts
            attempts += 1

            if attempts != 1:
                # Second and later calls: finish without tools.
                return make_reply("Done", [])

            # First call: tool call with a parameter the tool does not accept.
            bad_call = MagicMock()
            bad_call.function.name = "test_tool"
            bad_call.function.arguments = json.dumps({"wrong": "param"})
            return make_reply(None, [bad_call])

        tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {"correct": "correct"},
                    "parameters": {
                        "properties": {"correct": {"type": "string"}},
                        "required": ["correct"],
                    },
                },
            }
        ]

        with patch(
            "backend.blocks.llm.llm_call", side_effect=mock_llm_call
        ), patch.object(
            block, "_create_tool_node_signatures", return_value=tool_signatures
        ):
            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
                retry=3,
            )

            outputs = {
                name: value
                async for name, value in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=ExecutionContext(safe_mode=False),
                    execution_processor=MagicMock(),
                )
            }

        # The run should finish successfully after the retry.
        assert "finished" in outputs
        # In traditional mode (agent_mode_max_iterations=0) conversations are
        # only emitted when there are tool calls, so after a retry that ends
        # without tool calls only "finished" is produced.

View File

@@ -0,0 +1,671 @@
"""
Tests for SmartDecisionMaker data integrity failure modes.
Covers failure modes:
6. Conversation Corruption in Error Paths
7. Field Name Collision Not Detected
8. No Type Validation in Dynamic Field Merging
9. Unhandled Field Mapping Keys
16. Silent Value Loss in Output Routing
"""
import json
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch
import pytest
from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
class TestFieldNameCollisionDetection:
    """
    Tests for Failure Mode #7: Field Name Collision Not Detected

    When several field names sanitize to the same value, the last one
    silently overwrites the earlier mappings.
    """

    def test_different_names_same_sanitized_result(self):
        """Distinct field names can collapse to one sanitized name."""
        variants = [
            "test_field",
            "Test Field",
            "test field",
            "TEST_FIELD",
            "Test_Field",
            "test-field",  # hyphen is preserved, so this one stays distinct
        ]

        unique = {SmartDecisionMakerBlock.cleanup(name) for name in variants}

        # Fewer distinct sanitized names than inputs means some collided.
        assert len(unique) < len(variants), \
            f"Expected collisions, got {unique}"

    @pytest.mark.asyncio
    async def test_collision_last_one_wins(self):
        """Two colliding link names produce a single property and mapping entry."""
        block = SmartDecisionMakerBlock()

        input_schema = Mock()
        input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )
        input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        mock_node = Mock()
        mock_node.id = "test-node"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = input_schema

        # Both sink names sanitize to the same property name.
        mock_links = [
            Mock(sink_name=sink_name, sink_id="test-node", source_id="source")
            for sink_name in ("Test Field", "test field")
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)
        function_def = signature["function"]
        field_mapping = function_def["_field_mapping"]
        properties = function_def["parameters"]["properties"]

        # The collision leaves a single property.
        assert len(properties) == 1
        assert "test_field" in properties

        # Only one original name survives in the mapping. This is the BUG:
        # the other field's mapping is lost.
        assert field_mapping["test_field"] in ["Test Field", "test field"]

    @pytest.mark.asyncio
    async def test_collision_causes_data_loss(self):
        """
        Show that a field collision loses data when tool calls are processed.

        Scenario:
        1. "Field A" and "field a" both map to "field_a"
        2. The LLM provides a value for "field_a"
        3. Only one original field receives the value
        4. The other field's expected input is lost
        """
        block = SmartDecisionMakerBlock()

        # The LLM answers with the sanitized name.
        tool_call = Mock()
        tool_call.function.name = "test_tool"
        tool_call.function.arguments = json.dumps({
            "field_a": "value_for_both"
        })
        llm_response = Mock()
        llm_response.tool_calls = [tool_call]

        # Tool definition whose field mapping already suffered the collision.
        tool_function = {
            "name": "test_tool",
            "parameters": {
                "properties": {
                    "field_a": {"type": "string"},
                },
                "required": ["field_a"],
            },
            "_sink_node_id": "sink",
            # BUG: only one original name is stored;
            # "Field A" was overwritten by "field a".
            "_field_mapping": {"field_a": "field a"},
        }
        tool_functions = [{"type": "function", "function": tool_function}]

        processed = block._process_tool_calls(llm_response, tool_functions)

        assert len(processed) == 1
        routed = processed[0].input_data

        # Only "field a" receives the value ...
        assert "field a" in routed
        assert routed["field a"] == "value_for_both"
        # ... while "Field A" is lost entirely.
        assert "Field A" not in routed
class TestUnhandledFieldMappingKeys:
    """
    Tests for Failure Mode #9: Unhandled Field Mapping Keys

    If field_mapping has no entry for a key, the code falls back to the
    clean name, which may not be what the sink expects.
    """

    @staticmethod
    def _llm_response_with_call(arguments):
        """Mocked LLM response holding one ``test_tool`` call with the given arguments."""
        tool_call = Mock()
        tool_call.function.name = "test_tool"
        tool_call.function.arguments = json.dumps(arguments)
        response = Mock()
        response.tool_calls = [tool_call]
        return response

    @staticmethod
    def _tool_functions(field_names, field_mapping):
        """Single ``test_tool`` definition with optional string fields and the given mapping."""
        return [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "parameters": {
                        "properties": {
                            field: {"type": "string"} for field in field_names
                        },
                        "required": [],
                    },
                    "_sink_node_id": "sink",
                    "_field_mapping": field_mapping,
                },
            }
        ]

    @pytest.mark.asyncio
    async def test_missing_field_mapping_falls_back_to_clean_name(self):
        """With an empty mapping the clean name is used as the input key."""
        block = SmartDecisionMakerBlock()

        llm_response = self._llm_response_with_call({
            "unmapped_field": "value"
        })
        # The mapping is empty, so unmapped_field has no entry.
        tool_functions = self._tool_functions(["unmapped_field"], {})

        processed = block._process_tool_calls(llm_response, tool_functions)

        assert len(processed) == 1
        routed = processed[0].input_data
        # Fallback to the clean name, which here equals the argument key.
        assert "unmapped_field" in routed

    @pytest.mark.asyncio
    async def test_partial_field_mapping(self):
        """Mapped fields use the original name, unmapped ones the clean name."""
        block = SmartDecisionMakerBlock()

        llm_response = self._llm_response_with_call({
            "mapped_field": "value1",
            "unmapped_field": "value2",
        })
        # Only mapped_field has a mapping entry.
        tool_functions = self._tool_functions(
            ["mapped_field", "unmapped_field"],
            {
                "mapped_field": "Original Mapped Field",
            },
        )

        processed = block._process_tool_calls(llm_response, tool_functions)

        assert len(processed) == 1
        routed = processed[0].input_data
        # The mapped field is routed under its original name.
        assert "Original Mapped Field" in routed
        # The unmapped field falls back to its clean name.
        assert "unmapped_field" in routed
class TestSilentValueLossInRouting:
    """
    Tests for Failure Mode #16: Silent Value Loss in Output Routing

    Each test feeds ``parse_execution_output`` an output that does not match
    the link being resolved and checks that the call just returns ``None``
    instead of raising.
    """

    def test_routing_mismatch_returns_none_silently(self):
        """A pin name that differs from the emitted sanitized name yields None."""
        from backend.data.dynamic_fields import parse_execution_output

        routed = parse_execution_output(
            ("tools_^_node-123_~_sanitized_name", "important_value"),
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="Original Name",  # not equal to "sanitized_name"
        )

        # No exception, just None: indistinguishable from a real None value.
        assert routed is None

    def test_wrong_node_id_returns_none(self):
        """An output addressed to another node yields None."""
        from backend.data.dynamic_fields import parse_execution_output

        routed = parse_execution_output(
            ("tools_^_node-123_~_field", "value"),
            link_output_selector="tools",
            sink_node_id="different-node",  # emitted for node-123
            sink_pin_name="field",
        )

        assert routed is None

    def test_wrong_selector_returns_none(self):
        """An output under a different selector yields None."""
        from backend.data.dynamic_fields import parse_execution_output

        routed = parse_execution_output(
            ("tools_^_node-123_~_field", "value"),
            link_output_selector="different_selector",  # emitted under "tools"
            sink_node_id="node-123",
            sink_pin_name="field",
        )

        assert routed is None

    def test_cannot_distinguish_none_value_from_routing_failure(self):
        """
        A matching output whose value is None and a non-matching output
        produce the same result.
        """
        from backend.data.dynamic_fields import parse_execution_output

        # Selector matches; the payload itself is None.
        genuine_none = parse_execution_output(
            ("field_name", None),
            link_output_selector="field_name",
            sink_node_id=None,
            sink_pin_name=None,
        )

        # Selector does not match; the payload is a real value.
        routing_miss = parse_execution_output(
            ("field_name", "value"),
            link_output_selector="different_field",
            sink_node_id=None,
            sink_pin_name=None,
        )

        # Callers see None either way.
        assert genuine_none is None
        assert routing_miss is None
class TestProcessToolCallsInputData:
    """Tests for the ``input_data`` that ``_process_tool_calls`` builds."""

    @pytest.mark.asyncio
    async def test_all_expected_args_included(self):
        """Every schema property appears in input_data, omitted ones as None."""
        # The LLM supplies only the required argument.
        tool_call = Mock()
        tool_call.function.name = "test_tool"
        tool_call.function.arguments = json.dumps({"provided_field": "value"})
        llm_response = Mock()
        llm_response.tool_calls = [tool_call]

        tool_definition = {
            "type": "function",
            "function": {
                "name": "test_tool",
                "parameters": {
                    "properties": {
                        "provided_field": {"type": "string"},
                        "optional_field": {"type": "string"},
                    },
                    "required": ["provided_field"],
                },
                "_sink_node_id": "sink",
                "_field_mapping": {
                    "provided_field": "Provided Field",
                    "optional_field": "Optional Field",
                },
            },
        }

        decision_maker = SmartDecisionMakerBlock()
        results = decision_maker._process_tool_calls(llm_response, [tool_definition])

        assert len(results) == 1
        routed = results[0].input_data
        # Both schema properties are present under their original names.
        assert "Provided Field" in routed
        assert "Optional Field" in routed
        # The supplied one carries its value; the omitted one is None.
        assert routed["Provided Field"] == "value"
        assert routed["Optional Field"] is None

    @pytest.mark.asyncio
    async def test_extra_args_from_llm_ignored(self):
        """Arguments that are not schema properties are left out of input_data."""
        llm_arguments = {
            "expected_field": "value",
            "unexpected_field": "should_be_ignored",
        }
        tool_call = Mock()
        tool_call.function.name = "test_tool"
        tool_call.function.arguments = json.dumps(llm_arguments)
        llm_response = Mock()
        llm_response.tool_calls = [tool_call]

        # The schema declares only "expected_field".
        tool_definition = {
            "type": "function",
            "function": {
                "name": "test_tool",
                "parameters": {
                    "properties": {"expected_field": {"type": "string"}},
                    "required": [],
                },
                "_sink_node_id": "sink",
                "_field_mapping": {"expected_field": "Expected Field"},
            },
        }

        decision_maker = SmartDecisionMakerBlock()
        results = decision_maker._process_tool_calls(llm_response, [tool_definition])

        assert len(results) == 1
        routed = results[0].input_data
        assert "Expected Field" in routed
        # The extra argument appears under neither spelling.
        assert "unexpected_field" not in routed
        assert "Unexpected Field" not in routed
class TestToolCallMatching:
    """Tests for how tool calls are matched to tool definitions."""

    @pytest.mark.asyncio
    async def test_tool_not_found_skipped(self):
        """A call naming a tool without the expected metadata yields nothing."""
        tool_call = Mock()
        tool_call.function.name = "unknown_tool"
        tool_call.function.arguments = json.dumps({})
        llm_response = Mock()
        llm_response.tool_calls = [tool_call]

        # The only definition has another name (and no _field_mapping).
        known_tool = {
            "type": "function",
            "function": {
                "name": "known_tool",
                "parameters": {"properties": {}, "required": []},
                "_sink_node_id": "sink",
            },
        }

        decision_maker = SmartDecisionMakerBlock()
        results = decision_maker._process_tool_calls(llm_response, [known_tool])

        # The unknown call is dropped instead of being processed.
        assert len(results) == 0

    @pytest.mark.asyncio
    async def test_single_tool_fallback(self):
        """With exactly one tool defined, a misnamed call is routed to it."""
        tool_call = Mock()
        tool_call.function.name = "wrong_name"
        tool_call.function.arguments = json.dumps({"field": "value"})
        llm_response = Mock()
        llm_response.tool_calls = [tool_call]

        only_tool = {
            "type": "function",
            "function": {
                "name": "only_tool",
                "parameters": {
                    "properties": {"field": {"type": "string"}},
                    "required": [],
                },
                "_sink_node_id": "sink",
                "_field_mapping": {"field": "Field"},
            },
        }

        decision_maker = SmartDecisionMakerBlock()
        results = decision_maker._process_tool_calls(llm_response, [only_tool])

        # The sole definition is used despite the name mismatch.
        assert len(results) == 1
        assert results[0].input_data["Field"] == "value"

    @pytest.mark.asyncio
    async def test_multiple_tool_calls_processed(self):
        """Two calls to two different tools are both processed, in order."""
        call_a = Mock()
        call_a.function.name = "tool_a"
        call_a.function.arguments = json.dumps({"a": "1"})
        call_b = Mock()
        call_b.function.name = "tool_b"
        call_b.function.arguments = json.dumps({"b": "2"})
        llm_response = Mock()
        llm_response.tool_calls = [call_a, call_b]

        tool_a = {
            "type": "function",
            "function": {
                "name": "tool_a",
                "parameters": {
                    "properties": {"a": {"type": "string"}},
                    "required": [],
                },
                "_sink_node_id": "sink_a",
                "_field_mapping": {"a": "A"},
            },
        }
        tool_b = {
            "type": "function",
            "function": {
                "name": "tool_b",
                "parameters": {
                    "properties": {"b": {"type": "string"}},
                    "required": [],
                },
                "_sink_node_id": "sink_b",
                "_field_mapping": {"b": "B"},
            },
        }

        decision_maker = SmartDecisionMakerBlock()
        results = decision_maker._process_tool_calls(llm_response, [tool_a, tool_b])

        assert len(results) == 2
        first, second = results
        assert first.input_data["A"] == "1"
        assert second.input_data["B"] == "2"
class TestOutputEmitKeyGeneration:
    """Tests that emit keys built from ``cleanup`` output are consistent."""

    def test_emit_key_uses_sanitized_field_name(self):
        """The field part of an emit key is the sanitized field name."""
        node_id = "node-123"
        clean_name = SmartDecisionMakerBlock.cleanup("Max Keyword Difficulty")

        emit_key = f"tools_^_{node_id}_~_{clean_name}"

        assert emit_key == "tools_^_node-123_~_max_keyword_difficulty"

    def test_emit_key_format_consistent(self):
        """Several field/node pairs all produce the expected emit key."""
        sanitize = SmartDecisionMakerBlock.cleanup
        # (original field, sink node id, expected emit key)
        expectations = [
            ("field", "node", "tools_^_node_~_field"),
            ("Field Name", "node-123", "tools_^_node-123_~_field_name"),
            ("CPC ($)", "abc", "tools_^_abc_~_cpc____"),
        ]

        for original_field, node_id, expected in expectations:
            emit_key = f"tools_^_{node_id}_~_{sanitize(original_field)}"
            assert emit_key == expected, (
                f"Expected {expected}, got {emit_key}"
            )

    def test_emit_key_sanitization_idempotent(self):
        """Sanitizing an already-sanitized name leaves it unchanged."""
        sanitize = SmartDecisionMakerBlock.cleanup

        once = sanitize("Test Field Name")
        twice = sanitize(once)

        assert once == twice
class TestToolFunctionMetadata:
    """Tests for the private metadata attached to a tool function signature."""

    @pytest.mark.asyncio
    async def test_sink_node_id_preserved(self):
        """The signature's _sink_node_id equals the sink node's id."""
        # Assemble the mock node from the inside out: schema -> block -> node.
        schema = Mock()
        schema.jsonschema = Mock(return_value={"properties": {}, "required": []})
        schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )
        sink_block = Mock()
        # `name` must be assigned after construction; Mock(name=...) would
        # name the mock itself instead of creating a `.name` attribute.
        sink_block.name = "TestBlock"
        sink_block.description = "Test"
        sink_block.input_schema = schema
        sink_node = Mock()
        sink_node.id = "specific-node-id"
        sink_node.block = sink_block

        links = [
            Mock(sink_name="field", sink_id="specific-node-id", source_id="source"),
        ]

        decision_maker = SmartDecisionMakerBlock()
        signature = await decision_maker._create_block_function_signature(
            sink_node, links
        )

        assert signature["function"]["_sink_node_id"] == "specific-node-id"

    @pytest.mark.asyncio
    async def test_field_mapping_preserved(self):
        """The signature maps the sanitized name back to the original pin name."""
        schema = Mock()
        schema.jsonschema = Mock(return_value={"properties": {}, "required": []})
        schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )
        sink_block = Mock()
        sink_block.name = "TestBlock"
        sink_block.description = "Test"
        sink_block.input_schema = schema
        sink_node = Mock()
        sink_node.id = "test-node"
        sink_node.block = sink_block

        links = [
            Mock(
                sink_name="Original Field Name",
                sink_id="test-node",
                source_id="source",
            ),
        ]

        decision_maker = SmartDecisionMakerBlock()
        signature = await decision_maker._create_block_function_signature(
            sink_node, links
        )

        mapping = signature["function"]["_field_mapping"]
        assert "original_field_name" in mapping
        assert mapping["original_field_name"] == "Original Field Name"
class TestRequiredFieldsHandling:
    """Tests for the ``required`` list of a generated tool signature."""

    @pytest.mark.asyncio
    async def test_required_fields_use_sanitized_names(self):
        """Required entries are sanitized names; optional fields are excluded."""
        # The block schema marks two of the three linked fields as required.
        schema = Mock()
        schema.jsonschema = Mock(
            return_value={
                "properties": {},
                "required": ["Required Field", "Another Required"],
            }
        )
        schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )
        sink_block = Mock()
        # `name` is assigned after construction because Mock(name=...) would
        # name the mock itself instead of creating a `.name` attribute.
        sink_block.name = "TestBlock"
        sink_block.description = "Test"
        sink_block.input_schema = schema
        sink_node = Mock()
        sink_node.id = "test-node"
        sink_node.block = sink_block

        links = [
            Mock(sink_name="Required Field", sink_id="test-node", source_id="source"),
            Mock(sink_name="Another Required", sink_id="test-node", source_id="source"),
            Mock(sink_name="Optional Field", sink_id="test-node", source_id="source"),
        ]

        decision_maker = SmartDecisionMakerBlock()
        signature = await decision_maker._create_block_function_signature(
            sink_node, links
        )
        required = signature["function"]["parameters"]["required"]

        # Sanitized spellings are listed ...
        assert "required_field" in required
        assert "another_required" in required
        # ... and the original spellings are not.
        assert "Required Field" not in required
        assert "Another Required" not in required
        # The optional field is absent under either spelling.
        assert "optional_field" not in required
        assert "Optional Field" not in required

View File

@@ -0,0 +1,871 @@
"""
Tests for SmartDecisionMaker error handling failure modes.
Covers failure modes:
3. JSON Deserialization Without Exception Handling
4. Database Transaction Inconsistency
5. Missing Null Checks After Database Calls
15. Error Message Context Loss
17. No Validation of Dynamic Field Paths
"""
import json
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch
import pytest
from backend.blocks.smart_decision_maker import (
SmartDecisionMakerBlock,
_convert_raw_response_to_dict,
_create_tool_response,
)
class TestJSONDeserializationErrors:
    """
    Tests for Failure Mode #3: JSON Deserialization Without Exception Handling

    The first group pins down which almost-JSON strings ``json.loads``
    rejects. The last test drives the block with such a string as tool-call
    arguments and expects the ``JSONDecodeError`` to escape ``run``.
    """

    def test_malformed_json_single_quotes(self):
        """
        Single-quoted keys and values are rejected.

        LLMs sometimes return {'key': 'value'} instead of {"key": "value"}
        """
        payload = "{'key': 'value'}"
        with pytest.raises(json.JSONDecodeError):
            json.loads(payload)

    def test_malformed_json_trailing_comma(self):
        """A trailing comma before the closing brace is rejected."""
        payload = '{"key": "value",}'
        with pytest.raises(json.JSONDecodeError):
            json.loads(payload)

    def test_malformed_json_unquoted_keys(self):
        """An unquoted object key is rejected."""
        payload = '{key: "value"}'
        with pytest.raises(json.JSONDecodeError):
            json.loads(payload)

    def test_malformed_json_python_none(self):
        """Python's ``None`` in place of ``null`` is rejected."""
        payload = '{"key": None}'
        with pytest.raises(json.JSONDecodeError):
            json.loads(payload)

    def test_malformed_json_python_true_false(self):
        """Python's ``True``/``False`` in place of ``true``/``false`` are rejected."""
        for payload in ('{"key": True}', '{"key": False}'):
            with pytest.raises(json.JSONDecodeError):
                json.loads(payload)

    @pytest.mark.asyncio
    async def test_llm_returns_malformed_json_crashes_block(self):
        """
        Malformed tool-call arguments from the LLM make ``run`` raise.

        BUG: The json.loads() at line 625, 706, 1124 can throw JSONDecodeError
        which is not caught, causing the entire block to fail.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        decision_maker = SmartDecisionMakerBlock()

        # Tool call whose arguments use single quotes, i.e. are not valid JSON.
        bad_tool_call = MagicMock()
        bad_tool_call.function.name = "test_tool"
        bad_tool_call.function.arguments = "{'malformed': 'json'}"

        llm_response = MagicMock()
        llm_response.response = None
        llm_response.tool_calls = [bad_tool_call]
        llm_response.prompt_tokens = 50
        llm_response.completion_tokens = 25
        llm_response.reasoning = None
        llm_response.raw_response = {"role": "assistant", "content": None}

        tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {},
                    "parameters": {
                        "properties": {"malformed": {"type": "string"}},
                        "required": [],
                    },
                },
            }
        ]

        with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm, \
                patch.object(
                    decision_maker,
                    "_create_tool_node_signatures",
                    return_value=tool_signatures,
                ):
            mock_llm.return_value = llm_response

            block_input = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
            )
            execution_context = ExecutionContext(safe_mode=False)
            execution_processor = MagicMock()

            # BUG being documented: the decode error escapes the block.
            with pytest.raises(json.JSONDecodeError):
                async for _ in decision_maker.run(
                    block_input,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=execution_context,
                    execution_processor=execution_processor,
                ):
                    pass
class TestDatabaseTransactionInconsistency:
    """
    Tests for Failure Mode #4: Database Transaction Inconsistency

    Tool inputs are written with one ``upsert_execution_input`` call each, so
    a failure partway through leaves the earlier inputs written and the rest
    missing.
    """

    @pytest.mark.asyncio
    async def test_partial_input_insertion_on_failure(self):
        """
        Fail the third of five upserts and check that exactly the first two
        inputs were recorded.
        """
        import threading
        from collections import defaultdict

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        decision_maker = SmartDecisionMakerBlock()

        # Names of inputs whose upsert completed, in call order.
        inserted_inputs = []
        call_count = 0

        async def failing_upsert(node_id, graph_exec_id, input_name, input_data):
            """Record the input, except on the third call, which raises."""
            nonlocal call_count
            call_count += 1
            if call_count == 3:
                raise Exception("Database connection lost!")
            inserted_inputs.append(input_name)
            upsert_result = MagicMock(node_exec_id="exec-id")
            return upsert_result, {input_name: input_data}

        input_names = ["input1", "input2", "input3", "input4", "input5"]

        tool_call = MagicMock()
        tool_call.id = "call_1"
        tool_call.function.name = "multi_input_tool"
        tool_call.function.arguments = json.dumps({
            "input1": "value1",
            "input2": "value2",
            "input3": "value3",  # the upsert for this one raises
            "input4": "value4",
            "input5": "value5",
        })

        llm_response = MagicMock()
        llm_response.response = None
        llm_response.tool_calls = [tool_call]
        llm_response.prompt_tokens = 50
        llm_response.completion_tokens = 25
        llm_response.reasoning = None
        llm_response.raw_response = {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": "call_1"}],
        }

        # Every input maps to itself, is a string, and is required.
        tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "multi_input_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {name: name for name in input_names},
                    "parameters": {
                        "properties": {
                            name: {"type": "string"} for name in input_names
                        },
                        "required": list(input_names),
                    },
                },
            }
        ]

        sink_node = MagicMock()
        sink_node.block_id = "test-block"
        db_client = AsyncMock()
        db_client.get_node.return_value = sink_node
        db_client.upsert_execution_input.side_effect = failing_upsert

        with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm, \
                patch.object(
                    decision_maker,
                    "_create_tool_node_signatures",
                    return_value=tool_signatures,
                ), \
                patch(
                    "backend.blocks.smart_decision_maker.get_database_manager_async_client",
                    return_value=db_client,
                ):
            mock_llm.return_value = llm_response

            execution_context = ExecutionContext(safe_mode=False)
            execution_processor = AsyncMock()
            execution_processor.running_node_execution = defaultdict(MagicMock)
            execution_processor.execution_stats = MagicMock()
            execution_processor.execution_stats_lock = threading.Lock()

            block_input = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=1,
            )

            # The run may fail; what matters is what was written before that.
            outputs = {}
            try:
                async for name, value in decision_maker.run(
                    block_input,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=execution_context,
                    execution_processor=execution_processor,
                ):
                    outputs[name] = value
            except Exception:
                pass  # Expected

        # BUG being documented: the first two inputs were written before the
        # failure and input3..input5 never were.
        assert len(inserted_inputs) == 2, (
            f"Expected 2 inserted before failure, got {inserted_inputs}"
        )
        assert "input1" in inserted_inputs
        assert "input2" in inserted_inputs
class TestMissingNullChecks:
    """
    Tests for Failure Mode #5: Missing Null Checks After Database Calls
    """

    @pytest.mark.asyncio
    async def test_get_node_returns_none(self):
        """
        When ``get_node`` yields None for the sink node, ``run`` is expected
        to raise a ValueError whose message matches "not found".
        """
        import threading
        from collections import defaultdict

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        decision_maker = SmartDecisionMakerBlock()

        tool_call = MagicMock()
        tool_call.id = "call_1"
        tool_call.function.name = "test_tool"
        tool_call.function.arguments = json.dumps({"param": "value"})

        llm_response = MagicMock()
        llm_response.response = None
        llm_response.tool_calls = [tool_call]
        llm_response.prompt_tokens = 50
        llm_response.completion_tokens = 25
        llm_response.reasoning = None
        llm_response.raw_response = {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": "call_1"}],
        }

        tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "nonexistent-node",
                    "_field_mapping": {"param": "param"},
                    "parameters": {
                        "properties": {"param": {"type": "string"}},
                        "required": ["param"],
                    },
                },
            }
        ]

        # The database has no node for the sink id.
        db_client = AsyncMock()
        db_client.get_node.return_value = None

        with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm, \
                patch.object(
                    decision_maker,
                    "_create_tool_node_signatures",
                    return_value=tool_signatures,
                ), \
                patch(
                    "backend.blocks.smart_decision_maker.get_database_manager_async_client",
                    return_value=db_client,
                ):
            mock_llm.return_value = llm_response

            execution_context = ExecutionContext(safe_mode=False)
            execution_processor = AsyncMock()
            execution_processor.running_node_execution = defaultdict(MagicMock)
            execution_processor.execution_stats = MagicMock()
            execution_processor.execution_stats_lock = threading.Lock()

            block_input = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=1,
            )

            with pytest.raises(ValueError, match="not found"):
                async for _ in decision_maker.run(
                    block_input,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=execution_context,
                    execution_processor=execution_processor,
                ):
                    pass

    @pytest.mark.asyncio
    async def test_empty_execution_outputs(self):
        """
        When ``get_execution_outputs_by_node_exec_id`` yields an empty dict,
        the run still completes and emits "finished" or "conversations".
        """
        import threading
        from collections import defaultdict

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        decision_maker = SmartDecisionMakerBlock()
        call_count = 0

        async def mock_llm_call(**kwargs):
            """First call requests the tool; every later call answers "Done"."""
            nonlocal call_count
            call_count += 1
            reply = MagicMock()
            reply.reasoning = None
            if call_count == 1:
                tool_call = MagicMock()
                tool_call.id = "call_1"
                tool_call.function.name = "test_tool"
                tool_call.function.arguments = json.dumps({})
                reply.response = None
                reply.tool_calls = [tool_call]
                reply.prompt_tokens = 50
                reply.completion_tokens = 25
                reply.raw_response = {
                    "role": "assistant",
                    "content": [{"type": "tool_use", "id": "call_1"}],
                }
            else:
                reply.response = "Done"
                reply.tool_calls = []
                reply.prompt_tokens = 10
                reply.completion_tokens = 5
                reply.raw_response = {"role": "assistant", "content": "Done"}
            return reply

        tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {},
                    "parameters": {"properties": {}, "required": []},
                },
            }
        ]

        sink_node = MagicMock()
        sink_node.block_id = "test-block"
        upsert_result = MagicMock()
        upsert_result.node_exec_id = "exec-id"

        db_client = AsyncMock()
        db_client.get_node.return_value = sink_node
        db_client.upsert_execution_input.return_value = (upsert_result, {})
        # The tool execution produced no outputs at all.
        db_client.get_execution_outputs_by_node_exec_id.return_value = {}

        with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
                patch.object(
                    decision_maker,
                    "_create_tool_node_signatures",
                    return_value=tool_signatures,
                ), \
                patch(
                    "backend.blocks.smart_decision_maker.get_database_manager_async_client",
                    return_value=db_client,
                ):
            execution_context = ExecutionContext(safe_mode=False)
            execution_processor = AsyncMock()
            execution_processor.running_node_execution = defaultdict(MagicMock)
            execution_processor.execution_stats = MagicMock()
            execution_processor.execution_stats_lock = threading.Lock()
            execution_processor.on_node_execution = AsyncMock(
                return_value=MagicMock(error=None)
            )

            block_input = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=2,
            )

            outputs = {}
            async for name, value in decision_maker.run(
                block_input,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=execution_context,
                execution_processor=execution_processor,
            ):
                outputs[name] = value

        # Empty outputs should be handled gracefully
        # (uses "Tool executed successfully" as fallback)
        assert "finished" in outputs or "conversations" in outputs
class TestErrorMessageContextLoss:
    """
    Tests for Failure Mode #15: Error Message Context Loss

    When exceptions are caught and converted to strings, important
    debugging information (type, cause chain, traceback) is lost.
    """

    def test_exception_to_string_loses_traceback(self):
        """
        ``str()`` and ``repr()`` of an exception carry no call-stack details.

        The exception is captured with ``pytest.raises`` so that the test
        fails, rather than silently passing, if nothing is raised.
        """
        def inner():
            raise ValueError("Inner error")

        def outer():
            inner()

        with pytest.raises(ValueError) as exc_info:
            outer()

        error_string = str(exc_info.value)
        error_repr = repr(exc_info.value)

        # The string form does not mention the frames it passed through.
        assert "inner" not in error_string
        assert "outer" not in error_string
        # Even repr doesn't have the full traceback.
        assert "Traceback" not in error_repr

    def test_tool_response_loses_exception_type(self):
        """
        A tool response built from ``str(error)`` keeps only the message.
        """
        original_error = ConnectionError("Database unreachable")

        tool_response = _create_tool_response(
            "call_123",
            f"Tool execution failed: {str(original_error)}"
        )
        content = tool_response.get("content", "")

        # The original exception type is lost ...
        assert "ConnectionError" not in content
        # ... and only the message remains.
        assert "Database unreachable" in content

    @pytest.mark.asyncio
    async def test_agent_mode_error_response_lacks_context(self):
        """
        In agent mode, an error tool result names neither the exception type
        nor its chained cause.

        The assertions run only for tool results that look like an error; if
        the failure prevented a tool response from being created, nothing is
        asserted.
        """
        import threading
        from collections import defaultdict

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        mock_tool_call = MagicMock()
        mock_tool_call.id = "call_1"
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = json.dumps({})

        # First LLM turn: request the tool.
        mock_response_1 = MagicMock()
        mock_response_1.response = None
        mock_response_1.tool_calls = [mock_tool_call]
        mock_response_1.prompt_tokens = 50
        mock_response_1.completion_tokens = 25
        mock_response_1.reasoning = None
        mock_response_1.raw_response = {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": "call_1"}]
        }

        # Later LLM turns: plain text, no more tool calls.
        mock_response_2 = MagicMock()
        mock_response_2.response = "Handled the error"
        mock_response_2.tool_calls = []
        mock_response_2.prompt_tokens = 30
        mock_response_2.completion_tokens = 15
        mock_response_2.reasoning = None
        mock_response_2.raw_response = {"role": "assistant", "content": "Handled"}

        call_count = 0

        async def mock_llm_call(**kwargs):
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return mock_response_1
            return mock_response_2

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {},
                    "parameters": {"properties": {}, "required": []},
                },
            }
        ]

        # Create a complex error with a nested cause.
        class CustomDatabaseError(Exception):
            pass

        def create_complex_error():
            try:
                raise ConnectionError("Network timeout after 30s")
            except ConnectionError as e:
                raise CustomDatabaseError("Failed to connect to database") from e

        mock_db_client = AsyncMock()
        mock_node = MagicMock()
        mock_node.block_id = "test-block"
        mock_db_client.get_node.return_value = mock_node

        # Make upsert raise the chained error (raised once here so that its
        # __cause__ is populated before it is handed to the mock).
        try:
            create_complex_error()
        except CustomDatabaseError as e:
            mock_db_client.upsert_execution_input.side_effect = e

        with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
                patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
                patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = AsyncMock()
            mock_execution_processor.running_node_execution = defaultdict(MagicMock)
            mock_execution_processor.execution_stats = MagicMock()
            mock_execution_processor.execution_stats_lock = threading.Lock()

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=2,
            )

            outputs = {}
            async for name, value in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=mock_execution_context,
                execution_processor=mock_execution_processor,
            ):
                outputs[name] = value

        # Scan the conversation for tool results that look like an error.
        for msg in outputs.get("conversations", []):
            content = msg.get("content", "")
            if not isinstance(content, list):
                continue
            for item in content:
                if item.get("type") != "tool_result":
                    continue
                result_content = item.get("content", "")
                if "Error" in result_content or "failed" in result_content.lower():
                    # BUG: The error content lacks:
                    # - Exception type (CustomDatabaseError)
                    # - Chained cause (ConnectionError)
                    # - Stack trace
                    assert "CustomDatabaseError" not in result_content
                    assert "ConnectionError" not in result_content
class TestRawResponseConversion:
    """Tests for _convert_raw_response_to_dict edge cases."""

    def test_string_response_converted(self):
        """A plain string becomes an assistant message dict."""
        converted = _convert_raw_response_to_dict("Hello, world!")

        assert converted == {"role": "assistant", "content": "Hello, world!"}

    def test_dict_response_unchanged(self):
        """A dict comes back equal to the input, extra keys included."""
        message = {"role": "assistant", "content": "test", "extra": "field"}

        converted = _convert_raw_response_to_dict(message)

        assert converted == message

    def test_object_response_converted(self):
        """Any other object is handed to json.to_dict exactly once."""
        raw_object = MagicMock()

        with patch(
            "backend.blocks.smart_decision_maker.json.to_dict",
            return_value={"converted": True},
        ) as mock_to_dict:
            converted = _convert_raw_response_to_dict(raw_object)

            mock_to_dict.assert_called_once_with(raw_object)
            assert converted == {"converted": True}

    def test_none_response(self):
        """None takes the json.to_dict path and its result is returned."""
        with patch(
            "backend.blocks.smart_decision_maker.json.to_dict",
            return_value=None,
        ):
            converted = _convert_raw_response_to_dict(None)

            # None is neither a string nor a dict, so it goes through to_dict.
            assert converted is None
class TestValidationRetryMechanism:
    """Tests for the tool-call validation and retry mechanism."""

    # Identifier keyword arguments shared by every ``block.run`` call below.
    _RUN_IDS = {
        "graph_id": "test-graph",
        "node_id": "test-node",
        "graph_exec_id": "test-exec",
        "node_exec_id": "test-node-exec",
        "user_id": "test-user",
        "graph_version": 1,
    }

    @staticmethod
    def _tool_call_response(arguments):
        """Build a mocked LLM response carrying one ``test_tool`` call.

        Args:
            arguments: Mapping serialized as the tool call's JSON arguments.

        Returns:
            A ``MagicMock`` exposing the response attributes these tests set
            (``response``, ``tool_calls``, token counts, ``reasoning``,
            ``raw_response``).
        """
        tool_call = MagicMock()
        tool_call.function.name = "test_tool"
        tool_call.function.arguments = json.dumps(arguments)

        resp = MagicMock()
        resp.response = None
        resp.tool_calls = [tool_call]
        resp.prompt_tokens = 50
        resp.completion_tokens = 25
        resp.reasoning = None
        resp.raw_response = {"role": "assistant", "content": None}
        return resp

    @staticmethod
    def _tool_signatures(param):
        """Return a one-tool signature list whose only, required string parameter is ``param``."""
        return [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {param: param},
                    "parameters": {
                        "properties": {param: {"type": "string"}},
                        "required": [param],
                    },
                },
            }
        ]

    @pytest.mark.asyncio
    async def test_validation_error_triggers_retry(self):
        """
        Test that validation errors trigger retry with feedback.

        The first mocked LLM call returns a tool call with an unknown
        parameter; the second returns a valid one. The test asserts both that
        a second call happened and that the prompt of that second call
        contains the "parameter errors" feedback.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()
        # Snapshot of the prompt passed to each llm_call, in call order.
        prompts_seen = []

        async def mock_llm_call(**kwargs):
            # Copy the prompt so later mutation by the block cannot alter
            # what was actually sent on this call.
            prompts_seen.append(list(kwargs.get("prompt", [])))
            if len(prompts_seen) == 1:
                # First call: return tool call with wrong parameter
                return self._tool_call_response({"wrong_param": "value"})
            # Subsequent calls: return correct tool call
            return self._tool_call_response({"correct_param": "value"})

        llm_patch = patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call)
        signature_patch = patch.object(
            block,
            "_create_tool_node_signatures",
            return_value=self._tool_signatures("correct_param"),
        )

        with llm_patch, signature_patch:
            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,  # Traditional mode
                retry=3,
            )
            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = MagicMock()

            # Drain the generator; the yielded outputs are not inspected here.
            async for _ in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                execution_context=mock_execution_context,
                execution_processor=mock_execution_processor,
                **self._RUN_IDS,
            ):
                pass

        # Should have made multiple calls due to retry
        assert len(prompts_seen) >= 2
        # The retry prompt must carry the validation feedback. This check was
        # previously computed inside the mock but never asserted.
        assert any(
            "parameter errors" in str(msg.get("content", "")).lower()
            for msg in prompts_seen[1]
        )

    @pytest.mark.asyncio
    async def test_max_retries_exceeded(self):
        """
        Test behavior when max retries are exceeded.

        Every mocked LLM call returns an invalid tool call, so running the
        block is expected to raise ``ValueError`` matching "parameter errors".
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        async def mock_llm_call(**kwargs):
            # Always return invalid tool call
            return self._tool_call_response({"wrong": "param"})

        llm_patch = patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call)
        signature_patch = patch.object(
            block,
            "_create_tool_node_signatures",
            return_value=self._tool_signatures("correct"),
        )

        with llm_patch, signature_patch:
            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
                retry=2,  # Only 2 retries
            )
            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = MagicMock()

            # Should raise ValueError after max retries
            with pytest.raises(ValueError, match="parameter errors"):
                async for _ in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    execution_context=mock_execution_context,
                    execution_processor=mock_execution_processor,
                    **self._RUN_IDS,
                ):
                    pass

View File

@@ -0,0 +1,513 @@
"""
Tests for dynamic fields edge cases and failure modes.
Covers failure modes:
8. No Type Validation in Dynamic Field Merging
17. No Validation of Dynamic Field Paths
"""
from typing import Any
import pytest
from backend.data.dynamic_fields import (
DICT_SPLIT,
LIST_SPLIT,
OBJC_SPLIT,
extract_base_field_name,
get_dynamic_field_description,
is_dynamic_field,
is_tool_pin,
merge_execution_input,
parse_execution_output,
sanitize_pin_name,
)
class TestDynamicFieldMergingTypeValidation:
    """
    Tests for Failure Mode #8: No Type Validation in Dynamic Field Merging

    This suite documents the reported problem that merging dynamic fields
    does not validate the type of intermediate structures, which can lead to
    type coercion errors.
    """

    def test_merge_dict_field_creates_dict(self):
        """``_#_`` keys are merged into a dict under the base field name."""
        merged = merge_execution_input(
            {
                "values_#_name": "Alice",
                "values_#_age": 30,
            }
        )

        assert "values" in merged
        values = merged["values"]
        assert isinstance(values, dict)
        assert values["name"] == "Alice"
        assert values["age"] == 30

    def test_merge_list_field_creates_list(self):
        """``_$_`` keys are merged into a list under the base field name."""
        merged = merge_execution_input(
            {
                "items_$_0": "first",
                "items_$_1": "second",
                "items_$_2": "third",
            }
        )

        assert "items" in merged
        items = merged["items"]
        assert isinstance(items, list)
        assert items == ["first", "second", "third"]

    def test_merge_with_existing_primitive_type_conflict(self):
        """
        Merge a dynamic dict key onto a base field that is already a string.

        BUG (documented, not fixed here): when the base field already holds a
        primitive, merging a dynamic field may fail or corrupt data. Three
        outcomes are tolerated: a TypeError/AttributeError, the dynamic field
        being ignored, or the primitive being replaced by a dict, in which
        case the dict must contain the dynamic key.
        """
        flat = {
            "value": "I am a string",  # Primitive
            "value_#_key": "dict value",  # Dynamic dict field
        }

        try:
            merged_value = merge_execution_input(flat).get("value")
        except (TypeError, AttributeError):
            # Expected error when trying to merge into primitive
            return

        if isinstance(merged_value, dict):
            # Primitive was converted to dict - data loss!
            assert "key" in merged_value
        # Otherwise the dynamic field was ignored; nothing to check.

    def test_merge_list_with_gaps(self):
        """List keys with non-contiguous indices land at their own indices."""
        merged = merge_execution_input(
            {
                "items_$_0": "zero",
                "items_$_2": "two",  # Gap at index 1
                "items_$_5": "five",  # Larger gap
            }
        )

        assert "items" in merged
        items = merged["items"]
        # Only the populated slots are checked; the content of the gaps
        # (e.g. index 1) is deliberately left unasserted.
        for index, expected in ((0, "zero"), (2, "two"), (5, "five")):
            assert items[index] == expected

    def test_merge_nested_dynamic_fields(self):
        """Mixed dict/list paths at least produce the ``data`` base key."""
        merged = merge_execution_input(
            {
                "data_#_users_$_0": "user1",
                "data_#_users_$_1": "user2",
                "data_#_config_#_enabled": True,
            }
        )

        # Only the presence of the base key is asserted, not the nested shape.
        assert "data" in merged

    def test_merge_object_field(self):
        """``_@_`` keys are merged under the base field name."""
        merged = merge_execution_input(
            {
                "user_@_name": "Alice",
                "user_@_email": "alice@example.com",
            }
        )

        assert "user" in merged
        # NOTE(review): subscript access assumes the merged object supports
        # item lookup (dict-like) - confirm against merge_execution_input.
        assert merged["user"]["name"] == "Alice"
        assert merged["user"]["email"] == "alice@example.com"

    def test_merge_mixed_field_types(self):
        """Regular, dict and list keys can be merged in a single call."""
        merged = merge_execution_input(
            {
                "regular": "value",
                "dict_field_#_key": "dict_value",
                "list_field_$_0": "list_item",
            }
        )

        assert merged["regular"] == "value"
        assert merged["dict_field"]["key"] == "dict_value"
        assert merged["list_field"][0] == "list_item"
class TestDynamicFieldPathValidation:
    """
    Tests for Failure Mode #17: No Validation of Dynamic Field Paths

    This suite documents the reported problem that traversing dynamic field
    paths over intermediate None values can raise TypeError instead of
    failing gracefully.
    """

    def test_parse_output_with_none_intermediate(self):
        """
        Route a dynamically named output through its plain base selector.

        The emitted pin is ``data_$_0`` while the link selects ``data``. The
        call must not raise, and must yield either the emitted value or None.

        NOTE(review): despite the test name, no ``None`` intermediate value
        is constructed here; only the selector/pin-name mismatch is exercised.
        """
        # Output with nested path
        output_item = ("data_$_0", "value")

        result = parse_execution_output(
            output_item,
            link_output_selector="data",
            sink_node_id=None,
            sink_pin_name=None,
        )

        # Graceful handling means returning the value or None. Previously the
        # result was bound but never checked, so the test could only fail on
        # an exception.
        assert result is None or result == "value"

    def test_extract_base_field_name_with_multiple_delimiters(self):
        """The base name is the part before the first delimiter, however many follow."""
        # Multiple dict delimiters
        assert extract_base_field_name("a_#_b_#_c") == "a"
        # Multiple list delimiters
        assert extract_base_field_name("a_$_0_$_1") == "a"
        # Mixed delimiters
        assert extract_base_field_name("a_#_b_$_0") == "a"

    def test_is_dynamic_field_edge_cases(self):
        """``is_dynamic_field`` on standard, regular and degenerate names."""
        # Standard dynamic fields
        assert is_dynamic_field("values_#_key") is True
        assert is_dynamic_field("items_$_0") is True
        assert is_dynamic_field("obj_@_attr") is True

        # Regular fields
        assert is_dynamic_field("regular") is False
        assert is_dynamic_field("with_underscore") is False

        # Edge cases
        assert is_dynamic_field("") is False
        assert is_dynamic_field("_#_") is True  # Just delimiter
        assert is_dynamic_field("a_#_") is True  # Trailing delimiter

    def test_sanitize_pin_name_with_tool_pins(self):
        """``sanitize_pin_name`` on tool pins, dynamic fields and regular names."""
        # Tool pins should return "tools"
        assert sanitize_pin_name("tools") == "tools"
        assert sanitize_pin_name("tools_^_node_~_field") == "tools"

        # Dynamic fields should return base name
        assert sanitize_pin_name("values_#_key") == "values"
        assert sanitize_pin_name("items_$_0") == "items"

        # Regular fields unchanged
        assert sanitize_pin_name("regular") == "regular"
class TestDynamicFieldDescriptions:
    """Checks the text produced by ``get_dynamic_field_description``."""

    @staticmethod
    def _assert_mentions(field_name, *fragments):
        """Assert that each fragment occurs in the description of ``field_name``."""
        description = get_dynamic_field_description(field_name)
        for fragment in fragments:
            assert fragment in description

    def test_dict_field_description(self):
        """A ``_#_`` field is described as a dictionary entry."""
        self._assert_mentions(
            "values_#_user_name",
            "Dictionary field",
            "values['user_name']",
        )

    def test_list_field_description(self):
        """A ``_$_`` field is described as an indexed list item."""
        self._assert_mentions("items_$_0", "List item 0", "items[0]")

    def test_object_field_description(self):
        """A ``_@_`` field is described as an object attribute."""
        self._assert_mentions("user_@_email", "Object attribute", "user.email")

    def test_regular_field_description(self):
        """A non-dynamic name gets the generic "Value for ..." text."""
        assert (
            get_dynamic_field_description("regular_field")
            == "Value for regular_field"
        )

    def test_description_with_numeric_key(self):
        """A numeric dict key appears quoted, as a string, in the description."""
        self._assert_mentions("values_#_123", "Dictionary field", "values['123']")
class TestParseExecutionOutputToolRouting:
    """Tests for tool pin routing in ``parse_execution_output``."""

    # Emitted tool output shared by the match/mismatch tests.
    _TOOL_OUTPUT = ("tools_^_node-123_~_field_name", "value")

    @classmethod
    def _route(cls, node_id, pin_name):
        """Route the shared tool output through the ``tools`` selector to the given sink."""
        return parse_execution_output(
            cls._TOOL_OUTPUT,
            link_output_selector="tools",
            sink_node_id=node_id,
            sink_pin_name=pin_name,
        )

    def test_tool_pin_routing_exact_match(self):
        """Matching node id and pin name deliver the value."""
        assert self._route("node-123", "field_name") == "value"

    def test_tool_pin_routing_node_mismatch(self):
        """A different sink node id yields None."""
        assert self._route("different-node", "field_name") is None

    def test_tool_pin_routing_field_mismatch(self):
        """A different sink pin name yields None."""
        assert self._route("node-123", "different_field") is None

    def test_tool_pin_missing_required_params(self):
        """Omitting either the sink node id or the sink pin name raises ValueError."""
        emitted = ("tools_^_node-123_~_field", "value")

        # First the node id is missing, then the pin name.
        for node_id, pin_name in ((None, "field"), ("node-123", None)):
            with pytest.raises(ValueError, match="must be provided"):
                parse_execution_output(
                    emitted,
                    link_output_selector="tools",
                    sink_node_id=node_id,
                    sink_pin_name=pin_name,
                )
class TestParseExecutionOutputDynamicFields:
    """Tests for dynamic field routing in ``parse_execution_output``."""

    @staticmethod
    def _select(pin_name, payload, selector):
        """Parse the ``(pin_name, payload)`` output with ``selector`` and no sink info."""
        return parse_execution_output(
            (pin_name, payload),
            link_output_selector=selector,
            sink_node_id=None,
            sink_pin_name=None,
        )

    def test_dict_field_extraction(self):
        """A ``_#_`` selector picks one key out of an emitted dict."""
        # The output item is (field_name, data_structure)
        payload = {"key1": "value1", "key2": "value2"}
        assert self._select("values", payload, "values_#_key1") == "value1"

    def test_list_field_extraction(self):
        """A ``_$_`` selector picks one index out of an emitted list."""
        payload = ["zero", "one", "two"]
        assert self._select("items", payload, "items_$_1") == "one"

    def test_nested_field_extraction(self):
        """Selecting a dict key whose value is a list returns that list."""
        payload = {
            "users": [
                {"name": "Alice", "email": "alice@example.com"},
                {"name": "Bob", "email": "bob@example.com"},
            ]
        }
        extracted = self._select("data", payload, "data_#_users")
        assert extracted == payload["users"]

    def test_missing_key_returns_none(self):
        """Selecting a dict key that is absent yields None."""
        payload = {"existing": "value"}
        assert self._select("values", payload, "values_#_nonexistent") is None

    def test_index_out_of_bounds_returns_none(self):
        """Selecting a list index past the end yields None."""
        payload = ["zero", "one"]
        assert self._select("items", payload, "items_$_99") is None
class TestIsToolPin:
    """Tests for the ``is_tool_pin`` predicate."""

    def test_tools_prefix(self):
        """Names starting with ``tools_^_`` are tool pins."""
        for pin in ("tools_^_node_~_field", "tools_^_anything"):
            assert is_tool_pin(pin) is True

    def test_tools_exact(self):
        """The bare name ``tools`` is a tool pin."""
        assert is_tool_pin("tools") is True

    def test_non_tool_pins(self):
        """Look-alike names and the empty string are not tool pins."""
        for pin in ("input", "output", "toolsomething", "my_tools", ""):
            assert is_tool_pin(pin) is False
class TestMergeExecutionInputEdgeCases:
    """Edge case tests for ``merge_execution_input``."""

    def test_empty_input(self):
        """An empty mapping merges to an empty mapping."""
        assert merge_execution_input({}) == {}

    def test_only_regular_fields(self):
        """Input without dynamic keys compares equal to the merged output."""
        plain = {"a": 1, "b": 2, "c": 3}
        merged = merge_execution_input(plain)
        assert merged == plain

    def test_overwrite_behavior(self):
        """
        Merge a single dynamic dict key.

        A dict literal cannot hold the same key twice, so only one assignment
        is exercised here (this shouldn't happen in practice anyway).
        """
        merged = merge_execution_input({"values_#_key": "first"})
        assert merged["values"]["key"] == "first"

    def test_numeric_string_keys(self):
        """Digit-only dict keys are looked up as strings after merging."""
        merged = merge_execution_input(
            {
                "values_#_123": "numeric_key",
                "values_#_456": "another_numeric",
            }
        )
        values = merged["values"]
        assert values["123"] == "numeric_key"
        assert values["456"] == "another_numeric"

    def test_special_characters_in_keys(self):
        """Dashes and dots inside a dict key are kept verbatim."""
        merged = merge_execution_input(
            {
                "values_#_key-with-dashes": "value1",
                "values_#_key.with.dots": "value2",
            }
        )
        values = merged["values"]
        assert values["key-with-dashes"] == "value1"
        assert values["key.with.dots"] == "value2"

    def test_deeply_nested_list(self):
        """
        Merge two-level list indices (a 2x2 "matrix").

        The resulting structure is not verified. The test only requires that
        the call either succeeds or raises KeyError, TypeError or IndexError.
        """
        grid = {
            "matrix_$_0_$_0": "0,0",
            "matrix_$_0_$_1": "0,1",
            "matrix_$_1_$_0": "1,0",
            "matrix_$_1_$_1": "1,1",
        }

        # Note: Current implementation may not support this depth
        try:
            merge_execution_input(grid)
        except (KeyError, TypeError, IndexError):
            # Deep nesting may not be supported
            pass

    def test_none_values(self):
        """None survives merging as a regular, dict and list value alike."""
        merged = merge_execution_input(
            {
                "regular": None,
                "dict_#_key": None,
                "list_$_0": None,
            }
        )
        assert merged["regular"] is None
        assert merged["dict"]["key"] is None
        assert merged["list"][0] is None

    def test_complex_values(self):
        """Dict and list values are stored under their dynamic keys unchanged."""
        merged = merge_execution_input(
            {
                "values_#_nested_dict": {"inner": "value"},
                "values_#_nested_list": [1, 2, 3],
            }
        )
        values = merged["values"]
        assert values["nested_dict"] == {"inner": "value"}
        assert values["nested_list"] == [1, 2, 3]