mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-13 08:14:58 -05:00
Compare commits
3 Commits
fix/claude
...
claude/tes
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
edba0c5ca6 | ||
|
|
3f29f71dd6 | ||
|
|
00207eb4c9 |
@@ -0,0 +1,246 @@
|
|||||||
|
"""
|
||||||
|
Standalone tests for pin name sanitization that can run without full backend dependencies.
|
||||||
|
|
||||||
|
These tests verify the core sanitization logic independently of the full system.
|
||||||
|
Run with: python -m pytest test_pin_sanitization_standalone.py -v
|
||||||
|
Or simply: python test_pin_sanitization_standalone.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
# Simulate the exact cleanup function from SmartDecisionMakerBlock
|
||||||
|
def cleanup(s: str) -> str:
|
||||||
|
"""Clean up names for use as tool function names."""
|
||||||
|
return re.sub(r"[^a-zA-Z0-9_-]", "_", s).lower()
|
||||||
|
|
||||||
|
|
||||||
|
# Simulate the key parts of parse_execution_output
|
||||||
|
def simulate_tool_routing(
|
||||||
|
emit_key: str,
|
||||||
|
sink_node_id: str,
|
||||||
|
sink_pin_name: str,
|
||||||
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Simulate the routing comparison from parse_execution_output.
|
||||||
|
|
||||||
|
Returns True if routing would succeed, False otherwise.
|
||||||
|
"""
|
||||||
|
if not emit_key.startswith("tools_^_") or "_~_" not in emit_key:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Extract routing info from emit key: tools_^_{node_id}_~_{field}
|
||||||
|
selector = emit_key[8:] # Remove "tools_^_"
|
||||||
|
target_node_id, target_input_pin = selector.split("_~_", 1)
|
||||||
|
|
||||||
|
# Current (buggy) comparison - direct string comparison
|
||||||
|
return target_node_id == sink_node_id and target_input_pin == sink_pin_name
|
||||||
|
|
||||||
|
|
||||||
|
def simulate_fixed_tool_routing(
|
||||||
|
emit_key: str,
|
||||||
|
sink_node_id: str,
|
||||||
|
sink_pin_name: str,
|
||||||
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Simulate the FIXED routing comparison.
|
||||||
|
|
||||||
|
The fix: sanitize sink_pin_name before comparison.
|
||||||
|
"""
|
||||||
|
if not emit_key.startswith("tools_^_") or "_~_" not in emit_key:
|
||||||
|
return False
|
||||||
|
|
||||||
|
selector = emit_key[8:]
|
||||||
|
target_node_id, target_input_pin = selector.split("_~_", 1)
|
||||||
|
|
||||||
|
# Fixed comparison - sanitize sink_pin_name
|
||||||
|
return target_node_id == sink_node_id and target_input_pin == cleanup(sink_pin_name)
|
||||||
|
|
||||||
|
|
||||||
|
class TestCleanupFunction:
|
||||||
|
"""Tests for the cleanup function."""
|
||||||
|
|
||||||
|
def test_spaces_to_underscores(self):
|
||||||
|
assert cleanup("Max Keyword Difficulty") == "max_keyword_difficulty"
|
||||||
|
|
||||||
|
def test_mixed_case_to_lowercase(self):
|
||||||
|
assert cleanup("MaxKeywordDifficulty") == "maxkeyworddifficulty"
|
||||||
|
|
||||||
|
def test_special_chars_to_underscores(self):
|
||||||
|
assert cleanup("field@name!") == "field_name_"
|
||||||
|
assert cleanup("CPC ($)") == "cpc____"
|
||||||
|
|
||||||
|
def test_preserves_valid_chars(self):
|
||||||
|
assert cleanup("valid_name-123") == "valid_name-123"
|
||||||
|
|
||||||
|
def test_empty_string(self):
|
||||||
|
assert cleanup("") == ""
|
||||||
|
|
||||||
|
def test_consecutive_spaces(self):
|
||||||
|
assert cleanup("a b") == "a___b"
|
||||||
|
|
||||||
|
def test_unicode(self):
|
||||||
|
assert cleanup("café") == "caf_"
|
||||||
|
|
||||||
|
|
||||||
|
class TestCurrentRoutingBehavior:
|
||||||
|
"""Tests demonstrating the current (buggy) routing behavior."""
|
||||||
|
|
||||||
|
def test_exact_match_works(self):
|
||||||
|
"""When names match exactly, routing works."""
|
||||||
|
emit_key = "tools_^_node-123_~_query"
|
||||||
|
assert simulate_tool_routing(emit_key, "node-123", "query") is True
|
||||||
|
|
||||||
|
def test_spaces_cause_failure(self):
|
||||||
|
"""When sink_pin has spaces, routing fails."""
|
||||||
|
sanitized = cleanup("Max Keyword Difficulty")
|
||||||
|
emit_key = f"tools_^_node-123_~_{sanitized}"
|
||||||
|
assert simulate_tool_routing(emit_key, "node-123", "Max Keyword Difficulty") is False
|
||||||
|
|
||||||
|
def test_special_chars_cause_failure(self):
|
||||||
|
"""When sink_pin has special chars, routing fails."""
|
||||||
|
sanitized = cleanup("CPC ($)")
|
||||||
|
emit_key = f"tools_^_node-123_~_{sanitized}"
|
||||||
|
assert simulate_tool_routing(emit_key, "node-123", "CPC ($)") is False
|
||||||
|
|
||||||
|
|
||||||
|
class TestFixedRoutingBehavior:
|
||||||
|
"""Tests demonstrating the fixed routing behavior."""
|
||||||
|
|
||||||
|
def test_exact_match_still_works(self):
|
||||||
|
"""When names match exactly, routing still works."""
|
||||||
|
emit_key = "tools_^_node-123_~_query"
|
||||||
|
assert simulate_fixed_tool_routing(emit_key, "node-123", "query") is True
|
||||||
|
|
||||||
|
def test_spaces_work_with_fix(self):
|
||||||
|
"""With the fix, spaces in sink_pin work."""
|
||||||
|
sanitized = cleanup("Max Keyword Difficulty")
|
||||||
|
emit_key = f"tools_^_node-123_~_{sanitized}"
|
||||||
|
assert simulate_fixed_tool_routing(emit_key, "node-123", "Max Keyword Difficulty") is True
|
||||||
|
|
||||||
|
def test_special_chars_work_with_fix(self):
|
||||||
|
"""With the fix, special chars in sink_pin work."""
|
||||||
|
sanitized = cleanup("CPC ($)")
|
||||||
|
emit_key = f"tools_^_node-123_~_{sanitized}"
|
||||||
|
assert simulate_fixed_tool_routing(emit_key, "node-123", "CPC ($)") is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestBugReproduction:
|
||||||
|
"""Exact reproduction of the reported bug."""
|
||||||
|
|
||||||
|
def test_max_keyword_difficulty_bug(self):
|
||||||
|
"""
|
||||||
|
Reproduce the exact bug from the issue:
|
||||||
|
|
||||||
|
"For this agent specifically the input pin has space and unsanitized,
|
||||||
|
the frontend somehow connect without sanitizing creating a link like:
|
||||||
|
tools_^_767682f5-..._~_Max Keyword Difficulty
|
||||||
|
but what's produced by backend is
|
||||||
|
tools_^_767682f5-..._~_max_keyword_difficulty
|
||||||
|
so the tool calls go into the void"
|
||||||
|
"""
|
||||||
|
node_id = "767682f5-fake-uuid"
|
||||||
|
original_field = "Max Keyword Difficulty"
|
||||||
|
sanitized_field = cleanup(original_field)
|
||||||
|
|
||||||
|
# What backend produces (emit key)
|
||||||
|
emit_key = f"tools_^_{node_id}_~_{sanitized_field}"
|
||||||
|
assert emit_key == f"tools_^_{node_id}_~_max_keyword_difficulty"
|
||||||
|
|
||||||
|
# What frontend link has (sink_pin_name)
|
||||||
|
frontend_sink = original_field
|
||||||
|
|
||||||
|
# Current behavior: FAILS
|
||||||
|
assert simulate_tool_routing(emit_key, node_id, frontend_sink) is False
|
||||||
|
|
||||||
|
# With fix: WORKS
|
||||||
|
assert simulate_fixed_tool_routing(emit_key, node_id, frontend_sink) is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestCommonFieldNamePatterns:
|
||||||
|
"""Test common field name patterns that could cause issues."""
|
||||||
|
|
||||||
|
FIELD_NAMES = [
|
||||||
|
"Max Keyword Difficulty",
|
||||||
|
"Search Volume (Monthly)",
|
||||||
|
"CPC ($)",
|
||||||
|
"User's Input",
|
||||||
|
"Target URL",
|
||||||
|
"API Response",
|
||||||
|
"Query #1",
|
||||||
|
"First Name",
|
||||||
|
"Last Name",
|
||||||
|
"Email Address",
|
||||||
|
"Phone Number",
|
||||||
|
"Total Cost ($)",
|
||||||
|
"Discount (%)",
|
||||||
|
"Created At",
|
||||||
|
"Updated At",
|
||||||
|
"Is Active",
|
||||||
|
]
|
||||||
|
|
||||||
|
def test_current_behavior_fails_for_special_names(self):
|
||||||
|
"""Current behavior fails for names with spaces/special chars."""
|
||||||
|
failed = []
|
||||||
|
for name in self.FIELD_NAMES:
|
||||||
|
sanitized = cleanup(name)
|
||||||
|
emit_key = f"tools_^_node_~_{sanitized}"
|
||||||
|
if not simulate_tool_routing(emit_key, "node", name):
|
||||||
|
failed.append(name)
|
||||||
|
|
||||||
|
# All names with spaces should fail
|
||||||
|
names_with_spaces = [n for n in self.FIELD_NAMES if " " in n or any(c in n for c in "()$%#'")]
|
||||||
|
assert set(failed) == set(names_with_spaces)
|
||||||
|
|
||||||
|
def test_fixed_behavior_works_for_all_names(self):
|
||||||
|
"""Fixed behavior works for all names."""
|
||||||
|
for name in self.FIELD_NAMES:
|
||||||
|
sanitized = cleanup(name)
|
||||||
|
emit_key = f"tools_^_node_~_{sanitized}"
|
||||||
|
assert simulate_fixed_tool_routing(emit_key, "node", name) is True, f"Failed for: {name}"
|
||||||
|
|
||||||
|
|
||||||
|
def run_tests():
|
||||||
|
"""Run all tests manually without pytest."""
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
test_classes = [
|
||||||
|
TestCleanupFunction,
|
||||||
|
TestCurrentRoutingBehavior,
|
||||||
|
TestFixedRoutingBehavior,
|
||||||
|
TestBugReproduction,
|
||||||
|
TestCommonFieldNamePatterns,
|
||||||
|
]
|
||||||
|
|
||||||
|
total = 0
|
||||||
|
passed = 0
|
||||||
|
failed = 0
|
||||||
|
|
||||||
|
for test_class in test_classes:
|
||||||
|
print(f"\n{test_class.__name__}:")
|
||||||
|
instance = test_class()
|
||||||
|
for name in dir(instance):
|
||||||
|
if name.startswith("test_"):
|
||||||
|
total += 1
|
||||||
|
try:
|
||||||
|
getattr(instance, name)()
|
||||||
|
print(f" ✓ {name}")
|
||||||
|
passed += 1
|
||||||
|
except AssertionError as e:
|
||||||
|
print(f" ✗ {name}: {e}")
|
||||||
|
failed += 1
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ✗ {name}: {e}")
|
||||||
|
traceback.print_exc()
|
||||||
|
failed += 1
|
||||||
|
|
||||||
|
print(f"\n{'='*50}")
|
||||||
|
print(f"Total: {total}, Passed: {passed}, Failed: {failed}")
|
||||||
|
return failed == 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import sys
|
||||||
|
success = run_tests()
|
||||||
|
sys.exit(0 if success else 1)
|
||||||
@@ -0,0 +1,916 @@
|
|||||||
|
"""
|
||||||
|
Tests for SmartDecisionMaker agent mode specific failure modes.
|
||||||
|
|
||||||
|
Covers failure modes:
|
||||||
|
2. Silent Tool Failures in Agent Mode
|
||||||
|
3. Unbounded Agent Mode Iterations
|
||||||
|
10. Unbounded Agent Iterations
|
||||||
|
12. Stale Credentials in Agent Mode
|
||||||
|
13. Tool Signature Cache Invalidation
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import threading
|
||||||
|
from collections import defaultdict
|
||||||
|
from typing import Any
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, Mock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from backend.blocks.smart_decision_maker import (
|
||||||
|
SmartDecisionMakerBlock,
|
||||||
|
ExecutionParams,
|
||||||
|
ToolInfo,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestSilentToolFailuresInAgentMode:
|
||||||
|
"""
|
||||||
|
Tests for Failure Mode #2: Silent Tool Failures in Agent Mode
|
||||||
|
|
||||||
|
When tool execution fails in agent mode, the error is converted to a
|
||||||
|
tool response and execution continues silently.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_tool_execution_failure_converted_to_response(self):
|
||||||
|
"""
|
||||||
|
Test that tool execution failures are silently converted to responses.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
# First response: tool call
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.id = "call_1"
|
||||||
|
mock_tool_call.function.name = "failing_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({"param": "value"})
|
||||||
|
|
||||||
|
mock_response_1 = MagicMock()
|
||||||
|
mock_response_1.response = None
|
||||||
|
mock_response_1.tool_calls = [mock_tool_call]
|
||||||
|
mock_response_1.prompt_tokens = 50
|
||||||
|
mock_response_1.completion_tokens = 25
|
||||||
|
mock_response_1.reasoning = None
|
||||||
|
mock_response_1.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": "call_1"}]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Second response: finish after seeing error
|
||||||
|
mock_response_2 = MagicMock()
|
||||||
|
mock_response_2.response = "I encountered an error"
|
||||||
|
mock_response_2.tool_calls = []
|
||||||
|
mock_response_2.prompt_tokens = 30
|
||||||
|
mock_response_2.completion_tokens = 15
|
||||||
|
mock_response_2.reasoning = None
|
||||||
|
mock_response_2.raw_response = {"role": "assistant", "content": "I encountered an error"}
|
||||||
|
|
||||||
|
llm_call_count = 0
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal llm_call_count
|
||||||
|
llm_call_count += 1
|
||||||
|
if llm_call_count == 1:
|
||||||
|
return mock_response_1
|
||||||
|
return mock_response_2
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "failing_tool",
|
||||||
|
"_sink_node_id": "sink-node",
|
||||||
|
"_field_mapping": {"param": "param"},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"param": {"type": "string"}},
|
||||||
|
"required": ["param"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
# Mock database client that will fail
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_db_client.get_node.side_effect = Exception("Database connection failed!")
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Do something",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=5,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# The execution completed (didn't crash)
|
||||||
|
assert "finished" in outputs or "conversations" in outputs
|
||||||
|
|
||||||
|
# BUG: The tool failure was silent - user doesn't know what happened
|
||||||
|
# The error was just logged and converted to a tool response
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_tool_failure_causes_infinite_retry_loop(self):
|
||||||
|
"""
|
||||||
|
Test scenario where LLM keeps calling the same failing tool.
|
||||||
|
|
||||||
|
If tool fails but LLM doesn't realize it, it may keep trying.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
call_count = 0
|
||||||
|
max_calls = 10 # Limit for test
|
||||||
|
|
||||||
|
def create_tool_call_response():
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.id = f"call_{call_count}"
|
||||||
|
mock_tool_call.function.name = "persistent_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({"retry": call_count})
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = None
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": f"call_{call_count}"}]
|
||||||
|
}
|
||||||
|
return mock_response
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal call_count
|
||||||
|
call_count += 1
|
||||||
|
|
||||||
|
if call_count >= max_calls:
|
||||||
|
# Eventually finish to prevent actual infinite loop in test
|
||||||
|
final = MagicMock()
|
||||||
|
final.response = "Giving up"
|
||||||
|
final.tool_calls = []
|
||||||
|
final.prompt_tokens = 10
|
||||||
|
final.completion_tokens = 5
|
||||||
|
final.reasoning = None
|
||||||
|
final.raw_response = {"role": "assistant", "content": "Giving up"}
|
||||||
|
return final
|
||||||
|
|
||||||
|
return create_tool_call_response()
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "persistent_tool",
|
||||||
|
"_sink_node_id": "sink-node",
|
||||||
|
"_field_mapping": {"retry": "retry"},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"retry": {"type": "integer"}},
|
||||||
|
"required": ["retry"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_db_client.get_node.side_effect = Exception("Always fails!")
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Keep trying",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=-1, # Infinite mode!
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use timeout to prevent actual infinite loop
|
||||||
|
try:
|
||||||
|
async with asyncio.timeout(5):
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
pass # Expected if we hit infinite loop
|
||||||
|
|
||||||
|
# Document that many calls were made before we gave up
|
||||||
|
assert call_count >= max_calls - 1, \
|
||||||
|
f"Expected many retries, got {call_count}"
|
||||||
|
|
||||||
|
|
||||||
|
class TestUnboundedAgentIterations:
|
||||||
|
"""
|
||||||
|
Tests for Failure Mode #3 and #10: Unbounded Agent Mode Iterations
|
||||||
|
|
||||||
|
With max_iterations = -1, the agent can run forever, consuming
|
||||||
|
unlimited tokens and compute resources.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_infinite_mode_requires_llm_to_stop(self):
|
||||||
|
"""
|
||||||
|
Test that infinite mode (-1) only stops when LLM stops making tool calls.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
iterations = 0
|
||||||
|
max_test_iterations = 20
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal iterations
|
||||||
|
iterations += 1
|
||||||
|
|
||||||
|
if iterations >= max_test_iterations:
|
||||||
|
# Stop to prevent actual infinite loop
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = "Finally done"
|
||||||
|
resp.tool_calls = []
|
||||||
|
resp.prompt_tokens = 10
|
||||||
|
resp.completion_tokens = 5
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
# Keep making tool calls
|
||||||
|
tool_call = MagicMock()
|
||||||
|
tool_call.id = f"call_{iterations}"
|
||||||
|
tool_call.function.name = "counter_tool"
|
||||||
|
tool_call.function.arguments = json.dumps({"count": iterations})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": f"call_{iterations}"}]
|
||||||
|
}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "counter_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {"count": "count"},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"count": {"type": "integer"}},
|
||||||
|
"required": ["count"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
|
||||||
|
mock_exec_result = MagicMock()
|
||||||
|
mock_exec_result.node_exec_id = "exec-id"
|
||||||
|
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {"count": 1})
|
||||||
|
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {"result": "ok"}
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Count forever",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=-1, # INFINITE MODE
|
||||||
|
)
|
||||||
|
|
||||||
|
async with asyncio.timeout(10):
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# We ran many iterations before stopping
|
||||||
|
assert iterations == max_test_iterations
|
||||||
|
# BUG: No built-in safeguard against runaway iterations
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_max_iterations_limit_enforced(self):
|
||||||
|
"""
|
||||||
|
Test that max_iterations limit is properly enforced.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
iterations = 0
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal iterations
|
||||||
|
iterations += 1
|
||||||
|
|
||||||
|
# Always make tool calls (never finish voluntarily)
|
||||||
|
tool_call = MagicMock()
|
||||||
|
tool_call.id = f"call_{iterations}"
|
||||||
|
tool_call.function.name = "endless_tool"
|
||||||
|
tool_call.function.arguments = json.dumps({})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": f"call_{iterations}"}]
|
||||||
|
}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "endless_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {},
|
||||||
|
"parameters": {"properties": {}, "required": []},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
mock_exec_result = MagicMock()
|
||||||
|
mock_exec_result.node_exec_id = "exec-id"
|
||||||
|
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||||
|
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||||
|
|
||||||
|
MAX_ITERATIONS = 3
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Run forever",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=MAX_ITERATIONS,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Should have stopped at max iterations
|
||||||
|
assert iterations == MAX_ITERATIONS
|
||||||
|
assert "finished" in outputs
|
||||||
|
assert "limit reached" in outputs["finished"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
class TestStaleCredentialsInAgentMode:
|
||||||
|
"""
|
||||||
|
Tests for Failure Mode #12: Stale Credentials in Agent Mode
|
||||||
|
|
||||||
|
Credentials are validated once at start but can expire during
|
||||||
|
long-running agent mode executions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_credentials_not_revalidated_between_iterations(self):
|
||||||
|
"""
|
||||||
|
Test that credentials are used without revalidation in agent mode.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
credential_check_count = 0
|
||||||
|
iteration = 0
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal credential_check_count, iteration
|
||||||
|
iteration += 1
|
||||||
|
|
||||||
|
# Simulate credential check (in real code this happens in llm_call)
|
||||||
|
credential_check_count += 1
|
||||||
|
|
||||||
|
if iteration >= 3:
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = "Done"
|
||||||
|
resp.tool_calls = []
|
||||||
|
resp.prompt_tokens = 10
|
||||||
|
resp.completion_tokens = 5
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
tool_call = MagicMock()
|
||||||
|
tool_call.id = f"call_{iteration}"
|
||||||
|
tool_call.function.name = "test_tool"
|
||||||
|
tool_call.function.arguments = json.dumps({})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": f"call_{iteration}"}]
|
||||||
|
}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {},
|
||||||
|
"parameters": {"properties": {}, "required": []},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
mock_exec_result = MagicMock()
|
||||||
|
mock_exec_result.node_exec_id = "exec-id"
|
||||||
|
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||||
|
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test credentials",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=5,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Credentials were checked on each LLM call but not refreshed
|
||||||
|
# If they expired mid-execution, we'd get auth errors
|
||||||
|
assert credential_check_count == iteration
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_credential_expiration_mid_execution(self):
|
||||||
|
"""
|
||||||
|
Test what happens when credentials expire during agent mode.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
iteration = 0
|
||||||
|
|
||||||
|
async def mock_llm_call_with_expiration(**kwargs):
|
||||||
|
nonlocal iteration
|
||||||
|
iteration += 1
|
||||||
|
|
||||||
|
if iteration >= 3:
|
||||||
|
# Simulate credential expiration
|
||||||
|
raise Exception("401 Unauthorized: API key expired")
|
||||||
|
|
||||||
|
tool_call = MagicMock()
|
||||||
|
tool_call.id = f"call_{iteration}"
|
||||||
|
tool_call.function.name = "test_tool"
|
||||||
|
tool_call.function.arguments = json.dumps({})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": f"call_{iteration}"}]
|
||||||
|
}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {},
|
||||||
|
"parameters": {"properties": {}, "required": []},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
mock_exec_result = MagicMock()
|
||||||
|
mock_exec_result.node_exec_id = "exec-id"
|
||||||
|
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||||
|
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call_with_expiration), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test credentials",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=10,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Should have an error output
|
||||||
|
assert "error" in outputs
|
||||||
|
assert "expired" in outputs["error"].lower() or "unauthorized" in outputs["error"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
class TestToolSignatureCacheInvalidation:
|
||||||
|
"""
|
||||||
|
Tests for Failure Mode #13: Tool Signature Cache Invalidation
|
||||||
|
|
||||||
|
Tool signatures are created once at the start of run() but the
|
||||||
|
graph could change during agent mode execution.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_signatures_created_once_at_start(self):
|
||||||
|
"""
|
||||||
|
Test that tool signatures are only created once, not refreshed.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
signature_creation_count = 0
|
||||||
|
iteration = 0
|
||||||
|
|
||||||
|
original_create_signatures = block._create_tool_node_signatures
|
||||||
|
|
||||||
|
async def counting_create_signatures(node_id):
|
||||||
|
nonlocal signature_creation_count
|
||||||
|
signature_creation_count += 1
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "tool_v1",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {},
|
||||||
|
"parameters": {"properties": {}, "required": []},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal iteration
|
||||||
|
iteration += 1
|
||||||
|
|
||||||
|
if iteration >= 3:
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = "Done"
|
||||||
|
resp.tool_calls = []
|
||||||
|
resp.prompt_tokens = 10
|
||||||
|
resp.completion_tokens = 5
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
tool_call = MagicMock()
|
||||||
|
tool_call.id = f"call_{iteration}"
|
||||||
|
tool_call.function.name = "tool_v1"
|
||||||
|
tool_call.function.arguments = json.dumps({})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": f"call_{iteration}"}]
|
||||||
|
}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
mock_exec_result = MagicMock()
|
||||||
|
mock_exec_result.node_exec_id = "exec-id"
|
||||||
|
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||||
|
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", side_effect=counting_create_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test signatures",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=5,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Signatures were only created once, even though we had multiple iterations
|
||||||
|
assert signature_creation_count == 1
|
||||||
|
assert iteration >= 3 # We had multiple iterations
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_stale_signatures_cause_tool_mismatch(self):
|
||||||
|
"""
|
||||||
|
Test scenario where tool definitions change but agent uses stale signatures.
|
||||||
|
"""
|
||||||
|
# This documents the potential issue:
|
||||||
|
# 1. Agent starts with tool_v1
|
||||||
|
# 2. User modifies graph, tool becomes tool_v2
|
||||||
|
# 3. Agent still thinks tool_v1 exists
|
||||||
|
# 4. LLM calls tool_v1, but it no longer exists
|
||||||
|
|
||||||
|
# Since signatures are created once at start and never refreshed,
|
||||||
|
# any changes to the graph during execution won't be reflected.
|
||||||
|
|
||||||
|
# This is more of a documentation test - the actual fix would
|
||||||
|
# require either:
|
||||||
|
# a) Refreshing signatures periodically
|
||||||
|
# b) Locking the graph during execution
|
||||||
|
# c) Checking tool existence before each call
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestAgentModeConversationManagement:
|
||||||
|
"""Tests for conversation management in agent mode."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_conversation_grows_with_iterations(self):
|
||||||
|
"""
|
||||||
|
Test that conversation history grows correctly with each iteration.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
iteration = 0
|
||||||
|
conversation_lengths = []
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal iteration
|
||||||
|
iteration += 1
|
||||||
|
|
||||||
|
# Record conversation length at each call
|
||||||
|
prompt = kwargs.get("prompt", [])
|
||||||
|
conversation_lengths.append(len(prompt))
|
||||||
|
|
||||||
|
if iteration >= 3:
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = "Done"
|
||||||
|
resp.tool_calls = []
|
||||||
|
resp.prompt_tokens = 10
|
||||||
|
resp.completion_tokens = 5
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
tool_call = MagicMock()
|
||||||
|
tool_call.id = f"call_{iteration}"
|
||||||
|
tool_call.function.name = "test_tool"
|
||||||
|
tool_call.function.arguments = json.dumps({})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": f"call_{iteration}"}]
|
||||||
|
}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {},
|
||||||
|
"parameters": {"properties": {}, "required": []},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
mock_exec_result = MagicMock()
|
||||||
|
mock_exec_result.node_exec_id = "exec-id"
|
||||||
|
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||||
|
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {"result": "ok"}
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test conversation",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=5,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Conversation should grow with each iteration
|
||||||
|
# Each iteration adds: assistant message + tool response
|
||||||
|
assert len(conversation_lengths) == 3
|
||||||
|
for i in range(1, len(conversation_lengths)):
|
||||||
|
assert conversation_lengths[i] > conversation_lengths[i-1], \
|
||||||
|
f"Conversation should grow: {conversation_lengths}"
|
||||||
@@ -0,0 +1,525 @@
|
|||||||
|
"""
|
||||||
|
Tests for SmartDecisionMaker concurrency issues and race conditions.
|
||||||
|
|
||||||
|
Covers failure modes:
|
||||||
|
1. Conversation History Race Condition
|
||||||
|
4. Concurrent Execution State Sharing
|
||||||
|
7. Race in Pending Tool Calls
|
||||||
|
11. Race in Pending Tool Call Retrieval
|
||||||
|
14. Concurrent State Sharing
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import threading
|
||||||
|
from collections import Counter
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from typing import Any
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, Mock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from backend.blocks.smart_decision_maker import (
|
||||||
|
SmartDecisionMakerBlock,
|
||||||
|
get_pending_tool_calls,
|
||||||
|
_create_tool_response,
|
||||||
|
_get_tool_requests,
|
||||||
|
_get_tool_responses,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestConversationHistoryRaceCondition:
|
||||||
|
"""
|
||||||
|
Tests for Failure Mode #1: Conversation History Race Condition
|
||||||
|
|
||||||
|
When multiple executions share conversation history, concurrent
|
||||||
|
modifications can cause data loss or corruption.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_get_pending_tool_calls_with_concurrent_modification(self):
|
||||||
|
"""
|
||||||
|
Test that concurrent modifications to conversation history
|
||||||
|
can cause inconsistent pending tool call counts.
|
||||||
|
"""
|
||||||
|
# Shared conversation history
|
||||||
|
conversation_history = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [
|
||||||
|
{"type": "tool_use", "id": "toolu_1"},
|
||||||
|
{"type": "tool_use", "id": "toolu_2"},
|
||||||
|
{"type": "tool_use", "id": "toolu_3"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
results = []
|
||||||
|
errors = []
|
||||||
|
|
||||||
|
def reader_thread():
|
||||||
|
"""Repeatedly read pending calls."""
|
||||||
|
for _ in range(100):
|
||||||
|
try:
|
||||||
|
pending = get_pending_tool_calls(conversation_history)
|
||||||
|
results.append(len(pending))
|
||||||
|
except Exception as e:
|
||||||
|
errors.append(str(e))
|
||||||
|
|
||||||
|
def writer_thread():
|
||||||
|
"""Modify conversation while readers are active."""
|
||||||
|
for i in range(50):
|
||||||
|
# Add a tool response
|
||||||
|
conversation_history.append({
|
||||||
|
"role": "user",
|
||||||
|
"content": [{"type": "tool_result", "tool_use_id": f"toolu_{(i % 3) + 1}"}]
|
||||||
|
})
|
||||||
|
# Remove it
|
||||||
|
if len(conversation_history) > 1:
|
||||||
|
conversation_history.pop()
|
||||||
|
|
||||||
|
# Run concurrent readers and writers
|
||||||
|
threads = []
|
||||||
|
for _ in range(3):
|
||||||
|
threads.append(threading.Thread(target=reader_thread))
|
||||||
|
threads.append(threading.Thread(target=writer_thread))
|
||||||
|
|
||||||
|
for t in threads:
|
||||||
|
t.start()
|
||||||
|
for t in threads:
|
||||||
|
t.join()
|
||||||
|
|
||||||
|
# The issue: results may be inconsistent due to race conditions
|
||||||
|
# In a correct implementation, we'd expect consistent results
|
||||||
|
# Document that this CAN produce inconsistent results
|
||||||
|
assert len(results) > 0, "Should have some results"
|
||||||
|
# Note: This test documents the race condition exists
|
||||||
|
# When fixed, all results should be consistent
|
||||||
|
|
||||||
|
def test_prompt_list_mutation_race(self):
|
||||||
|
"""
|
||||||
|
Test that mutating prompt list during iteration can cause issues.
|
||||||
|
"""
|
||||||
|
prompt = []
|
||||||
|
errors = []
|
||||||
|
|
||||||
|
def appender():
|
||||||
|
for i in range(100):
|
||||||
|
prompt.append({"role": "user", "content": f"msg_{i}"})
|
||||||
|
|
||||||
|
def extender():
|
||||||
|
for i in range(100):
|
||||||
|
prompt.extend([{"role": "assistant", "content": f"resp_{i}"}])
|
||||||
|
|
||||||
|
def reader():
|
||||||
|
for _ in range(100):
|
||||||
|
try:
|
||||||
|
# Iterate while others modify
|
||||||
|
_ = [p for p in prompt if p.get("role") == "user"]
|
||||||
|
except RuntimeError as e:
|
||||||
|
# "dictionary changed size during iteration" or similar
|
||||||
|
errors.append(str(e))
|
||||||
|
|
||||||
|
threads = [
|
||||||
|
threading.Thread(target=appender),
|
||||||
|
threading.Thread(target=extender),
|
||||||
|
threading.Thread(target=reader),
|
||||||
|
]
|
||||||
|
|
||||||
|
for t in threads:
|
||||||
|
t.start()
|
||||||
|
for t in threads:
|
||||||
|
t.join()
|
||||||
|
|
||||||
|
# Document that race conditions can occur
|
||||||
|
# In production, this could cause silent data corruption
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_concurrent_block_runs_share_state(self):
|
||||||
|
"""
|
||||||
|
Test that concurrent runs on same block instance can share state incorrectly.
|
||||||
|
|
||||||
|
This is Failure Mode #14: Concurrent State Sharing
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
# Track all outputs from all runs
|
||||||
|
all_outputs = []
|
||||||
|
lock = threading.Lock()
|
||||||
|
|
||||||
|
async def run_block(run_id: int):
|
||||||
|
"""Run the block with a unique run_id."""
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = f"Response for run {run_id}"
|
||||||
|
mock_response.tool_calls = [] # No tool calls, just finish
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {"role": "assistant", "content": f"Run {run_id}"}
|
||||||
|
|
||||||
|
mock_tool_signatures = []
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
|
||||||
|
mock_llm.return_value = mock_response
|
||||||
|
|
||||||
|
with patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt=f"Prompt for run {run_id}",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for output_name, output_data in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id=f"graph-{run_id}",
|
||||||
|
node_id=f"node-{run_id}",
|
||||||
|
graph_exec_id=f"exec-{run_id}",
|
||||||
|
node_exec_id=f"node-exec-{run_id}",
|
||||||
|
user_id=f"user-{run_id}",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[output_name] = output_data
|
||||||
|
|
||||||
|
with lock:
|
||||||
|
all_outputs.append((run_id, outputs))
|
||||||
|
|
||||||
|
# Run multiple concurrent executions
|
||||||
|
tasks = [run_block(i) for i in range(5)]
|
||||||
|
await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
# Verify each run got its own response (no cross-contamination)
|
||||||
|
for run_id, outputs in all_outputs:
|
||||||
|
if "finished" in outputs:
|
||||||
|
assert f"run {run_id}" in outputs["finished"].lower() or outputs["finished"] == f"Response for run {run_id}", \
|
||||||
|
f"Run {run_id} may have received contaminated response: {outputs}"


class TestPendingToolCallRace:
    """
    Tests for Failure Mode #7 and #11: Race in Pending Tool Calls

    The get_pending_tool_calls function can race with modifications
    to the conversation history, causing StopIteration or incorrect counts.
    """

    def test_pending_tool_calls_counter_accuracy(self):
        """Test that pending tool call counting is accurate."""
        conversation = [
            # Assistant makes 3 tool calls
            {
                "role": "assistant",
                "content": [
                    {"type": "tool_use", "id": "call_1"},
                    {"type": "tool_use", "id": "call_2"},
                    {"type": "tool_use", "id": "call_3"},
                ]
            },
            # User provides 1 response
            {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "call_1"}
                ]
            }
        ]

        pending = get_pending_tool_calls(conversation)

        # Should have 2 pending (call_2, call_3)
        assert len(pending) == 2
        assert "call_2" in pending
        assert "call_3" in pending
        assert pending["call_2"] == 1
        assert pending["call_3"] == 1

    def test_pending_tool_calls_duplicate_responses(self):
        """Test handling of duplicate tool responses."""
        conversation = [
            {
                "role": "assistant",
                "content": [{"type": "tool_use", "id": "call_1"}]
            },
            # Duplicate responses for same call
            {
                "role": "user",
                "content": [{"type": "tool_result", "tool_use_id": "call_1"}]
            },
            {
                "role": "user",
                "content": [{"type": "tool_result", "tool_use_id": "call_1"}]
            }
        ]

        pending = get_pending_tool_calls(conversation)

        # call_1 has count -1 (1 request - 2 responses)
        # Should not be in pending (count <= 0)
        assert "call_1" not in pending or pending.get("call_1", 0) <= 0

    def test_empty_conversation_no_pending(self):
        """Test that empty conversation has no pending calls."""
        assert get_pending_tool_calls([]) == {}
        assert get_pending_tool_calls(None) == {}

    def test_next_iter_on_empty_dict_raises_stop_iteration(self):
        """
        Document the StopIteration vulnerability.

        If pending_tool_calls becomes empty between the check and
        next(iter(...)), StopIteration is raised.
        """
        pending = {}

        # This is the pattern used in smart_decision_maker.py:1019
        # if pending_tool_calls and ...:
        #     first_call_id = next(iter(pending_tool_calls.keys()))

        with pytest.raises(StopIteration):
            next(iter(pending.keys()))

        # Safe pattern should be:
        # first_call_id = next(iter(pending_tool_calls.keys()), None)
        safe_result = next(iter(pending.keys()), None)
        assert safe_result is None
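

# Illustrative sketch (not part of the test class above): one way the call site
# could avoid the check-then-next(iter(...)) race documented in
# test_next_iter_on_empty_dict_raises_stop_iteration. The helper name is
# hypothetical; the real smart_decision_maker code may be structured differently.
def _first_pending_call_id_sketch(pending_tool_calls: dict):
    # next() with a default never raises StopIteration, even if another task
    # drained the dict between the caller's emptiness check and this call.
    return next(iter(pending_tool_calls), None)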


class TestToolRequestResponseParsing:
    """Tests for tool request/response parsing edge cases."""

    def test_get_tool_requests_openai_format(self):
        """Test parsing OpenAI format tool requests."""
        entry = {
            "role": "assistant",
            "tool_calls": [
                {"id": "call_abc123"},
                {"id": "call_def456"},
            ]
        }

        requests = _get_tool_requests(entry)
        assert requests == ["call_abc123", "call_def456"]

    def test_get_tool_requests_anthropic_format(self):
        """Test parsing Anthropic format tool requests."""
        entry = {
            "role": "assistant",
            "content": [
                {"type": "tool_use", "id": "toolu_abc123"},
                {"type": "text", "text": "Let me call this tool"},
                {"type": "tool_use", "id": "toolu_def456"},
            ]
        }

        requests = _get_tool_requests(entry)
        assert requests == ["toolu_abc123", "toolu_def456"]

    def test_get_tool_requests_non_assistant_role(self):
        """Non-assistant roles should return empty list."""
        entry = {"role": "user", "tool_calls": [{"id": "call_123"}]}
        assert _get_tool_requests(entry) == []

    def test_get_tool_responses_openai_format(self):
        """Test parsing OpenAI format tool responses."""
        entry = {
            "role": "tool",
            "tool_call_id": "call_abc123",
            "content": "Result"
        }

        responses = _get_tool_responses(entry)
        assert responses == ["call_abc123"]

    def test_get_tool_responses_anthropic_format(self):
        """Test parsing Anthropic format tool responses."""
        entry = {
            "role": "user",
            "content": [
                {"type": "tool_result", "tool_use_id": "toolu_abc123"},
                {"type": "tool_result", "tool_use_id": "toolu_def456"},
            ]
        }

        responses = _get_tool_responses(entry)
        assert responses == ["toolu_abc123", "toolu_def456"]

    def test_get_tool_responses_mixed_content(self):
        """Test parsing responses with mixed content types."""
        entry = {
            "role": "user",
            "content": [
                {"type": "text", "text": "Here are the results"},
                {"type": "tool_result", "tool_use_id": "toolu_123"},
                {"type": "image", "url": "http://example.com/img.png"},
            ]
        }

        responses = _get_tool_responses(entry)
        assert responses == ["toolu_123"]


class TestConcurrentToolSignatureCreation:
    """Tests for concurrent tool signature creation."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_concurrent_signature_creation_same_node(self):
|
||||||
|
"""
|
||||||
|
Test that concurrent signature creation for same node
|
||||||
|
doesn't cause issues.
|
||||||
|
"""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_node = Mock()
|
||||||
|
mock_node.id = "test-node"
|
||||||
|
mock_node.block = Mock()
|
||||||
|
mock_node.block.name = "TestBlock"
|
||||||
|
mock_node.block.description = "Test"
|
||||||
|
mock_node.block.input_schema = Mock()
|
||||||
|
mock_node.block.input_schema.jsonschema = Mock(
|
||||||
|
return_value={"properties": {}, "required": []}
|
||||||
|
)
|
||||||
|
mock_node.block.input_schema.get_field_schema = Mock(
|
||||||
|
return_value={"type": "string", "description": "test"}
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_links = [
|
||||||
|
Mock(sink_name="field1", sink_id="test-node", source_id="source"),
|
||||||
|
Mock(sink_name="field2", sink_id="test-node", source_id="source"),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Run multiple concurrent signature creations
|
||||||
|
tasks = [
|
||||||
|
block._create_block_function_signature(mock_node, mock_links)
|
||||||
|
for _ in range(10)
|
||||||
|
]
|
||||||
|
|
||||||
|
results = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
# All results should be identical
|
||||||
|
first = results[0]
|
||||||
|
for i, result in enumerate(results[1:], 1):
|
||||||
|
assert result["function"]["name"] == first["function"]["name"], \
|
||||||
|
f"Result {i} has different name"
|
||||||
|
assert set(result["function"]["parameters"]["properties"].keys()) == \
|
||||||
|
set(first["function"]["parameters"]["properties"].keys()), \
|
||||||
|
f"Result {i} has different properties"
|
||||||
|
|
||||||
|
|
||||||
|
class TestThreadSafetyOfCleanup:
|
||||||
|
"""Tests for thread safety of cleanup function."""
|
||||||
|
|
||||||
|
def test_cleanup_is_thread_safe(self):
|
||||||
|
"""
|
||||||
|
Test that cleanup function is thread-safe.
|
||||||
|
|
||||||
|
Since it's a pure function with no shared state, it should be safe.
|
||||||
|
"""
|
||||||
|
results = {}
|
||||||
|
lock = threading.Lock()
|
||||||
|
|
||||||
|
test_inputs = [
|
||||||
|
"Max Keyword Difficulty",
|
||||||
|
"Search Volume (Monthly)",
|
||||||
|
"CPC ($)",
|
||||||
|
"Target URL",
|
||||||
|
]
|
||||||
|
|
||||||
|
def worker(input_str: str, thread_id: int):
|
||||||
|
for _ in range(100):
|
||||||
|
result = SmartDecisionMakerBlock.cleanup(input_str)
|
||||||
|
with lock:
|
||||||
|
key = f"{thread_id}_{input_str}"
|
||||||
|
if key not in results:
|
||||||
|
results[key] = set()
|
||||||
|
results[key].add(result)
|
||||||
|
|
||||||
|
threads = []
|
||||||
|
for i, input_str in enumerate(test_inputs):
|
||||||
|
for j in range(3):
|
||||||
|
t = threading.Thread(target=worker, args=(input_str, i * 3 + j))
|
||||||
|
threads.append(t)
|
||||||
|
|
||||||
|
for t in threads:
|
||||||
|
t.start()
|
||||||
|
for t in threads:
|
||||||
|
t.join()
|
||||||
|
|
||||||
|
# Each input should produce exactly one unique output
|
||||||
|
for key, values in results.items():
|
||||||
|
assert len(values) == 1, f"Non-deterministic cleanup for {key}: {values}"
|
||||||
|
|
||||||
|
|
||||||
|
class TestAsyncConcurrencyPatterns:
|
||||||
|
"""Tests for async concurrency patterns in the block."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_multiple_async_runs_isolation(self):
|
||||||
|
"""
|
||||||
|
Test that multiple async runs are properly isolated.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
run_count = 5
|
||||||
|
results = []
|
||||||
|
|
||||||
|
async def single_run(run_id: int):
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = f"Unique response {run_id}"
|
||||||
|
mock_response.tool_calls = []
|
||||||
|
mock_response.prompt_tokens = 10
|
||||||
|
mock_response.completion_tokens = 5
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {"role": "assistant", "content": f"Run {run_id}"}
|
||||||
|
|
||||||
|
# Add small random delay to increase chance of interleaving
|
||||||
|
await asyncio.sleep(0.001 * (run_id % 3))
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
|
||||||
|
mock_llm.return_value = mock_response
|
||||||
|
|
||||||
|
with patch.object(block, "_create_tool_node_signatures", return_value=[]):
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt=f"Prompt {run_id}",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id=f"g{run_id}",
|
||||||
|
node_id=f"n{run_id}",
|
||||||
|
graph_exec_id=f"e{run_id}",
|
||||||
|
node_exec_id=f"ne{run_id}",
|
||||||
|
user_id=f"u{run_id}",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=ExecutionContext(safe_mode=False),
|
||||||
|
execution_processor=MagicMock(),
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
return run_id, outputs
|
||||||
|
|
||||||
|
# Run all concurrently
|
||||||
|
tasks = [single_run(i) for i in range(run_count)]
|
||||||
|
results = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
# Verify isolation
|
||||||
|
for run_id, outputs in results:
|
||||||
|
if "finished" in outputs:
|
||||||
|
assert str(run_id) in outputs["finished"], \
|
||||||
|
f"Run {run_id} got wrong response: {outputs['finished']}"
@@ -0,0 +1,667 @@
"""
Tests for SmartDecisionMaker conversation handling and corruption scenarios.

Covers failure modes:
6. Conversation Corruption in Error Paths
And related conversation management issues.
"""

import json
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.blocks.smart_decision_maker import (
    SmartDecisionMakerBlock,
    get_pending_tool_calls,
    _create_tool_response,
    _combine_tool_responses,
    _convert_raw_response_to_dict,
    _get_tool_requests,
    _get_tool_responses,
)


class TestConversationCorruptionInErrorPaths:
    """
    Tests for Failure Mode #6: Conversation Corruption in Error Paths

    When there's a logic error (orphaned tool output), the code appends
    it as a "user" message instead of proper tool response format,
    violating LLM conversation structure.
    """
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_orphaned_tool_output_creates_user_message(self):
|
||||||
|
"""
|
||||||
|
Test that orphaned tool output (no pending calls) creates wrong message type.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
# Response with no tool calls
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = "No tools needed"
|
||||||
|
mock_response.tool_calls = []
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {"role": "assistant", "content": "No tools needed"}
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
|
||||||
|
mock_llm.return_value = mock_response
|
||||||
|
|
||||||
|
with patch.object(block, "_create_tool_node_signatures", return_value=[]):
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
# Orphaned tool output - no pending calls but we have output
|
||||||
|
last_tool_output={"result": "orphaned data"},
|
||||||
|
conversation_history=[], # Empty - no pending calls
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Check the conversation for the orphaned output handling
|
||||||
|
# The orphaned output is logged as error but may be added as user message
|
||||||
|
# This is the BUG: should not add orphaned outputs to conversation
|
||||||
|
|
||||||
|
def test_create_tool_response_anthropic_format(self):
|
||||||
|
"""Test that Anthropic format tool responses are created correctly."""
|
||||||
|
response = _create_tool_response(
|
||||||
|
"toolu_abc123",
|
||||||
|
{"result": "success"}
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response["role"] == "user"
|
||||||
|
assert response["type"] == "message"
|
||||||
|
assert isinstance(response["content"], list)
|
||||||
|
assert response["content"][0]["type"] == "tool_result"
|
||||||
|
assert response["content"][0]["tool_use_id"] == "toolu_abc123"
|
||||||
|
|
||||||
|
def test_create_tool_response_openai_format(self):
|
||||||
|
"""Test that OpenAI format tool responses are created correctly."""
|
||||||
|
response = _create_tool_response(
|
||||||
|
"call_abc123",
|
||||||
|
{"result": "success"}
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response["role"] == "tool"
|
||||||
|
assert response["tool_call_id"] == "call_abc123"
|
||||||
|
assert "content" in response
|
||||||
|
|
||||||
|
def test_tool_response_with_string_content(self):
|
||||||
|
"""Test tool response creation with string content."""
|
||||||
|
response = _create_tool_response(
|
||||||
|
"call_123",
|
||||||
|
"Simple string result"
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response["content"] == "Simple string result"
|
||||||
|
|
||||||
|
def test_tool_response_with_complex_content(self):
|
||||||
|
"""Test tool response creation with complex JSON content."""
|
||||||
|
complex_data = {
|
||||||
|
"nested": {"key": "value"},
|
||||||
|
"list": [1, 2, 3],
|
||||||
|
"null": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
response = _create_tool_response("call_123", complex_data)
|
||||||
|
|
||||||
|
# Content should be JSON string
|
||||||
|
parsed = json.loads(response["content"])
|
||||||
|
assert parsed == complex_data
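

# Minimal sketch of a response builder consistent with the assertions above.
# The switch on the "toolu_" id prefix is an assumption made for illustration;
# the real _create_tool_response may choose the format differently.
def _create_tool_response_sketch(call_id: str, output) -> dict:
    # Complex payloads are serialized; plain strings pass through unchanged.
    content = output if isinstance(output, str) else json.dumps(output)
    if call_id.startswith("toolu_"):
        # Anthropic-style: a tool_result block wrapped in a user message.
        return {
            "role": "user",
            "type": "message",
            "content": [
                {"type": "tool_result", "tool_use_id": call_id, "content": content}
            ],
        }
    # OpenAI-style: a dedicated "tool" role message.
    return {"role": "tool", "tool_call_id": call_id, "content": content}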


class TestCombineToolResponses:
    """Tests for combining multiple tool responses."""
|
||||||
|
|
||||||
|
def test_combine_single_response_unchanged(self):
|
||||||
|
"""Test that single response is returned unchanged."""
|
||||||
|
responses = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"type": "message",
|
||||||
|
"content": [{"type": "tool_result", "tool_use_id": "123"}]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
result = _combine_tool_responses(responses)
|
||||||
|
assert result == responses
|
||||||
|
|
||||||
|
def test_combine_multiple_anthropic_responses(self):
|
||||||
|
"""Test combining multiple Anthropic responses."""
|
||||||
|
responses = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"type": "message",
|
||||||
|
"content": [{"type": "tool_result", "tool_use_id": "123", "content": "a"}]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"type": "message",
|
||||||
|
"content": [{"type": "tool_result", "tool_use_id": "456", "content": "b"}]
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
result = _combine_tool_responses(responses)
|
||||||
|
|
||||||
|
# Should be combined into single message
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0]["role"] == "user"
|
||||||
|
assert len(result[0]["content"]) == 2
|
||||||
|
|
||||||
|
def test_combine_mixed_responses(self):
|
||||||
|
"""Test combining mixed Anthropic and OpenAI responses."""
|
||||||
|
responses = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"type": "message",
|
||||||
|
"content": [{"type": "tool_result", "tool_use_id": "123"}]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "tool",
|
||||||
|
"tool_call_id": "call_456",
|
||||||
|
"content": "openai result"
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
result = _combine_tool_responses(responses)
|
||||||
|
|
||||||
|
# Anthropic response combined, OpenAI kept separate
|
||||||
|
assert len(result) == 2
|
||||||
|
|
||||||
|
def test_combine_empty_list(self):
|
||||||
|
"""Test combining empty list."""
|
||||||
|
result = _combine_tool_responses([])
|
||||||
|
assert result == []
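

# Minimal sketch of the combining behaviour exercised above, assuming the real
# _combine_tool_responses may handle additional edge cases (ordering, metadata).
def _combine_tool_responses_sketch(responses: list) -> list:
    anthropic_blocks = []
    passthrough = []
    for response in responses:
        if response.get("type") == "message" and isinstance(response.get("content"), list):
            # Anthropic tool_result blocks can share a single user message.
            anthropic_blocks.extend(response["content"])
        else:
            # OpenAI "tool" messages stay as separate entries.
            passthrough.append(response)
    combined = []
    if anthropic_blocks:
        combined.append({"role": "user", "type": "message", "content": anthropic_blocks})
    combined.extend(passthrough)
    return combined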


class TestConversationHistoryValidation:
    """Tests for conversation history validation."""
|
||||||
|
|
||||||
|
def test_pending_tool_calls_basic(self):
|
||||||
|
"""Test basic pending tool call counting."""
|
||||||
|
history = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [
|
||||||
|
{"type": "tool_use", "id": "call_1"},
|
||||||
|
{"type": "tool_use", "id": "call_2"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
pending = get_pending_tool_calls(history)
|
||||||
|
|
||||||
|
assert len(pending) == 2
|
||||||
|
assert "call_1" in pending
|
||||||
|
assert "call_2" in pending
|
||||||
|
|
||||||
|
def test_pending_tool_calls_with_responses(self):
|
||||||
|
"""Test pending calls after some responses."""
|
||||||
|
history = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [
|
||||||
|
{"type": "tool_use", "id": "call_1"},
|
||||||
|
{"type": "tool_use", "id": "call_2"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"type": "tool_result", "tool_use_id": "call_1"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
pending = get_pending_tool_calls(history)
|
||||||
|
|
||||||
|
assert len(pending) == 1
|
||||||
|
assert "call_2" in pending
|
||||||
|
assert "call_1" not in pending
|
||||||
|
|
||||||
|
def test_pending_tool_calls_all_responded(self):
|
||||||
|
"""Test when all tool calls have responses."""
|
||||||
|
history = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": "call_1"}]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [{"type": "tool_result", "tool_use_id": "call_1"}]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
pending = get_pending_tool_calls(history)
|
||||||
|
|
||||||
|
assert len(pending) == 0
|
||||||
|
|
||||||
|
def test_pending_tool_calls_openai_format(self):
|
||||||
|
"""Test pending calls with OpenAI format."""
|
||||||
|
history = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{"id": "call_1"},
|
||||||
|
{"id": "call_2"},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "tool",
|
||||||
|
"tool_call_id": "call_1",
|
||||||
|
"content": "result"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
pending = get_pending_tool_calls(history)
|
||||||
|
|
||||||
|
assert len(pending) == 1
|
||||||
|
assert "call_2" in pending


class TestConversationUpdateBehavior:
    """Tests for conversation update behavior."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_conversation_includes_assistant_response(self):
|
||||||
|
"""Test that assistant responses are added to conversation."""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = "Final answer"
|
||||||
|
mock_response.tool_calls = []
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {"role": "assistant", "content": "Final answer"}
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
|
||||||
|
mock_llm.return_value = mock_response
|
||||||
|
|
||||||
|
with patch.object(block, "_create_tool_node_signatures", return_value=[]):
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# No conversations output when no tool calls (just finished)
|
||||||
|
assert "finished" in outputs
|
||||||
|
assert outputs["finished"] == "Final answer"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_conversation_with_tool_calls(self):
|
||||||
|
"""Test that tool calls are properly added to conversation."""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({"param": "value"})
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = None
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = "I'll use the test tool"
|
||||||
|
mock_response.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": None,
|
||||||
|
"tool_calls": [{"id": "call_1"}]
|
||||||
|
}
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {"param": "param"},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"param": {"type": "string"}},
|
||||||
|
"required": ["param"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
|
||||||
|
mock_llm.return_value = mock_response
|
||||||
|
|
||||||
|
with patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Should have conversations output
|
||||||
|
assert "conversations" in outputs
|
||||||
|
|
||||||
|
# Conversation should include the assistant message
|
||||||
|
conversations = outputs["conversations"]
|
||||||
|
has_assistant = any(
|
||||||
|
msg.get("role") == "assistant"
|
||||||
|
for msg in conversations
|
||||||
|
)
|
||||||
|
assert has_assistant
|
||||||
|
|
||||||
|
|
||||||
|
class TestConversationHistoryPreservation:
|
||||||
|
"""Tests for conversation history preservation across calls."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_existing_history_preserved(self):
|
||||||
|
"""Test that existing conversation history is preserved."""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
existing_history = [
|
||||||
|
{"role": "user", "content": "Previous message 1"},
|
||||||
|
{"role": "assistant", "content": "Previous response 1"},
|
||||||
|
{"role": "user", "content": "Previous message 2"},
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = "New response"
|
||||||
|
mock_response.tool_calls = []
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {"role": "assistant", "content": "New response"}
|
||||||
|
|
||||||
|
captured_prompt = []
|
||||||
|
|
||||||
|
async def capture_llm_call(**kwargs):
|
||||||
|
captured_prompt.extend(kwargs.get("prompt", []))
|
||||||
|
return mock_response
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=capture_llm_call):
|
||||||
|
with patch.object(block, "_create_tool_node_signatures", return_value=[]):
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="New message",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
conversation_history=existing_history,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
async for _ in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Existing history should be in the prompt
|
||||||
|
assert len(captured_prompt) >= len(existing_history)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRawResponseConversion:
|
||||||
|
"""Tests for raw response to dict conversion."""
|
||||||
|
|
||||||
|
def test_string_response(self):
|
||||||
|
"""Test conversion of string response."""
|
||||||
|
result = _convert_raw_response_to_dict("Hello world")
|
||||||
|
|
||||||
|
assert result == {"role": "assistant", "content": "Hello world"}
|
||||||
|
|
||||||
|
def test_dict_response(self):
|
||||||
|
"""Test that dict response is passed through."""
|
||||||
|
original = {"role": "assistant", "content": "test", "extra": "data"}
|
||||||
|
result = _convert_raw_response_to_dict(original)
|
||||||
|
|
||||||
|
assert result == original
|
||||||
|
|
||||||
|
def test_object_response(self):
|
||||||
|
"""Test conversion of object response."""
|
||||||
|
mock_obj = MagicMock()
|
||||||
|
|
||||||
|
with patch("backend.blocks.smart_decision_maker.json.to_dict") as mock_to_dict:
|
||||||
|
mock_to_dict.return_value = {"role": "assistant", "content": "converted"}
|
||||||
|
result = _convert_raw_response_to_dict(mock_obj)
|
||||||
|
|
||||||
|
mock_to_dict.assert_called_once_with(mock_obj)
|
||||||
|
assert result["role"] == "assistant"
|
||||||
|
|
||||||
|
|
||||||
|
class TestConversationMessageStructure:
|
||||||
|
"""Tests for correct conversation message structure."""
|
||||||
|
|
||||||
|
def test_system_message_not_duplicated(self):
|
||||||
|
"""Test that system messages are not duplicated."""
|
||||||
|
from backend.util.prompt import MAIN_OBJECTIVE_PREFIX
|
||||||
|
|
||||||
|
# Existing system message in history
|
||||||
|
existing_history = [
|
||||||
|
{"role": "system", "content": f"{MAIN_OBJECTIVE_PREFIX}Existing system prompt"},
|
||||||
|
]
|
||||||
|
|
||||||
|
# The block should not add another system message
|
||||||
|
# This is verified by checking the prompt passed to LLM
|
||||||
|
|
||||||
|
def test_user_message_not_duplicated(self):
|
||||||
|
"""Test that user messages are not duplicated."""
|
||||||
|
from backend.util.prompt import MAIN_OBJECTIVE_PREFIX
|
||||||
|
|
||||||
|
# Existing user message with MAIN_OBJECTIVE_PREFIX
|
||||||
|
existing_history = [
|
||||||
|
{"role": "user", "content": f"{MAIN_OBJECTIVE_PREFIX}Existing user prompt"},
|
||||||
|
]
|
||||||
|
|
||||||
|
# The block should not add another user message with same prefix
|
||||||
|
# This is verified by checking the prompt passed to LLM
|
||||||
|
|
||||||
|
def test_tool_response_after_tool_call(self):
|
||||||
|
"""Test that tool responses come after tool calls."""
|
||||||
|
# Valid conversation structure
|
||||||
|
valid_history = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": "call_1"}]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [{"type": "tool_result", "tool_use_id": "call_1"}]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
# This should be valid - tool result follows tool use
|
||||||
|
pending = get_pending_tool_calls(valid_history)
|
||||||
|
assert len(pending) == 0
|
||||||
|
|
||||||
|
def test_orphaned_tool_response_detected(self):
|
||||||
|
"""Test detection of orphaned tool responses."""
|
||||||
|
# Invalid: tool response without matching tool call
|
||||||
|
invalid_history = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [{"type": "tool_result", "tool_use_id": "orphan_call"}]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
pending = get_pending_tool_calls(invalid_history)
|
||||||
|
|
||||||
|
# Orphan response creates negative count
|
||||||
|
# Should have count -1 for orphan_call
|
||||||
|
# But it's filtered out (count <= 0)
|
||||||
|
assert "orphan_call" not in pending
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidationErrorInConversation:
|
||||||
|
"""Tests for validation error handling in conversation."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_validation_error_feedback_not_in_final_conversation(self):
|
||||||
|
"""
|
||||||
|
Test that validation error feedback is not in final conversation output.
|
||||||
|
|
||||||
|
When retrying due to validation errors, the error feedback should
|
||||||
|
only be used for the retry prompt, not persisted in final conversation.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
call_count = 0
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal call_count
|
||||||
|
call_count += 1
|
||||||
|
|
||||||
|
if call_count == 1:
|
||||||
|
# First call: invalid tool call
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({"wrong": "param"})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [mock_tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": None}
|
||||||
|
return resp
|
||||||
|
else:
|
||||||
|
# Second call: finish
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = "Done"
|
||||||
|
resp.tool_calls = []
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {"correct": "correct"},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"correct": {"type": "string"}},
|
||||||
|
"required": ["correct"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call):
|
||||||
|
with patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
retry=3,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Should have finished successfully after retry
|
||||||
|
assert "finished" in outputs
|
||||||
|
|
||||||
|
# Note: In traditional mode (agent_mode_max_iterations=0),
|
||||||
|
# conversations are only output when there are tool calls
|
||||||
|
# After the retry succeeds with no tool calls, we just get "finished"
@@ -0,0 +1,671 @@
"""
Tests for SmartDecisionMaker data integrity failure modes.

Covers failure modes:
6. Conversation Corruption in Error Paths
7. Field Name Collision Not Detected
8. No Type Validation in Dynamic Field Merging
9. Unhandled Field Mapping Keys
16. Silent Value Loss in Output Routing
"""

import json
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock


class TestFieldNameCollisionDetection:
    """
    Tests for Failure Mode #7: Field Name Collision Not Detected

    When multiple field names sanitize to the same value,
    the last one silently overwrites previous mappings.
    """

    def test_different_names_same_sanitized_result(self):
        """Test that different names can produce the same sanitized result."""
        cleanup = SmartDecisionMakerBlock.cleanup

        # All these sanitize to "test_field"
        variants = [
            "test_field",
            "Test Field",
            "test field",
            "TEST_FIELD",
            "Test_Field",
            "test-field",  # Note: hyphen is preserved, this is different
        ]

        sanitized = [cleanup(v) for v in variants]

        # Count unique sanitized values
        unique = set(sanitized)
        # Most should collide (except hyphenated one)
        assert len(unique) < len(variants), \
            f"Expected collisions, got {unique}"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_collision_last_one_wins(self):
|
||||||
|
"""Test that in case of collision, the last field mapping wins."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_node = Mock()
|
||||||
|
mock_node.id = "test-node"
|
||||||
|
mock_node.block = Mock()
|
||||||
|
mock_node.block.name = "TestBlock"
|
||||||
|
mock_node.block.description = "Test"
|
||||||
|
mock_node.block.input_schema = Mock()
|
||||||
|
mock_node.block.input_schema.jsonschema = Mock(
|
||||||
|
return_value={"properties": {}, "required": []}
|
||||||
|
)
|
||||||
|
mock_node.block.input_schema.get_field_schema = Mock(
|
||||||
|
return_value={"type": "string", "description": "test"}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Two fields that sanitize to the same name
|
||||||
|
mock_links = [
|
||||||
|
Mock(sink_name="Test Field", sink_id="test-node", source_id="source"),
|
||||||
|
Mock(sink_name="test field", sink_id="test-node", source_id="source"),
|
||||||
|
]
|
||||||
|
|
||||||
|
signature = await block._create_block_function_signature(mock_node, mock_links)
|
||||||
|
|
||||||
|
field_mapping = signature["function"]["_field_mapping"]
|
||||||
|
properties = signature["function"]["parameters"]["properties"]
|
||||||
|
|
||||||
|
# Only one property (collision)
|
||||||
|
assert len(properties) == 1
|
||||||
|
assert "test_field" in properties
|
||||||
|
|
||||||
|
# The mapping has only the last one
|
||||||
|
# This is the BUG: first field's mapping is lost
|
||||||
|
assert field_mapping["test_field"] in ["Test Field", "test field"]
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_collision_causes_data_loss(self):
|
||||||
|
"""
|
||||||
|
Test that field collision can cause actual data loss.
|
||||||
|
|
||||||
|
Scenario:
|
||||||
|
1. Two fields "Field A" and "field a" both map to "field_a"
|
||||||
|
2. LLM provides value for "field_a"
|
||||||
|
3. Only one original field gets the value
|
||||||
|
4. The other field's expected input is lost
|
||||||
|
"""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
# Simulate processing tool calls with collision
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_tool_call = Mock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
"field_a": "value_for_both" # LLM uses sanitized name
|
||||||
|
})
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
|
||||||
|
# Tool definition with collision in field mapping
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"parameters": {
|
||||||
|
"properties": {
|
||||||
|
"field_a": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["field_a"],
|
||||||
|
},
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
# BUG: Only one original name is stored
|
||||||
|
# "Field A" was overwritten by "field a"
|
||||||
|
"_field_mapping": {"field_a": "field a"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
|
||||||
|
assert len(processed) == 1
|
||||||
|
input_data = processed[0].input_data
|
||||||
|
|
||||||
|
# Only "field a" gets the value
|
||||||
|
assert "field a" in input_data
|
||||||
|
assert input_data["field a"] == "value_for_both"
|
||||||
|
|
||||||
|
# "Field A" is completely lost!
|
||||||
|
assert "Field A" not in input_data


class TestUnhandledFieldMappingKeys:
    """
    Tests for Failure Mode #9: Unhandled Field Mapping Keys

    When field_mapping is missing a key, the code falls back to
    the clean name, which may not be what the sink expects.
    """
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_missing_field_mapping_falls_back_to_clean_name(self):
|
||||||
|
"""Test that missing field mapping falls back to clean name."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_tool_call = Mock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
"unmapped_field": "value"
|
||||||
|
})
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
|
||||||
|
# Tool definition with incomplete field mapping
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"parameters": {
|
||||||
|
"properties": {
|
||||||
|
"unmapped_field": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": [],
|
||||||
|
},
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {}, # Empty! No mapping for unmapped_field
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
|
||||||
|
assert len(processed) == 1
|
||||||
|
input_data = processed[0].input_data
|
||||||
|
|
||||||
|
# Falls back to clean name (which IS the key since it's already clean)
|
||||||
|
assert "unmapped_field" in input_data
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_partial_field_mapping(self):
|
||||||
|
"""Test behavior with partial field mapping."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_tool_call = Mock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
"mapped_field": "value1",
|
||||||
|
"unmapped_field": "value2",
|
||||||
|
})
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"parameters": {
|
||||||
|
"properties": {
|
||||||
|
"mapped_field": {"type": "string"},
|
||||||
|
"unmapped_field": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": [],
|
||||||
|
},
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
# Only one field is mapped
|
||||||
|
"_field_mapping": {
|
||||||
|
"mapped_field": "Original Mapped Field",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
|
||||||
|
assert len(processed) == 1
|
||||||
|
input_data = processed[0].input_data
|
||||||
|
|
||||||
|
# Mapped field uses original name
|
||||||
|
assert "Original Mapped Field" in input_data
|
||||||
|
# Unmapped field uses clean name (fallback)
|
||||||
|
assert "unmapped_field" in input_data


class TestSilentValueLossInRouting:
    """
    Tests for Failure Mode #16: Silent Value Loss in Output Routing

    When routing fails in parse_execution_output, it returns None
    without any logging or indication of why it failed.
    """
|
||||||
|
|
||||||
|
def test_routing_mismatch_returns_none_silently(self):
|
||||||
|
"""Test that routing mismatch returns None without error."""
|
||||||
|
from backend.data.dynamic_fields import parse_execution_output
|
||||||
|
|
||||||
|
output_item = ("tools_^_node-123_~_sanitized_name", "important_value")
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="tools",
|
||||||
|
sink_node_id="node-123",
|
||||||
|
sink_pin_name="Original Name", # Doesn't match sanitized_name
|
||||||
|
)
|
||||||
|
|
||||||
|
# Silently returns None
|
||||||
|
assert result is None
|
||||||
|
# No way to distinguish "value is None" from "routing failed"
|
||||||
|
|
||||||
|
def test_wrong_node_id_returns_none(self):
|
||||||
|
"""Test that wrong node ID returns None."""
|
||||||
|
from backend.data.dynamic_fields import parse_execution_output
|
||||||
|
|
||||||
|
output_item = ("tools_^_node-123_~_field", "value")
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="tools",
|
||||||
|
sink_node_id="different-node", # Wrong node
|
||||||
|
sink_pin_name="field",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_wrong_selector_returns_none(self):
|
||||||
|
"""Test that wrong selector returns None."""
|
||||||
|
from backend.data.dynamic_fields import parse_execution_output
|
||||||
|
|
||||||
|
output_item = ("tools_^_node-123_~_field", "value")
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="different_selector", # Wrong selector
|
||||||
|
sink_node_id="node-123",
|
||||||
|
sink_pin_name="field",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_cannot_distinguish_none_value_from_routing_failure(self):
|
||||||
|
"""
|
||||||
|
Test that None as actual value is indistinguishable from routing failure.
|
||||||
|
"""
|
||||||
|
from backend.data.dynamic_fields import parse_execution_output
|
||||||
|
|
||||||
|
# Case 1: Actual None value
|
||||||
|
output_with_none = ("field_name", None)
|
||||||
|
result1 = parse_execution_output(
|
||||||
|
output_with_none,
|
||||||
|
link_output_selector="field_name",
|
||||||
|
sink_node_id=None,
|
||||||
|
sink_pin_name=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Case 2: Routing failure
|
||||||
|
output_mismatched = ("field_name", "value")
|
||||||
|
result2 = parse_execution_output(
|
||||||
|
output_mismatched,
|
||||||
|
link_output_selector="different_field",
|
||||||
|
sink_node_id=None,
|
||||||
|
sink_pin_name=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Both return None - cannot distinguish!
|
||||||
|
assert result1 is None
|
||||||
|
assert result2 is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestProcessToolCallsInputData:
|
||||||
|
"""Tests for _process_tool_calls input data generation."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_all_expected_args_included(self):
|
||||||
|
"""Test that all expected arguments are included in input_data."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_tool_call = Mock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
"provided_field": "value",
|
||||||
|
# optional_field not provided
|
||||||
|
})
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"parameters": {
|
||||||
|
"properties": {
|
||||||
|
"provided_field": {"type": "string"},
|
||||||
|
"optional_field": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["provided_field"],
|
||||||
|
},
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {
|
||||||
|
"provided_field": "Provided Field",
|
||||||
|
"optional_field": "Optional Field",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
|
||||||
|
assert len(processed) == 1
|
||||||
|
input_data = processed[0].input_data
|
||||||
|
|
||||||
|
# Both fields should be in input_data
|
||||||
|
assert "Provided Field" in input_data
|
||||||
|
assert "Optional Field" in input_data
|
||||||
|
|
||||||
|
# Provided has value, optional is None
|
||||||
|
assert input_data["Provided Field"] == "value"
|
||||||
|
assert input_data["Optional Field"] is None
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_extra_args_from_llm_ignored(self):
|
||||||
|
"""Test that extra arguments from LLM not in schema are ignored."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_tool_call = Mock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
"expected_field": "value",
|
||||||
|
"unexpected_field": "should_be_ignored",
|
||||||
|
})
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"parameters": {
|
||||||
|
"properties": {
|
||||||
|
"expected_field": {"type": "string"},
|
||||||
|
# unexpected_field not in schema
|
||||||
|
},
|
||||||
|
"required": [],
|
||||||
|
},
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {"expected_field": "Expected Field"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
|
||||||
|
assert len(processed) == 1
|
||||||
|
input_data = processed[0].input_data
|
||||||
|
|
||||||
|
# Only expected field should be in input_data
|
||||||
|
assert "Expected Field" in input_data
|
||||||
|
assert "unexpected_field" not in input_data
|
||||||
|
assert "Unexpected Field" not in input_data
|
||||||
|
|
||||||
|
|
||||||
|
class TestToolCallMatching:
|
||||||
|
"""Tests for tool call matching logic."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_tool_not_found_skipped(self):
|
||||||
|
"""Test that tool calls for unknown tools are skipped."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_tool_call = Mock()
|
||||||
|
mock_tool_call.function.name = "unknown_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({})
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "known_tool", # Different name
|
||||||
|
"parameters": {"properties": {}, "required": []},
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
|
||||||
|
# Unknown tool is skipped (not processed)
|
||||||
|
assert len(processed) == 0
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_single_tool_fallback(self):
|
||||||
|
"""Test fallback when only one tool exists but name doesn't match."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_tool_call = Mock()
|
||||||
|
mock_tool_call.function.name = "wrong_name"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({"field": "value"})
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
|
||||||
|
# Only one tool defined
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "only_tool",
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"field": {"type": "string"}},
|
||||||
|
"required": [],
|
||||||
|
},
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {"field": "Field"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
|
||||||
|
# Falls back to the only tool
|
||||||
|
assert len(processed) == 1
|
||||||
|
assert processed[0].input_data["Field"] == "value"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_multiple_tool_calls_processed(self):
|
||||||
|
"""Test that multiple tool calls are all processed."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_tool_call_1 = Mock()
|
||||||
|
mock_tool_call_1.function.name = "tool_a"
|
||||||
|
mock_tool_call_1.function.arguments = json.dumps({"a": "1"})
|
||||||
|
|
||||||
|
mock_tool_call_2 = Mock()
|
||||||
|
mock_tool_call_2.function.name = "tool_b"
|
||||||
|
mock_tool_call_2.function.arguments = json.dumps({"b": "2"})
|
||||||
|
|
||||||
|
mock_response.tool_calls = [mock_tool_call_1, mock_tool_call_2]
|
||||||
|
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "tool_a",
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"a": {"type": "string"}},
|
||||||
|
"required": [],
|
||||||
|
},
|
||||||
|
"_sink_node_id": "sink_a",
|
||||||
|
"_field_mapping": {"a": "A"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "tool_b",
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"b": {"type": "string"}},
|
||||||
|
"required": [],
|
||||||
|
},
|
||||||
|
"_sink_node_id": "sink_b",
|
||||||
|
"_field_mapping": {"b": "B"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
|
||||||
|
assert len(processed) == 2
|
||||||
|
assert processed[0].input_data["A"] == "1"
|
||||||
|
assert processed[1].input_data["B"] == "2"
|
||||||
|
|
||||||
|
|
||||||
|
class TestOutputEmitKeyGeneration:
|
||||||
|
"""Tests for output emit key generation consistency."""
|
||||||
|
|
||||||
|
def test_emit_key_uses_sanitized_field_name(self):
|
||||||
|
"""Test that emit keys use sanitized field names."""
|
||||||
|
cleanup = SmartDecisionMakerBlock.cleanup
|
||||||
|
|
||||||
|
        original_field = "Max Keyword Difficulty"
        sink_node_id = "node-123"

        sanitized = cleanup(original_field)
        emit_key = f"tools_^_{sink_node_id}_~_{sanitized}"

        assert emit_key == "tools_^_node-123_~_max_keyword_difficulty"

    def test_emit_key_format_consistent(self):
        """Test that emit key format is consistent."""
        test_cases = [
            ("field", "node", "tools_^_node_~_field"),
            ("Field Name", "node-123", "tools_^_node-123_~_field_name"),
            ("CPC ($)", "abc", "tools_^_abc_~_cpc____"),
        ]

        cleanup = SmartDecisionMakerBlock.cleanup

        for original_field, node_id, expected in test_cases:
            sanitized = cleanup(original_field)
            emit_key = f"tools_^_{node_id}_~_{sanitized}"
            assert emit_key == expected, \
                f"Expected {expected}, got {emit_key}"

    def test_emit_key_sanitization_idempotent(self):
        """Test that sanitizing an already sanitized name gives same result."""
        cleanup = SmartDecisionMakerBlock.cleanup

        original = "Test Field Name"
        first_clean = cleanup(original)
        second_clean = cleanup(first_clean)

        assert first_clean == second_clean


class TestToolFunctionMetadata:
    """Tests for tool function metadata handling."""

    @pytest.mark.asyncio
    async def test_sink_node_id_preserved(self):
        """Test that _sink_node_id is preserved in tool function."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "specific-node-id"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        mock_links = [
            Mock(sink_name="field", sink_id="specific-node-id", source_id="source"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        assert signature["function"]["_sink_node_id"] == "specific-node-id"

    @pytest.mark.asyncio
    async def test_field_mapping_preserved(self):
        """Test that _field_mapping is preserved in tool function."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        mock_links = [
            Mock(sink_name="Original Field Name", sink_id="test-node", source_id="source"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        field_mapping = signature["function"]["_field_mapping"]
        assert "original_field_name" in field_mapping
        assert field_mapping["original_field_name"] == "Original Field Name"
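
    # Illustrative sketch only (not part of the block's API): how a
    # _field_mapping of clean -> original names can be applied to the
    # arguments an LLM returns. The `remap` helper below is hypothetical
    # and exists purely for this example.
    def test_field_mapping_application_sketch(self):
        def remap(args: dict, field_mapping: dict) -> dict:
            # Fall back to the clean name when no original is recorded.
            return {field_mapping.get(k, k): v for k, v in args.items()}

        field_mapping = {"original_field_name": "Original Field Name"}
        llm_args = {"original_field_name": 42, "unmapped": "kept"}

        remapped = remap(llm_args, field_mapping)

        assert remapped["Original Field Name"] == 42
        assert remapped["unmapped"] == "kept"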


class TestRequiredFieldsHandling:
    """Tests for required fields handling."""

    @pytest.mark.asyncio
    async def test_required_fields_use_sanitized_names(self):
        """Test that required fields array uses sanitized names."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={
                "properties": {},
                "required": ["Required Field", "Another Required"],
            }
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        mock_links = [
            Mock(sink_name="Required Field", sink_id="test-node", source_id="source"),
            Mock(sink_name="Another Required", sink_id="test-node", source_id="source"),
            Mock(sink_name="Optional Field", sink_id="test-node", source_id="source"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        required = signature["function"]["parameters"]["required"]

        # Should use sanitized names
        assert "required_field" in required
        assert "another_required" in required

        # Original names should NOT be in required
        assert "Required Field" not in required
        assert "Another Required" not in required

        # Optional field should not be required
        assert "optional_field" not in required
        assert "Optional Field" not in required
@@ -0,0 +1,871 @@
"""
Tests for SmartDecisionMaker error handling failure modes.

Covers failure modes:
3. JSON Deserialization Without Exception Handling
4. Database Transaction Inconsistency
5. Missing Null Checks After Database Calls
15. Error Message Context Loss
17. No Validation of Dynamic Field Paths
"""

import json
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.blocks.smart_decision_maker import (
    SmartDecisionMakerBlock,
    _convert_raw_response_to_dict,
    _create_tool_response,
)


class TestJSONDeserializationErrors:
    """
    Tests for Failure Mode #3: JSON Deserialization Without Exception Handling

    When the LLM returns malformed JSON in tool call arguments, the json.loads()
    call fails without proper error handling.
    """

    def test_malformed_json_single_quotes(self):
        """
        Test that single quotes in JSON cause parsing failure.

        LLMs sometimes return {'key': 'value'} instead of {"key": "value"}.
        """
        malformed = "{'key': 'value'}"

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed)

    def test_malformed_json_trailing_comma(self):
        """
        Test that trailing commas cause parsing failure.
        """
        malformed = '{"key": "value",}'

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed)

    def test_malformed_json_unquoted_keys(self):
        """
        Test that unquoted keys cause parsing failure.
        """
        malformed = '{key: "value"}'

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed)

    def test_malformed_json_python_none(self):
        """
        Test that Python None instead of null causes failure.
        """
        malformed = '{"key": None}'

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed)

    def test_malformed_json_python_true_false(self):
        """
        Test that Python True/False instead of true/false causes failure.
        """
        malformed_true = '{"key": True}'
        malformed_false = '{"key": False}'

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed_true)

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed_false)

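    # A minimal sketch of the defensive parsing these tests argue for: wrap
    # json.loads() and surface the problem instead of letting JSONDecodeError
    # propagate. `_parse_tool_arguments` is a hypothetical helper, not part of
    # SmartDecisionMakerBlock.
    def test_defensive_parsing_sketch(self):
        def _parse_tool_arguments(raw: str):
            try:
                return json.loads(raw), None
            except json.JSONDecodeError as e:
                return {}, f"Invalid tool arguments: {e}"

        args, error = _parse_tool_arguments("{'key': 'value'}")
        assert args == {} and error is not None

        args, error = _parse_tool_arguments('{"key": "value"}')
        assert args == {"key": "value"} and error is None
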
@pytest.mark.asyncio
|
||||||
|
async def test_llm_returns_malformed_json_crashes_block(self):
|
||||||
|
"""
|
||||||
|
Test that malformed JSON from LLM causes block to crash.
|
||||||
|
|
||||||
|
BUG: The json.loads() at line 625, 706, 1124 can throw JSONDecodeError
|
||||||
|
which is not caught, causing the entire block to fail.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
# Create response with malformed JSON
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = "{'malformed': 'json'}" # Single quotes!
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = None
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {"role": "assistant", "content": None}
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {},
|
||||||
|
"parameters": {"properties": {"malformed": {"type": "string"}}, "required": []},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
|
||||||
|
mock_llm.return_value = mock_response
|
||||||
|
|
||||||
|
with patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
# BUG: This should raise JSONDecodeError
|
||||||
|
with pytest.raises(json.JSONDecodeError):
|
||||||
|
async for _ in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestDatabaseTransactionInconsistency:
|
||||||
|
"""
|
||||||
|
Tests for Failure Mode #4: Database Transaction Inconsistency
|
||||||
|
|
||||||
|
When multiple database operations are performed in sequence,
|
||||||
|
a failure partway through leaves the database in an inconsistent state.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_partial_input_insertion_on_failure(self):
|
||||||
|
"""
|
||||||
|
Test that partial failures during multi-input insertion
|
||||||
|
leave database in inconsistent state.
|
||||||
|
"""
|
||||||
|
import threading
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
# Track which inputs were inserted
|
||||||
|
inserted_inputs = []
|
||||||
|
call_count = 0
|
||||||
|
|
||||||
|
async def failing_upsert(node_id, graph_exec_id, input_name, input_data):
|
||||||
|
nonlocal call_count
|
||||||
|
call_count += 1
|
||||||
|
|
||||||
|
# Fail on the third input
|
||||||
|
if call_count == 3:
|
||||||
|
raise Exception("Database connection lost!")
|
||||||
|
|
||||||
|
inserted_inputs.append(input_name)
|
||||||
|
|
||||||
|
mock_result = MagicMock()
|
||||||
|
mock_result.node_exec_id = "exec-id"
|
||||||
|
return mock_result, {input_name: input_data}
|
||||||
|
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.id = "call_1"
|
||||||
|
mock_tool_call.function.name = "multi_input_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
"input1": "value1",
|
||||||
|
"input2": "value2",
|
||||||
|
"input3": "value3", # This one will fail
|
||||||
|
"input4": "value4",
|
||||||
|
"input5": "value5",
|
||||||
|
})
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = None
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": "call_1"}]
|
||||||
|
}
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "multi_input_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {
|
||||||
|
"input1": "input1",
|
||||||
|
"input2": "input2",
|
||||||
|
"input3": "input3",
|
||||||
|
"input4": "input4",
|
||||||
|
"input5": "input5",
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {
|
||||||
|
"input1": {"type": "string"},
|
||||||
|
"input2": {"type": "string"},
|
||||||
|
"input3": {"type": "string"},
|
||||||
|
"input4": {"type": "string"},
|
||||||
|
"input5": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["input1", "input2", "input3", "input4", "input5"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
mock_db_client.upsert_execution_input.side_effect = failing_upsert
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm, \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_llm.return_value = mock_response
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
# The block should fail, but some inputs were already inserted
|
||||||
|
outputs = {}
|
||||||
|
try:
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
except Exception:
|
||||||
|
pass # Expected
|
||||||
|
|
||||||
|
# BUG: Some inputs were inserted before failure
|
||||||
|
# Database is now in inconsistent state
|
||||||
|
assert len(inserted_inputs) == 2, \
|
||||||
|
f"Expected 2 inserted before failure, got {inserted_inputs}"
|
||||||
|
assert "input1" in inserted_inputs
|
||||||
|
assert "input2" in inserted_inputs
|
||||||
|
# input3, input4, input5 were never inserted
|
||||||
|
|
||||||
|
|
||||||
|
class TestMissingNullChecks:
|
||||||
|
"""
|
||||||
|
Tests for Failure Mode #5: Missing Null Checks After Database Calls
|
||||||
|
"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_get_node_returns_none(self):
|
||||||
|
"""
|
||||||
|
Test handling when get_node returns None.
|
||||||
|
"""
|
||||||
|
import threading
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.id = "call_1"
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({"param": "value"})
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = None
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": "call_1"}]
|
||||||
|
}
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "nonexistent-node",
|
||||||
|
"_field_mapping": {"param": "param"},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"param": {"type": "string"}},
|
||||||
|
"required": ["param"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_db_client.get_node.return_value = None # Node doesn't exist!
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm, \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_llm.return_value = mock_response
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should raise ValueError for missing node
|
||||||
|
with pytest.raises(ValueError, match="not found"):
|
||||||
|
async for _ in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_empty_execution_outputs(self):
|
||||||
|
"""
|
||||||
|
Test handling when get_execution_outputs_by_node_exec_id returns empty.
|
||||||
|
"""
|
||||||
|
import threading
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
call_count = 0
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal call_count
|
||||||
|
call_count += 1
|
||||||
|
|
||||||
|
if call_count > 1:
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = "Done"
|
||||||
|
resp.tool_calls = []
|
||||||
|
resp.prompt_tokens = 10
|
||||||
|
resp.completion_tokens = 5
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.id = "call_1"
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [mock_tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": "call_1"}]
|
||||||
|
}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {},
|
||||||
|
"parameters": {"properties": {}, "required": []},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
mock_exec_result = MagicMock()
|
||||||
|
mock_exec_result.node_exec_id = "exec-id"
|
||||||
|
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||||
|
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {} # Empty!
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Empty outputs should be handled gracefully
|
||||||
|
# (uses "Tool executed successfully" as fallback)
|
||||||
|
assert "finished" in outputs or "conversations" in outputs
|
||||||
|
|
||||||
|
|
||||||
|
class TestErrorMessageContextLoss:
    """
    Tests for Failure Mode #15: Error Message Context Loss

    When exceptions are caught and converted to strings, important
    debugging information is lost.
    """

    def test_exception_to_string_loses_traceback(self):
        """
        Test that converting exception to string loses traceback.
        """
        try:
            def inner():
                raise ValueError("Inner error")

            def outer():
                inner()

            outer()
        except Exception as e:
            error_string = str(e)
            error_repr = repr(e)

            # String representation loses call stack
            assert "inner" not in error_string
            assert "outer" not in error_string

            # Even repr doesn't have full traceback
            assert "Traceback" not in error_repr

    def test_tool_response_loses_exception_type(self):
        """
        Test that _create_tool_response loses exception type information.
        """
        original_error = ConnectionError("Database unreachable")
        tool_response = _create_tool_response(
            "call_123",
            f"Tool execution failed: {str(original_error)}"
        )

        content = tool_response.get("content", "")

        # Original exception type is lost
        assert "ConnectionError" not in content
        # Only the message remains
        assert "Database unreachable" in content

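    # A sketch of richer error formatting using only the stdlib: keep the
    # exception type and any chained cause in the message handed to the LLM
    # and the logs. `_format_error` is hypothetical, not part of the block.
    def test_error_formatting_sketch_preserves_type_and_cause(self):
        def _format_error(e: BaseException) -> str:
            parts = [f"{type(e).__name__}: {e}"]
            if e.__cause__ is not None:
                parts.append(f"caused by {type(e.__cause__).__name__}: {e.__cause__}")
            return " | ".join(parts)

        try:
            try:
                raise ConnectionError("Network timeout after 30s")
            except ConnectionError as inner:
                raise RuntimeError("Tool execution failed") from inner
        except RuntimeError as e:
            message = _format_error(e)

        assert "RuntimeError" in message
        assert "ConnectionError" in message
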
@pytest.mark.asyncio
|
||||||
|
async def test_agent_mode_error_response_lacks_context(self):
|
||||||
|
"""
|
||||||
|
Test that agent mode error responses lack debugging context.
|
||||||
|
"""
|
||||||
|
import threading
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.id = "call_1"
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({})
|
||||||
|
|
||||||
|
mock_response_1 = MagicMock()
|
||||||
|
mock_response_1.response = None
|
||||||
|
mock_response_1.tool_calls = [mock_tool_call]
|
||||||
|
mock_response_1.prompt_tokens = 50
|
||||||
|
mock_response_1.completion_tokens = 25
|
||||||
|
mock_response_1.reasoning = None
|
||||||
|
mock_response_1.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": [{"type": "tool_use", "id": "call_1"}]
|
||||||
|
}
|
||||||
|
|
||||||
|
mock_response_2 = MagicMock()
|
||||||
|
mock_response_2.response = "Handled the error"
|
||||||
|
mock_response_2.tool_calls = []
|
||||||
|
mock_response_2.prompt_tokens = 30
|
||||||
|
mock_response_2.completion_tokens = 15
|
||||||
|
mock_response_2.reasoning = None
|
||||||
|
mock_response_2.raw_response = {"role": "assistant", "content": "Handled"}
|
||||||
|
|
||||||
|
call_count = 0
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal call_count
|
||||||
|
call_count += 1
|
||||||
|
if call_count == 1:
|
||||||
|
return mock_response_1
|
||||||
|
return mock_response_2
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {},
|
||||||
|
"parameters": {"properties": {}, "required": []},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
# Create a complex error with nested cause
|
||||||
|
class CustomDatabaseError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def create_complex_error():
|
||||||
|
try:
|
||||||
|
raise ConnectionError("Network timeout after 30s")
|
||||||
|
except ConnectionError as e:
|
||||||
|
raise CustomDatabaseError("Failed to connect to database") from e
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
|
||||||
|
# Make upsert raise the complex error
|
||||||
|
try:
|
||||||
|
create_complex_error()
|
||||||
|
except CustomDatabaseError as e:
|
||||||
|
mock_db_client.upsert_execution_input.side_effect = e
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||||
|
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=2,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Check conversation for error details
|
||||||
|
conversations = outputs.get("conversations", [])
|
||||||
|
error_found = False
|
||||||
|
for msg in conversations:
|
||||||
|
content = msg.get("content", "")
|
||||||
|
if isinstance(content, list):
|
||||||
|
for item in content:
|
||||||
|
if item.get("type") == "tool_result":
|
||||||
|
result_content = item.get("content", "")
|
||||||
|
if "Error" in result_content or "failed" in result_content.lower():
|
||||||
|
error_found = True
|
||||||
|
# BUG: The error content lacks:
|
||||||
|
# - Exception type (CustomDatabaseError)
|
||||||
|
# - Chained cause (ConnectionError)
|
||||||
|
# - Stack trace
|
||||||
|
assert "CustomDatabaseError" not in result_content
|
||||||
|
assert "ConnectionError" not in result_content
|
||||||
|
|
||||||
|
# Note: error_found may be False if the error prevented tool response creation
|
||||||
|
|
||||||
|
|
||||||
|
class TestRawResponseConversion:
    """Tests for _convert_raw_response_to_dict edge cases."""

    def test_string_response_converted(self):
        """Test that string responses are properly wrapped."""
        result = _convert_raw_response_to_dict("Hello, world!")
        assert result == {"role": "assistant", "content": "Hello, world!"}

    def test_dict_response_unchanged(self):
        """Test that dict responses are passed through."""
        original = {"role": "assistant", "content": "test", "extra": "field"}
        result = _convert_raw_response_to_dict(original)
        assert result == original

    def test_object_response_converted(self):
        """Test that objects are converted using json.to_dict."""
        mock_obj = MagicMock()

        with patch("backend.blocks.smart_decision_maker.json.to_dict") as mock_to_dict:
            mock_to_dict.return_value = {"converted": True}
            result = _convert_raw_response_to_dict(mock_obj)
            mock_to_dict.assert_called_once_with(mock_obj)
            assert result == {"converted": True}

    def test_none_response(self):
        """Test handling of None response."""
        with patch("backend.blocks.smart_decision_maker.json.to_dict") as mock_to_dict:
            mock_to_dict.return_value = None
            result = _convert_raw_response_to_dict(None)
            # None is not a string or dict, so it goes through to_dict
            assert result is None


class TestValidationRetryMechanism:
|
||||||
|
"""Tests for the validation and retry mechanism."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_validation_error_triggers_retry(self):
|
||||||
|
"""
|
||||||
|
Test that validation errors trigger retry with feedback.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
call_count = 0
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
nonlocal call_count
|
||||||
|
call_count += 1
|
||||||
|
|
||||||
|
prompt = kwargs.get("prompt", [])
|
||||||
|
|
||||||
|
if call_count == 1:
|
||||||
|
# First call: return tool call with wrong parameter
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({"wrong_param": "value"})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [mock_tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": None}
|
||||||
|
return resp
|
||||||
|
else:
|
||||||
|
# Second call: check that error feedback was added
|
||||||
|
has_error_feedback = any(
|
||||||
|
"parameter errors" in str(msg.get("content", "")).lower()
|
||||||
|
for msg in prompt
|
||||||
|
)
|
||||||
|
|
||||||
|
# Return correct tool call
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({"correct_param": "value"})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [mock_tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": None}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {"correct_param": "correct_param"},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"correct_param": {"type": "string"}},
|
||||||
|
"required": ["correct_param"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0, # Traditional mode
|
||||||
|
retry=3,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for name, value in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[name] = value
|
||||||
|
|
||||||
|
# Should have made multiple calls due to retry
|
||||||
|
assert call_count >= 2
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_max_retries_exceeded(self):
|
||||||
|
"""
|
||||||
|
Test behavior when max retries are exceeded.
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
async def mock_llm_call(**kwargs):
|
||||||
|
# Always return invalid tool call
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.function.name = "test_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({"wrong": "param"})
|
||||||
|
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.response = None
|
||||||
|
resp.tool_calls = [mock_tool_call]
|
||||||
|
resp.prompt_tokens = 50
|
||||||
|
resp.completion_tokens = 25
|
||||||
|
resp.reasoning = None
|
||||||
|
resp.raw_response = {"role": "assistant", "content": None}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test_tool",
|
||||||
|
"_sink_node_id": "sink",
|
||||||
|
"_field_mapping": {"correct": "correct"},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {"correct": {"type": "string"}},
|
||||||
|
"required": ["correct"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||||
|
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
retry=2, # Only 2 retries
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
# Should raise ValueError after max retries
|
||||||
|
with pytest.raises(ValueError, match="parameter errors"):
|
||||||
|
async for _ in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph",
|
||||||
|
node_id="test-node",
|
||||||
|
graph_exec_id="test-exec",
|
||||||
|
node_exec_id="test-node-exec",
|
||||||
|
user_id="test-user",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
pass
|
||||||
@@ -0,0 +1,819 @@
"""
Comprehensive tests for SmartDecisionMakerBlock pin name sanitization.

This test file addresses the critical bug where field names with spaces/special characters
(e.g., "Max Keyword Difficulty") are not consistently sanitized between frontend and backend,
causing tool calls to "go into the void".

The core issue:
- Frontend connects link with original name: tools_^_{node_id}_~_Max Keyword Difficulty
- Backend emits with sanitized name: tools_^_{node_id}_~_max_keyword_difficulty
- parse_execution_output compares sink_pin_name directly without sanitization
- Result: mismatch causes tool calls to fail silently
"""

import json
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
from backend.data.dynamic_fields import (
    parse_execution_output,
    sanitize_pin_name,
)


class TestCleanupFunction:
    """Tests for the SmartDecisionMakerBlock.cleanup() static method."""

    def test_cleanup_spaces_to_underscores(self):
        """Spaces should be replaced with underscores."""
        assert SmartDecisionMakerBlock.cleanup("Max Keyword Difficulty") == "max_keyword_difficulty"

    def test_cleanup_mixed_case_to_lowercase(self):
        """Mixed case should be converted to lowercase."""
        assert SmartDecisionMakerBlock.cleanup("MaxKeywordDifficulty") == "maxkeyworddifficulty"
        assert SmartDecisionMakerBlock.cleanup("UPPER_CASE") == "upper_case"

    def test_cleanup_special_characters(self):
        """Special characters should be replaced with underscores."""
        assert SmartDecisionMakerBlock.cleanup("field@name!") == "field_name_"
        assert SmartDecisionMakerBlock.cleanup("value#1") == "value_1"
        assert SmartDecisionMakerBlock.cleanup("test$value") == "test_value"
        assert SmartDecisionMakerBlock.cleanup("a%b^c") == "a_b_c"

    def test_cleanup_preserves_valid_characters(self):
        """Valid characters (alphanumeric, underscore, hyphen) should be preserved."""
        assert SmartDecisionMakerBlock.cleanup("valid_name-123") == "valid_name-123"
        assert SmartDecisionMakerBlock.cleanup("abc123") == "abc123"

    def test_cleanup_empty_string(self):
        """Empty string should return empty string."""
        assert SmartDecisionMakerBlock.cleanup("") == ""

    def test_cleanup_only_special_chars(self):
        """String of only special characters should return underscores."""
        assert SmartDecisionMakerBlock.cleanup("@#$%") == "____"

    def test_cleanup_unicode_characters(self):
        """Unicode characters should be replaced with underscores."""
        assert SmartDecisionMakerBlock.cleanup("café") == "caf_"
        assert SmartDecisionMakerBlock.cleanup("日本語") == "___"

    def test_cleanup_multiple_consecutive_spaces(self):
        """Multiple consecutive spaces should become multiple underscores."""
        assert SmartDecisionMakerBlock.cleanup("a   b") == "a___b"

    def test_cleanup_leading_trailing_spaces(self):
        """Leading/trailing spaces should become underscores."""
        assert SmartDecisionMakerBlock.cleanup(" name ") == "_name_"

    def test_cleanup_realistic_field_names(self):
        """Test realistic field names from actual use cases."""
        # From the reported bug
        assert SmartDecisionMakerBlock.cleanup("Max Keyword Difficulty") == "max_keyword_difficulty"
        # Other realistic names
        assert SmartDecisionMakerBlock.cleanup("Search Query") == "search_query"
        assert SmartDecisionMakerBlock.cleanup("API Response (JSON)") == "api_response__json_"
        assert SmartDecisionMakerBlock.cleanup("User's Input") == "user_s_input"
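

# Illustrative sketch only: cleanup() is lossy, so two different originals can
# collapse to the same clean name (see TestFieldNameCollision below). A
# collision-aware variant could suffix duplicates; `_cleanup_unique` is a
# hypothetical helper, not something SmartDecisionMakerBlock provides.
# e.g. _cleanup_unique(["Test Field", "test field"]) ->
#      {"test_field": "Test Field", "test_field_2": "test field"}
def _cleanup_unique(names: list[str]) -> dict[str, str]:
    mapping: dict[str, str] = {}  # clean name -> original name
    for name in names:
        clean = SmartDecisionMakerBlock.cleanup(name)
        candidate, suffix = clean, 1
        while candidate in mapping:
            suffix += 1
            candidate = f"{clean}_{suffix}"
        mapping[candidate] = name
    return mapping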


class TestFieldMappingCreation:
|
||||||
|
"""Tests for field mapping creation in function signatures."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_field_mapping_with_spaces_in_names(self):
|
||||||
|
"""Test that field mapping correctly maps clean names back to original names with spaces."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_node = Mock()
|
||||||
|
mock_node.id = "test-node-id"
|
||||||
|
mock_node.block = Mock()
|
||||||
|
mock_node.block.name = "TestBlock"
|
||||||
|
mock_node.block.description = "Test description"
|
||||||
|
mock_node.block.input_schema = Mock()
|
||||||
|
mock_node.block.input_schema.jsonschema = Mock(
|
||||||
|
return_value={"properties": {}, "required": ["Max Keyword Difficulty"]}
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_field_schema(field_name):
|
||||||
|
if field_name == "Max Keyword Difficulty":
|
||||||
|
return {"type": "integer", "description": "Maximum keyword difficulty (0-100)"}
|
||||||
|
raise KeyError(f"Field {field_name} not found")
|
||||||
|
|
||||||
|
mock_node.block.input_schema.get_field_schema = get_field_schema
|
||||||
|
|
||||||
|
mock_links = [
|
||||||
|
Mock(
|
||||||
|
source_name="tools_^_test_~_max_keyword_difficulty",
|
||||||
|
sink_name="Max Keyword Difficulty", # Original name with spaces
|
||||||
|
sink_id="test-node-id",
|
||||||
|
source_id="smart_node_id",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
signature = await block._create_block_function_signature(mock_node, mock_links)
|
||||||
|
|
||||||
|
# Verify the cleaned name is used in properties
|
||||||
|
properties = signature["function"]["parameters"]["properties"]
|
||||||
|
assert "max_keyword_difficulty" in properties
|
||||||
|
|
||||||
|
# Verify the field mapping maps back to original
|
||||||
|
field_mapping = signature["function"]["_field_mapping"]
|
||||||
|
assert field_mapping["max_keyword_difficulty"] == "Max Keyword Difficulty"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_field_mapping_with_multiple_special_char_names(self):
|
||||||
|
"""Test field mapping with multiple fields containing special characters."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_node = Mock()
|
||||||
|
mock_node.id = "test-node-id"
|
||||||
|
mock_node.block = Mock()
|
||||||
|
mock_node.block.name = "SEO Tool"
|
||||||
|
mock_node.block.description = "SEO analysis tool"
|
||||||
|
mock_node.block.input_schema = Mock()
|
||||||
|
mock_node.block.input_schema.jsonschema = Mock(
|
||||||
|
return_value={"properties": {}, "required": []}
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_field_schema(field_name):
|
||||||
|
schemas = {
|
||||||
|
"Max Keyword Difficulty": {"type": "integer", "description": "Max difficulty"},
|
||||||
|
"Search Volume (Monthly)": {"type": "integer", "description": "Monthly volume"},
|
||||||
|
"CPC ($)": {"type": "number", "description": "Cost per click"},
|
||||||
|
"Target URL": {"type": "string", "description": "URL to analyze"},
|
||||||
|
}
|
||||||
|
if field_name in schemas:
|
||||||
|
return schemas[field_name]
|
||||||
|
raise KeyError(f"Field {field_name} not found")
|
||||||
|
|
||||||
|
mock_node.block.input_schema.get_field_schema = get_field_schema
|
||||||
|
|
||||||
|
mock_links = [
|
||||||
|
Mock(sink_name="Max Keyword Difficulty", sink_id="test-node-id", source_id="smart_node_id"),
|
||||||
|
Mock(sink_name="Search Volume (Monthly)", sink_id="test-node-id", source_id="smart_node_id"),
|
||||||
|
Mock(sink_name="CPC ($)", sink_id="test-node-id", source_id="smart_node_id"),
|
||||||
|
Mock(sink_name="Target URL", sink_id="test-node-id", source_id="smart_node_id"),
|
||||||
|
]
|
||||||
|
|
||||||
|
signature = await block._create_block_function_signature(mock_node, mock_links)
|
||||||
|
|
||||||
|
properties = signature["function"]["parameters"]["properties"]
|
||||||
|
field_mapping = signature["function"]["_field_mapping"]
|
||||||
|
|
||||||
|
# Verify all cleaned names are in properties
|
||||||
|
assert "max_keyword_difficulty" in properties
|
||||||
|
assert "search_volume__monthly_" in properties
|
||||||
|
assert "cpc____" in properties
|
||||||
|
assert "target_url" in properties
|
||||||
|
|
||||||
|
# Verify field mappings
|
||||||
|
assert field_mapping["max_keyword_difficulty"] == "Max Keyword Difficulty"
|
||||||
|
assert field_mapping["search_volume__monthly_"] == "Search Volume (Monthly)"
|
||||||
|
assert field_mapping["cpc____"] == "CPC ($)"
|
||||||
|
assert field_mapping["target_url"] == "Target URL"
|
||||||
|
|
||||||
|
|
||||||
|
class TestFieldNameCollision:
    """Tests for detecting field name collisions after sanitization."""

    @pytest.mark.asyncio
    async def test_collision_detection_same_sanitized_name(self):
        """Test behavior when two different names sanitize to the same value."""
        block = SmartDecisionMakerBlock()

        # These two different names will sanitize to the same value
        name1 = "max keyword difficulty"  # -> max_keyword_difficulty
        name2 = "Max Keyword Difficulty"  # -> max_keyword_difficulty
        name3 = "MAX_KEYWORD_DIFFICULTY"  # -> max_keyword_difficulty

        assert SmartDecisionMakerBlock.cleanup(name1) == SmartDecisionMakerBlock.cleanup(name2)
        assert SmartDecisionMakerBlock.cleanup(name2) == SmartDecisionMakerBlock.cleanup(name3)

    @pytest.mark.asyncio
    async def test_collision_in_function_signature(self):
        """Test that collisions in sanitized names could cause issues."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node-id"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test description"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )

        def get_field_schema(field_name):
            return {"type": "string", "description": f"Field: {field_name}"}

        mock_node.block.input_schema.get_field_schema = get_field_schema

        # Two different fields that sanitize to the same name
        mock_links = [
            Mock(sink_name="Test Field", sink_id="test-node-id", source_id="smart_node_id"),
            Mock(sink_name="test field", sink_id="test-node-id", source_id="smart_node_id"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        properties = signature["function"]["parameters"]["properties"]
        field_mapping = signature["function"]["_field_mapping"]

        # Both sanitize to "test_field" - only one will be in properties
        assert "test_field" in properties
        # The field_mapping will have the last one written
        assert field_mapping["test_field"] in ["Test Field", "test field"]


class TestOutputRouting:
    """Tests for output routing with sanitized names."""

    def test_emit_key_format_with_spaces(self):
        """Test that emit keys use sanitized field names."""
        block = SmartDecisionMakerBlock()

        original_field_name = "Max Keyword Difficulty"
        sink_node_id = "node-123"

        sanitized_name = block.cleanup(original_field_name)
        emit_key = f"tools_^_{sink_node_id}_~_{sanitized_name}"

        assert emit_key == "tools_^_node-123_~_max_keyword_difficulty"

    def test_parse_execution_output_exact_match(self):
        """Test parse_execution_output with exact matching names."""
        output_item = ("tools_^_node-123_~_max_keyword_difficulty", 50)

        # When sink_pin_name matches the sanitized name, it should work
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="max_keyword_difficulty",
        )
        assert result == 50

    def test_parse_execution_output_mismatch_original_vs_sanitized(self):
        """
        CRITICAL TEST: This reproduces the exact bug reported.

        When the frontend creates a link with original name "Max Keyword Difficulty"
        but the backend emits with sanitized name "max_keyword_difficulty",
        the tool call should still be routed correctly.

        CURRENT BEHAVIOR (BUG): Returns None because names don't match
        EXPECTED BEHAVIOR: Should return the value (50) after sanitizing both names
        """
        output_item = ("tools_^_node-123_~_max_keyword_difficulty", 50)

        # This is what happens: sink_pin_name comes from frontend link (unsanitized)
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="Max Keyword Difficulty",  # Original name with spaces
        )

        # BUG: This currently returns None because:
        # - target_input_pin = "max_keyword_difficulty" (from emit key, sanitized)
        # - sink_pin_name = "Max Keyword Difficulty" (from link, original)
        # - They don't match, so routing fails
        #
        # TODO: When the bug is fixed, change this assertion to:
        # assert result == 50
        assert result is None  # Current buggy behavior

    def test_parse_execution_output_with_sanitized_sink_pin(self):
        """Test that if sink_pin_name is pre-sanitized, routing works."""
        output_item = ("tools_^_node-123_~_max_keyword_difficulty", 50)

        # If sink_pin_name is already sanitized, routing works
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="max_keyword_difficulty",  # Pre-sanitized
        )
        assert result == 50
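
    # A sketch of the fix the TODO above describes: sanitize the link's
    # sink_pin_name with the same rules used to build the emit key before
    # comparing. This only demonstrates the comparison itself; whether the fix
    # belongs inside parse_execution_output (e.g. via the sanitize_pin_name
    # helper imported above) or at the call site is left open here.
    def test_sanitized_comparison_sketch(self):
        emit_key = "tools_^_node-123_~_max_keyword_difficulty"
        target_input_pin = emit_key.split("_~_", 1)[1]

        link_sink_pin_name = "Max Keyword Difficulty"  # original frontend name

        # Direct comparison fails (the current bug) ...
        assert target_input_pin != link_sink_pin_name
        # ... but comparing against the sanitized name matches.
        assert target_input_pin == SmartDecisionMakerBlock.cleanup(link_sink_pin_name)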


class TestProcessToolCallsMapping:
|
||||||
|
"""Tests for _process_tool_calls method field mapping."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_process_tool_calls_maps_clean_to_original(self):
|
||||||
|
"""Test that _process_tool_calls correctly maps clean names back to original."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_tool_call = Mock()
|
||||||
|
mock_tool_call.function.name = "seo_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
"max_keyword_difficulty": 50, # LLM uses clean name
|
||||||
|
"search_query": "test query",
|
||||||
|
})
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "seo_tool",
|
||||||
|
"parameters": {
|
||||||
|
"properties": {
|
||||||
|
"max_keyword_difficulty": {"type": "integer"},
|
||||||
|
"search_query": {"type": "string"},
|
||||||
|
},
|
||||||
|
"required": ["max_keyword_difficulty", "search_query"],
|
||||||
|
},
|
||||||
|
"_sink_node_id": "test-sink-node",
|
||||||
|
"_field_mapping": {
|
||||||
|
"max_keyword_difficulty": "Max Keyword Difficulty", # Original name
|
||||||
|
"search_query": "Search Query",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
|
||||||
|
assert len(processed) == 1
|
||||||
|
tool_info = processed[0]
|
||||||
|
|
||||||
|
# Verify input_data uses ORIGINAL field names
|
||||||
|
assert "Max Keyword Difficulty" in tool_info.input_data
|
||||||
|
assert "Search Query" in tool_info.input_data
|
||||||
|
assert tool_info.input_data["Max Keyword Difficulty"] == 50
|
||||||
|
assert tool_info.input_data["Search Query"] == "test query"
|
||||||
|
|
||||||
|
|
||||||
|
class TestToolOutputEmitting:
|
||||||
|
"""Tests for the tool output emitting in traditional mode."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_emit_keys_use_sanitized_names(self):
|
||||||
|
"""Test that emit keys always use sanitized field names."""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.function.name = "seo_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
"max_keyword_difficulty": 50,
|
||||||
|
})
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = None
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {"role": "assistant", "content": None}
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "seo_tool",
|
||||||
|
"_sink_node_id": "test-sink-node-id",
|
||||||
|
"_field_mapping": {
|
||||||
|
"max_keyword_difficulty": "Max Keyword Difficulty",
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {
|
||||||
|
"max_keyword_difficulty": {"type": "integer"},
|
||||||
|
},
|
||||||
|
"required": ["max_keyword_difficulty"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"backend.blocks.llm.llm_call",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
return_value=mock_response,
|
||||||
|
), patch.object(
|
||||||
|
block, "_create_tool_node_signatures", return_value=mock_tool_signatures
|
||||||
|
):
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Test prompt",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = MagicMock()
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for output_name, output_data in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph-id",
|
||||||
|
node_id="test-node-id",
|
||||||
|
graph_exec_id="test-exec-id",
|
||||||
|
node_exec_id="test-node-exec-id",
|
||||||
|
user_id="test-user-id",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[output_name] = output_data
|
||||||
|
|
||||||
|
# The emit key should use the sanitized field name
|
||||||
|
# Even though the original was "Max Keyword Difficulty", emit uses sanitized
|
||||||
|
assert "tools_^_test-sink-node-id_~_max_keyword_difficulty" in outputs
|
||||||
|
assert outputs["tools_^_test-sink-node-id_~_max_keyword_difficulty"] == 50
|
||||||
|
|
||||||
|
|
||||||
|
class TestSanitizationConsistency:
|
||||||
|
"""Tests for ensuring sanitization is consistent throughout the pipeline."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_full_round_trip_with_spaces(self):
|
||||||
|
"""
|
||||||
|
Test the full round-trip of a field name with spaces through the system.
|
||||||
|
|
||||||
|
This simulates:
|
||||||
|
1. Frontend creates link with sink_name="Max Keyword Difficulty"
|
||||||
|
2. Backend creates function signature with cleaned property name
|
||||||
|
3. LLM responds with cleaned name
|
||||||
|
4. Backend processes response and maps back to original
|
||||||
|
5. Backend emits with sanitized name
|
||||||
|
6. Routing should match (currently broken)
|
||||||
|
"""
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
original_field_name = "Max Keyword Difficulty"
|
||||||
|
cleaned_field_name = SmartDecisionMakerBlock.cleanup(original_field_name)
|
||||||
|
|
||||||
|
# Step 1: Simulate frontend link creation
|
||||||
|
mock_link = Mock()
|
||||||
|
mock_link.sink_name = original_field_name # Frontend uses original
|
||||||
|
mock_link.sink_id = "test-sink-node-id"
|
||||||
|
mock_link.source_id = "smart-node-id"
|
||||||
|
|
||||||
|
# Step 2: Create function signature
|
||||||
|
mock_node = Mock()
|
||||||
|
mock_node.id = "test-sink-node-id"
|
||||||
|
mock_node.block = Mock()
|
||||||
|
mock_node.block.name = "SEO Tool"
|
||||||
|
mock_node.block.description = "SEO analysis"
|
||||||
|
mock_node.block.input_schema = Mock()
|
||||||
|
mock_node.block.input_schema.jsonschema = Mock(
|
||||||
|
return_value={"properties": {}, "required": [original_field_name]}
|
||||||
|
)
|
||||||
|
mock_node.block.input_schema.get_field_schema = Mock(
|
||||||
|
return_value={"type": "integer", "description": "Max difficulty"}
|
||||||
|
)
|
||||||
|
|
||||||
|
signature = await block._create_block_function_signature(mock_node, [mock_link])
|
||||||
|
|
||||||
|
# Verify cleaned name is in properties
|
||||||
|
assert cleaned_field_name in signature["function"]["parameters"]["properties"]
|
||||||
|
# Verify field mapping exists
|
||||||
|
assert signature["function"]["_field_mapping"][cleaned_field_name] == original_field_name
|
||||||
|
|
||||||
|
# Step 3: Simulate LLM response using cleaned name
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.function.name = "seo_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
cleaned_field_name: 50 # LLM uses cleaned name
|
||||||
|
})
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.response = None
|
||||||
|
mock_response.tool_calls = [mock_tool_call]
|
||||||
|
mock_response.prompt_tokens = 50
|
||||||
|
mock_response.completion_tokens = 25
|
||||||
|
mock_response.reasoning = None
|
||||||
|
mock_response.raw_response = {"role": "assistant", "content": None}
|
||||||
|
|
||||||
|
# Prepare tool_functions as they would be in run()
|
||||||
|
tool_functions = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "seo_tool",
|
||||||
|
"_sink_node_id": "test-sink-node-id",
|
||||||
|
"_field_mapping": signature["function"]["_field_mapping"],
|
||||||
|
"parameters": signature["function"]["parameters"],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
# Step 4: Process tool calls
|
||||||
|
processed = block._process_tool_calls(mock_response, tool_functions)
|
||||||
|
assert len(processed) == 1
|
||||||
|
# Input data should have ORIGINAL name
|
||||||
|
assert original_field_name in processed[0].input_data
|
||||||
|
assert processed[0].input_data[original_field_name] == 50
|
||||||
|
|
||||||
|
# Step 5: Emit key generation (from run method logic)
|
||||||
|
field_mapping = processed[0].field_mapping
|
||||||
|
for clean_arg_name in signature["function"]["parameters"]["properties"]:
|
||||||
|
original = field_mapping.get(clean_arg_name, clean_arg_name)
|
||||||
|
sanitized_arg_name = block.cleanup(original)
|
||||||
|
emit_key = f"tools_^_test-sink-node-id_~_{sanitized_arg_name}"
|
||||||
|
|
||||||
|
# Emit key uses sanitized name
|
||||||
|
assert emit_key == f"tools_^_test-sink-node-id_~_{cleaned_field_name}"
|
||||||
|
|
||||||
|
# Step 6: Routing check (this is where the bug manifests)
|
||||||
|
emit_key = f"tools_^_test-sink-node-id_~_{cleaned_field_name}"
|
||||||
|
output_item = (emit_key, 50)
|
||||||
|
|
||||||
|
# Current routing uses original sink_name from link
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="tools",
|
||||||
|
sink_node_id="test-sink-node-id",
|
||||||
|
sink_pin_name=original_field_name, # Frontend's original name
|
||||||
|
)
|
||||||
|
|
||||||
|
# BUG: This returns None because sanitized != original
|
||||||
|
# When fixed, this should return 50
|
||||||
|
assert result is None # Current broken behavior
|
||||||
|
|
||||||
|
def test_sanitization_is_idempotent(self):
|
||||||
|
"""Test that sanitizing an already sanitized name gives the same result."""
|
||||||
|
original = "Max Keyword Difficulty"
|
||||||
|
first_clean = SmartDecisionMakerBlock.cleanup(original)
|
||||||
|
second_clean = SmartDecisionMakerBlock.cleanup(first_clean)
|
||||||
|
|
||||||
|
assert first_clean == second_clean
|
||||||
|
|
||||||
|
|
||||||
|
class TestEdgeCases:
|
||||||
|
"""Tests for edge cases in the sanitization pipeline."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_empty_field_name(self):
|
||||||
|
"""Test handling of empty field name."""
|
||||||
|
assert SmartDecisionMakerBlock.cleanup("") == ""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_very_long_field_name(self):
|
||||||
|
"""Test handling of very long field names."""
|
||||||
|
long_name = "A" * 1000 + " " + "B" * 1000
|
||||||
|
cleaned = SmartDecisionMakerBlock.cleanup(long_name)
|
||||||
|
assert "_" in cleaned # Space was replaced
|
||||||
|
assert len(cleaned) == len(long_name)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_field_name_with_newlines(self):
|
||||||
|
"""Test handling of field names with newlines."""
|
||||||
|
name_with_newline = "First Line\nSecond Line"
|
||||||
|
cleaned = SmartDecisionMakerBlock.cleanup(name_with_newline)
|
||||||
|
assert "\n" not in cleaned
|
||||||
|
assert "_" in cleaned
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_field_name_with_tabs(self):
|
||||||
|
"""Test handling of field names with tabs."""
|
||||||
|
name_with_tab = "First\tSecond"
|
||||||
|
cleaned = SmartDecisionMakerBlock.cleanup(name_with_tab)
|
||||||
|
assert "\t" not in cleaned
|
||||||
|
assert "_" in cleaned
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_numeric_field_name(self):
|
||||||
|
"""Test handling of purely numeric field names."""
|
||||||
|
assert SmartDecisionMakerBlock.cleanup("123") == "123"
|
||||||
|
assert SmartDecisionMakerBlock.cleanup("123 456") == "123_456"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_hyphenated_field_names(self):
|
||||||
|
"""Test that hyphens are preserved (valid in function names)."""
|
||||||
|
assert SmartDecisionMakerBlock.cleanup("field-name") == "field-name"
|
||||||
|
assert SmartDecisionMakerBlock.cleanup("Field-Name") == "field-name"
|
||||||
|
|
||||||
|
|
||||||
|
class TestDynamicFieldsWithSpaces:
|
||||||
|
"""Tests for dynamic fields with spaces in their names."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_dynamic_dict_field_with_spaces(self):
|
||||||
|
"""Test dynamic dictionary fields where the key contains spaces."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_node = Mock()
|
||||||
|
mock_node.id = "test-node-id"
|
||||||
|
mock_node.block = Mock()
|
||||||
|
mock_node.block.name = "CreateDictionary"
|
||||||
|
mock_node.block.description = "Creates a dictionary"
|
||||||
|
mock_node.block.input_schema = Mock()
|
||||||
|
mock_node.block.input_schema.jsonschema = Mock(
|
||||||
|
return_value={"properties": {}, "required": ["values"]}
|
||||||
|
)
|
||||||
|
mock_node.block.input_schema.get_field_schema = Mock(
|
||||||
|
side_effect=KeyError("not found")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Dynamic field with a key containing spaces
|
||||||
|
mock_links = [
|
||||||
|
Mock(
|
||||||
|
sink_name="values_#_User Name", # Dict key with space
|
||||||
|
sink_id="test-node-id",
|
||||||
|
source_id="smart_node_id",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
signature = await block._create_block_function_signature(mock_node, mock_links)
|
||||||
|
|
||||||
|
properties = signature["function"]["parameters"]["properties"]
|
||||||
|
field_mapping = signature["function"]["_field_mapping"]
|
||||||
|
|
||||||
|
# The cleaned name should be in properties
|
||||||
|
expected_clean = SmartDecisionMakerBlock.cleanup("values_#_User Name")
|
||||||
|
assert expected_clean in properties
|
||||||
|
|
||||||
|
# Field mapping should map back to original
|
||||||
|
assert field_mapping[expected_clean] == "values_#_User Name"
|
||||||
|
|
||||||
|
|
||||||
|
class TestAgentModeWithSpaces:
|
||||||
|
"""Tests for agent mode with field names containing spaces."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_agent_mode_tool_execution_with_spaces(self):
|
||||||
|
"""Test that agent mode correctly handles field names with spaces."""
|
||||||
|
import threading
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import backend.blocks.llm as llm_module
|
||||||
|
from backend.data.execution import ExecutionContext
|
||||||
|
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
original_field = "Max Keyword Difficulty"
|
||||||
|
clean_field = SmartDecisionMakerBlock.cleanup(original_field)
|
||||||
|
|
||||||
|
mock_tool_call = MagicMock()
|
||||||
|
mock_tool_call.id = "call_1"
|
||||||
|
mock_tool_call.function.name = "seo_tool"
|
||||||
|
mock_tool_call.function.arguments = json.dumps({
|
||||||
|
clean_field: 50 # LLM uses clean name
|
||||||
|
})
|
||||||
|
|
||||||
|
mock_response_1 = MagicMock()
|
||||||
|
mock_response_1.response = None
|
||||||
|
mock_response_1.tool_calls = [mock_tool_call]
|
||||||
|
mock_response_1.prompt_tokens = 50
|
||||||
|
mock_response_1.completion_tokens = 25
|
||||||
|
mock_response_1.reasoning = None
|
||||||
|
mock_response_1.raw_response = {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": None,
|
||||||
|
"tool_calls": [{"id": "call_1", "type": "function"}],
|
||||||
|
}
|
||||||
|
|
||||||
|
mock_response_2 = MagicMock()
|
||||||
|
mock_response_2.response = "Task completed"
|
||||||
|
mock_response_2.tool_calls = []
|
||||||
|
mock_response_2.prompt_tokens = 30
|
||||||
|
mock_response_2.completion_tokens = 15
|
||||||
|
mock_response_2.reasoning = None
|
||||||
|
mock_response_2.raw_response = {"role": "assistant", "content": "Task completed"}
|
||||||
|
|
||||||
|
llm_call_mock = AsyncMock()
|
||||||
|
llm_call_mock.side_effect = [mock_response_1, mock_response_2]
|
||||||
|
|
||||||
|
mock_tool_signatures = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "seo_tool",
|
||||||
|
"_sink_node_id": "test-sink-node-id",
|
||||||
|
"_field_mapping": {
|
||||||
|
clean_field: original_field,
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"properties": {
|
||||||
|
clean_field: {"type": "integer"},
|
||||||
|
},
|
||||||
|
"required": [clean_field],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_db_client = AsyncMock()
|
||||||
|
mock_node = MagicMock()
|
||||||
|
mock_node.block_id = "test-block-id"
|
||||||
|
mock_db_client.get_node.return_value = mock_node
|
||||||
|
|
||||||
|
mock_node_exec_result = MagicMock()
|
||||||
|
mock_node_exec_result.node_exec_id = "test-tool-exec-id"
|
||||||
|
|
||||||
|
# The input data should use ORIGINAL field name
|
||||||
|
mock_input_data = {original_field: 50}
|
||||||
|
mock_db_client.upsert_execution_input.return_value = (
|
||||||
|
mock_node_exec_result,
|
||||||
|
mock_input_data,
|
||||||
|
)
|
||||||
|
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {
|
||||||
|
"result": {"status": "success"}
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch("backend.blocks.llm.llm_call", llm_call_mock), patch.object(
|
||||||
|
block, "_create_tool_node_signatures", return_value=mock_tool_signatures
|
||||||
|
), patch(
|
||||||
|
"backend.blocks.smart_decision_maker.get_database_manager_async_client",
|
||||||
|
return_value=mock_db_client,
|
||||||
|
):
|
||||||
|
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||||
|
mock_execution_processor = AsyncMock()
|
||||||
|
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||||
|
mock_execution_processor.execution_stats = MagicMock()
|
||||||
|
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||||
|
|
||||||
|
mock_node_stats = MagicMock()
|
||||||
|
mock_node_stats.error = None
|
||||||
|
mock_execution_processor.on_node_execution = AsyncMock(
|
||||||
|
return_value=mock_node_stats
|
||||||
|
)
|
||||||
|
|
||||||
|
input_data = SmartDecisionMakerBlock.Input(
|
||||||
|
prompt="Analyze keywords",
|
||||||
|
model=llm_module.DEFAULT_LLM_MODEL,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||||
|
agent_mode_max_iterations=3,
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = {}
|
||||||
|
async for output_name, output_data in block.run(
|
||||||
|
input_data,
|
||||||
|
credentials=llm_module.TEST_CREDENTIALS,
|
||||||
|
graph_id="test-graph-id",
|
||||||
|
node_id="test-node-id",
|
||||||
|
graph_exec_id="test-exec-id",
|
||||||
|
node_exec_id="test-node-exec-id",
|
||||||
|
user_id="test-user-id",
|
||||||
|
graph_version=1,
|
||||||
|
execution_context=mock_execution_context,
|
||||||
|
execution_processor=mock_execution_processor,
|
||||||
|
):
|
||||||
|
outputs[output_name] = output_data
|
||||||
|
|
||||||
|
# Verify upsert was called with original field name
|
||||||
|
upsert_calls = mock_db_client.upsert_execution_input.call_args_list
|
||||||
|
assert len(upsert_calls) > 0
|
||||||
|
# Check that the original field name was used
|
||||||
|
for call in upsert_calls:
|
||||||
|
input_name = call.kwargs.get("input_name") or call.args[2]
|
||||||
|
# The input name should be the original (mapped back)
|
||||||
|
assert input_name == original_field
|
||||||
|
|
||||||
|
|
||||||
|
class TestRequiredFieldsWithSpaces:
|
||||||
|
"""Tests for required field handling with spaces in names."""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_required_fields_use_clean_names(self):
|
||||||
|
"""Test that required fields array uses clean names for API compatibility."""
|
||||||
|
block = SmartDecisionMakerBlock()
|
||||||
|
|
||||||
|
mock_node = Mock()
|
||||||
|
mock_node.id = "test-node-id"
|
||||||
|
mock_node.block = Mock()
|
||||||
|
mock_node.block.name = "TestBlock"
|
||||||
|
mock_node.block.description = "Test"
|
||||||
|
mock_node.block.input_schema = Mock()
|
||||||
|
mock_node.block.input_schema.jsonschema = Mock(
|
||||||
|
return_value={
|
||||||
|
"properties": {},
|
||||||
|
"required": ["Max Keyword Difficulty", "Search Query"],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_field_schema(field_name):
|
||||||
|
return {"type": "string", "description": f"Field: {field_name}"}
|
||||||
|
|
||||||
|
mock_node.block.input_schema.get_field_schema = get_field_schema
|
||||||
|
|
||||||
|
mock_links = [
|
||||||
|
Mock(sink_name="Max Keyword Difficulty", sink_id="test-node-id", source_id="smart_node_id"),
|
||||||
|
Mock(sink_name="Search Query", sink_id="test-node-id", source_id="smart_node_id"),
|
||||||
|
]
|
||||||
|
|
||||||
|
signature = await block._create_block_function_signature(mock_node, mock_links)
|
||||||
|
|
||||||
|
required = signature["function"]["parameters"]["required"]
|
||||||
|
|
||||||
|
# Required array should use CLEAN names for API compatibility
|
||||||
|
assert "max_keyword_difficulty" in required
|
||||||
|
assert "search_query" in required
|
||||||
|
# Original names should NOT be in required
|
||||||
|
assert "Max Keyword Difficulty" not in required
|
||||||
|
assert "Search Query" not in required
|
||||||
@@ -0,0 +1,513 @@
|
|||||||
|
"""
|
||||||
|
Tests for dynamic fields edge cases and failure modes.
|
||||||
|
|
||||||
|
Covers failure modes:
|
||||||
|
8. No Type Validation in Dynamic Field Merging
|
||||||
|
17. No Validation of Dynamic Field Paths
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from backend.data.dynamic_fields import (
|
||||||
|
DICT_SPLIT,
|
||||||
|
LIST_SPLIT,
|
||||||
|
OBJC_SPLIT,
|
||||||
|
extract_base_field_name,
|
||||||
|
get_dynamic_field_description,
|
||||||
|
is_dynamic_field,
|
||||||
|
is_tool_pin,
|
||||||
|
merge_execution_input,
|
||||||
|
parse_execution_output,
|
||||||
|
sanitize_pin_name,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestDynamicFieldMergingTypeValidation:
|
||||||
|
"""
|
||||||
|
Tests for Failure Mode #8: No Type Validation in Dynamic Field Merging
|
||||||
|
|
||||||
|
When merging dynamic fields, there's no validation that intermediate
|
||||||
|
structures have the correct type, leading to potential type coercion errors.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_merge_dict_field_creates_dict(self):
|
||||||
|
"""Test that dictionary fields create dict structure."""
|
||||||
|
data = {
|
||||||
|
"values_#_name": "Alice",
|
||||||
|
"values_#_age": 30,
|
||||||
|
}
|
||||||
|
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
assert "values" in result
|
||||||
|
assert isinstance(result["values"], dict)
|
||||||
|
assert result["values"]["name"] == "Alice"
|
||||||
|
assert result["values"]["age"] == 30
|
||||||
|
|
||||||
|
def test_merge_list_field_creates_list(self):
|
||||||
|
"""Test that list fields create list structure."""
|
||||||
|
data = {
|
||||||
|
"items_$_0": "first",
|
||||||
|
"items_$_1": "second",
|
||||||
|
"items_$_2": "third",
|
||||||
|
}
|
||||||
|
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
assert "items" in result
|
||||||
|
assert isinstance(result["items"], list)
|
||||||
|
assert result["items"] == ["first", "second", "third"]
|
||||||
|
|
||||||
|
def test_merge_with_existing_primitive_type_conflict(self):
|
||||||
|
"""
|
||||||
|
Test behavior when merging into existing primitive value.
|
||||||
|
|
||||||
|
BUG: If the base field already exists as a primitive,
|
||||||
|
merging a dynamic field may fail or corrupt data.
|
||||||
|
"""
|
||||||
|
# Pre-existing primitive value
|
||||||
|
data = {
|
||||||
|
"value": "I am a string", # Primitive
|
||||||
|
"value_#_key": "dict value", # Dynamic dict field
|
||||||
|
}
|
||||||
|
|
||||||
|
# This may raise an error or produce unexpected results
|
||||||
|
# depending on merge order and implementation
|
||||||
|
try:
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
# If it succeeds, check what happened
|
||||||
|
# The primitive may have been overwritten
|
||||||
|
if isinstance(result.get("value"), dict):
|
||||||
|
# Primitive was converted to dict - data loss!
|
||||||
|
assert "key" in result["value"]
|
||||||
|
else:
|
||||||
|
# Or the dynamic field was ignored
|
||||||
|
pass
|
||||||
|
except (TypeError, AttributeError):
|
||||||
|
# Expected error when trying to merge into primitive
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_merge_list_with_gaps(self):
|
||||||
|
"""Test merging list fields with non-contiguous indices."""
|
||||||
|
data = {
|
||||||
|
"items_$_0": "zero",
|
||||||
|
"items_$_2": "two", # Gap at index 1
|
||||||
|
"items_$_5": "five", # Larger gap
|
||||||
|
}
|
||||||
|
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
assert "items" in result
|
||||||
|
# Check how gaps are handled
|
||||||
|
items = result["items"]
|
||||||
|
assert items[0] == "zero"
|
||||||
|
# Index 1 may be None or missing
|
||||||
|
assert items[2] == "two"
|
||||||
|
assert items[5] == "five"
|
||||||
|
|
||||||
|
def test_merge_nested_dynamic_fields(self):
|
||||||
|
"""Test merging deeply nested dynamic fields."""
|
||||||
|
data = {
|
||||||
|
"data_#_users_$_0": "user1",
|
||||||
|
"data_#_users_$_1": "user2",
|
||||||
|
"data_#_config_#_enabled": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
# Complex nested structures should be created
|
||||||
|
assert "data" in result
|
||||||
|
|
||||||
|
def test_merge_object_field(self):
|
||||||
|
"""Test merging object attribute fields."""
|
||||||
|
data = {
|
||||||
|
"user_@_name": "Alice",
|
||||||
|
"user_@_email": "alice@example.com",
|
||||||
|
}
|
||||||
|
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
assert "user" in result
|
||||||
|
# Object fields create dict-like structure
|
||||||
|
assert result["user"]["name"] == "Alice"
|
||||||
|
assert result["user"]["email"] == "alice@example.com"
|
||||||
|
|
||||||
|
def test_merge_mixed_field_types(self):
|
||||||
|
"""Test merging mixed regular and dynamic fields."""
|
||||||
|
data = {
|
||||||
|
"regular": "value",
|
||||||
|
"dict_field_#_key": "dict_value",
|
||||||
|
"list_field_$_0": "list_item",
|
||||||
|
}
|
||||||
|
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
assert result["regular"] == "value"
|
||||||
|
assert result["dict_field"]["key"] == "dict_value"
|
||||||
|
assert result["list_field"][0] == "list_item"
|
||||||
|
|
||||||
|
|
||||||
|
class TestDynamicFieldPathValidation:
|
||||||
|
"""
|
||||||
|
Tests for Failure Mode #17: No Validation of Dynamic Field Paths
|
||||||
|
|
||||||
|
When traversing dynamic field paths, intermediate None values
|
||||||
|
can cause TypeErrors instead of graceful failures.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_parse_output_with_none_intermediate(self):
|
||||||
|
"""
|
||||||
|
Test parse_execution_output with None intermediate value.
|
||||||
|
|
||||||
|
If data contains {"items": None} and we try to access items[0],
|
||||||
|
it should return None gracefully, not raise TypeError.
|
||||||
|
"""
|
||||||
|
# Output with nested path
|
||||||
|
output_item = ("data_$_0", "value")
|
||||||
|
|
||||||
|
# When the base is None, should return None
|
||||||
|
# This tests the path traversal logic
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="data",
|
||||||
|
sink_node_id=None,
|
||||||
|
sink_pin_name=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should handle gracefully (return the value or None)
|
||||||
|
# Not raise TypeError
|
||||||
|
|
||||||
|
def test_extract_base_field_name_with_multiple_delimiters(self):
|
||||||
|
"""Test extracting base name with multiple delimiters."""
|
||||||
|
# Multiple dict delimiters
|
||||||
|
assert extract_base_field_name("a_#_b_#_c") == "a"
|
||||||
|
|
||||||
|
# Multiple list delimiters
|
||||||
|
assert extract_base_field_name("a_$_0_$_1") == "a"
|
||||||
|
|
||||||
|
# Mixed delimiters
|
||||||
|
assert extract_base_field_name("a_#_b_$_0") == "a"
|
||||||
|
|
||||||
|
def test_is_dynamic_field_edge_cases(self):
|
||||||
|
"""Test is_dynamic_field with edge cases."""
|
||||||
|
# Standard dynamic fields
|
||||||
|
assert is_dynamic_field("values_#_key") is True
|
||||||
|
assert is_dynamic_field("items_$_0") is True
|
||||||
|
assert is_dynamic_field("obj_@_attr") is True
|
||||||
|
|
||||||
|
# Regular fields
|
||||||
|
assert is_dynamic_field("regular") is False
|
||||||
|
assert is_dynamic_field("with_underscore") is False
|
||||||
|
|
||||||
|
# Edge cases
|
||||||
|
assert is_dynamic_field("") is False
|
||||||
|
assert is_dynamic_field("_#_") is True # Just delimiter
|
||||||
|
assert is_dynamic_field("a_#_") is True # Trailing delimiter
|
||||||
|
|
||||||
|
def test_sanitize_pin_name_with_tool_pins(self):
|
||||||
|
"""Test sanitize_pin_name with various tool pin formats."""
|
||||||
|
# Tool pins should return "tools"
|
||||||
|
assert sanitize_pin_name("tools") == "tools"
|
||||||
|
assert sanitize_pin_name("tools_^_node_~_field") == "tools"
|
||||||
|
|
||||||
|
# Dynamic fields should return base name
|
||||||
|
assert sanitize_pin_name("values_#_key") == "values"
|
||||||
|
assert sanitize_pin_name("items_$_0") == "items"
|
||||||
|
|
||||||
|
# Regular fields unchanged
|
||||||
|
assert sanitize_pin_name("regular") == "regular"
|
||||||
|
|
||||||
|
|
||||||
|
class TestDynamicFieldDescriptions:
|
||||||
|
"""Tests for dynamic field description generation."""
|
||||||
|
|
||||||
|
def test_dict_field_description(self):
|
||||||
|
"""Test description for dictionary fields."""
|
||||||
|
desc = get_dynamic_field_description("values_#_user_name")
|
||||||
|
|
||||||
|
assert "Dictionary field" in desc
|
||||||
|
assert "values['user_name']" in desc
|
||||||
|
|
||||||
|
def test_list_field_description(self):
|
||||||
|
"""Test description for list fields."""
|
||||||
|
desc = get_dynamic_field_description("items_$_0")
|
||||||
|
|
||||||
|
assert "List item 0" in desc
|
||||||
|
assert "items[0]" in desc
|
||||||
|
|
||||||
|
def test_object_field_description(self):
|
||||||
|
"""Test description for object fields."""
|
||||||
|
desc = get_dynamic_field_description("user_@_email")
|
||||||
|
|
||||||
|
assert "Object attribute" in desc
|
||||||
|
assert "user.email" in desc
|
||||||
|
|
||||||
|
def test_regular_field_description(self):
|
||||||
|
"""Test description for regular (non-dynamic) fields."""
|
||||||
|
desc = get_dynamic_field_description("regular_field")
|
||||||
|
|
||||||
|
assert desc == "Value for regular_field"
|
||||||
|
|
||||||
|
def test_description_with_numeric_key(self):
|
||||||
|
"""Test description with numeric dictionary key."""
|
||||||
|
desc = get_dynamic_field_description("values_#_123")
|
||||||
|
|
||||||
|
assert "Dictionary field" in desc
|
||||||
|
assert "values['123']" in desc
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseExecutionOutputToolRouting:
|
||||||
|
"""Tests for tool pin routing in parse_execution_output."""
|
||||||
|
|
||||||
|
def test_tool_pin_routing_exact_match(self):
|
||||||
|
"""Test tool pin routing with exact match."""
|
||||||
|
output_item = ("tools_^_node-123_~_field_name", "value")
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="tools",
|
||||||
|
sink_node_id="node-123",
|
||||||
|
sink_pin_name="field_name",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result == "value"
|
||||||
|
|
||||||
|
def test_tool_pin_routing_node_mismatch(self):
|
||||||
|
"""Test tool pin routing with node ID mismatch."""
|
||||||
|
output_item = ("tools_^_node-123_~_field_name", "value")
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="tools",
|
||||||
|
sink_node_id="different-node",
|
||||||
|
sink_pin_name="field_name",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_tool_pin_routing_field_mismatch(self):
|
||||||
|
"""Test tool pin routing with field name mismatch."""
|
||||||
|
output_item = ("tools_^_node-123_~_field_name", "value")
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="tools",
|
||||||
|
sink_node_id="node-123",
|
||||||
|
sink_pin_name="different_field",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_tool_pin_missing_required_params(self):
|
||||||
|
"""Test that tool pins require node_id and pin_name."""
|
||||||
|
output_item = ("tools_^_node-123_~_field", "value")
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="must be provided"):
|
||||||
|
parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="tools",
|
||||||
|
sink_node_id=None,
|
||||||
|
sink_pin_name="field",
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="must be provided"):
|
||||||
|
parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="tools",
|
||||||
|
sink_node_id="node-123",
|
||||||
|
sink_pin_name=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseExecutionOutputDynamicFields:
|
||||||
|
"""Tests for dynamic field routing in parse_execution_output."""
|
||||||
|
|
||||||
|
def test_dict_field_extraction(self):
|
||||||
|
"""Test extraction of dictionary field value."""
|
||||||
|
# The output_item is (field_name, data_structure)
|
||||||
|
data = {"key1": "value1", "key2": "value2"}
|
||||||
|
output_item = ("values", data)
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="values_#_key1",
|
||||||
|
sink_node_id=None,
|
||||||
|
sink_pin_name=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result == "value1"
|
||||||
|
|
||||||
|
def test_list_field_extraction(self):
|
||||||
|
"""Test extraction of list item value."""
|
||||||
|
data = ["zero", "one", "two"]
|
||||||
|
output_item = ("items", data)
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="items_$_1",
|
||||||
|
sink_node_id=None,
|
||||||
|
sink_pin_name=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result == "one"
|
||||||
|
|
||||||
|
def test_nested_field_extraction(self):
|
||||||
|
"""Test extraction of nested field value."""
|
||||||
|
data = {
|
||||||
|
"users": [
|
||||||
|
{"name": "Alice", "email": "alice@example.com"},
|
||||||
|
{"name": "Bob", "email": "bob@example.com"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
output_item = ("data", data)
|
||||||
|
|
||||||
|
# Access nested path
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="data_#_users",
|
||||||
|
sink_node_id=None,
|
||||||
|
sink_pin_name=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result == data["users"]
|
||||||
|
|
||||||
|
def test_missing_key_returns_none(self):
|
||||||
|
"""Test that missing keys return None."""
|
||||||
|
data = {"existing": "value"}
|
||||||
|
output_item = ("values", data)
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="values_#_nonexistent",
|
||||||
|
sink_node_id=None,
|
||||||
|
sink_pin_name=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_index_out_of_bounds_returns_none(self):
|
||||||
|
"""Test that out-of-bounds indices return None."""
|
||||||
|
data = ["zero", "one"]
|
||||||
|
output_item = ("items", data)
|
||||||
|
|
||||||
|
result = parse_execution_output(
|
||||||
|
output_item,
|
||||||
|
link_output_selector="items_$_99",
|
||||||
|
sink_node_id=None,
|
||||||
|
sink_pin_name=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsToolPin:
|
||||||
|
"""Tests for is_tool_pin function."""
|
||||||
|
|
||||||
|
def test_tools_prefix(self):
|
||||||
|
"""Test that 'tools_^_' prefix is recognized."""
|
||||||
|
assert is_tool_pin("tools_^_node_~_field") is True
|
||||||
|
assert is_tool_pin("tools_^_anything") is True
|
||||||
|
|
||||||
|
def test_tools_exact(self):
|
||||||
|
"""Test that exact 'tools' is recognized."""
|
||||||
|
assert is_tool_pin("tools") is True
|
||||||
|
|
||||||
|
def test_non_tool_pins(self):
|
||||||
|
"""Test that non-tool pins are not recognized."""
|
||||||
|
assert is_tool_pin("input") is False
|
||||||
|
assert is_tool_pin("output") is False
|
||||||
|
assert is_tool_pin("toolsomething") is False
|
||||||
|
assert is_tool_pin("my_tools") is False
|
||||||
|
assert is_tool_pin("") is False
|
||||||
|
|
||||||
|
|
||||||
|
class TestMergeExecutionInputEdgeCases:
|
||||||
|
"""Edge case tests for merge_execution_input."""
|
||||||
|
|
||||||
|
def test_empty_input(self):
|
||||||
|
"""Test merging empty input."""
|
||||||
|
result = merge_execution_input({})
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
def test_only_regular_fields(self):
|
||||||
|
"""Test merging only regular fields (no dynamic)."""
|
||||||
|
data = {"a": 1, "b": 2, "c": 3}
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
assert result == data
|
||||||
|
|
||||||
|
def test_overwrite_behavior(self):
|
||||||
|
"""Test behavior when same key is set multiple times."""
|
||||||
|
# This shouldn't happen in practice, but test the behavior
|
||||||
|
data = {
|
||||||
|
"values_#_key": "first",
|
||||||
|
}
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
assert result["values"]["key"] == "first"
|
||||||
|
|
||||||
|
def test_numeric_string_keys(self):
|
||||||
|
"""Test handling of numeric string keys in dict fields."""
|
||||||
|
data = {
|
||||||
|
"values_#_123": "numeric_key",
|
||||||
|
"values_#_456": "another_numeric",
|
||||||
|
}
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
assert result["values"]["123"] == "numeric_key"
|
||||||
|
assert result["values"]["456"] == "another_numeric"
|
||||||
|
|
||||||
|
def test_special_characters_in_keys(self):
|
||||||
|
"""Test handling of special characters in keys."""
|
||||||
|
data = {
|
||||||
|
"values_#_key-with-dashes": "value1",
|
||||||
|
"values_#_key.with.dots": "value2",
|
||||||
|
}
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
assert result["values"]["key-with-dashes"] == "value1"
|
||||||
|
assert result["values"]["key.with.dots"] == "value2"
|
||||||
|
|
||||||
|
def test_deeply_nested_list(self):
|
||||||
|
"""Test deeply nested list indices."""
|
||||||
|
data = {
|
||||||
|
"matrix_$_0_$_0": "0,0",
|
||||||
|
"matrix_$_0_$_1": "0,1",
|
||||||
|
"matrix_$_1_$_0": "1,0",
|
||||||
|
"matrix_$_1_$_1": "1,1",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Note: Current implementation may not support this depth
|
||||||
|
# Test documents expected behavior
|
||||||
|
try:
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
# If supported, verify structure
|
||||||
|
except (KeyError, TypeError, IndexError):
|
||||||
|
# Deep nesting may not be supported
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_none_values(self):
|
||||||
|
"""Test handling of None values in input."""
|
||||||
|
data = {
|
||||||
|
"regular": None,
|
||||||
|
"dict_#_key": None,
|
||||||
|
"list_$_0": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
assert result["regular"] is None
|
||||||
|
assert result["dict"]["key"] is None
|
||||||
|
assert result["list"][0] is None
|
||||||
|
|
||||||
|
def test_complex_values(self):
|
||||||
|
"""Test handling of complex values (dicts, lists)."""
|
||||||
|
data = {
|
||||||
|
"values_#_nested_dict": {"inner": "value"},
|
||||||
|
"values_#_nested_list": [1, 2, 3],
|
||||||
|
}
|
||||||
|
|
||||||
|
result = merge_execution_input(data)
|
||||||
|
|
||||||
|
assert result["values"]["nested_dict"] == {"inner": "value"}
|
||||||
|
assert result["values"]["nested_list"] == [1, 2, 3]
|
||||||
@@ -0,0 +1,463 @@
"""
Tests for dynamic field routing with sanitized names.

This test file specifically tests the parse_execution_output function
which is responsible for routing tool outputs to the correct nodes.
The critical bug this addresses is the mismatch between:
- emit keys using sanitized names (e.g., "max_keyword_difficulty")
- sink_pin_name using original names (e.g., "Max Keyword Difficulty")
"""

import re
from typing import Any

import pytest

from backend.data.dynamic_fields import (
    DICT_SPLIT,
    LIST_SPLIT,
    OBJC_SPLIT,
    extract_base_field_name,
    get_dynamic_field_description,
    is_dynamic_field,
    is_tool_pin,
    merge_execution_input,
    parse_execution_output,
    sanitize_pin_name,
)


def cleanup(s: str) -> str:
    """
    Simulate SmartDecisionMakerBlock.cleanup() for testing.
    Clean up names for use as tool function names.
    """
    return re.sub(r"[^a-zA-Z0-9_-]", "_", s).lower()


class TestParseExecutionOutputToolRouting:
    """Tests for tool pin routing in parse_execution_output."""

    def test_exact_match_routes_correctly(self):
        """When emit key field exactly matches sink_pin_name, routing works."""
        output_item = ("tools_^_node-123_~_query", "test value")

        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="query",
        )
        assert result == "test value"

    def test_sanitized_emit_vs_original_sink_fails(self):
        """
        CRITICAL BUG TEST: When emit key uses sanitized name but sink uses original,
        routing fails.
        """
        # Backend emits with sanitized name
        sanitized_field = cleanup("Max Keyword Difficulty")
        output_item = (f"tools_^_node-123_~_{sanitized_field}", 50)

        # Frontend link has original name
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="Max Keyword Difficulty",  # Original name
        )

        # BUG: This returns None because sanitized != original
        # Once fixed, change this to: assert result == 50
        assert result is None, "Expected None due to sanitization mismatch bug"

    def test_node_id_mismatch_returns_none(self):
        """When node IDs don't match, routing should return None."""
        output_item = ("tools_^_node-123_~_query", "test value")

        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="different-node",  # Different node
            sink_pin_name="query",
        )
        assert result is None

    def test_both_node_and_pin_must_match(self):
        """Both node_id and pin_name must match for routing to succeed."""
        output_item = ("tools_^_node-123_~_query", "test value")

        # Wrong node, right pin
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="wrong-node",
            sink_pin_name="query",
        )
        assert result is None

        # Right node, wrong pin
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="wrong_pin",
        )
        assert result is None

        # Right node, right pin
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="query",
        )
        assert result == "test value"


class TestToolPinRoutingWithSpecialCharacters:
    """Tests for tool pin routing with various special characters in names."""

    @pytest.mark.parametrize(
        "original_name,sanitized_name",
        [
            ("Max Keyword Difficulty", "max_keyword_difficulty"),
            ("Search Volume (Monthly)", "search_volume__monthly_"),
            ("CPC ($)", "cpc____"),
            ("User's Input", "user_s_input"),
            ("Query #1", "query__1"),
            ("API.Response", "api_response"),
            ("Field@Name", "field_name"),
            ("Test\tTab", "test_tab"),
            ("Test\nNewline", "test_newline"),
        ],
    )
    def test_routing_mismatch_with_special_chars(self, original_name, sanitized_name):
        """
        Test that various special characters cause routing mismatches.

        This test documents the current buggy behavior where sanitized emit keys
        don't match original sink_pin_names.
        """
        # Verify sanitization
        assert cleanup(original_name) == sanitized_name

        # Backend emits with sanitized name
        output_item = (f"tools_^_node-123_~_{sanitized_name}", "value")

        # Frontend link has original name
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name=original_name,
        )

        # BUG: Returns None due to mismatch
        assert result is None, f"Routing should fail for '{original_name}' vs '{sanitized_name}'"


class TestToolPinMissingParameters:
    """Tests for missing required parameters in parse_execution_output."""

    def test_missing_sink_node_id_raises_error(self):
        """Missing sink_node_id should raise ValueError for tool pins."""
        output_item = ("tools_^_node-123_~_query", "test value")

        with pytest.raises(ValueError, match="sink_node_id and sink_pin_name must be provided"):
            parse_execution_output(
                output_item,
                link_output_selector="tools",
                sink_node_id=None,
                sink_pin_name="query",
            )

    def test_missing_sink_pin_name_raises_error(self):
        """Missing sink_pin_name should raise ValueError for tool pins."""
        output_item = ("tools_^_node-123_~_query", "test value")

        with pytest.raises(ValueError, match="sink_node_id and sink_pin_name must be provided"):
            parse_execution_output(
                output_item,
                link_output_selector="tools",
                sink_node_id="node-123",
                sink_pin_name=None,
            )


class TestIsToolPin:
    """Tests for is_tool_pin function."""

    def test_tools_prefix_is_tool_pin(self):
        """Names starting with 'tools_^_' are tool pins."""
        assert is_tool_pin("tools_^_node_~_field") is True
        assert is_tool_pin("tools_^_anything") is True

    def test_tools_exact_is_tool_pin(self):
        """Exact 'tools' is a tool pin."""
        assert is_tool_pin("tools") is True

    def test_non_tool_pins(self):
        """Non-tool pin names should return False."""
        assert is_tool_pin("input") is False
        assert is_tool_pin("output") is False
        assert is_tool_pin("my_tools") is False
        assert is_tool_pin("toolsomething") is False


class TestSanitizePinName:
    """Tests for sanitize_pin_name function."""

    def test_extracts_base_from_dynamic_field(self):
        """Should extract base field name from dynamic fields."""
        assert sanitize_pin_name("values_#_key") == "values"
        assert sanitize_pin_name("items_$_0") == "items"
        assert sanitize_pin_name("obj_@_attr") == "obj"

    def test_returns_tools_for_tool_pins(self):
        """Tool pins should be sanitized to 'tools'."""
        assert sanitize_pin_name("tools_^_node_~_field") == "tools"
        assert sanitize_pin_name("tools") == "tools"

    def test_regular_field_unchanged(self):
        """Regular field names should be unchanged."""
        assert sanitize_pin_name("query") == "query"
        assert sanitize_pin_name("max_difficulty") == "max_difficulty"


class TestDynamicFieldDescriptions:
    """Tests for dynamic field description generation."""

    def test_dict_field_description_with_spaces_in_key(self):
        """Dictionary field keys with spaces should generate correct descriptions."""
        # After cleanup, "User Name" becomes "user_name" in the field name
        # But the original key might have had spaces
        desc = get_dynamic_field_description("values_#_user_name")
        assert "Dictionary field" in desc
        assert "values['user_name']" in desc

    def test_list_field_description(self):
        """List field descriptions should include index."""
        desc = get_dynamic_field_description("items_$_0")
        assert "List item 0" in desc
        assert "items[0]" in desc

    def test_object_field_description(self):
        """Object field descriptions should include attribute."""
        desc = get_dynamic_field_description("user_@_email")
        assert "Object attribute" in desc
        assert "user.email" in desc


class TestMergeExecutionInput:
    """Tests for merge_execution_input function."""

    def test_merges_dict_fields(self):
        """Dictionary fields should be merged into nested structure."""
        data = {
            "values_#_name": "Alice",
            "values_#_age": 30,
            "other_field": "unchanged",
        }

        result = merge_execution_input(data)

        assert "values" in result
        assert result["values"]["name"] == "Alice"
        assert result["values"]["age"] == 30
        assert result["other_field"] == "unchanged"

    def test_merges_list_fields(self):
        """List fields should be merged into arrays."""
        data = {
            "items_$_0": "first",
            "items_$_1": "second",
            "items_$_2": "third",
        }

        result = merge_execution_input(data)

        assert "items" in result
        assert result["items"] == ["first", "second", "third"]

    def test_merges_mixed_fields(self):
        """Mixed regular and dynamic fields should all be preserved."""
        data = {
            "regular": "value",
            "dict_#_key": "dict_value",
            "list_$_0": "list_item",
        }

        result = merge_execution_input(data)

        assert result["regular"] == "value"
        assert result["dict"]["key"] == "dict_value"
        assert result["list"] == ["list_item"]


class TestExtractBaseFieldName:
    """Tests for extract_base_field_name function."""

    def test_extracts_from_dict_delimiter(self):
        """Should extract base name before _#_ delimiter."""
        assert extract_base_field_name("values_#_name") == "values"
        assert extract_base_field_name("user_#_email_#_domain") == "user"

    def test_extracts_from_list_delimiter(self):
        """Should extract base name before _$_ delimiter."""
        assert extract_base_field_name("items_$_0") == "items"
        assert extract_base_field_name("data_$_1_$_nested") == "data"

    def test_extracts_from_object_delimiter(self):
        """Should extract base name before _@_ delimiter."""
        assert extract_base_field_name("obj_@_attr") == "obj"

    def test_no_delimiter_returns_original(self):
        """Names without delimiters should be returned unchanged."""
        assert extract_base_field_name("regular_field") == "regular_field"
        assert extract_base_field_name("query") == "query"


class TestIsDynamicField:
    """Tests for is_dynamic_field function."""

    def test_dict_delimiter_is_dynamic(self):
        """Fields with _#_ are dynamic."""
        assert is_dynamic_field("values_#_key") is True

    def test_list_delimiter_is_dynamic(self):
        """Fields with _$_ are dynamic."""
        assert is_dynamic_field("items_$_0") is True

    def test_object_delimiter_is_dynamic(self):
        """Fields with _@_ are dynamic."""
        assert is_dynamic_field("obj_@_attr") is True

    def test_regular_fields_not_dynamic(self):
        """Regular field names without delimiters are not dynamic."""
        assert is_dynamic_field("regular_field") is False
        assert is_dynamic_field("query") is False
        assert is_dynamic_field("Max Keyword Difficulty") is False


class TestRoutingEndToEnd:
    """End-to-end tests for the full routing flow."""

    def test_successful_routing_without_spaces(self):
        """Full routing flow works when no spaces in names."""
        field_name = "query"
        node_id = "test-node-123"

        # Emit key (as created by SmartDecisionMaker)
        emit_key = f"tools_^_{node_id}_~_{cleanup(field_name)}"
        output_item = (emit_key, "search term")

        # Route (as called by executor)
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id=node_id,
            sink_pin_name=field_name,
        )

        assert result == "search term"

    def test_failed_routing_with_spaces(self):
        """
        Full routing flow FAILS when names have spaces.

        This test documents the exact bug scenario:
        1. Frontend creates link with sink_name="Max Keyword Difficulty"
        2. SmartDecisionMaker emits with sanitized name in key
        3. Executor calls parse_execution_output with original sink_pin_name
        4. Routing fails because names don't match
        """
        original_field_name = "Max Keyword Difficulty"
        sanitized_field_name = cleanup(original_field_name)
        node_id = "test-node-123"

        # Step 1 & 2: SmartDecisionMaker emits with sanitized name
        emit_key = f"tools_^_{node_id}_~_{sanitized_field_name}"
        output_item = (emit_key, 50)

        # Step 3: Executor routes with original name from link
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id=node_id,
            sink_pin_name=original_field_name,  # Original from link!
        )

        # Step 4: BUG - Returns None instead of 50
        assert result is None

        # This is what should happen after fix:
        # assert result == 50

    def test_multiple_fields_with_spaces(self):
        """Test routing multiple fields where some have spaces."""
        node_id = "test-node"

        fields = {
            "query": "test",  # No spaces - should work
            "Max Difficulty": 100,  # Spaces - will fail
            "min_volume": 1000,  # No spaces - should work
        }

        results = {}
        for original_name, value in fields.items():
            sanitized = cleanup(original_name)
            emit_key = f"tools_^_{node_id}_~_{sanitized}"
            output_item = (emit_key, value)

            result = parse_execution_output(
                output_item,
                link_output_selector="tools",
                sink_node_id=node_id,
                sink_pin_name=original_name,
            )
            results[original_name] = result

        # Fields without spaces work
        assert results["query"] == "test"
        assert results["min_volume"] == 1000

        # Fields with spaces fail
        assert results["Max Difficulty"] is None  # BUG!


class TestProposedFix:
    """
    Tests for the proposed fix.

    The fix should sanitize sink_pin_name before comparison in parse_execution_output.
    This class contains tests that will pass once the fix is implemented.
    """

    def test_routing_should_sanitize_both_sides(self):
        """
        PROPOSED FIX: parse_execution_output should sanitize sink_pin_name
        before comparing with the field from emit key.

        Current behavior: Direct string comparison
        Fixed behavior: Compare cleanup(target_input_pin) == cleanup(sink_pin_name)
        """
        original_field = "Max Keyword Difficulty"
        sanitized_field = cleanup(original_field)
        node_id = "node-123"

        emit_key = f"tools_^_{node_id}_~_{sanitized_field}"
        output_item = (emit_key, 50)

        # Extract the comparison being made
        selector = emit_key[8:]  # Remove "tools_^_"
        target_node_id, target_input_pin = selector.split("_~_", 1)

        # Current comparison (FAILS):
        current_comparison = (target_input_pin == original_field)
        assert current_comparison is False, "Current comparison fails"

        # Proposed fixed comparison (PASSES):
        # Either sanitize sink_pin_name, or sanitize both
        fixed_comparison = (target_input_pin == cleanup(original_field))
        assert fixed_comparison is True, "Fixed comparison should pass"

autogpt_platform/frontend/src/tests/smart-decision-maker.spec.ts (new file, 596 lines)
@@ -0,0 +1,596 @@
|
|||||||
|
/**
|
||||||
|
* E2E tests for SmartDecisionMaker block functionality.
|
||||||
|
*
|
||||||
|
* These tests verify the critical bug where field names with spaces
|
||||||
|
* (e.g., "Max Keyword Difficulty") cause tool calls to fail due to
|
||||||
|
* inconsistent sanitization between frontend and backend.
|
||||||
|
*
|
||||||
|
* The bug:
|
||||||
|
* - Frontend creates links with original names: tools_^_{node_id}_~_Max Keyword Difficulty
|
||||||
|
* - Backend emits with sanitized names: tools_^_{node_id}_~_max_keyword_difficulty
|
||||||
|
* - Routing fails because names don't match
|
||||||
|
*/
|
||||||
|
|
||||||
|
import test, { expect } from "@playwright/test";
|
||||||
|
import { BuildPage, Block } from "./pages/build.page";
|
||||||
|
import { LoginPage } from "./pages/login.page";
|
||||||
|
import { hasUrl } from "./utils/assertion";
|
||||||
|
import { getTestUser } from "./utils/auth";
|
||||||
|
|
||||||
|
test.describe("SmartDecisionMaker", () => {
|
||||||
|
let buildPage: BuildPage;
|
||||||
|
|
||||||
|
test.beforeEach(async ({ page }) => {
|
||||||
|
test.setTimeout(60000); // Longer timeout for complex tests
|
||||||
|
const loginPage = new LoginPage(page);
|
||||||
|
const testUser = await getTestUser();
|
||||||
|
|
||||||
|
buildPage = new BuildPage(page);
|
||||||
|
|
||||||
|
await page.goto("/login");
|
||||||
|
await loginPage.login(testUser.email, testUser.password);
|
||||||
|
await hasUrl(page, "/marketplace");
|
||||||
|
await buildPage.navbar.clickBuildLink();
|
||||||
|
await hasUrl(page, "/build");
|
||||||
|
await buildPage.closeTutorial();
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper to find SmartDecisionMaker block from API
|
||||||
|
*/
|
||||||
|
async function getSmartDecisionMakerBlock(): Promise<Block | undefined> {
|
||||||
|
const blocks = await buildPage.getBlocksFromAPI();
|
||||||
|
return blocks.find(
|
||||||
|
(b) =>
|
||||||
|
b.name.toLowerCase().includes("smart decision") ||
|
||||||
|
b.name.toLowerCase().includes("ai decision") ||
|
||||||
|
b.id === "3b191d9f-356f-482d-8238-ba04b6d18381"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper to find a block by partial name match
|
||||||
|
*/
|
||||||
|
async function findBlockByName(partialName: string): Promise<Block | undefined> {
|
||||||
|
const blocks = await buildPage.getBlocksFromAPI();
|
||||||
|
return blocks.find((b) =>
|
||||||
|
b.name.toLowerCase().includes(partialName.toLowerCase())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
test.describe("Block Addition", () => {
|
||||||
|
test("can add SmartDecisionMaker block to canvas", async () => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const smartBlock = await getSmartDecisionMakerBlock();
|
||||||
|
if (!smartBlock) {
|
||||||
|
test.skip(true, "SmartDecisionMaker block not found in API");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock(smartBlock);
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
await buildPage.hasBlock(smartBlock);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("SmartDecisionMaker block has expected input pins", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const smartBlock = await getSmartDecisionMakerBlock();
|
||||||
|
if (!smartBlock) {
|
||||||
|
test.skip(true, "SmartDecisionMaker block not found in API");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock(smartBlock);
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
// Verify expected input handles exist
|
||||||
|
const blockElement = page.locator(`[data-blockid="${smartBlock.id}"]`).first();
|
||||||
|
await expect(blockElement).toBeVisible();
|
||||||
|
|
||||||
|
// Check for common SmartDecisionMaker inputs
|
||||||
|
const promptInput = blockElement.locator('[data-testid="input-handle-prompt"]');
|
||||||
|
const modelInput = blockElement.locator('[data-testid="input-handle-model"]');
|
||||||
|
|
||||||
|
// At least the prompt input should exist
|
||||||
|
await expect(promptInput).toBeAttached();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test.describe("Pin Name Handling", () => {
|
||||||
|
test("block connections preserve original field names in UI", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
// Add a Store Value block to test connections
|
||||||
|
const storeBlock = await findBlockByName("Store Value");
|
||||||
|
if (!storeBlock) {
|
||||||
|
test.skip(true, "Store Value block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Store Value 1",
|
||||||
|
});
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Store Value 2",
|
||||||
|
});
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
// Connect the blocks
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-2-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify connection was made
|
||||||
|
const edge = page.locator(".react-flow__edge");
|
||||||
|
await expect(edge.first()).toBeVisible();
|
||||||
|
});
|
||||||
|
|
||||||
|
test("input handles are accessible for fields with various names", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
// Find a block that might have inputs with spaces/special chars
|
||||||
|
const blocks = await buildPage.getBlocksFromAPI();
|
||||||
|
|
||||||
|
// Look for blocks in AI category which often have complex field names
|
||||||
|
const aiBlocks = blocks.filter((b) => b.type === "AI" || b.type === "Standard");
|
||||||
|
|
||||||
|
if (aiBlocks.length === 0) {
|
||||||
|
test.skip(true, "No suitable blocks found for testing");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the first available block
|
||||||
|
const testBlock = aiBlocks[0];
|
||||||
|
await buildPage.addBlock(testBlock);
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
// Verify the block is on canvas
|
||||||
|
await buildPage.hasBlock(testBlock);
|
||||||
|
|
||||||
|
// Get all input handles on the block
|
||||||
|
const blockElement = page.locator(`[data-blockid="${testBlock.id}"]`).first();
|
||||||
|
const inputHandles = blockElement.locator('[data-testid^="input-handle-"]');
|
||||||
|
|
||||||
|
const handleCount = await inputHandles.count();
|
||||||
|
console.log(`Block ${testBlock.name} has ${handleCount} input handles`);
|
||||||
|
|
||||||
|
// Verify handles are accessible
|
||||||
|
if (handleCount > 0) {
|
||||||
|
const firstHandle = inputHandles.first();
|
||||||
|
await expect(firstHandle).toBeAttached();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test.describe("Block Connections", () => {
|
||||||
|
test("can connect SmartDecisionMaker output to downstream block", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const smartBlock = await getSmartDecisionMakerBlock();
|
||||||
|
const storeBlock = await findBlockByName("Store Value");
|
||||||
|
|
||||||
|
if (!smartBlock || !storeBlock) {
|
||||||
|
test.skip(true, "Required blocks not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add SmartDecisionMaker
|
||||||
|
await buildPage.addBlock(smartBlock);
|
||||||
|
|
||||||
|
// Add a downstream block
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Downstream Store",
|
||||||
|
});
|
||||||
|
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
// Wait for blocks to settle
|
||||||
|
await page.waitForTimeout(500);
|
||||||
|
|
||||||
|
// Verify both blocks are present
|
||||||
|
await buildPage.hasBlock(smartBlock);
|
||||||
|
|
||||||
|
// The tools output should be available for connection
|
||||||
|
const smartBlockElement = page.locator(`[data-blockid="${smartBlock.id}"]`).first();
|
||||||
|
const toolsOutput = smartBlockElement.locator('[data-testid="output-handle-tools"]');
|
||||||
|
|
||||||
|
// tools output may or may not exist depending on block configuration
|
||||||
|
const hasToolsOutput = await toolsOutput.count() > 0;
|
||||||
|
console.log(`SmartDecisionMaker has tools output: ${hasToolsOutput}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("connection data attributes use correct format", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const storeBlock = await findBlockByName("Store Value");
|
||||||
|
if (!storeBlock) {
|
||||||
|
test.skip(true, "Store Value block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Store 1",
|
||||||
|
});
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Store 2",
|
||||||
|
});
|
||||||
|
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
// Connect via data IDs
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-2-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify edge was created
|
||||||
|
const edges = page.locator(".react-flow__edge");
|
||||||
|
await expect(edges.first()).toBeVisible();
|
||||||
|
|
||||||
|
// Get edge data attributes
|
||||||
|
const edgeElement = edges.first();
|
||||||
|
const sourceHandle = await edgeElement.getAttribute("data-sourcehandle");
|
||||||
|
const targetHandle = await edgeElement.getAttribute("data-targethandle");
|
||||||
|
|
||||||
|
console.log(`Edge source handle: ${sourceHandle}`);
|
||||||
|
console.log(`Edge target handle: ${targetHandle}`);
|
||||||
|
|
||||||
|
// The handles should be set
|
||||||
|
expect(sourceHandle).toBeTruthy();
|
||||||
|
expect(targetHandle).toBeTruthy();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test.describe("Agent Save and Load", () => {
|
||||||
|
test("can save agent with SmartDecisionMaker block", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const smartBlock = await getSmartDecisionMakerBlock();
|
||||||
|
if (!smartBlock) {
|
||||||
|
test.skip(true, "SmartDecisionMaker block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock(smartBlock);
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
// Save the agent
|
||||||
|
const agentName = `SDM Test ${Date.now()}`;
|
||||||
|
await buildPage.saveAgent(agentName, "Testing SmartDecisionMaker");
|
||||||
|
|
||||||
|
// Verify URL updated with flowID
|
||||||
|
await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
|
||||||
|
|
||||||
|
// Wait for save to complete
|
||||||
|
await buildPage.waitForSaveButton();
|
||||||
|
});
|
||||||
|
|
||||||
|
test("saved agent preserves block connections", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const storeBlock = await findBlockByName("Store Value");
|
||||||
|
if (!storeBlock) {
|
||||||
|
test.skip(true, "Store Value block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add and connect blocks
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Store 1",
|
||||||
|
});
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Store 2",
|
||||||
|
});
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-2-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Save
|
||||||
|
const agentName = `Connection Test ${Date.now()}`;
|
||||||
|
await buildPage.saveAgent(agentName, "Testing connections");
|
||||||
|
await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
|
||||||
|
|
||||||
|
// Count edges before reload
|
||||||
|
const edgesBefore = await page.locator(".react-flow__edge").count();
|
||||||
|
|
||||||
|
// Reload the page
|
||||||
|
await page.reload();
|
||||||
|
await buildPage.closeTutorial();
|
||||||
|
|
||||||
|
// Wait for graph to load
|
||||||
|
await page.waitForTimeout(2000);
|
||||||
|
|
||||||
|
// Verify edges still exist
|
||||||
|
const edgesAfter = await page.locator(".react-flow__edge").count();
|
||||||
|
expect(edgesAfter).toBe(edgesBefore);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test.describe("Field Name Display", () => {
|
||||||
|
test("block inputs display readable field names", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const smartBlock = await getSmartDecisionMakerBlock();
|
||||||
|
if (!smartBlock) {
|
||||||
|
test.skip(true, "SmartDecisionMaker block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock(smartBlock);
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
const blockElement = page.locator(`[data-blockid="${smartBlock.id}"]`).first();
|
||||||
|
|
||||||
|
// Get all visible input labels
|
||||||
|
const inputLabels = blockElement.locator('[data-id^="input-handle-"]');
|
||||||
|
const count = await inputLabels.count();
|
||||||
|
|
||||||
|
console.log(`Found ${count} input containers`);
|
||||||
|
|
||||||
|
// Log each input's data-id to see field naming
|
||||||
|
for (let i = 0; i < Math.min(count, 5); i++) {
|
||||||
|
const label = inputLabels.nth(i);
|
||||||
|
const dataId = await label.getAttribute("data-id");
|
||||||
|
console.log(`Input ${i}: ${dataId}`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
test("output handles have correct data-testid format", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const storeBlock = await findBlockByName("Store Value");
|
||||||
|
if (!storeBlock) {
|
||||||
|
test.skip(true, "Store Value block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock(storeBlock);
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
const blockElement = page.locator(`[data-blockid="${storeBlock.id}"]`).first();
|
||||||
|
const outputHandles = blockElement.locator('[data-testid^="output-handle-"]');
|
||||||
|
|
||||||
|
const count = await outputHandles.count();
|
||||||
|
console.log(`Found ${count} output handles`);
|
||||||
|
|
||||||
|
for (let i = 0; i < count; i++) {
|
||||||
|
const handle = outputHandles.nth(i);
|
||||||
|
const testId = await handle.getAttribute("data-testid");
|
||||||
|
console.log(`Output handle ${i}: ${testId}`);
|
||||||
|
|
||||||
|
// Verify format: output-handle-{fieldname}
|
||||||
|
expect(testId).toMatch(/^output-handle-/);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test.describe("Multi-Block Workflows", () => {
|
||||||
|
test("can create workflow with multiple connected blocks", async ({ page }) => {
|
||||||
|
test.setTimeout(90000);
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const storeBlock = await findBlockByName("Store Value");
|
||||||
|
if (!storeBlock) {
|
||||||
|
test.skip(true, "Store Value block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add three blocks in a chain
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Block A",
|
||||||
|
});
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Block B",
|
||||||
|
});
|
||||||
|
await buildPage.addBlock({
|
||||||
|
...storeBlock,
|
||||||
|
name: "Block C",
|
||||||
|
});
|
||||||
|
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
// Connect A -> B
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-2-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Connect B -> C
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-2-output-source",
|
||||||
|
"1-3-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify we have 2 edges
|
||||||
|
const edges = page.locator(".react-flow__edge");
|
||||||
|
await expect(edges).toHaveCount(2);
|
||||||
|
|
||||||
|
// Save the workflow
|
||||||
|
await buildPage.saveAgent(
|
||||||
|
`Workflow Test ${Date.now()}`,
|
||||||
|
"Multi-block workflow test"
|
||||||
|
);
|
||||||
|
await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test.describe("SmartDecisionMaker Pin Sanitization", () => {
|
||||||
|
let buildPage: BuildPage;
|
||||||
|
|
||||||
|
test.beforeEach(async ({ page }) => {
|
||||||
|
test.setTimeout(60000);
|
||||||
|
const loginPage = new LoginPage(page);
|
||||||
|
const testUser = await getTestUser();
|
||||||
|
|
||||||
|
buildPage = new BuildPage(page);
|
||||||
|
|
||||||
|
await page.goto("/login");
|
||||||
|
await loginPage.login(testUser.email, testUser.password);
|
||||||
|
await hasUrl(page, "/marketplace");
|
||||||
|
await buildPage.navbar.clickBuildLink();
|
||||||
|
await hasUrl(page, "/build");
|
||||||
|
await buildPage.closeTutorial();
|
||||||
|
});
|
||||||
|
|
||||||
|
test("verifies input handle naming convention", async ({ page }) => {
|
||||||
|
/**
|
||||||
|
* This test documents the expected behavior of input handle naming.
|
||||||
|
*
|
||||||
|
* The bug: If frontend uses original names (with spaces) in data attributes
|
||||||
|
* but backend expects sanitized names (lowercase, underscores), routing fails.
|
||||||
|
*/
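    // Minimal sketch (an assumption, mirroring the rule asserted further down with
    // /[^a-zA-Z0-9_-]/): replace disallowed characters with "_" and lowercase, so
    // "Max Keyword Difficulty" becomes "max_keyword_difficulty".
    const sanitizePinName = (s: string) =>
      s.replace(/[^a-zA-Z0-9_-]/g, "_").toLowerCase();
    void sanitizePinName; // illustrative only; the assertions below inspect real handles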

    await buildPage.openBlocksPanel();

    // Get all blocks and find one with inputs
    const blocks = await buildPage.getBlocksFromAPI();
    const blockWithInputs = blocks.find((b) => b.type === "Standard");

    if (!blockWithInputs) {
      test.skip(true, "No suitable block found");
      return;
    }

    await buildPage.addBlock(blockWithInputs);
    await buildPage.closeBlocksPanel();

    const blockElement = page.locator(`[data-blockid="${blockWithInputs.id}"]`).first();
    const inputHandles = blockElement.locator('[data-testid^="input-handle-"]');

    const count = await inputHandles.count();

    // Document the actual naming convention used
    const handleNames: string[] = [];
    for (let i = 0; i < count; i++) {
      const handle = inputHandles.nth(i);
      const testId = await handle.getAttribute("data-testid");
      if (testId) {
        const fieldName = testId.replace("input-handle-", "");
        handleNames.push(fieldName);
      }
    }

    console.log(`Block: ${blockWithInputs.name}`);
    console.log(`Input handle names: ${JSON.stringify(handleNames)}`);

    // Check if names are lowercase (sanitized) or original case
    for (const name of handleNames) {
      const isLowercase = name === name.toLowerCase();
      const hasSpaces = name.includes(" ");
      const hasSpecialChars = /[^a-zA-Z0-9_-]/.test(name);

      console.log(`  ${name}: lowercase=${isLowercase}, spaces=${hasSpaces}, special=${hasSpecialChars}`);

      // Document: the frontend uses lowercase handle names,
      // which should match the backend sanitization
      expect(isLowercase).toBe(true);
      expect(hasSpaces).toBe(false);
    }
  });

  test("verifies output handle naming matches input handle convention", async ({ page }) => {
    await buildPage.openBlocksPanel();

    const blocks = await buildPage.getBlocksFromAPI();
    const blockWithOutputs = blocks.find((b) => b.type === "Standard");

    if (!blockWithOutputs) {
      test.skip(true, "No suitable block found");
      return;
    }

    await buildPage.addBlock(blockWithOutputs);
    await buildPage.closeBlocksPanel();

    const blockElement = page.locator(`[data-blockid="${blockWithOutputs.id}"]`).first();
    const outputHandles = blockElement.locator('[data-testid^="output-handle-"]');

    const count = await outputHandles.count();

    for (let i = 0; i < count; i++) {
      const handle = outputHandles.nth(i);
      const testId = await handle.getAttribute("data-testid");
      if (testId) {
        const fieldName = testId.replace("output-handle-", "");

        // Output handles should also use lowercase sanitized names
        const isLowercase = fieldName === fieldName.toLowerCase();
        expect(isLowercase).toBe(true);
      }
    }
  });

  test("link creation uses consistent field naming", async ({ page }) => {
    /**
     * This test verifies that when creating a connection (link),
     * both source and target use consistent naming conventions.
     */
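    // For example (an assumption, not asserted below): if a source pin were labelled
    // "Output" and a target pin "Input", the edge attributes inspected further down
    // should surface them as the already-sanitized "output" / "input" pair.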
    await buildPage.openBlocksPanel();

    const storeBlock = await buildPage.getFilteredBlocksFromAPI(
      (b) => b.name.toLowerCase().includes("store value")
    );

    if (storeBlock.length === 0) {
      test.skip(true, "Store Value block not found");
      return;
    }

    await buildPage.addBlock({
      ...storeBlock[0],
      name: "Source Block",
    });
    await buildPage.addBlock({
      ...storeBlock[0],
      name: "Target Block",
    });
    await buildPage.closeBlocksPanel();

    // Create connection
    await buildPage.connectBlockOutputToBlockInputViaDataId(
      "1-1-output-source",
      "1-2-input-target"
    );

    // Get the created edge
    const edge = page.locator(".react-flow__edge").first();
    await expect(edge).toBeVisible();

    // Check edge attributes for naming consistency
    const sourceHandle = await edge.getAttribute("data-sourcehandle");
    const targetHandle = await edge.getAttribute("data-targethandle");

    console.log(`Source handle: ${sourceHandle}`);
    console.log(`Target handle: ${targetHandle}`);

    // Both should be non-empty
    expect(sourceHandle).toBeTruthy();
    expect(targetHandle).toBeTruthy();

    // Check if handles follow the sanitized naming convention
    if (sourceHandle && targetHandle) {
      const sourceIsLowercase = sourceHandle === sourceHandle.toLowerCase();
      const targetIsLowercase = targetHandle === targetHandle.toLowerCase();

      // Document: edge handles should use sanitized names,
      // which keeps them consistent with backend emit keys
      console.log(`Source handle is lowercase: ${sourceIsLowercase}`);
      console.log(`Target handle is lowercase: ${targetIsLowercase}`);
    }
  });
});

autogpt_platform/frontend/src/tests/tool-connections.spec.ts
@@ -0,0 +1,467 @@
/**
 * E2E tests for tool connections and routing in the graph builder.
 *
 * These tests focus on the connection behavior between blocks,
 * particularly around the SmartDecisionMaker tools output routing.
 *
 * Key scenarios tested:
 * 1. Connection data attribute formats
 * 2. Handle naming conventions
 * 3. Edge creation with various field name formats
 * 4. Link persistence after save/reload
 */

import test, { expect } from "@playwright/test";
import { BuildPage, Block } from "./pages/build.page";
import { LoginPage } from "./pages/login.page";
import { hasUrl } from "./utils/assertion";
import { getTestUser } from "./utils/auth";
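
// Note on the literal data-ids used throughout these tests (an assumption documented
// here for readability, not enforced by the assertions): the strings passed to
// connectBlockOutputToBlockInputViaDataId, such as "1-1-output-source" and
// "1-2-input-target", are taken to address the first block's output handle (as the
// connection source) and the second block's input handle (as the target) on the canvas.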

test.describe("Tool Connections", () => {
  let buildPage: BuildPage;

  test.beforeEach(async ({ page }) => {
    test.setTimeout(45000);
    const loginPage = new LoginPage(page);
    const testUser = await getTestUser();

    buildPage = new BuildPage(page);

    await page.goto("/login");
    await loginPage.login(testUser.email, testUser.password);
    await hasUrl(page, "/marketplace");
    await buildPage.navbar.clickBuildLink();
    await hasUrl(page, "/build");
    await buildPage.closeTutorial();
  });

  test.describe("Connection Data Attributes", () => {
    test("edge source and target handles are set correctly", async ({ page }) => {
      await buildPage.openBlocksPanel();

      const storeBlock = await buildPage.getFilteredBlocksFromAPI(
        (b) => b.name.toLowerCase().includes("store value")
      );

      if (storeBlock.length === 0) {
        test.skip(true, "Store Value block not found");
        return;
      }

      await buildPage.addBlock({ ...storeBlock[0], name: "Source" });
      await buildPage.addBlock({ ...storeBlock[0], name: "Target" });
      await buildPage.closeBlocksPanel();

      // Connect blocks
      await buildPage.connectBlockOutputToBlockInputViaDataId(
        "1-1-output-source",
        "1-2-input-target"
      );

      // Verify edge exists and has correct attributes
      const edge = page.locator(".react-flow__edge").first();
      await expect(edge).toBeVisible();

      // Get all relevant edge attributes
      const attributes = await edge.evaluate((el) => ({
        source: el.getAttribute("data-source"),
        target: el.getAttribute("data-target"),
        sourceHandle: el.getAttribute("data-sourcehandle"),
        targetHandle: el.getAttribute("data-targethandle"),
        id: el.getAttribute("id"),
      }));

      console.log("Edge attributes:", JSON.stringify(attributes, null, 2));

      // Source and target should be node IDs
      expect(attributes.source).toBeTruthy();
      expect(attributes.target).toBeTruthy();

      // Handles should reference field names
      expect(attributes.sourceHandle).toBeTruthy();
      expect(attributes.targetHandle).toBeTruthy();
    });

    test("edge ID follows expected format", async ({ page }) => {
      await buildPage.openBlocksPanel();

      const storeBlock = await buildPage.getFilteredBlocksFromAPI(
        (b) => b.name.toLowerCase().includes("store value")
      );

      if (storeBlock.length === 0) {
        test.skip(true, "Store Value block not found");
        return;
      }

      await buildPage.addBlock({ ...storeBlock[0], name: "A" });
      await buildPage.addBlock({ ...storeBlock[0], name: "B" });
      await buildPage.closeBlocksPanel();

      await buildPage.connectBlockOutputToBlockInputViaDataId(
        "1-1-output-source",
        "1-2-input-target"
      );

      const edge = page.locator(".react-flow__edge").first();
      const edgeId = await edge.getAttribute("id");

      console.log(`Edge ID: ${edgeId}`);

      // Edge ID typically contains source-target info
      expect(edgeId).toBeTruthy();
      // Format: reactflow__edge-{source}{sourceHandle}-{target}{targetHandle}
      expect(edgeId).toContain("reactflow__edge");
    });
  });
test.describe("Handle Naming Consistency", () => {
|
||||||
|
test("all input handles use lowercase naming", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
// Get multiple blocks to test variety
|
||||||
|
const blocks = await buildPage.getBlocksFromAPI();
|
||||||
|
const testBlocks = blocks.slice(0, 3).filter((b) => b.type !== "Agent");
|
||||||
|
|
||||||
|
if (testBlocks.length === 0) {
|
||||||
|
test.skip(true, "No suitable blocks found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const block of testBlocks) {
|
||||||
|
await buildPage.addBlock(block);
|
||||||
|
}
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
// Check all input handles across all blocks
|
||||||
|
const allInputHandles = page.locator('[data-testid^="input-handle-"]');
|
||||||
|
const count = await allInputHandles.count();
|
||||||
|
|
||||||
|
let uppercaseFound = false;
|
||||||
|
let spacesFound = false;
|
||||||
|
|
||||||
|
for (let i = 0; i < count; i++) {
|
||||||
|
const handle = allInputHandles.nth(i);
|
||||||
|
const testId = await handle.getAttribute("data-testid");
|
||||||
|
|
||||||
|
if (testId) {
|
||||||
|
const fieldName = testId.replace("input-handle-", "");
|
||||||
|
|
||||||
|
if (fieldName !== fieldName.toLowerCase()) {
|
||||||
|
console.log(`Non-lowercase input handle found: ${fieldName}`);
|
||||||
|
uppercaseFound = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fieldName.includes(" ")) {
|
||||||
|
console.log(`Input handle with spaces found: ${fieldName}`);
|
||||||
|
spacesFound = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Document: Frontend should use lowercase sanitized names
|
||||||
|
// If this fails, there's an inconsistency that could cause routing issues
|
||||||
|
expect(uppercaseFound).toBe(false);
|
||||||
|
expect(spacesFound).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("all output handles use lowercase naming", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const blocks = await buildPage.getBlocksFromAPI();
|
||||||
|
const testBlocks = blocks.slice(0, 3).filter((b) => b.type !== "Agent");
|
||||||
|
|
||||||
|
if (testBlocks.length === 0) {
|
||||||
|
test.skip(true, "No suitable blocks found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const block of testBlocks) {
|
||||||
|
await buildPage.addBlock(block);
|
||||||
|
}
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
const allOutputHandles = page.locator('[data-testid^="output-handle-"]');
|
||||||
|
const count = await allOutputHandles.count();
|
||||||
|
|
||||||
|
let uppercaseFound = false;
|
||||||
|
let spacesFound = false;
|
||||||
|
|
||||||
|
for (let i = 0; i < count; i++) {
|
||||||
|
const handle = allOutputHandles.nth(i);
|
||||||
|
const testId = await handle.getAttribute("data-testid");
|
||||||
|
|
||||||
|
if (testId) {
|
||||||
|
const fieldName = testId.replace("output-handle-", "");
|
||||||
|
|
||||||
|
if (fieldName !== fieldName.toLowerCase()) {
|
||||||
|
uppercaseFound = true;
|
||||||
|
console.log(`Non-lowercase output handle: ${fieldName}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fieldName.includes(" ")) {
|
||||||
|
spacesFound = true;
|
||||||
|
console.log(`Output handle with spaces: ${fieldName}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(uppercaseFound).toBe(false);
|
||||||
|
expect(spacesFound).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test.describe("Connection Persistence", () => {
|
||||||
|
test("connections survive page reload", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const storeBlock = await buildPage.getFilteredBlocksFromAPI(
|
||||||
|
(b) => b.name.toLowerCase().includes("store value")
|
||||||
|
);
|
||||||
|
|
||||||
|
if (storeBlock.length === 0) {
|
||||||
|
test.skip(true, "Store Value block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "Persist A" });
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "Persist B" });
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-2-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Save the agent
|
||||||
|
await buildPage.saveAgent(
|
||||||
|
`Persist Test ${Date.now()}`,
|
||||||
|
"Testing connection persistence"
|
||||||
|
);
|
||||||
|
await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
|
||||||
|
await buildPage.waitForSaveButton();
|
||||||
|
|
||||||
|
// Get current URL
|
||||||
|
const url = page.url();
|
||||||
|
|
||||||
|
// Reload
|
||||||
|
await page.reload();
|
||||||
|
await buildPage.closeTutorial();
|
||||||
|
await page.waitForTimeout(2000);
|
||||||
|
|
||||||
|
// Verify edge still exists
|
||||||
|
const edge = page.locator(".react-flow__edge").first();
|
||||||
|
await expect(edge).toBeVisible();
|
||||||
|
|
||||||
|
// Verify same URL
|
||||||
|
expect(page.url()).toBe(url);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("connection attributes preserved after save", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const storeBlock = await buildPage.getFilteredBlocksFromAPI(
|
||||||
|
(b) => b.name.toLowerCase().includes("store value")
|
||||||
|
);
|
||||||
|
|
||||||
|
if (storeBlock.length === 0) {
|
||||||
|
test.skip(true, "Store Value block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "Attr A" });
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "Attr B" });
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-2-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Get attributes before save
|
||||||
|
const edgeBefore = page.locator(".react-flow__edge").first();
|
||||||
|
const attrsBefore = await edgeBefore.evaluate((el) => ({
|
||||||
|
sourceHandle: el.getAttribute("data-sourcehandle"),
|
||||||
|
targetHandle: el.getAttribute("data-targethandle"),
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Save
|
||||||
|
await buildPage.saveAgent(`Attr Test ${Date.now()}`, "Testing attributes");
|
||||||
|
await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
|
||||||
|
await buildPage.waitForSaveButton();
|
||||||
|
|
||||||
|
// Reload
|
||||||
|
await page.reload();
|
||||||
|
await buildPage.closeTutorial();
|
||||||
|
await page.waitForTimeout(2000);
|
||||||
|
|
||||||
|
// Get attributes after reload
|
||||||
|
const edgeAfter = page.locator(".react-flow__edge").first();
|
||||||
|
await expect(edgeAfter).toBeVisible();
|
||||||
|
|
||||||
|
const attrsAfter = await edgeAfter.evaluate((el) => ({
|
||||||
|
sourceHandle: el.getAttribute("data-sourcehandle"),
|
||||||
|
targetHandle: el.getAttribute("data-targethandle"),
|
||||||
|
}));
|
||||||
|
|
||||||
|
console.log("Before save:", attrsBefore);
|
||||||
|
console.log("After reload:", attrsAfter);
|
||||||
|
|
||||||
|
// Handle names should be preserved
|
||||||
|
expect(attrsAfter.sourceHandle).toBe(attrsBefore.sourceHandle);
|
||||||
|
expect(attrsAfter.targetHandle).toBe(attrsBefore.targetHandle);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test.describe("Multiple Connections", () => {
|
||||||
|
test("can create multiple connections from single output", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const storeBlock = await buildPage.getFilteredBlocksFromAPI(
|
||||||
|
(b) => b.name.toLowerCase().includes("store value")
|
||||||
|
);
|
||||||
|
|
||||||
|
if (storeBlock.length === 0) {
|
||||||
|
test.skip(true, "Store Value block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add one source and two targets
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "Multi Source" });
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "Target 1" });
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "Target 2" });
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
// Connect source to both targets
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-2-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-3-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Should have 2 edges
|
||||||
|
const edges = page.locator(".react-flow__edge");
|
||||||
|
await expect(edges).toHaveCount(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("each connection has unique edge ID", async ({ page }) => {
|
||||||
|
await buildPage.openBlocksPanel();
|
||||||
|
|
||||||
|
const storeBlock = await buildPage.getFilteredBlocksFromAPI(
|
||||||
|
(b) => b.name.toLowerCase().includes("store value")
|
||||||
|
);
|
||||||
|
|
||||||
|
if (storeBlock.length === 0) {
|
||||||
|
test.skip(true, "Store Value block not found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "ID Source" });
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "ID Target 1" });
|
||||||
|
await buildPage.addBlock({ ...storeBlock[0], name: "ID Target 2" });
|
||||||
|
await buildPage.closeBlocksPanel();
|
||||||
|
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-2-input-target"
|
||||||
|
);
|
||||||
|
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||||
|
"1-1-output-source",
|
||||||
|
"1-3-input-target"
|
||||||
|
);
|
||||||
|
|
||||||
|
const edges = page.locator(".react-flow__edge");
|
||||||
|
const edgeIds: string[] = [];
|
||||||
|
|
||||||
|
const count = await edges.count();
|
||||||
|
for (let i = 0; i < count; i++) {
|
||||||
|
const edge = edges.nth(i);
|
||||||
|
const id = await edge.getAttribute("id");
|
||||||
|
if (id) edgeIds.push(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("Edge IDs:", edgeIds);
|
||||||
|
|
||||||
|
// All IDs should be unique
|
||||||
|
const uniqueIds = new Set(edgeIds);
|
||||||
|
expect(uniqueIds.size).toBe(edgeIds.length);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
test.describe("Tool Output Pin Format", () => {
|
||||||
|
let buildPage: BuildPage;
|
||||||
|
|
||||||
|
test.beforeEach(async ({ page }) => {
|
||||||
|
test.setTimeout(45000);
|
||||||
|
const loginPage = new LoginPage(page);
|
||||||
|
const testUser = await getTestUser();
|
||||||
|
|
||||||
|
buildPage = new BuildPage(page);
|
||||||
|
|
||||||
|
await page.goto("/login");
|
||||||
|
await loginPage.login(testUser.email, testUser.password);
|
||||||
|
await hasUrl(page, "/marketplace");
|
||||||
|
await buildPage.navbar.clickBuildLink();
|
||||||
|
await hasUrl(page, "/build");
|
||||||
|
await buildPage.closeTutorial();
|
||||||
|
});
|
||||||
|
|
||||||
|
test("documents tool output pin naming format", async ({ page }) => {
|
||||||
|
/**
|
||||||
|
* This test documents the expected format for tool output pins
|
||||||
|
* which is critical for routing to work correctly.
|
||||||
|
*
|
||||||
|
* Expected format: tools_^_{sink_node_id}_~_{sanitized_field_name}
|
||||||
|
*
|
||||||
|
* The bug occurs when:
|
||||||
|
* - Frontend creates link with: tools_^_{node}_~_Max Keyword Difficulty
|
||||||
|
* - Backend emits with: tools_^_{node}_~_max_keyword_difficulty
|
||||||
|
*/
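    // Minimal sketch of the mismatch (an assumption built from the example values in
    // the comment above): only the sanitized field name lines up with the backend emit key.
    const emitKey = "tools_^_abc-123_~_max_keyword_difficulty";
    const sanitizedLink =
      "tools_^_abc-123_~_" +
      "Max Keyword Difficulty".replace(/[^a-zA-Z0-9_-]/g, "_").toLowerCase();
    const unsanitizedLink = "tools_^_abc-123_~_Max Keyword Difficulty";
    console.log(`Sanitized link matches emit key: ${emitKey === sanitizedLink}`); // true
    console.log(`Unsanitized link matches emit key: ${emitKey === unsanitizedLink}`); // false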

    await buildPage.openBlocksPanel();

    // Look for SmartDecisionMaker or any AI block
    const blocks = await buildPage.getBlocksFromAPI();
    const aiBlock = blocks.find(
      (b) =>
        b.type === "AI" ||
        b.name.toLowerCase().includes("smart") ||
        b.name.toLowerCase().includes("decision")
    );

    if (!aiBlock) {
      console.log("No AI block found, documenting expected format:");
      console.log("Tool pin format: tools_^_{sink_node_id}_~_{sanitized_field_name}");
      console.log("Example: tools_^_abc-123_~_max_keyword_difficulty");
      test.skip(true, "No AI block available for testing");
      return;
    }

    await buildPage.addBlock(aiBlock);
    await buildPage.closeBlocksPanel();

    const blockElement = page.locator(`[data-blockid="${aiBlock.id}"]`).first();

    // Get tools output handle if it exists
    const toolsOutput = blockElement.locator('[data-testid="output-handle-tools"]');
    const hasToolsOutput = (await toolsOutput.count()) > 0;

    if (hasToolsOutput) {
      console.log("Tools output pin found");

      // Document the expected behavior:
      // when this pin is connected, the link should use sanitized names
    } else {
      console.log("No tools output pin on this block");
    }

    // Document expected format regardless
    console.log("\nExpected tool pin format for SmartDecisionMaker:");
    console.log("  Source: tools_^_{sink_node_id}_~_{sanitized_field_name}");
    console.log("  Example sink_pin_name: max_keyword_difficulty (NOT 'Max Keyword Difficulty')");
  });
});