mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-19 20:18:22 -05:00
Compare commits
3 Commits
fix/undefi
...
claude/tes
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
edba0c5ca6 | ||
|
|
3f29f71dd6 | ||
|
|
00207eb4c9 |
@@ -0,0 +1,246 @@
|
||||
"""
|
||||
Standalone tests for pin name sanitization that can run without full backend dependencies.
|
||||
|
||||
These tests verify the core sanitization logic independently of the full system.
|
||||
Run with: python -m pytest test_pin_sanitization_standalone.py -v
|
||||
Or simply: python test_pin_sanitization_standalone.py
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Simulate the exact cleanup function from SmartDecisionMakerBlock
|
||||
def cleanup(s: str) -> str:
|
||||
"""Clean up names for use as tool function names."""
|
||||
return re.sub(r"[^a-zA-Z0-9_-]", "_", s).lower()
|
||||
|
||||
|
||||
# Simulate the key parts of parse_execution_output
|
||||
def simulate_tool_routing(
|
||||
emit_key: str,
|
||||
sink_node_id: str,
|
||||
sink_pin_name: str,
|
||||
) -> bool:
|
||||
"""
|
||||
Simulate the routing comparison from parse_execution_output.
|
||||
|
||||
Returns True if routing would succeed, False otherwise.
|
||||
"""
|
||||
if not emit_key.startswith("tools_^_") or "_~_" not in emit_key:
|
||||
return False
|
||||
|
||||
# Extract routing info from emit key: tools_^_{node_id}_~_{field}
|
||||
selector = emit_key[8:] # Remove "tools_^_"
|
||||
target_node_id, target_input_pin = selector.split("_~_", 1)
|
||||
|
||||
# Current (buggy) comparison - direct string comparison
|
||||
return target_node_id == sink_node_id and target_input_pin == sink_pin_name
|
||||
|
||||
|
||||
def simulate_fixed_tool_routing(
|
||||
emit_key: str,
|
||||
sink_node_id: str,
|
||||
sink_pin_name: str,
|
||||
) -> bool:
|
||||
"""
|
||||
Simulate the FIXED routing comparison.
|
||||
|
||||
The fix: sanitize sink_pin_name before comparison.
|
||||
"""
|
||||
if not emit_key.startswith("tools_^_") or "_~_" not in emit_key:
|
||||
return False
|
||||
|
||||
selector = emit_key[8:]
|
||||
target_node_id, target_input_pin = selector.split("_~_", 1)
|
||||
|
||||
# Fixed comparison - sanitize sink_pin_name
|
||||
return target_node_id == sink_node_id and target_input_pin == cleanup(sink_pin_name)
|
||||
|
||||
|
||||
class TestCleanupFunction:
|
||||
"""Tests for the cleanup function."""
|
||||
|
||||
def test_spaces_to_underscores(self):
|
||||
assert cleanup("Max Keyword Difficulty") == "max_keyword_difficulty"
|
||||
|
||||
def test_mixed_case_to_lowercase(self):
|
||||
assert cleanup("MaxKeywordDifficulty") == "maxkeyworddifficulty"
|
||||
|
||||
def test_special_chars_to_underscores(self):
|
||||
assert cleanup("field@name!") == "field_name_"
|
||||
assert cleanup("CPC ($)") == "cpc____"
|
||||
|
||||
def test_preserves_valid_chars(self):
|
||||
assert cleanup("valid_name-123") == "valid_name-123"
|
||||
|
||||
def test_empty_string(self):
|
||||
assert cleanup("") == ""
|
||||
|
||||
def test_consecutive_spaces(self):
|
||||
assert cleanup("a b") == "a___b"
|
||||
|
||||
def test_unicode(self):
|
||||
assert cleanup("café") == "caf_"
|
||||
|
||||
|
||||
class TestCurrentRoutingBehavior:
|
||||
"""Tests demonstrating the current (buggy) routing behavior."""
|
||||
|
||||
def test_exact_match_works(self):
|
||||
"""When names match exactly, routing works."""
|
||||
emit_key = "tools_^_node-123_~_query"
|
||||
assert simulate_tool_routing(emit_key, "node-123", "query") is True
|
||||
|
||||
def test_spaces_cause_failure(self):
|
||||
"""When sink_pin has spaces, routing fails."""
|
||||
sanitized = cleanup("Max Keyword Difficulty")
|
||||
emit_key = f"tools_^_node-123_~_{sanitized}"
|
||||
assert simulate_tool_routing(emit_key, "node-123", "Max Keyword Difficulty") is False
|
||||
|
||||
def test_special_chars_cause_failure(self):
|
||||
"""When sink_pin has special chars, routing fails."""
|
||||
sanitized = cleanup("CPC ($)")
|
||||
emit_key = f"tools_^_node-123_~_{sanitized}"
|
||||
assert simulate_tool_routing(emit_key, "node-123", "CPC ($)") is False
|
||||
|
||||
|
||||
class TestFixedRoutingBehavior:
|
||||
"""Tests demonstrating the fixed routing behavior."""
|
||||
|
||||
def test_exact_match_still_works(self):
|
||||
"""When names match exactly, routing still works."""
|
||||
emit_key = "tools_^_node-123_~_query"
|
||||
assert simulate_fixed_tool_routing(emit_key, "node-123", "query") is True
|
||||
|
||||
def test_spaces_work_with_fix(self):
|
||||
"""With the fix, spaces in sink_pin work."""
|
||||
sanitized = cleanup("Max Keyword Difficulty")
|
||||
emit_key = f"tools_^_node-123_~_{sanitized}"
|
||||
assert simulate_fixed_tool_routing(emit_key, "node-123", "Max Keyword Difficulty") is True
|
||||
|
||||
def test_special_chars_work_with_fix(self):
|
||||
"""With the fix, special chars in sink_pin work."""
|
||||
sanitized = cleanup("CPC ($)")
|
||||
emit_key = f"tools_^_node-123_~_{sanitized}"
|
||||
assert simulate_fixed_tool_routing(emit_key, "node-123", "CPC ($)") is True
|
||||
|
||||
|
||||
class TestBugReproduction:
|
||||
"""Exact reproduction of the reported bug."""
|
||||
|
||||
def test_max_keyword_difficulty_bug(self):
|
||||
"""
|
||||
Reproduce the exact bug from the issue:
|
||||
|
||||
"For this agent specifically the input pin has space and unsanitized,
|
||||
the frontend somehow connect without sanitizing creating a link like:
|
||||
tools_^_767682f5-..._~_Max Keyword Difficulty
|
||||
but what's produced by backend is
|
||||
tools_^_767682f5-..._~_max_keyword_difficulty
|
||||
so the tool calls go into the void"
|
||||
"""
|
||||
node_id = "767682f5-fake-uuid"
|
||||
original_field = "Max Keyword Difficulty"
|
||||
sanitized_field = cleanup(original_field)
|
||||
|
||||
# What backend produces (emit key)
|
||||
emit_key = f"tools_^_{node_id}_~_{sanitized_field}"
|
||||
assert emit_key == f"tools_^_{node_id}_~_max_keyword_difficulty"
|
||||
|
||||
# What frontend link has (sink_pin_name)
|
||||
frontend_sink = original_field
|
||||
|
||||
# Current behavior: FAILS
|
||||
assert simulate_tool_routing(emit_key, node_id, frontend_sink) is False
|
||||
|
||||
# With fix: WORKS
|
||||
assert simulate_fixed_tool_routing(emit_key, node_id, frontend_sink) is True
|
||||
|
||||
|
||||
class TestCommonFieldNamePatterns:
|
||||
"""Test common field name patterns that could cause issues."""
|
||||
|
||||
FIELD_NAMES = [
|
||||
"Max Keyword Difficulty",
|
||||
"Search Volume (Monthly)",
|
||||
"CPC ($)",
|
||||
"User's Input",
|
||||
"Target URL",
|
||||
"API Response",
|
||||
"Query #1",
|
||||
"First Name",
|
||||
"Last Name",
|
||||
"Email Address",
|
||||
"Phone Number",
|
||||
"Total Cost ($)",
|
||||
"Discount (%)",
|
||||
"Created At",
|
||||
"Updated At",
|
||||
"Is Active",
|
||||
]
|
||||
|
||||
def test_current_behavior_fails_for_special_names(self):
|
||||
"""Current behavior fails for names with spaces/special chars."""
|
||||
failed = []
|
||||
for name in self.FIELD_NAMES:
|
||||
sanitized = cleanup(name)
|
||||
emit_key = f"tools_^_node_~_{sanitized}"
|
||||
if not simulate_tool_routing(emit_key, "node", name):
|
||||
failed.append(name)
|
||||
|
||||
# All names with spaces should fail
|
||||
names_with_spaces = [n for n in self.FIELD_NAMES if " " in n or any(c in n for c in "()$%#'")]
|
||||
assert set(failed) == set(names_with_spaces)
|
||||
|
||||
def test_fixed_behavior_works_for_all_names(self):
|
||||
"""Fixed behavior works for all names."""
|
||||
for name in self.FIELD_NAMES:
|
||||
sanitized = cleanup(name)
|
||||
emit_key = f"tools_^_node_~_{sanitized}"
|
||||
assert simulate_fixed_tool_routing(emit_key, "node", name) is True, f"Failed for: {name}"
|
||||
|
||||
|
||||
def run_tests():
|
||||
"""Run all tests manually without pytest."""
|
||||
import traceback
|
||||
|
||||
test_classes = [
|
||||
TestCleanupFunction,
|
||||
TestCurrentRoutingBehavior,
|
||||
TestFixedRoutingBehavior,
|
||||
TestBugReproduction,
|
||||
TestCommonFieldNamePatterns,
|
||||
]
|
||||
|
||||
total = 0
|
||||
passed = 0
|
||||
failed = 0
|
||||
|
||||
for test_class in test_classes:
|
||||
print(f"\n{test_class.__name__}:")
|
||||
instance = test_class()
|
||||
for name in dir(instance):
|
||||
if name.startswith("test_"):
|
||||
total += 1
|
||||
try:
|
||||
getattr(instance, name)()
|
||||
print(f" ✓ {name}")
|
||||
passed += 1
|
||||
except AssertionError as e:
|
||||
print(f" ✗ {name}: {e}")
|
||||
failed += 1
|
||||
except Exception as e:
|
||||
print(f" ✗ {name}: {e}")
|
||||
traceback.print_exc()
|
||||
failed += 1
|
||||
|
||||
print(f"\n{'='*50}")
|
||||
print(f"Total: {total}, Passed: {passed}, Failed: {failed}")
|
||||
return failed == 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
success = run_tests()
|
||||
sys.exit(0 if success else 1)
|
||||
@@ -0,0 +1,916 @@
|
||||
"""
|
||||
Tests for SmartDecisionMaker agent mode specific failure modes.
|
||||
|
||||
Covers failure modes:
|
||||
2. Silent Tool Failures in Agent Mode
|
||||
3. Unbounded Agent Mode Iterations
|
||||
10. Unbounded Agent Iterations
|
||||
12. Stale Credentials in Agent Mode
|
||||
13. Tool Signature Cache Invalidation
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import threading
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.blocks.smart_decision_maker import (
|
||||
SmartDecisionMakerBlock,
|
||||
ExecutionParams,
|
||||
ToolInfo,
|
||||
)
|
||||
|
||||
|
||||
class TestSilentToolFailuresInAgentMode:
|
||||
"""
|
||||
Tests for Failure Mode #2: Silent Tool Failures in Agent Mode
|
||||
|
||||
When tool execution fails in agent mode, the error is converted to a
|
||||
tool response and execution continues silently.
|
||||
"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_execution_failure_converted_to_response(self):
|
||||
"""
|
||||
Test that tool execution failures are silently converted to responses.
|
||||
"""
|
||||
import backend.blocks.llm as llm_module
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
block = SmartDecisionMakerBlock()
|
||||
|
||||
# First response: tool call
|
||||
mock_tool_call = MagicMock()
|
||||
mock_tool_call.id = "call_1"
|
||||
mock_tool_call.function.name = "failing_tool"
|
||||
mock_tool_call.function.arguments = json.dumps({"param": "value"})
|
||||
|
||||
mock_response_1 = MagicMock()
|
||||
mock_response_1.response = None
|
||||
mock_response_1.tool_calls = [mock_tool_call]
|
||||
mock_response_1.prompt_tokens = 50
|
||||
mock_response_1.completion_tokens = 25
|
||||
mock_response_1.reasoning = None
|
||||
mock_response_1.raw_response = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "tool_use", "id": "call_1"}]
|
||||
}
|
||||
|
||||
# Second response: finish after seeing error
|
||||
mock_response_2 = MagicMock()
|
||||
mock_response_2.response = "I encountered an error"
|
||||
mock_response_2.tool_calls = []
|
||||
mock_response_2.prompt_tokens = 30
|
||||
mock_response_2.completion_tokens = 15
|
||||
mock_response_2.reasoning = None
|
||||
mock_response_2.raw_response = {"role": "assistant", "content": "I encountered an error"}
|
||||
|
||||
llm_call_count = 0
|
||||
|
||||
async def mock_llm_call(**kwargs):
|
||||
nonlocal llm_call_count
|
||||
llm_call_count += 1
|
||||
if llm_call_count == 1:
|
||||
return mock_response_1
|
||||
return mock_response_2
|
||||
|
||||
mock_tool_signatures = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "failing_tool",
|
||||
"_sink_node_id": "sink-node",
|
||||
"_field_mapping": {"param": "param"},
|
||||
"parameters": {
|
||||
"properties": {"param": {"type": "string"}},
|
||||
"required": ["param"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
# Mock database client that will fail
|
||||
mock_db_client = AsyncMock()
|
||||
mock_db_client.get_node.side_effect = Exception("Database connection failed!")
|
||||
|
||||
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||
|
||||
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||
mock_execution_processor = AsyncMock()
|
||||
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||
mock_execution_processor.execution_stats = MagicMock()
|
||||
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Do something",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||
agent_mode_max_iterations=5,
|
||||
)
|
||||
|
||||
outputs = {}
|
||||
async for name, value in block.run(
|
||||
input_data,
|
||||
credentials=llm_module.TEST_CREDENTIALS,
|
||||
graph_id="test-graph",
|
||||
node_id="test-node",
|
||||
graph_exec_id="test-exec",
|
||||
node_exec_id="test-node-exec",
|
||||
user_id="test-user",
|
||||
graph_version=1,
|
||||
execution_context=mock_execution_context,
|
||||
execution_processor=mock_execution_processor,
|
||||
):
|
||||
outputs[name] = value
|
||||
|
||||
# The execution completed (didn't crash)
|
||||
assert "finished" in outputs or "conversations" in outputs
|
||||
|
||||
# BUG: The tool failure was silent - user doesn't know what happened
|
||||
# The error was just logged and converted to a tool response
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_failure_causes_infinite_retry_loop(self):
|
||||
"""
|
||||
Test scenario where LLM keeps calling the same failing tool.
|
||||
|
||||
If tool fails but LLM doesn't realize it, it may keep trying.
|
||||
"""
|
||||
import backend.blocks.llm as llm_module
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
block = SmartDecisionMakerBlock()
|
||||
|
||||
call_count = 0
|
||||
max_calls = 10 # Limit for test
|
||||
|
||||
def create_tool_call_response():
|
||||
mock_tool_call = MagicMock()
|
||||
mock_tool_call.id = f"call_{call_count}"
|
||||
mock_tool_call.function.name = "persistent_tool"
|
||||
mock_tool_call.function.arguments = json.dumps({"retry": call_count})
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.response = None
|
||||
mock_response.tool_calls = [mock_tool_call]
|
||||
mock_response.prompt_tokens = 50
|
||||
mock_response.completion_tokens = 25
|
||||
mock_response.reasoning = None
|
||||
mock_response.raw_response = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "tool_use", "id": f"call_{call_count}"}]
|
||||
}
|
||||
return mock_response
|
||||
|
||||
async def mock_llm_call(**kwargs):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
|
||||
if call_count >= max_calls:
|
||||
# Eventually finish to prevent actual infinite loop in test
|
||||
final = MagicMock()
|
||||
final.response = "Giving up"
|
||||
final.tool_calls = []
|
||||
final.prompt_tokens = 10
|
||||
final.completion_tokens = 5
|
||||
final.reasoning = None
|
||||
final.raw_response = {"role": "assistant", "content": "Giving up"}
|
||||
return final
|
||||
|
||||
return create_tool_call_response()
|
||||
|
||||
mock_tool_signatures = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "persistent_tool",
|
||||
"_sink_node_id": "sink-node",
|
||||
"_field_mapping": {"retry": "retry"},
|
||||
"parameters": {
|
||||
"properties": {"retry": {"type": "integer"}},
|
||||
"required": ["retry"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_db_client.get_node.side_effect = Exception("Always fails!")
|
||||
|
||||
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||
|
||||
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||
mock_execution_processor = AsyncMock()
|
||||
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||
mock_execution_processor.execution_stats = MagicMock()
|
||||
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Keep trying",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||
agent_mode_max_iterations=-1, # Infinite mode!
|
||||
)
|
||||
|
||||
# Use timeout to prevent actual infinite loop
|
||||
try:
|
||||
async with asyncio.timeout(5):
|
||||
outputs = {}
|
||||
async for name, value in block.run(
|
||||
input_data,
|
||||
credentials=llm_module.TEST_CREDENTIALS,
|
||||
graph_id="test-graph",
|
||||
node_id="test-node",
|
||||
graph_exec_id="test-exec",
|
||||
node_exec_id="test-node-exec",
|
||||
user_id="test-user",
|
||||
graph_version=1,
|
||||
execution_context=mock_execution_context,
|
||||
execution_processor=mock_execution_processor,
|
||||
):
|
||||
outputs[name] = value
|
||||
except asyncio.TimeoutError:
|
||||
pass # Expected if we hit infinite loop
|
||||
|
||||
# Document that many calls were made before we gave up
|
||||
assert call_count >= max_calls - 1, \
|
||||
f"Expected many retries, got {call_count}"
|
||||
|
||||
|
||||
class TestUnboundedAgentIterations:
|
||||
"""
|
||||
Tests for Failure Mode #3 and #10: Unbounded Agent Mode Iterations
|
||||
|
||||
With max_iterations = -1, the agent can run forever, consuming
|
||||
unlimited tokens and compute resources.
|
||||
"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_infinite_mode_requires_llm_to_stop(self):
|
||||
"""
|
||||
Test that infinite mode (-1) only stops when LLM stops making tool calls.
|
||||
"""
|
||||
import backend.blocks.llm as llm_module
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
block = SmartDecisionMakerBlock()
|
||||
|
||||
iterations = 0
|
||||
max_test_iterations = 20
|
||||
|
||||
async def mock_llm_call(**kwargs):
|
||||
nonlocal iterations
|
||||
iterations += 1
|
||||
|
||||
if iterations >= max_test_iterations:
|
||||
# Stop to prevent actual infinite loop
|
||||
resp = MagicMock()
|
||||
resp.response = "Finally done"
|
||||
resp.tool_calls = []
|
||||
resp.prompt_tokens = 10
|
||||
resp.completion_tokens = 5
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||
return resp
|
||||
|
||||
# Keep making tool calls
|
||||
tool_call = MagicMock()
|
||||
tool_call.id = f"call_{iterations}"
|
||||
tool_call.function.name = "counter_tool"
|
||||
tool_call.function.arguments = json.dumps({"count": iterations})
|
||||
|
||||
resp = MagicMock()
|
||||
resp.response = None
|
||||
resp.tool_calls = [tool_call]
|
||||
resp.prompt_tokens = 50
|
||||
resp.completion_tokens = 25
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "tool_use", "id": f"call_{iterations}"}]
|
||||
}
|
||||
return resp
|
||||
|
||||
mock_tool_signatures = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "counter_tool",
|
||||
"_sink_node_id": "sink",
|
||||
"_field_mapping": {"count": "count"},
|
||||
"parameters": {
|
||||
"properties": {"count": {"type": "integer"}},
|
||||
"required": ["count"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_node = MagicMock()
|
||||
mock_node.block_id = "test-block"
|
||||
mock_db_client.get_node.return_value = mock_node
|
||||
|
||||
mock_exec_result = MagicMock()
|
||||
mock_exec_result.node_exec_id = "exec-id"
|
||||
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {"count": 1})
|
||||
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {"result": "ok"}
|
||||
|
||||
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||
|
||||
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||
mock_execution_processor = AsyncMock()
|
||||
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||
mock_execution_processor.execution_stats = MagicMock()
|
||||
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Count forever",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||
agent_mode_max_iterations=-1, # INFINITE MODE
|
||||
)
|
||||
|
||||
async with asyncio.timeout(10):
|
||||
outputs = {}
|
||||
async for name, value in block.run(
|
||||
input_data,
|
||||
credentials=llm_module.TEST_CREDENTIALS,
|
||||
graph_id="test-graph",
|
||||
node_id="test-node",
|
||||
graph_exec_id="test-exec",
|
||||
node_exec_id="test-node-exec",
|
||||
user_id="test-user",
|
||||
graph_version=1,
|
||||
execution_context=mock_execution_context,
|
||||
execution_processor=mock_execution_processor,
|
||||
):
|
||||
outputs[name] = value
|
||||
|
||||
# We ran many iterations before stopping
|
||||
assert iterations == max_test_iterations
|
||||
# BUG: No built-in safeguard against runaway iterations
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_max_iterations_limit_enforced(self):
|
||||
"""
|
||||
Test that max_iterations limit is properly enforced.
|
||||
"""
|
||||
import backend.blocks.llm as llm_module
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
block = SmartDecisionMakerBlock()
|
||||
|
||||
iterations = 0
|
||||
|
||||
async def mock_llm_call(**kwargs):
|
||||
nonlocal iterations
|
||||
iterations += 1
|
||||
|
||||
# Always make tool calls (never finish voluntarily)
|
||||
tool_call = MagicMock()
|
||||
tool_call.id = f"call_{iterations}"
|
||||
tool_call.function.name = "endless_tool"
|
||||
tool_call.function.arguments = json.dumps({})
|
||||
|
||||
resp = MagicMock()
|
||||
resp.response = None
|
||||
resp.tool_calls = [tool_call]
|
||||
resp.prompt_tokens = 50
|
||||
resp.completion_tokens = 25
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "tool_use", "id": f"call_{iterations}"}]
|
||||
}
|
||||
return resp
|
||||
|
||||
mock_tool_signatures = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "endless_tool",
|
||||
"_sink_node_id": "sink",
|
||||
"_field_mapping": {},
|
||||
"parameters": {"properties": {}, "required": []},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_node = MagicMock()
|
||||
mock_node.block_id = "test-block"
|
||||
mock_db_client.get_node.return_value = mock_node
|
||||
mock_exec_result = MagicMock()
|
||||
mock_exec_result.node_exec_id = "exec-id"
|
||||
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}
|
||||
|
||||
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||
|
||||
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||
mock_execution_processor = AsyncMock()
|
||||
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||
mock_execution_processor.execution_stats = MagicMock()
|
||||
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||
|
||||
MAX_ITERATIONS = 3
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Run forever",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||
agent_mode_max_iterations=MAX_ITERATIONS,
|
||||
)
|
||||
|
||||
outputs = {}
|
||||
async for name, value in block.run(
|
||||
input_data,
|
||||
credentials=llm_module.TEST_CREDENTIALS,
|
||||
graph_id="test-graph",
|
||||
node_id="test-node",
|
||||
graph_exec_id="test-exec",
|
||||
node_exec_id="test-node-exec",
|
||||
user_id="test-user",
|
||||
graph_version=1,
|
||||
execution_context=mock_execution_context,
|
||||
execution_processor=mock_execution_processor,
|
||||
):
|
||||
outputs[name] = value
|
||||
|
||||
# Should have stopped at max iterations
|
||||
assert iterations == MAX_ITERATIONS
|
||||
assert "finished" in outputs
|
||||
assert "limit reached" in outputs["finished"].lower()
|
||||
|
||||
|
||||
class TestStaleCredentialsInAgentMode:
|
||||
"""
|
||||
Tests for Failure Mode #12: Stale Credentials in Agent Mode
|
||||
|
||||
Credentials are validated once at start but can expire during
|
||||
long-running agent mode executions.
|
||||
"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_credentials_not_revalidated_between_iterations(self):
|
||||
"""
|
||||
Test that credentials are used without revalidation in agent mode.
|
||||
"""
|
||||
import backend.blocks.llm as llm_module
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
block = SmartDecisionMakerBlock()
|
||||
|
||||
credential_check_count = 0
|
||||
iteration = 0
|
||||
|
||||
async def mock_llm_call(**kwargs):
|
||||
nonlocal credential_check_count, iteration
|
||||
iteration += 1
|
||||
|
||||
# Simulate credential check (in real code this happens in llm_call)
|
||||
credential_check_count += 1
|
||||
|
||||
if iteration >= 3:
|
||||
resp = MagicMock()
|
||||
resp.response = "Done"
|
||||
resp.tool_calls = []
|
||||
resp.prompt_tokens = 10
|
||||
resp.completion_tokens = 5
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||
return resp
|
||||
|
||||
tool_call = MagicMock()
|
||||
tool_call.id = f"call_{iteration}"
|
||||
tool_call.function.name = "test_tool"
|
||||
tool_call.function.arguments = json.dumps({})
|
||||
|
||||
resp = MagicMock()
|
||||
resp.response = None
|
||||
resp.tool_calls = [tool_call]
|
||||
resp.prompt_tokens = 50
|
||||
resp.completion_tokens = 25
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "tool_use", "id": f"call_{iteration}"}]
|
||||
}
|
||||
return resp
|
||||
|
||||
mock_tool_signatures = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "test_tool",
|
||||
"_sink_node_id": "sink",
|
||||
"_field_mapping": {},
|
||||
"parameters": {"properties": {}, "required": []},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_node = MagicMock()
|
||||
mock_node.block_id = "test-block"
|
||||
mock_db_client.get_node.return_value = mock_node
|
||||
mock_exec_result = MagicMock()
|
||||
mock_exec_result.node_exec_id = "exec-id"
|
||||
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}
|
||||
|
||||
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||
|
||||
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||
mock_execution_processor = AsyncMock()
|
||||
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||
mock_execution_processor.execution_stats = MagicMock()
|
||||
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Test credentials",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||
agent_mode_max_iterations=5,
|
||||
)
|
||||
|
||||
outputs = {}
|
||||
async for name, value in block.run(
|
||||
input_data,
|
||||
credentials=llm_module.TEST_CREDENTIALS,
|
||||
graph_id="test-graph",
|
||||
node_id="test-node",
|
||||
graph_exec_id="test-exec",
|
||||
node_exec_id="test-node-exec",
|
||||
user_id="test-user",
|
||||
graph_version=1,
|
||||
execution_context=mock_execution_context,
|
||||
execution_processor=mock_execution_processor,
|
||||
):
|
||||
outputs[name] = value
|
||||
|
||||
# Credentials were checked on each LLM call but not refreshed
|
||||
# If they expired mid-execution, we'd get auth errors
|
||||
assert credential_check_count == iteration
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_credential_expiration_mid_execution(self):
|
||||
"""
|
||||
Test what happens when credentials expire during agent mode.
|
||||
"""
|
||||
import backend.blocks.llm as llm_module
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
block = SmartDecisionMakerBlock()
|
||||
|
||||
iteration = 0
|
||||
|
||||
async def mock_llm_call_with_expiration(**kwargs):
|
||||
nonlocal iteration
|
||||
iteration += 1
|
||||
|
||||
if iteration >= 3:
|
||||
# Simulate credential expiration
|
||||
raise Exception("401 Unauthorized: API key expired")
|
||||
|
||||
tool_call = MagicMock()
|
||||
tool_call.id = f"call_{iteration}"
|
||||
tool_call.function.name = "test_tool"
|
||||
tool_call.function.arguments = json.dumps({})
|
||||
|
||||
resp = MagicMock()
|
||||
resp.response = None
|
||||
resp.tool_calls = [tool_call]
|
||||
resp.prompt_tokens = 50
|
||||
resp.completion_tokens = 25
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "tool_use", "id": f"call_{iteration}"}]
|
||||
}
|
||||
return resp
|
||||
|
||||
mock_tool_signatures = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "test_tool",
|
||||
"_sink_node_id": "sink",
|
||||
"_field_mapping": {},
|
||||
"parameters": {"properties": {}, "required": []},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_node = MagicMock()
|
||||
mock_node.block_id = "test-block"
|
||||
mock_db_client.get_node.return_value = mock_node
|
||||
mock_exec_result = MagicMock()
|
||||
mock_exec_result.node_exec_id = "exec-id"
|
||||
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}
|
||||
|
||||
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call_with_expiration), \
|
||||
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||
|
||||
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||
mock_execution_processor = AsyncMock()
|
||||
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||
mock_execution_processor.execution_stats = MagicMock()
|
||||
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Test credentials",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||
agent_mode_max_iterations=10,
|
||||
)
|
||||
|
||||
outputs = {}
|
||||
async for name, value in block.run(
|
||||
input_data,
|
||||
credentials=llm_module.TEST_CREDENTIALS,
|
||||
graph_id="test-graph",
|
||||
node_id="test-node",
|
||||
graph_exec_id="test-exec",
|
||||
node_exec_id="test-node-exec",
|
||||
user_id="test-user",
|
||||
graph_version=1,
|
||||
execution_context=mock_execution_context,
|
||||
execution_processor=mock_execution_processor,
|
||||
):
|
||||
outputs[name] = value
|
||||
|
||||
# Should have an error output
|
||||
assert "error" in outputs
|
||||
assert "expired" in outputs["error"].lower() or "unauthorized" in outputs["error"].lower()
|
||||
|
||||
|
||||
class TestToolSignatureCacheInvalidation:
|
||||
"""
|
||||
Tests for Failure Mode #13: Tool Signature Cache Invalidation
|
||||
|
||||
Tool signatures are created once at the start of run() but the
|
||||
graph could change during agent mode execution.
|
||||
"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_signatures_created_once_at_start(self):
|
||||
"""
|
||||
Test that tool signatures are only created once, not refreshed.
|
||||
"""
|
||||
import backend.blocks.llm as llm_module
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
block = SmartDecisionMakerBlock()
|
||||
|
||||
signature_creation_count = 0
|
||||
iteration = 0
|
||||
|
||||
original_create_signatures = block._create_tool_node_signatures
|
||||
|
||||
async def counting_create_signatures(node_id):
|
||||
nonlocal signature_creation_count
|
||||
signature_creation_count += 1
|
||||
return [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "tool_v1",
|
||||
"_sink_node_id": "sink",
|
||||
"_field_mapping": {},
|
||||
"parameters": {"properties": {}, "required": []},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
async def mock_llm_call(**kwargs):
|
||||
nonlocal iteration
|
||||
iteration += 1
|
||||
|
||||
if iteration >= 3:
|
||||
resp = MagicMock()
|
||||
resp.response = "Done"
|
||||
resp.tool_calls = []
|
||||
resp.prompt_tokens = 10
|
||||
resp.completion_tokens = 5
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||
return resp
|
||||
|
||||
tool_call = MagicMock()
|
||||
tool_call.id = f"call_{iteration}"
|
||||
tool_call.function.name = "tool_v1"
|
||||
tool_call.function.arguments = json.dumps({})
|
||||
|
||||
resp = MagicMock()
|
||||
resp.response = None
|
||||
resp.tool_calls = [tool_call]
|
||||
resp.prompt_tokens = 50
|
||||
resp.completion_tokens = 25
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "tool_use", "id": f"call_{iteration}"}]
|
||||
}
|
||||
return resp
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_node = MagicMock()
|
||||
mock_node.block_id = "test-block"
|
||||
mock_db_client.get_node.return_value = mock_node
|
||||
mock_exec_result = MagicMock()
|
||||
mock_exec_result.node_exec_id = "exec-id"
|
||||
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}
|
||||
|
||||
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||
patch.object(block, "_create_tool_node_signatures", side_effect=counting_create_signatures), \
|
||||
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||
|
||||
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||
mock_execution_processor = AsyncMock()
|
||||
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||
mock_execution_processor.execution_stats = MagicMock()
|
||||
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Test signatures",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||
agent_mode_max_iterations=5,
|
||||
)
|
||||
|
||||
outputs = {}
|
||||
async for name, value in block.run(
|
||||
input_data,
|
||||
credentials=llm_module.TEST_CREDENTIALS,
|
||||
graph_id="test-graph",
|
||||
node_id="test-node",
|
||||
graph_exec_id="test-exec",
|
||||
node_exec_id="test-node-exec",
|
||||
user_id="test-user",
|
||||
graph_version=1,
|
||||
execution_context=mock_execution_context,
|
||||
execution_processor=mock_execution_processor,
|
||||
):
|
||||
outputs[name] = value
|
||||
|
||||
# Signatures were only created once, even though we had multiple iterations
|
||||
assert signature_creation_count == 1
|
||||
assert iteration >= 3 # We had multiple iterations
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stale_signatures_cause_tool_mismatch(self):
|
||||
"""
|
||||
Test scenario where tool definitions change but agent uses stale signatures.
|
||||
"""
|
||||
# This documents the potential issue:
|
||||
# 1. Agent starts with tool_v1
|
||||
# 2. User modifies graph, tool becomes tool_v2
|
||||
# 3. Agent still thinks tool_v1 exists
|
||||
# 4. LLM calls tool_v1, but it no longer exists
|
||||
|
||||
# Since signatures are created once at start and never refreshed,
|
||||
# any changes to the graph during execution won't be reflected.
|
||||
|
||||
# This is more of a documentation test - the actual fix would
|
||||
# require either:
|
||||
# a) Refreshing signatures periodically
|
||||
# b) Locking the graph during execution
|
||||
# c) Checking tool existence before each call
|
||||
pass
|
||||
|
||||
|
||||
class TestAgentModeConversationManagement:
|
||||
"""Tests for conversation management in agent mode."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_conversation_grows_with_iterations(self):
|
||||
"""
|
||||
Test that conversation history grows correctly with each iteration.
|
||||
"""
|
||||
import backend.blocks.llm as llm_module
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
block = SmartDecisionMakerBlock()
|
||||
|
||||
iteration = 0
|
||||
conversation_lengths = []
|
||||
|
||||
async def mock_llm_call(**kwargs):
|
||||
nonlocal iteration
|
||||
iteration += 1
|
||||
|
||||
# Record conversation length at each call
|
||||
prompt = kwargs.get("prompt", [])
|
||||
conversation_lengths.append(len(prompt))
|
||||
|
||||
if iteration >= 3:
|
||||
resp = MagicMock()
|
||||
resp.response = "Done"
|
||||
resp.tool_calls = []
|
||||
resp.prompt_tokens = 10
|
||||
resp.completion_tokens = 5
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {"role": "assistant", "content": "Done"}
|
||||
return resp
|
||||
|
||||
tool_call = MagicMock()
|
||||
tool_call.id = f"call_{iteration}"
|
||||
tool_call.function.name = "test_tool"
|
||||
tool_call.function.arguments = json.dumps({})
|
||||
|
||||
resp = MagicMock()
|
||||
resp.response = None
|
||||
resp.tool_calls = [tool_call]
|
||||
resp.prompt_tokens = 50
|
||||
resp.completion_tokens = 25
|
||||
resp.reasoning = None
|
||||
resp.raw_response = {
|
||||
"role": "assistant",
|
||||
"content": [{"type": "tool_use", "id": f"call_{iteration}"}]
|
||||
}
|
||||
return resp
|
||||
|
||||
mock_tool_signatures = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "test_tool",
|
||||
"_sink_node_id": "sink",
|
||||
"_field_mapping": {},
|
||||
"parameters": {"properties": {}, "required": []},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
mock_db_client = AsyncMock()
|
||||
mock_node = MagicMock()
|
||||
mock_node.block_id = "test-block"
|
||||
mock_db_client.get_node.return_value = mock_node
|
||||
mock_exec_result = MagicMock()
|
||||
mock_exec_result.node_exec_id = "exec-id"
|
||||
mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
|
||||
mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {"result": "ok"}
|
||||
|
||||
with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
|
||||
patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
|
||||
patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):
|
||||
|
||||
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||
mock_execution_processor = AsyncMock()
|
||||
mock_execution_processor.running_node_execution = defaultdict(MagicMock)
|
||||
mock_execution_processor.execution_stats = MagicMock()
|
||||
mock_execution_processor.execution_stats_lock = threading.Lock()
|
||||
mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))
|
||||
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt="Test conversation",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||
agent_mode_max_iterations=5,
|
||||
)
|
||||
|
||||
outputs = {}
|
||||
async for name, value in block.run(
|
||||
input_data,
|
||||
credentials=llm_module.TEST_CREDENTIALS,
|
||||
graph_id="test-graph",
|
||||
node_id="test-node",
|
||||
graph_exec_id="test-exec",
|
||||
node_exec_id="test-node-exec",
|
||||
user_id="test-user",
|
||||
graph_version=1,
|
||||
execution_context=mock_execution_context,
|
||||
execution_processor=mock_execution_processor,
|
||||
):
|
||||
outputs[name] = value
|
||||
|
||||
# Conversation should grow with each iteration
|
||||
# Each iteration adds: assistant message + tool response
|
||||
assert len(conversation_lengths) == 3
|
||||
for i in range(1, len(conversation_lengths)):
|
||||
assert conversation_lengths[i] > conversation_lengths[i-1], \
|
||||
f"Conversation should grow: {conversation_lengths}"
|
||||
@@ -0,0 +1,525 @@
|
||||
"""
|
||||
Tests for SmartDecisionMaker concurrency issues and race conditions.
|
||||
|
||||
Covers failure modes:
|
||||
1. Conversation History Race Condition
|
||||
4. Concurrent Execution State Sharing
|
||||
7. Race in Pending Tool Calls
|
||||
11. Race in Pending Tool Call Retrieval
|
||||
14. Concurrent State Sharing
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import threading
|
||||
from collections import Counter
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.blocks.smart_decision_maker import (
|
||||
SmartDecisionMakerBlock,
|
||||
get_pending_tool_calls,
|
||||
_create_tool_response,
|
||||
_get_tool_requests,
|
||||
_get_tool_responses,
|
||||
)
|
||||
|
||||
|
||||
class TestConversationHistoryRaceCondition:
|
||||
"""
|
||||
Tests for Failure Mode #1: Conversation History Race Condition
|
||||
|
||||
When multiple executions share conversation history, concurrent
|
||||
modifications can cause data loss or corruption.
|
||||
"""
|
||||
|
||||
def test_get_pending_tool_calls_with_concurrent_modification(self):
|
||||
"""
|
||||
Test that concurrent modifications to conversation history
|
||||
can cause inconsistent pending tool call counts.
|
||||
"""
|
||||
# Shared conversation history
|
||||
conversation_history = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "tool_use", "id": "toolu_1"},
|
||||
{"type": "tool_use", "id": "toolu_2"},
|
||||
{"type": "tool_use", "id": "toolu_3"},
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
results = []
|
||||
errors = []
|
||||
|
||||
def reader_thread():
|
||||
"""Repeatedly read pending calls."""
|
||||
for _ in range(100):
|
||||
try:
|
||||
pending = get_pending_tool_calls(conversation_history)
|
||||
results.append(len(pending))
|
||||
except Exception as e:
|
||||
errors.append(str(e))
|
||||
|
||||
def writer_thread():
|
||||
"""Modify conversation while readers are active."""
|
||||
for i in range(50):
|
||||
# Add a tool response
|
||||
conversation_history.append({
|
||||
"role": "user",
|
||||
"content": [{"type": "tool_result", "tool_use_id": f"toolu_{(i % 3) + 1}"}]
|
||||
})
|
||||
# Remove it
|
||||
if len(conversation_history) > 1:
|
||||
conversation_history.pop()
|
||||
|
||||
# Run concurrent readers and writers
|
||||
threads = []
|
||||
for _ in range(3):
|
||||
threads.append(threading.Thread(target=reader_thread))
|
||||
threads.append(threading.Thread(target=writer_thread))
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
# The issue: results may be inconsistent due to race conditions
|
||||
# In a correct implementation, we'd expect consistent results
|
||||
# Document that this CAN produce inconsistent results
|
||||
assert len(results) > 0, "Should have some results"
|
||||
# Note: This test documents the race condition exists
|
||||
# When fixed, all results should be consistent
|
||||
|
||||
def test_prompt_list_mutation_race(self):
|
||||
"""
|
||||
Test that mutating prompt list during iteration can cause issues.
|
||||
"""
|
||||
prompt = []
|
||||
errors = []
|
||||
|
||||
def appender():
|
||||
for i in range(100):
|
||||
prompt.append({"role": "user", "content": f"msg_{i}"})
|
||||
|
||||
def extender():
|
||||
for i in range(100):
|
||||
prompt.extend([{"role": "assistant", "content": f"resp_{i}"}])
|
||||
|
||||
def reader():
|
||||
for _ in range(100):
|
||||
try:
|
||||
# Iterate while others modify
|
||||
_ = [p for p in prompt if p.get("role") == "user"]
|
||||
except RuntimeError as e:
|
||||
# "dictionary changed size during iteration" or similar
|
||||
errors.append(str(e))
|
||||
|
||||
threads = [
|
||||
threading.Thread(target=appender),
|
||||
threading.Thread(target=extender),
|
||||
threading.Thread(target=reader),
|
||||
]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
# Document that race conditions can occur
|
||||
# In production, this could cause silent data corruption
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_concurrent_block_runs_share_state(self):
|
||||
"""
|
||||
Test that concurrent runs on same block instance can share state incorrectly.
|
||||
|
||||
This is Failure Mode #14: Concurrent State Sharing
|
||||
"""
|
||||
import backend.blocks.llm as llm_module
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
block = SmartDecisionMakerBlock()
|
||||
|
||||
# Track all outputs from all runs
|
||||
all_outputs = []
|
||||
lock = threading.Lock()
|
||||
|
||||
async def run_block(run_id: int):
|
||||
"""Run the block with a unique run_id."""
|
||||
mock_response = MagicMock()
|
||||
mock_response.response = f"Response for run {run_id}"
|
||||
mock_response.tool_calls = [] # No tool calls, just finish
|
||||
mock_response.prompt_tokens = 50
|
||||
mock_response.completion_tokens = 25
|
||||
mock_response.reasoning = None
|
||||
mock_response.raw_response = {"role": "assistant", "content": f"Run {run_id}"}
|
||||
|
||||
mock_tool_signatures = []
|
||||
|
||||
with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
|
||||
mock_llm.return_value = mock_response
|
||||
|
||||
with patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
|
||||
input_data = SmartDecisionMakerBlock.Input(
|
||||
prompt=f"Prompt for run {run_id}",
|
||||
model=llm_module.DEFAULT_LLM_MODEL,
|
||||
credentials=llm_module.TEST_CREDENTIALS_INPUT,
|
||||
agent_mode_max_iterations=0,
|
||||
)
|
||||
|
||||
mock_execution_context = ExecutionContext(safe_mode=False)
|
||||
mock_execution_processor = MagicMock()
|
||||
|
||||
outputs = {}
|
||||
async for output_name, output_data in block.run(
|
||||
input_data,
|
||||
credentials=llm_module.TEST_CREDENTIALS,
|
||||
graph_id=f"graph-{run_id}",
|
||||
node_id=f"node-{run_id}",
|
||||
graph_exec_id=f"exec-{run_id}",
|
||||
node_exec_id=f"node-exec-{run_id}",
|
||||
user_id=f"user-{run_id}",
|
||||
graph_version=1,
|
||||
execution_context=mock_execution_context,
|
||||
execution_processor=mock_execution_processor,
|
||||
):
|
||||
outputs[output_name] = output_data
|
||||
|
||||
with lock:
|
||||
all_outputs.append((run_id, outputs))
|
||||
|
||||
# Run multiple concurrent executions
|
||||
tasks = [run_block(i) for i in range(5)]
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
# Verify each run got its own response (no cross-contamination)
|
||||
for run_id, outputs in all_outputs:
|
||||
if "finished" in outputs:
|
||||
assert f"run {run_id}" in outputs["finished"].lower() or outputs["finished"] == f"Response for run {run_id}", \
|
||||
f"Run {run_id} may have received contaminated response: {outputs}"
|
||||
|
||||
|
||||
class TestPendingToolCallRace:
|
||||
"""
|
||||
Tests for Failure Mode #7 and #11: Race in Pending Tool Calls
|
||||
|
||||
The get_pending_tool_calls function can race with modifications
|
||||
to the conversation history, causing StopIteration or incorrect counts.
|
||||
"""
|
||||
|
||||
def test_pending_tool_calls_counter_accuracy(self):
|
||||
"""Test that pending tool call counting is accurate."""
|
||||
conversation = [
|
||||
# Assistant makes 3 tool calls
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "tool_use", "id": "call_1"},
|
||||
{"type": "tool_use", "id": "call_2"},
|
||||
{"type": "tool_use", "id": "call_3"},
|
||||
]
|
||||
},
|
||||
# User provides 1 response
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "tool_result", "tool_use_id": "call_1"}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
pending = get_pending_tool_calls(conversation)
|
||||
|
||||
# Should have 2 pending (call_2, call_3)
|
||||
assert len(pending) == 2
|
||||
assert "call_2" in pending
|
||||
assert "call_3" in pending
|
||||
assert pending["call_2"] == 1
|
||||
assert pending["call_3"] == 1
|
||||
|
||||
def test_pending_tool_calls_duplicate_responses(self):
|
||||
"""Test handling of duplicate tool responses."""
|
||||
conversation = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [{"type": "tool_use", "id": "call_1"}]
|
||||
},
|
||||
# Duplicate responses for same call
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"type": "tool_result", "tool_use_id": "call_1"}]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"type": "tool_result", "tool_use_id": "call_1"}]
|
||||
}
|
||||
]
|
||||
|
||||
pending = get_pending_tool_calls(conversation)
|
||||
|
||||
# call_1 has count -1 (1 request - 2 responses)
|
||||
# Should not be in pending (count <= 0)
|
||||
assert "call_1" not in pending or pending.get("call_1", 0) <= 0
|
||||
|
||||
def test_empty_conversation_no_pending(self):
|
||||
"""Test that empty conversation has no pending calls."""
|
||||
assert get_pending_tool_calls([]) == {}
|
||||
assert get_pending_tool_calls(None) == {}
|
||||
|
||||
def test_next_iter_on_empty_dict_raises_stop_iteration(self):
|
||||
"""
|
||||
Document the StopIteration vulnerability.
|
||||
|
||||
If pending_tool_calls becomes empty between the check and
|
||||
next(iter(...)), StopIteration is raised.
|
||||
"""
|
||||
pending = {}
|
||||
|
||||
# This is the pattern used in smart_decision_maker.py:1019
|
||||
# if pending_tool_calls and ...:
|
||||
# first_call_id = next(iter(pending_tool_calls.keys()))
|
||||
|
||||
with pytest.raises(StopIteration):
|
||||
next(iter(pending.keys()))
|
||||
|
||||
# Safe pattern should be:
|
||||
# first_call_id = next(iter(pending_tool_calls.keys()), None)
|
||||
safe_result = next(iter(pending.keys()), None)
|
||||
assert safe_result is None
|
||||
|
||||
|
||||
class TestToolRequestResponseParsing:
|
||||
"""Tests for tool request/response parsing edge cases."""
|
||||
|
||||
def test_get_tool_requests_openai_format(self):
|
||||
"""Test parsing OpenAI format tool requests."""
|
||||
entry = {
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{"id": "call_abc123"},
|
||||
{"id": "call_def456"},
|
||||
]
|
||||
}
|
||||
|
||||
requests = _get_tool_requests(entry)
|
||||
assert requests == ["call_abc123", "call_def456"]
|
||||
|
||||
def test_get_tool_requests_anthropic_format(self):
|
||||
"""Test parsing Anthropic format tool requests."""
|
||||
entry = {
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "tool_use", "id": "toolu_abc123"},
|
||||
{"type": "text", "text": "Let me call this tool"},
|
||||
{"type": "tool_use", "id": "toolu_def456"},
|
||||
]
|
||||
}
|
||||
|
||||
requests = _get_tool_requests(entry)
|
||||
assert requests == ["toolu_abc123", "toolu_def456"]
|
||||
|
||||
def test_get_tool_requests_non_assistant_role(self):
|
||||
"""Non-assistant roles should return empty list."""
|
||||
entry = {"role": "user", "tool_calls": [{"id": "call_123"}]}
|
||||
assert _get_tool_requests(entry) == []
|
||||
|
||||
def test_get_tool_responses_openai_format(self):
|
||||
"""Test parsing OpenAI format tool responses."""
|
||||
entry = {
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_abc123",
|
||||
"content": "Result"
|
||||
}
|
||||
|
||||
responses = _get_tool_responses(entry)
|
||||
assert responses == ["call_abc123"]
|
||||
|
||||
def test_get_tool_responses_anthropic_format(self):
|
||||
"""Test parsing Anthropic format tool responses."""
|
||||
entry = {
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "tool_result", "tool_use_id": "toolu_abc123"},
|
||||
{"type": "tool_result", "tool_use_id": "toolu_def456"},
|
||||
]
|
||||
}
|
||||
|
||||
        responses = _get_tool_responses(entry)
        assert responses == ["toolu_abc123", "toolu_def456"]

    def test_get_tool_responses_mixed_content(self):
        """Test parsing responses with mixed content types."""
        entry = {
            "role": "user",
            "content": [
                {"type": "text", "text": "Here are the results"},
                {"type": "tool_result", "tool_use_id": "toolu_123"},
                {"type": "image", "url": "http://example.com/img.png"},
            ]
        }

        responses = _get_tool_responses(entry)
        assert responses == ["toolu_123"]


class TestConcurrentToolSignatureCreation:
    """Tests for concurrent tool signature creation."""

    @pytest.mark.asyncio
    async def test_concurrent_signature_creation_same_node(self):
        """
        Test that concurrent signature creation for the same node
        doesn't cause issues.
        """
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        mock_links = [
            Mock(sink_name="field1", sink_id="test-node", source_id="source"),
            Mock(sink_name="field2", sink_id="test-node", source_id="source"),
        ]

        # Run multiple concurrent signature creations
        tasks = [
            block._create_block_function_signature(mock_node, mock_links)
            for _ in range(10)
        ]

        results = await asyncio.gather(*tasks)

        # All results should be identical
        first = results[0]
        for i, result in enumerate(results[1:], 1):
            assert result["function"]["name"] == first["function"]["name"], \
                f"Result {i} has different name"
            assert set(result["function"]["parameters"]["properties"].keys()) == \
                set(first["function"]["parameters"]["properties"].keys()), \
                f"Result {i} has different properties"


class TestThreadSafetyOfCleanup:
    """Tests for thread safety of cleanup function."""

    def test_cleanup_is_thread_safe(self):
        """
        Test that the cleanup function is thread-safe.

        Since it's a pure function with no shared state, it should be safe.
        """
        results = {}
        lock = threading.Lock()

        test_inputs = [
            "Max Keyword Difficulty",
            "Search Volume (Monthly)",
            "CPC ($)",
            "Target URL",
        ]

        def worker(input_str: str, thread_id: int):
            for _ in range(100):
                result = SmartDecisionMakerBlock.cleanup(input_str)
                with lock:
                    key = f"{thread_id}_{input_str}"
                    if key not in results:
                        results[key] = set()
                    results[key].add(result)

        threads = []
        for i, input_str in enumerate(test_inputs):
            for j in range(3):
                t = threading.Thread(target=worker, args=(input_str, i * 3 + j))
                threads.append(t)

        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # Each input should produce exactly one unique output
        for key, values in results.items():
            assert len(values) == 1, f"Non-deterministic cleanup for {key}: {values}"


class TestAsyncConcurrencyPatterns:
    """Tests for async concurrency patterns in the block."""

    @pytest.mark.asyncio
    async def test_multiple_async_runs_isolation(self):
        """
        Test that multiple async runs are properly isolated.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        run_count = 5
        results = []

        async def single_run(run_id: int):
            mock_response = MagicMock()
            mock_response.response = f"Unique response {run_id}"
            mock_response.tool_calls = []
            mock_response.prompt_tokens = 10
            mock_response.completion_tokens = 5
            mock_response.reasoning = None
            mock_response.raw_response = {"role": "assistant", "content": f"Run {run_id}"}

            # Add small random delay to increase chance of interleaving
            await asyncio.sleep(0.001 * (run_id % 3))

            with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
                mock_llm.return_value = mock_response

                with patch.object(block, "_create_tool_node_signatures", return_value=[]):
                    input_data = SmartDecisionMakerBlock.Input(
                        prompt=f"Prompt {run_id}",
                        model=llm_module.DEFAULT_LLM_MODEL,
                        credentials=llm_module.TEST_CREDENTIALS_INPUT,
                        agent_mode_max_iterations=0,
                    )

                    outputs = {}
                    async for name, value in block.run(
                        input_data,
                        credentials=llm_module.TEST_CREDENTIALS,
                        graph_id=f"g{run_id}",
                        node_id=f"n{run_id}",
                        graph_exec_id=f"e{run_id}",
                        node_exec_id=f"ne{run_id}",
                        user_id=f"u{run_id}",
                        graph_version=1,
                        execution_context=ExecutionContext(safe_mode=False),
                        execution_processor=MagicMock(),
                    ):
                        outputs[name] = value

            return run_id, outputs

        # Run all concurrently
        tasks = [single_run(i) for i in range(run_count)]
        results = await asyncio.gather(*tasks)

        # Verify isolation
        for run_id, outputs in results:
            if "finished" in outputs:
                assert str(run_id) in outputs["finished"], \
                    f"Run {run_id} got wrong response: {outputs['finished']}"
@@ -0,0 +1,667 @@
"""
Tests for SmartDecisionMaker conversation handling and corruption scenarios.

Covers failure modes:
6. Conversation Corruption in Error Paths
And related conversation management issues.
"""

import json
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.blocks.smart_decision_maker import (
    SmartDecisionMakerBlock,
    get_pending_tool_calls,
    _create_tool_response,
    _combine_tool_responses,
    _convert_raw_response_to_dict,
    _get_tool_requests,
    _get_tool_responses,
)


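# Illustrative reference only (not exercised directly by the tests below): the
# two well-formed tool-response message shapes that the assertions in
# TestConversationCorruptionInErrorPaths check for. The keys are taken from
# those assertions; treat these dicts as examples, not as the canonical
# backend format.
EXAMPLE_ANTHROPIC_TOOL_RESULT = {
    "role": "user",
    "type": "message",
    "content": [{"type": "tool_result", "tool_use_id": "toolu_abc123"}],
}
EXAMPLE_OPENAI_TOOL_RESULT = {
    "role": "tool",
    "tool_call_id": "call_abc123",
    "content": "result",
}

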
class TestConversationCorruptionInErrorPaths:
    """
    Tests for Failure Mode #6: Conversation Corruption in Error Paths

    When there's a logic error (orphaned tool output), the code appends
    it as a "user" message instead of the proper tool response format,
    violating the LLM conversation structure.
    """

    @pytest.mark.asyncio
    async def test_orphaned_tool_output_creates_user_message(self):
        """
        Test that orphaned tool output (no pending calls) creates the wrong message type.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        # Response with no tool calls
        mock_response = MagicMock()
        mock_response.response = "No tools needed"
        mock_response.tool_calls = []
        mock_response.prompt_tokens = 50
        mock_response.completion_tokens = 25
        mock_response.reasoning = None
        mock_response.raw_response = {"role": "assistant", "content": "No tools needed"}

        with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
            mock_llm.return_value = mock_response

            with patch.object(block, "_create_tool_node_signatures", return_value=[]):
                input_data = SmartDecisionMakerBlock.Input(
                    prompt="Test",
                    model=llm_module.DEFAULT_LLM_MODEL,
                    credentials=llm_module.TEST_CREDENTIALS_INPUT,
                    agent_mode_max_iterations=0,
                    # Orphaned tool output - no pending calls but we have output
                    last_tool_output={"result": "orphaned data"},
                    conversation_history=[],  # Empty - no pending calls
                )

                mock_execution_context = ExecutionContext(safe_mode=False)
                mock_execution_processor = MagicMock()

                outputs = {}
                async for name, value in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=mock_execution_context,
                    execution_processor=mock_execution_processor,
                ):
                    outputs[name] = value

        # Check the conversation for the orphaned output handling
        # The orphaned output is logged as error but may be added as user message
        # This is the BUG: should not add orphaned outputs to conversation

    def test_create_tool_response_anthropic_format(self):
        """Test that Anthropic format tool responses are created correctly."""
        response = _create_tool_response(
            "toolu_abc123",
            {"result": "success"}
        )

        assert response["role"] == "user"
        assert response["type"] == "message"
        assert isinstance(response["content"], list)
        assert response["content"][0]["type"] == "tool_result"
        assert response["content"][0]["tool_use_id"] == "toolu_abc123"

    def test_create_tool_response_openai_format(self):
        """Test that OpenAI format tool responses are created correctly."""
        response = _create_tool_response(
            "call_abc123",
            {"result": "success"}
        )

        assert response["role"] == "tool"
        assert response["tool_call_id"] == "call_abc123"
        assert "content" in response

    def test_tool_response_with_string_content(self):
        """Test tool response creation with string content."""
        response = _create_tool_response(
            "call_123",
            "Simple string result"
        )

        assert response["content"] == "Simple string result"

    def test_tool_response_with_complex_content(self):
        """Test tool response creation with complex JSON content."""
        complex_data = {
            "nested": {"key": "value"},
            "list": [1, 2, 3],
            "null": None,
        }

        response = _create_tool_response("call_123", complex_data)

        # Content should be JSON string
        parsed = json.loads(response["content"])
        assert parsed == complex_data


class TestCombineToolResponses:
    """Tests for combining multiple tool responses."""

    def test_combine_single_response_unchanged(self):
        """Test that single response is returned unchanged."""
        responses = [
            {
                "role": "user",
                "type": "message",
                "content": [{"type": "tool_result", "tool_use_id": "123"}]
            }
        ]

        result = _combine_tool_responses(responses)
        assert result == responses

    def test_combine_multiple_anthropic_responses(self):
        """Test combining multiple Anthropic responses."""
        responses = [
            {
                "role": "user",
                "type": "message",
                "content": [{"type": "tool_result", "tool_use_id": "123", "content": "a"}]
            },
            {
                "role": "user",
                "type": "message",
                "content": [{"type": "tool_result", "tool_use_id": "456", "content": "b"}]
            },
        ]

        result = _combine_tool_responses(responses)

        # Should be combined into single message
        assert len(result) == 1
        assert result[0]["role"] == "user"
        assert len(result[0]["content"]) == 2

    def test_combine_mixed_responses(self):
        """Test combining mixed Anthropic and OpenAI responses."""
        responses = [
            {
                "role": "user",
                "type": "message",
                "content": [{"type": "tool_result", "tool_use_id": "123"}]
            },
            {
                "role": "tool",
                "tool_call_id": "call_456",
                "content": "openai result"
            },
        ]

        result = _combine_tool_responses(responses)

        # Anthropic response combined, OpenAI kept separate
        assert len(result) == 2

    def test_combine_empty_list(self):
        """Test combining empty list."""
        result = _combine_tool_responses([])
        assert result == []


class TestConversationHistoryValidation:
    """Tests for conversation history validation."""

    def test_pending_tool_calls_basic(self):
        """Test basic pending tool call counting."""
        history = [
            {
                "role": "assistant",
                "content": [
                    {"type": "tool_use", "id": "call_1"},
                    {"type": "tool_use", "id": "call_2"},
                ]
            }
        ]

        pending = get_pending_tool_calls(history)

        assert len(pending) == 2
        assert "call_1" in pending
        assert "call_2" in pending

    def test_pending_tool_calls_with_responses(self):
        """Test pending calls after some responses."""
        history = [
            {
                "role": "assistant",
                "content": [
                    {"type": "tool_use", "id": "call_1"},
                    {"type": "tool_use", "id": "call_2"},
                ]
            },
            {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "call_1"}
                ]
            }
        ]

        pending = get_pending_tool_calls(history)

        assert len(pending) == 1
        assert "call_2" in pending
        assert "call_1" not in pending

    def test_pending_tool_calls_all_responded(self):
        """Test when all tool calls have responses."""
        history = [
            {
                "role": "assistant",
                "content": [{"type": "tool_use", "id": "call_1"}]
            },
            {
                "role": "user",
                "content": [{"type": "tool_result", "tool_use_id": "call_1"}]
            }
        ]

        pending = get_pending_tool_calls(history)

        assert len(pending) == 0

    def test_pending_tool_calls_openai_format(self):
        """Test pending calls with OpenAI format."""
        history = [
            {
                "role": "assistant",
                "tool_calls": [
                    {"id": "call_1"},
                    {"id": "call_2"},
                ]
            },
            {
                "role": "tool",
                "tool_call_id": "call_1",
                "content": "result"
            }
        ]

        pending = get_pending_tool_calls(history)

        assert len(pending) == 1
        assert "call_2" in pending


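# Simplified counting sketch for the behaviour exercised in
# TestConversationHistoryValidation above: a call is "pending" while its
# tool_use / tool_calls id has no matching tool_result / tool-role response,
# and orphaned responses (count <= 0) are filtered out. This is an
# illustration only, not the backend's get_pending_tool_calls implementation.
def count_pending_tool_calls(history: list[dict]) -> dict[str, int]:
    counts: dict[str, int] = {}
    for entry in history:
        content = entry.get("content")
        if isinstance(content, list):
            for item in content:
                if item.get("type") == "tool_use":
                    counts[item["id"]] = counts.get(item["id"], 0) + 1
                elif item.get("type") == "tool_result":
                    counts[item["tool_use_id"]] = counts.get(item["tool_use_id"], 0) - 1
        for call in entry.get("tool_calls", []):
            counts[call["id"]] = counts.get(call["id"], 0) + 1
        if entry.get("role") == "tool" and "tool_call_id" in entry:
            counts[entry["tool_call_id"]] = counts.get(entry["tool_call_id"], 0) - 1
    return {call_id: count for call_id, count in counts.items() if count > 0}

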
class TestConversationUpdateBehavior:
    """Tests for conversation update behavior."""

    @pytest.mark.asyncio
    async def test_conversation_includes_assistant_response(self):
        """Test that assistant responses are added to conversation."""
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        mock_response = MagicMock()
        mock_response.response = "Final answer"
        mock_response.tool_calls = []
        mock_response.prompt_tokens = 50
        mock_response.completion_tokens = 25
        mock_response.reasoning = None
        mock_response.raw_response = {"role": "assistant", "content": "Final answer"}

        with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
            mock_llm.return_value = mock_response

            with patch.object(block, "_create_tool_node_signatures", return_value=[]):
                input_data = SmartDecisionMakerBlock.Input(
                    prompt="Test",
                    model=llm_module.DEFAULT_LLM_MODEL,
                    credentials=llm_module.TEST_CREDENTIALS_INPUT,
                    agent_mode_max_iterations=0,
                )

                mock_execution_context = ExecutionContext(safe_mode=False)
                mock_execution_processor = MagicMock()

                outputs = {}
                async for name, value in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=mock_execution_context,
                    execution_processor=mock_execution_processor,
                ):
                    outputs[name] = value

        # No conversations output when no tool calls (just finished)
        assert "finished" in outputs
        assert outputs["finished"] == "Final answer"

    @pytest.mark.asyncio
    async def test_conversation_with_tool_calls(self):
        """Test that tool calls are properly added to conversation."""
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        mock_tool_call = MagicMock()
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = json.dumps({"param": "value"})

        mock_response = MagicMock()
        mock_response.response = None
        mock_response.tool_calls = [mock_tool_call]
        mock_response.prompt_tokens = 50
        mock_response.completion_tokens = 25
        mock_response.reasoning = "I'll use the test tool"
        mock_response.raw_response = {
            "role": "assistant",
            "content": None,
            "tool_calls": [{"id": "call_1"}]
        }

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {"param": "param"},
                    "parameters": {
                        "properties": {"param": {"type": "string"}},
                        "required": ["param"],
                    },
                },
            }
        ]

        with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
            mock_llm.return_value = mock_response

            with patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
                input_data = SmartDecisionMakerBlock.Input(
                    prompt="Test",
                    model=llm_module.DEFAULT_LLM_MODEL,
                    credentials=llm_module.TEST_CREDENTIALS_INPUT,
                    agent_mode_max_iterations=0,
                )

                mock_execution_context = ExecutionContext(safe_mode=False)
                mock_execution_processor = MagicMock()

                outputs = {}
                async for name, value in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=mock_execution_context,
                    execution_processor=mock_execution_processor,
                ):
                    outputs[name] = value

        # Should have conversations output
        assert "conversations" in outputs

        # Conversation should include the assistant message
        conversations = outputs["conversations"]
        has_assistant = any(
            msg.get("role") == "assistant"
            for msg in conversations
        )
        assert has_assistant


class TestConversationHistoryPreservation:
    """Tests for conversation history preservation across calls."""

    @pytest.mark.asyncio
    async def test_existing_history_preserved(self):
        """Test that existing conversation history is preserved."""
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        existing_history = [
            {"role": "user", "content": "Previous message 1"},
            {"role": "assistant", "content": "Previous response 1"},
            {"role": "user", "content": "Previous message 2"},
        ]

        mock_response = MagicMock()
        mock_response.response = "New response"
        mock_response.tool_calls = []
        mock_response.prompt_tokens = 50
        mock_response.completion_tokens = 25
        mock_response.reasoning = None
        mock_response.raw_response = {"role": "assistant", "content": "New response"}

        captured_prompt = []

        async def capture_llm_call(**kwargs):
            captured_prompt.extend(kwargs.get("prompt", []))
            return mock_response

        with patch("backend.blocks.llm.llm_call", side_effect=capture_llm_call):
            with patch.object(block, "_create_tool_node_signatures", return_value=[]):
                input_data = SmartDecisionMakerBlock.Input(
                    prompt="New message",
                    model=llm_module.DEFAULT_LLM_MODEL,
                    credentials=llm_module.TEST_CREDENTIALS_INPUT,
                    agent_mode_max_iterations=0,
                    conversation_history=existing_history,
                )

                mock_execution_context = ExecutionContext(safe_mode=False)
                mock_execution_processor = MagicMock()

                async for _ in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=mock_execution_context,
                    execution_processor=mock_execution_processor,
                ):
                    pass

        # Existing history should be in the prompt
        assert len(captured_prompt) >= len(existing_history)


class TestRawResponseConversion:
    """Tests for raw response to dict conversion."""

    def test_string_response(self):
        """Test conversion of string response."""
        result = _convert_raw_response_to_dict("Hello world")

        assert result == {"role": "assistant", "content": "Hello world"}

    def test_dict_response(self):
        """Test that dict response is passed through."""
        original = {"role": "assistant", "content": "test", "extra": "data"}
        result = _convert_raw_response_to_dict(original)

        assert result == original

    def test_object_response(self):
        """Test conversion of object response."""
        mock_obj = MagicMock()

        with patch("backend.blocks.smart_decision_maker.json.to_dict") as mock_to_dict:
            mock_to_dict.return_value = {"role": "assistant", "content": "converted"}
            result = _convert_raw_response_to_dict(mock_obj)

            mock_to_dict.assert_called_once_with(mock_obj)
            assert result["role"] == "assistant"


class TestConversationMessageStructure:
    """Tests for correct conversation message structure."""

    def test_system_message_not_duplicated(self):
        """Test that system messages are not duplicated."""
        from backend.util.prompt import MAIN_OBJECTIVE_PREFIX

        # Existing system message in history
        existing_history = [
            {"role": "system", "content": f"{MAIN_OBJECTIVE_PREFIX}Existing system prompt"},
        ]

        # The block should not add another system message
        # This is verified by checking the prompt passed to LLM

    def test_user_message_not_duplicated(self):
        """Test that user messages are not duplicated."""
        from backend.util.prompt import MAIN_OBJECTIVE_PREFIX

        # Existing user message with MAIN_OBJECTIVE_PREFIX
        existing_history = [
            {"role": "user", "content": f"{MAIN_OBJECTIVE_PREFIX}Existing user prompt"},
        ]

        # The block should not add another user message with same prefix
        # This is verified by checking the prompt passed to LLM

    def test_tool_response_after_tool_call(self):
        """Test that tool responses come after tool calls."""
        # Valid conversation structure
        valid_history = [
            {
                "role": "assistant",
                "content": [{"type": "tool_use", "id": "call_1"}]
            },
            {
                "role": "user",
                "content": [{"type": "tool_result", "tool_use_id": "call_1"}]
            }
        ]

        # This should be valid - tool result follows tool use
        pending = get_pending_tool_calls(valid_history)
        assert len(pending) == 0

    def test_orphaned_tool_response_detected(self):
        """Test detection of orphaned tool responses."""
        # Invalid: tool response without matching tool call
        invalid_history = [
            {
                "role": "user",
                "content": [{"type": "tool_result", "tool_use_id": "orphan_call"}]
            }
        ]

        pending = get_pending_tool_calls(invalid_history)

        # Orphan response creates negative count
        # Should have count -1 for orphan_call
        # But it's filtered out (count <= 0)
        assert "orphan_call" not in pending


class TestValidationErrorInConversation:
    """Tests for validation error handling in conversation."""

    @pytest.mark.asyncio
    async def test_validation_error_feedback_not_in_final_conversation(self):
        """
        Test that validation error feedback is not in final conversation output.

        When retrying due to validation errors, the error feedback should
        only be used for the retry prompt, not persisted in final conversation.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        call_count = 0

        async def mock_llm_call(**kwargs):
            nonlocal call_count
            call_count += 1

            if call_count == 1:
                # First call: invalid tool call
                mock_tool_call = MagicMock()
                mock_tool_call.function.name = "test_tool"
                mock_tool_call.function.arguments = json.dumps({"wrong": "param"})

                resp = MagicMock()
                resp.response = None
                resp.tool_calls = [mock_tool_call]
                resp.prompt_tokens = 50
                resp.completion_tokens = 25
                resp.reasoning = None
                resp.raw_response = {"role": "assistant", "content": None}
                return resp
            else:
                # Second call: finish
                resp = MagicMock()
                resp.response = "Done"
                resp.tool_calls = []
                resp.prompt_tokens = 50
                resp.completion_tokens = 25
                resp.reasoning = None
                resp.raw_response = {"role": "assistant", "content": "Done"}
                return resp

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {"correct": "correct"},
                    "parameters": {
                        "properties": {"correct": {"type": "string"}},
                        "required": ["correct"],
                    },
                },
            }
        ]

        with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call):
            with patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
                input_data = SmartDecisionMakerBlock.Input(
                    prompt="Test",
                    model=llm_module.DEFAULT_LLM_MODEL,
                    credentials=llm_module.TEST_CREDENTIALS_INPUT,
                    agent_mode_max_iterations=0,
                    retry=3,
                )

                mock_execution_context = ExecutionContext(safe_mode=False)
                mock_execution_processor = MagicMock()

                outputs = {}
                async for name, value in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=mock_execution_context,
                    execution_processor=mock_execution_processor,
                ):
                    outputs[name] = value

        # Should have finished successfully after retry
        assert "finished" in outputs

        # Note: In traditional mode (agent_mode_max_iterations=0),
        # conversations are only output when there are tool calls
        # After the retry succeeds with no tool calls, we just get "finished"
@@ -0,0 +1,671 @@
"""
Tests for SmartDecisionMaker data integrity failure modes.

Covers failure modes:
6. Conversation Corruption in Error Paths
7. Field Name Collision Not Detected
8. No Type Validation in Dynamic Field Merging
9. Unhandled Field Mapping Keys
16. Silent Value Loss in Output Routing
"""

import json
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock


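# Hedged sketch (not used by the tests below): one way the block could detect
# the sanitization collisions exercised in TestFieldNameCollisionDetection
# before a mapping is silently overwritten. SmartDecisionMakerBlock.cleanup is
# the block's own sanitizer; find_sanitization_collisions itself is a
# hypothetical helper, not an existing backend API.
def find_sanitization_collisions(field_names: list[str]) -> dict[str, list[str]]:
    """Group original field names by sanitized form; collisions have more than one entry."""
    grouped: dict[str, list[str]] = {}
    for name in field_names:
        grouped.setdefault(SmartDecisionMakerBlock.cleanup(name), []).append(name)
    return {clean: originals for clean, originals in grouped.items() if len(originals) > 1}

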
class TestFieldNameCollisionDetection:
    """
    Tests for Failure Mode #7: Field Name Collision Not Detected

    When multiple field names sanitize to the same value,
    the last one silently overwrites previous mappings.
    """

    def test_different_names_same_sanitized_result(self):
        """Test that different names can produce the same sanitized result."""
        cleanup = SmartDecisionMakerBlock.cleanup

        # All these sanitize to "test_field"
        variants = [
            "test_field",
            "Test Field",
            "test field",
            "TEST_FIELD",
            "Test_Field",
            "test-field",  # Note: hyphen is preserved, this is different
        ]

        sanitized = [cleanup(v) for v in variants]

        # Count unique sanitized values
        unique = set(sanitized)
        # Most should collide (except hyphenated one)
        assert len(unique) < len(variants), \
            f"Expected collisions, got {unique}"

    @pytest.mark.asyncio
    async def test_collision_last_one_wins(self):
        """Test that in case of collision, the last field mapping wins."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        # Two fields that sanitize to the same name
        mock_links = [
            Mock(sink_name="Test Field", sink_id="test-node", source_id="source"),
            Mock(sink_name="test field", sink_id="test-node", source_id="source"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        field_mapping = signature["function"]["_field_mapping"]
        properties = signature["function"]["parameters"]["properties"]

        # Only one property (collision)
        assert len(properties) == 1
        assert "test_field" in properties

        # The mapping has only the last one
        # This is the BUG: first field's mapping is lost
        assert field_mapping["test_field"] in ["Test Field", "test field"]

    @pytest.mark.asyncio
    async def test_collision_causes_data_loss(self):
        """
        Test that field collision can cause actual data loss.

        Scenario:
        1. Two fields "Field A" and "field a" both map to "field_a"
        2. LLM provides value for "field_a"
        3. Only one original field gets the value
        4. The other field's expected input is lost
        """
        block = SmartDecisionMakerBlock()

        # Simulate processing tool calls with collision
        mock_response = Mock()
        mock_tool_call = Mock()
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = json.dumps({
            "field_a": "value_for_both"  # LLM uses sanitized name
        })
        mock_response.tool_calls = [mock_tool_call]

        # Tool definition with collision in field mapping
        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "parameters": {
                        "properties": {
                            "field_a": {"type": "string"},
                        },
                        "required": ["field_a"],
                    },
                    "_sink_node_id": "sink",
                    # BUG: Only one original name is stored
                    # "Field A" was overwritten by "field a"
                    "_field_mapping": {"field_a": "field a"},
                },
            }
        ]

        processed = block._process_tool_calls(mock_response, tool_functions)

        assert len(processed) == 1
        input_data = processed[0].input_data

        # Only "field a" gets the value
        assert "field a" in input_data
        assert input_data["field a"] == "value_for_both"

        # "Field A" is completely lost!
        assert "Field A" not in input_data


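# Minimal sketch of the fallback behaviour exercised below, assuming the block
# resolves sink pin names roughly like this: a clean name missing from
# _field_mapping falls back to the clean name itself. This helper is an
# illustration, not the actual implementation.
def resolve_sink_field_name(field_mapping: dict[str, str], clean_name: str) -> str:
    return field_mapping.get(clean_name, clean_name)

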
class TestUnhandledFieldMappingKeys:
    """
    Tests for Failure Mode #9: Unhandled Field Mapping Keys

    When field_mapping is missing a key, the code falls back to
    the clean name, which may not be what the sink expects.
    """

    @pytest.mark.asyncio
    async def test_missing_field_mapping_falls_back_to_clean_name(self):
        """Test that missing field mapping falls back to clean name."""
        block = SmartDecisionMakerBlock()

        mock_response = Mock()
        mock_tool_call = Mock()
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = json.dumps({
            "unmapped_field": "value"
        })
        mock_response.tool_calls = [mock_tool_call]

        # Tool definition with incomplete field mapping
        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "parameters": {
                        "properties": {
                            "unmapped_field": {"type": "string"},
                        },
                        "required": [],
                    },
                    "_sink_node_id": "sink",
                    "_field_mapping": {},  # Empty! No mapping for unmapped_field
                },
            }
        ]

        processed = block._process_tool_calls(mock_response, tool_functions)

        assert len(processed) == 1
        input_data = processed[0].input_data

        # Falls back to clean name (which IS the key since it's already clean)
        assert "unmapped_field" in input_data

    @pytest.mark.asyncio
    async def test_partial_field_mapping(self):
        """Test behavior with partial field mapping."""
        block = SmartDecisionMakerBlock()

        mock_response = Mock()
        mock_tool_call = Mock()
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = json.dumps({
            "mapped_field": "value1",
            "unmapped_field": "value2",
        })
        mock_response.tool_calls = [mock_tool_call]

        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "parameters": {
                        "properties": {
                            "mapped_field": {"type": "string"},
                            "unmapped_field": {"type": "string"},
                        },
                        "required": [],
                    },
                    "_sink_node_id": "sink",
                    # Only one field is mapped
                    "_field_mapping": {
                        "mapped_field": "Original Mapped Field",
                    },
                },
            }
        ]

        processed = block._process_tool_calls(mock_response, tool_functions)

        assert len(processed) == 1
        input_data = processed[0].input_data

        # Mapped field uses original name
        assert "Original Mapped Field" in input_data
        # Unmapped field uses clean name (fallback)
        assert "unmapped_field" in input_data


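# Hedged sketch related to the "silent None" issue exercised below: a sentinel
# object would let callers distinguish "routing failed" from "the routed value
# is None". Illustration only; parse_execution_output currently returns None
# in both cases, which is exactly what the tests assert.
_NO_MATCH = object()


def route_or_sentinel(emitted_pin: str, emitted_value: Any, expected_pin: str) -> Any:
    """Return the value when the pin matches, otherwise the _NO_MATCH sentinel."""
    return emitted_value if emitted_pin == expected_pin else _NO_MATCH

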
class TestSilentValueLossInRouting:
    """
    Tests for Failure Mode #16: Silent Value Loss in Output Routing

    When routing fails in parse_execution_output, it returns None
    without any logging or indication of why it failed.
    """

    def test_routing_mismatch_returns_none_silently(self):
        """Test that routing mismatch returns None without error."""
        from backend.data.dynamic_fields import parse_execution_output

        output_item = ("tools_^_node-123_~_sanitized_name", "important_value")

        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="Original Name",  # Doesn't match sanitized_name
        )

        # Silently returns None
        assert result is None
        # No way to distinguish "value is None" from "routing failed"

    def test_wrong_node_id_returns_none(self):
        """Test that wrong node ID returns None."""
        from backend.data.dynamic_fields import parse_execution_output

        output_item = ("tools_^_node-123_~_field", "value")

        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="different-node",  # Wrong node
            sink_pin_name="field",
        )

        assert result is None

    def test_wrong_selector_returns_none(self):
        """Test that wrong selector returns None."""
        from backend.data.dynamic_fields import parse_execution_output

        output_item = ("tools_^_node-123_~_field", "value")

        result = parse_execution_output(
            output_item,
            link_output_selector="different_selector",  # Wrong selector
            sink_node_id="node-123",
            sink_pin_name="field",
        )

        assert result is None

    def test_cannot_distinguish_none_value_from_routing_failure(self):
        """
        Test that None as actual value is indistinguishable from routing failure.
        """
        from backend.data.dynamic_fields import parse_execution_output

        # Case 1: Actual None value
        output_with_none = ("field_name", None)
        result1 = parse_execution_output(
            output_with_none,
            link_output_selector="field_name",
            sink_node_id=None,
            sink_pin_name=None,
        )

        # Case 2: Routing failure
        output_mismatched = ("field_name", "value")
        result2 = parse_execution_output(
            output_mismatched,
            link_output_selector="different_field",
            sink_node_id=None,
            sink_pin_name=None,
        )

        # Both return None - cannot distinguish!
        assert result1 is None
        assert result2 is None


class TestProcessToolCallsInputData:
    """Tests for _process_tool_calls input data generation."""

    @pytest.mark.asyncio
    async def test_all_expected_args_included(self):
        """Test that all expected arguments are included in input_data."""
        block = SmartDecisionMakerBlock()

        mock_response = Mock()
        mock_tool_call = Mock()
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = json.dumps({
            "provided_field": "value",
            # optional_field not provided
        })
        mock_response.tool_calls = [mock_tool_call]

        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "parameters": {
                        "properties": {
                            "provided_field": {"type": "string"},
                            "optional_field": {"type": "string"},
                        },
                        "required": ["provided_field"],
                    },
                    "_sink_node_id": "sink",
                    "_field_mapping": {
                        "provided_field": "Provided Field",
                        "optional_field": "Optional Field",
                    },
                },
            }
        ]

        processed = block._process_tool_calls(mock_response, tool_functions)

        assert len(processed) == 1
        input_data = processed[0].input_data

        # Both fields should be in input_data
        assert "Provided Field" in input_data
        assert "Optional Field" in input_data

        # Provided has value, optional is None
        assert input_data["Provided Field"] == "value"
        assert input_data["Optional Field"] is None

    @pytest.mark.asyncio
    async def test_extra_args_from_llm_ignored(self):
        """Test that extra arguments from LLM not in schema are ignored."""
        block = SmartDecisionMakerBlock()

        mock_response = Mock()
        mock_tool_call = Mock()
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = json.dumps({
            "expected_field": "value",
            "unexpected_field": "should_be_ignored",
        })
        mock_response.tool_calls = [mock_tool_call]

        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "parameters": {
                        "properties": {
                            "expected_field": {"type": "string"},
                            # unexpected_field not in schema
                        },
                        "required": [],
                    },
                    "_sink_node_id": "sink",
                    "_field_mapping": {"expected_field": "Expected Field"},
                },
            }
        ]

        processed = block._process_tool_calls(mock_response, tool_functions)

        assert len(processed) == 1
        input_data = processed[0].input_data

        # Only expected field should be in input_data
        assert "Expected Field" in input_data
        assert "unexpected_field" not in input_data
        assert "Unexpected Field" not in input_data


class TestToolCallMatching:
    """Tests for tool call matching logic."""

    @pytest.mark.asyncio
    async def test_tool_not_found_skipped(self):
        """Test that tool calls for unknown tools are skipped."""
        block = SmartDecisionMakerBlock()

        mock_response = Mock()
        mock_tool_call = Mock()
        mock_tool_call.function.name = "unknown_tool"
        mock_tool_call.function.arguments = json.dumps({})
        mock_response.tool_calls = [mock_tool_call]

        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "known_tool",  # Different name
                    "parameters": {"properties": {}, "required": []},
                    "_sink_node_id": "sink",
                },
            }
        ]

        processed = block._process_tool_calls(mock_response, tool_functions)

        # Unknown tool is skipped (not processed)
        assert len(processed) == 0

    @pytest.mark.asyncio
    async def test_single_tool_fallback(self):
        """Test fallback when only one tool exists but name doesn't match."""
        block = SmartDecisionMakerBlock()

        mock_response = Mock()
        mock_tool_call = Mock()
        mock_tool_call.function.name = "wrong_name"
        mock_tool_call.function.arguments = json.dumps({"field": "value"})
        mock_response.tool_calls = [mock_tool_call]

        # Only one tool defined
        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "only_tool",
                    "parameters": {
                        "properties": {"field": {"type": "string"}},
                        "required": [],
                    },
                    "_sink_node_id": "sink",
                    "_field_mapping": {"field": "Field"},
                },
            }
        ]

        processed = block._process_tool_calls(mock_response, tool_functions)

        # Falls back to the only tool
        assert len(processed) == 1
        assert processed[0].input_data["Field"] == "value"

    @pytest.mark.asyncio
    async def test_multiple_tool_calls_processed(self):
        """Test that multiple tool calls are all processed."""
        block = SmartDecisionMakerBlock()

        mock_response = Mock()
        mock_tool_call_1 = Mock()
        mock_tool_call_1.function.name = "tool_a"
        mock_tool_call_1.function.arguments = json.dumps({"a": "1"})

        mock_tool_call_2 = Mock()
        mock_tool_call_2.function.name = "tool_b"
        mock_tool_call_2.function.arguments = json.dumps({"b": "2"})

        mock_response.tool_calls = [mock_tool_call_1, mock_tool_call_2]

        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "tool_a",
                    "parameters": {
                        "properties": {"a": {"type": "string"}},
                        "required": [],
                    },
                    "_sink_node_id": "sink_a",
                    "_field_mapping": {"a": "A"},
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "tool_b",
                    "parameters": {
                        "properties": {"b": {"type": "string"}},
                        "required": [],
                    },
                    "_sink_node_id": "sink_b",
                    "_field_mapping": {"b": "B"},
                },
            },
        ]

        processed = block._process_tool_calls(mock_response, tool_functions)

        assert len(processed) == 2
        assert processed[0].input_data["A"] == "1"
        assert processed[1].input_data["B"] == "2"


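# Small illustration of the emit-key convention checked in
# TestOutputEmitKeyGeneration below: keys look like
# "tools_^_{sink_node_id}_~_{sanitized_field}", so splitting on the two
# markers recovers the routing target. The helper is illustrative only, not a
# backend function.
def split_emit_key(emit_key: str) -> tuple[str, str]:
    selector = emit_key[len("tools_^_"):]
    node_id, field = selector.split("_~_", 1)
    return node_id, field

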
class TestOutputEmitKeyGeneration:
    """Tests for output emit key generation consistency."""

    def test_emit_key_uses_sanitized_field_name(self):
        """Test that emit keys use sanitized field names."""
        cleanup = SmartDecisionMakerBlock.cleanup

        original_field = "Max Keyword Difficulty"
        sink_node_id = "node-123"

        sanitized = cleanup(original_field)
        emit_key = f"tools_^_{sink_node_id}_~_{sanitized}"

        assert emit_key == "tools_^_node-123_~_max_keyword_difficulty"

    def test_emit_key_format_consistent(self):
        """Test that emit key format is consistent."""
        test_cases = [
            ("field", "node", "tools_^_node_~_field"),
            ("Field Name", "node-123", "tools_^_node-123_~_field_name"),
            ("CPC ($)", "abc", "tools_^_abc_~_cpc____"),
        ]

        cleanup = SmartDecisionMakerBlock.cleanup

        for original_field, node_id, expected in test_cases:
            sanitized = cleanup(original_field)
            emit_key = f"tools_^_{node_id}_~_{sanitized}"
            assert emit_key == expected, \
                f"Expected {expected}, got {emit_key}"

    def test_emit_key_sanitization_idempotent(self):
        """Test that sanitizing an already sanitized name gives same result."""
        cleanup = SmartDecisionMakerBlock.cleanup

        original = "Test Field Name"
        first_clean = cleanup(original)
        second_clean = cleanup(first_clean)

        assert first_clean == second_clean


class TestToolFunctionMetadata:
    """Tests for tool function metadata handling."""

    @pytest.mark.asyncio
    async def test_sink_node_id_preserved(self):
        """Test that _sink_node_id is preserved in tool function."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "specific-node-id"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        mock_links = [
            Mock(sink_name="field", sink_id="specific-node-id", source_id="source"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        assert signature["function"]["_sink_node_id"] == "specific-node-id"

    @pytest.mark.asyncio
    async def test_field_mapping_preserved(self):
        """Test that _field_mapping is preserved in tool function."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        mock_links = [
            Mock(sink_name="Original Field Name", sink_id="test-node", source_id="source"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        field_mapping = signature["function"]["_field_mapping"]
        assert "original_field_name" in field_mapping
        assert field_mapping["original_field_name"] == "Original Field Name"


class TestRequiredFieldsHandling:
    """Tests for required fields handling."""

    @pytest.mark.asyncio
    async def test_required_fields_use_sanitized_names(self):
        """Test that required fields array uses sanitized names."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={
                "properties": {},
                "required": ["Required Field", "Another Required"],
            }
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            return_value={"type": "string", "description": "test"}
        )

        mock_links = [
            Mock(sink_name="Required Field", sink_id="test-node", source_id="source"),
            Mock(sink_name="Another Required", sink_id="test-node", source_id="source"),
            Mock(sink_name="Optional Field", sink_id="test-node", source_id="source"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        required = signature["function"]["parameters"]["required"]

        # Should use sanitized names
        assert "required_field" in required
        assert "another_required" in required

        # Original names should NOT be in required
        assert "Required Field" not in required
        assert "Another Required" not in required

        # Optional field should not be required
        assert "optional_field" not in required
        assert "Optional Field" not in required
@@ -0,0 +1,871 @@
"""
Tests for SmartDecisionMaker error handling failure modes.

Covers failure modes:
3. JSON Deserialization Without Exception Handling
4. Database Transaction Inconsistency
5. Missing Null Checks After Database Calls
15. Error Message Context Loss
17. No Validation of Dynamic Field Paths
"""

import json
from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.blocks.smart_decision_maker import (
    SmartDecisionMakerBlock,
    _convert_raw_response_to_dict,
    _create_tool_response,
)


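# Illustrative sketch only: a defensive wrapper around json.loads of the kind
# the tests below argue for. The ast.literal_eval fallback covers the
# single-quote / True / None variants that LLMs sometimes emit; this is an
# assumption about a possible fix, not the block's current behaviour.
def parse_tool_arguments(raw: str) -> dict:
    import ast

    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        try:
            value = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            return {}
        return value if isinstance(value, dict) else {}

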
class TestJSONDeserializationErrors:
    """
    Tests for Failure Mode #3: JSON Deserialization Without Exception Handling

    When LLM returns malformed JSON in tool call arguments, the json.loads()
    call fails without proper error handling.
    """

    def test_malformed_json_single_quotes(self):
        """
        Test that single quotes in JSON cause parsing failure.

        LLMs sometimes return {'key': 'value'} instead of {"key": "value"}
        """
        malformed = "{'key': 'value'}"

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed)

    def test_malformed_json_trailing_comma(self):
        """
        Test that trailing commas cause parsing failure.
        """
        malformed = '{"key": "value",}'

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed)

    def test_malformed_json_unquoted_keys(self):
        """
        Test that unquoted keys cause parsing failure.
        """
        malformed = '{key: "value"}'

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed)

    def test_malformed_json_python_none(self):
        """
        Test that Python None instead of null causes failure.
        """
        malformed = '{"key": None}'

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed)

    def test_malformed_json_python_true_false(self):
        """
        Test that Python True/False instead of true/false causes failure.
        """
        malformed_true = '{"key": True}'
        malformed_false = '{"key": False}'

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed_true)

        with pytest.raises(json.JSONDecodeError):
            json.loads(malformed_false)

    @pytest.mark.asyncio
    async def test_llm_returns_malformed_json_crashes_block(self):
        """
        Test that malformed JSON from LLM causes block to crash.

        BUG: The json.loads() at line 625, 706, 1124 can throw JSONDecodeError
        which is not caught, causing the entire block to fail.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        # Create response with malformed JSON
        mock_tool_call = MagicMock()
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = "{'malformed': 'json'}"  # Single quotes!

        mock_response = MagicMock()
        mock_response.response = None
        mock_response.tool_calls = [mock_tool_call]
        mock_response.prompt_tokens = 50
        mock_response.completion_tokens = 25
        mock_response.reasoning = None
        mock_response.raw_response = {"role": "assistant", "content": None}

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {},
                    "parameters": {"properties": {"malformed": {"type": "string"}}, "required": []},
                },
            }
        ]

        with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm:
            mock_llm.return_value = mock_response

            with patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):
                input_data = SmartDecisionMakerBlock.Input(
                    prompt="Test",
                    model=llm_module.DEFAULT_LLM_MODEL,
                    credentials=llm_module.TEST_CREDENTIALS_INPUT,
                    agent_mode_max_iterations=0,
                )

                mock_execution_context = ExecutionContext(safe_mode=False)
                mock_execution_processor = MagicMock()

                # BUG: This should raise JSONDecodeError
                with pytest.raises(json.JSONDecodeError):
                    async for _ in block.run(
                        input_data,
                        credentials=llm_module.TEST_CREDENTIALS,
                        graph_id="test-graph",
                        node_id="test-node",
                        graph_exec_id="test-exec",
                        node_exec_id="test-node-exec",
                        user_id="test-user",
                        graph_version=1,
                        execution_context=mock_execution_context,
                        execution_processor=mock_execution_processor,
                    ):
                        pass


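# Hedged sketch for the consistency issue exercised below. True atomicity
# needs a database transaction primitive, which is not assumed here; short of
# that, validating the whole batch up front at least moves predictable
# failures before the first write. `validate` and `persist` are placeholders,
# not backend APIs.
async def upsert_batch(inputs: dict, validate, persist) -> None:
    for name, value in inputs.items():
        validate(name, value)  # raise before anything has been written
    for name, value in inputs.items():
        await persist(name, value)  # still not atomic if persist itself fails

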
class TestDatabaseTransactionInconsistency:
    """
    Tests for Failure Mode #4: Database Transaction Inconsistency

    When multiple database operations are performed in sequence,
    a failure partway through leaves the database in an inconsistent state.
    """

    @pytest.mark.asyncio
    async def test_partial_input_insertion_on_failure(self):
        """
        Test that partial failures during multi-input insertion
        leave the database in an inconsistent state.
        """
        import threading
        from collections import defaultdict

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        # Track which inputs were inserted
        inserted_inputs = []
        call_count = 0

        async def failing_upsert(node_id, graph_exec_id, input_name, input_data):
            nonlocal call_count
            call_count += 1

            # Fail on the third input
            if call_count == 3:
                raise Exception("Database connection lost!")

            inserted_inputs.append(input_name)

            mock_result = MagicMock()
            mock_result.node_exec_id = "exec-id"
            return mock_result, {input_name: input_data}

        mock_tool_call = MagicMock()
        mock_tool_call.id = "call_1"
        mock_tool_call.function.name = "multi_input_tool"
        mock_tool_call.function.arguments = json.dumps({
            "input1": "value1",
            "input2": "value2",
            "input3": "value3",  # This one will fail
            "input4": "value4",
            "input5": "value5",
        })

        mock_response = MagicMock()
        mock_response.response = None
        mock_response.tool_calls = [mock_tool_call]
        mock_response.prompt_tokens = 50
        mock_response.completion_tokens = 25
        mock_response.reasoning = None
        mock_response.raw_response = {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": "call_1"}]
        }

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "multi_input_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {
                        "input1": "input1",
                        "input2": "input2",
                        "input3": "input3",
                        "input4": "input4",
                        "input5": "input5",
                    },
                    "parameters": {
                        "properties": {
                            "input1": {"type": "string"},
                            "input2": {"type": "string"},
                            "input3": {"type": "string"},
                            "input4": {"type": "string"},
                            "input5": {"type": "string"},
                        },
                        "required": ["input1", "input2", "input3", "input4", "input5"],
                    },
                },
            }
        ]

        mock_db_client = AsyncMock()
        mock_node = MagicMock()
        mock_node.block_id = "test-block"
        mock_db_client.get_node.return_value = mock_node
        mock_db_client.upsert_execution_input.side_effect = failing_upsert

        with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm, \
                patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
                patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):

            mock_llm.return_value = mock_response

            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = AsyncMock()
            mock_execution_processor.running_node_execution = defaultdict(MagicMock)
            mock_execution_processor.execution_stats = MagicMock()
            mock_execution_processor.execution_stats_lock = threading.Lock()

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=1,
            )

            # The block should fail, but some inputs were already inserted
            outputs = {}
            try:
                async for name, value in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=mock_execution_context,
                    execution_processor=mock_execution_processor,
                ):
                    outputs[name] = value
            except Exception:
                pass  # Expected

        # BUG: Some inputs were inserted before failure
        # Database is now in inconsistent state
        assert len(inserted_inputs) == 2, \
            f"Expected 2 inserted before failure, got {inserted_inputs}"
        assert "input1" in inserted_inputs
        assert "input2" in inserted_inputs
        # input3, input4, input5 were never inserted


class TestMissingNullChecks:
    """
    Tests for Failure Mode #5: Missing Null Checks After Database Calls
    """

    @pytest.mark.asyncio
    async def test_get_node_returns_none(self):
        """
        Test handling when get_node returns None.
        """
        import threading
        from collections import defaultdict

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        mock_tool_call = MagicMock()
        mock_tool_call.id = "call_1"
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = json.dumps({"param": "value"})

        mock_response = MagicMock()
        mock_response.response = None
        mock_response.tool_calls = [mock_tool_call]
        mock_response.prompt_tokens = 50
        mock_response.completion_tokens = 25
        mock_response.reasoning = None
        mock_response.raw_response = {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": "call_1"}]
        }

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "nonexistent-node",
                    "_field_mapping": {"param": "param"},
                    "parameters": {
                        "properties": {"param": {"type": "string"}},
                        "required": ["param"],
                    },
                },
            }
        ]

        mock_db_client = AsyncMock()
        mock_db_client.get_node.return_value = None  # Node doesn't exist!

        with patch("backend.blocks.llm.llm_call", new_callable=AsyncMock) as mock_llm, \
                patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
                patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):

            mock_llm.return_value = mock_response

            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = AsyncMock()
            mock_execution_processor.running_node_execution = defaultdict(MagicMock)
            mock_execution_processor.execution_stats = MagicMock()
            mock_execution_processor.execution_stats_lock = threading.Lock()

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=1,
            )

            # Should raise ValueError for the missing node
            with pytest.raises(ValueError, match="not found"):
                async for _ in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=mock_execution_context,
                    execution_processor=mock_execution_processor,
                ):
                    pass

    @pytest.mark.asyncio
    async def test_empty_execution_outputs(self):
        """
        Test handling when get_execution_outputs_by_node_exec_id returns empty.
        """
        import threading
        from collections import defaultdict

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        call_count = 0

        async def mock_llm_call(**kwargs):
            nonlocal call_count
            call_count += 1

            if call_count > 1:
                resp = MagicMock()
                resp.response = "Done"
                resp.tool_calls = []
                resp.prompt_tokens = 10
                resp.completion_tokens = 5
                resp.reasoning = None
                resp.raw_response = {"role": "assistant", "content": "Done"}
                return resp

            mock_tool_call = MagicMock()
            mock_tool_call.id = "call_1"
            mock_tool_call.function.name = "test_tool"
            mock_tool_call.function.arguments = json.dumps({})

            resp = MagicMock()
            resp.response = None
            resp.tool_calls = [mock_tool_call]
            resp.prompt_tokens = 50
            resp.completion_tokens = 25
            resp.reasoning = None
            resp.raw_response = {
                "role": "assistant",
                "content": [{"type": "tool_use", "id": "call_1"}]
            }
            return resp

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {},
                    "parameters": {"properties": {}, "required": []},
                },
            }
        ]

        mock_db_client = AsyncMock()
        mock_node = MagicMock()
        mock_node.block_id = "test-block"
        mock_db_client.get_node.return_value = mock_node
        mock_exec_result = MagicMock()
        mock_exec_result.node_exec_id = "exec-id"
        mock_db_client.upsert_execution_input.return_value = (mock_exec_result, {})
        mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {}  # Empty!

        with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
                patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
                patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):

            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = AsyncMock()
            mock_execution_processor.running_node_execution = defaultdict(MagicMock)
            mock_execution_processor.execution_stats = MagicMock()
            mock_execution_processor.execution_stats_lock = threading.Lock()
            mock_execution_processor.on_node_execution = AsyncMock(return_value=MagicMock(error=None))

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=2,
            )

            outputs = {}
            async for name, value in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=mock_execution_context,
                execution_processor=mock_execution_processor,
            ):
                outputs[name] = value

            # Empty outputs should be handled gracefully
            # (uses "Tool executed successfully" as fallback)
            assert "finished" in outputs or "conversations" in outputs


class TestErrorMessageContextLoss:
    """
    Tests for Failure Mode #15: Error Message Context Loss

    When exceptions are caught and converted to strings, important
    debugging information is lost.
    """

    def test_exception_to_string_loses_traceback(self):
        """
        Test that converting an exception to a string loses the traceback.
        """
        try:
            def inner():
                raise ValueError("Inner error")

            def outer():
                inner()

            outer()
        except Exception as e:
            error_string = str(e)
            error_repr = repr(e)

            # String representation loses the call stack
            assert "inner" not in error_string
            assert "outer" not in error_string

            # Even repr doesn't have the full traceback
            assert "Traceback" not in error_repr

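    # Added illustration (hedged; not part of the original test plan): the standard
    # library can retain exactly the context the tests above show being lost, so
    # preserving it is a matter of formatting the exception before flattening it.
    def test_format_exception_preserves_context(self):
        """Hedged sketch: traceback.format_exception keeps frames and the cause chain."""
        import traceback

        try:
            try:
                raise ConnectionError("inner failure")
            except ConnectionError as inner:
                raise ValueError("outer failure") from inner
        except ValueError as e:
            detailed = "".join(traceback.format_exception(type(e), e, e.__traceback__))

        # Unlike str(e), the formatted traceback keeps the stack and the chained cause.
        assert "Traceback" in detailed
        assert "ConnectionError" in detailed
        assert "direct cause" in detailed
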
    def test_tool_response_loses_exception_type(self):
        """
        Test that _create_tool_response loses exception type information.
        """
        original_error = ConnectionError("Database unreachable")
        tool_response = _create_tool_response(
            "call_123",
            f"Tool execution failed: {str(original_error)}"
        )

        content = tool_response.get("content", "")

        # Original exception type is lost
        assert "ConnectionError" not in content
        # Only the message remains
        assert "Database unreachable" in content

    @pytest.mark.asyncio
    async def test_agent_mode_error_response_lacks_context(self):
        """
        Test that agent mode error responses lack debugging context.
        """
        import threading
        from collections import defaultdict

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        mock_tool_call = MagicMock()
        mock_tool_call.id = "call_1"
        mock_tool_call.function.name = "test_tool"
        mock_tool_call.function.arguments = json.dumps({})

        mock_response_1 = MagicMock()
        mock_response_1.response = None
        mock_response_1.tool_calls = [mock_tool_call]
        mock_response_1.prompt_tokens = 50
        mock_response_1.completion_tokens = 25
        mock_response_1.reasoning = None
        mock_response_1.raw_response = {
            "role": "assistant",
            "content": [{"type": "tool_use", "id": "call_1"}]
        }

        mock_response_2 = MagicMock()
        mock_response_2.response = "Handled the error"
        mock_response_2.tool_calls = []
        mock_response_2.prompt_tokens = 30
        mock_response_2.completion_tokens = 15
        mock_response_2.reasoning = None
        mock_response_2.raw_response = {"role": "assistant", "content": "Handled"}

        call_count = 0

        async def mock_llm_call(**kwargs):
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return mock_response_1
            return mock_response_2

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {},
                    "parameters": {"properties": {}, "required": []},
                },
            }
        ]

        # Create a complex error with a nested cause
        class CustomDatabaseError(Exception):
            pass

        def create_complex_error():
            try:
                raise ConnectionError("Network timeout after 30s")
            except ConnectionError as e:
                raise CustomDatabaseError("Failed to connect to database") from e

        mock_db_client = AsyncMock()
        mock_node = MagicMock()
        mock_node.block_id = "test-block"
        mock_db_client.get_node.return_value = mock_node

        # Make upsert raise the complex error
        try:
            create_complex_error()
        except CustomDatabaseError as e:
            mock_db_client.upsert_execution_input.side_effect = e

        with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
                patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures), \
                patch("backend.blocks.smart_decision_maker.get_database_manager_async_client", return_value=mock_db_client):

            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = AsyncMock()
            mock_execution_processor.running_node_execution = defaultdict(MagicMock)
            mock_execution_processor.execution_stats = MagicMock()
            mock_execution_processor.execution_stats_lock = threading.Lock()

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=2,
            )

            outputs = {}
            async for name, value in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=mock_execution_context,
                execution_processor=mock_execution_processor,
            ):
                outputs[name] = value

            # Check the conversation for error details
            conversations = outputs.get("conversations", [])
            error_found = False
            for msg in conversations:
                content = msg.get("content", "")
                if isinstance(content, list):
                    for item in content:
                        if item.get("type") == "tool_result":
                            result_content = item.get("content", "")
                            if "Error" in result_content or "failed" in result_content.lower():
                                error_found = True
                                # BUG: The error content lacks:
                                # - Exception type (CustomDatabaseError)
                                # - Chained cause (ConnectionError)
                                # - Stack trace
                                assert "CustomDatabaseError" not in result_content
                                assert "ConnectionError" not in result_content

            # Note: error_found may be False if the error prevented tool response creation


class TestRawResponseConversion:
    """Tests for _convert_raw_response_to_dict edge cases."""

    def test_string_response_converted(self):
        """Test that string responses are properly wrapped."""
        result = _convert_raw_response_to_dict("Hello, world!")
        assert result == {"role": "assistant", "content": "Hello, world!"}

    def test_dict_response_unchanged(self):
        """Test that dict responses are passed through."""
        original = {"role": "assistant", "content": "test", "extra": "field"}
        result = _convert_raw_response_to_dict(original)
        assert result == original

    def test_object_response_converted(self):
        """Test that objects are converted using json.to_dict."""
        mock_obj = MagicMock()

        with patch("backend.blocks.smart_decision_maker.json.to_dict") as mock_to_dict:
            mock_to_dict.return_value = {"converted": True}
            result = _convert_raw_response_to_dict(mock_obj)
            mock_to_dict.assert_called_once_with(mock_obj)
            assert result == {"converted": True}

    def test_none_response(self):
        """Test handling of a None response."""
        with patch("backend.blocks.smart_decision_maker.json.to_dict") as mock_to_dict:
            mock_to_dict.return_value = None
            result = _convert_raw_response_to_dict(None)
            # None is not a string or dict, so it goes through to_dict
            assert result is None


class TestValidationRetryMechanism:
    """Tests for the validation and retry mechanism."""

    @pytest.mark.asyncio
    async def test_validation_error_triggers_retry(self):
        """
        Test that validation errors trigger a retry with feedback.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        call_count = 0

        async def mock_llm_call(**kwargs):
            nonlocal call_count
            call_count += 1

            prompt = kwargs.get("prompt", [])

            if call_count == 1:
                # First call: return tool call with wrong parameter
                mock_tool_call = MagicMock()
                mock_tool_call.function.name = "test_tool"
                mock_tool_call.function.arguments = json.dumps({"wrong_param": "value"})

                resp = MagicMock()
                resp.response = None
                resp.tool_calls = [mock_tool_call]
                resp.prompt_tokens = 50
                resp.completion_tokens = 25
                resp.reasoning = None
                resp.raw_response = {"role": "assistant", "content": None}
                return resp
            else:
                # Second call: check that error feedback was added
                has_error_feedback = any(
                    "parameter errors" in str(msg.get("content", "")).lower()
                    for msg in prompt
                )

                # Return correct tool call
                mock_tool_call = MagicMock()
                mock_tool_call.function.name = "test_tool"
                mock_tool_call.function.arguments = json.dumps({"correct_param": "value"})

                resp = MagicMock()
                resp.response = None
                resp.tool_calls = [mock_tool_call]
                resp.prompt_tokens = 50
                resp.completion_tokens = 25
                resp.reasoning = None
                resp.raw_response = {"role": "assistant", "content": None}
                return resp

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {"correct_param": "correct_param"},
                    "parameters": {
                        "properties": {"correct_param": {"type": "string"}},
                        "required": ["correct_param"],
                    },
                },
            }
        ]

        with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
                patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,  # Traditional mode
                retry=3,
            )

            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = MagicMock()

            outputs = {}
            async for name, value in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph",
                node_id="test-node",
                graph_exec_id="test-exec",
                node_exec_id="test-node-exec",
                user_id="test-user",
                graph_version=1,
                execution_context=mock_execution_context,
                execution_processor=mock_execution_processor,
            ):
                outputs[name] = value

            # Should have made multiple calls due to retry
            assert call_count >= 2

    @pytest.mark.asyncio
    async def test_max_retries_exceeded(self):
        """
        Test behavior when max retries are exceeded.
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        async def mock_llm_call(**kwargs):
            # Always return an invalid tool call
            mock_tool_call = MagicMock()
            mock_tool_call.function.name = "test_tool"
            mock_tool_call.function.arguments = json.dumps({"wrong": "param"})

            resp = MagicMock()
            resp.response = None
            resp.tool_calls = [mock_tool_call]
            resp.prompt_tokens = 50
            resp.completion_tokens = 25
            resp.reasoning = None
            resp.raw_response = {"role": "assistant", "content": None}
            return resp

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "test_tool",
                    "_sink_node_id": "sink",
                    "_field_mapping": {"correct": "correct"},
                    "parameters": {
                        "properties": {"correct": {"type": "string"}},
                        "required": ["correct"],
                    },
                },
            }
        ]

        with patch("backend.blocks.llm.llm_call", side_effect=mock_llm_call), \
                patch.object(block, "_create_tool_node_signatures", return_value=mock_tool_signatures):

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
                retry=2,  # Only 2 retries
            )

            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = MagicMock()

            # Should raise ValueError after max retries
            with pytest.raises(ValueError, match="parameter errors"):
                async for _ in block.run(
                    input_data,
                    credentials=llm_module.TEST_CREDENTIALS,
                    graph_id="test-graph",
                    node_id="test-node",
                    graph_exec_id="test-exec",
                    node_exec_id="test-node-exec",
                    user_id="test-user",
                    graph_version=1,
                    execution_context=mock_execution_context,
                    execution_processor=mock_execution_processor,
                ):
                    pass

@@ -0,0 +1,819 @@
"""
Comprehensive tests for SmartDecisionMakerBlock pin name sanitization.

This test file addresses the critical bug where field names with spaces/special characters
(e.g., "Max Keyword Difficulty") are not consistently sanitized between frontend and backend,
causing tool calls to "go into the void".

The core issue:
- Frontend connects the link with the original name: tools_^_{node_id}_~_Max Keyword Difficulty
- Backend emits with the sanitized name: tools_^_{node_id}_~_max_keyword_difficulty
- parse_execution_output compares sink_pin_name directly without sanitization
- Result: the mismatch causes tool calls to fail silently
"""

import json
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.blocks.smart_decision_maker import SmartDecisionMakerBlock
from backend.data.dynamic_fields import (
    parse_execution_output,
    sanitize_pin_name,
)

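# A minimal sketch of the naming mismatch exercised below (names are illustrative):
#
#   frontend link sink_name : "Max Keyword Difficulty"
#   backend emit key        : "tools_^_<sink_node_id>_~_max_keyword_difficulty"
#
# parse_execution_output compares the pin name stored on the link against the field
# embedded in the emit key, so routing only succeeds when both sides agree on the
# sanitized form.
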
class TestCleanupFunction:
    """Tests for the SmartDecisionMakerBlock.cleanup() static method."""

    def test_cleanup_spaces_to_underscores(self):
        """Spaces should be replaced with underscores."""
        assert SmartDecisionMakerBlock.cleanup("Max Keyword Difficulty") == "max_keyword_difficulty"

    def test_cleanup_mixed_case_to_lowercase(self):
        """Mixed case should be converted to lowercase."""
        assert SmartDecisionMakerBlock.cleanup("MaxKeywordDifficulty") == "maxkeyworddifficulty"
        assert SmartDecisionMakerBlock.cleanup("UPPER_CASE") == "upper_case"

    def test_cleanup_special_characters(self):
        """Special characters should be replaced with underscores."""
        assert SmartDecisionMakerBlock.cleanup("field@name!") == "field_name_"
        assert SmartDecisionMakerBlock.cleanup("value#1") == "value_1"
        assert SmartDecisionMakerBlock.cleanup("test$value") == "test_value"
        assert SmartDecisionMakerBlock.cleanup("a%b^c") == "a_b_c"

    def test_cleanup_preserves_valid_characters(self):
        """Valid characters (alphanumeric, underscore, hyphen) should be preserved."""
        assert SmartDecisionMakerBlock.cleanup("valid_name-123") == "valid_name-123"
        assert SmartDecisionMakerBlock.cleanup("abc123") == "abc123"

    def test_cleanup_empty_string(self):
        """Empty string should return empty string."""
        assert SmartDecisionMakerBlock.cleanup("") == ""

    def test_cleanup_only_special_chars(self):
        """A string of only special characters should return underscores."""
        assert SmartDecisionMakerBlock.cleanup("@#$%") == "____"

    def test_cleanup_unicode_characters(self):
        """Unicode characters should be replaced with underscores."""
        assert SmartDecisionMakerBlock.cleanup("café") == "caf_"
        assert SmartDecisionMakerBlock.cleanup("日本語") == "___"

    def test_cleanup_multiple_consecutive_spaces(self):
        """Multiple consecutive spaces should become multiple underscores."""
        assert SmartDecisionMakerBlock.cleanup("a   b") == "a___b"

    def test_cleanup_leading_trailing_spaces(self):
        """Leading/trailing spaces should become underscores."""
        assert SmartDecisionMakerBlock.cleanup(" name ") == "_name_"

    def test_cleanup_realistic_field_names(self):
        """Test realistic field names from actual use cases."""
        # From the reported bug
        assert SmartDecisionMakerBlock.cleanup("Max Keyword Difficulty") == "max_keyword_difficulty"
        # Other realistic names
        assert SmartDecisionMakerBlock.cleanup("Search Query") == "search_query"
        assert SmartDecisionMakerBlock.cleanup("API Response (JSON)") == "api_response__json_"
        assert SmartDecisionMakerBlock.cleanup("User's Input") == "user_s_input"


class TestFieldMappingCreation:
    """Tests for field mapping creation in function signatures."""

    @pytest.mark.asyncio
    async def test_field_mapping_with_spaces_in_names(self):
        """Test that field mapping correctly maps clean names back to original names with spaces."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node-id"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test description"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": ["Max Keyword Difficulty"]}
        )

        def get_field_schema(field_name):
            if field_name == "Max Keyword Difficulty":
                return {"type": "integer", "description": "Maximum keyword difficulty (0-100)"}
            raise KeyError(f"Field {field_name} not found")

        mock_node.block.input_schema.get_field_schema = get_field_schema

        mock_links = [
            Mock(
                source_name="tools_^_test_~_max_keyword_difficulty",
                sink_name="Max Keyword Difficulty",  # Original name with spaces
                sink_id="test-node-id",
                source_id="smart_node_id",
            ),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        # Verify the cleaned name is used in properties
        properties = signature["function"]["parameters"]["properties"]
        assert "max_keyword_difficulty" in properties

        # Verify the field mapping maps back to the original
        field_mapping = signature["function"]["_field_mapping"]
        assert field_mapping["max_keyword_difficulty"] == "Max Keyword Difficulty"

    @pytest.mark.asyncio
    async def test_field_mapping_with_multiple_special_char_names(self):
        """Test field mapping with multiple fields containing special characters."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node-id"
        mock_node.block = Mock()
        mock_node.block.name = "SEO Tool"
        mock_node.block.description = "SEO analysis tool"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )

        def get_field_schema(field_name):
            schemas = {
                "Max Keyword Difficulty": {"type": "integer", "description": "Max difficulty"},
                "Search Volume (Monthly)": {"type": "integer", "description": "Monthly volume"},
                "CPC ($)": {"type": "number", "description": "Cost per click"},
                "Target URL": {"type": "string", "description": "URL to analyze"},
            }
            if field_name in schemas:
                return schemas[field_name]
            raise KeyError(f"Field {field_name} not found")

        mock_node.block.input_schema.get_field_schema = get_field_schema

        mock_links = [
            Mock(sink_name="Max Keyword Difficulty", sink_id="test-node-id", source_id="smart_node_id"),
            Mock(sink_name="Search Volume (Monthly)", sink_id="test-node-id", source_id="smart_node_id"),
            Mock(sink_name="CPC ($)", sink_id="test-node-id", source_id="smart_node_id"),
            Mock(sink_name="Target URL", sink_id="test-node-id", source_id="smart_node_id"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        properties = signature["function"]["parameters"]["properties"]
        field_mapping = signature["function"]["_field_mapping"]

        # Verify all cleaned names are in properties
        assert "max_keyword_difficulty" in properties
        assert "search_volume__monthly_" in properties
        assert "cpc____" in properties
        assert "target_url" in properties

        # Verify field mappings
        assert field_mapping["max_keyword_difficulty"] == "Max Keyword Difficulty"
        assert field_mapping["search_volume__monthly_"] == "Search Volume (Monthly)"
        assert field_mapping["cpc____"] == "CPC ($)"
        assert field_mapping["target_url"] == "Target URL"


class TestFieldNameCollision:
    """Tests for detecting field name collisions after sanitization."""

    @pytest.mark.asyncio
    async def test_collision_detection_same_sanitized_name(self):
        """Test behavior when different names sanitize to the same value."""
        block = SmartDecisionMakerBlock()

        # These three different names all sanitize to the same value
        name1 = "max keyword difficulty"  # -> max_keyword_difficulty
        name2 = "Max Keyword Difficulty"  # -> max_keyword_difficulty
        name3 = "MAX_KEYWORD_DIFFICULTY"  # -> max_keyword_difficulty

        assert SmartDecisionMakerBlock.cleanup(name1) == SmartDecisionMakerBlock.cleanup(name2)
        assert SmartDecisionMakerBlock.cleanup(name2) == SmartDecisionMakerBlock.cleanup(name3)

    @pytest.mark.asyncio
    async def test_collision_in_function_signature(self):
        """Test that collisions in sanitized names could cause issues."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node-id"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test description"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": []}
        )

        def get_field_schema(field_name):
            return {"type": "string", "description": f"Field: {field_name}"}

        mock_node.block.input_schema.get_field_schema = get_field_schema

        # Two different fields that sanitize to the same name
        mock_links = [
            Mock(sink_name="Test Field", sink_id="test-node-id", source_id="smart_node_id"),
            Mock(sink_name="test field", sink_id="test-node-id", source_id="smart_node_id"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        properties = signature["function"]["parameters"]["properties"]
        field_mapping = signature["function"]["_field_mapping"]

        # Both sanitize to "test_field" - only one will be in properties
        assert "test_field" in properties
        # The field_mapping will have the last one written
        assert field_mapping["test_field"] in ["Test Field", "test field"]


class TestOutputRouting:
    """Tests for output routing with sanitized names."""

    def test_emit_key_format_with_spaces(self):
        """Test that emit keys use sanitized field names."""
        block = SmartDecisionMakerBlock()

        original_field_name = "Max Keyword Difficulty"
        sink_node_id = "node-123"

        sanitized_name = block.cleanup(original_field_name)
        emit_key = f"tools_^_{sink_node_id}_~_{sanitized_name}"

        assert emit_key == "tools_^_node-123_~_max_keyword_difficulty"

    def test_parse_execution_output_exact_match(self):
        """Test parse_execution_output with exactly matching names."""
        output_item = ("tools_^_node-123_~_max_keyword_difficulty", 50)

        # When sink_pin_name matches the sanitized name, it should work
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="max_keyword_difficulty",
        )
        assert result == 50

    def test_parse_execution_output_mismatch_original_vs_sanitized(self):
        """
        CRITICAL TEST: This reproduces the exact bug reported.

        When the frontend creates a link with the original name "Max Keyword Difficulty"
        but the backend emits with the sanitized name "max_keyword_difficulty",
        the tool call should still be routed correctly.

        CURRENT BEHAVIOR (BUG): Returns None because the names don't match
        EXPECTED BEHAVIOR: Should return the value (50) after sanitizing both names
        """
        output_item = ("tools_^_node-123_~_max_keyword_difficulty", 50)

        # This is what happens: sink_pin_name comes from the frontend link (unsanitized)
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="Max Keyword Difficulty",  # Original name with spaces
        )

        # BUG: This currently returns None because:
        # - target_input_pin = "max_keyword_difficulty" (from emit key, sanitized)
        # - sink_pin_name = "Max Keyword Difficulty" (from link, original)
        # - They don't match, so routing fails
        #
        # TODO: When the bug is fixed, change this assertion to:
        #   assert result == 50
        assert result is None  # Current buggy behavior

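    # Sketch of the intended fix (an assumption drawn from the docstring above, not
    # the shipped implementation): sanitize the link's pin name before comparing,
    # e.g. `target_input_pin == SmartDecisionMakerBlock.cleanup(sink_pin_name)`,
    # after which the test above should assert `result == 50` instead of None.
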
    def test_parse_execution_output_with_sanitized_sink_pin(self):
        """Test that if sink_pin_name is pre-sanitized, routing works."""
        output_item = ("tools_^_node-123_~_max_keyword_difficulty", 50)

        # If sink_pin_name is already sanitized, routing works
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="node-123",
            sink_pin_name="max_keyword_difficulty",  # Pre-sanitized
        )
        assert result == 50


class TestProcessToolCallsMapping:
    """Tests for the _process_tool_calls method field mapping."""

    @pytest.mark.asyncio
    async def test_process_tool_calls_maps_clean_to_original(self):
        """Test that _process_tool_calls correctly maps clean names back to the originals."""
        block = SmartDecisionMakerBlock()

        mock_response = Mock()
        mock_tool_call = Mock()
        mock_tool_call.function.name = "seo_tool"
        mock_tool_call.function.arguments = json.dumps({
            "max_keyword_difficulty": 50,  # LLM uses clean name
            "search_query": "test query",
        })
        mock_response.tool_calls = [mock_tool_call]

        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "seo_tool",
                    "parameters": {
                        "properties": {
                            "max_keyword_difficulty": {"type": "integer"},
                            "search_query": {"type": "string"},
                        },
                        "required": ["max_keyword_difficulty", "search_query"],
                    },
                    "_sink_node_id": "test-sink-node",
                    "_field_mapping": {
                        "max_keyword_difficulty": "Max Keyword Difficulty",  # Original name
                        "search_query": "Search Query",
                    },
                },
            }
        ]

        processed = block._process_tool_calls(mock_response, tool_functions)

        assert len(processed) == 1
        tool_info = processed[0]

        # Verify input_data uses ORIGINAL field names
        assert "Max Keyword Difficulty" in tool_info.input_data
        assert "Search Query" in tool_info.input_data
        assert tool_info.input_data["Max Keyword Difficulty"] == 50
        assert tool_info.input_data["Search Query"] == "test query"


class TestToolOutputEmitting:
    """Tests for tool output emitting in traditional mode."""

    @pytest.mark.asyncio
    async def test_emit_keys_use_sanitized_names(self):
        """Test that emit keys always use sanitized field names."""
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        mock_tool_call = MagicMock()
        mock_tool_call.function.name = "seo_tool"
        mock_tool_call.function.arguments = json.dumps({
            "max_keyword_difficulty": 50,
        })

        mock_response = MagicMock()
        mock_response.response = None
        mock_response.tool_calls = [mock_tool_call]
        mock_response.prompt_tokens = 50
        mock_response.completion_tokens = 25
        mock_response.reasoning = None
        mock_response.raw_response = {"role": "assistant", "content": None}

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "seo_tool",
                    "_sink_node_id": "test-sink-node-id",
                    "_field_mapping": {
                        "max_keyword_difficulty": "Max Keyword Difficulty",
                    },
                    "parameters": {
                        "properties": {
                            "max_keyword_difficulty": {"type": "integer"},
                        },
                        "required": ["max_keyword_difficulty"],
                    },
                },
            }
        ]

        with patch(
            "backend.blocks.llm.llm_call",
            new_callable=AsyncMock,
            return_value=mock_response,
        ), patch.object(
            block, "_create_tool_node_signatures", return_value=mock_tool_signatures
        ):
            input_data = SmartDecisionMakerBlock.Input(
                prompt="Test prompt",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=0,
            )

            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = MagicMock()

            outputs = {}
            async for output_name, output_data in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph-id",
                node_id="test-node-id",
                graph_exec_id="test-exec-id",
                node_exec_id="test-node-exec-id",
                user_id="test-user-id",
                graph_version=1,
                execution_context=mock_execution_context,
                execution_processor=mock_execution_processor,
            ):
                outputs[output_name] = output_data

            # The emit key should use the sanitized field name
            # Even though the original was "Max Keyword Difficulty", emit uses the sanitized form
            assert "tools_^_test-sink-node-id_~_max_keyword_difficulty" in outputs
            assert outputs["tools_^_test-sink-node-id_~_max_keyword_difficulty"] == 50


class TestSanitizationConsistency:
    """Tests for ensuring sanitization is consistent throughout the pipeline."""

    @pytest.mark.asyncio
    async def test_full_round_trip_with_spaces(self):
        """
        Test the full round-trip of a field name with spaces through the system.

        This simulates:
        1. Frontend creates a link with sink_name="Max Keyword Difficulty"
        2. Backend creates the function signature with the cleaned property name
        3. LLM responds with the cleaned name
        4. Backend processes the response and maps back to the original
        5. Backend emits with the sanitized name
        6. Routing should match (currently broken)
        """
        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        original_field_name = "Max Keyword Difficulty"
        cleaned_field_name = SmartDecisionMakerBlock.cleanup(original_field_name)

        # Step 1: Simulate frontend link creation
        mock_link = Mock()
        mock_link.sink_name = original_field_name  # Frontend uses original
        mock_link.sink_id = "test-sink-node-id"
        mock_link.source_id = "smart-node-id"

        # Step 2: Create function signature
        mock_node = Mock()
        mock_node.id = "test-sink-node-id"
        mock_node.block = Mock()
        mock_node.block.name = "SEO Tool"
        mock_node.block.description = "SEO analysis"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": [original_field_name]}
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            return_value={"type": "integer", "description": "Max difficulty"}
        )

        signature = await block._create_block_function_signature(mock_node, [mock_link])

        # Verify the cleaned name is in properties
        assert cleaned_field_name in signature["function"]["parameters"]["properties"]
        # Verify the field mapping exists
        assert signature["function"]["_field_mapping"][cleaned_field_name] == original_field_name

        # Step 3: Simulate LLM response using the cleaned name
        mock_tool_call = MagicMock()
        mock_tool_call.function.name = "seo_tool"
        mock_tool_call.function.arguments = json.dumps({
            cleaned_field_name: 50  # LLM uses cleaned name
        })

        mock_response = MagicMock()
        mock_response.response = None
        mock_response.tool_calls = [mock_tool_call]
        mock_response.prompt_tokens = 50
        mock_response.completion_tokens = 25
        mock_response.reasoning = None
        mock_response.raw_response = {"role": "assistant", "content": None}

        # Prepare tool_functions as they would be in run()
        tool_functions = [
            {
                "type": "function",
                "function": {
                    "name": "seo_tool",
                    "_sink_node_id": "test-sink-node-id",
                    "_field_mapping": signature["function"]["_field_mapping"],
                    "parameters": signature["function"]["parameters"],
                },
            }
        ]

        # Step 4: Process tool calls
        processed = block._process_tool_calls(mock_response, tool_functions)
        assert len(processed) == 1
        # Input data should have the ORIGINAL name
        assert original_field_name in processed[0].input_data
        assert processed[0].input_data[original_field_name] == 50

        # Step 5: Emit key generation (from the run method logic)
        field_mapping = processed[0].field_mapping
        for clean_arg_name in signature["function"]["parameters"]["properties"]:
            original = field_mapping.get(clean_arg_name, clean_arg_name)
            sanitized_arg_name = block.cleanup(original)
            emit_key = f"tools_^_test-sink-node-id_~_{sanitized_arg_name}"

            # Emit key uses the sanitized name
            assert emit_key == f"tools_^_test-sink-node-id_~_{cleaned_field_name}"

        # Step 6: Routing check (this is where the bug manifests)
        emit_key = f"tools_^_test-sink-node-id_~_{cleaned_field_name}"
        output_item = (emit_key, 50)

        # Current routing uses the original sink_name from the link
        result = parse_execution_output(
            output_item,
            link_output_selector="tools",
            sink_node_id="test-sink-node-id",
            sink_pin_name=original_field_name,  # Frontend's original name
        )

        # BUG: This returns None because sanitized != original
        # When fixed, this should return 50
        assert result is None  # Current broken behavior

    def test_sanitization_is_idempotent(self):
        """Test that sanitizing an already sanitized name gives the same result."""
        original = "Max Keyword Difficulty"
        first_clean = SmartDecisionMakerBlock.cleanup(original)
        second_clean = SmartDecisionMakerBlock.cleanup(first_clean)

        assert first_clean == second_clean


class TestEdgeCases:
    """Tests for edge cases in the sanitization pipeline."""

    @pytest.mark.asyncio
    async def test_empty_field_name(self):
        """Test handling of an empty field name."""
        assert SmartDecisionMakerBlock.cleanup("") == ""

    @pytest.mark.asyncio
    async def test_very_long_field_name(self):
        """Test handling of very long field names."""
        long_name = "A" * 1000 + " " + "B" * 1000
        cleaned = SmartDecisionMakerBlock.cleanup(long_name)
        assert "_" in cleaned  # Space was replaced
        assert len(cleaned) == len(long_name)

    @pytest.mark.asyncio
    async def test_field_name_with_newlines(self):
        """Test handling of field names with newlines."""
        name_with_newline = "First Line\nSecond Line"
        cleaned = SmartDecisionMakerBlock.cleanup(name_with_newline)
        assert "\n" not in cleaned
        assert "_" in cleaned

    @pytest.mark.asyncio
    async def test_field_name_with_tabs(self):
        """Test handling of field names with tabs."""
        name_with_tab = "First\tSecond"
        cleaned = SmartDecisionMakerBlock.cleanup(name_with_tab)
        assert "\t" not in cleaned
        assert "_" in cleaned

    @pytest.mark.asyncio
    async def test_numeric_field_name(self):
        """Test handling of purely numeric field names."""
        assert SmartDecisionMakerBlock.cleanup("123") == "123"
        assert SmartDecisionMakerBlock.cleanup("123 456") == "123_456"

    @pytest.mark.asyncio
    async def test_hyphenated_field_names(self):
        """Test that hyphens are preserved (valid in function names)."""
        assert SmartDecisionMakerBlock.cleanup("field-name") == "field-name"
        assert SmartDecisionMakerBlock.cleanup("Field-Name") == "field-name"


class TestDynamicFieldsWithSpaces:
    """Tests for dynamic fields with spaces in their names."""

    @pytest.mark.asyncio
    async def test_dynamic_dict_field_with_spaces(self):
        """Test dynamic dictionary fields where the key contains spaces."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node-id"
        mock_node.block = Mock()
        mock_node.block.name = "CreateDictionary"
        mock_node.block.description = "Creates a dictionary"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={"properties": {}, "required": ["values"]}
        )
        mock_node.block.input_schema.get_field_schema = Mock(
            side_effect=KeyError("not found")
        )

        # Dynamic field with a key containing spaces
        mock_links = [
            Mock(
                sink_name="values_#_User Name",  # Dict key with space
                sink_id="test-node-id",
                source_id="smart_node_id",
            ),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        properties = signature["function"]["parameters"]["properties"]
        field_mapping = signature["function"]["_field_mapping"]

        # The cleaned name should be in properties
        expected_clean = SmartDecisionMakerBlock.cleanup("values_#_User Name")
        assert expected_clean in properties

        # Field mapping should map back to the original
        assert field_mapping[expected_clean] == "values_#_User Name"


class TestAgentModeWithSpaces:
    """Tests for agent mode with field names containing spaces."""

    @pytest.mark.asyncio
    async def test_agent_mode_tool_execution_with_spaces(self):
        """Test that agent mode correctly handles field names with spaces."""
        import threading
        from collections import defaultdict

        import backend.blocks.llm as llm_module
        from backend.data.execution import ExecutionContext

        block = SmartDecisionMakerBlock()

        original_field = "Max Keyword Difficulty"
        clean_field = SmartDecisionMakerBlock.cleanup(original_field)

        mock_tool_call = MagicMock()
        mock_tool_call.id = "call_1"
        mock_tool_call.function.name = "seo_tool"
        mock_tool_call.function.arguments = json.dumps({
            clean_field: 50  # LLM uses clean name
        })

        mock_response_1 = MagicMock()
        mock_response_1.response = None
        mock_response_1.tool_calls = [mock_tool_call]
        mock_response_1.prompt_tokens = 50
        mock_response_1.completion_tokens = 25
        mock_response_1.reasoning = None
        mock_response_1.raw_response = {
            "role": "assistant",
            "content": None,
            "tool_calls": [{"id": "call_1", "type": "function"}],
        }

        mock_response_2 = MagicMock()
        mock_response_2.response = "Task completed"
        mock_response_2.tool_calls = []
        mock_response_2.prompt_tokens = 30
        mock_response_2.completion_tokens = 15
        mock_response_2.reasoning = None
        mock_response_2.raw_response = {"role": "assistant", "content": "Task completed"}

        llm_call_mock = AsyncMock()
        llm_call_mock.side_effect = [mock_response_1, mock_response_2]

        mock_tool_signatures = [
            {
                "type": "function",
                "function": {
                    "name": "seo_tool",
                    "_sink_node_id": "test-sink-node-id",
                    "_field_mapping": {
                        clean_field: original_field,
                    },
                    "parameters": {
                        "properties": {
                            clean_field: {"type": "integer"},
                        },
                        "required": [clean_field],
                    },
                },
            }
        ]

        mock_db_client = AsyncMock()
        mock_node = MagicMock()
        mock_node.block_id = "test-block-id"
        mock_db_client.get_node.return_value = mock_node

        mock_node_exec_result = MagicMock()
        mock_node_exec_result.node_exec_id = "test-tool-exec-id"

        # The input data should use the ORIGINAL field name
        mock_input_data = {original_field: 50}
        mock_db_client.upsert_execution_input.return_value = (
            mock_node_exec_result,
            mock_input_data,
        )
        mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {
            "result": {"status": "success"}
        }

        with patch("backend.blocks.llm.llm_call", llm_call_mock), patch.object(
            block, "_create_tool_node_signatures", return_value=mock_tool_signatures
        ), patch(
            "backend.blocks.smart_decision_maker.get_database_manager_async_client",
            return_value=mock_db_client,
        ):
            mock_execution_context = ExecutionContext(safe_mode=False)
            mock_execution_processor = AsyncMock()
            mock_execution_processor.running_node_execution = defaultdict(MagicMock)
            mock_execution_processor.execution_stats = MagicMock()
            mock_execution_processor.execution_stats_lock = threading.Lock()

            mock_node_stats = MagicMock()
            mock_node_stats.error = None
            mock_execution_processor.on_node_execution = AsyncMock(
                return_value=mock_node_stats
            )

            input_data = SmartDecisionMakerBlock.Input(
                prompt="Analyze keywords",
                model=llm_module.DEFAULT_LLM_MODEL,
                credentials=llm_module.TEST_CREDENTIALS_INPUT,
                agent_mode_max_iterations=3,
            )

            outputs = {}
            async for output_name, output_data in block.run(
                input_data,
                credentials=llm_module.TEST_CREDENTIALS,
                graph_id="test-graph-id",
                node_id="test-node-id",
                graph_exec_id="test-exec-id",
                node_exec_id="test-node-exec-id",
                user_id="test-user-id",
                graph_version=1,
                execution_context=mock_execution_context,
                execution_processor=mock_execution_processor,
            ):
                outputs[output_name] = output_data

            # Verify upsert was called with the original field name
            upsert_calls = mock_db_client.upsert_execution_input.call_args_list
            assert len(upsert_calls) > 0
            # Check that the original field name was used
            for call in upsert_calls:
                input_name = call.kwargs.get("input_name") or call.args[2]
                # The input name should be the original (mapped back)
                assert input_name == original_field


class TestRequiredFieldsWithSpaces:
    """Tests for required field handling with spaces in names."""

    @pytest.mark.asyncio
    async def test_required_fields_use_clean_names(self):
        """Test that the required fields array uses clean names for API compatibility."""
        block = SmartDecisionMakerBlock()

        mock_node = Mock()
        mock_node.id = "test-node-id"
        mock_node.block = Mock()
        mock_node.block.name = "TestBlock"
        mock_node.block.description = "Test"
        mock_node.block.input_schema = Mock()
        mock_node.block.input_schema.jsonschema = Mock(
            return_value={
                "properties": {},
                "required": ["Max Keyword Difficulty", "Search Query"],
            }
        )

        def get_field_schema(field_name):
            return {"type": "string", "description": f"Field: {field_name}"}

        mock_node.block.input_schema.get_field_schema = get_field_schema

        mock_links = [
            Mock(sink_name="Max Keyword Difficulty", sink_id="test-node-id", source_id="smart_node_id"),
            Mock(sink_name="Search Query", sink_id="test-node-id", source_id="smart_node_id"),
        ]

        signature = await block._create_block_function_signature(mock_node, mock_links)

        required = signature["function"]["parameters"]["required"]

        # Required array should use CLEAN names for API compatibility
        assert "max_keyword_difficulty" in required
        assert "search_query" in required
        # Original names should NOT be in required
        assert "Max Keyword Difficulty" not in required
        assert "Search Query" not in required

@@ -0,0 +1,513 @@
"""
Tests for dynamic fields edge cases and failure modes.

Covers failure modes:
8. No Type Validation in Dynamic Field Merging
17. No Validation of Dynamic Field Paths
"""

from typing import Any

import pytest

from backend.data.dynamic_fields import (
    DICT_SPLIT,
    LIST_SPLIT,
    OBJC_SPLIT,
    extract_base_field_name,
    get_dynamic_field_description,
    is_dynamic_field,
    is_tool_pin,
    merge_execution_input,
    parse_execution_output,
    sanitize_pin_name,
)

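# Flattened-key conventions exercised below (taken from the test data; the exact
# delimiter constants are imported above as DICT_SPLIT / LIST_SPLIT / OBJC_SPLIT):
#
#   "values_#_name"  -> result["values"]["name"]   (dictionary entry)
#   "items_$_0"      -> result["items"][0]         (list element)
#   "user_@_email"   -> result["user"]["email"]    (object attribute)
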
class TestDynamicFieldMergingTypeValidation:
    """
    Tests for Failure Mode #8: No Type Validation in Dynamic Field Merging

    When merging dynamic fields, there's no validation that intermediate
    structures have the correct type, leading to potential type coercion errors.
    """

    def test_merge_dict_field_creates_dict(self):
        """Test that dictionary fields create a dict structure."""
        data = {
            "values_#_name": "Alice",
            "values_#_age": 30,
        }

        result = merge_execution_input(data)

        assert "values" in result
        assert isinstance(result["values"], dict)
        assert result["values"]["name"] == "Alice"
        assert result["values"]["age"] == 30

    def test_merge_list_field_creates_list(self):
        """Test that list fields create a list structure."""
        data = {
            "items_$_0": "first",
            "items_$_1": "second",
            "items_$_2": "third",
        }

        result = merge_execution_input(data)

        assert "items" in result
        assert isinstance(result["items"], list)
        assert result["items"] == ["first", "second", "third"]

    def test_merge_with_existing_primitive_type_conflict(self):
        """
        Test behavior when merging into an existing primitive value.

        BUG: If the base field already exists as a primitive,
        merging a dynamic field may fail or corrupt data.
        """
        # Pre-existing primitive value
        data = {
            "value": "I am a string",  # Primitive
            "value_#_key": "dict value",  # Dynamic dict field
        }

        # This may raise an error or produce unexpected results
        # depending on merge order and implementation
        try:
            result = merge_execution_input(data)
            # If it succeeds, check what happened
            # The primitive may have been overwritten
            if isinstance(result.get("value"), dict):
                # Primitive was converted to dict - data loss!
                assert "key" in result["value"]
            else:
                # Or the dynamic field was ignored
                pass
        except (TypeError, AttributeError):
            # Expected error when trying to merge into a primitive
            pass

    def test_merge_list_with_gaps(self):
        """Test merging list fields with non-contiguous indices."""
        data = {
            "items_$_0": "zero",
            "items_$_2": "two",  # Gap at index 1
            "items_$_5": "five",  # Larger gap
        }

        result = merge_execution_input(data)

        assert "items" in result
        # Check how gaps are handled
        items = result["items"]
        assert items[0] == "zero"
        # Index 1 may be None or missing
        assert items[2] == "two"
        assert items[5] == "five"

    def test_merge_nested_dynamic_fields(self):
        """Test merging deeply nested dynamic fields."""
        data = {
            "data_#_users_$_0": "user1",
            "data_#_users_$_1": "user2",
            "data_#_config_#_enabled": True,
        }

        result = merge_execution_input(data)

        # Complex nested structures should be created
        assert "data" in result

    def test_merge_object_field(self):
        """Test merging object attribute fields."""
        data = {
            "user_@_name": "Alice",
            "user_@_email": "alice@example.com",
        }

        result = merge_execution_input(data)

        assert "user" in result
        # Object fields create a dict-like structure
        assert result["user"]["name"] == "Alice"
        assert result["user"]["email"] == "alice@example.com"

    def test_merge_mixed_field_types(self):
        """Test merging mixed regular and dynamic fields."""
        data = {
            "regular": "value",
            "dict_field_#_key": "dict_value",
            "list_field_$_0": "list_item",
        }

        result = merge_execution_input(data)

        assert result["regular"] == "value"
        assert result["dict_field"]["key"] == "dict_value"
        assert result["list_field"][0] == "list_item"


class TestDynamicFieldPathValidation:
    """
    Tests for Failure Mode #17: No Validation of Dynamic Field Paths

    When traversing dynamic field paths, intermediate None values
    can cause TypeErrors instead of graceful failures.
    """

    def test_parse_output_with_none_intermediate(self):
        """
        Test parse_execution_output with a None intermediate value.

        If data contains {"items": None} and we try to access items[0],
        it should return None gracefully, not raise TypeError.
        """
        # Output with nested path
        output_item = ("data_$_0", "value")

        # When the base is None, should return None
        # This tests the path traversal logic
        result = parse_execution_output(
            output_item,
            link_output_selector="data",
            sink_node_id=None,
            sink_pin_name=None,
        )

        # Should handle gracefully (return the value or None)
        # Not raise TypeError

    def test_extract_base_field_name_with_multiple_delimiters(self):
        """Test extracting the base name with multiple delimiters."""
        # Multiple dict delimiters
        assert extract_base_field_name("a_#_b_#_c") == "a"

        # Multiple list delimiters
        assert extract_base_field_name("a_$_0_$_1") == "a"

        # Mixed delimiters
        assert extract_base_field_name("a_#_b_$_0") == "a"

    def test_is_dynamic_field_edge_cases(self):
        """Test is_dynamic_field with edge cases."""
        # Standard dynamic fields
        assert is_dynamic_field("values_#_key") is True
        assert is_dynamic_field("items_$_0") is True
        assert is_dynamic_field("obj_@_attr") is True

        # Regular fields
        assert is_dynamic_field("regular") is False
        assert is_dynamic_field("with_underscore") is False

        # Edge cases
        assert is_dynamic_field("") is False
        assert is_dynamic_field("_#_") is True  # Just the delimiter
        assert is_dynamic_field("a_#_") is True  # Trailing delimiter

    def test_sanitize_pin_name_with_tool_pins(self):
        """Test sanitize_pin_name with various tool pin formats."""
        # Tool pins should return "tools"
        assert sanitize_pin_name("tools") == "tools"
        assert sanitize_pin_name("tools_^_node_~_field") == "tools"

        # Dynamic fields should return the base name
        assert sanitize_pin_name("values_#_key") == "values"
        assert sanitize_pin_name("items_$_0") == "items"

        # Regular fields unchanged
        assert sanitize_pin_name("regular") == "regular"


class TestDynamicFieldDescriptions:
    """Tests for dynamic field description generation."""

    def test_dict_field_description(self):
        """Test description for dictionary fields."""
        desc = get_dynamic_field_description("values_#_user_name")

        assert "Dictionary field" in desc
        assert "values['user_name']" in desc

    def test_list_field_description(self):
        """Test description for list fields."""
|
||||
desc = get_dynamic_field_description("items_$_0")
|
||||
|
||||
assert "List item 0" in desc
|
||||
assert "items[0]" in desc
|
||||
|
||||
def test_object_field_description(self):
|
||||
"""Test description for object fields."""
|
||||
desc = get_dynamic_field_description("user_@_email")
|
||||
|
||||
assert "Object attribute" in desc
|
||||
assert "user.email" in desc
|
||||
|
||||
def test_regular_field_description(self):
|
||||
"""Test description for regular (non-dynamic) fields."""
|
||||
desc = get_dynamic_field_description("regular_field")
|
||||
|
||||
assert desc == "Value for regular_field"
|
||||
|
||||
def test_description_with_numeric_key(self):
|
||||
"""Test description with numeric dictionary key."""
|
||||
desc = get_dynamic_field_description("values_#_123")
|
||||
|
||||
assert "Dictionary field" in desc
|
||||
assert "values['123']" in desc
|
||||
|
||||
|
||||
class TestParseExecutionOutputToolRouting:
|
||||
"""Tests for tool pin routing in parse_execution_output."""
|
||||
|
||||
def test_tool_pin_routing_exact_match(self):
|
||||
"""Test tool pin routing with exact match."""
|
||||
output_item = ("tools_^_node-123_~_field_name", "value")
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="node-123",
|
||||
sink_pin_name="field_name",
|
||||
)
|
||||
|
||||
assert result == "value"
|
||||
|
||||
def test_tool_pin_routing_node_mismatch(self):
|
||||
"""Test tool pin routing with node ID mismatch."""
|
||||
output_item = ("tools_^_node-123_~_field_name", "value")
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="different-node",
|
||||
sink_pin_name="field_name",
|
||||
)
|
||||
|
||||
assert result is None
|
||||
|
||||
def test_tool_pin_routing_field_mismatch(self):
|
||||
"""Test tool pin routing with field name mismatch."""
|
||||
output_item = ("tools_^_node-123_~_field_name", "value")
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="node-123",
|
||||
sink_pin_name="different_field",
|
||||
)
|
||||
|
||||
assert result is None
|
||||
|
||||
def test_tool_pin_missing_required_params(self):
|
||||
"""Test that tool pins require node_id and pin_name."""
|
||||
output_item = ("tools_^_node-123_~_field", "value")
|
||||
|
||||
with pytest.raises(ValueError, match="must be provided"):
|
||||
parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id=None,
|
||||
sink_pin_name="field",
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="must be provided"):
|
||||
parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="node-123",
|
||||
sink_pin_name=None,
|
||||
)
|
||||
|
||||
|
||||
class TestParseExecutionOutputDynamicFields:
|
||||
"""Tests for dynamic field routing in parse_execution_output."""
|
||||
|
||||
def test_dict_field_extraction(self):
|
||||
"""Test extraction of dictionary field value."""
|
||||
# The output_item is (field_name, data_structure)
|
||||
data = {"key1": "value1", "key2": "value2"}
|
||||
output_item = ("values", data)
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="values_#_key1",
|
||||
sink_node_id=None,
|
||||
sink_pin_name=None,
|
||||
)
|
||||
|
||||
assert result == "value1"
|
||||
|
||||
def test_list_field_extraction(self):
|
||||
"""Test extraction of list item value."""
|
||||
data = ["zero", "one", "two"]
|
||||
output_item = ("items", data)
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="items_$_1",
|
||||
sink_node_id=None,
|
||||
sink_pin_name=None,
|
||||
)
|
||||
|
||||
assert result == "one"
|
||||
|
||||
def test_nested_field_extraction(self):
|
||||
"""Test extraction of nested field value."""
|
||||
data = {
|
||||
"users": [
|
||||
{"name": "Alice", "email": "alice@example.com"},
|
||||
{"name": "Bob", "email": "bob@example.com"},
|
||||
]
|
||||
}
|
||||
output_item = ("data", data)
|
||||
|
||||
# Access nested path
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="data_#_users",
|
||||
sink_node_id=None,
|
||||
sink_pin_name=None,
|
||||
)
|
||||
|
||||
assert result == data["users"]
|
||||
|
||||
def test_missing_key_returns_none(self):
|
||||
"""Test that missing keys return None."""
|
||||
data = {"existing": "value"}
|
||||
output_item = ("values", data)
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="values_#_nonexistent",
|
||||
sink_node_id=None,
|
||||
sink_pin_name=None,
|
||||
)
|
||||
|
||||
assert result is None
|
||||
|
||||
def test_index_out_of_bounds_returns_none(self):
|
||||
"""Test that out-of-bounds indices return None."""
|
||||
data = ["zero", "one"]
|
||||
output_item = ("items", data)
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="items_$_99",
|
||||
sink_node_id=None,
|
||||
sink_pin_name=None,
|
||||
)
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestIsToolPin:
|
||||
"""Tests for is_tool_pin function."""
|
||||
|
||||
def test_tools_prefix(self):
|
||||
"""Test that 'tools_^_' prefix is recognized."""
|
||||
assert is_tool_pin("tools_^_node_~_field") is True
|
||||
assert is_tool_pin("tools_^_anything") is True
|
||||
|
||||
def test_tools_exact(self):
|
||||
"""Test that exact 'tools' is recognized."""
|
||||
assert is_tool_pin("tools") is True
|
||||
|
||||
def test_non_tool_pins(self):
|
||||
"""Test that non-tool pins are not recognized."""
|
||||
assert is_tool_pin("input") is False
|
||||
assert is_tool_pin("output") is False
|
||||
assert is_tool_pin("toolsomething") is False
|
||||
assert is_tool_pin("my_tools") is False
|
||||
assert is_tool_pin("") is False
|
||||
|
||||
|
||||
class TestMergeExecutionInputEdgeCases:
|
||||
"""Edge case tests for merge_execution_input."""
|
||||
|
||||
def test_empty_input(self):
|
||||
"""Test merging empty input."""
|
||||
result = merge_execution_input({})
|
||||
assert result == {}
|
||||
|
||||
def test_only_regular_fields(self):
|
||||
"""Test merging only regular fields (no dynamic)."""
|
||||
data = {"a": 1, "b": 2, "c": 3}
|
||||
result = merge_execution_input(data)
|
||||
assert result == data
|
||||
|
||||
def test_overwrite_behavior(self):
|
||||
"""Test behavior when same key is set multiple times."""
|
||||
# This shouldn't happen in practice, but test the behavior
|
||||
data = {
|
||||
"values_#_key": "first",
|
||||
}
|
||||
result = merge_execution_input(data)
|
||||
assert result["values"]["key"] == "first"
|
||||
|
||||
def test_numeric_string_keys(self):
|
||||
"""Test handling of numeric string keys in dict fields."""
|
||||
data = {
|
||||
"values_#_123": "numeric_key",
|
||||
"values_#_456": "another_numeric",
|
||||
}
|
||||
result = merge_execution_input(data)
|
||||
|
||||
assert result["values"]["123"] == "numeric_key"
|
||||
assert result["values"]["456"] == "another_numeric"
|
||||
|
||||
def test_special_characters_in_keys(self):
|
||||
"""Test handling of special characters in keys."""
|
||||
data = {
|
||||
"values_#_key-with-dashes": "value1",
|
||||
"values_#_key.with.dots": "value2",
|
||||
}
|
||||
result = merge_execution_input(data)
|
||||
|
||||
assert result["values"]["key-with-dashes"] == "value1"
|
||||
assert result["values"]["key.with.dots"] == "value2"
|
||||
|
||||
def test_deeply_nested_list(self):
|
||||
"""Test deeply nested list indices."""
|
||||
data = {
|
||||
"matrix_$_0_$_0": "0,0",
|
||||
"matrix_$_0_$_1": "0,1",
|
||||
"matrix_$_1_$_0": "1,0",
|
||||
"matrix_$_1_$_1": "1,1",
|
||||
}
|
||||
|
||||
# Note: Current implementation may not support this depth
|
||||
# Test documents expected behavior
|
||||
try:
|
||||
result = merge_execution_input(data)
|
||||
# If supported, verify structure
|
||||
except (KeyError, TypeError, IndexError):
|
||||
# Deep nesting may not be supported
|
||||
pass
|
||||
|
||||
def test_none_values(self):
|
||||
"""Test handling of None values in input."""
|
||||
data = {
|
||||
"regular": None,
|
||||
"dict_#_key": None,
|
||||
"list_$_0": None,
|
||||
}
|
||||
|
||||
result = merge_execution_input(data)
|
||||
|
||||
assert result["regular"] is None
|
||||
assert result["dict"]["key"] is None
|
||||
assert result["list"][0] is None
|
||||
|
||||
def test_complex_values(self):
|
||||
"""Test handling of complex values (dicts, lists)."""
|
||||
data = {
|
||||
"values_#_nested_dict": {"inner": "value"},
|
||||
"values_#_nested_list": [1, 2, 3],
|
||||
}
|
||||
|
||||
result = merge_execution_input(data)
|
||||
|
||||
assert result["values"]["nested_dict"] == {"inner": "value"}
|
||||
assert result["values"]["nested_list"] == [1, 2, 3]
|
||||
@@ -0,0 +1,463 @@
|
||||
"""
|
||||
Tests for dynamic field routing with sanitized names.
|
||||
|
||||
This test file specifically tests the parse_execution_output function
|
||||
which is responsible for routing tool outputs to the correct nodes.
|
||||
The critical bug this addresses is the mismatch between:
|
||||
- emit keys using sanitized names (e.g., "max_keyword_difficulty")
|
||||
- sink_pin_name using original names (e.g., "Max Keyword Difficulty")
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.data.dynamic_fields import (
|
||||
DICT_SPLIT,
|
||||
LIST_SPLIT,
|
||||
OBJC_SPLIT,
|
||||
extract_base_field_name,
|
||||
get_dynamic_field_description,
|
||||
is_dynamic_field,
|
||||
is_tool_pin,
|
||||
merge_execution_input,
|
||||
parse_execution_output,
|
||||
sanitize_pin_name,
|
||||
)
|
||||
|
||||
|
||||
def cleanup(s: str) -> str:
|
||||
"""
|
||||
Simulate SmartDecisionMakerBlock.cleanup() for testing.
|
||||
Clean up names for use as tool function names.
|
||||
"""
|
||||
return re.sub(r"[^a-zA-Z0-9_-]", "_", s).lower()
|
||||
|
||||
|
||||
class TestParseExecutionOutputToolRouting:
|
||||
"""Tests for tool pin routing in parse_execution_output."""
|
||||
|
||||
def test_exact_match_routes_correctly(self):
|
||||
"""When emit key field exactly matches sink_pin_name, routing works."""
|
||||
output_item = ("tools_^_node-123_~_query", "test value")
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="node-123",
|
||||
sink_pin_name="query",
|
||||
)
|
||||
assert result == "test value"
|
||||
|
||||
def test_sanitized_emit_vs_original_sink_fails(self):
|
||||
"""
|
||||
CRITICAL BUG TEST: When emit key uses sanitized name but sink uses original,
|
||||
routing fails.
|
||||
"""
|
||||
# Backend emits with sanitized name
|
||||
sanitized_field = cleanup("Max Keyword Difficulty")
|
||||
output_item = (f"tools_^_node-123_~_{sanitized_field}", 50)
|
||||
|
||||
# Frontend link has original name
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="node-123",
|
||||
sink_pin_name="Max Keyword Difficulty", # Original name
|
||||
)
|
||||
|
||||
# BUG: This returns None because sanitized != original
|
||||
# Once fixed, change this to: assert result == 50
|
||||
assert result is None, "Expected None due to sanitization mismatch bug"
|
||||
|
||||
def test_node_id_mismatch_returns_none(self):
|
||||
"""When node IDs don't match, routing should return None."""
|
||||
output_item = ("tools_^_node-123_~_query", "test value")
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="different-node", # Different node
|
||||
sink_pin_name="query",
|
||||
)
|
||||
assert result is None
|
||||
|
||||
def test_both_node_and_pin_must_match(self):
|
||||
"""Both node_id and pin_name must match for routing to succeed."""
|
||||
output_item = ("tools_^_node-123_~_query", "test value")
|
||||
|
||||
# Wrong node, right pin
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="wrong-node",
|
||||
sink_pin_name="query",
|
||||
)
|
||||
assert result is None
|
||||
|
||||
# Right node, wrong pin
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="node-123",
|
||||
sink_pin_name="wrong_pin",
|
||||
)
|
||||
assert result is None
|
||||
|
||||
# Right node, right pin
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="node-123",
|
||||
sink_pin_name="query",
|
||||
)
|
||||
assert result == "test value"
|
||||
|
||||
|
||||
class TestToolPinRoutingWithSpecialCharacters:
|
||||
"""Tests for tool pin routing with various special characters in names."""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"original_name,sanitized_name",
|
||||
[
|
||||
("Max Keyword Difficulty", "max_keyword_difficulty"),
|
||||
("Search Volume (Monthly)", "search_volume__monthly_"),
|
||||
("CPC ($)", "cpc____"),
|
||||
("User's Input", "user_s_input"),
|
||||
("Query #1", "query__1"),
|
||||
("API.Response", "api_response"),
|
||||
("Field@Name", "field_name"),
|
||||
("Test\tTab", "test_tab"),
|
||||
("Test\nNewline", "test_newline"),
|
||||
],
|
||||
)
|
||||
def test_routing_mismatch_with_special_chars(self, original_name, sanitized_name):
|
||||
"""
|
||||
Test that various special characters cause routing mismatches.
|
||||
|
||||
This test documents the current buggy behavior where sanitized emit keys
|
||||
don't match original sink_pin_names.
|
||||
"""
|
||||
# Verify sanitization
|
||||
assert cleanup(original_name) == sanitized_name
|
||||
|
||||
# Backend emits with sanitized name
|
||||
output_item = (f"tools_^_node-123_~_{sanitized_name}", "value")
|
||||
|
||||
# Frontend link has original name
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="node-123",
|
||||
sink_pin_name=original_name,
|
||||
)
|
||||
|
||||
# BUG: Returns None due to mismatch
|
||||
assert result is None, f"Routing should fail for '{original_name}' vs '{sanitized_name}'"
|
||||
|
||||
|
||||
class TestToolPinMissingParameters:
|
||||
"""Tests for missing required parameters in parse_execution_output."""
|
||||
|
||||
def test_missing_sink_node_id_raises_error(self):
|
||||
"""Missing sink_node_id should raise ValueError for tool pins."""
|
||||
output_item = ("tools_^_node-123_~_query", "test value")
|
||||
|
||||
with pytest.raises(ValueError, match="sink_node_id and sink_pin_name must be provided"):
|
||||
parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id=None,
|
||||
sink_pin_name="query",
|
||||
)
|
||||
|
||||
def test_missing_sink_pin_name_raises_error(self):
|
||||
"""Missing sink_pin_name should raise ValueError for tool pins."""
|
||||
output_item = ("tools_^_node-123_~_query", "test value")
|
||||
|
||||
with pytest.raises(ValueError, match="sink_node_id and sink_pin_name must be provided"):
|
||||
parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id="node-123",
|
||||
sink_pin_name=None,
|
||||
)
|
||||
|
||||
|
||||
class TestIsToolPin:
|
||||
"""Tests for is_tool_pin function."""
|
||||
|
||||
def test_tools_prefix_is_tool_pin(self):
|
||||
"""Names starting with 'tools_^_' are tool pins."""
|
||||
assert is_tool_pin("tools_^_node_~_field") is True
|
||||
assert is_tool_pin("tools_^_anything") is True
|
||||
|
||||
def test_tools_exact_is_tool_pin(self):
|
||||
"""Exact 'tools' is a tool pin."""
|
||||
assert is_tool_pin("tools") is True
|
||||
|
||||
def test_non_tool_pins(self):
|
||||
"""Non-tool pin names should return False."""
|
||||
assert is_tool_pin("input") is False
|
||||
assert is_tool_pin("output") is False
|
||||
assert is_tool_pin("my_tools") is False
|
||||
assert is_tool_pin("toolsomething") is False
|
||||
|
||||
|
||||
class TestSanitizePinName:
|
||||
"""Tests for sanitize_pin_name function."""
|
||||
|
||||
def test_extracts_base_from_dynamic_field(self):
|
||||
"""Should extract base field name from dynamic fields."""
|
||||
assert sanitize_pin_name("values_#_key") == "values"
|
||||
assert sanitize_pin_name("items_$_0") == "items"
|
||||
assert sanitize_pin_name("obj_@_attr") == "obj"
|
||||
|
||||
def test_returns_tools_for_tool_pins(self):
|
||||
"""Tool pins should be sanitized to 'tools'."""
|
||||
assert sanitize_pin_name("tools_^_node_~_field") == "tools"
|
||||
assert sanitize_pin_name("tools") == "tools"
|
||||
|
||||
def test_regular_field_unchanged(self):
|
||||
"""Regular field names should be unchanged."""
|
||||
assert sanitize_pin_name("query") == "query"
|
||||
assert sanitize_pin_name("max_difficulty") == "max_difficulty"
|
||||
|
||||
|
||||
class TestDynamicFieldDescriptions:
|
||||
"""Tests for dynamic field description generation."""
|
||||
|
||||
def test_dict_field_description_with_spaces_in_key(self):
|
||||
"""Dictionary field keys with spaces should generate correct descriptions."""
|
||||
# After cleanup, "User Name" becomes "user_name" in the field name
|
||||
# But the original key might have had spaces
|
||||
desc = get_dynamic_field_description("values_#_user_name")
|
||||
assert "Dictionary field" in desc
|
||||
assert "values['user_name']" in desc
|
||||
|
||||
def test_list_field_description(self):
|
||||
"""List field descriptions should include index."""
|
||||
desc = get_dynamic_field_description("items_$_0")
|
||||
assert "List item 0" in desc
|
||||
assert "items[0]" in desc
|
||||
|
||||
def test_object_field_description(self):
|
||||
"""Object field descriptions should include attribute."""
|
||||
desc = get_dynamic_field_description("user_@_email")
|
||||
assert "Object attribute" in desc
|
||||
assert "user.email" in desc
|
||||
|
||||
|
||||
class TestMergeExecutionInput:
|
||||
"""Tests for merge_execution_input function."""
|
||||
|
||||
def test_merges_dict_fields(self):
|
||||
"""Dictionary fields should be merged into nested structure."""
|
||||
data = {
|
||||
"values_#_name": "Alice",
|
||||
"values_#_age": 30,
|
||||
"other_field": "unchanged",
|
||||
}
|
||||
|
||||
result = merge_execution_input(data)
|
||||
|
||||
assert "values" in result
|
||||
assert result["values"]["name"] == "Alice"
|
||||
assert result["values"]["age"] == 30
|
||||
assert result["other_field"] == "unchanged"
|
||||
|
||||
def test_merges_list_fields(self):
|
||||
"""List fields should be merged into arrays."""
|
||||
data = {
|
||||
"items_$_0": "first",
|
||||
"items_$_1": "second",
|
||||
"items_$_2": "third",
|
||||
}
|
||||
|
||||
result = merge_execution_input(data)
|
||||
|
||||
assert "items" in result
|
||||
assert result["items"] == ["first", "second", "third"]
|
||||
|
||||
def test_merges_mixed_fields(self):
|
||||
"""Mixed regular and dynamic fields should all be preserved."""
|
||||
data = {
|
||||
"regular": "value",
|
||||
"dict_#_key": "dict_value",
|
||||
"list_$_0": "list_item",
|
||||
}
|
||||
|
||||
result = merge_execution_input(data)
|
||||
|
||||
assert result["regular"] == "value"
|
||||
assert result["dict"]["key"] == "dict_value"
|
||||
assert result["list"] == ["list_item"]
|
||||
|
||||
|
||||
class TestExtractBaseFieldName:
|
||||
"""Tests for extract_base_field_name function."""
|
||||
|
||||
def test_extracts_from_dict_delimiter(self):
|
||||
"""Should extract base name before _#_ delimiter."""
|
||||
assert extract_base_field_name("values_#_name") == "values"
|
||||
assert extract_base_field_name("user_#_email_#_domain") == "user"
|
||||
|
||||
def test_extracts_from_list_delimiter(self):
|
||||
"""Should extract base name before _$_ delimiter."""
|
||||
assert extract_base_field_name("items_$_0") == "items"
|
||||
assert extract_base_field_name("data_$_1_$_nested") == "data"
|
||||
|
||||
def test_extracts_from_object_delimiter(self):
|
||||
"""Should extract base name before _@_ delimiter."""
|
||||
assert extract_base_field_name("obj_@_attr") == "obj"
|
||||
|
||||
def test_no_delimiter_returns_original(self):
|
||||
"""Names without delimiters should be returned unchanged."""
|
||||
assert extract_base_field_name("regular_field") == "regular_field"
|
||||
assert extract_base_field_name("query") == "query"
|
||||
|
||||
|
||||
class TestIsDynamicField:
|
||||
"""Tests for is_dynamic_field function."""
|
||||
|
||||
def test_dict_delimiter_is_dynamic(self):
|
||||
"""Fields with _#_ are dynamic."""
|
||||
assert is_dynamic_field("values_#_key") is True
|
||||
|
||||
def test_list_delimiter_is_dynamic(self):
|
||||
"""Fields with _$_ are dynamic."""
|
||||
assert is_dynamic_field("items_$_0") is True
|
||||
|
||||
def test_object_delimiter_is_dynamic(self):
|
||||
"""Fields with _@_ are dynamic."""
|
||||
assert is_dynamic_field("obj_@_attr") is True
|
||||
|
||||
def test_regular_fields_not_dynamic(self):
|
||||
"""Regular field names without delimiters are not dynamic."""
|
||||
assert is_dynamic_field("regular_field") is False
|
||||
assert is_dynamic_field("query") is False
|
||||
assert is_dynamic_field("Max Keyword Difficulty") is False
|
||||
|
||||
|
||||
class TestRoutingEndToEnd:
|
||||
"""End-to-end tests for the full routing flow."""
|
||||
|
||||
def test_successful_routing_without_spaces(self):
|
||||
"""Full routing flow works when no spaces in names."""
|
||||
field_name = "query"
|
||||
node_id = "test-node-123"
|
||||
|
||||
# Emit key (as created by SmartDecisionMaker)
|
||||
emit_key = f"tools_^_{node_id}_~_{cleanup(field_name)}"
|
||||
output_item = (emit_key, "search term")
|
||||
|
||||
# Route (as called by executor)
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id=node_id,
|
||||
sink_pin_name=field_name,
|
||||
)
|
||||
|
||||
assert result == "search term"
|
||||
|
||||
def test_failed_routing_with_spaces(self):
|
||||
"""
|
||||
Full routing flow FAILS when names have spaces.
|
||||
|
||||
This test documents the exact bug scenario:
|
||||
1. Frontend creates link with sink_name="Max Keyword Difficulty"
|
||||
2. SmartDecisionMaker emits with sanitized name in key
|
||||
3. Executor calls parse_execution_output with original sink_pin_name
|
||||
4. Routing fails because names don't match
|
||||
"""
|
||||
original_field_name = "Max Keyword Difficulty"
|
||||
sanitized_field_name = cleanup(original_field_name)
|
||||
node_id = "test-node-123"
|
||||
|
||||
# Step 1 & 2: SmartDecisionMaker emits with sanitized name
|
||||
emit_key = f"tools_^_{node_id}_~_{sanitized_field_name}"
|
||||
output_item = (emit_key, 50)
|
||||
|
||||
# Step 3: Executor routes with original name from link
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id=node_id,
|
||||
sink_pin_name=original_field_name, # Original from link!
|
||||
)
|
||||
|
||||
# Step 4: BUG - Returns None instead of 50
|
||||
assert result is None
|
||||
|
||||
# This is what should happen after fix:
|
||||
# assert result == 50
|
||||
|
||||
def test_multiple_fields_with_spaces(self):
|
||||
"""Test routing multiple fields where some have spaces."""
|
||||
node_id = "test-node"
|
||||
|
||||
fields = {
|
||||
"query": "test", # No spaces - should work
|
||||
"Max Difficulty": 100, # Spaces - will fail
|
||||
"min_volume": 1000, # No spaces - should work
|
||||
}
|
||||
|
||||
results = {}
|
||||
for original_name, value in fields.items():
|
||||
sanitized = cleanup(original_name)
|
||||
emit_key = f"tools_^_{node_id}_~_{sanitized}"
|
||||
output_item = (emit_key, value)
|
||||
|
||||
result = parse_execution_output(
|
||||
output_item,
|
||||
link_output_selector="tools",
|
||||
sink_node_id=node_id,
|
||||
sink_pin_name=original_name,
|
||||
)
|
||||
results[original_name] = result
|
||||
|
||||
# Fields without spaces work
|
||||
assert results["query"] == "test"
|
||||
assert results["min_volume"] == 1000
|
||||
|
||||
# Fields with spaces fail
|
||||
assert results["Max Difficulty"] is None # BUG!
|
||||
|
||||
|
||||
class TestProposedFix:
|
||||
"""
|
||||
Tests for the proposed fix.
|
||||
|
||||
The fix should sanitize sink_pin_name before comparison in parse_execution_output.
|
||||
This class contains tests that will pass once the fix is implemented.
|
||||
"""
|
||||
|
||||
def test_routing_should_sanitize_both_sides(self):
|
||||
"""
|
||||
PROPOSED FIX: parse_execution_output should sanitize sink_pin_name
|
||||
before comparing with the field from emit key.
|
||||
|
||||
Current behavior: Direct string comparison
|
||||
Fixed behavior: Compare cleanup(target_input_pin) == cleanup(sink_pin_name)
|
||||
"""
|
||||
original_field = "Max Keyword Difficulty"
|
||||
sanitized_field = cleanup(original_field)
|
||||
node_id = "node-123"
|
||||
|
||||
emit_key = f"tools_^_{node_id}_~_{sanitized_field}"
|
||||
output_item = (emit_key, 50)
|
||||
|
||||
# Extract the comparison being made
|
||||
selector = emit_key[8:] # Remove "tools_^_"
|
||||
target_node_id, target_input_pin = selector.split("_~_", 1)
|
||||
|
||||
# Current comparison (FAILS):
|
||||
current_comparison = (target_input_pin == original_field)
|
||||
assert current_comparison is False, "Current comparison fails"
|
||||
|
||||
# Proposed fixed comparison (PASSES):
|
||||
# Either sanitize sink_pin_name, or sanitize both
|
||||
fixed_comparison = (target_input_pin == cleanup(original_field))
|
||||
assert fixed_comparison is True, "Fixed comparison should pass"
|
||||
596
autogpt_platform/frontend/src/tests/smart-decision-maker.spec.ts
Normal file
596
autogpt_platform/frontend/src/tests/smart-decision-maker.spec.ts
Normal file
@@ -0,0 +1,596 @@
|
||||
/**
|
||||
* E2E tests for SmartDecisionMaker block functionality.
|
||||
*
|
||||
* These tests verify the critical bug where field names with spaces
|
||||
* (e.g., "Max Keyword Difficulty") cause tool calls to fail due to
|
||||
* inconsistent sanitization between frontend and backend.
|
||||
*
|
||||
* The bug:
|
||||
* - Frontend creates links with original names: tools_^_{node_id}_~_Max Keyword Difficulty
|
||||
* - Backend emits with sanitized names: tools_^_{node_id}_~_max_keyword_difficulty
|
||||
* - Routing fails because names don't match
|
||||
*/
|
||||
|
||||
import test, { expect } from "@playwright/test";
|
||||
import { BuildPage, Block } from "./pages/build.page";
|
||||
import { LoginPage } from "./pages/login.page";
|
||||
import { hasUrl } from "./utils/assertion";
|
||||
import { getTestUser } from "./utils/auth";
|
||||
|
||||
test.describe("SmartDecisionMaker", () => {
|
||||
let buildPage: BuildPage;
|
||||
|
||||
test.beforeEach(async ({ page }) => {
|
||||
test.setTimeout(60000); // Longer timeout for complex tests
|
||||
const loginPage = new LoginPage(page);
|
||||
const testUser = await getTestUser();
|
||||
|
||||
buildPage = new BuildPage(page);
|
||||
|
||||
await page.goto("/login");
|
||||
await loginPage.login(testUser.email, testUser.password);
|
||||
await hasUrl(page, "/marketplace");
|
||||
await buildPage.navbar.clickBuildLink();
|
||||
await hasUrl(page, "/build");
|
||||
await buildPage.closeTutorial();
|
||||
});
|
||||
|
||||
/**
|
||||
* Helper to find SmartDecisionMaker block from API
|
||||
*/
|
||||
async function getSmartDecisionMakerBlock(): Promise<Block | undefined> {
|
||||
const blocks = await buildPage.getBlocksFromAPI();
|
||||
return blocks.find(
|
||||
(b) =>
|
||||
b.name.toLowerCase().includes("smart decision") ||
|
||||
b.name.toLowerCase().includes("ai decision") ||
|
||||
b.id === "3b191d9f-356f-482d-8238-ba04b6d18381"
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to find a block by partial name match
|
||||
*/
|
||||
async function findBlockByName(partialName: string): Promise<Block | undefined> {
|
||||
const blocks = await buildPage.getBlocksFromAPI();
|
||||
return blocks.find((b) =>
|
||||
b.name.toLowerCase().includes(partialName.toLowerCase())
|
||||
);
|
||||
}
|
||||
|
||||
test.describe("Block Addition", () => {
|
||||
test("can add SmartDecisionMaker block to canvas", async () => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const smartBlock = await getSmartDecisionMakerBlock();
|
||||
if (!smartBlock) {
|
||||
test.skip(true, "SmartDecisionMaker block not found in API");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock(smartBlock);
|
||||
await buildPage.closeBlocksPanel();
|
||||
await buildPage.hasBlock(smartBlock);
|
||||
});
|
||||
|
||||
test("SmartDecisionMaker block has expected input pins", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const smartBlock = await getSmartDecisionMakerBlock();
|
||||
if (!smartBlock) {
|
||||
test.skip(true, "SmartDecisionMaker block not found in API");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock(smartBlock);
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Verify expected input handles exist
|
||||
const blockElement = page.locator(`[data-blockid="${smartBlock.id}"]`).first();
|
||||
await expect(blockElement).toBeVisible();
|
||||
|
||||
// Check for common SmartDecisionMaker inputs
|
||||
const promptInput = blockElement.locator('[data-testid="input-handle-prompt"]');
|
||||
const modelInput = blockElement.locator('[data-testid="input-handle-model"]');
|
||||
|
||||
// At least the prompt input should exist
|
||||
await expect(promptInput).toBeAttached();
|
||||
});
|
||||
});
|
||||
|
||||
test.describe("Pin Name Handling", () => {
|
||||
test("block connections preserve original field names in UI", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
// Add a Store Value block to test connections
|
||||
const storeBlock = await findBlockByName("Store Value");
|
||||
if (!storeBlock) {
|
||||
test.skip(true, "Store Value block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Store Value 1",
|
||||
});
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Store Value 2",
|
||||
});
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Connect the blocks
|
||||
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||
"1-1-output-source",
|
||||
"1-2-input-target"
|
||||
);
|
||||
|
||||
// Verify connection was made
|
||||
const edge = page.locator(".react-flow__edge");
|
||||
await expect(edge.first()).toBeVisible();
|
||||
});
|
||||
|
||||
test("input handles are accessible for fields with various names", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
// Find a block that might have inputs with spaces/special chars
|
||||
const blocks = await buildPage.getBlocksFromAPI();
|
||||
|
||||
// Look for blocks in AI category which often have complex field names
|
||||
const aiBlocks = blocks.filter((b) => b.type === "AI" || b.type === "Standard");
|
||||
|
||||
if (aiBlocks.length === 0) {
|
||||
test.skip(true, "No suitable blocks found for testing");
|
||||
return;
|
||||
}
|
||||
|
||||
// Add the first available block
|
||||
const testBlock = aiBlocks[0];
|
||||
await buildPage.addBlock(testBlock);
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Verify the block is on canvas
|
||||
await buildPage.hasBlock(testBlock);
|
||||
|
||||
// Get all input handles on the block
|
||||
const blockElement = page.locator(`[data-blockid="${testBlock.id}"]`).first();
|
||||
const inputHandles = blockElement.locator('[data-testid^="input-handle-"]');
|
||||
|
||||
const handleCount = await inputHandles.count();
|
||||
console.log(`Block ${testBlock.name} has ${handleCount} input handles`);
|
||||
|
||||
// Verify handles are accessible
|
||||
if (handleCount > 0) {
|
||||
const firstHandle = inputHandles.first();
|
||||
await expect(firstHandle).toBeAttached();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
test.describe("Block Connections", () => {
|
||||
test("can connect SmartDecisionMaker output to downstream block", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const smartBlock = await getSmartDecisionMakerBlock();
|
||||
const storeBlock = await findBlockByName("Store Value");
|
||||
|
||||
if (!smartBlock || !storeBlock) {
|
||||
test.skip(true, "Required blocks not found");
|
||||
return;
|
||||
}
|
||||
|
||||
// Add SmartDecisionMaker
|
||||
await buildPage.addBlock(smartBlock);
|
||||
|
||||
// Add a downstream block
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Downstream Store",
|
||||
});
|
||||
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Wait for blocks to settle
|
||||
await page.waitForTimeout(500);
|
||||
|
||||
// Verify both blocks are present
|
||||
await buildPage.hasBlock(smartBlock);
|
||||
|
||||
// The tools output should be available for connection
|
||||
const smartBlockElement = page.locator(`[data-blockid="${smartBlock.id}"]`).first();
|
||||
const toolsOutput = smartBlockElement.locator('[data-testid="output-handle-tools"]');
|
||||
|
||||
// tools output may or may not exist depending on block configuration
|
||||
const hasToolsOutput = await toolsOutput.count() > 0;
|
||||
console.log(`SmartDecisionMaker has tools output: ${hasToolsOutput}`);
|
||||
});
|
||||
|
||||
test("connection data attributes use correct format", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const storeBlock = await findBlockByName("Store Value");
|
||||
if (!storeBlock) {
|
||||
test.skip(true, "Store Value block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Store 1",
|
||||
});
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Store 2",
|
||||
});
|
||||
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Connect via data IDs
|
||||
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||
"1-1-output-source",
|
||||
"1-2-input-target"
|
||||
);
|
||||
|
||||
// Verify edge was created
|
||||
const edges = page.locator(".react-flow__edge");
|
||||
await expect(edges.first()).toBeVisible();
|
||||
|
||||
// Get edge data attributes
|
||||
const edgeElement = edges.first();
|
||||
const sourceHandle = await edgeElement.getAttribute("data-sourcehandle");
|
||||
const targetHandle = await edgeElement.getAttribute("data-targethandle");
|
||||
|
||||
console.log(`Edge source handle: ${sourceHandle}`);
|
||||
console.log(`Edge target handle: ${targetHandle}`);
|
||||
|
||||
// The handles should be set
|
||||
expect(sourceHandle).toBeTruthy();
|
||||
expect(targetHandle).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
test.describe("Agent Save and Load", () => {
|
||||
test("can save agent with SmartDecisionMaker block", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const smartBlock = await getSmartDecisionMakerBlock();
|
||||
if (!smartBlock) {
|
||||
test.skip(true, "SmartDecisionMaker block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock(smartBlock);
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Save the agent
|
||||
const agentName = `SDM Test ${Date.now()}`;
|
||||
await buildPage.saveAgent(agentName, "Testing SmartDecisionMaker");
|
||||
|
||||
// Verify URL updated with flowID
|
||||
await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
|
||||
|
||||
// Wait for save to complete
|
||||
await buildPage.waitForSaveButton();
|
||||
});
|
||||
|
||||
test("saved agent preserves block connections", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const storeBlock = await findBlockByName("Store Value");
|
||||
if (!storeBlock) {
|
||||
test.skip(true, "Store Value block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
// Add and connect blocks
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Store 1",
|
||||
});
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Store 2",
|
||||
});
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||
"1-1-output-source",
|
||||
"1-2-input-target"
|
||||
);
|
||||
|
||||
// Save
|
||||
const agentName = `Connection Test ${Date.now()}`;
|
||||
await buildPage.saveAgent(agentName, "Testing connections");
|
||||
await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
|
||||
|
||||
// Count edges before reload
|
||||
const edgesBefore = await page.locator(".react-flow__edge").count();
|
||||
|
||||
// Reload the page
|
||||
await page.reload();
|
||||
await buildPage.closeTutorial();
|
||||
|
||||
// Wait for graph to load
|
||||
await page.waitForTimeout(2000);
|
||||
|
||||
// Verify edges still exist
|
||||
const edgesAfter = await page.locator(".react-flow__edge").count();
|
||||
expect(edgesAfter).toBe(edgesBefore);
|
||||
});
|
||||
});
|
||||
|
||||
test.describe("Field Name Display", () => {
|
||||
test("block inputs display readable field names", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const smartBlock = await getSmartDecisionMakerBlock();
|
||||
if (!smartBlock) {
|
||||
test.skip(true, "SmartDecisionMaker block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock(smartBlock);
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
const blockElement = page.locator(`[data-blockid="${smartBlock.id}"]`).first();
|
||||
|
||||
// Get all visible input labels
|
||||
const inputLabels = blockElement.locator('[data-id^="input-handle-"]');
|
||||
const count = await inputLabels.count();
|
||||
|
||||
console.log(`Found ${count} input containers`);
|
||||
|
||||
// Log each input's data-id to see field naming
|
||||
for (let i = 0; i < Math.min(count, 5); i++) {
|
||||
const label = inputLabels.nth(i);
|
||||
const dataId = await label.getAttribute("data-id");
|
||||
console.log(`Input ${i}: ${dataId}`);
|
||||
}
|
||||
});
|
||||
|
||||
test("output handles have correct data-testid format", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const storeBlock = await findBlockByName("Store Value");
|
||||
if (!storeBlock) {
|
||||
test.skip(true, "Store Value block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock(storeBlock);
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
const blockElement = page.locator(`[data-blockid="${storeBlock.id}"]`).first();
|
||||
const outputHandles = blockElement.locator('[data-testid^="output-handle-"]');
|
||||
|
||||
const count = await outputHandles.count();
|
||||
console.log(`Found ${count} output handles`);
|
||||
|
||||
for (let i = 0; i < count; i++) {
|
||||
const handle = outputHandles.nth(i);
|
||||
const testId = await handle.getAttribute("data-testid");
|
||||
console.log(`Output handle ${i}: ${testId}`);
|
||||
|
||||
// Verify format: output-handle-{fieldname}
|
||||
expect(testId).toMatch(/^output-handle-/);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
test.describe("Multi-Block Workflows", () => {
|
||||
test("can create workflow with multiple connected blocks", async ({ page }) => {
|
||||
test.setTimeout(90000);
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const storeBlock = await findBlockByName("Store Value");
|
||||
if (!storeBlock) {
|
||||
test.skip(true, "Store Value block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
// Add three blocks in a chain
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Block A",
|
||||
});
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Block B",
|
||||
});
|
||||
await buildPage.addBlock({
|
||||
...storeBlock,
|
||||
name: "Block C",
|
||||
});
|
||||
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Connect A -> B
|
||||
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||
"1-1-output-source",
|
||||
"1-2-input-target"
|
||||
);
|
||||
|
||||
// Connect B -> C
|
||||
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||
"1-2-output-source",
|
||||
"1-3-input-target"
|
||||
);
|
||||
|
||||
// Verify we have 2 edges
|
||||
const edges = page.locator(".react-flow__edge");
|
||||
await expect(edges).toHaveCount(2);
|
||||
|
||||
// Save the workflow
|
||||
await buildPage.saveAgent(
|
||||
`Workflow Test ${Date.now()}`,
|
||||
"Multi-block workflow test"
|
||||
);
|
||||
await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
test.describe("SmartDecisionMaker Pin Sanitization", () => {
|
||||
let buildPage: BuildPage;
|
||||
|
||||
test.beforeEach(async ({ page }) => {
|
||||
test.setTimeout(60000);
|
||||
const loginPage = new LoginPage(page);
|
||||
const testUser = await getTestUser();
|
||||
|
||||
buildPage = new BuildPage(page);
|
||||
|
||||
await page.goto("/login");
|
||||
await loginPage.login(testUser.email, testUser.password);
|
||||
await hasUrl(page, "/marketplace");
|
||||
await buildPage.navbar.clickBuildLink();
|
||||
await hasUrl(page, "/build");
|
||||
await buildPage.closeTutorial();
|
||||
});
|
||||
|
||||
test("verifies input handle naming convention", async ({ page }) => {
|
||||
/**
|
||||
* This test documents the expected behavior of input handle naming.
|
||||
*
|
||||
* The bug: If frontend uses original names (with spaces) in data attributes
|
||||
* but backend expects sanitized names (lowercase, underscores), routing fails.
|
||||
*/
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
// Get all blocks and find one with inputs
|
||||
const blocks = await buildPage.getBlocksFromAPI();
|
||||
const blockWithInputs = blocks.find((b) => b.type === "Standard");
|
||||
|
||||
if (!blockWithInputs) {
|
||||
test.skip(true, "No suitable block found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock(blockWithInputs);
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
const blockElement = page.locator(`[data-blockid="${blockWithInputs.id}"]`).first();
|
||||
const inputHandles = blockElement.locator('[data-testid^="input-handle-"]');
|
||||
|
||||
const count = await inputHandles.count();
|
||||
|
||||
// Document the actual naming convention used
|
||||
const handleNames: string[] = [];
|
||||
for (let i = 0; i < count; i++) {
|
||||
const handle = inputHandles.nth(i);
|
||||
const testId = await handle.getAttribute("data-testid");
|
||||
if (testId) {
|
||||
const fieldName = testId.replace("input-handle-", "");
|
||||
handleNames.push(fieldName);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Block: ${blockWithInputs.name}`);
|
||||
console.log(`Input handle names: ${JSON.stringify(handleNames)}`);
|
||||
|
||||
// Check if names are lowercase (sanitized) or original case
|
||||
for (const name of handleNames) {
|
||||
const isLowercase = name === name.toLowerCase();
|
||||
const hasSpaces = name.includes(" ");
|
||||
const hasSpecialChars = /[^a-zA-Z0-9_-]/.test(name);
|
||||
|
||||
console.log(` ${name}: lowercase=${isLowercase}, spaces=${hasSpaces}, special=${hasSpecialChars}`);
|
||||
|
||||
// Document: Frontend uses lowercase handle names
|
||||
// This should match backend sanitization
|
||||
expect(isLowercase).toBe(true);
|
||||
expect(hasSpaces).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
test("verifies output handle naming matches input handle convention", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const blocks = await buildPage.getBlocksFromAPI();
|
||||
const blockWithOutputs = blocks.find((b) => b.type === "Standard");
|
||||
|
||||
if (!blockWithOutputs) {
|
||||
test.skip(true, "No suitable block found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock(blockWithOutputs);
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
const blockElement = page.locator(`[data-blockid="${blockWithOutputs.id}"]`).first();
|
||||
const outputHandles = blockElement.locator('[data-testid^="output-handle-"]');
|
||||
|
||||
const count = await outputHandles.count();
|
||||
|
||||
for (let i = 0; i < count; i++) {
|
||||
const handle = outputHandles.nth(i);
|
||||
const testId = await handle.getAttribute("data-testid");
|
||||
if (testId) {
|
||||
const fieldName = testId.replace("output-handle-", "");
|
||||
|
||||
// Output handles should also use lowercase sanitized names
|
||||
const isLowercase = fieldName === fieldName.toLowerCase();
|
||||
expect(isLowercase).toBe(true);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test("link creation uses consistent field naming", async ({ page }) => {
|
||||
/**
|
||||
* This test verifies that when creating a connection (link),
|
||||
* both source and target use consistent naming conventions.
|
||||
*/
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const storeBlock = await buildPage.getFilteredBlocksFromAPI(
|
||||
(b) => b.name.toLowerCase().includes("store value")
|
||||
);
|
||||
|
||||
if (storeBlock.length === 0) {
|
||||
test.skip(true, "Store Value block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock({
|
||||
...storeBlock[0],
|
||||
name: "Source Block",
|
||||
});
|
||||
await buildPage.addBlock({
|
||||
...storeBlock[0],
|
||||
name: "Target Block",
|
||||
});
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Create connection
|
||||
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||
"1-1-output-source",
|
||||
"1-2-input-target"
|
||||
);
|
||||
|
||||
// Get the created edge
|
||||
const edge = page.locator(".react-flow__edge").first();
|
||||
await expect(edge).toBeVisible();
|
||||
|
||||
// Check edge attributes for naming consistency
|
||||
const sourceHandle = await edge.getAttribute("data-sourcehandle");
|
||||
const targetHandle = await edge.getAttribute("data-targethandle");
|
||||
|
||||
console.log(`Source handle: ${sourceHandle}`);
|
||||
console.log(`Target handle: ${targetHandle}`);
|
||||
|
||||
// Both should be non-empty
|
||||
expect(sourceHandle).toBeTruthy();
|
||||
expect(targetHandle).toBeTruthy();
|
||||
|
||||
// Check if handles follow sanitized naming convention
|
||||
if (sourceHandle && targetHandle) {
|
||||
const sourceIsLowercase = sourceHandle === sourceHandle.toLowerCase();
|
||||
const targetIsLowercase = targetHandle === targetHandle.toLowerCase();
|
||||
|
||||
// Document: Edge handles should use sanitized names
|
||||
// This ensures consistency with backend emit keys
|
||||
console.log(`Source handle is lowercase: ${sourceIsLowercase}`);
|
||||
console.log(`Target handle is lowercase: ${targetIsLowercase}`);
|
||||
}
|
||||
});
|
||||
});
|
||||
467
autogpt_platform/frontend/src/tests/tool-connections.spec.ts
Normal file
467
autogpt_platform/frontend/src/tests/tool-connections.spec.ts
Normal file
@@ -0,0 +1,467 @@
|
||||
/**
|
||||
* E2E tests for tool connections and routing in the graph builder.
|
||||
*
|
||||
* These tests focus on the connection behavior between blocks,
|
||||
* particularly around the SmartDecisionMaker tools output routing.
|
||||
*
|
||||
* Key scenarios tested:
|
||||
* 1. Connection data attribute formats
|
||||
* 2. Handle naming conventions
|
||||
* 3. Edge creation with various field name formats
|
||||
* 4. Link persistence after save/reload
|
||||
*/
|
||||
|
||||
import test, { expect } from "@playwright/test";
|
||||
import { BuildPage, Block } from "./pages/build.page";
|
||||
import { LoginPage } from "./pages/login.page";
|
||||
import { hasUrl } from "./utils/assertion";
|
||||
import { getTestUser } from "./utils/auth";
|
||||
|
||||
test.describe("Tool Connections", () => {
|
||||
let buildPage: BuildPage;
|
||||
|
||||
test.beforeEach(async ({ page }) => {
|
||||
test.setTimeout(45000);
|
||||
const loginPage = new LoginPage(page);
|
||||
const testUser = await getTestUser();
|
||||
|
||||
buildPage = new BuildPage(page);
|
||||
|
||||
await page.goto("/login");
|
||||
await loginPage.login(testUser.email, testUser.password);
|
||||
await hasUrl(page, "/marketplace");
|
||||
await buildPage.navbar.clickBuildLink();
|
||||
await hasUrl(page, "/build");
|
||||
await buildPage.closeTutorial();
|
||||
});
|
||||
|
||||
test.describe("Connection Data Attributes", () => {
|
||||
test("edge source and target handles are set correctly", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const storeBlock = await buildPage.getFilteredBlocksFromAPI(
|
||||
(b) => b.name.toLowerCase().includes("store value")
|
||||
);
|
||||
|
||||
if (storeBlock.length === 0) {
|
||||
test.skip(true, "Store Value block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock({ ...storeBlock[0], name: "Source" });
|
||||
await buildPage.addBlock({ ...storeBlock[0], name: "Target" });
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Connect blocks
|
||||
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||
"1-1-output-source",
|
||||
"1-2-input-target"
|
||||
);
|
||||
|
||||
// Verify edge exists and has correct attributes
|
||||
const edge = page.locator(".react-flow__edge").first();
|
||||
await expect(edge).toBeVisible();
|
||||
|
||||
// Get all relevant edge attributes
|
||||
const attributes = await edge.evaluate((el) => ({
|
||||
source: el.getAttribute("data-source"),
|
||||
target: el.getAttribute("data-target"),
|
||||
sourceHandle: el.getAttribute("data-sourcehandle"),
|
||||
targetHandle: el.getAttribute("data-targethandle"),
|
||||
id: el.getAttribute("id"),
|
||||
}));
|
||||
|
||||
console.log("Edge attributes:", JSON.stringify(attributes, null, 2));
|
||||
|
||||
// Source and target should be node IDs
|
||||
expect(attributes.source).toBeTruthy();
|
||||
expect(attributes.target).toBeTruthy();
|
||||
|
||||
// Handles should reference field names
|
||||
expect(attributes.sourceHandle).toBeTruthy();
|
||||
expect(attributes.targetHandle).toBeTruthy();
|
||||
});
|
||||
|
||||
test("edge ID follows expected format", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
const storeBlock = await buildPage.getFilteredBlocksFromAPI(
|
||||
(b) => b.name.toLowerCase().includes("store value")
|
||||
);
|
||||
|
||||
if (storeBlock.length === 0) {
|
||||
test.skip(true, "Store Value block not found");
|
||||
return;
|
||||
}
|
||||
|
||||
await buildPage.addBlock({ ...storeBlock[0], name: "A" });
|
||||
await buildPage.addBlock({ ...storeBlock[0], name: "B" });
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
await buildPage.connectBlockOutputToBlockInputViaDataId(
|
||||
"1-1-output-source",
|
||||
"1-2-input-target"
|
||||
);
|
||||
|
||||
const edge = page.locator(".react-flow__edge").first();
|
||||
const edgeId = await edge.getAttribute("id");
|
||||
|
||||
console.log(`Edge ID: ${edgeId}`);
|
||||
|
||||
// Edge ID typically contains source-target info
|
||||
expect(edgeId).toBeTruthy();
|
||||
// Format: reactflow__edge-{source}{sourceHandle}-{target}{targetHandle}
|
||||
expect(edgeId).toContain("reactflow__edge");
|
||||
});
|
||||
});
|
||||
|
||||
test.describe("Handle Naming Consistency", () => {
|
||||
test("all input handles use lowercase naming", async ({ page }) => {
|
||||
await buildPage.openBlocksPanel();
|
||||
|
||||
// Get multiple blocks to test variety
|
||||
const blocks = await buildPage.getBlocksFromAPI();
|
||||
const testBlocks = blocks.slice(0, 3).filter((b) => b.type !== "Agent");
|
||||
|
||||
if (testBlocks.length === 0) {
|
||||
test.skip(true, "No suitable blocks found");
|
||||
return;
|
||||
}
|
||||
|
||||
for (const block of testBlocks) {
|
||||
await buildPage.addBlock(block);
|
||||
}
|
||||
await buildPage.closeBlocksPanel();
|
||||
|
||||
// Check all input handles across all blocks
|
||||
const allInputHandles = page.locator('[data-testid^="input-handle-"]');
|
||||
const count = await allInputHandles.count();
|
||||
|
||||
let uppercaseFound = false;
|
||||
let spacesFound = false;
|
||||
|
||||
for (let i = 0; i < count; i++) {
|
||||
const handle = allInputHandles.nth(i);
|
||||
const testId = await handle.getAttribute("data-testid");
|
||||
|
||||
if (testId) {
|
||||
const fieldName = testId.replace("input-handle-", "");
|
||||
|
||||
if (fieldName !== fieldName.toLowerCase()) {
|
||||
console.log(`Non-lowercase input handle found: ${fieldName}`);
|
||||
uppercaseFound = true;
|
||||
}
|
||||
|
||||
if (fieldName.includes(" ")) {
|
||||
console.log(`Input handle with spaces found: ${fieldName}`);
|
||||
spacesFound = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Document: Frontend should use lowercase sanitized names
|
||||
// If this fails, there's an inconsistency that could cause routing issues
|
||||
expect(uppercaseFound).toBe(false);
|
||||
expect(spacesFound).toBe(false);
    });

    test("all output handles use lowercase naming", async ({ page }) => {
      await buildPage.openBlocksPanel();

      const blocks = await buildPage.getBlocksFromAPI();
      const testBlocks = blocks.slice(0, 3).filter((b) => b.type !== "Agent");

      if (testBlocks.length === 0) {
        test.skip(true, "No suitable blocks found");
        return;
      }

      for (const block of testBlocks) {
        await buildPage.addBlock(block);
      }
      await buildPage.closeBlocksPanel();

      const allOutputHandles = page.locator('[data-testid^="output-handle-"]');
      const count = await allOutputHandles.count();

      let uppercaseFound = false;
      let spacesFound = false;

      for (let i = 0; i < count; i++) {
        const handle = allOutputHandles.nth(i);
        const testId = await handle.getAttribute("data-testid");

        if (testId) {
          const fieldName = testId.replace("output-handle-", "");

          if (fieldName !== fieldName.toLowerCase()) {
            uppercaseFound = true;
            console.log(`Non-lowercase output handle: ${fieldName}`);
          }

          if (fieldName.includes(" ")) {
            spacesFound = true;
            console.log(`Output handle with spaces: ${fieldName}`);
          }
        }
      }

      expect(uppercaseFound).toBe(false);
      expect(spacesFound).toBe(false);
    });
  });

  test.describe("Connection Persistence", () => {
    test("connections survive page reload", async ({ page }) => {
      await buildPage.openBlocksPanel();

      const storeBlock = await buildPage.getFilteredBlocksFromAPI(
        (b) => b.name.toLowerCase().includes("store value")
      );

      if (storeBlock.length === 0) {
        test.skip(true, "Store Value block not found");
        return;
      }

      await buildPage.addBlock({ ...storeBlock[0], name: "Persist A" });
      await buildPage.addBlock({ ...storeBlock[0], name: "Persist B" });
      await buildPage.closeBlocksPanel();

      await buildPage.connectBlockOutputToBlockInputViaDataId(
        "1-1-output-source",
        "1-2-input-target"
      );

      // Save the agent
      await buildPage.saveAgent(
        `Persist Test ${Date.now()}`,
        "Testing connection persistence"
      );
      await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
      await buildPage.waitForSaveButton();

      // Get current URL
      const url = page.url();

      // Reload
      await page.reload();
      await buildPage.closeTutorial();
      await page.waitForTimeout(2000);

      // Verify edge still exists
      const edge = page.locator(".react-flow__edge").first();
      await expect(edge).toBeVisible();

      // Verify same URL
      expect(page.url()).toBe(url);
    });

    test("connection attributes preserved after save", async ({ page }) => {
      await buildPage.openBlocksPanel();

      const storeBlock = await buildPage.getFilteredBlocksFromAPI(
        (b) => b.name.toLowerCase().includes("store value")
      );

      if (storeBlock.length === 0) {
        test.skip(true, "Store Value block not found");
        return;
      }

      await buildPage.addBlock({ ...storeBlock[0], name: "Attr A" });
      await buildPage.addBlock({ ...storeBlock[0], name: "Attr B" });
      await buildPage.closeBlocksPanel();

      await buildPage.connectBlockOutputToBlockInputViaDataId(
        "1-1-output-source",
        "1-2-input-target"
      );

      // Get attributes before save
      const edgeBefore = page.locator(".react-flow__edge").first();
      const attrsBefore = await edgeBefore.evaluate((el) => ({
        sourceHandle: el.getAttribute("data-sourcehandle"),
        targetHandle: el.getAttribute("data-targethandle"),
      }));

      // Save
      await buildPage.saveAgent(`Attr Test ${Date.now()}`, "Testing attributes");
      await expect(page).toHaveURL(({ searchParams }) => !!searchParams.get("flowID"));
      await buildPage.waitForSaveButton();

      // Reload
      await page.reload();
      await buildPage.closeTutorial();
      await page.waitForTimeout(2000);

      // Get attributes after reload
      const edgeAfter = page.locator(".react-flow__edge").first();
      await expect(edgeAfter).toBeVisible();

      const attrsAfter = await edgeAfter.evaluate((el) => ({
        sourceHandle: el.getAttribute("data-sourcehandle"),
        targetHandle: el.getAttribute("data-targethandle"),
      }));

      console.log("Before save:", attrsBefore);
      console.log("After reload:", attrsAfter);

      // Handle names should be preserved
      expect(attrsAfter.sourceHandle).toBe(attrsBefore.sourceHandle);
      expect(attrsAfter.targetHandle).toBe(attrsBefore.targetHandle);
    });
  });

  test.describe("Multiple Connections", () => {
    test("can create multiple connections from single output", async ({ page }) => {
      await buildPage.openBlocksPanel();

      const storeBlock = await buildPage.getFilteredBlocksFromAPI(
        (b) => b.name.toLowerCase().includes("store value")
      );

      if (storeBlock.length === 0) {
        test.skip(true, "Store Value block not found");
        return;
      }

      // Add one source and two targets
      await buildPage.addBlock({ ...storeBlock[0], name: "Multi Source" });
      await buildPage.addBlock({ ...storeBlock[0], name: "Target 1" });
      await buildPage.addBlock({ ...storeBlock[0], name: "Target 2" });
      await buildPage.closeBlocksPanel();

      // Connect source to both targets
      await buildPage.connectBlockOutputToBlockInputViaDataId(
        "1-1-output-source",
        "1-2-input-target"
      );

      await buildPage.connectBlockOutputToBlockInputViaDataId(
        "1-1-output-source",
        "1-3-input-target"
      );

      // Should have 2 edges
      const edges = page.locator(".react-flow__edge");
      await expect(edges).toHaveCount(2);
    });

    test("each connection has unique edge ID", async ({ page }) => {
      await buildPage.openBlocksPanel();

      const storeBlock = await buildPage.getFilteredBlocksFromAPI(
        (b) => b.name.toLowerCase().includes("store value")
      );

      if (storeBlock.length === 0) {
        test.skip(true, "Store Value block not found");
        return;
      }

      await buildPage.addBlock({ ...storeBlock[0], name: "ID Source" });
      await buildPage.addBlock({ ...storeBlock[0], name: "ID Target 1" });
      await buildPage.addBlock({ ...storeBlock[0], name: "ID Target 2" });
      await buildPage.closeBlocksPanel();

      await buildPage.connectBlockOutputToBlockInputViaDataId(
        "1-1-output-source",
        "1-2-input-target"
      );
      await buildPage.connectBlockOutputToBlockInputViaDataId(
        "1-1-output-source",
        "1-3-input-target"
      );

      const edges = page.locator(".react-flow__edge");
      const edgeIds: string[] = [];

      const count = await edges.count();
      for (let i = 0; i < count; i++) {
        const edge = edges.nth(i);
        const id = await edge.getAttribute("id");
        if (id) edgeIds.push(id);
      }

      console.log("Edge IDs:", edgeIds);

      // All IDs should be unique
      const uniqueIds = new Set(edgeIds);
      expect(uniqueIds.size).toBe(edgeIds.length);
    });
  });
});

test.describe("Tool Output Pin Format", () => {
  let buildPage: BuildPage;

  test.beforeEach(async ({ page }) => {
    test.setTimeout(45000);
    const loginPage = new LoginPage(page);
    const testUser = await getTestUser();

    buildPage = new BuildPage(page);

    await page.goto("/login");
    await loginPage.login(testUser.email, testUser.password);
    await hasUrl(page, "/marketplace");
    await buildPage.navbar.clickBuildLink();
    await hasUrl(page, "/build");
    await buildPage.closeTutorial();
  });

  test("documents tool output pin naming format", async ({ page }) => {
    /**
     * This test documents the expected format for tool output pins
     * which is critical for routing to work correctly.
     *
     * Expected format: tools_^_{sink_node_id}_~_{sanitized_field_name}
     *
     * The bug occurs when:
     * - Frontend creates link with: tools_^_{node}_~_Max Keyword Difficulty
     * - Backend emits with: tools_^_{node}_~_max_keyword_difficulty
     */
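    // Hedged sketch (not the app's real routing code): how an emit key in the
    // format above is assumed to split into its routing parts. The node ID
    // "abc-123" is the same hypothetical example logged further below.
    const splitEmitKey = (key: string) => {
      const [sinkNodeId, fieldName] = key.slice("tools_^_".length).split("_~_");
      return { sinkNodeId, fieldName };
    };
    console.log(splitEmitKey("tools_^_abc-123_~_max_keyword_difficulty"));
    // -> { sinkNodeId: "abc-123", fieldName: "max_keyword_difficulty" }
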
    await buildPage.openBlocksPanel();

    // Look for SmartDecisionMaker or any AI block
    const blocks = await buildPage.getBlocksFromAPI();
    const aiBlock = blocks.find(
      (b) =>
        b.type === "AI" ||
        b.name.toLowerCase().includes("smart") ||
        b.name.toLowerCase().includes("decision")
    );

    if (!aiBlock) {
      console.log("No AI block found, documenting expected format:");
      console.log("Tool pin format: tools_^_{sink_node_id}_~_{sanitized_field_name}");
      console.log("Example: tools_^_abc-123_~_max_keyword_difficulty");
      test.skip(true, "No AI block available for testing");
      return;
    }

    await buildPage.addBlock(aiBlock);
    await buildPage.closeBlocksPanel();

    const blockElement = page.locator(`[data-blockid="${aiBlock.id}"]`).first();

    // Get tools output handle if it exists
    const toolsOutput = blockElement.locator('[data-testid="output-handle-tools"]');
    const hasToolsOutput = (await toolsOutput.count()) > 0;

    if (hasToolsOutput) {
      console.log("Tools output pin found");

      // Document the expected behavior
      // When this pin is connected, the link should use sanitized names
    } else {
      console.log("No tools output pin on this block");
    }

    // Document expected format regardless
    console.log("\nExpected tool pin format for SmartDecisionMaker:");
    console.log("  Source: tools_^_{sink_node_id}_~_{sanitized_field_name}");
    console.log("  Example sink_pin_name: max_keyword_difficulty (NOT 'Max Keyword Difficulty')");
  });
});