Files
AutoGPT/autogpt_platform/backend/backend/executor/utils_test.py
Zamil Majdy 7b951c977e feat(platform): implement graph-level Safe Mode toggle for HITL blocks (#11455)
## Summary

This PR implements a graph-level Safe Mode toggle system for
Human-in-the-Loop (HITL) blocks. When Safe Mode is ON (default), HITL
blocks require manual review before proceeding. When OFF, they execute
automatically.

## 🔧 Backend Changes

- **Database**: Added `metadata` JSON column to `AgentGraph` table with
migration
- **API**: Updated `execute_graph` endpoint to accept `safe_mode`
parameter
- **Execution**: Enhanced execution context to use graph metadata as
default with API override capability
- **Auto-detection**: Automatically populate `has_human_in_the_loop` for
graphs containing HITL blocks
- **Block Detection**: HITL block ID:
`8b2a7b3c-6e9d-4a5f-8c1b-2e3f4a5b6c7d`

## 🎨 Frontend Changes

- **Component**: New `FloatingSafeModeToggle` with dual variants:
  - **White variant**: For library pages, integrates with action buttons
  - **Black variant**: For builders, floating positioned  
- **Integration**: Added toggles to both new/legacy builders and library
pages
- **API Integration**: Direct graph metadata updates via
`usePutV1UpdateGraphVersion`
- **Query Management**: React Query cache invalidation for consistent UI
updates
- **Conditional Display**: Toggle only appears when graph contains HITL
blocks

## 🛠 Technical Implementation

- **Safe Mode ON** (default): HITL blocks require manual review before
proceeding
- **Safe Mode OFF**: HITL blocks execute automatically without
intervention
- **Priority**: Backend API `safe_mode` parameter takes precedence over
graph metadata
- **Detection**: Auto-populates `has_human_in_the_loop` metadata field
- **Positioning**: Proper z-index and responsive positioning for
floating elements

## 🚧 Known Issues (Work in Progress)

### High Priority
- [ ] **Toggle state persistence**: The toggle always shows "ON" regardless of
its actual state (query invalidation issue)
- [ ] **LibraryAgent metadata**: Missing metadata field causing
TypeScript errors
- [ ] **Tooltip z-index**: Still covered by some UI elements despite
high z-index

### Medium Priority  
- [ ] **HITL detection**: Logic needs improvement for reliable block
detection
- [ ] **Error handling**: Removing HITL blocks from graph causes save
errors
- [ ] **TypeScript**: Fix type mismatches between GraphModel and
LibraryAgent

### Low Priority
- [ ] **Frontend API**: Add `safe_mode` parameter to execution calls
once OpenAPI is regenerated
- [ ] **Performance**: Consider debouncing rapid toggle clicks

## 🧪 Test Plan

- [ ] Verify toggle appears only when graph has HITL blocks
- [ ] Test toggle persistence across page refreshes  
- [ ] Confirm API calls update graph metadata correctly
- [ ] Validate execution behavior respects safe mode setting
- [ ] Check styling consistency across builder and library contexts

## 🔗 Related

- Addresses requirements for graph-level HITL configuration
- Builds on existing FloatingReviewsPanel infrastructure
- Integrates with existing graph metadata system

🤖 Generated with [Claude Code](https://claude.ai/code)
2025-12-02 09:55:55 +00:00

459 lines
17 KiB
Python

from typing import cast
import pytest
from pytest_mock import MockerFixture
from backend.data.dynamic_fields import merge_execution_input, parse_execution_output
from backend.util.mock import MockObject
def test_parse_execution_output():
    """Exercise `parse_execution_output` on plain, nested and tool-pin outputs.

    The cases below cover the path delimiters used in link names:
    `_$_` (list index), `_#_` (dict key) and `_@_` (object attribute),
    alone and chained in different orders, plus tool pin routing where the
    output name has the form `<name>_^_<node_id>_~_<pin_name>`.
    Paths that cannot be resolved are expected to yield `None`; tool pin
    routing without a sink pin name is expected to raise `ValueError`.
    """
    # Test case for basic output
    output = ("result", "value")
    assert parse_execution_output(output, "result") == "value"

    # Test case for list output
    output = ("result", [10, 20, 30])
    assert parse_execution_output(output, "result_$_1") == 20

    # Test case for dict output
    output = ("result", {"key1": "value1", "key2": "value2"})
    assert parse_execution_output(output, "result_#_key1") == "value1"

    # Test case for object output
    class Sample:
        def __init__(self):
            self.attr1 = "value1"
            self.attr2 = "value2"

    output = ("result", Sample())
    assert parse_execution_output(output, "result_@_attr1") == "value1"

    # Test case for nested list output
    output = ("result", [[1, 2], [3, 4]])
    assert parse_execution_output(output, "result_$_0_$_1") == 2
    assert parse_execution_output(output, "result_$_1_$_0") == 3

    # Test case for list containing dict
    output = ("result", [{"key1": "value1"}, {"key2": "value2"}])
    assert parse_execution_output(output, "result_$_0_#_key1") == "value1"
    assert parse_execution_output(output, "result_$_1_#_key2") == "value2"

    # Test case for dict containing list
    output = ("result", {"key1": [1, 2], "key2": [3, 4]})
    assert parse_execution_output(output, "result_#_key1_$_1") == 2
    assert parse_execution_output(output, "result_#_key2_$_0") == 3

    # Test case for complex nested structure
    class NestedSample:
        def __init__(self):
            self.attr1 = [1, 2]
            self.attr2 = {"key": "value"}

    output = ("result", [NestedSample(), {"key": [1, 2]}])
    assert parse_execution_output(output, "result_$_0_@_attr1_$_1") == 2
    assert parse_execution_output(output, "result_$_0_@_attr2_#_key") == "value"
    assert parse_execution_output(output, "result_$_1_#_key_$_0") == 1

    # Test case for non-existent paths
    output = ("result", [1, 2, 3])
    assert parse_execution_output(output, "result_$_5") is None
    assert parse_execution_output(output, "result_#_key") is None
    assert parse_execution_output(output, "result_@_attr") is None
    assert parse_execution_output(output, "wrong_name") is None

    # Test cases for delimiter processing order
    # Test case 1: List -> Dict -> List
    output = ("result", [[{"key": [1, 2]}], [3, 4]])
    assert parse_execution_output(output, "result_$_0_$_0_#_key_$_1") == 2

    # Test case 2: Dict -> List -> Object
    class NestedObj:
        def __init__(self):
            self.value = "nested"

    output = ("result", {"key": [NestedObj(), 2]})
    assert parse_execution_output(output, "result_#_key_$_0_@_value") == "nested"

    # Test case 3: Object -> List -> Dict
    class ParentObj:
        def __init__(self):
            self.items = [{"nested": "value"}]

    output = ("result", ParentObj())
    assert parse_execution_output(output, "result_@_items_$_0_#_nested") == "value"

    # Test case 4: Complex nested structure with all types
    class ComplexObj:
        def __init__(self):
            self.data = [{"items": [{"value": "deep"}]}]

    output = ("result", {"key": [ComplexObj()]})
    assert (
        parse_execution_output(
            output, "result_#_key_$_0_@_data_$_0_#_items_$_0_#_value"
        )
        == "deep"
    )

    # Test case 5: Invalid paths that should return None
    output = ("result", [{"key": [1, 2]}])
    assert parse_execution_output(output, "result_$_0_#_wrong_key") is None
    assert parse_execution_output(output, "result_$_0_#_key_$_5") is None
    assert parse_execution_output(output, "result_$_0_@_attr") is None

    # Test case 6: Mixed delimiter types in wrong order
    output = ("result", {"key": [1, 2]})
    assert (
        parse_execution_output(output, "result_#_key_$_1_@_attr") is None
    )  # Should fail at @_attr
    assert (
        parse_execution_output(output, "result_@_attr_$_0_#_key") is None
    )  # Should fail at @_attr

    # Test case 7: Tool pin routing with matching node ID and pin name
    output = ("tools_^_node123_~_query", "search term")
    assert parse_execution_output(output, "tools", "node123", "query") == "search term"

    # Test case 8: Tool pin routing with node ID mismatch
    output = ("tools_^_node123_~_query", "search term")
    assert parse_execution_output(output, "tools", "node456", "query") is None

    # Test case 9: Tool pin routing with pin name mismatch
    output = ("tools_^_node123_~_query", "search term")
    assert parse_execution_output(output, "tools", "node123", "different_pin") is None

    # Test case 10: Tool pin routing with complex field names
    output = ("tools_^_node789_~_nested_field", {"key": "value"})
    result = parse_execution_output(output, "tools", "node789", "nested_field")
    assert result == {"key": "value"}

    # Test case 11: Tool pin routing missing required parameters should raise error
    output = ("tools_^_node123_~_query", "search term")
    # pytest.raises fails the test by itself when nothing is raised, so no
    # manual "assert False" sentinel is needed.
    with pytest.raises(ValueError, match="must be provided for tool pin routing"):
        parse_execution_output(output, "tools", "node123")  # Missing sink_pin_name

    # Test case 12: Non-tool pin with similar pattern should use normal logic
    output = ("tools_^_node123_~_query", "search term")
    assert parse_execution_output(output, "different_name", "node123", "query") is None
def test_merge_execution_input():
    """Check that `merge_execution_input` rebuilds nested values from flat keys.

    Flat keys use `_$_` for list indices, `_#_` for dict keys and `_@_` for
    object attributes. The cases assert that the merged result contains
    lists, dicts and `MockObject` instances respectively, for single-level
    and chained delimiters, and that a non-integer list index raises
    `ValueError`.
    """
    # Test case for basic list extraction
    data = {
        "list_$_0": "a",
        "list_$_1": "b",
    }
    result = merge_execution_input(data)
    assert "list" in result
    assert result["list"] == ["a", "b"]

    # Test case for basic dict extraction
    data = {
        "dict_#_key1": "value1",
        "dict_#_key2": "value2",
    }
    result = merge_execution_input(data)
    assert "dict" in result
    assert result["dict"] == {"key1": "value1", "key2": "value2"}

    # Test case for object extraction
    data = {
        "object_@_attr1": "value1",
        "object_@_attr2": "value2",
    }
    result = merge_execution_input(data)
    assert "object" in result
    assert isinstance(result["object"], MockObject)
    assert result["object"].attr1 == "value1"
    assert result["object"].attr2 == "value2"

    # Test case for nested list extraction
    data = {
        "nested_list_$_0_$_0": "a",
        "nested_list_$_0_$_1": "b",
        "nested_list_$_1_$_0": "c",
    }
    result = merge_execution_input(data)
    assert "nested_list" in result
    assert result["nested_list"] == [["a", "b"], ["c"]]

    # Test case for list containing dict
    data = {
        "list_with_dict_$_0_#_key1": "value1",
        "list_with_dict_$_0_#_key2": "value2",
        "list_with_dict_$_1_#_key3": "value3",
    }
    result = merge_execution_input(data)
    assert "list_with_dict" in result
    assert result["list_with_dict"] == [
        {"key1": "value1", "key2": "value2"},
        {"key3": "value3"},
    ]

    # Test case for dict containing list
    data = {
        "dict_with_list_#_key1_$_0": "value1",
        "dict_with_list_#_key1_$_1": "value2",
        "dict_with_list_#_key2_$_0": "value3",
    }
    result = merge_execution_input(data)
    assert "dict_with_list" in result
    assert result["dict_with_list"] == {
        "key1": ["value1", "value2"],
        "key2": ["value3"],
    }

    # Test case for complex nested structure
    data = {
        "complex_$_0_#_key1_$_0": "value1",
        "complex_$_0_#_key1_$_1": "value2",
        "complex_$_0_#_key2_@_attr1": "value3",
        "complex_$_1_#_key3_$_0": "value4",
    }
    result = merge_execution_input(data)
    assert "complex" in result
    assert result["complex"][0]["key1"] == ["value1", "value2"]
    assert isinstance(result["complex"][0]["key2"], MockObject)
    assert result["complex"][0]["key2"].attr1 == "value3"
    assert result["complex"][1]["key3"] == ["value4"]

    # Test case for invalid list index
    data = {"list_$_invalid": "value"}
    with pytest.raises(ValueError, match="index must be an integer"):
        merge_execution_input(data)

    # Test cases for delimiter ordering
    # Test case 1: List -> Dict -> List
    data = {
        "nested_$_0_#_key_$_0": "value1",
        "nested_$_0_#_key_$_1": "value2",
    }
    result = merge_execution_input(data)
    assert "nested" in result
    assert result["nested"][0]["key"] == ["value1", "value2"]

    # Test case 2: Dict -> List -> Object
    data = {
        "nested_#_key_$_0_@_attr": "value1",
        "nested_#_key_$_1_@_attr": "value2",
    }
    result = merge_execution_input(data)
    assert "nested" in result
    assert isinstance(result["nested"]["key"][0], MockObject)
    assert result["nested"]["key"][0].attr == "value1"
    assert result["nested"]["key"][1].attr == "value2"

    # Test case 3: Object -> List -> Dict
    data = {
        "nested_@_items_$_0_#_key": "value1",
        "nested_@_items_$_1_#_key": "value2",
    }
    result = merge_execution_input(data)
    assert "nested" in result
    nested = result["nested"]
    assert isinstance(nested, MockObject)
    items = nested.items
    assert isinstance(items, list)
    assert items[0]["key"] == "value1"
    assert items[1]["key"] == "value2"

    # Test case 4: Complex nested structure with all types
    data = {
        "deep_#_key_$_0_@_data_$_0_#_items_$_0_#_value": "deep_value",
        "deep_#_key_$_0_@_data_$_1_#_items_$_0_#_value": "another_value",
    }
    result = merge_execution_input(data)
    assert "deep" in result
    deep_key = result["deep"]["key"][0]
    assert deep_key is not None
    data_entries = getattr(deep_key, "data", None)
    assert isinstance(data_entries, list)
    # Each `data` entry must be a dict whose "items" list starts with a dict
    # carrying the expected "value"; the same checks apply to both entries.
    expected_values = ["deep_value", "another_value"]
    assert len(data_entries) >= len(expected_values)
    for entry, expected in zip(data_entries, expected_values):
        assert isinstance(entry, dict)
        assert "items" in entry
        assert isinstance(entry["items"], list)
        entry_items = cast(list, entry["items"])
        assert len(entry_items) > 0
        assert isinstance(entry_items[0], dict)
        assert entry_items[0]["value"] == expected

    # Test case 5: Mixed delimiter types in different orders
    # the last one should replace the type
    data = {
        "mixed_$_0_#_key_@_attr": "value1",  # List -> Dict -> Object
        "mixed_#_key_$_0_@_attr": "value2",  # Dict -> List -> Object
        "mixed_@_attr_$_0_#_key": "value3",  # Object -> List -> Dict
    }
    result = merge_execution_input(data)
    assert "mixed" in result
    assert result["mixed"].attr[0]["key"] == "value3"
@pytest.mark.asyncio
async def test_add_graph_execution_is_repeatable(mocker: MockerFixture):
    """
    Feeding the parameters of a created execution back into
    add_graph_execution must request an identical execution again.
    """
    from backend.data.execution import GraphExecutionWithNodes
    from backend.data.model import CredentialsMetaInput
    from backend.executor.utils import add_graph_execution
    from backend.integrations.providers import ProviderName

    # Input values shared by both calls
    graph_id = "test-graph-id"
    user_id = "test-user-id"
    graph_version = 1
    preset_id = "test-preset-id"
    inputs = {"test_input": "test_value"}
    nodes_input_masks = {"node1": {"input1": "masked_value"}}
    graph_credentials_inputs = {
        "cred_key": CredentialsMetaInput(
            id="cred-id", provider=ProviderName("test_provider"), type="oauth2"
        )
    }

    # Values handed back by validate_and_construct_node_execution_input
    graph_stub = mocker.MagicMock(version=graph_version)
    starting_nodes_input = [
        ("node1", {"input1": "value1"}),
        ("node2", {"input1": "value2"}),
    ]
    compiled_nodes_input_masks = {"node1": {"input1": "compiled_mask"}}

    # Execution object returned by the first create_graph_execution call;
    # node_executions is set explicitly to avoid an AttributeError
    first_exec = mocker.MagicMock(
        spec=GraphExecutionWithNodes,
        id="execution-id-123",
        node_executions=[],
    )
    first_exec.to_graph_execution_entry.return_value = mocker.MagicMock()

    # Queue and event bus doubles
    queue_stub = mocker.AsyncMock()
    event_bus_stub = mocker.MagicMock(publish=mocker.AsyncMock())

    # Patch the collaborators used by backend.executor.utils
    validate_patch = mocker.patch(
        "backend.executor.utils.validate_and_construct_node_execution_input",
        return_value=(graph_stub, starting_nodes_input, compiled_nodes_input_masks),
    )
    execution_db_patch = mocker.patch("backend.executor.utils.execution_db")
    prisma_patch = mocker.patch("backend.executor.utils.prisma")
    user_db_patch = mocker.patch("backend.executor.utils.user_db")
    graph_db_patch = mocker.patch("backend.executor.utils.graph_db")
    get_queue_patch = mocker.patch(
        "backend.executor.utils.get_async_execution_queue"
    )
    get_event_bus_patch = mocker.patch(
        "backend.executor.utils.get_async_execution_event_bus"
    )

    # Database layer behaviour
    prisma_patch.is_connected.return_value = True
    execution_db_patch.create_graph_execution = mocker.AsyncMock(
        return_value=first_exec
    )
    execution_db_patch.update_graph_execution_stats = mocker.AsyncMock(
        return_value=first_exec
    )
    execution_db_patch.update_node_execution_status_batch = mocker.AsyncMock()

    # User and graph settings lookups
    user_stub = mocker.MagicMock(timezone="UTC")
    settings_stub = mocker.MagicMock(human_in_the_loop_safe_mode=True)
    user_db_patch.get_user_by_id = mocker.AsyncMock(return_value=user_stub)
    graph_db_patch.get_graph_settings = mocker.AsyncMock(return_value=settings_stub)

    get_queue_patch.return_value = queue_stub
    get_event_bus_patch.return_value = event_bus_stub

    # First execution
    first_result = await add_graph_execution(
        graph_id=graph_id,
        user_id=user_id,
        inputs=inputs,
        preset_id=preset_id,
        graph_version=graph_version,
        graph_credentials_inputs=graph_credentials_inputs,
        nodes_input_masks=nodes_input_masks,
    )

    # Remember the keyword arguments of the first create_graph_execution call
    kwargs_first = execution_db_patch.create_graph_execution.call_args.kwargs

    # The database layer must have been asked for exactly this execution
    execution_db_patch.create_graph_execution.assert_called_once_with(
        user_id=user_id,
        graph_id=graph_id,
        graph_version=graph_stub.version,
        inputs=inputs,
        credential_inputs=graph_credentials_inputs,
        nodes_input_masks=nodes_input_masks,
        starting_nodes_input=starting_nodes_input,
        preset_id=preset_id,
        parent_graph_exec_id=None,
    )

    # Give the first execution the properties that the second call reads back
    first_exec.graph_id = graph_id
    first_exec.user_id = user_id
    first_exec.graph_version = graph_version
    first_exec.inputs = inputs
    first_exec.credential_inputs = graph_credentials_inputs
    first_exec.nodes_input_masks = nodes_input_masks
    first_exec.preset_id = preset_id

    # A distinct execution object for the second call
    second_exec = mocker.MagicMock(
        spec=GraphExecutionWithNodes, id="execution-id-456"
    )
    second_exec.to_graph_execution_entry.return_value = mocker.MagicMock()

    # Clear recorded calls and swap in the second return value
    execution_db_patch.create_graph_execution.reset_mock()
    execution_db_patch.create_graph_execution.return_value = second_exec
    validate_patch.reset_mock()

    # Sanity check: replay add_graph_execution with the first execution's
    # properties; this should produce the same execution parameters
    second_result = await add_graph_execution(
        graph_id=first_exec.graph_id,
        user_id=first_exec.user_id,
        inputs=first_exec.inputs,
        preset_id=first_exec.preset_id,
        graph_version=first_exec.graph_version,
        graph_credentials_inputs=first_exec.credential_inputs,
        nodes_input_masks=first_exec.nodes_input_masks,
    )

    # Both create_graph_execution calls must have used identical parameters
    kwargs_second = execution_db_patch.create_graph_execution.call_args.kwargs
    assert kwargs_first == kwargs_second

    # Each call returns its own execution object
    assert first_result == first_exec
    assert second_result == second_exec