mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
## Summary - Fixes SmartDecisionMakerBlock conversation management to work with OpenAI's Responses API, which was introduced in #12099 (commit 1240f38) - The migration to `responses.create` updated the outbound LLM call but missed the conversation history serialization — the `raw_response` is now the entire `Response` object (not a `ChatCompletionMessage`), and tool calls/results use `function_call` / `function_call_output` types instead of role-based messages - This caused a 400 error on the second LLM call in agent mode: `"Invalid value: ''. Supported values are: 'assistant', 'system', 'developer', and 'user'."` ### Changes **`smart_decision_maker.py`** — 6 functions updated: | Function | Fix | |---|---| | `_convert_raw_response_to_dict` | Detects Responses API `Response` objects, extracts output items as a list | | `_get_tool_requests` | Recognizes `type: "function_call"` items | | `_get_tool_responses` | Recognizes `type: "function_call_output"` items | | `_create_tool_response` | New `responses_api` kwarg produces `function_call_output` format | | `_update_conversation` | Handles list return from `_convert_raw_response_to_dict` | | Non-agent mode path | Same list handling for traditional execution | **`test_smart_decision_maker_responses_api.py`** — 61 tests covering: - Every branch of all 6 affected helper functions - Chat Completions, Anthropic, and Responses API formats - End-to-end agent mode and traditional mode conversation validity ## Test plan - [x] 61 new unit tests all pass - [x] 11 existing SmartDecisionMakerBlock tests still pass (no regressions) - [x] All pre-commit hooks pass (ruff, black, isort, pyright) - [ ] CI integration tests 🤖 Generated with [Claude Code](https://claude.com/claude-code) <!-- CURSOR_SUMMARY --> --- > [!NOTE] > **Medium Risk** > Updates core LLM invocation and agent conversation/tool-call bookkeeping to match OpenAI’s Responses API, which can affect tool execution loops and prompt serialization across providers. 
Risk is mitigated by extensive new unit tests, but regressions could surface in production agent-mode flows or token/usage accounting. > > **Overview** > **Migrates OpenAI calls from Chat Completions to the Responses API end-to-end**, including tool schema conversion, output parsing, reasoning/text extraction, and updated token usage fields in `LLMResponse`. > > **Fixes SmartDecisionMakerBlock conversation/tool handling for Responses API** by treating `raw_response` as a Response object (splitting it into `output` items for replay), recognizing `function_call`/`function_call_output` entries, and emitting tool outputs in the correct Responses format to prevent invalid follow-up prompts. > > Also adjusts prompt compaction/token estimation to understand Responses API tool items, changes `get_execution_outputs_by_node_exec_id` to return list-valued `CompletedBlockOutput`, removes `gpt-3.5-turbo` from model/cost/docs lists, and adds focused unit tests plus a lightweight `conftest.py` to run these tests without the full server stack. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit ff292efd3d. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY --> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Otto <otto@agpt.co> Co-authored-by: Krzysztof Czerwinski <kpczerwinski@gmail.com>
103 lines
3.5 KiB
Python
103 lines
3.5 KiB
Python
"""Test that get_execution_outputs_by_node_exec_id returns CompletedBlockOutput.
|
|
|
|
CompletedBlockOutput is dict[str, list[Any]] — values must be lists.
|
|
The RPC service layer validates return types via TypeAdapter, so if
|
|
the function returns plain values instead of lists, it causes:
|
|
|
|
1 validation error for dict[str,list[any]] response
|
|
Input should be a valid list [type=list_type, input_value='', input_type=str]
|
|
|
|
This breaks SmartDecisionMakerBlock agent mode tool execution.
|
|
"""
|
|
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
from pydantic import TypeAdapter
|
|
|
|
from backend.data.block import CompletedBlockOutput
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_outputs_are_lists():
    """Each value in the returned dict must be a list, matching CompletedBlockOutput."""
    from backend.data.execution import get_execution_outputs_by_node_exec_id

    # A single DB row carrying one named output value.
    fake_row = MagicMock()
    fake_row.name = "response"
    fake_row.data = "some text output"

    prisma_target = "backend.data.execution.AgentNodeExecutionInputOutput.prisma"
    with patch(prisma_target) as prisma_mock:
        prisma_mock.return_value.find_many = AsyncMock(return_value=[fake_row])
        result = await get_execution_outputs_by_node_exec_id("test-exec-id")

    # The result must conform to CompletedBlockOutput = dict[str, list[Any]]
    assert "response" in result
    assert isinstance(
        result["response"], list
    ), f"Expected list, got {type(result['response']).__name__}: {result['response']!r}"

    # Must also pass TypeAdapter validation (this is what the RPC layer does)
    validated = TypeAdapter(CompletedBlockOutput).validate_python(
        result
    )  # This is the line that fails in prod
    assert validated == {"response": ["some text output"]}
|
@pytest.mark.asyncio
async def test_multiple_outputs_same_name_are_collected():
    """Multiple outputs with the same name should all appear in the list."""
    from backend.data.execution import get_execution_outputs_by_node_exec_id

    # Two DB rows sharing the same output name with distinct payloads.
    rows = []
    for payload in ("first", "second"):
        row = MagicMock()
        row.name = "result"
        row.data = payload
        rows.append(row)

    prisma_target = "backend.data.execution.AgentNodeExecutionInputOutput.prisma"
    with patch(prisma_target) as prisma_mock:
        prisma_mock.return_value.find_many = AsyncMock(return_value=rows)
        result = await get_execution_outputs_by_node_exec_id("test-exec-id")

    collected = result["result"]
    assert isinstance(collected, list)
    assert len(collected) == 2
|
@pytest.mark.asyncio
async def test_empty_outputs_returns_empty_dict():
    """No outputs → empty dict."""
    from backend.data.execution import get_execution_outputs_by_node_exec_id

    prisma_target = "backend.data.execution.AgentNodeExecutionInputOutput.prisma"
    with patch(prisma_target) as prisma_mock:
        # Simulate a node execution that produced no output rows at all.
        prisma_mock.return_value.find_many = AsyncMock(return_value=[])
        outputs = await get_execution_outputs_by_node_exec_id("test-exec-id")

    assert outputs == {}
|
@pytest.mark.asyncio
async def test_none_data_skipped():
    """Outputs with data=None should be skipped."""
    from backend.data.execution import get_execution_outputs_by_node_exec_id

    # A row whose payload is None — it must not appear in the result at all.
    empty_row = MagicMock()
    empty_row.name = "response"
    empty_row.data = None

    prisma_target = "backend.data.execution.AgentNodeExecutionInputOutput.prisma"
    with patch(prisma_target) as prisma_mock:
        prisma_mock.return_value.find_many = AsyncMock(return_value=[empty_row])
        outputs = await get_execution_outputs_by_node_exec_id("test-exec-id")

    assert outputs == {}