mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(backend): address PR review — KeyError, reasoning consistency, test cleanup
- Use p.get("role") instead of p["role"] to avoid KeyError on Responses API
items that lack a role key (sentry + cursor findings)
- Non-agent mode now checks for tool calls before inserting reasoning,
matching agent mode behavior (cursor finding)
- Update test docstrings to reflect that tests now pass (not xfail)
- Remove unused MESSAGE_ITEM fixture
- Fix outdated test comment about "correct by accident"
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1133,7 +1133,9 @@ class SmartDecisionMakerBlock(Block):
|
||||
)
|
||||
|
||||
if input_data.sys_prompt and not any(
|
||||
p["role"] == "system" and p["content"].startswith(MAIN_OBJECTIVE_PREFIX)
|
||||
p.get("role") == "system"
|
||||
and isinstance(p.get("content"), str)
|
||||
and p["content"].startswith(MAIN_OBJECTIVE_PREFIX)
|
||||
for p in prompt
|
||||
):
|
||||
prompt.append(
|
||||
@@ -1144,7 +1146,9 @@ class SmartDecisionMakerBlock(Block):
|
||||
)
|
||||
|
||||
if input_data.prompt and not any(
|
||||
p["role"] == "user" and p["content"].startswith(MAIN_OBJECTIVE_PREFIX)
|
||||
p.get("role") == "user"
|
||||
and isinstance(p.get("content"), str)
|
||||
and p["content"].startswith(MAIN_OBJECTIVE_PREFIX)
|
||||
for p in prompt
|
||||
):
|
||||
prompt.append(
|
||||
@@ -1253,7 +1257,18 @@ class SmartDecisionMakerBlock(Block):
|
||||
yield emit_key, arg_value
|
||||
|
||||
converted = _convert_raw_response_to_dict(response.raw_response)
|
||||
if response.reasoning:
|
||||
|
||||
# Check for tool calls to avoid inserting reasoning between tool pairs
|
||||
if isinstance(converted, list):
|
||||
has_tool_calls = any(
|
||||
item.get("type") == "function_call" for item in converted
|
||||
)
|
||||
else:
|
||||
has_tool_calls = isinstance(converted.get("content"), list) and any(
|
||||
item.get("type") == "tool_use" for item in converted.get("content", [])
|
||||
)
|
||||
|
||||
if response.reasoning and not has_tool_calls:
|
||||
prompt.append(
|
||||
{"role": "assistant", "content": f"[Reasoning]: {response.reasoning}"}
|
||||
)
|
||||
|
||||
@@ -5,13 +5,8 @@ The Responses API uses a different conversation format:
|
||||
- Tool results are items with ``type: "function_call_output"`` and ``call_id``
|
||||
- These items do NOT have ``role`` at the top level
|
||||
|
||||
When SmartDecisionMakerBlock builds conversation history with these items and
|
||||
``conversation_compaction=True`` (the default), the conversation passes through
|
||||
``compress_context()`` — whose helper functions currently only recognise the
|
||||
Chat Completions (``role: "tool"``) and Anthropic (``tool_use`` / ``tool_result``)
|
||||
formats.
|
||||
|
||||
Each xfail test documents a specific function that fails with Responses API items.
|
||||
These tests validate that prompt utilities correctly handle Responses API items
|
||||
alongside Chat Completions and Anthropic formats.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
@@ -55,14 +50,6 @@ FUNCTION_CALL_OUTPUT_ITEM = {
|
||||
"output": '{"results": ["result1", "result2", "result3"]}',
|
||||
}
|
||||
|
||||
MESSAGE_ITEM = {
|
||||
"type": "message",
|
||||
"id": "msg_abc",
|
||||
"role": "assistant",
|
||||
"status": "completed",
|
||||
"content": [{"type": "output_text", "text": "Here are the results."}],
|
||||
}
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# _msg_tokens
|
||||
@@ -408,17 +395,15 @@ class TestValidateOrphansResponsesApi:
|
||||
result = validate_and_remove_orphan_tool_responses(messages, log_warning=False)
|
||||
assert len(result) == 2
|
||||
|
||||
def test_responses_api_paired_trivially_kept(self):
|
||||
"""Matched Responses API pairs are kept — but only because the
|
||||
validator doesn't see them at all (both are invisible). The
|
||||
result is correct by accident, not by design."""
|
||||
def test_responses_api_paired_kept(self):
|
||||
"""Matched Responses API pairs are kept because the validator
|
||||
properly recognizes function_call and function_call_output items."""
|
||||
messages = [
|
||||
{"role": "user", "content": "Do something."},
|
||||
FUNCTION_CALL_ITEM,
|
||||
FUNCTION_CALL_OUTPUT_ITEM,
|
||||
]
|
||||
result = validate_and_remove_orphan_tool_responses(messages, log_warning=False)
|
||||
# Kept, but only because neither item was recognised
|
||||
assert len(result) == 3
|
||||
|
||||
def test_responses_api_orphan_output_removed(self):
|
||||
|
||||
Reference in New Issue
Block a user