fix(backend): address PR review — KeyError, reasoning consistency, test cleanup

- Use p.get("role") instead of p["role"] to avoid KeyError on Responses API
  items that lack a role key (sentry + cursor findings)
- Non-agent mode now checks for tool calls before inserting reasoning,
  matching agent mode behavior (cursor finding)
- Update test docstrings to reflect that tests now pass (not xfail)
- Remove unused MESSAGE_ITEM fixture
- Fix outdated test comment about "correct by accident"

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nicholas Tindle
2026-03-19 22:10:21 -05:00
parent 549c1ec566
commit ff292efd3d
2 changed files with 23 additions and 23 deletions

View File

@@ -1133,7 +1133,9 @@ class SmartDecisionMakerBlock(Block):
)
if input_data.sys_prompt and not any(
p["role"] == "system" and p["content"].startswith(MAIN_OBJECTIVE_PREFIX)
p.get("role") == "system"
and isinstance(p.get("content"), str)
and p["content"].startswith(MAIN_OBJECTIVE_PREFIX)
for p in prompt
):
prompt.append(
@@ -1144,7 +1146,9 @@ class SmartDecisionMakerBlock(Block):
)
if input_data.prompt and not any(
p["role"] == "user" and p["content"].startswith(MAIN_OBJECTIVE_PREFIX)
p.get("role") == "user"
and isinstance(p.get("content"), str)
and p["content"].startswith(MAIN_OBJECTIVE_PREFIX)
for p in prompt
):
prompt.append(
@@ -1253,7 +1257,18 @@ class SmartDecisionMakerBlock(Block):
yield emit_key, arg_value
converted = _convert_raw_response_to_dict(response.raw_response)
if response.reasoning:
# Check for tool calls to avoid inserting reasoning between tool pairs
if isinstance(converted, list):
has_tool_calls = any(
item.get("type") == "function_call" for item in converted
)
else:
has_tool_calls = isinstance(converted.get("content"), list) and any(
item.get("type") == "tool_use" for item in converted.get("content", [])
)
if response.reasoning and not has_tool_calls:
prompt.append(
{"role": "assistant", "content": f"[Reasoning]: {response.reasoning}"}
)

View File

@@ -5,13 +5,8 @@ The Responses API uses a different conversation format:
- Tool results are items with ``type: "function_call_output"`` and ``call_id``
- These items do NOT have ``role`` at the top level
When SmartDecisionMakerBlock builds conversation history with these items and
``conversation_compaction=True`` (the default), the conversation passes through
``compress_context()`` — whose helper functions currently only recognise the
Chat Completions (``role: "tool"``) and Anthropic (``tool_use`` / ``tool_result``)
formats.
Each xfail test documents a specific function that fails with Responses API items.
These tests validate that prompt utilities correctly handle Responses API items
alongside Chat Completions and Anthropic formats.
"""
import pytest
@@ -55,14 +50,6 @@ FUNCTION_CALL_OUTPUT_ITEM = {
"output": '{"results": ["result1", "result2", "result3"]}',
}
MESSAGE_ITEM = {
"type": "message",
"id": "msg_abc",
"role": "assistant",
"status": "completed",
"content": [{"type": "output_text", "text": "Here are the results."}],
}
# ═══════════════════════════════════════════════════════════════════════════
# _msg_tokens
@@ -408,17 +395,15 @@ class TestValidateOrphansResponsesApi:
result = validate_and_remove_orphan_tool_responses(messages, log_warning=False)
assert len(result) == 2
def test_responses_api_paired_trivially_kept(self):
"""Matched Responses API pairs are kept — but only because the
validator doesn't see them at all (both are invisible). The
result is correct by accident, not by design."""
def test_responses_api_paired_kept(self):
"""Matched Responses API pairs are kept because the validator
properly recognizes function_call and function_call_output items."""
messages = [
{"role": "user", "content": "Do something."},
FUNCTION_CALL_ITEM,
FUNCTION_CALL_OUTPUT_ITEM,
]
result = validate_and_remove_orphan_tool_responses(messages, log_warning=False)
# Kept, but only because neither item was recognised
assert len(result) == 3
def test_responses_api_orphan_output_removed(self):