fix: prevent LLM observation hallucination by properly attributing tool results

Fixes #4181

The issue was that tool observations were being appended to the assistant
message in the conversation history. Because the model then saw assistant
turns that already contained "Observation: ..." blocks, it learned to imitate
that pattern and hallucinate fake observations during tool calls instead of
waiting for the real tool result.
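
For illustration, a minimal sketch of the conversation shape before and after
the fix (message dicts and contents are simplified examples, not exact runtime
payloads):

    # Before: the tool output was folded into the assistant turn, so the model saw
    # assistant messages that already ended in "Observation: ..." and began
    # fabricating its own observations.
    messages = [
        {"role": "assistant",
         "content": "Thought: ...\nAction: search\nAction Input: {...}\nObservation: <tool output>"},
    ]

    # After: the LLM's own text stays in the assistant turn and the real tool
    # output arrives as a separate user turn.
    messages = [
        {"role": "assistant",
         "content": "Thought: ...\nAction: search\nAction Input: {...}"},
        {"role": "user", "content": "Observation: <tool output>"},
    ]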

Changes:
- Add llm_response field to AgentAction to store the original LLM response
  before the observation is appended
- Modify handle_agent_action_core to store llm_response before appending the
  observation to text (text still contains the observation for logging; see
  the field sketch after this list)
- Update CrewAgentExecutor._invoke_loop and _ainvoke_loop to:
  - Append LLM response as assistant message
  - Append observation as user message (not assistant)
- Apply same fix to LiteAgent._invoke_loop
- Apply same fix to CrewAgentExecutorFlow.execute_tool_action
- Fix the add_image_tool special case in both executors to use the same pattern
- Add comprehensive tests for proper message attribution
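
To summarize the resulting contract, a sketch of how the AgentAction fields
relate after a tool call whose result is not the final answer (this mirrors
what handle_agent_action_core does in the diff below rather than calling it;
the constructor arguments follow the new tests):

    from crewai.agents.parser import AgentAction

    action = AgentAction(
        thought="I need to search",
        tool="search",
        tool_input="query",
        text="Thought: I need to search\nAction: search\nAction Input: query",
    )
    # handle_agent_action_core keeps the clean response, then appends the observation:
    action.llm_response = action.text
    action.result = "<tool output>"
    action.text += f"\nObservation: {action.result}"

    # Executors then append action.llm_response as the assistant message and
    # f"Observation: {action.result}" as a user message; action.text is used
    # only for logging/trace.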

Co-Authored-By: João <joao@crewai.com>
Author: Devin AI
Date: 2026-01-06 06:57:16 +00:00
parent 25c0c030ce
commit 83b07b9d23
6 changed files with 398 additions and 9 deletions


@@ -278,7 +278,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
)
self._invoke_step_callback(formatted_answer) # type: ignore[arg-type]
self._append_message(formatted_answer.text) # type: ignore[union-attr,attr-defined]
# Properly attribute messages to avoid LLM hallucination of observations:
# - LLM's response goes as assistant message
# - Tool observation goes as user message (not assistant)
if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
# For tool use: append LLM response as assistant, observation as user
self._append_message(formatted_answer.llm_response)
if formatted_answer.result:
self._append_message(
f"Observation: {formatted_answer.result}", role="user"
)
else:
# For final answer or other cases: append text as assistant
self._append_message(formatted_answer.text) # type: ignore[union-attr,attr-defined]
except OutputParserError as e:
formatted_answer = handle_output_parser_exception( # type: ignore[assignment]
@@ -431,7 +444,20 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
)
self._invoke_step_callback(formatted_answer) # type: ignore[arg-type]
self._append_message(formatted_answer.text) # type: ignore[union-attr,attr-defined]
# Properly attribute messages to avoid LLM hallucination of observations:
# - LLM's response goes as assistant message
# - Tool observation goes as user message (not assistant)
if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
# For tool use: append LLM response as assistant, observation as user
self._append_message(formatted_answer.llm_response)
if formatted_answer.result:
self._append_message(
f"Observation: {formatted_answer.result}", role="user"
)
else:
# For final answer or other cases: append text as assistant
self._append_message(formatted_answer.text) # type: ignore[union-attr,attr-defined]
except OutputParserError as e:
formatted_answer = handle_output_parser_exception( # type: ignore[assignment]
@@ -481,13 +507,18 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
Updated action or final answer.
"""
# Special case for add_image_tool
# Note: Even for add_image_tool, we should not attribute tool output to assistant
# to avoid LLM hallucination. The LLM's action is stored as assistant message,
# and the tool result (image) is stored as user message.
add_image_tool = self._i18n.tools("add_image")
if (
isinstance(add_image_tool, dict)
and formatted_answer.tool.casefold().strip()
== add_image_tool.get("name", "").casefold().strip()
):
self.messages.append({"role": "assistant", "content": tool_result.result})
# Store original LLM response for proper message attribution
formatted_answer.llm_response = formatted_answer.text
formatted_answer.result = tool_result.result
return formatted_answer
return handle_agent_action_core(


@@ -33,6 +33,7 @@ class AgentAction:
tool_input: str
text: str
result: str | None = None
llm_response: str | None = None # Original LLM response before observation appended
@dataclass


@@ -349,8 +349,18 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
# Invoke step callback if configured
self._invoke_step_callback(result)
# Append result message to conversation state
if hasattr(result, "text"):
# Properly attribute messages to avoid LLM hallucination of observations:
# - LLM's response goes as assistant message
# - Tool observation goes as user message (not assistant)
if isinstance(result, AgentAction) and result.llm_response:
# For tool use: append LLM response as assistant, observation as user
self._append_message_to_state(result.llm_response)
if result.result:
self._append_message_to_state(
f"Observation: {result.result}", role="user"
)
elif hasattr(result, "text"):
# For final answer or other cases: append text as assistant
self._append_message_to_state(result.text)
# Check if tool result became a final answer (result_as_answer flag)
@@ -537,15 +547,19 @@ class CrewAgentExecutorFlow(Flow[AgentReActState], CrewAgentExecutorMixin):
Returns:
Updated action or final answer.
"""
# Special case for add_image_tool
# Note: Even for add_image_tool, we should not attribute tool output to assistant
# to avoid LLM hallucination. The LLM's action is stored as assistant message,
# and the tool result (image) is stored as user message.
add_image_tool = self._i18n.tools("add_image")
if (
isinstance(add_image_tool, dict)
and formatted_answer.tool.casefold().strip()
== add_image_tool.get("name", "").casefold().strip()
):
self.state.messages.append(
{"role": "assistant", "content": tool_result.result}
)
# Store original LLM response for proper message attribution
formatted_answer.llm_response = formatted_answer.text
formatted_answer.result = tool_result.result
return formatted_answer
return handle_agent_action_core(


@@ -582,7 +582,19 @@ class LiteAgent(FlowTrackable, BaseModel):
show_logs=self._show_logs,
)
self._append_message(formatted_answer.text, role="assistant")
# Properly attribute messages to avoid LLM hallucination of observations:
# - LLM's response goes as assistant message
# - Tool observation goes as user message (not assistant)
if isinstance(formatted_answer, AgentAction) and formatted_answer.llm_response:
# For tool use: append LLM response as assistant, observation as user
self._append_message(formatted_answer.llm_response, role="assistant")
if formatted_answer.result:
self._append_message(
f"Observation: {formatted_answer.result}", role="user"
)
else:
# For final answer or other cases: append text as assistant
self._append_message(formatted_answer.text, role="assistant")
except OutputParserError as e: # noqa: PERF203
self._printer.print(
content="Failed to parse LLM output. Retrying...",


@@ -382,10 +382,21 @@ def handle_agent_action_core(
Notes:
- TODO: Remove messages parameter and its usage.
- The observation is appended to formatted_answer.text for logging/trace
purposes, but callers should use formatted_answer.llm_response for the
assistant message (without observation) and append the observation
separately as a user message to avoid LLM hallucination of observations.
"""
if step_callback:
step_callback(tool_result)
# Store the original LLM response before appending observation
# This is used by executors to correctly attribute messages:
# - llm_response goes as assistant message
# - observation goes as user message (to prevent LLM hallucination)
formatted_answer.llm_response = formatted_answer.text
# Append observation to text for logging/trace purposes
formatted_answer.text += f"\nObservation: {tool_result.result}"
formatted_answer.result = tool_result.result


@@ -0,0 +1,320 @@
"""Tests for proper message attribution to prevent LLM observation hallucination.
This module tests that tool observations are correctly attributed to user messages
rather than assistant messages, which prevents the LLM from learning to hallucinate
fake observations during tool calls.
Related to GitHub issue #4181.
"""
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.parser import AgentAction, AgentFinish
from crewai.tools.tool_types import ToolResult
from crewai.utilities.agent_utils import handle_agent_action_core
@pytest.fixture
def mock_llm() -> MagicMock:
"""Create a mock LLM for testing."""
llm = MagicMock()
llm.supports_stop_words.return_value = True
llm.stop = []
return llm
@pytest.fixture
def mock_agent() -> MagicMock:
"""Create a mock agent for testing."""
agent = MagicMock()
agent.role = "Test Agent"
agent.key = "test_agent_key"
agent.verbose = False
agent.id = "test_agent_id"
return agent
@pytest.fixture
def mock_task() -> MagicMock:
"""Create a mock task for testing."""
task = MagicMock()
task.description = "Test task description"
return task
@pytest.fixture
def mock_crew() -> MagicMock:
"""Create a mock crew for testing."""
crew = MagicMock()
crew.verbose = False
crew._train = False
return crew
@pytest.fixture
def mock_tools_handler() -> MagicMock:
"""Create a mock tools handler."""
return MagicMock()
@pytest.fixture
def executor(
mock_llm: MagicMock,
mock_agent: MagicMock,
mock_task: MagicMock,
mock_crew: MagicMock,
mock_tools_handler: MagicMock,
) -> CrewAgentExecutor:
"""Create a CrewAgentExecutor instance for testing."""
return CrewAgentExecutor(
llm=mock_llm,
task=mock_task,
crew=mock_crew,
agent=mock_agent,
prompt={"prompt": "Test prompt {input} {tool_names} {tools}"},
max_iter=5,
tools=[],
tools_names="",
stop_words=["Observation:"],
tools_description="",
tools_handler=mock_tools_handler,
)
class TestHandleAgentActionCore:
"""Tests for handle_agent_action_core function."""
def test_stores_llm_response_before_observation(self) -> None:
"""Test that llm_response is stored before observation is appended."""
original_text = "Thought: I need to search\nAction: search\nAction Input: query"
action = AgentAction(
thought="I need to search",
tool="search",
tool_input="query",
text=original_text,
)
tool_result = ToolResult(result="Search result: found data", result_as_answer=False)
result = handle_agent_action_core(
formatted_answer=action,
tool_result=tool_result,
)
assert isinstance(result, AgentAction)
assert result.llm_response == original_text
assert "Observation:" in result.text
assert result.result == "Search result: found data"
def test_text_contains_observation_for_logging(self) -> None:
"""Test that text contains observation for logging purposes."""
action = AgentAction(
thought="Testing",
tool="test_tool",
tool_input="{}",
text="Thought: Testing\nAction: test_tool\nAction Input: {}",
)
tool_result = ToolResult(result="Tool output", result_as_answer=False)
result = handle_agent_action_core(
formatted_answer=action,
tool_result=tool_result,
)
assert isinstance(result, AgentAction)
assert "Observation: Tool output" in result.text
def test_result_as_answer_returns_agent_finish(self) -> None:
"""Test that result_as_answer=True returns AgentFinish."""
action = AgentAction(
thought="Using tool",
tool="final_tool",
tool_input="{}",
text="Thought: Using tool\nAction: final_tool\nAction Input: {}",
)
tool_result = ToolResult(result="Final answer from tool", result_as_answer=True)
result = handle_agent_action_core(
formatted_answer=action,
tool_result=tool_result,
)
assert isinstance(result, AgentFinish)
assert result.output == "Final answer from tool"
class TestCrewAgentExecutorMessageAttribution:
"""Tests for proper message attribution in CrewAgentExecutor."""
def test_tool_observation_not_in_assistant_message(
self, executor: CrewAgentExecutor
) -> None:
"""Test that tool observations are not attributed to assistant messages.
This is the core fix for GitHub issue #4181 - observations should be
in user messages, not assistant messages, to prevent LLM hallucination.
"""
call_count = 0
def mock_llm_response(*args: Any, **kwargs: Any) -> str:
nonlocal call_count
call_count += 1
if call_count == 1:
return (
"Thought: I need to use a tool\n"
"Action: test_tool\n"
'Action Input: {"arg": "value"}'
)
return "Thought: I have the answer\nFinal Answer: Done"
with patch(
"crewai.agents.crew_agent_executor.get_llm_response",
side_effect=mock_llm_response,
):
with patch(
"crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
return_value=ToolResult(
result="Tool executed successfully", result_as_answer=False
),
):
with patch.object(executor, "_show_logs"):
result = executor._invoke_loop()
assert isinstance(result, AgentFinish)
assistant_messages = [
msg for msg in executor.messages if msg.get("role") == "assistant"
]
user_messages = [msg for msg in executor.messages if msg.get("role") == "user"]
for msg in assistant_messages:
content = msg.get("content", "")
assert "Observation:" not in content, (
f"Assistant message should not contain 'Observation:'. "
f"Found: {content[:100]}..."
)
observation_in_user = any(
"Observation:" in msg.get("content", "") for msg in user_messages
)
assert observation_in_user, (
"Tool observation should be in a user message, not assistant message"
)
def test_llm_response_in_assistant_message(
self, executor: CrewAgentExecutor
) -> None:
"""Test that the LLM's actual response is in assistant messages."""
call_count = 0
llm_action_text = (
"Thought: I need to use a tool\n"
"Action: test_tool\n"
'Action Input: {"arg": "value"}'
)
def mock_llm_response(*args: Any, **kwargs: Any) -> str:
nonlocal call_count
call_count += 1
if call_count == 1:
return llm_action_text
return "Thought: I have the answer\nFinal Answer: Done"
with patch(
"crewai.agents.crew_agent_executor.get_llm_response",
side_effect=mock_llm_response,
):
with patch(
"crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
return_value=ToolResult(
result="Tool executed successfully", result_as_answer=False
),
):
with patch.object(executor, "_show_logs"):
executor._invoke_loop()
assistant_messages = [
msg for msg in executor.messages if msg.get("role") == "assistant"
]
llm_response_found = any(
"Action: test_tool" in msg.get("content", "") for msg in assistant_messages
)
assert llm_response_found, (
"LLM's action response should be in an assistant message"
)
def test_message_order_after_tool_use(
self, executor: CrewAgentExecutor
) -> None:
"""Test that messages are in correct order: assistant (action) then user (observation)."""
call_count = 0
def mock_llm_response(*args: Any, **kwargs: Any) -> str:
nonlocal call_count
call_count += 1
if call_count == 1:
return (
"Thought: I need to use a tool\n"
"Action: test_tool\n"
'Action Input: {"arg": "value"}'
)
return "Thought: I have the answer\nFinal Answer: Done"
with patch(
"crewai.agents.crew_agent_executor.get_llm_response",
side_effect=mock_llm_response,
):
with patch(
"crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
return_value=ToolResult(
result="Tool executed successfully", result_as_answer=False
),
):
with patch.object(executor, "_show_logs"):
executor._invoke_loop()
action_msg_idx = None
observation_msg_idx = None
for i, msg in enumerate(executor.messages):
content = msg.get("content", "")
if "Action: test_tool" in content and msg.get("role") == "assistant":
action_msg_idx = i
if "Observation:" in content and msg.get("role") == "user":
observation_msg_idx = i
assert action_msg_idx is not None, "Action message not found"
assert observation_msg_idx is not None, "Observation message not found"
assert observation_msg_idx == action_msg_idx + 1, (
f"Observation (user) should immediately follow action (assistant). "
f"Action at {action_msg_idx}, Observation at {observation_msg_idx}"
)
class TestAgentActionLlmResponseField:
"""Tests for the llm_response field on AgentAction."""
def test_agent_action_has_llm_response_field(self) -> None:
"""Test that AgentAction has llm_response field."""
action = AgentAction(
thought="Test",
tool="test",
tool_input="{}",
text="Test text",
)
assert hasattr(action, "llm_response")
assert action.llm_response is None
def test_agent_action_llm_response_can_be_set(self) -> None:
"""Test that llm_response can be set on AgentAction."""
action = AgentAction(
thought="Test",
tool="test",
tool_input="{}",
text="Test text",
)
action.llm_response = "Original LLM response"
assert action.llm_response == "Original LLM response"