fix: handle ValidationError in guardrail retry loop (fixes #4126)

When a Task has both output_pydantic and a guardrail configured and the
agent produces invalid JSON that fails Pydantic validation during a
retry, the system crashes instead of continuing to the next retry
attempt.
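
The failure mode is, roughly, an unhandled Pydantic error on malformed
model output. A minimal standalone sketch of it using plain Pydantic v2
(the model and the input string are illustrative, not taken from the issue):

    from pydantic import BaseModel, ValidationError

    class Report(BaseModel):  # illustrative schema, not the reporter's model
        title: str
        content: str

    try:
        Report.model_validate_json("not valid json from the agent")
    except ValidationError:
        # Before this fix, the equivalent error escaped the guardrail
        # retry loop and aborted the task instead of triggering a retry.
        pass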

This fix wraps the _export_output calls in a try-except block that
catches ValidationError and ConverterError. When either exception is
raised, pydantic_output and json_output are set to None, allowing the
retry loop to continue and the guardrail to check the raw output.
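
Guardrails that expect structured output should therefore tolerate a
missing pydantic payload while retries are in flight. A minimal sketch
of such a guardrail (the function body and the json.loads fallback are
hypothetical; the TaskOutput fields and the (bool, str) return shape
match the tests added in this commit):

    import json

    from crewai.tasks.task_output import TaskOutput

    def json_shape_guardrail(result: TaskOutput) -> tuple[bool, str]:
        # With this fix, result.pydantic can be None while the agent is
        # still emitting invalid JSON; fall back to checking the raw text.
        if result.pydantic is not None:
            return (True, result.raw)
        try:
            json.loads(result.raw)
        except json.JSONDecodeError:
            return (False, "Output is not valid JSON; return the requested schema.")
        return (True, result.raw)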

Changes:
- Import ValidationError from pydantic and ConverterError from converter
- Wrap _export_output calls in try-except in _invoke_guardrail_function
- Apply same fix to async version _ainvoke_guardrail_function
- Add 3 regression tests covering the issue scenario

Co-Authored-By: João <joao@crewai.com>
commit 52862b4f0f
parent dc63bc2319
Author: Devin AI
Date: 2025-12-19 02:23:57 +00:00
2 changed files with 168 additions and 3 deletions


@@ -24,6 +24,7 @@ from pydantic import (
     BaseModel,
     Field,
     PrivateAttr,
+    ValidationError,
     field_validator,
     model_validator,
 )
@@ -43,7 +44,7 @@ from crewai.tasks.task_output import TaskOutput
 from crewai.tools.base_tool import BaseTool
 from crewai.utilities.config import process_config
 from crewai.utilities.constants import NOT_SPECIFIED, _NotSpecified
-from crewai.utilities.converter import Converter, convert_to_model
+from crewai.utilities.converter import Converter, ConverterError, convert_to_model
 from crewai.utilities.guardrail import (
     process_guardrail,
 )
@@ -1044,7 +1045,13 @@ Follow these guidelines:
             tools=tools,
         )
-        pydantic_output, json_output = self._export_output(result)
+        try:
+            pydantic_output, json_output = self._export_output(result)
+        except (ValidationError, ConverterError):
+            # If export fails due to invalid output format, set outputs to None
+            # and let the next iteration's guardrail check handle it
+            pydantic_output, json_output = None, None
         task_output = TaskOutput(
             name=self.name or self.description,
             description=self.description,
@@ -1140,7 +1147,13 @@ Follow these guidelines:
             tools=tools,
         )
-        pydantic_output, json_output = self._export_output(result)
+        try:
+            pydantic_output, json_output = self._export_output(result)
+        except (ValidationError, ConverterError):
+            # If export fails due to invalid output format, set outputs to None
+            # and let the next iteration's guardrail check handle it
+            pydantic_output, json_output = None, None
         task_output = TaskOutput(
             name=self.name or self.description,
             description=self.description,


@@ -752,3 +752,155 @@ def test_per_guardrail_independent_retry_tracking():
    assert call_counts["g3"] == 1
    assert "G3(1)" in result.raw


def test_guardrail_retries_with_invalid_pydantic_output():
    """Test that guardrail retries work when agent produces invalid pydantic output.

    This test covers the bug reported in issue #4126 where guardrail_max_retries
    logic was broken due to unhandled ValidationError in _invoke_guardrail_function.
    When the agent produces invalid JSON that fails Pydantic validation, the system
    should continue retrying instead of crashing.
    """
    from pydantic import BaseModel, Field

    class OutputModel(BaseModel):
        title: str = Field(description="The title")
        content: str = Field(description="The content")

    call_count = 0

    def mock_execute_task(*args, **kwargs):
        nonlocal call_count
        call_count += 1
        if call_count == 1:
            return "invalid json that will fail pydantic validation"
        elif call_count == 2:
            return "still invalid { broken json"
        else:
            return '{"title": "Valid Title", "content": "Valid Content"}'

    def always_fail_guardrail(result: TaskOutput) -> tuple[bool, str]:
        if call_count < 3:
            return (False, "Output not valid yet")
        return (True, result.raw)

    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task = mock_execute_task
    agent.crew = None
    agent.last_messages = []

    task = create_smart_task(
        description="Test pydantic validation during guardrail retries",
        expected_output="Valid structured output",
        guardrail=always_fail_guardrail,
        output_pydantic=OutputModel,
        guardrail_max_retries=3,
    )

    result = task.execute_sync(agent=agent)

    assert call_count == 3
    assert result.pydantic is not None
    assert result.pydantic.title == "Valid Title"
    assert result.pydantic.content == "Valid Content"


def test_guardrail_max_retries_exhausted_with_invalid_pydantic():
    """Test that max retries are properly exhausted even with pydantic validation errors.

    This ensures that when the agent consistently produces invalid output that fails
    pydantic validation, the retry loop continues until max_retries is exhausted,
    rather than crashing on the first validation error.
    """
    from pydantic import BaseModel, Field

    class StrictModel(BaseModel):
        required_field: str = Field(description="A required field")

    call_count = 0

    def mock_execute_task(*args, **kwargs):
        nonlocal call_count
        call_count += 1
        return "this is not valid json and will always fail"

    def always_fail_guardrail(result: TaskOutput) -> tuple[bool, str]:
        return (False, "Output is not valid")

    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task = mock_execute_task
    agent.crew = None
    agent.last_messages = []

    task = create_smart_task(
        description="Test max retries with invalid pydantic",
        expected_output="Structured output",
        guardrail=always_fail_guardrail,
        output_pydantic=StrictModel,
        guardrail_max_retries=2,
    )

    with pytest.raises(Exception) as exc_info:
        task.execute_sync(agent=agent)

    assert "Task failed guardrail validation after 2 retries" in str(exc_info.value)
    assert call_count == 3


def test_guardrail_with_pydantic_validation_error_continues_retry():
    """Test that pydantic ValidationError during retry doesn't crash the loop.

    This is a regression test for issue #4126. The bug was that when _export_output
    raised a ValidationError during the guardrail retry loop, it would crash instead
    of continuing to the next retry attempt.
    """
    from pydantic import BaseModel, Field

    class TestModel(BaseModel):
        value: int = Field(description="An integer value")

    execution_results = [
        "not json",
        '{"value": "not_an_int"}',
        '{"value": 42}',
    ]
    call_index = 0

    def mock_execute_task(*args, **kwargs):
        nonlocal call_index
        result = execution_results[call_index]
        call_index += 1
        return result

    guardrail_calls = 0

    def counting_guardrail(result: TaskOutput) -> tuple[bool, str]:
        nonlocal guardrail_calls
        guardrail_calls += 1
        if guardrail_calls < 3:
            return (False, f"Retry attempt {guardrail_calls}")
        return (True, result.raw)

    agent = Mock()
    agent.role = "test_agent"
    agent.execute_task = mock_execute_task
    agent.crew = None
    agent.last_messages = []

    task = create_smart_task(
        description="Test ValidationError handling in retry loop",
        expected_output="Integer output",
        guardrail=counting_guardrail,
        output_pydantic=TestModel,
        guardrail_max_retries=3,
    )

    result = task.execute_sync(agent=agent)

    assert call_index == 3
    assert guardrail_calls == 3
    assert result.pydantic is not None
    assert result.pydantic.value == 42