fix(blocks/llm): Validate LLM summary responses are strings (#11275)

### Changes 🏗️

- Added validation to ensure that the `summary` and `final_summary`
returned by the LLM are strings.
- Raises a `ValueError` if the LLM returns a list or other non-string
type, providing a descriptive error message to aid debugging.

Fixes
[AUTOGPT-SERVER-6M4](https://sentry.io/organizations/significant-gravitas/issues/6978480131/).
The issue was that the LLM returned a list of strings instead of a single
string summary, causing `_combine_summaries` to fail on `join`.

This fix was generated by Seer in Sentry, triggered by Craig Swift. 👁️
Run ID: 2230933

Not quite right? [Click here to continue debugging with
Seer.](https://sentry.io/organizations/significant-gravitas/issues/6978480131/?seerDrawer=true)

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
- [x] Added a unit test to verify that a ValueError is raised when the
LLM returns a list instead of a string for summary or final_summary.

---------

Co-authored-by: seer-by-sentry[bot] <157164994+seer-by-sentry[bot]@users.noreply.github.com>
Co-authored-by: Swifty <craigswift13@gmail.com>
This commit is contained in:
seer-by-sentry[bot]
2025-10-30 09:52:50 +00:00
committed by GitHub
parent 594b1adcf7
commit 4140331731
2 changed files with 206 additions and 2 deletions

View File

@@ -1451,7 +1451,20 @@ class AITextSummarizerBlock(AIBlockBase):
credentials=credentials,
)
return llm_response["summary"]
summary = llm_response["summary"]
# Validate that the LLM returned a string and not a list or other type
if not isinstance(summary, str):
from backend.util.truncate import truncate
truncated_summary = truncate(summary, 500)
raise ValueError(
f"LLM generation failed: Expected a string summary, but received {type(summary).__name__}. "
f"The language model incorrectly formatted its response. "
f"Received value: {json.dumps(truncated_summary)}"
)
return summary
async def _combine_summaries(
self, summaries: list[str], input_data: Input, credentials: APIKeyCredentials
@@ -1473,7 +1486,20 @@ class AITextSummarizerBlock(AIBlockBase):
credentials=credentials,
)
return llm_response["final_summary"]
final_summary = llm_response["final_summary"]
# Validate that the LLM returned a string and not a list or other type
if not isinstance(final_summary, str):
from backend.util.truncate import truncate
truncated_final_summary = truncate(final_summary, 500)
raise ValueError(
f"LLM generation failed: Expected a string final summary, but received {type(final_summary).__name__}. "
f"The language model incorrectly formatted its response. "
f"Received value: {json.dumps(truncated_final_summary)}"
)
return final_summary
else:
# If combined summaries are still too long, recursively summarize
block = AITextSummarizerBlock()

View File

@@ -500,3 +500,181 @@ class TestLLMStatsTracking:
# Check output
assert "response" in outputs
assert outputs["response"] == {"result": "test"}
class TestAITextSummarizerValidation:
    """Verify AITextSummarizerBlock rejects non-string LLM summary payloads."""

    @pytest.mark.asyncio
    async def test_summarize_chunk_rejects_list_response(self):
        """A list-valued `summary` from the LLM must trigger a ValueError."""
        import backend.blocks.llm as llm

        summarizer = llm.AITextSummarizerBlock()

        # Stub the LLM call to hand back a list where a string is expected.
        async def fake_llm_call(input_data, credentials):
            return {"summary": ["bullet point 1", "bullet point 2", "bullet point 3"]}

        summarizer.llm_call = fake_llm_call  # type: ignore

        request = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
            model=llm.LlmModel.GPT4O,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
            style=llm.SummaryStyle.BULLET_POINTS,
        )

        with pytest.raises(ValueError) as exc_info:
            await summarizer._summarize_chunk(
                "Some text to summarize",
                request,
                credentials=llm.TEST_CREDENTIALS,
            )

        message = str(exc_info.value)
        assert "Expected a string summary" in message
        assert "received list" in message
        assert "incorrectly formatted" in message

    @pytest.mark.asyncio
    async def test_combine_summaries_rejects_list_response(self):
        """A list-valued `final_summary` from the LLM must trigger a ValueError."""
        import backend.blocks.llm as llm

        summarizer = llm.AITextSummarizerBlock()

        # Stub the LLM call: the final-summary request gets a malformed list,
        # while any intermediate chunk request gets a well-formed string.
        async def fake_llm_call(input_data, credentials):
            if "final_summary" in input_data.expected_format:
                return {
                    "final_summary": [
                        "bullet point 1",
                        "bullet point 2",
                        "bullet point 3",
                    ]
                }
            return {"summary": "Valid summary"}

        summarizer.llm_call = fake_llm_call  # type: ignore

        request = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
            model=llm.LlmModel.GPT4O,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
            style=llm.SummaryStyle.BULLET_POINTS,
            max_tokens=1000,
        )

        with pytest.raises(ValueError) as exc_info:
            await summarizer._combine_summaries(
                ["summary 1", "summary 2"],
                request,
                credentials=llm.TEST_CREDENTIALS,
            )

        message = str(exc_info.value)
        assert "Expected a string final summary" in message
        assert "received list" in message
        assert "incorrectly formatted" in message

    @pytest.mark.asyncio
    async def test_summarize_chunk_accepts_valid_string_response(self):
        """A string-valued `summary` passes through unchanged."""
        import backend.blocks.llm as llm

        summarizer = llm.AITextSummarizerBlock()

        # Stub the LLM call with a well-formed string response.
        async def fake_llm_call(input_data, credentials):
            return {"summary": "This is a valid string summary"}

        summarizer.llm_call = fake_llm_call  # type: ignore

        request = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
            model=llm.LlmModel.GPT4O,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
        )

        # No exception expected; the summary is returned verbatim.
        outcome = await summarizer._summarize_chunk(
            "Some text to summarize",
            request,
            credentials=llm.TEST_CREDENTIALS,
        )

        assert outcome == "This is a valid string summary"
        assert isinstance(outcome, str)

    @pytest.mark.asyncio
    async def test_combine_summaries_accepts_valid_string_response(self):
        """A string-valued `final_summary` passes through unchanged."""
        import backend.blocks.llm as llm

        summarizer = llm.AITextSummarizerBlock()

        # Stub the LLM call with a well-formed string response.
        async def fake_llm_call(input_data, credentials):
            return {"final_summary": "This is a valid final summary string"}

        summarizer.llm_call = fake_llm_call  # type: ignore

        request = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
            model=llm.LlmModel.GPT4O,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
            max_tokens=1000,
        )

        # No exception expected; the final summary is returned verbatim.
        outcome = await summarizer._combine_summaries(
            ["summary 1", "summary 2"],
            request,
            credentials=llm.TEST_CREDENTIALS,
        )

        assert outcome == "This is a valid final summary string"
        assert isinstance(outcome, str)

    @pytest.mark.asyncio
    async def test_summarize_chunk_rejects_dict_response(self):
        """A dict-valued `summary` from the LLM must trigger a ValueError."""
        import backend.blocks.llm as llm

        summarizer = llm.AITextSummarizerBlock()

        # Stub the LLM call to hand back a dict where a string is expected.
        async def fake_llm_call(input_data, credentials):
            return {"summary": {"nested": "object", "with": "data"}}

        summarizer.llm_call = fake_llm_call  # type: ignore

        request = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
            model=llm.LlmModel.GPT4O,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
        )

        with pytest.raises(ValueError) as exc_info:
            await summarizer._summarize_chunk(
                "Some text to summarize",
                request,
                credentials=llm.TEST_CREDENTIALS,
            )

        message = str(exc_info.value)
        assert "Expected a string summary" in message
        assert "received dict" in message