Mirror of https://github.com/Significant-Gravitas/AutoGPT.git, synced 2026-01-09 15:17:59 -05:00
fix(blocks/llm): Validate LLM summary responses are strings (#11275)
### Changes 🏗️

- Added validation to ensure that the `summary` and `final_summary` returned by the LLM are strings.
- Raises a `ValueError` if the LLM returns a list or other non-string type, providing a descriptive error message to aid debugging.

Fixes [AUTOGPT-SERVER-6M4](https://sentry.io/organizations/significant-gravitas/issues/6978480131/). The issue: the LLM returned a list of strings instead of a single string summary, causing `_combine_summaries` to fail on `join`.

This fix was generated by Seer in Sentry, triggered by Craig Swift.

👁️ Run ID: 2230933

Not quite right? [Click here to continue debugging with Seer.](https://sentry.io/organizations/significant-gravitas/issues/6978480131/?seerDrawer=true)

### Checklist 📋

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Added a unit test to verify that a `ValueError` is raised when the LLM returns a list instead of a string for `summary` or `final_summary`.

---------

Co-authored-by: seer-by-sentry[bot] <157164994+seer-by-sentry[bot]@users.noreply.github.com>
Co-authored-by: Swifty <craigswift13@gmail.com>
Commit 4140331731, parent 594b1adcf7, committed via GitHub.
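For context, the failure mode is straightforward to reproduce: `str.join` raises a `TypeError` as soon as any sequence item is not a string, which is what happened when the model returned a list for `summary`. A minimal standalone sketch of the bug and the guard this PR adds (illustration only, not the block's actual code):

```python
# Standalone illustration of the failure mode and the guard; not the
# block's actual code.

# A bad LLM response: the second "summary" is a list, not a string.
summaries = ["chunk 1 summary", ["bullet 1", "bullet 2"]]

try:
    combined = "\n".join(summaries)  # str.join requires every item to be a str
except TypeError as e:
    print(f"join failed: {e}")  # sequence item 1: expected str instance, list found

# The guard added in this PR rejects the bad value earlier, with a clearer message:
summary = ["bullet 1", "bullet 2"]
try:
    if not isinstance(summary, str):
        raise ValueError(
            f"LLM generation failed: Expected a string summary, "
            f"but received {type(summary).__name__}."
        )
except ValueError as e:
    print(e)
```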
```diff
@@ -1451,7 +1451,20 @@ class AITextSummarizerBlock(AIBlockBase):
             credentials=credentials,
         )
 
-        return llm_response["summary"]
+        summary = llm_response["summary"]
+
+        # Validate that the LLM returned a string and not a list or other type
+        if not isinstance(summary, str):
+            from backend.util.truncate import truncate
+
+            truncated_summary = truncate(summary, 500)
+            raise ValueError(
+                f"LLM generation failed: Expected a string summary, but received {type(summary).__name__}. "
+                f"The language model incorrectly formatted its response. "
+                f"Received value: {json.dumps(truncated_summary)}"
+            )
+
+        return summary
 
     async def _combine_summaries(
         self, summaries: list[str], input_data: Input, credentials: APIKeyCredentials
```
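Note that the guard imports `truncate` from `backend.util.truncate`, whose implementation is not part of this diff. A minimal stand-in consistent with the call site `truncate(summary, 500)` could look like the sketch below; the real helper may behave differently.

```python
from typing import Any

# Hypothetical stand-in for backend.util.truncate.truncate (the real helper
# is not shown in this diff). The call site only needs truncate(value, 500)
# to return a JSON-serializable value of bounded size for the error message.
def truncate(value: Any, limit: int) -> Any:
    if isinstance(value, str):
        return value[:limit]
    if isinstance(value, list):
        return [truncate(item, limit) for item in value]
    if isinstance(value, dict):
        return {key: truncate(item, limit) for key, item in value.items()}
    return value
```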
```diff
@@ -1473,7 +1486,20 @@ class AITextSummarizerBlock(AIBlockBase):
                 credentials=credentials,
             )
 
-            return llm_response["final_summary"]
+            final_summary = llm_response["final_summary"]
+
+            # Validate that the LLM returned a string and not a list or other type
+            if not isinstance(final_summary, str):
+                from backend.util.truncate import truncate
+
+                truncated_final_summary = truncate(final_summary, 500)
+                raise ValueError(
+                    f"LLM generation failed: Expected a string final summary, but received {type(final_summary).__name__}. "
+                    f"The language model incorrectly formatted its response. "
+                    f"Received value: {json.dumps(truncated_final_summary)}"
+                )
+
+            return final_summary
         else:
             # If combined summaries are still too long, recursively summarize
             block = AITextSummarizerBlock()
```
```diff
@@ -500,3 +500,181 @@ class TestLLMStatsTracking:
         # Check output
         assert "response" in outputs
         assert outputs["response"] == {"result": "test"}
+
+
+class TestAITextSummarizerValidation:
+    """Test that AITextSummarizerBlock validates LLM responses are strings."""
+
+    @pytest.mark.asyncio
+    async def test_summarize_chunk_rejects_list_response(self):
+        """Test that _summarize_chunk raises ValueError when LLM returns a list instead of string."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a list instead of a string
+        async def mock_llm_call(input_data, credentials):
+            # Simulate LLM returning a list when it should return a string
+            return {"summary": ["bullet point 1", "bullet point 2", "bullet point 3"]}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+            style=llm.SummaryStyle.BULLET_POINTS,
+        )
+
+        # Should raise ValueError with descriptive message
+        with pytest.raises(ValueError) as exc_info:
+            await block._summarize_chunk(
+                "Some text to summarize",
+                input_data,
+                credentials=llm.TEST_CREDENTIALS,
+            )
+
+        error_message = str(exc_info.value)
+        assert "Expected a string summary" in error_message
+        assert "received list" in error_message
+        assert "incorrectly formatted" in error_message
+
+    @pytest.mark.asyncio
+    async def test_combine_summaries_rejects_list_response(self):
+        """Test that _combine_summaries raises ValueError when LLM returns a list instead of string."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a list instead of a string
+        async def mock_llm_call(input_data, credentials):
+            # Check if this is the final summary call
+            if "final_summary" in input_data.expected_format:
+                # Simulate LLM returning a list when it should return a string
+                return {
+                    "final_summary": [
+                        "bullet point 1",
+                        "bullet point 2",
+                        "bullet point 3",
+                    ]
+                }
+            else:
+                return {"summary": "Valid summary"}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+            style=llm.SummaryStyle.BULLET_POINTS,
+            max_tokens=1000,
+        )
+
+        # Should raise ValueError with descriptive message
+        with pytest.raises(ValueError) as exc_info:
+            await block._combine_summaries(
+                ["summary 1", "summary 2"],
+                input_data,
+                credentials=llm.TEST_CREDENTIALS,
+            )
+
+        error_message = str(exc_info.value)
+        assert "Expected a string final summary" in error_message
+        assert "received list" in error_message
+        assert "incorrectly formatted" in error_message
+
+    @pytest.mark.asyncio
+    async def test_summarize_chunk_accepts_valid_string_response(self):
+        """Test that _summarize_chunk accepts valid string responses."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a valid string
+        async def mock_llm_call(input_data, credentials):
+            return {"summary": "This is a valid string summary"}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+        )
+
+        # Should not raise any error
+        result = await block._summarize_chunk(
+            "Some text to summarize",
+            input_data,
+            credentials=llm.TEST_CREDENTIALS,
+        )
+
+        assert result == "This is a valid string summary"
+        assert isinstance(result, str)
+
+    @pytest.mark.asyncio
+    async def test_combine_summaries_accepts_valid_string_response(self):
+        """Test that _combine_summaries accepts valid string responses."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a valid string
+        async def mock_llm_call(input_data, credentials):
+            return {"final_summary": "This is a valid final summary string"}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+            max_tokens=1000,
+        )
+
+        # Should not raise any error
+        result = await block._combine_summaries(
+            ["summary 1", "summary 2"],
+            input_data,
+            credentials=llm.TEST_CREDENTIALS,
+        )
+
+        assert result == "This is a valid final summary string"
+        assert isinstance(result, str)
+
+    @pytest.mark.asyncio
+    async def test_summarize_chunk_rejects_dict_response(self):
+        """Test that _summarize_chunk raises ValueError when LLM returns a dict instead of string."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a dict instead of a string
+        async def mock_llm_call(input_data, credentials):
+            return {"summary": {"nested": "object", "with": "data"}}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+        )
+
+        # Should raise ValueError
+        with pytest.raises(ValueError) as exc_info:
+            await block._summarize_chunk(
+                "Some text to summarize",
+                input_data,
+                credentials=llm.TEST_CREDENTIALS,
+            )
+
+        error_message = str(exc_info.value)
+        assert "Expected a string summary" in error_message
+        assert "received dict" in error_message
```