Mirror of https://github.com/Significant-Gravitas/AutoGPT.git, synced 2026-01-09 15:17:59 -05:00
fix(blocks/llm): Validate LLM summary responses are strings (#11275)
### Changes 🏗️

- Added validation to ensure that the `summary` and `final_summary` returned by the LLM are strings.
- Raises a `ValueError` if the LLM returns a list or other non-string type, providing a descriptive error message to aid debugging.

Fixes [AUTOGPT-SERVER-6M4](https://sentry.io/organizations/significant-gravitas/issues/6978480131/). The issue: the LLM returned a list of strings instead of a single string summary, causing `_combine_summaries` to fail on `join`.

This fix was generated by Seer in Sentry, triggered by Craig Swift.

👁️ Run ID: 2230933

Not quite right? [Click here to continue debugging with Seer.](https://sentry.io/organizations/significant-gravitas/issues/6978480131/?seerDrawer=true)

### Checklist 📋

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Added a unit test to verify that a `ValueError` is raised when the LLM returns a list instead of a string for `summary` or `final_summary`.

---------

Co-authored-by: seer-by-sentry[bot] <157164994+seer-by-sentry[bot]@users.noreply.github.com>
Co-authored-by: Swifty <craigswift13@gmail.com>
Commit 4140331731, parent 594b1adcf7, committed via GitHub.
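For context, the failure mode is straightforward to reproduce: `str.join` raises a `TypeError` as soon as any sequence item is not a string, which is what happened when the model returned a list for `summary`. A minimal standalone sketch of the bug and the guard this PR adds (illustration only, not the block's actual code):

```python
# Standalone illustration of the failure mode and the guard; not the
# block's actual code.

# A bad LLM response: the second "summary" is a list, not a string.
summaries = ["chunk 1 summary", ["bullet 1", "bullet 2"]]

try:
    combined = "\n".join(summaries)  # str.join requires every item to be a str
except TypeError as e:
    print(f"join failed: {e}")  # sequence item 1: expected str instance, list found

# The guard added in this PR rejects the bad value earlier, with a clearer message:
summary = ["bullet 1", "bullet 2"]
try:
    if not isinstance(summary, str):
        raise ValueError(
            f"LLM generation failed: Expected a string summary, "
            f"but received {type(summary).__name__}."
        )
except ValueError as e:
    print(e)
```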
```diff
@@ -1451,7 +1451,20 @@ class AITextSummarizerBlock(AIBlockBase):
             credentials=credentials,
         )
 
-        return llm_response["summary"]
+        summary = llm_response["summary"]
+
+        # Validate that the LLM returned a string and not a list or other type
+        if not isinstance(summary, str):
+            from backend.util.truncate import truncate
+
+            truncated_summary = truncate(summary, 500)
+            raise ValueError(
+                f"LLM generation failed: Expected a string summary, but received {type(summary).__name__}. "
+                f"The language model incorrectly formatted its response. "
+                f"Received value: {json.dumps(truncated_summary)}"
+            )
+
+        return summary
 
     async def _combine_summaries(
         self, summaries: list[str], input_data: Input, credentials: APIKeyCredentials
```
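Note that the guard imports `truncate` from `backend.util.truncate`, whose implementation is not part of this diff. A minimal stand-in consistent with the call site `truncate(summary, 500)` could look like the sketch below; the real helper may behave differently.

```python
from typing import Any

# Hypothetical stand-in for backend.util.truncate.truncate (the real helper
# is not shown in this diff). The call site only needs truncate(value, 500)
# to return a JSON-serializable value of bounded size for the error message.
def truncate(value: Any, limit: int) -> Any:
    if isinstance(value, str):
        return value[:limit]
    if isinstance(value, list):
        return [truncate(item, limit) for item in value]
    if isinstance(value, dict):
        return {key: truncate(item, limit) for key, item in value.items()}
    return value
```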
```diff
@@ -1473,7 +1486,20 @@ class AITextSummarizerBlock(AIBlockBase):
                 credentials=credentials,
             )
 
-            return llm_response["final_summary"]
+            final_summary = llm_response["final_summary"]
+
+            # Validate that the LLM returned a string and not a list or other type
+            if not isinstance(final_summary, str):
+                from backend.util.truncate import truncate
+
+                truncated_final_summary = truncate(final_summary, 500)
+                raise ValueError(
+                    f"LLM generation failed: Expected a string final summary, but received {type(final_summary).__name__}. "
+                    f"The language model incorrectly formatted its response. "
+                    f"Received value: {json.dumps(truncated_final_summary)}"
+                )
+
+            return final_summary
         else:
             # If combined summaries are still too long, recursively summarize
             block = AITextSummarizerBlock()
```
```diff
@@ -500,3 +500,181 @@ class TestLLMStatsTracking:
         # Check output
         assert "response" in outputs
         assert outputs["response"] == {"result": "test"}
+
+
+class TestAITextSummarizerValidation:
+    """Test that AITextSummarizerBlock validates LLM responses are strings."""
+
+    @pytest.mark.asyncio
+    async def test_summarize_chunk_rejects_list_response(self):
+        """Test that _summarize_chunk raises ValueError when LLM returns a list instead of string."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a list instead of a string
+        async def mock_llm_call(input_data, credentials):
+            # Simulate LLM returning a list when it should return a string
+            return {"summary": ["bullet point 1", "bullet point 2", "bullet point 3"]}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+            style=llm.SummaryStyle.BULLET_POINTS,
+        )
+
+        # Should raise ValueError with descriptive message
+        with pytest.raises(ValueError) as exc_info:
+            await block._summarize_chunk(
+                "Some text to summarize",
+                input_data,
+                credentials=llm.TEST_CREDENTIALS,
+            )
+
+        error_message = str(exc_info.value)
+        assert "Expected a string summary" in error_message
+        assert "received list" in error_message
+        assert "incorrectly formatted" in error_message
+
+    @pytest.mark.asyncio
+    async def test_combine_summaries_rejects_list_response(self):
+        """Test that _combine_summaries raises ValueError when LLM returns a list instead of string."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a list instead of a string
+        async def mock_llm_call(input_data, credentials):
+            # Check if this is the final summary call
+            if "final_summary" in input_data.expected_format:
+                # Simulate LLM returning a list when it should return a string
+                return {
+                    "final_summary": [
+                        "bullet point 1",
+                        "bullet point 2",
+                        "bullet point 3",
+                    ]
+                }
+            else:
+                return {"summary": "Valid summary"}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+            style=llm.SummaryStyle.BULLET_POINTS,
+            max_tokens=1000,
+        )
+
+        # Should raise ValueError with descriptive message
+        with pytest.raises(ValueError) as exc_info:
+            await block._combine_summaries(
+                ["summary 1", "summary 2"],
+                input_data,
+                credentials=llm.TEST_CREDENTIALS,
+            )
+
+        error_message = str(exc_info.value)
+        assert "Expected a string final summary" in error_message
+        assert "received list" in error_message
+        assert "incorrectly formatted" in error_message
+
+    @pytest.mark.asyncio
+    async def test_summarize_chunk_accepts_valid_string_response(self):
+        """Test that _summarize_chunk accepts valid string responses."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a valid string
+        async def mock_llm_call(input_data, credentials):
+            return {"summary": "This is a valid string summary"}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+        )
+
+        # Should not raise any error
+        result = await block._summarize_chunk(
+            "Some text to summarize",
+            input_data,
+            credentials=llm.TEST_CREDENTIALS,
+        )
+
+        assert result == "This is a valid string summary"
+        assert isinstance(result, str)
+
+    @pytest.mark.asyncio
+    async def test_combine_summaries_accepts_valid_string_response(self):
+        """Test that _combine_summaries accepts valid string responses."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a valid string
+        async def mock_llm_call(input_data, credentials):
+            return {"final_summary": "This is a valid final summary string"}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+            max_tokens=1000,
+        )
+
+        # Should not raise any error
+        result = await block._combine_summaries(
+            ["summary 1", "summary 2"],
+            input_data,
+            credentials=llm.TEST_CREDENTIALS,
+        )
+
+        assert result == "This is a valid final summary string"
+        assert isinstance(result, str)
+
+    @pytest.mark.asyncio
+    async def test_summarize_chunk_rejects_dict_response(self):
+        """Test that _summarize_chunk raises ValueError when LLM returns a dict instead of string."""
+        import backend.blocks.llm as llm
+
+        block = llm.AITextSummarizerBlock()
+
+        # Mock llm_call to return a dict instead of a string
+        async def mock_llm_call(input_data, credentials):
+            return {"summary": {"nested": "object", "with": "data"}}
+
+        block.llm_call = mock_llm_call  # type: ignore
+
+        # Create input data
+        input_data = llm.AITextSummarizerBlock.Input(
+            text="Some text to summarize",
+            model=llm.LlmModel.GPT4O,
+            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
+        )
+
+        # Should raise ValueError
+        with pytest.raises(ValueError) as exc_info:
+            await block._summarize_chunk(
+                "Some text to summarize",
+                input_data,
+                credentials=llm.TEST_CREDENTIALS,
+            )
+
+        error_message = str(exc_info.value)
+        assert "Expected a string summary" in error_message
+        assert "received dict" in error_message
```