fix(gmail): fall back to raw HTML when html2text conversion fails (#12369)

## Summary
- keep Gmail body extraction resilient when the `html2text` converter raises an exception
- fall back to raw HTML instead of failing extraction
- add regression test for converter failure path

Closes #12368

## Testing
- added a unit test in `autogpt_platform/backend/test/blocks/test_gmail.py`

---------

Co-authored-by: Zamil Majdy <zamil.majdy@agpt.co>
This commit is contained in:
Shunyu Wu
2026-03-11 19:46:57 +08:00
committed by GitHub
parent 89a5b3178a
commit c2e79fa5e1
2 changed files with 23 additions and 2 deletions

View File

@@ -241,8 +241,8 @@ class GmailBase(Block, ABC):
h.ignore_links = False
h.ignore_images = True
return h.handle(html_content)
except ImportError:
# Fallback: return raw HTML if html2text is not available
except Exception:
# Keep extraction resilient if html2text is unavailable or fails.
return html_content
# Handle content stored as attachment

View File

@@ -84,6 +84,27 @@ class TestGmailReadBlock:
assert "Hello World" in result
assert "This is HTML content" in result
@pytest.mark.asyncio
async def test_html_fallback_when_html2text_conversion_fails(self):
    """A converter that raises during conversion must yield the raw HTML body."""
    raw_html = "<html><body><p>Broken <b>HTML</p></body></html>"
    encoded_body = {"data": self._encode_base64(raw_html)}
    message = {
        "id": "test_msg_html_error",
        "payload": {"mimeType": "text/html", "body": encoded_body},
    }

    # Stand-in converter whose handle() always raises.
    failing_converter = Mock()
    failing_converter.handle.side_effect = ValueError("conversion failed")

    # Make html2text.HTML2Text() hand back the failing converter.
    with patch("html2text.HTML2Text", return_value=failing_converter):
        body = await self.gmail_block._get_email_body(message, self.mock_service)

    assert body == raw_html
@pytest.mark.asyncio
async def test_html_fallback_when_html2text_unavailable(self):
"""Test fallback to raw HTML when html2text is not available."""