mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-05 20:35:10 -05:00
Need: The Gmail integration had several parsing issues that were causing data loss and workflow incompatibilities:

1. Email recipient parsing only captured the first recipient, losing CC/BCC and multiple TO recipients
2. Email body parsing was inconsistent between blocks, sometimes showing "This email does not contain a readable body" for valid emails
3. Type mismatches between blocks caused serialization issues when connecting them in workflows (lists being converted to string representations like "[\"email@example.com\"]")

# Changes 🏗️

1. Enhanced Email Model:
   - Added `cc` and `bcc` fields to capture all recipients
   - Changed `to` field from string to list for consistency
   - Now captures all recipients instead of just the first one
2. Improved Email Parsing:
   - Updated `GmailReadBlock` and `GmailGetThreadBlock` to parse all recipients using `getaddresses()`
   - Unified email body parsing logic across blocks with robust multipart handling
   - Added support for HTML to plain text conversion
   - Fixed handling of emails with attachments as body content
3. Fixed Block Compatibility:
   - Updated `GmailSendBlock` and `GmailCreateDraftBlock` to accept lists for recipient fields
   - Added validation to ensure at least one recipient is provided
   - All blocks now consistently use lists for recipient fields, preventing serialization issues
4. Updated Test Data:
   - Modified all test inputs/outputs to use the new list format for recipients
   - Ensures tests reflect the new data structure

# Checklist 📋

For code changes:

- I have clearly listed my changes in the PR description
- I have made a test plan
- I have tested my changes according to the test plan:
  - Run existing Gmail block unit tests with `poetry run test`
  - Create a workflow that reads emails with multiple recipients and verify all TO, CC, BCC recipients are captured
  - Test email body parsing with plain text, HTML, and multipart emails
  - Connect GmailReadBlock → GmailSendBlock in a workflow and verify recipient data flows correctly
  - Connect GmailReplyBlock → GmailSendBlock and verify no serialization errors occur
  - Test sending emails with multiple recipients via GmailSendBlock
  - Test creating drafts with multiple recipients via GmailCreateDraftBlock
  - Verify backwards compatibility by testing with single recipient strings (should now require lists)
  - Create from scratch and execute an agent with at least 3 blocks
  - Import an agent from file upload, and confirm it executes correctly
  - Upload agent to marketplace
  - Import an agent from marketplace and confirm it executes correctly
  - Edit an agent from monitor, and confirm it executes correctly

# Breaking Change

Note: The `to` field in `GmailSendBlock` and `GmailCreateDraftBlock` now requires a list instead of accepting both string and list. Existing workflows using strings will need to be updated to use lists (e.g., ["email@example.com"] instead of "email@example.com").

---------

Co-authored-by: Zamil Majdy <zamil.majdy@agpt.co>
232 lines
8.0 KiB
Python
232 lines
8.0 KiB
Python
import base64
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pytest
|
|
|
|
from backend.blocks.google.gmail import GmailReadBlock
|
|
|
|
|
|
class TestGmailReadBlock:
    """Test cases for GmailReadBlock email body parsing functionality.

    Every test feeds a hand-built message dict (an ``id`` plus a ``payload``
    carrying ``mimeType`` and either a ``body`` or nested ``parts``) to
    ``GmailReadBlock._get_email_body``, or a raw string to
    ``GmailReadBlock._decode_base64``, and checks the returned value.
    """

    # Fallback text the tests expect whenever no body can be extracted.
    NO_READABLE_BODY = "This email does not contain a readable body."

    def setup_method(self):
        """Set up test fixtures: a fresh block and a fresh mock service per test."""
        self.gmail_block = GmailReadBlock()
        self.mock_service = Mock()

    def _encode_base64(self, text: str) -> str:
        """Helper to encode text as URL-safe base64 (UTF-8 in, ASCII text out)."""
        return base64.urlsafe_b64encode(text.encode("utf-8")).decode("utf-8")

    async def _get_body(self, msg: dict):
        """Run the block's body extraction for *msg* against the mock service."""
        return await self.gmail_block._get_email_body(msg, self.mock_service)

    @pytest.mark.asyncio
    async def test_single_part_text_plain(self):
        """Test parsing single-part text/plain email."""
        body_text = "This is a plain text email body."
        msg = {
            "id": "test_msg_1",
            "payload": {
                "mimeType": "text/plain",
                "body": {"data": self._encode_base64(body_text)},
            },
        }

        assert await self._get_body(msg) == body_text

    @pytest.mark.asyncio
    async def test_multipart_alternative_plain_and_html(self):
        """Test parsing multipart/alternative with both plain and HTML parts."""
        plain_text = "This is the plain text version."
        html_text = "<html><body><p>This is the HTML version.</p></body></html>"
        msg = {
            "id": "test_msg_2",
            "payload": {
                "mimeType": "multipart/alternative",
                "parts": [
                    {
                        "mimeType": "text/plain",
                        "body": {"data": self._encode_base64(plain_text)},
                    },
                    {
                        "mimeType": "text/html",
                        "body": {"data": self._encode_base64(html_text)},
                    },
                ],
            },
        }

        # Should prefer plain text over HTML
        assert await self._get_body(msg) == plain_text

    @pytest.mark.asyncio
    async def test_html_only_email(self):
        """Test parsing HTML-only email with conversion to plain text."""
        html_text = (
            "<html><body><h1>Hello World</h1><p>This is HTML content.</p></body></html>"
        )
        msg = {
            "id": "test_msg_3",
            "payload": {
                "mimeType": "text/html",
                "body": {"data": self._encode_base64(html_text)},
            },
        }

        # The converter is replaced with a stub, so this checks that the
        # converter's output is what ends up in the result, not html2text itself.
        with patch("html2text.HTML2Text") as mock_html2text:
            mock_converter = Mock()
            mock_converter.handle.return_value = "Hello World\n\nThis is HTML content."
            mock_html2text.return_value = mock_converter

            result = await self._get_body(msg)
            assert "Hello World" in result
            assert "This is HTML content" in result

    @pytest.mark.asyncio
    async def test_html_fallback_when_html2text_unavailable(self):
        """Test fallback to raw HTML when html2text is not available."""
        html_text = "<html><body><p>HTML content</p></body></html>"
        msg = {
            "id": "test_msg_4",
            "payload": {
                "mimeType": "text/html",
                "body": {"data": self._encode_base64(html_text)},
            },
        }

        # Unavailability is simulated by making the HTML2Text constructor
        # raise ImportError.
        with patch("html2text.HTML2Text", side_effect=ImportError):
            result = await self._get_body(msg)

        assert result == html_text

    @pytest.mark.asyncio
    async def test_nested_multipart_structure(self):
        """Test parsing deeply nested multipart structure."""
        plain_text = "Nested plain text content."
        msg = {
            "id": "test_msg_5",
            "payload": {
                "mimeType": "multipart/mixed",
                "parts": [
                    {
                        "mimeType": "multipart/alternative",
                        "parts": [
                            {
                                "mimeType": "text/plain",
                                "body": {"data": self._encode_base64(plain_text)},
                            },
                        ],
                    },
                ],
            },
        }

        assert await self._get_body(msg) == plain_text

    @pytest.mark.asyncio
    async def test_attachment_body_content(self):
        """Test parsing email where body is stored as attachment."""
        attachment_data = self._encode_base64("Body content from attachment.")
        msg = {
            "id": "test_msg_6",
            "payload": {
                "mimeType": "text/plain",
                "body": {"attachmentId": "attachment_123"},
            },
        }

        # Mock the attachment download. A Mock returns the same child object
        # for every call, so whatever arguments the block passes along this
        # chain it receives this payload from execute().
        attachment_request = self.mock_service.users().messages().attachments().get()
        attachment_request.execute.return_value = {"data": attachment_data}

        assert await self._get_body(msg) == "Body content from attachment."

    @pytest.mark.asyncio
    async def test_no_readable_body(self):
        """Test email with no readable body content."""
        msg = {
            "id": "test_msg_7",
            "payload": {
                "mimeType": "application/octet-stream",
                "body": {},
            },
        }

        assert await self._get_body(msg) == self.NO_READABLE_BODY

    def test_base64_padding_handling(self):
        """Test proper handling of base64 data with missing padding."""
        text = "Test content"
        encoded = base64.urlsafe_b64encode(text.encode("utf-8")).decode("utf-8")
        # Remove padding
        encoded_no_padding = encoded.rstrip("=")

        # _decode_base64 is called synchronously, so no event loop is needed.
        assert self.gmail_block._decode_base64(encoded_no_padding) == text

    @pytest.mark.asyncio
    async def test_recursion_depth_limit(self):
        """Test that recursion depth is properly limited."""
        # Wrap a text/plain leaf in 16 multipart/mixed layers; the original
        # test notes this exceeds the block's limit of 10.
        payload = {
            "mimeType": "text/plain",
            "body": {"data": self._encode_base64("Deep content")},
        }
        for _ in range(16):
            payload = {"mimeType": "multipart/mixed", "parts": [payload]}

        msg = {"id": "test_msg_8", "payload": payload}

        # Should return fallback message due to depth limit
        assert await self._get_body(msg) == self.NO_READABLE_BODY

    def test_malformed_base64_handling(self):
        """Test handling of malformed base64 data."""
        assert self.gmail_block._decode_base64("invalid_base64_data!!!") is None

    def test_empty_data_handling(self):
        """Test handling of empty or None data."""
        assert self.gmail_block._decode_base64("") is None
        assert self.gmail_block._decode_base64(None) is None

    @pytest.mark.asyncio
    async def test_attachment_download_failure(self):
        """Test handling of attachment download failure."""
        msg = {
            "id": "test_msg_9",
            "payload": {
                "mimeType": "text/plain",
                "body": {"attachmentId": "invalid_attachment"},
            },
        }

        # Mock attachment download failure
        attachment_request = self.mock_service.users().messages().attachments().get()
        attachment_request.execute.side_effect = Exception("Download failed")

        assert await self._get_body(msg) == self.NO_READABLE_BODY
|