Files
AutoGPT/autogpt_platform/backend/test/blocks/test_gmail.py
Zamil Majdy 423b22214a feat(blocks): Add Excel support to ReadSpreadsheetBlock and introduce FileReadBlock (#10393)
This PR adds Excel file support to CSV processing and enhances text file
reading capabilities.

### Changes 🏗️

**ReadSpreadsheetBlock (formerly ReadCsvBlock):**
- Renamed `ReadCsvBlock` to `ReadSpreadsheetBlock` for better clarity
- Added Excel file support (.xlsx, .xls) with automatic conversion to
CSV using pandas
- Renamed parameter `file_in` to `file_input` for consistency
- Excel files are automatically detected by extension and converted to
CSV format
- Maintains all existing CSV processing functionality (delimiters,
headers, etc.)
- Graceful error handling when pandas library is not available

**FileReadBlock:**
- Enhanced text file reading with advanced chunking capabilities
- Added parameters: `skip_size`, `skip_rows`, `row_limit`, `size_limit`,
`delimiter`
- Supports both character-based and row-based processing
- Chunked output for large files based on size limits
- Proper file handling with UTF-8 and latin-1 encoding fallbacks
- Uses `store_media_file` for secure file processing (URLs, data URIs,
local paths)
- Fixed test input to use data URI instead of non-existent file

**General Improvements:**
- Consistent parameter naming across blocks (`file_input`)
- Enhanced error handling and validation
- Comprehensive test coverage
- All existing functionality preserved

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Both ReadSpreadsheetBlock and FileReadBlock instantiate correctly
  - [x] ReadSpreadsheetBlock processes CSV data with existing
    functionality
  - [x] FileReadBlock reads text files with data URI input
  - [x] All block tests pass (457 passed, 83 skipped)
  - [x] No linting errors in modified files
  - [x] Excel support gracefully handles missing pandas dependency

#### For configuration changes:
- [ ] `.env.example` is updated or already compatible with my changes
- [ ] `docker-compose.yml` is updated or already compatible with my
changes
- [ ] I have included a list of my configuration changes in the PR
description (under **Changes**)

*Note: No configuration changes required for this PR.*
2025-07-16 12:00:40 +00:00

218 lines
7.6 KiB
Python

import base64
from unittest.mock import Mock, patch
from backend.blocks.google.gmail import GmailReadBlock
class TestGmailReadBlock:
    """Test cases for GmailReadBlock email body parsing functionality.

    Every test builds a Gmail-API-shaped message dict (``id`` plus a
    ``payload`` tree of ``mimeType`` / ``body`` / ``parts``) and checks what
    ``GmailReadBlock._get_email_body`` or ``GmailReadBlock._decode_base64``
    returns for it. The Gmail service is always a ``Mock``; no network
    access takes place.
    """

    # Text the tests expect back whenever no body could be extracted.
    NO_BODY_MESSAGE = "This email does not contain a readable body."

    def setup_method(self) -> None:
        """Set up test fixtures.

        pytest calls this before each test, so every test gets a fresh block
        and a fresh service mock; return values and side effects configured
        in one test cannot leak into another.
        """
        self.gmail_block = GmailReadBlock()
        self.mock_service = Mock()

    def _encode_base64(self, text: str) -> str:
        """Helper to encode text as URL-safe base64 (padding included)."""
        return base64.urlsafe_b64encode(text.encode("utf-8")).decode("utf-8")

    def test_single_part_text_plain(self) -> None:
        """Test parsing single-part text/plain email."""
        body_text = "This is a plain text email body."
        msg = {
            "id": "test_msg_1",
            "payload": {
                "mimeType": "text/plain",
                "body": {"data": self._encode_base64(body_text)},
            },
        }

        result = self.gmail_block._get_email_body(msg, self.mock_service)

        assert result == body_text

    def test_multipart_alternative_plain_and_html(self) -> None:
        """Test parsing multipart/alternative with both plain and HTML parts."""
        plain_text = "This is the plain text version."
        html_text = "<html><body><p>This is the HTML version.</p></body></html>"
        msg = {
            "id": "test_msg_2",
            "payload": {
                "mimeType": "multipart/alternative",
                "parts": [
                    {
                        "mimeType": "text/plain",
                        "body": {"data": self._encode_base64(plain_text)},
                    },
                    {
                        "mimeType": "text/html",
                        "body": {"data": self._encode_base64(html_text)},
                    },
                ],
            },
        }

        result = self.gmail_block._get_email_body(msg, self.mock_service)

        # Should prefer plain text over HTML
        assert result == plain_text

    def test_html_only_email(self) -> None:
        """Test parsing HTML-only email with conversion to plain text."""
        html_text = (
            "<html><body><h1>Hello World</h1><p>This is HTML content.</p></body></html>"
        )
        msg = {
            "id": "test_msg_3",
            "payload": {
                "mimeType": "text/html",
                "body": {"data": self._encode_base64(html_text)},
            },
        }

        # Replace the converter class so the test controls the converted text
        # instead of depending on html2text's real output format.
        with patch("html2text.HTML2Text") as mock_html2text:
            mock_converter = Mock()
            mock_converter.handle.return_value = "Hello World\n\nThis is HTML content."
            mock_html2text.return_value = mock_converter
            result = self.gmail_block._get_email_body(msg, self.mock_service)

        assert "Hello World" in result
        assert "This is HTML content" in result

    def test_html_fallback_when_html2text_unavailable(self) -> None:
        """Test fallback to raw HTML when html2text is not available."""
        html_text = "<html><body><p>HTML content</p></body></html>"
        msg = {
            "id": "test_msg_4",
            "payload": {
                "mimeType": "text/html",
                "body": {"data": self._encode_base64(html_text)},
            },
        }

        # Instantiating the converter raises ImportError, which stands in for
        # the library being unavailable.
        with patch("html2text.HTML2Text", side_effect=ImportError):
            result = self.gmail_block._get_email_body(msg, self.mock_service)

        assert result == html_text

    def test_nested_multipart_structure(self) -> None:
        """Test parsing deeply nested multipart structure."""
        plain_text = "Nested plain text content."
        msg = {
            "id": "test_msg_5",
            "payload": {
                "mimeType": "multipart/mixed",
                "parts": [
                    {
                        "mimeType": "multipart/alternative",
                        "parts": [
                            {
                                "mimeType": "text/plain",
                                "body": {"data": self._encode_base64(plain_text)},
                            },
                        ],
                    },
                ],
            },
        }

        result = self.gmail_block._get_email_body(msg, self.mock_service)

        assert result == plain_text

    def test_attachment_body_content(self) -> None:
        """Test parsing email where body is stored as attachment."""
        attachment_data = self._encode_base64("Body content from attachment.")
        msg = {
            "id": "test_msg_6",
            "payload": {
                "mimeType": "text/plain",
                "body": {"attachmentId": "attachment_123"},
            },
        }
        # Mock the attachment download
        self.mock_service.users().messages().attachments().get().execute.return_value = {
            "data": attachment_data
        }

        result = self.gmail_block._get_email_body(msg, self.mock_service)

        assert result == "Body content from attachment."

    def test_no_readable_body(self) -> None:
        """Test email with no readable body content."""
        msg = {
            "id": "test_msg_7",
            "payload": {
                "mimeType": "application/octet-stream",
                "body": {},
            },
        }

        result = self.gmail_block._get_email_body(msg, self.mock_service)

        assert result == self.NO_BODY_MESSAGE

    def test_base64_padding_handling(self) -> None:
        """Test proper handling of base64 data with missing padding."""
        # The sample must be a byte length that is NOT a multiple of 3,
        # otherwise its base64 form carries no "=" padding and stripping it
        # is a no-op. The former 12-byte sample "Test content" had exactly
        # that problem, so the missing-padding path was never exercised.
        text = "Test content!"
        encoded = self._encode_base64(text)
        assert encoded.endswith("="), "sample must encode to padded base64"

        # Remove padding
        encoded_no_padding = encoded.rstrip("=")

        result = self.gmail_block._decode_base64(encoded_no_padding)

        assert result == text

    def test_recursion_depth_limit(self) -> None:
        """Test that recursion depth is properly limited."""

        # Create a deeply nested structure that would exceed the limit
        def create_nested_part(depth):
            if depth > 15:  # Exceed the limit of 10
                return {
                    "mimeType": "text/plain",
                    "body": {"data": self._encode_base64("Deep content")},
                }
            return {
                "mimeType": "multipart/mixed",
                "parts": [create_nested_part(depth + 1)],
            }

        msg = {
            "id": "test_msg_8",
            "payload": create_nested_part(0),
        }

        result = self.gmail_block._get_email_body(msg, self.mock_service)

        # Should return fallback message due to depth limit
        assert result == self.NO_BODY_MESSAGE

    def test_malformed_base64_handling(self) -> None:
        """Test handling of malformed base64 data."""
        result = self.gmail_block._decode_base64("invalid_base64_data!!!")

        assert result is None

    def test_empty_data_handling(self) -> None:
        """Test handling of empty or None data."""
        assert self.gmail_block._decode_base64("") is None
        assert self.gmail_block._decode_base64(None) is None

    def test_attachment_download_failure(self) -> None:
        """Test handling of attachment download failure."""
        msg = {
            "id": "test_msg_9",
            "payload": {
                "mimeType": "text/plain",
                "body": {"attachmentId": "invalid_attachment"},
            },
        }
        # Mock attachment download failure
        self.mock_service.users().messages().attachments().get().execute.side_effect = (
            Exception("Download failed")
        )

        result = self.gmail_block._get_email_body(msg, self.mock_service)

        assert result == self.NO_BODY_MESSAGE