fix(blocks): validate email recipients in Gmail blocks before API call (#12546)

### Why / What / How

**Why:** When a user or LLM supplies a malformed recipient string (e.g.
a bare username, a JSON blob, or an empty value) to `GmailSendBlock`,
`GmailCreateDraftBlock`, or any reply block, the Gmail API returns an
opaque `HttpError 400: "Invalid To header"`. This surfaces as a
`BlockUnknownError` with no actionable guidance, making it impossible
for the LLM to self-correct. (Fixes #11954)

**What:** Adds a lightweight `validate_email_recipients()` function that
checks every recipient against a simplified RFC 5322 pattern
(`local@domain.tld`) and raises a clear `ValueError` listing all invalid
entries before any API call is made.

**How:** The validation is called in two shared code paths —
`create_mime_message()` (used by send and draft blocks) and
`_build_reply_message()` (used by reply blocks) — so all Gmail blocks
that compose outgoing email benefit from it with zero per-block changes.
The regex is intentionally permissive (any `x@y.z` passes) to avoid
false positives on unusual but valid addresses.

### Changes 🏗️

- Added `validate_email_recipients()` helper in `gmail.py` with a
compiled regex
- Hooked validation into `create_mime_message()` for `to`, `cc`, and
`bcc` fields
- Hooked validation into `_build_reply_message()` for reply/draft-reply
blocks
- Added `TestValidateEmailRecipients` test class covering valid,
invalid, mixed, empty, JSON-string, and field-name scenarios

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
- [x] Verified `validate_email_recipients` correctly accepts valid
emails (`user@example.com`, `a@b.com`, `test@sub.domain.co`)
- [x] Verified it rejects malformed entries (bare names, missing domain
dot, empty strings, JSON strings)
- [x] Verified error messages include the field name and all invalid
entries
  - [x] Verified empty recipient lists pass without error
  - [x] Confirmed `gmail.py` and test file parse correctly (AST check)

---------

Co-authored-by: Zamil Majdy <zamil.majdy@agpt.co>
This commit is contained in:
Krishna Chaitanya
2026-03-31 05:37:33 -07:00
committed by GitHub
parent 914efc53e5
commit 2f42ff9b47
2 changed files with 316 additions and 12 deletions

View File

@@ -1,5 +1,6 @@
import asyncio
import base64
import re
from abc import ABC
from email import encoders
from email.mime.base import MIMEBase
@@ -8,7 +9,7 @@ from email.mime.text import MIMEText
from email.policy import SMTP
from email.utils import getaddresses, parseaddr
from pathlib import Path
from typing import List, Literal, Optional
from typing import List, Literal, Optional, Protocol, runtime_checkable
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
@@ -42,8 +43,52 @@ NO_WRAP_POLICY = SMTP.clone(max_line_length=0)
def serialize_email_recipients(recipients: list[str]) -> str:
"""Serialize recipients list to comma-separated string."""
return ", ".join(recipients)
"""Serialize recipients list to comma-separated string.
Strips leading/trailing whitespace from each address to keep MIME
headers clean (mirrors the strip done in ``validate_email_recipients``).
"""
return ", ".join(addr.strip() for addr in recipients)
# RFC 5322 simplified pattern: local@domain where domain has at least one dot
_EMAIL_RE = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
def validate_email_recipients(recipients: list[str], field_name: str = "to") -> None:
"""Validate that all recipients are plausible email addresses.
Raises ``ValueError`` with a user-friendly message listing every
invalid entry so the caller (or LLM) can correct them in one pass.
"""
invalid = [addr for addr in recipients if not _EMAIL_RE.match(addr.strip())]
if invalid:
formatted = ", ".join(f"'{a}'" for a in invalid)
raise ValueError(
f"Invalid email address(es) in '{field_name}': {formatted}. "
f"Each entry must be a valid email address (e.g. user@example.com)."
)
@runtime_checkable
class HasRecipients(Protocol):
to: list[str]
cc: list[str]
bcc: list[str]
def validate_all_recipients(input_data: HasRecipients) -> None:
"""Validate to/cc/bcc recipient fields on an input namespace.
Calls ``validate_email_recipients`` for ``to`` (required) and
``cc``/``bcc`` (when non-empty), raising ``ValueError`` on the
first field that contains an invalid address.
"""
validate_email_recipients(input_data.to, "to")
if input_data.cc:
validate_email_recipients(input_data.cc, "cc")
if input_data.bcc:
validate_email_recipients(input_data.bcc, "bcc")
def _make_mime_text(
@@ -100,14 +145,16 @@ async def create_mime_message(
) -> str:
"""Create a MIME message with attachments and return base64-encoded raw message."""
validate_all_recipients(input_data)
message = MIMEMultipart()
message["to"] = serialize_email_recipients(input_data.to)
message["subject"] = input_data.subject
if input_data.cc:
message["cc"] = ", ".join(input_data.cc)
message["cc"] = serialize_email_recipients(input_data.cc)
if input_data.bcc:
message["bcc"] = ", ".join(input_data.bcc)
message["bcc"] = serialize_email_recipients(input_data.bcc)
# Use the new helper function with content_type if available
content_type = getattr(input_data, "content_type", None)
@@ -1167,13 +1214,15 @@ async def _build_reply_message(
references.append(headers["message-id"])
# Create MIME message
validate_all_recipients(input_data)
msg = MIMEMultipart()
if input_data.to:
msg["To"] = ", ".join(input_data.to)
msg["To"] = serialize_email_recipients(input_data.to)
if input_data.cc:
msg["Cc"] = ", ".join(input_data.cc)
msg["Cc"] = serialize_email_recipients(input_data.cc)
if input_data.bcc:
msg["Bcc"] = ", ".join(input_data.bcc)
msg["Bcc"] = serialize_email_recipients(input_data.bcc)
msg["Subject"] = subject
if headers.get("message-id"):
msg["In-Reply-To"] = headers["message-id"]
@@ -1685,13 +1734,16 @@ To: {original_to}
else:
body = f"{forward_header}\n\n{original_body}"
# Validate all recipient lists before building the MIME message
validate_all_recipients(input_data)
# Create MIME message
msg = MIMEMultipart()
msg["To"] = ", ".join(input_data.to)
msg["To"] = serialize_email_recipients(input_data.to)
if input_data.cc:
msg["Cc"] = ", ".join(input_data.cc)
msg["Cc"] = serialize_email_recipients(input_data.cc)
if input_data.bcc:
msg["Bcc"] = ", ".join(input_data.bcc)
msg["Bcc"] = serialize_email_recipients(input_data.bcc)
msg["Subject"] = subject
# Add body with proper content type

View File

@@ -1,9 +1,20 @@
import base64
from types import SimpleNamespace
from typing import cast
from unittest.mock import Mock, patch
import pytest
from backend.blocks.google.gmail import GmailReadBlock
from backend.blocks.google.gmail import (
GmailForwardBlock,
GmailReadBlock,
HasRecipients,
_build_reply_message,
create_mime_message,
validate_all_recipients,
validate_email_recipients,
)
from backend.data.execution import ExecutionContext
class TestGmailReadBlock:
@@ -250,3 +261,244 @@ class TestGmailReadBlock:
result = await self.gmail_block._get_email_body(msg, self.mock_service)
assert result == "This email does not contain a readable body."
class TestValidateEmailRecipients:
"""Test cases for validate_email_recipients."""
def test_valid_single_email(self):
validate_email_recipients(["user@example.com"])
def test_valid_multiple_emails(self):
validate_email_recipients(["a@b.com", "x@y.org", "test@sub.domain.co"])
def test_invalid_missing_at(self):
with pytest.raises(ValueError, match="Invalid email address"):
validate_email_recipients(["not-an-email"])
def test_invalid_missing_domain_dot(self):
with pytest.raises(ValueError, match="Invalid email address"):
validate_email_recipients(["user@localhost"])
def test_invalid_empty_string(self):
with pytest.raises(ValueError, match="Invalid email address"):
validate_email_recipients([""])
def test_invalid_json_object_string(self):
with pytest.raises(ValueError, match="Invalid email address"):
validate_email_recipients(['{"email": "user@example.com"}'])
def test_mixed_valid_and_invalid(self):
with pytest.raises(ValueError, match="'bad-addr'"):
validate_email_recipients(["good@example.com", "bad-addr"])
def test_field_name_in_error(self):
with pytest.raises(ValueError, match="'cc'"):
validate_email_recipients(["nope"], field_name="cc")
def test_whitespace_trimmed(self):
validate_email_recipients([" user@example.com "])
def test_empty_list_passes(self):
validate_email_recipients([])
class TestValidateAllRecipients:
"""Test cases for validate_all_recipients."""
def test_valid_all_fields(self):
data = cast(
HasRecipients,
SimpleNamespace(to=["a@b.com"], cc=["c@d.com"], bcc=["e@f.com"]),
)
validate_all_recipients(data)
def test_invalid_to_raises(self):
data = cast(HasRecipients, SimpleNamespace(to=["bad"], cc=[], bcc=[]))
with pytest.raises(ValueError, match="'to'"):
validate_all_recipients(data)
def test_invalid_cc_raises(self):
data = cast(HasRecipients, SimpleNamespace(to=["a@b.com"], cc=["bad"], bcc=[]))
with pytest.raises(ValueError, match="'cc'"):
validate_all_recipients(data)
def test_invalid_bcc_raises(self):
data = cast(
HasRecipients,
SimpleNamespace(to=["a@b.com"], cc=["c@d.com"], bcc=["bad"]),
)
with pytest.raises(ValueError, match="'bcc'"):
validate_all_recipients(data)
def test_empty_cc_bcc_skipped(self):
data = cast(HasRecipients, SimpleNamespace(to=["a@b.com"], cc=[], bcc=[]))
validate_all_recipients(data)
class TestCreateMimeMessageValidation:
"""Integration tests verifying validation hooks in create_mime_message()."""
@pytest.mark.asyncio
async def test_invalid_to_raises_before_mime_construction(self):
"""Invalid 'to' recipients should raise ValueError before any MIME work."""
input_data = SimpleNamespace(
to=["not-an-email"],
cc=[],
bcc=[],
subject="Test",
body="Hello",
attachments=[],
)
exec_ctx = cast(ExecutionContext, SimpleNamespace(graph_exec_id="test-exec-id"))
with pytest.raises(ValueError, match="Invalid email address"):
await create_mime_message(input_data, exec_ctx)
@pytest.mark.asyncio
async def test_invalid_cc_raises_before_mime_construction(self):
"""Invalid 'cc' recipients should raise ValueError."""
input_data = SimpleNamespace(
to=["valid@example.com"],
cc=["bad-addr"],
bcc=[],
subject="Test",
body="Hello",
attachments=[],
)
exec_ctx = cast(ExecutionContext, SimpleNamespace(graph_exec_id="test-exec-id"))
with pytest.raises(ValueError, match="'cc'"):
await create_mime_message(input_data, exec_ctx)
@pytest.mark.asyncio
async def test_valid_recipients_passes_validation(self):
"""Valid recipients should not raise during validation."""
input_data = SimpleNamespace(
to=["user@example.com"],
cc=["other@example.com"],
bcc=[],
subject="Test",
body="Hello",
attachments=[],
)
exec_ctx = cast(ExecutionContext, SimpleNamespace(graph_exec_id="test-exec-id"))
# Should succeed without raising
result = await create_mime_message(input_data, exec_ctx)
assert isinstance(result, str)
class TestBuildReplyMessageValidation:
"""Integration tests verifying validation hooks in _build_reply_message()."""
@pytest.mark.asyncio
async def test_invalid_to_raises_before_reply_construction(self):
"""Invalid 'to' in reply should raise ValueError before MIME work."""
mock_service = Mock()
mock_parent = {
"threadId": "thread-1",
"payload": {
"headers": [
{"name": "Subject", "value": "Original"},
{"name": "Message-ID", "value": "<msg@example.com>"},
{"name": "From", "value": "sender@example.com"},
]
},
}
mock_service.users().messages().get().execute.return_value = mock_parent
input_data = SimpleNamespace(
parentMessageId="msg-1",
to=["not-valid"],
cc=[],
bcc=[],
subject="",
body="Reply body",
replyAll=False,
attachments=[],
)
exec_ctx = cast(ExecutionContext, SimpleNamespace(graph_exec_id="test-exec-id"))
with pytest.raises(ValueError, match="Invalid email address"):
await _build_reply_message(mock_service, input_data, exec_ctx)
class TestForwardMessageValidation:
"""Test that _forward_message() raises ValueError for invalid recipients."""
@staticmethod
def _make_input(
to: list[str] | None = None,
cc: list[str] | None = None,
bcc: list[str] | None = None,
) -> "GmailForwardBlock.Input":
mock = Mock(spec=GmailForwardBlock.Input)
mock.messageId = "m1"
mock.to = to or []
mock.cc = cc or []
mock.bcc = bcc or []
mock.subject = ""
mock.forwardMessage = "FYI"
mock.includeAttachments = False
mock.content_type = None
mock.additionalAttachments = []
mock.credentials = None
return mock
@staticmethod
def _exec_ctx():
return ExecutionContext(user_id="u1", graph_exec_id="g1")
@staticmethod
def _mock_service():
"""Build a mock Gmail service that returns a parent message."""
parent_message = {
"id": "m1",
"payload": {
"headers": [
{"name": "Subject", "value": "Original subject"},
{"name": "From", "value": "sender@example.com"},
{"name": "To", "value": "me@example.com"},
{"name": "Date", "value": "Mon, 31 Mar 2026 00:00:00 +0000"},
],
"mimeType": "text/plain",
"body": {
"data": base64.urlsafe_b64encode(b"Hello world").decode(),
},
"parts": [],
},
}
svc = Mock()
svc.users().messages().get().execute.return_value = parent_message
return svc
@pytest.mark.asyncio
async def test_invalid_to_raises(self):
block = GmailForwardBlock()
with pytest.raises(ValueError, match="Invalid email address.*'to'"):
await block._forward_message(
self._mock_service(),
self._make_input(to=["bad-addr"]),
self._exec_ctx(),
)
@pytest.mark.asyncio
async def test_invalid_cc_raises(self):
block = GmailForwardBlock()
with pytest.raises(ValueError, match="Invalid email address.*'cc'"):
await block._forward_message(
self._mock_service(),
self._make_input(to=["valid@example.com"], cc=["not-valid"]),
self._exec_ctx(),
)
@pytest.mark.asyncio
async def test_invalid_bcc_raises(self):
block = GmailForwardBlock()
with pytest.raises(ValueError, match="Invalid email address.*'bcc'"):
await block._forward_message(
self._mock_service(),
self._make_input(to=["valid@example.com"], bcc=["nope"]),
self._exec_ctx(),
)