From c32c7fb95999999fa3f3e108eeb061bf2ca6b6d9 Mon Sep 17 00:00:00 2001
From: Otto
Date: Thu, 5 Feb 2026 19:11:56 +0000
Subject: [PATCH] fix: Handle RFC 2045 line-wrapped base64 in binary detection

- Strip whitespace before base64 decoding (encoders often add line breaks)
- Update regex comment to clarify whitespace handling
- Accept whitespace after "=" padding so CRLF-terminated input passes the pre-filter
- Add test case for line-wrapped base64

Addresses CodeRabbit review feedback.
---
 .../features/chat/tools/binary_output_processor.py | 10 +++++++---
 .../chat/tools/test_binary_output_processor.py     | 14 ++++++++++++++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/binary_output_processor.py b/autogpt_platform/backend/backend/api/features/chat/tools/binary_output_processor.py
index 9ac0812b0f..c2c246534c 100644
--- a/autogpt_platform/backend/backend/api/features/chat/tools/binary_output_processor.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/binary_output_processor.py
@@ -47,7 +47,8 @@ ALLOWED_MIMETYPES = {
 }
 
 # Base64 character validation (strict - must be pure base64)
-BASE64_PATTERN = re.compile(r"^[A-Za-z0-9+/\n\r]+=*$")
+# Whitespace is allowed anywhere, including after "=" padding (RFC 2045 CRLF wrapping)
+BASE64_PATTERN = re.compile(r"^[A-Za-z0-9+/\s]+(?:=\s*)*$")
 
 # Magic numbers for binary file detection
 # Note: WebP requires two-step detection: RIFF prefix + WEBP at offset 8
@@ -171,12 +172,15 @@ def _detect_raw_base64(value: str) -> Optional[tuple[bytes, str]]:
 
     Returns (content, extension) or None.
     """
-    # Pre-filter: must look like base64 (no spaces, punctuation, etc.)
+    # Pre-filter: must look like base64 (allows whitespace for RFC 2045 line wrapping)
     if not BASE64_PATTERN.match(value):
         return None
 
+    # Strip whitespace before decoding (RFC 2045 allows line breaks in base64)
+    normalized = re.sub(r"\s+", "", value)
+
     try:
-        content = base64.b64decode(value, validate=True)
+        content = base64.b64decode(normalized, validate=True)
     except (ValueError, binascii.Error):
         return None
 
diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/test_binary_output_processor.py b/autogpt_platform/backend/backend/api/features/chat/tools/test_binary_output_processor.py
index b6f54a3a17..312fafa4be 100644
--- a/autogpt_platform/backend/backend/api/features/chat/tools/test_binary_output_processor.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/test_binary_output_processor.py
@@ -198,6 +198,20 @@ class TestDetectRawBase64:
         result = _detect_raw_base64("not-valid-base64!!!")
         assert result is None
 
+    def test_detects_base64_with_line_breaks(self):
+        """Should detect raw base64 with RFC 2045 CRLF line breaks and padding."""
+        png_content = b"\x89PNG\r\n\x1a\n" + b"\x00" * 101
+        png_b64 = base64.b64encode(png_content).decode()
+        # RFC 2045 wrapping: CRLF at 76 chars and again after the "==" padding
+        wrapped = png_b64[:76] + "\r\n" + png_b64[76:] + "\r\n"
+
+        result = _detect_raw_base64(wrapped)
+
+        assert result is not None
+        content, ext = result
+        assert ext == "png"
+        assert content == png_content
+
 
 # =============================================================================
 # Process Binary Outputs Tests