mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-05 20:35:10 -05:00
fix: Handle RFC 2045 line-wrapped base64 in binary detection
- Strip whitespace before base64 decoding (encoders often add line breaks)
- Update regex comment to clarify whitespace handling
- Add test case for line-wrapped base64

Addresses CodeRabbit review feedback.
This commit is contained in:
@@ -47,7 +47,8 @@ ALLOWED_MIMETYPES = {
|
||||
}
|
||||
|
||||
# Base64 character validation (base64 alphabet plus optional line-wrapping whitespace)
|
||||
BASE64_PATTERN = re.compile(r"^[A-Za-z0-9+/\n\r]+=*$")
|
||||
# Allows whitespace which will be stripped before decoding (RFC 2045 line wrapping)
|
||||
BASE64_PATTERN = re.compile(r"^[A-Za-z0-9+/\s]+=*$")
|
||||
|
||||
# Magic numbers for binary file detection
|
||||
# Note: WebP requires two-step detection: RIFF prefix + WEBP at offset 8
|
||||
@@ -171,12 +172,15 @@ def _detect_raw_base64(value: str) -> Optional[tuple[bytes, str]]:
|
||||
|
||||
Returns (content, extension) or None.
|
||||
"""
|
||||
# Pre-filter: must look like base64 (no spaces, punctuation, etc.)
|
||||
# Pre-filter: must look like base64 (allows whitespace for RFC 2045 line wrapping)
|
||||
if not BASE64_PATTERN.match(value):
|
||||
return None
|
||||
|
||||
# Strip whitespace before decoding (RFC 2045 allows line breaks in base64)
|
||||
normalized = re.sub(r"\s+", "", value)
|
||||
|
||||
try:
|
||||
content = base64.b64decode(value, validate=True)
|
||||
content = base64.b64decode(normalized, validate=True)
|
||||
except (ValueError, binascii.Error):
|
||||
return None
|
||||
|
||||
|
||||
@@ -198,6 +198,20 @@ class TestDetectRawBase64:
|
||||
result = _detect_raw_base64("not-valid-base64!!!")
|
||||
assert result is None
|
||||
|
||||
def test_detects_base64_with_line_breaks(self):
|
||||
"""Should detect raw base64 with RFC 2045 line breaks."""
|
||||
png_content = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100
|
||||
png_b64 = base64.b64encode(png_content).decode()
|
||||
# Simulate RFC 2045 line wrapping at 76 chars
|
||||
wrapped = png_b64[:76] + "\n" + png_b64[76:]
|
||||
|
||||
result = _detect_raw_base64(wrapped)
|
||||
|
||||
assert result is not None
|
||||
content, ext = result
|
||||
assert ext == "png"
|
||||
assert content == png_content
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Process Binary Outputs Tests
|
||||
|
||||
Reference in New Issue
Block a user