mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-05 20:35:10 -05:00
fix: Handle RFC 2045 line-wrapped base64 in embedded detection
Sentry correctly identified that both the regex and the decode step would fail on line-wrapped base64. Fixed by: (1) updating the regex to allow whitespace inside base64 chunks; (2) stripping whitespace before decoding with validate=True; (3) adding a test for line-wrapped base64.
This commit is contained in:
@@ -28,8 +28,9 @@ logger = logging.getLogger(__name__)
|
||||
MIN_DECODED_SIZE = 1024 # 1KB
|
||||
|
||||
# Pattern to find base64 chunks in text (at least 100 chars to be worth checking)
|
||||
# Matches continuous base64 characters, optionally ending with = padding
|
||||
EMBEDDED_BASE64_PATTERN = re.compile(r"[A-Za-z0-9+/]{100,}={0,2}")
|
||||
# Matches a run of base64 characters (whitespace is allowed inside the run to tolerate line wrapping, and counts toward the 100-character minimum),
|
||||
# optionally ending with = padding
|
||||
EMBEDDED_BASE64_PATTERN = re.compile(r"[A-Za-z0-9+/\s]{100,}={0,2}")
|
||||
|
||||
# Magic numbers for binary file detection
|
||||
MAGIC_SIGNATURES = [
|
||||
@@ -141,8 +142,11 @@ def _decode_and_validate(b64_str: str) -> Optional[tuple[bytes, str]]:
|
||||
|
||||
Returns (content, extension) if valid binary, None otherwise.
|
||||
"""
|
||||
# Strip whitespace for RFC 2045 line-wrapped base64
|
||||
normalized = re.sub(r"\s+", "", b64_str)
|
||||
|
||||
try:
|
||||
content = base64.b64decode(b64_str, validate=True)
|
||||
content = base64.b64decode(normalized, validate=True)
|
||||
except (ValueError, binascii.Error):
|
||||
return None
|
||||
|
||||
|
||||
@@ -116,6 +116,18 @@ class TestDecodeAndValidate:
|
||||
result = _decode_and_validate(wav_b64)
|
||||
assert result is None
|
||||
|
||||
def test_handles_line_wrapped_base64(self):
|
||||
"""Should handle RFC 2045 line-wrapped base64."""
|
||||
pdf_content = b"%PDF-1.4 " + b"x" * 2000
|
||||
pdf_b64 = base64.b64encode(pdf_content).decode()
|
||||
# Simulate line wrapping at 76 chars
|
||||
wrapped = "\n".join(pdf_b64[i : i + 76] for i in range(0, len(pdf_b64), 76))
|
||||
result = _decode_and_validate(wrapped)
|
||||
assert result is not None
|
||||
content, ext = result
|
||||
assert ext == "pdf"
|
||||
assert content == pdf_content
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Marker Expansion Tests
|
||||
|
||||
Reference in New Issue
Block a user