mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(backend): use actual byte size for string size guard, narrow exception handling
- Size guard in _expand now computes len(content.encode("utf-8")) for
strings instead of len(content) which returns character count. This
fixes a security issue where multi-byte UTF-8 strings (e.g. emoji)
could pass up to 40MB through a 10MB byte limit.
- Narrow except Exception in _infer_format_from_workspace to only catch
expected IO/lookup failures (ValueError, FileNotFoundError, OSError,
PermissionError).
- Narrow except Exception in parse_file_content to only catch expected
parse failures (plus ImportError and OSError), letting programming bugs
surface.
This commit is contained in:
@@ -286,7 +286,7 @@ async def _infer_format_from_workspace(
|
||||
if fmt:
|
||||
return fmt
|
||||
return infer_format(info.name)
|
||||
except Exception:
|
||||
except (ValueError, FileNotFoundError, OSError, PermissionError):
|
||||
logger.debug("workspace metadata lookup failed for %s", uri, exc_info=True)
|
||||
return None
|
||||
|
||||
@@ -447,9 +447,14 @@ async def expand_file_refs_in_args(
|
||||
raise FileRefExpansionError(str(exc)) from exc
|
||||
|
||||
# Guard against oversized content before parsing.
|
||||
# For strings, len() returns character count which is a lower
|
||||
# bound on UTF-8 byte size — sufficient for a safety guard.
|
||||
content_size = len(content)
|
||||
if isinstance(content, bytes):
|
||||
content_size = len(content)
|
||||
else:
|
||||
# len() on str returns character count, but multi-byte
|
||||
# UTF-8 chars (e.g. emoji) mean byte size can be up to
|
||||
# 4x the character count. Use the actual encoded byte
|
||||
# length for an accurate guard.
|
||||
content_size = len(content.encode("utf-8"))
|
||||
if content_size > _MAX_BARE_REF_BYTES:
|
||||
raise FileRefExpansionError(
|
||||
f"File too large for structured parsing "
|
||||
|
||||
@@ -244,7 +244,17 @@ def parse_file_content(content: str | bytes, fmt: str, *, strict: bool = False)
|
||||
content = content.decode("utf-8", errors="replace")
|
||||
return parser(content)
|
||||
|
||||
except Exception:
|
||||
except (
|
||||
json.JSONDecodeError,
|
||||
csv.Error,
|
||||
yaml.YAMLError,
|
||||
tomllib.TOMLDecodeError,
|
||||
ValueError,
|
||||
UnicodeDecodeError,
|
||||
pd.errors.ParserError,
|
||||
ImportError,
|
||||
OSError,
|
||||
):
|
||||
if strict:
|
||||
raise
|
||||
logger.debug("Structured parsing failed for format=%s, falling back", fmt)
|
||||
|
||||
Reference in New Issue
Block a user