Mirror of https://github.com/Significant-Gravitas/AutoGPT.git
(synced 2026-04-08 03:00:28 -04:00)
fix(backend/copilot): address PR review comments
- Narrow `except Exception` to specific types in `read_file_bytes`
- Use `itertools.zip_longest` in `_tabular_to_list_of_dicts`
- Add NaN handling test for parquet bare refs
This commit is contained in:
@@ -160,7 +160,9 @@ async def read_file_bytes(
|
||||
raise ValueError(f"File not found: {plain}")
|
||||
except (PermissionError, OSError) as exc:
|
||||
raise ValueError(f"Failed to read {plain}: {exc}") from exc
|
||||
except Exception as exc:
|
||||
except (AttributeError, TypeError, RuntimeError) as exc:
|
||||
# AttributeError/TypeError: workspace manager returned an
|
||||
# unexpected type or interface; RuntimeError: async runtime issues.
|
||||
logger.warning("Unexpected error reading %s: %s", plain, exc)
|
||||
raise ValueError(f"Failed to read {plain}: {exc}") from exc
|
||||
# NOTE: Workspace API does not support pre-read size checks;
|
||||
@@ -396,10 +398,8 @@ def _tabular_to_list_of_dicts(parsed: list) -> list[dict[str, Any]]:
|
||||
Extra values beyond the header length are silently dropped.
|
||||
"""
|
||||
header = parsed[0]
|
||||
n = len(header)
|
||||
return [
|
||||
# Pad short rows with None; trim extra values beyond header length.
|
||||
dict(zip(header, (row + [None] * n)[:n]))
|
||||
dict(itertools.zip_longest(header, row[: len(header)], fillvalue=None))
|
||||
for row in parsed[1:]
|
||||
]
|
||||
|
||||
|
||||
@@ -1930,3 +1930,41 @@ async def test_bare_ref_binary_format_ignores_line_range():
|
||||
)
|
||||
# Full 2-row table is returned, not just row 1.
|
||||
assert result["data"] == [["A", "B"], [1, 3], [2, 4]]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# NaN handling in bare ref binary format (parquet/xlsx)
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_bare_ref_parquet_nan_replaced_with_none():
    """NaN values in Parquet bare refs must become None for JSON serializability."""
    import io
    import math

    import pandas as pd

    # Build an in-memory Parquet payload containing a float NaN and a None.
    df = pd.DataFrame({"A": [1.0, float("nan"), 3.0], "B": ["x", None, "z"]})
    buf = io.BytesIO()
    df.to_parquet(buf, index=False)
    parquet_bytes = buf.getvalue()

    # Serve the synthetic Parquet bytes in place of a real workspace read.
    with patch(
        "backend.copilot.sdk.file_ref.read_file_bytes",
        new=AsyncMock(return_value=parquet_bytes),
    ):
        result = await expand_file_refs_in_args(
            {"data": "@@agptfile:workspace:///data.parquet"},
            user_id="u1",
            session=_make_session(),
        )

    rows = result["data"]
    # rows[0] is the header; rows[2] is the data row that held NaN/None.
    assert rows[2][0] is None  # float NaN -> None
    assert rows[2][1] is None  # str None -> None
    # Ensure no NaN leaks anywhere in the data rows.
    for row in rows[1:]:
        for cell in row:
            if isinstance(cell, float):
                assert not math.isnan(cell), f"NaN leaked: {row}"
|
||||
|
||||
Reference in New Issue
Block a user