fix(backend/copilot): address PR review comments

- Narrow `except Exception` to specific types in `read_file_bytes`
- Use `itertools.zip_longest` in `_tabular_to_list_of_dicts`
- Add NaN handling test for parquet bare refs
This commit is contained in:
Zamil Majdy
2026-03-16 17:27:53 +07:00
parent c44d8d7a98
commit 981891aebe
2 changed files with 42 additions and 4 deletions

View File

@@ -160,7 +160,9 @@ async def read_file_bytes(
raise ValueError(f"File not found: {plain}")
except (PermissionError, OSError) as exc:
raise ValueError(f"Failed to read {plain}: {exc}") from exc
except Exception as exc:
except (AttributeError, TypeError, RuntimeError) as exc:
# AttributeError/TypeError: workspace manager returned an
# unexpected type or interface; RuntimeError: async runtime issues.
logger.warning("Unexpected error reading %s: %s", plain, exc)
raise ValueError(f"Failed to read {plain}: {exc}") from exc
# NOTE: Workspace API does not support pre-read size checks;
@@ -396,10 +398,8 @@ def _tabular_to_list_of_dicts(parsed: list) -> list[dict[str, Any]]:
Extra values beyond the header length are silently dropped.
"""
header = parsed[0]
n = len(header)
return [
# Pad short rows with None; trim extra values beyond header length.
dict(zip(header, (row + [None] * n)[:n]))
dict(itertools.zip_longest(header, row[: len(header)], fillvalue=None))
for row in parsed[1:]
]

View File

@@ -1930,3 +1930,41 @@ async def test_bare_ref_binary_format_ignores_line_range():
)
# Full 2-row table is returned, not just row 1.
assert result["data"] == [["A", "B"], [1, 3], [2, 4]]
# ---------------------------------------------------------------------------
# NaN handling in bare ref binary format (parquet/xlsx)
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_bare_ref_parquet_nan_replaced_with_none():
    """NaN values in Parquet bare refs must become None for JSON serializability."""
    import io
    import math

    import pandas as pd

    # Build a small Parquet payload in memory: one NaN float and one null string.
    frame = pd.DataFrame({"A": [1.0, float("nan"), 3.0], "B": ["x", None, "z"]})
    buffer = io.BytesIO()
    frame.to_parquet(buffer, index=False)

    with patch(
        "backend.copilot.sdk.file_ref.read_file_bytes",
        new=AsyncMock(return_value=buffer.getvalue()),
    ):
        result = await expand_file_refs_in_args(
            {"data": "@@agptfile:workspace:///data.parquet"},
            user_id="u1",
            session=_make_session(),
        )

    rows = result["data"]
    # rows[0] is the header, so rows[2] is the all-null data row.
    # Row with NaN in float col → None
    assert rows[2][0] is None  # float NaN → None
    assert rows[2][1] is None  # str None → None
    # Ensure no NaN leaks
    for row in rows[1:]:
        for cell in row:
            if isinstance(cell, float):
                assert not math.isnan(cell), f"NaN leaked: {row}"