fix(backend/copilot): address PR review comments

- Narrow `except Exception` to specific types in `read_file_bytes`
- Use `itertools.zip_longest` in `_tabular_to_list_of_dicts`
- Add NaN handling test for parquet bare refs
This commit is contained in:
Zamil Majdy
2026-03-16 17:27:53 +07:00
parent c44d8d7a98
commit 981891aebe
2 changed files with 42 additions and 4 deletions

View File

@@ -160,7 +160,9 @@ async def read_file_bytes(
raise ValueError(f"File not found: {plain}")
except (PermissionError, OSError) as exc:
raise ValueError(f"Failed to read {plain}: {exc}") from exc
except Exception as exc:
except (AttributeError, TypeError, RuntimeError) as exc:
# AttributeError/TypeError: workspace manager returned an
# unexpected type or interface; RuntimeError: async runtime issues.
logger.warning("Unexpected error reading %s: %s", plain, exc)
raise ValueError(f"Failed to read {plain}: {exc}") from exc
# NOTE: Workspace API does not support pre-read size checks;
@@ -396,10 +398,8 @@ def _tabular_to_list_of_dicts(parsed: list) -> list[dict[str, Any]]:
Extra values beyond the header length are silently dropped.
"""
header = parsed[0]
n = len(header)
return [
# Pad short rows with None; trim extra values beyond header length.
dict(zip(header, (row + [None] * n)[:n]))
dict(itertools.zip_longest(header, row[: len(header)], fillvalue=None))
for row in parsed[1:]
]

View File

@@ -1930,3 +1930,41 @@ async def test_bare_ref_binary_format_ignores_line_range():
)
# Full 2-row table is returned, not just row 1.
assert result["data"] == [["A", "B"], [1, 3], [2, 4]]
# ---------------------------------------------------------------------------
# NaN handling in bare ref binary format (parquet/xlsx)
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_bare_ref_parquet_nan_replaced_with_none():
    """NaN values in Parquet bare refs must become None for JSON serializability."""
    import io
    import math

    import pandas as pd

    # Build a small Parquet payload in memory: one NaN float and one null string.
    frame = pd.DataFrame({"A": [1.0, float("nan"), 3.0], "B": ["x", None, "z"]})
    buffer = io.BytesIO()
    frame.to_parquet(buffer, index=False)

    with patch(
        "backend.copilot.sdk.file_ref.read_file_bytes",
        new=AsyncMock(return_value=buffer.getvalue()),
    ):
        result = await expand_file_refs_in_args(
            {"data": "@@agptfile:workspace:///data.parquet"},
            user_id="u1",
            session=_make_session(),
        )

    rows = result["data"]
    # rows[0] is the header, so rows[2] is the all-null data row.
    # Row with NaN in float col → None
    assert rows[2][0] is None  # float NaN → None
    assert rows[2][1] is None  # str None → None
    # Ensure no NaN leaks
    for row in rows[1:]:
        for cell in row:
            if isinstance(cell, float):
                assert not math.isnan(cell), f"NaN leaked: {row}"