fix(copilot): resolve circular imports and add full format×schema matrix tests

- Move get_workspace_manager to copilot/context.py to break the
  tools → sdk → tools circular import chain
- Make sdk/__init__.py use PEP 562 lazy loading to avoid eagerly
  importing service.py (which chains to tools/)
- Replace local import in run_block.py with top-level import
- Add parametrized matrix tests covering JSON/CSV/TSV/JSONL/YAML/TOML
  × string-typed/non-string/opaque-object schema combinations
- Add two-phase expansion integration test (tool-level skip + block-level expand)
This commit is contained in:
Zamil Majdy
2026-03-13 13:56:03 +07:00
parent 79b881393e
commit c342ccc42c
6 changed files with 390 additions and 22 deletions

View File

@@ -11,6 +11,8 @@ from contextvars import ContextVar
from typing import TYPE_CHECKING
from backend.copilot.model import ChatSession
from backend.data.db_accessors import workspace_db
from backend.util.workspace import WorkspaceManager
if TYPE_CHECKING:
from e2b import AsyncSandbox
@@ -82,6 +84,17 @@ def resolve_sandbox_path(path: str) -> str:
return normalized
async def get_workspace_manager(user_id: str, session_id: str) -> WorkspaceManager:
    """Build a :class:`WorkspaceManager` bound to this user's session.

    Lives in ``context`` (rather than ``tools/workspace_files``) so that
    lightweight modules such as ``sdk/file_ref`` can import it without
    pulling in the heavy ``tools/__init__`` import chain.
    """
    db = workspace_db()
    workspace = await db.get_or_create_workspace(user_id)
    return WorkspaceManager(user_id, workspace.id, session_id)
def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
"""Return True if *path* is within an allowed host-filesystem location.

View File

@@ -3,12 +3,26 @@
This module provides the integration layer between the Claude Agent SDK
and the existing CoPilot tool system, enabling drop-in replacement of
the current LLM orchestration with the battle-tested Claude Agent SDK.
Submodule imports are deferred (PEP 562) to avoid circular imports:
tools → sdk → service → prompting → tools.
"""
from .service import stream_chat_completion_sdk
from .tool_adapter import create_copilot_mcp_server
from typing import Any
__all__ = [
"stream_chat_completion_sdk",
"create_copilot_mcp_server",
]
def __getattr__(name: str) -> Any:
if name == "stream_chat_completion_sdk":
from .service import stream_chat_completion_sdk
return stream_chat_completion_sdk
if name == "create_copilot_mcp_server":
from .tool_adapter import create_copilot_mcp_server
return create_copilot_mcp_server
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

View File

@@ -41,11 +41,11 @@ from typing import Any
from backend.copilot.context import (
get_current_sandbox,
get_sdk_cwd,
get_workspace_manager,
is_allowed_local_path,
resolve_sandbox_path,
)
from backend.copilot.model import ChatSession
from backend.copilot.tools.workspace_files import get_manager
from backend.util.file import parse_workspace_uri
from backend.util.file_content_parser import (
BINARY_FORMATS,
@@ -138,7 +138,7 @@ async def read_file_bytes(
if plain.startswith("workspace://"):
if not user_id:
raise ValueError("workspace:// file references require authentication")
manager = await get_manager(user_id, session.session_id)
manager = await get_workspace_manager(user_id, session.session_id)
ws = parse_workspace_uri(plain)
try:
return await (
@@ -272,7 +272,7 @@ async def _infer_format_from_workspace(
return None
try:
ws = parse_workspace_uri(uri)
manager = await get_manager(user_id, session.session_id)
manager = await get_workspace_manager(user_id, session.session_id)
info = await (
manager.get_file_info(ws.file_ref)
if not ws.is_path
@@ -329,7 +329,13 @@ async def expand_file_refs_in_args(
properties = (input_schema or {}).get("properties", {})
async def _expand(value: Any, *, expect_string: bool = False) -> Any:
async def _expand(
value: Any,
*,
prop_schema: dict[str, Any] | None = None,
) -> Any:
expect_string = (prop_schema or {}).get("type") == "string"
if isinstance(value, str):
# Check for a bare file reference first — enables structured parsing.
ref = parse_file_ref(value)
@@ -369,9 +375,9 @@ async def expand_file_refs_in_args(
f"({content_size} bytes, limit {_MAX_BARE_REF_BYTES})"
)
# When the tool's input schema declares this parameter as
# "string", return raw file content — don't parse into a
# structured type that would need json.dumps() serialisation.
# When the schema declares this parameter as "string",
# return raw file content — don't parse into a structured
# type that would need json.dumps() serialisation.
if expect_string:
if isinstance(content, bytes):
return content.decode("utf-8", errors="replace")
@@ -411,15 +417,23 @@ async def expand_file_refs_in_args(
value, user_id, session, raise_on_error=True
)
if isinstance(value, dict):
return {k: await _expand(v) for k, v in value.items()}
# When the schema says this is an object but doesn't define
# inner properties, skip expansion — the caller (e.g.
# RunBlockTool) will expand with the actual nested schema.
if (
prop_schema is not None
and prop_schema.get("type") == "object"
and "properties" not in prop_schema
):
return value
nested_props = (prop_schema or {}).get("properties", {})
return {
k: await _expand(v, prop_schema=nested_props.get(k))
for k, v in value.items()
}
if isinstance(value, list):
return [await _expand(item) for item in value]
items_schema = (prop_schema or {}).get("items")
return [await _expand(item, prop_schema=items_schema) for item in value]
return value
return {
k: await _expand(
v,
expect_string=properties.get(k, {}).get("type") == "string",
)
for k, v in args.items()
}
return {k: await _expand(v, prop_schema=properties.get(k)) for k, v in args.items()}

View File

@@ -535,6 +535,88 @@ async def test_bare_ref_csv_parses_when_no_schema_provided():
assert result["data"] == [["Name", "Score"], ["Alice", "90"], ["Bob", "85"]]
@pytest.mark.asyncio
async def test_opaque_object_skips_inner_expansion():
    """When the schema declares a property as {type: "object"} with no
    properties, inner file refs are NOT expanded — they stay as-is for
    the tool to expand later with the correct nested schema."""
    # Tool-level schema: ``input_data`` is declared as an object but its
    # inner properties are deliberately left undeclared (opaque).
    schema = {
        "type": "object",
        "properties": {
            "block_id": {"type": "string"},
            "input_data": {
                "type": "object",
                "description": "Opaque block inputs",
            },
        },
    }
    # resolve_file_ref should NOT be called for the inner ref — the mock's
    # side_effect raises AssertionError so any call fails the test loudly.
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=AssertionError("should not be called")),
    ):
        result = await expand_file_refs_in_args(
            {
                "block_id": "some-id",
                "input_data": {"text": "@@agptfile:workspace:///data.csv"},
            },
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )
    # input_data should be returned unchanged (the ref token survives for
    # a later second-phase expansion by the tool itself)
    assert result["input_data"]["text"] == "@@agptfile:workspace:///data.csv"
@pytest.mark.asyncio
async def test_nested_schema_propagation():
    """When the schema declares nested properties, the inner type
    information is used for expand/skip decisions."""
    csv_content = "Name,Score\nAlice,90\nBob,85"

    # Stub resolver: always returns the same raw CSV text regardless of ref.
    async def _resolve(ref, *a, **kw):  # noqa: ARG001
        return csv_content

    # ``input_data`` declares its inner properties, so expansion should
    # recurse using each nested property's own type.
    schema = {
        "type": "object",
        "properties": {
            "input_data": {
                "type": "object",
                "properties": {
                    "text": {"type": "string"},
                    "rows": {"type": "array"},
                },
            },
        },
    }
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_resolve),
    ):
        result = await expand_file_refs_in_args(
            {
                "input_data": {
                    "text": "@@agptfile:workspace:///data.csv",
                    "rows": "@@agptfile:workspace:///data.csv",
                },
            },
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )
    # string-typed field: raw CSV text
    assert result["input_data"]["text"] == csv_content
    # array-typed field: parsed rows
    assert result["input_data"]["rows"] == [
        ["Name", "Score"],
        ["Alice", "90"],
        ["Bob", "85"],
    ]
# ---------------------------------------------------------------------------
# Per-file truncation and aggregate budget
# ---------------------------------------------------------------------------
@@ -584,3 +666,221 @@ async def test_expand_aggregate_budget_exhausted():
)
assert "budget exhausted" in result
# ---------------------------------------------------------------------------
# Full format × schema-type matrix
# ---------------------------------------------------------------------------
# Each text format is tested against two schema types:
# - type: string → raw file content returned as-is
# - no type / non-string → structured parsing applied
# This ensures CSV→string blocks get raw CSV, not json.dumps(list[list[str]]).
# Fixture table shared by the matrix tests below.
_FORMAT_SAMPLES: dict[str, tuple[str, str, object]] = {
    # format_label: (extension, raw_content, expected_parsed_value)
    # JSON parses to a dict with native value types.
    "json": (
        ".json",
        '{"name": "Alice", "score": 90}',
        {"name": "Alice", "score": 90},
    ),
    # CSV/TSV parse to rows of strings (no numeric coercion), header included.
    "csv": (
        ".csv",
        "Name,Score\nAlice,90\nBob,85",
        [["Name", "Score"], ["Alice", "90"], ["Bob", "85"]],
    ),
    "tsv": (
        ".tsv",
        "Name\tScore\nAlice\t90\nBob\t85",
        [["Name", "Score"], ["Alice", "90"], ["Bob", "85"]],
    ),
    # JSONL parses to a list with one element per line.
    "jsonl": (
        ".jsonl",
        '{"a":1}\n{"a":2}',
        [{"a": 1}, {"a": 2}],
    ),
    # YAML parses to a dict with native value types.
    "yaml": (
        ".yaml",
        "name: Alice\nscore: 90",
        {"name": "Alice", "score": 90},
    ),
    # TOML tables parse to nested dicts.
    "toml": (
        ".toml",
        '[person]\nname = "Alice"\nscore = 90',
        {"person": {"name": "Alice", "score": 90}},
    ),
}
@pytest.mark.asyncio
@pytest.mark.parametrize("fmt", _FORMAT_SAMPLES.keys())
async def test_matrix_format_to_string_schema(fmt: str):
    """For a parameter declared ``type: string``, expansion must hand back
    the file's raw text verbatim — never a parsed structure."""
    ext, raw_content, _ = _FORMAT_SAMPLES[fmt]

    schema = {
        "type": "object",
        "properties": {"text": {"type": "string"}},
    }

    async def _fake_resolve(ref, *a, **kw):  # noqa: ARG001
        return raw_content

    patched = patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_fake_resolve),
    )
    with patched:
        result = await expand_file_refs_in_args(
            {"text": f"@@agptfile:workspace:///data{ext}"},
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )

    expanded = result["text"]
    assert isinstance(
        expanded, str
    ), f"{fmt}: expected str, got {type(result['text'])}"
    assert expanded == raw_content, f"{fmt}: raw content mismatch"
@pytest.mark.asyncio
@pytest.mark.parametrize("fmt", _FORMAT_SAMPLES.keys())
async def test_matrix_format_to_nonstring_schema(fmt: str):
    """Without a string schema constraint, every text format returns
    a structured (parsed) value."""
    ext, raw_content, expected_parsed = _FORMAT_SAMPLES[fmt]

    # Stub resolver: returns the fixture's raw text for any ref.
    async def _resolve(ref, *a, **kw):  # noqa: ARG001
        return raw_content

    # No input_schema → structured parsing is the default
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_resolve),
    ):
        result = await expand_file_refs_in_args(
            {"data": f"@@agptfile:workspace:///data{ext}"},
            user_id="u1",
            session=_make_session(),
        )
    # The parsed value must match the fixture's expectation and must not
    # have degraded to a raw string.
    assert result["data"] == expected_parsed, f"{fmt}: parsed value mismatch"
    assert not isinstance(
        result["data"], str
    ), f"{fmt}: expected structured type, got str"
@pytest.mark.asyncio
@pytest.mark.parametrize("fmt", _FORMAT_SAMPLES.keys())
async def test_matrix_format_opaque_object_preserves_ref(fmt: str):
    """When the parameter is an opaque object (type: object, no properties),
    inner file refs are preserved for second-phase expansion."""
    ext, _, _ = _FORMAT_SAMPLES[fmt]
    # ``input_data`` has no declared inner properties → opaque.
    schema = {
        "type": "object",
        "properties": {
            "input_data": {"type": "object"},
        },
    }
    # Any call to the resolver is a failure: the ref must not be expanded.
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=AssertionError("should not be called")),
    ):
        ref_token = f"@@agptfile:workspace:///data{ext}"
        result = await expand_file_refs_in_args(
            {"input_data": {"field": ref_token}},
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )
    # The raw token survives untouched for the tool's later expansion pass.
    assert result["input_data"]["field"] == ref_token, f"{fmt}: ref should be preserved"
@pytest.mark.asyncio
async def test_matrix_mixed_fields_string_and_array():
    """One call, two typed fields: the string-typed one receives raw CSV
    text while the array-typed one receives parsed rows."""
    sample_csv = "A,B\n1,2\n3,4"

    schema = {
        "type": "object",
        "properties": {
            "raw_text": {"type": "string"},
            "parsed_rows": {"type": "array"},
        },
    }

    async def _return_csv(ref, *a, **kw):  # noqa: ARG001
        return sample_csv

    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_return_csv),
    ):
        result = await expand_file_refs_in_args(
            {
                "raw_text": "@@agptfile:workspace:///data.csv",
                "parsed_rows": "@@agptfile:workspace:///data.csv",
            },
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )

    expected_rows = [["A", "B"], ["1", "2"], ["3", "4"]]
    assert isinstance(result["raw_text"], str)
    assert result["raw_text"] == sample_csv
    assert result["parsed_rows"] == expected_rows
@pytest.mark.asyncio
async def test_matrix_second_phase_expansion_with_block_schema():
    """Simulates the two-phase expansion: first phase skips opaque input_data,
    second phase expands with the block's actual schema."""
    csv_content = "X,Y\n10,20"

    # Stub resolver used only in phase 2.
    async def _resolve(ref, *a, **kw):  # noqa: ARG001
        return csv_content

    # Phase 1: tool-level schema (input_data is opaque)
    tool_schema = {
        "type": "object",
        "properties": {
            "block_id": {"type": "string"},
            "input_data": {"type": "object"},
        },
    }
    ref_token = "@@agptfile:workspace:///data.csv"
    # Phase 1 must not touch the resolver at all — enforce via side_effect.
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=AssertionError("phase 1 should not expand")),
    ):
        phase1_result = await expand_file_refs_in_args(
            {"block_id": "some-id", "input_data": {"text": ref_token}},
            user_id="u1",
            session=_make_session(),
            input_schema=tool_schema,
        )
    # Ref should survive phase 1
    assert phase1_result["input_data"]["text"] == ref_token

    # Phase 2: block-level schema (text is string)
    block_schema = {
        "type": "object",
        "properties": {
            "text": {"type": "string"},
        },
    }
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_resolve),
    ):
        # Re-run expansion over just the inner payload with the real schema.
        phase2_result = await expand_file_refs_in_args(
            phase1_result["input_data"],
            user_id="u1",
            session=_make_session(),
            input_schema=block_schema,
        )
    # Phase 2 should return raw CSV since text is string-typed
    assert isinstance(phase2_result["text"], str)
    assert phase2_result["text"] == csv_content

View File

@@ -12,6 +12,7 @@ from backend.copilot.constants import (
COPILOT_SESSION_PREFIX,
)
from backend.copilot.model import ChatSession
from backend.copilot.sdk.file_ref import FileRefExpansionError, expand_file_refs_in_args
from backend.data.db_accessors import review_db
from backend.data.execution import ExecutionContext
@@ -197,6 +198,29 @@ class RunBlockTool(BaseTool):
session_id=session_id,
)
# Expand @@agptfile: refs in input_data with the block's input
# schema. The generic _truncating wrapper skips opaque object
# properties (input_data has no declared inner properties in the
# tool schema), so file ref tokens are still intact here.
# Using the block's schema lets us return raw text for string-typed
# fields and parsed structures for list/dict-typed fields.
if input_data:
try:
input_data = await expand_file_refs_in_args(
input_data,
user_id,
session,
input_schema=input_schema,
)
except FileRefExpansionError as exc:
return ErrorResponse(
message=(
f"Failed to resolve file reference: {exc}. "
"Ensure the file exists before referencing it."
),
session_id=session_id,
)
if missing_credentials:
# Return setup requirements response with missing credentials
credentials_fields_info = block.input_schema.get_credentials_fields_info()

View File

@@ -10,11 +10,11 @@ from pydantic import BaseModel
from backend.copilot.context import (
E2B_WORKDIR,
get_current_sandbox,
get_workspace_manager,
resolve_sandbox_path,
)
from backend.copilot.model import ChatSession
from backend.copilot.tools.sandbox import make_session_path
from backend.data.db_accessors import workspace_db
from backend.util.settings import Config
from backend.util.virus_scanner import scan_content_safe
from backend.util.workspace import WorkspaceManager
@@ -219,9 +219,12 @@ def _is_text_mime(mime_type: str) -> bool:
async def get_manager(user_id: str, session_id: str) -> WorkspaceManager:
"""Create a session-scoped WorkspaceManager."""
workspace = await workspace_db().get_or_create_workspace(user_id)
return WorkspaceManager(user_id, workspace.id, session_id)
"""Create a session-scoped WorkspaceManager.
Delegates to :func:`backend.copilot.context.get_workspace_manager`.
Kept here for backward-compatibility with callers inside ``tools/``.
"""
return await get_workspace_manager(user_id, session_id)
async def _resolve_file(