fix(copilot): resolve circular imports and add full format×schema matrix tests

- Move get_workspace_manager to copilot/context.py to break the
  tools → sdk → tools circular import chain
- Make sdk/__init__.py use PEP 562 lazy loading to avoid eagerly
  importing service.py (which chains to tools/)
- Replace local import in run_block.py with top-level import
- Add parametrized matrix tests covering JSON/CSV/TSV/JSONL/YAML/TOML
  × string-typed/non-string/opaque-object schema combinations
- Add two-phase expansion integration test (tool-level skip + block-level expand)
This commit is contained in:
Zamil Majdy
2026-03-13 13:56:03 +07:00
parent 79b881393e
commit c342ccc42c
6 changed files with 390 additions and 22 deletions

View File

@@ -11,6 +11,8 @@ from contextvars import ContextVar
from typing import TYPE_CHECKING
from backend.copilot.model import ChatSession
from backend.data.db_accessors import workspace_db
from backend.util.workspace import WorkspaceManager
if TYPE_CHECKING:
from e2b import AsyncSandbox
@@ -82,6 +84,17 @@ def resolve_sandbox_path(path: str) -> str:
return normalized
async def get_workspace_manager(user_id: str, session_id: str) -> WorkspaceManager:
    """Build a :class:`WorkspaceManager` bound to this user's session.

    Lives in ``context`` (rather than ``tools/workspace_files``) so that
    lightweight modules such as ``sdk/file_ref`` can import it without
    pulling in the heavy ``tools/__init__`` import chain.
    """
    db = workspace_db()
    workspace = await db.get_or_create_workspace(user_id)
    return WorkspaceManager(user_id, workspace.id, session_id)
def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
"""Return True if *path* is within an allowed host-filesystem location.

View File

@@ -3,12 +3,26 @@
This module provides the integration layer between the Claude Agent SDK
and the existing CoPilot tool system, enabling drop-in replacement of
the current LLM orchestration with the battle-tested Claude Agent SDK.
Submodule imports are deferred (PEP 562) to avoid circular imports:
tools → sdk → service → prompting → tools.
"""
from .service import stream_chat_completion_sdk
from .tool_adapter import create_copilot_mcp_server
from typing import Any
__all__ = [
"stream_chat_completion_sdk",
"create_copilot_mcp_server",
]
def __getattr__(name: str) -> Any:
if name == "stream_chat_completion_sdk":
from .service import stream_chat_completion_sdk
return stream_chat_completion_sdk
if name == "create_copilot_mcp_server":
from .tool_adapter import create_copilot_mcp_server
return create_copilot_mcp_server
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

View File

@@ -41,11 +41,11 @@ from typing import Any
from backend.copilot.context import (
get_current_sandbox,
get_sdk_cwd,
get_workspace_manager,
is_allowed_local_path,
resolve_sandbox_path,
)
from backend.copilot.model import ChatSession
from backend.copilot.tools.workspace_files import get_manager
from backend.util.file import parse_workspace_uri
from backend.util.file_content_parser import (
BINARY_FORMATS,
@@ -138,7 +138,7 @@ async def read_file_bytes(
if plain.startswith("workspace://"):
if not user_id:
raise ValueError("workspace:// file references require authentication")
manager = await get_manager(user_id, session.session_id)
manager = await get_workspace_manager(user_id, session.session_id)
ws = parse_workspace_uri(plain)
try:
return await (
@@ -272,7 +272,7 @@ async def _infer_format_from_workspace(
return None
try:
ws = parse_workspace_uri(uri)
manager = await get_manager(user_id, session.session_id)
manager = await get_workspace_manager(user_id, session.session_id)
info = await (
manager.get_file_info(ws.file_ref)
if not ws.is_path
@@ -329,7 +329,13 @@ async def expand_file_refs_in_args(
properties = (input_schema or {}).get("properties", {})
async def _expand(value: Any, *, expect_string: bool = False) -> Any:
async def _expand(
value: Any,
*,
prop_schema: dict[str, Any] | None = None,
) -> Any:
expect_string = (prop_schema or {}).get("type") == "string"
if isinstance(value, str):
# Check for a bare file reference first — enables structured parsing.
ref = parse_file_ref(value)
@@ -369,9 +375,9 @@ async def expand_file_refs_in_args(
f"({content_size} bytes, limit {_MAX_BARE_REF_BYTES})"
)
# When the tool's input schema declares this parameter as
# "string", return raw file content — don't parse into a
# structured type that would need json.dumps() serialisation.
# When the schema declares this parameter as "string",
# return raw file content — don't parse into a structured
# type that would need json.dumps() serialisation.
if expect_string:
if isinstance(content, bytes):
return content.decode("utf-8", errors="replace")
@@ -411,15 +417,23 @@ async def expand_file_refs_in_args(
value, user_id, session, raise_on_error=True
)
if isinstance(value, dict):
return {k: await _expand(v) for k, v in value.items()}
# When the schema says this is an object but doesn't define
# inner properties, skip expansion — the caller (e.g.
# RunBlockTool) will expand with the actual nested schema.
if (
prop_schema is not None
and prop_schema.get("type") == "object"
and "properties" not in prop_schema
):
return value
nested_props = (prop_schema or {}).get("properties", {})
return {
k: await _expand(v, prop_schema=nested_props.get(k))
for k, v in value.items()
}
if isinstance(value, list):
return [await _expand(item) for item in value]
items_schema = (prop_schema or {}).get("items")
return [await _expand(item, prop_schema=items_schema) for item in value]
return value
return {
k: await _expand(
v,
expect_string=properties.get(k, {}).get("type") == "string",
)
for k, v in args.items()
}
return {k: await _expand(v, prop_schema=properties.get(k)) for k, v in args.items()}

View File

@@ -535,6 +535,88 @@ async def test_bare_ref_csv_parses_when_no_schema_provided():
assert result["data"] == [["Name", "Score"], ["Alice", "90"], ["Bob", "85"]]
@pytest.mark.asyncio
async def test_opaque_object_skips_inner_expansion():
    """When the schema declares a property as {type: "object"} with no
    properties, inner file refs are NOT expanded — they stay as-is for
    the tool to expand later with the correct nested schema."""
    # Tool-level schema: ``input_data`` is declared as an object but its
    # inner properties are deliberately left undeclared (opaque).
    schema = {
        "type": "object",
        "properties": {
            "block_id": {"type": "string"},
            "input_data": {
                "type": "object",
                "description": "Opaque block inputs",
            },
        },
    }
    # resolve_file_ref should NOT be called for the inner ref — the mock's
    # side_effect raises AssertionError so any call fails the test loudly.
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=AssertionError("should not be called")),
    ):
        result = await expand_file_refs_in_args(
            {
                "block_id": "some-id",
                "input_data": {"text": "@@agptfile:workspace:///data.csv"},
            },
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )
    # input_data should be returned unchanged (the ref token survives for
    # a later second-phase expansion by the tool itself)
    assert result["input_data"]["text"] == "@@agptfile:workspace:///data.csv"
@pytest.mark.asyncio
async def test_nested_schema_propagation():
    """When the schema declares nested properties, the inner type
    information is used for expand/skip decisions."""
    csv_content = "Name,Score\nAlice,90\nBob,85"

    # Stub resolver: always returns the same raw CSV text regardless of ref.
    async def _resolve(ref, *a, **kw):  # noqa: ARG001
        return csv_content

    # ``input_data`` declares its inner properties, so expansion should
    # recurse using each nested property's own type.
    schema = {
        "type": "object",
        "properties": {
            "input_data": {
                "type": "object",
                "properties": {
                    "text": {"type": "string"},
                    "rows": {"type": "array"},
                },
            },
        },
    }
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_resolve),
    ):
        result = await expand_file_refs_in_args(
            {
                "input_data": {
                    "text": "@@agptfile:workspace:///data.csv",
                    "rows": "@@agptfile:workspace:///data.csv",
                },
            },
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )
    # string-typed field: raw CSV text
    assert result["input_data"]["text"] == csv_content
    # array-typed field: parsed rows
    assert result["input_data"]["rows"] == [
        ["Name", "Score"],
        ["Alice", "90"],
        ["Bob", "85"],
    ]
# ---------------------------------------------------------------------------
# Per-file truncation and aggregate budget
# ---------------------------------------------------------------------------
@@ -584,3 +666,221 @@ async def test_expand_aggregate_budget_exhausted():
)
assert "budget exhausted" in result
# ---------------------------------------------------------------------------
# Full format × schema-type matrix
# ---------------------------------------------------------------------------
# Each text format is tested against two schema types:
# - type: string → raw file content returned as-is
# - no type / non-string → structured parsing applied
# This ensures CSV→string blocks get raw CSV, not json.dumps(list[list[str]]).
# Fixture table shared by the matrix tests below.
_FORMAT_SAMPLES: dict[str, tuple[str, str, object]] = {
    # format_label: (extension, raw_content, expected_parsed_value)
    # JSON parses to a dict with native value types.
    "json": (
        ".json",
        '{"name": "Alice", "score": 90}',
        {"name": "Alice", "score": 90},
    ),
    # CSV/TSV parse to rows of strings (no numeric coercion), header included.
    "csv": (
        ".csv",
        "Name,Score\nAlice,90\nBob,85",
        [["Name", "Score"], ["Alice", "90"], ["Bob", "85"]],
    ),
    "tsv": (
        ".tsv",
        "Name\tScore\nAlice\t90\nBob\t85",
        [["Name", "Score"], ["Alice", "90"], ["Bob", "85"]],
    ),
    # JSONL parses to a list with one element per line.
    "jsonl": (
        ".jsonl",
        '{"a":1}\n{"a":2}',
        [{"a": 1}, {"a": 2}],
    ),
    # YAML parses to a dict with native value types.
    "yaml": (
        ".yaml",
        "name: Alice\nscore: 90",
        {"name": "Alice", "score": 90},
    ),
    # TOML tables parse to nested dicts.
    "toml": (
        ".toml",
        '[person]\nname = "Alice"\nscore = 90',
        {"person": {"name": "Alice", "score": 90}},
    ),
}
@pytest.mark.asyncio
@pytest.mark.parametrize("fmt", _FORMAT_SAMPLES.keys())
async def test_matrix_format_to_string_schema(fmt: str):
    """For a parameter declared ``type: string``, expansion must hand back
    the file's raw text verbatim — never a parsed structure."""
    ext, raw_content, _ = _FORMAT_SAMPLES[fmt]

    schema = {
        "type": "object",
        "properties": {"text": {"type": "string"}},
    }

    async def _fake_resolve(ref, *a, **kw):  # noqa: ARG001
        return raw_content

    patched = patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_fake_resolve),
    )
    with patched:
        result = await expand_file_refs_in_args(
            {"text": f"@@agptfile:workspace:///data{ext}"},
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )

    expanded = result["text"]
    assert isinstance(
        expanded, str
    ), f"{fmt}: expected str, got {type(result['text'])}"
    assert expanded == raw_content, f"{fmt}: raw content mismatch"
@pytest.mark.asyncio
@pytest.mark.parametrize("fmt", _FORMAT_SAMPLES.keys())
async def test_matrix_format_to_nonstring_schema(fmt: str):
    """Without a string schema constraint, every text format returns
    a structured (parsed) value."""
    ext, raw_content, expected_parsed = _FORMAT_SAMPLES[fmt]

    # Stub resolver: returns the fixture's raw text for any ref.
    async def _resolve(ref, *a, **kw):  # noqa: ARG001
        return raw_content

    # No input_schema → structured parsing is the default
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_resolve),
    ):
        result = await expand_file_refs_in_args(
            {"data": f"@@agptfile:workspace:///data{ext}"},
            user_id="u1",
            session=_make_session(),
        )
    # The parsed value must match the fixture's expectation and must not
    # have degraded to a raw string.
    assert result["data"] == expected_parsed, f"{fmt}: parsed value mismatch"
    assert not isinstance(
        result["data"], str
    ), f"{fmt}: expected structured type, got str"
@pytest.mark.asyncio
@pytest.mark.parametrize("fmt", _FORMAT_SAMPLES.keys())
async def test_matrix_format_opaque_object_preserves_ref(fmt: str):
    """When the parameter is an opaque object (type: object, no properties),
    inner file refs are preserved for second-phase expansion."""
    ext, _, _ = _FORMAT_SAMPLES[fmt]
    # ``input_data`` has no declared inner properties → opaque.
    schema = {
        "type": "object",
        "properties": {
            "input_data": {"type": "object"},
        },
    }
    # Any call to the resolver is a failure: the ref must not be expanded.
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=AssertionError("should not be called")),
    ):
        ref_token = f"@@agptfile:workspace:///data{ext}"
        result = await expand_file_refs_in_args(
            {"input_data": {"field": ref_token}},
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )
    # The raw token survives untouched for the tool's later expansion pass.
    assert result["input_data"]["field"] == ref_token, f"{fmt}: ref should be preserved"
@pytest.mark.asyncio
async def test_matrix_mixed_fields_string_and_array():
    """One call, two typed fields: the string-typed one receives raw CSV
    text while the array-typed one receives parsed rows."""
    sample_csv = "A,B\n1,2\n3,4"

    schema = {
        "type": "object",
        "properties": {
            "raw_text": {"type": "string"},
            "parsed_rows": {"type": "array"},
        },
    }

    async def _return_csv(ref, *a, **kw):  # noqa: ARG001
        return sample_csv

    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_return_csv),
    ):
        result = await expand_file_refs_in_args(
            {
                "raw_text": "@@agptfile:workspace:///data.csv",
                "parsed_rows": "@@agptfile:workspace:///data.csv",
            },
            user_id="u1",
            session=_make_session(),
            input_schema=schema,
        )

    expected_rows = [["A", "B"], ["1", "2"], ["3", "4"]]
    assert isinstance(result["raw_text"], str)
    assert result["raw_text"] == sample_csv
    assert result["parsed_rows"] == expected_rows
@pytest.mark.asyncio
async def test_matrix_second_phase_expansion_with_block_schema():
    """Simulates the two-phase expansion: first phase skips opaque input_data,
    second phase expands with the block's actual schema."""
    csv_content = "X,Y\n10,20"

    # Stub resolver used only in phase 2.
    async def _resolve(ref, *a, **kw):  # noqa: ARG001
        return csv_content

    # Phase 1: tool-level schema (input_data is opaque)
    tool_schema = {
        "type": "object",
        "properties": {
            "block_id": {"type": "string"},
            "input_data": {"type": "object"},
        },
    }
    ref_token = "@@agptfile:workspace:///data.csv"
    # Phase 1 must not touch the resolver at all — enforce via side_effect.
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=AssertionError("phase 1 should not expand")),
    ):
        phase1_result = await expand_file_refs_in_args(
            {"block_id": "some-id", "input_data": {"text": ref_token}},
            user_id="u1",
            session=_make_session(),
            input_schema=tool_schema,
        )
    # Ref should survive phase 1
    assert phase1_result["input_data"]["text"] == ref_token

    # Phase 2: block-level schema (text is string)
    block_schema = {
        "type": "object",
        "properties": {
            "text": {"type": "string"},
        },
    }
    with patch(
        "backend.copilot.sdk.file_ref.resolve_file_ref",
        new=AsyncMock(side_effect=_resolve),
    ):
        # Re-run expansion over just the inner payload with the real schema.
        phase2_result = await expand_file_refs_in_args(
            phase1_result["input_data"],
            user_id="u1",
            session=_make_session(),
            input_schema=block_schema,
        )
    # Phase 2 should return raw CSV since text is string-typed
    assert isinstance(phase2_result["text"], str)
    assert phase2_result["text"] == csv_content

View File

@@ -12,6 +12,7 @@ from backend.copilot.constants import (
COPILOT_SESSION_PREFIX,
)
from backend.copilot.model import ChatSession
from backend.copilot.sdk.file_ref import FileRefExpansionError, expand_file_refs_in_args
from backend.data.db_accessors import review_db
from backend.data.execution import ExecutionContext
@@ -197,6 +198,29 @@ class RunBlockTool(BaseTool):
session_id=session_id,
)
# Expand @@agptfile: refs in input_data with the block's input
# schema. The generic _truncating wrapper skips opaque object
# properties (input_data has no declared inner properties in the
# tool schema), so file ref tokens are still intact here.
# Using the block's schema lets us return raw text for string-typed
# fields and parsed structures for list/dict-typed fields.
if input_data:
try:
input_data = await expand_file_refs_in_args(
input_data,
user_id,
session,
input_schema=input_schema,
)
except FileRefExpansionError as exc:
return ErrorResponse(
message=(
f"Failed to resolve file reference: {exc}. "
"Ensure the file exists before referencing it."
),
session_id=session_id,
)
if missing_credentials:
# Return setup requirements response with missing credentials
credentials_fields_info = block.input_schema.get_credentials_fields_info()

View File

@@ -10,11 +10,11 @@ from pydantic import BaseModel
from backend.copilot.context import (
E2B_WORKDIR,
get_current_sandbox,
get_workspace_manager,
resolve_sandbox_path,
)
from backend.copilot.model import ChatSession
from backend.copilot.tools.sandbox import make_session_path
from backend.data.db_accessors import workspace_db
from backend.util.settings import Config
from backend.util.virus_scanner import scan_content_safe
from backend.util.workspace import WorkspaceManager
@@ -219,9 +219,12 @@ def _is_text_mime(mime_type: str) -> bool:
async def get_manager(user_id: str, session_id: str) -> WorkspaceManager:
"""Create a session-scoped WorkspaceManager."""
workspace = await workspace_db().get_or_create_workspace(user_id)
return WorkspaceManager(user_id, workspace.id, session_id)
"""Create a session-scoped WorkspaceManager.
Delegates to :func:`backend.copilot.context.get_workspace_manager`.
Kept here for backward-compatibility with callers inside ``tools/``.
"""
return await get_workspace_manager(user_id, session_id)
async def _resolve_file(