mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(copilot): resolve circular imports and add full format×schema matrix tests
- Move get_workspace_manager to copilot/context.py to break the tools → sdk → tools circular import chain - Make sdk/__init__.py use PEP 562 lazy loading to avoid eagerly importing service.py (which chains to tools/) - Replace local import in run_block.py with top-level import - Add parametrized matrix tests covering JSON/CSV/TSV/JSONL/YAML/TOML × string-typed/non-string/opaque-object schema combinations - Add two-phase expansion integration test (tool-level skip + block-level expand)
This commit is contained in:
@@ -11,6 +11,8 @@ from contextvars import ContextVar
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from backend.copilot.model import ChatSession
|
||||
from backend.data.db_accessors import workspace_db
|
||||
from backend.util.workspace import WorkspaceManager
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from e2b import AsyncSandbox
|
||||
@@ -82,6 +84,17 @@ def resolve_sandbox_path(path: str) -> str:
|
||||
return normalized
|
||||
|
||||
|
||||
async def get_workspace_manager(user_id: str, session_id: str) -> WorkspaceManager:
|
||||
"""Create a session-scoped :class:`WorkspaceManager`.
|
||||
|
||||
Placed here (rather than in ``tools/workspace_files``) so that modules
|
||||
like ``sdk/file_ref`` can import it without triggering the heavy
|
||||
``tools/__init__`` import chain.
|
||||
"""
|
||||
workspace = await workspace_db().get_or_create_workspace(user_id)
|
||||
return WorkspaceManager(user_id, workspace.id, session_id)
|
||||
|
||||
|
||||
def is_allowed_local_path(path: str, sdk_cwd: str | None = None) -> bool:
|
||||
"""Return True if *path* is within an allowed host-filesystem location.
|
||||
|
||||
|
||||
@@ -3,12 +3,26 @@
|
||||
This module provides the integration layer between the Claude Agent SDK
|
||||
and the existing CoPilot tool system, enabling drop-in replacement of
|
||||
the current LLM orchestration with the battle-tested Claude Agent SDK.
|
||||
|
||||
Submodule imports are deferred (PEP 562) to avoid circular imports:
|
||||
tools → sdk → service → prompting → tools.
|
||||
"""
|
||||
|
||||
from .service import stream_chat_completion_sdk
|
||||
from .tool_adapter import create_copilot_mcp_server
|
||||
from typing import Any
|
||||
|
||||
__all__ = [
|
||||
"stream_chat_completion_sdk",
|
||||
"create_copilot_mcp_server",
|
||||
]
|
||||
|
||||
|
||||
def __getattr__(name: str) -> Any:
|
||||
if name == "stream_chat_completion_sdk":
|
||||
from .service import stream_chat_completion_sdk
|
||||
|
||||
return stream_chat_completion_sdk
|
||||
if name == "create_copilot_mcp_server":
|
||||
from .tool_adapter import create_copilot_mcp_server
|
||||
|
||||
return create_copilot_mcp_server
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
@@ -41,11 +41,11 @@ from typing import Any
|
||||
from backend.copilot.context import (
|
||||
get_current_sandbox,
|
||||
get_sdk_cwd,
|
||||
get_workspace_manager,
|
||||
is_allowed_local_path,
|
||||
resolve_sandbox_path,
|
||||
)
|
||||
from backend.copilot.model import ChatSession
|
||||
from backend.copilot.tools.workspace_files import get_manager
|
||||
from backend.util.file import parse_workspace_uri
|
||||
from backend.util.file_content_parser import (
|
||||
BINARY_FORMATS,
|
||||
@@ -138,7 +138,7 @@ async def read_file_bytes(
|
||||
if plain.startswith("workspace://"):
|
||||
if not user_id:
|
||||
raise ValueError("workspace:// file references require authentication")
|
||||
manager = await get_manager(user_id, session.session_id)
|
||||
manager = await get_workspace_manager(user_id, session.session_id)
|
||||
ws = parse_workspace_uri(plain)
|
||||
try:
|
||||
return await (
|
||||
@@ -272,7 +272,7 @@ async def _infer_format_from_workspace(
|
||||
return None
|
||||
try:
|
||||
ws = parse_workspace_uri(uri)
|
||||
manager = await get_manager(user_id, session.session_id)
|
||||
manager = await get_workspace_manager(user_id, session.session_id)
|
||||
info = await (
|
||||
manager.get_file_info(ws.file_ref)
|
||||
if not ws.is_path
|
||||
@@ -329,7 +329,13 @@ async def expand_file_refs_in_args(
|
||||
|
||||
properties = (input_schema or {}).get("properties", {})
|
||||
|
||||
async def _expand(value: Any, *, expect_string: bool = False) -> Any:
|
||||
async def _expand(
|
||||
value: Any,
|
||||
*,
|
||||
prop_schema: dict[str, Any] | None = None,
|
||||
) -> Any:
|
||||
expect_string = (prop_schema or {}).get("type") == "string"
|
||||
|
||||
if isinstance(value, str):
|
||||
# Check for a bare file reference first — enables structured parsing.
|
||||
ref = parse_file_ref(value)
|
||||
@@ -369,9 +375,9 @@ async def expand_file_refs_in_args(
|
||||
f"({content_size} bytes, limit {_MAX_BARE_REF_BYTES})"
|
||||
)
|
||||
|
||||
# When the tool's input schema declares this parameter as
|
||||
# "string", return raw file content — don't parse into a
|
||||
# structured type that would need json.dumps() serialisation.
|
||||
# When the schema declares this parameter as "string",
|
||||
# return raw file content — don't parse into a structured
|
||||
# type that would need json.dumps() serialisation.
|
||||
if expect_string:
|
||||
if isinstance(content, bytes):
|
||||
return content.decode("utf-8", errors="replace")
|
||||
@@ -411,15 +417,23 @@ async def expand_file_refs_in_args(
|
||||
value, user_id, session, raise_on_error=True
|
||||
)
|
||||
if isinstance(value, dict):
|
||||
return {k: await _expand(v) for k, v in value.items()}
|
||||
# When the schema says this is an object but doesn't define
|
||||
# inner properties, skip expansion — the caller (e.g.
|
||||
# RunBlockTool) will expand with the actual nested schema.
|
||||
if (
|
||||
prop_schema is not None
|
||||
and prop_schema.get("type") == "object"
|
||||
and "properties" not in prop_schema
|
||||
):
|
||||
return value
|
||||
nested_props = (prop_schema or {}).get("properties", {})
|
||||
return {
|
||||
k: await _expand(v, prop_schema=nested_props.get(k))
|
||||
for k, v in value.items()
|
||||
}
|
||||
if isinstance(value, list):
|
||||
return [await _expand(item) for item in value]
|
||||
items_schema = (prop_schema or {}).get("items")
|
||||
return [await _expand(item, prop_schema=items_schema) for item in value]
|
||||
return value
|
||||
|
||||
return {
|
||||
k: await _expand(
|
||||
v,
|
||||
expect_string=properties.get(k, {}).get("type") == "string",
|
||||
)
|
||||
for k, v in args.items()
|
||||
}
|
||||
return {k: await _expand(v, prop_schema=properties.get(k)) for k, v in args.items()}
|
||||
|
||||
@@ -535,6 +535,88 @@ async def test_bare_ref_csv_parses_when_no_schema_provided():
|
||||
assert result["data"] == [["Name", "Score"], ["Alice", "90"], ["Bob", "85"]]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_opaque_object_skips_inner_expansion():
|
||||
"""When the schema declares a property as {type: "object"} with no
|
||||
properties, inner file refs are NOT expanded — they stay as-is for
|
||||
the tool to expand later with the correct nested schema."""
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"block_id": {"type": "string"},
|
||||
"input_data": {
|
||||
"type": "object",
|
||||
"description": "Opaque block inputs",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
# resolve_file_ref should NOT be called for the inner ref
|
||||
with patch(
|
||||
"backend.copilot.sdk.file_ref.resolve_file_ref",
|
||||
new=AsyncMock(side_effect=AssertionError("should not be called")),
|
||||
):
|
||||
result = await expand_file_refs_in_args(
|
||||
{
|
||||
"block_id": "some-id",
|
||||
"input_data": {"text": "@@agptfile:workspace:///data.csv"},
|
||||
},
|
||||
user_id="u1",
|
||||
session=_make_session(),
|
||||
input_schema=schema,
|
||||
)
|
||||
# input_data should be returned unchanged
|
||||
assert result["input_data"]["text"] == "@@agptfile:workspace:///data.csv"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_nested_schema_propagation():
|
||||
"""When the schema declares nested properties, the inner type
|
||||
information is used for expand/skip decisions."""
|
||||
csv_content = "Name,Score\nAlice,90\nBob,85"
|
||||
|
||||
async def _resolve(ref, *a, **kw): # noqa: ARG001
|
||||
return csv_content
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_data": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {"type": "string"},
|
||||
"rows": {"type": "array"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
with patch(
|
||||
"backend.copilot.sdk.file_ref.resolve_file_ref",
|
||||
new=AsyncMock(side_effect=_resolve),
|
||||
):
|
||||
result = await expand_file_refs_in_args(
|
||||
{
|
||||
"input_data": {
|
||||
"text": "@@agptfile:workspace:///data.csv",
|
||||
"rows": "@@agptfile:workspace:///data.csv",
|
||||
},
|
||||
},
|
||||
user_id="u1",
|
||||
session=_make_session(),
|
||||
input_schema=schema,
|
||||
)
|
||||
# string-typed field: raw CSV text
|
||||
assert result["input_data"]["text"] == csv_content
|
||||
# array-typed field: parsed rows
|
||||
assert result["input_data"]["rows"] == [
|
||||
["Name", "Score"],
|
||||
["Alice", "90"],
|
||||
["Bob", "85"],
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-file truncation and aggregate budget
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -584,3 +666,221 @@ async def test_expand_aggregate_budget_exhausted():
|
||||
)
|
||||
|
||||
assert "budget exhausted" in result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full format × schema-type matrix
|
||||
# ---------------------------------------------------------------------------
|
||||
# Each text format is tested against two schema types:
|
||||
# - type: string → raw file content returned as-is
|
||||
# - no type / non-string → structured parsing applied
|
||||
# This ensures CSV→string blocks get raw CSV, not json.dumps(list[list[str]]).
|
||||
|
||||
_FORMAT_SAMPLES: dict[str, tuple[str, str, object]] = {
|
||||
# format_label: (extension, raw_content, expected_parsed_value)
|
||||
"json": (
|
||||
".json",
|
||||
'{"name": "Alice", "score": 90}',
|
||||
{"name": "Alice", "score": 90},
|
||||
),
|
||||
"csv": (
|
||||
".csv",
|
||||
"Name,Score\nAlice,90\nBob,85",
|
||||
[["Name", "Score"], ["Alice", "90"], ["Bob", "85"]],
|
||||
),
|
||||
"tsv": (
|
||||
".tsv",
|
||||
"Name\tScore\nAlice\t90\nBob\t85",
|
||||
[["Name", "Score"], ["Alice", "90"], ["Bob", "85"]],
|
||||
),
|
||||
"jsonl": (
|
||||
".jsonl",
|
||||
'{"a":1}\n{"a":2}',
|
||||
[{"a": 1}, {"a": 2}],
|
||||
),
|
||||
"yaml": (
|
||||
".yaml",
|
||||
"name: Alice\nscore: 90",
|
||||
{"name": "Alice", "score": 90},
|
||||
),
|
||||
"toml": (
|
||||
".toml",
|
||||
'[person]\nname = "Alice"\nscore = 90',
|
||||
{"person": {"name": "Alice", "score": 90}},
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("fmt", _FORMAT_SAMPLES.keys())
|
||||
async def test_matrix_format_to_string_schema(fmt: str):
|
||||
"""When the schema declares the parameter as type: string,
|
||||
every text format returns the raw file content as a plain string."""
|
||||
ext, raw_content, _ = _FORMAT_SAMPLES[fmt]
|
||||
|
||||
async def _resolve(ref, *a, **kw): # noqa: ARG001
|
||||
return raw_content
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
"properties": {"text": {"type": "string"}},
|
||||
}
|
||||
|
||||
with patch(
|
||||
"backend.copilot.sdk.file_ref.resolve_file_ref",
|
||||
new=AsyncMock(side_effect=_resolve),
|
||||
):
|
||||
result = await expand_file_refs_in_args(
|
||||
{"text": f"@@agptfile:workspace:///data{ext}"},
|
||||
user_id="u1",
|
||||
session=_make_session(),
|
||||
input_schema=schema,
|
||||
)
|
||||
assert isinstance(
|
||||
result["text"], str
|
||||
), f"{fmt}: expected str, got {type(result['text'])}"
|
||||
assert result["text"] == raw_content, f"{fmt}: raw content mismatch"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("fmt", _FORMAT_SAMPLES.keys())
|
||||
async def test_matrix_format_to_nonstring_schema(fmt: str):
|
||||
"""Without a string schema constraint, every text format returns
|
||||
a structured (parsed) value."""
|
||||
ext, raw_content, expected_parsed = _FORMAT_SAMPLES[fmt]
|
||||
|
||||
async def _resolve(ref, *a, **kw): # noqa: ARG001
|
||||
return raw_content
|
||||
|
||||
# No input_schema → structured parsing is the default
|
||||
with patch(
|
||||
"backend.copilot.sdk.file_ref.resolve_file_ref",
|
||||
new=AsyncMock(side_effect=_resolve),
|
||||
):
|
||||
result = await expand_file_refs_in_args(
|
||||
{"data": f"@@agptfile:workspace:///data{ext}"},
|
||||
user_id="u1",
|
||||
session=_make_session(),
|
||||
)
|
||||
assert result["data"] == expected_parsed, f"{fmt}: parsed value mismatch"
|
||||
assert not isinstance(
|
||||
result["data"], str
|
||||
), f"{fmt}: expected structured type, got str"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("fmt", _FORMAT_SAMPLES.keys())
|
||||
async def test_matrix_format_opaque_object_preserves_ref(fmt: str):
|
||||
"""When the parameter is an opaque object (type: object, no properties),
|
||||
inner file refs are preserved for second-phase expansion."""
|
||||
ext, _, _ = _FORMAT_SAMPLES[fmt]
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_data": {"type": "object"},
|
||||
},
|
||||
}
|
||||
|
||||
with patch(
|
||||
"backend.copilot.sdk.file_ref.resolve_file_ref",
|
||||
new=AsyncMock(side_effect=AssertionError("should not be called")),
|
||||
):
|
||||
ref_token = f"@@agptfile:workspace:///data{ext}"
|
||||
result = await expand_file_refs_in_args(
|
||||
{"input_data": {"field": ref_token}},
|
||||
user_id="u1",
|
||||
session=_make_session(),
|
||||
input_schema=schema,
|
||||
)
|
||||
assert result["input_data"]["field"] == ref_token, f"{fmt}: ref should be preserved"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_matrix_mixed_fields_string_and_array():
|
||||
"""A single call with both string-typed and array-typed fields:
|
||||
CSV→string returns raw text, CSV→array returns parsed rows."""
|
||||
csv_content = "A,B\n1,2\n3,4"
|
||||
|
||||
async def _resolve(ref, *a, **kw): # noqa: ARG001
|
||||
return csv_content
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"raw_text": {"type": "string"},
|
||||
"parsed_rows": {"type": "array"},
|
||||
},
|
||||
}
|
||||
|
||||
with patch(
|
||||
"backend.copilot.sdk.file_ref.resolve_file_ref",
|
||||
new=AsyncMock(side_effect=_resolve),
|
||||
):
|
||||
result = await expand_file_refs_in_args(
|
||||
{
|
||||
"raw_text": "@@agptfile:workspace:///data.csv",
|
||||
"parsed_rows": "@@agptfile:workspace:///data.csv",
|
||||
},
|
||||
user_id="u1",
|
||||
session=_make_session(),
|
||||
input_schema=schema,
|
||||
)
|
||||
assert isinstance(result["raw_text"], str)
|
||||
assert result["raw_text"] == csv_content
|
||||
assert result["parsed_rows"] == [["A", "B"], ["1", "2"], ["3", "4"]]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_matrix_second_phase_expansion_with_block_schema():
|
||||
"""Simulates the two-phase expansion: first phase skips opaque input_data,
|
||||
second phase expands with the block's actual schema."""
|
||||
csv_content = "X,Y\n10,20"
|
||||
|
||||
async def _resolve(ref, *a, **kw): # noqa: ARG001
|
||||
return csv_content
|
||||
|
||||
# Phase 1: tool-level schema (input_data is opaque)
|
||||
tool_schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"block_id": {"type": "string"},
|
||||
"input_data": {"type": "object"},
|
||||
},
|
||||
}
|
||||
|
||||
ref_token = "@@agptfile:workspace:///data.csv"
|
||||
with patch(
|
||||
"backend.copilot.sdk.file_ref.resolve_file_ref",
|
||||
new=AsyncMock(side_effect=AssertionError("phase 1 should not expand")),
|
||||
):
|
||||
phase1_result = await expand_file_refs_in_args(
|
||||
{"block_id": "some-id", "input_data": {"text": ref_token}},
|
||||
user_id="u1",
|
||||
session=_make_session(),
|
||||
input_schema=tool_schema,
|
||||
)
|
||||
# Ref should survive phase 1
|
||||
assert phase1_result["input_data"]["text"] == ref_token
|
||||
|
||||
# Phase 2: block-level schema (text is string)
|
||||
block_schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {"type": "string"},
|
||||
},
|
||||
}
|
||||
|
||||
with patch(
|
||||
"backend.copilot.sdk.file_ref.resolve_file_ref",
|
||||
new=AsyncMock(side_effect=_resolve),
|
||||
):
|
||||
phase2_result = await expand_file_refs_in_args(
|
||||
phase1_result["input_data"],
|
||||
user_id="u1",
|
||||
session=_make_session(),
|
||||
input_schema=block_schema,
|
||||
)
|
||||
# Phase 2 should return raw CSV since text is string-typed
|
||||
assert isinstance(phase2_result["text"], str)
|
||||
assert phase2_result["text"] == csv_content
|
||||
|
||||
@@ -12,6 +12,7 @@ from backend.copilot.constants import (
|
||||
COPILOT_SESSION_PREFIX,
|
||||
)
|
||||
from backend.copilot.model import ChatSession
|
||||
from backend.copilot.sdk.file_ref import FileRefExpansionError, expand_file_refs_in_args
|
||||
from backend.data.db_accessors import review_db
|
||||
from backend.data.execution import ExecutionContext
|
||||
|
||||
@@ -197,6 +198,29 @@ class RunBlockTool(BaseTool):
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Expand @@agptfile: refs in input_data with the block's input
|
||||
# schema. The generic _truncating wrapper skips opaque object
|
||||
# properties (input_data has no declared inner properties in the
|
||||
# tool schema), so file ref tokens are still intact here.
|
||||
# Using the block's schema lets us return raw text for string-typed
|
||||
# fields and parsed structures for list/dict-typed fields.
|
||||
if input_data:
|
||||
try:
|
||||
input_data = await expand_file_refs_in_args(
|
||||
input_data,
|
||||
user_id,
|
||||
session,
|
||||
input_schema=input_schema,
|
||||
)
|
||||
except FileRefExpansionError as exc:
|
||||
return ErrorResponse(
|
||||
message=(
|
||||
f"Failed to resolve file reference: {exc}. "
|
||||
"Ensure the file exists before referencing it."
|
||||
),
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
if missing_credentials:
|
||||
# Return setup requirements response with missing credentials
|
||||
credentials_fields_info = block.input_schema.get_credentials_fields_info()
|
||||
|
||||
@@ -10,11 +10,11 @@ from pydantic import BaseModel
|
||||
from backend.copilot.context import (
|
||||
E2B_WORKDIR,
|
||||
get_current_sandbox,
|
||||
get_workspace_manager,
|
||||
resolve_sandbox_path,
|
||||
)
|
||||
from backend.copilot.model import ChatSession
|
||||
from backend.copilot.tools.sandbox import make_session_path
|
||||
from backend.data.db_accessors import workspace_db
|
||||
from backend.util.settings import Config
|
||||
from backend.util.virus_scanner import scan_content_safe
|
||||
from backend.util.workspace import WorkspaceManager
|
||||
@@ -219,9 +219,12 @@ def _is_text_mime(mime_type: str) -> bool:
|
||||
|
||||
|
||||
async def get_manager(user_id: str, session_id: str) -> WorkspaceManager:
|
||||
"""Create a session-scoped WorkspaceManager."""
|
||||
workspace = await workspace_db().get_or_create_workspace(user_id)
|
||||
return WorkspaceManager(user_id, workspace.id, session_id)
|
||||
"""Create a session-scoped WorkspaceManager.
|
||||
|
||||
Delegates to :func:`backend.copilot.context.get_workspace_manager`.
|
||||
Kept here for backward-compatibility with callers inside ``tools/``.
|
||||
"""
|
||||
return await get_workspace_manager(user_id, session_id)
|
||||
|
||||
|
||||
async def _resolve_file(
|
||||
|
||||
Reference in New Issue
Block a user