mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-24 03:00:28 -05:00
Compare commits
27 Commits
dev
...
fix/messed
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9106f0b5ce | ||
|
|
37355f7581 | ||
|
|
e1e3b6094e | ||
|
|
e18b3c561f | ||
|
|
d937c6839a | ||
|
|
8c2363ea88 | ||
|
|
a408b45542 | ||
|
|
3a38b5e9bd | ||
|
|
3491365b45 | ||
|
|
d3299cfd7f | ||
|
|
9161090944 | ||
|
|
372f9bff32 | ||
|
|
17995596db | ||
|
|
7acbbd0f05 | ||
|
|
4be03fcc08 | ||
|
|
eb7bd6bdae | ||
|
|
d81e7dd6c9 | ||
|
|
78b52b956d | ||
|
|
e476185c3a | ||
|
|
b1c5000937 | ||
|
|
7ee870ed70 | ||
|
|
240e403592 | ||
|
|
c3e94f7d9c | ||
|
|
a0a040f102 | ||
|
|
23225aa323 | ||
|
|
fed645cb79 | ||
|
|
009753f2b3 |
@@ -432,13 +432,23 @@ async def _get_session_from_db(session_id: str) -> ChatSession | None:
|
||||
return session
|
||||
|
||||
|
||||
async def upsert_chat_session(session: ChatSession) -> ChatSession:
|
||||
async def upsert_chat_session(
|
||||
session: ChatSession,
|
||||
*,
|
||||
existing_message_count: int | None = None,
|
||||
) -> ChatSession:
|
||||
"""Update a chat session in both cache and database.
|
||||
|
||||
Uses session-level locking to prevent race conditions when concurrent
|
||||
operations (e.g., background title update and main stream handler)
|
||||
attempt to upsert the same session simultaneously.
|
||||
|
||||
Args:
|
||||
existing_message_count: If provided, skip the DB query to count
|
||||
existing messages. The caller is responsible for tracking this
|
||||
accurately. Useful for incremental saves in a streaming loop
|
||||
where the caller already knows how many messages are persisted.
|
||||
|
||||
Raises:
|
||||
DatabaseError: If the database write fails. The cache is still updated
|
||||
as a best-effort optimization, but the error is propagated to ensure
|
||||
@@ -450,15 +460,20 @@ async def upsert_chat_session(session: ChatSession) -> ChatSession:
|
||||
|
||||
async with lock:
|
||||
# Get existing message count from DB for incremental saves
|
||||
existing_message_count = await chat_db().get_chat_session_message_count(
|
||||
session.session_id
|
||||
)
|
||||
if existing_message_count is None:
|
||||
existing_message_count = await chat_db().get_chat_session_message_count(
|
||||
session.session_id
|
||||
)
|
||||
|
||||
db_error: Exception | None = None
|
||||
|
||||
# Save to database (primary storage)
|
||||
try:
|
||||
await _save_session_to_db(session, existing_message_count)
|
||||
await _save_session_to_db(
|
||||
session,
|
||||
existing_message_count,
|
||||
skip_existence_check=existing_message_count > 0,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to save session {session.session_id} to database: {e}"
|
||||
@@ -489,21 +504,31 @@ async def upsert_chat_session(session: ChatSession) -> ChatSession:
|
||||
|
||||
|
||||
async def _save_session_to_db(
|
||||
session: ChatSession, existing_message_count: int
|
||||
session: ChatSession,
|
||||
existing_message_count: int,
|
||||
*,
|
||||
skip_existence_check: bool = False,
|
||||
) -> None:
|
||||
"""Save or update a chat session in the database."""
|
||||
"""Save or update a chat session in the database.
|
||||
|
||||
Args:
|
||||
skip_existence_check: When True, skip the ``get_chat_session`` query
|
||||
and assume the session row already exists. Saves one DB round trip
|
||||
for incremental saves during streaming.
|
||||
"""
|
||||
db = chat_db()
|
||||
|
||||
# Check if session exists in DB
|
||||
existing = await db.get_chat_session(session.session_id)
|
||||
if not skip_existence_check:
|
||||
# Check if session exists in DB
|
||||
existing = await db.get_chat_session(session.session_id)
|
||||
|
||||
if not existing:
|
||||
# Create new session
|
||||
await db.create_chat_session(
|
||||
session_id=session.session_id,
|
||||
user_id=session.user_id,
|
||||
)
|
||||
existing_message_count = 0
|
||||
if not existing:
|
||||
# Create new session
|
||||
await db.create_chat_session(
|
||||
session_id=session.session_id,
|
||||
user_id=session.user_id,
|
||||
)
|
||||
existing_message_count = 0
|
||||
|
||||
# Calculate total tokens from usage
|
||||
total_prompt = sum(u.prompt_tokens for u in session.usage)
|
||||
|
||||
@@ -47,8 +47,9 @@ class SDKResponseAdapter:
|
||||
text blocks, tool calls, and message lifecycle.
|
||||
"""
|
||||
|
||||
def __init__(self, message_id: str | None = None):
|
||||
def __init__(self, message_id: str | None = None, session_id: str | None = None):
|
||||
self.message_id = message_id or str(uuid.uuid4())
|
||||
self.session_id = session_id
|
||||
self.text_block_id = str(uuid.uuid4())
|
||||
self.has_started_text = False
|
||||
self.has_ended_text = False
|
||||
@@ -61,6 +62,11 @@ class SDKResponseAdapter:
|
||||
"""Set the task ID for reconnection support."""
|
||||
self.task_id = task_id
|
||||
|
||||
@property
|
||||
def has_unresolved_tool_calls(self) -> bool:
|
||||
"""True when there are tool calls that haven't received output yet."""
|
||||
return bool(self.current_tool_calls.keys() - self.resolved_tool_calls)
|
||||
|
||||
def convert_message(self, sdk_message: Message) -> list[StreamBaseResponse]:
|
||||
"""Convert a single SDK message to Vercel AI SDK format."""
|
||||
responses: list[StreamBaseResponse] = []
|
||||
@@ -77,7 +83,12 @@ class SDKResponseAdapter:
|
||||
elif isinstance(sdk_message, AssistantMessage):
|
||||
# Flush any SDK built-in tool calls that didn't get a UserMessage
|
||||
# result (e.g. WebSearch, Read handled internally by the CLI).
|
||||
self._flush_unresolved_tool_calls(responses)
|
||||
# BUT skip flush when this AssistantMessage is a parallel tool
|
||||
# continuation (contains only ToolUseBlocks) — the prior tools
|
||||
# are still executing concurrently and haven't finished yet.
|
||||
is_tool_only = all(isinstance(b, ToolUseBlock) for b in sdk_message.content)
|
||||
if not is_tool_only:
|
||||
self._flush_unresolved_tool_calls(responses)
|
||||
|
||||
# After tool results, the SDK sends a new AssistantMessage for the
|
||||
# next LLM turn. Open a new step if the previous one was closed.
|
||||
@@ -118,8 +129,24 @@ class SDKResponseAdapter:
|
||||
blocks = content if isinstance(content, list) else []
|
||||
resolved_in_blocks: set[str] = set()
|
||||
|
||||
sid = (self.session_id or "?")[:12]
|
||||
parent_id_preview = getattr(sdk_message, "parent_tool_use_id", None)
|
||||
logger.info(
|
||||
"[SDK] [%s] UserMessage: %d blocks, content_type=%s, "
|
||||
"parent_tool_use_id=%s",
|
||||
sid,
|
||||
len(blocks),
|
||||
type(content).__name__,
|
||||
parent_id_preview[:12] if parent_id_preview else "None",
|
||||
)
|
||||
|
||||
for block in blocks:
|
||||
if isinstance(block, ToolResultBlock) and block.tool_use_id:
|
||||
# Skip if already resolved (e.g. by flush) — the real
|
||||
# result supersedes the empty flush, but re-emitting
|
||||
# would confuse the frontend's state machine.
|
||||
if block.tool_use_id in self.resolved_tool_calls:
|
||||
continue
|
||||
tool_info = self.current_tool_calls.get(block.tool_use_id, {})
|
||||
tool_name = tool_info.get("name", "unknown")
|
||||
|
||||
@@ -144,7 +171,11 @@ class SDKResponseAdapter:
|
||||
# Handle SDK built-in tool results carried via parent_tool_use_id
|
||||
# instead of (or in addition to) ToolResultBlock content.
|
||||
parent_id = sdk_message.parent_tool_use_id
|
||||
if parent_id and parent_id not in resolved_in_blocks:
|
||||
if (
|
||||
parent_id
|
||||
and parent_id not in resolved_in_blocks
|
||||
and parent_id not in self.resolved_tool_calls
|
||||
):
|
||||
tool_info = self.current_tool_calls.get(parent_id, {})
|
||||
tool_name = tool_info.get("name", "unknown")
|
||||
|
||||
@@ -228,11 +259,28 @@ class SDKResponseAdapter:
|
||||
output, which we pop and emit here before the next ``AssistantMessage``
|
||||
starts.
|
||||
"""
|
||||
unresolved = [
|
||||
(tid, info.get("name", "unknown"))
|
||||
for tid, info in self.current_tool_calls.items()
|
||||
if tid not in self.resolved_tool_calls
|
||||
]
|
||||
sid = (self.session_id or "?")[:12]
|
||||
if not unresolved:
|
||||
logger.info(
|
||||
"[SDK] [%s] Flush called but all %d tool(s) already resolved",
|
||||
sid,
|
||||
len(self.current_tool_calls),
|
||||
)
|
||||
return
|
||||
logger.info(
|
||||
"[SDK] [%s] Flushing %d unresolved tool call(s): %s",
|
||||
sid,
|
||||
len(unresolved),
|
||||
", ".join(f"{name}({tid[:12]})" for tid, name in unresolved),
|
||||
)
|
||||
|
||||
flushed = False
|
||||
for tool_id, tool_info in self.current_tool_calls.items():
|
||||
if tool_id in self.resolved_tool_calls:
|
||||
continue
|
||||
tool_name = tool_info.get("name", "unknown")
|
||||
for tool_id, tool_name in unresolved:
|
||||
output = pop_pending_tool_output(tool_name)
|
||||
if output is not None:
|
||||
responses.append(
|
||||
@@ -245,9 +293,12 @@ class SDKResponseAdapter:
|
||||
)
|
||||
self.resolved_tool_calls.add(tool_id)
|
||||
flushed = True
|
||||
logger.debug(
|
||||
f"Flushed pending output for built-in tool {tool_name} "
|
||||
f"(call {tool_id})"
|
||||
logger.info(
|
||||
"[SDK] [%s] Flushed stashed output for %s " "(call %s, %d chars)",
|
||||
sid,
|
||||
tool_name,
|
||||
tool_id[:12],
|
||||
len(output),
|
||||
)
|
||||
else:
|
||||
# No output available — emit an empty output so the frontend
|
||||
@@ -263,9 +314,14 @@ class SDKResponseAdapter:
|
||||
)
|
||||
self.resolved_tool_calls.add(tool_id)
|
||||
flushed = True
|
||||
logger.debug(
|
||||
f"Flushed empty output for unresolved tool {tool_name} "
|
||||
f"(call {tool_id})"
|
||||
logger.warning(
|
||||
"[SDK] [%s] Flushed EMPTY output for unresolved tool %s "
|
||||
"(call %s) — stash was empty (likely SDK hook race "
|
||||
"condition: PostToolUse hook hadn't completed before "
|
||||
"flush was triggered)",
|
||||
sid,
|
||||
tool_name,
|
||||
tool_id[:12],
|
||||
)
|
||||
|
||||
if flushed and self.step_open:
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
"""Unit tests for the SDK response adapter."""
|
||||
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
from claude_agent_sdk import (
|
||||
AssistantMessage,
|
||||
ResultMessage,
|
||||
@@ -27,6 +30,10 @@ from backend.copilot.response_model import (
|
||||
|
||||
from .response_adapter import SDKResponseAdapter
|
||||
from .tool_adapter import MCP_TOOL_PREFIX
|
||||
from .tool_adapter import _pending_tool_outputs as _pto
|
||||
from .tool_adapter import _stash_event
|
||||
from .tool_adapter import stash_pending_tool_output as _stash
|
||||
from .tool_adapter import wait_for_stash
|
||||
|
||||
|
||||
def _adapter() -> SDKResponseAdapter:
|
||||
@@ -364,3 +371,310 @@ def test_full_conversation_flow():
|
||||
"StreamFinishStep", # step 2 closed
|
||||
"StreamFinish",
|
||||
]
|
||||
|
||||
|
||||
# -- Flush unresolved tool calls --------------------------------------------
|
||||
|
||||
|
||||
def test_flush_unresolved_at_result_message():
|
||||
"""Built-in tools (WebSearch) without UserMessage results get flushed at ResultMessage."""
|
||||
adapter = _adapter()
|
||||
all_responses: list[StreamBaseResponse] = []
|
||||
|
||||
# 1. Init
|
||||
all_responses.extend(
|
||||
adapter.convert_message(SystemMessage(subtype="init", data={}))
|
||||
)
|
||||
# 2. Tool use (built-in tool — no MCP prefix)
|
||||
all_responses.extend(
|
||||
adapter.convert_message(
|
||||
AssistantMessage(
|
||||
content=[
|
||||
ToolUseBlock(id="ws-1", name="WebSearch", input={"query": "test"})
|
||||
],
|
||||
model="test",
|
||||
)
|
||||
)
|
||||
)
|
||||
# 3. No UserMessage for this tool — go straight to ResultMessage
|
||||
all_responses.extend(
|
||||
adapter.convert_message(
|
||||
ResultMessage(
|
||||
subtype="success",
|
||||
duration_ms=100,
|
||||
duration_api_ms=50,
|
||||
is_error=False,
|
||||
num_turns=1,
|
||||
session_id="s1",
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
types = [type(r).__name__ for r in all_responses]
|
||||
assert types == [
|
||||
"StreamStart",
|
||||
"StreamStartStep",
|
||||
"StreamToolInputStart",
|
||||
"StreamToolInputAvailable",
|
||||
"StreamToolOutputAvailable", # flushed with empty output
|
||||
"StreamFinishStep", # step closed by flush
|
||||
"StreamFinish",
|
||||
]
|
||||
# The flushed output should be empty (no stash available)
|
||||
output_event = [
|
||||
r for r in all_responses if isinstance(r, StreamToolOutputAvailable)
|
||||
][0]
|
||||
assert output_event.toolCallId == "ws-1"
|
||||
assert output_event.toolName == "WebSearch"
|
||||
assert output_event.output == ""
|
||||
|
||||
|
||||
def test_flush_unresolved_at_next_assistant_message():
|
||||
"""Built-in tools get flushed when the next AssistantMessage arrives."""
|
||||
adapter = _adapter()
|
||||
all_responses: list[StreamBaseResponse] = []
|
||||
|
||||
# 1. Init
|
||||
all_responses.extend(
|
||||
adapter.convert_message(SystemMessage(subtype="init", data={}))
|
||||
)
|
||||
# 2. Tool use (built-in — no UserMessage will come)
|
||||
all_responses.extend(
|
||||
adapter.convert_message(
|
||||
AssistantMessage(
|
||||
content=[
|
||||
ToolUseBlock(id="ws-1", name="WebSearch", input={"query": "test"})
|
||||
],
|
||||
model="test",
|
||||
)
|
||||
)
|
||||
)
|
||||
# 3. Next AssistantMessage triggers flush before processing its blocks
|
||||
all_responses.extend(
|
||||
adapter.convert_message(
|
||||
AssistantMessage(
|
||||
content=[TextBlock(text="Here are the results")], model="test"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
types = [type(r).__name__ for r in all_responses]
|
||||
assert types == [
|
||||
"StreamStart",
|
||||
"StreamStartStep",
|
||||
"StreamToolInputStart",
|
||||
"StreamToolInputAvailable",
|
||||
# Flush at next AssistantMessage:
|
||||
"StreamToolOutputAvailable",
|
||||
"StreamFinishStep", # step closed by flush
|
||||
# New step for continuation text:
|
||||
"StreamStartStep",
|
||||
"StreamTextStart",
|
||||
"StreamTextDelta",
|
||||
]
|
||||
|
||||
|
||||
def test_flush_with_stashed_output():
|
||||
"""Stashed output from PostToolUse hook is used when flushing."""
|
||||
adapter = _adapter()
|
||||
|
||||
# Simulate PostToolUse hook stashing output
|
||||
_pto.set({})
|
||||
_stash("WebSearch", "Search result: 5 items found")
|
||||
|
||||
all_responses: list[StreamBaseResponse] = []
|
||||
|
||||
# Tool use
|
||||
all_responses.extend(
|
||||
adapter.convert_message(
|
||||
AssistantMessage(
|
||||
content=[
|
||||
ToolUseBlock(id="ws-1", name="WebSearch", input={"query": "test"})
|
||||
],
|
||||
model="test",
|
||||
)
|
||||
)
|
||||
)
|
||||
# ResultMessage triggers flush
|
||||
all_responses.extend(
|
||||
adapter.convert_message(
|
||||
ResultMessage(
|
||||
subtype="success",
|
||||
duration_ms=100,
|
||||
duration_api_ms=50,
|
||||
is_error=False,
|
||||
num_turns=1,
|
||||
session_id="s1",
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
output_events = [
|
||||
r for r in all_responses if isinstance(r, StreamToolOutputAvailable)
|
||||
]
|
||||
assert len(output_events) == 1
|
||||
assert output_events[0].output == "Search result: 5 items found"
|
||||
|
||||
# Cleanup
|
||||
_pto.set({}) # type: ignore[arg-type]
|
||||
|
||||
|
||||
# -- wait_for_stash synchronisation tests --
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_wait_for_stash_signaled():
|
||||
"""wait_for_stash returns True when stash_pending_tool_output signals."""
|
||||
_pto.set({})
|
||||
event = asyncio.Event()
|
||||
_stash_event.set(event)
|
||||
|
||||
# Simulate a PostToolUse hook that stashes output after a short delay
|
||||
async def delayed_stash():
|
||||
await asyncio.sleep(0.01)
|
||||
_stash("WebSearch", "result data")
|
||||
|
||||
asyncio.create_task(delayed_stash())
|
||||
result = await wait_for_stash(timeout=1.0)
|
||||
|
||||
assert result is True
|
||||
assert _pto.get({}).get("WebSearch") == ["result data"]
|
||||
|
||||
# Cleanup
|
||||
_pto.set({}) # type: ignore[arg-type]
|
||||
_stash_event.set(None)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_wait_for_stash_timeout():
|
||||
"""wait_for_stash returns False on timeout when no stash occurs."""
|
||||
_pto.set({})
|
||||
event = asyncio.Event()
|
||||
_stash_event.set(event)
|
||||
|
||||
result = await wait_for_stash(timeout=0.05)
|
||||
assert result is False
|
||||
|
||||
# Cleanup
|
||||
_pto.set({}) # type: ignore[arg-type]
|
||||
_stash_event.set(None)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_wait_for_stash_already_stashed():
|
||||
"""wait_for_stash picks up a stash that happened just before the wait."""
|
||||
_pto.set({})
|
||||
event = asyncio.Event()
|
||||
_stash_event.set(event)
|
||||
|
||||
# Stash before waiting — simulates hook completing before message arrives
|
||||
_stash("Read", "file contents")
|
||||
# Event is now set; wait_for_stash detects the fast path and returns
|
||||
# immediately without timing out.
|
||||
result = await wait_for_stash(timeout=0.05)
|
||||
assert result is True
|
||||
|
||||
# But the stash itself is populated
|
||||
assert _pto.get({}).get("Read") == ["file contents"]
|
||||
|
||||
# Cleanup
|
||||
_pto.set({}) # type: ignore[arg-type]
|
||||
_stash_event.set(None)
|
||||
|
||||
|
||||
# -- Parallel tool call tests --
|
||||
|
||||
|
||||
def test_parallel_tool_calls_not_flushed_prematurely():
|
||||
"""Parallel tool calls should NOT be flushed when the next AssistantMessage
|
||||
only contains ToolUseBlocks (parallel continuation)."""
|
||||
adapter = SDKResponseAdapter()
|
||||
|
||||
# Init
|
||||
adapter.convert_message(SystemMessage(subtype="init", data={}))
|
||||
|
||||
# First AssistantMessage: tool call #1
|
||||
msg1 = AssistantMessage(
|
||||
content=[ToolUseBlock(id="t1", name="WebSearch", input={"q": "foo"})],
|
||||
model="test",
|
||||
)
|
||||
r1 = adapter.convert_message(msg1)
|
||||
assert any(isinstance(r, StreamToolInputAvailable) for r in r1)
|
||||
assert adapter.has_unresolved_tool_calls
|
||||
|
||||
# Second AssistantMessage: tool call #2 (parallel continuation)
|
||||
msg2 = AssistantMessage(
|
||||
content=[ToolUseBlock(id="t2", name="WebSearch", input={"q": "bar"})],
|
||||
model="test",
|
||||
)
|
||||
r2 = adapter.convert_message(msg2)
|
||||
|
||||
# No flush should have happened — t1 should NOT have StreamToolOutputAvailable
|
||||
output_events = [r for r in r2 if isinstance(r, StreamToolOutputAvailable)]
|
||||
assert len(output_events) == 0, (
|
||||
f"Tool-only AssistantMessage should not flush prior tools, "
|
||||
f"but got {len(output_events)} output events"
|
||||
)
|
||||
|
||||
# Both t1 and t2 should still be unresolved
|
||||
assert "t1" not in adapter.resolved_tool_calls
|
||||
assert "t2" not in adapter.resolved_tool_calls
|
||||
|
||||
|
||||
def test_text_assistant_message_flushes_prior_tools():
|
||||
"""An AssistantMessage with text (new turn) should flush unresolved tools."""
|
||||
adapter = SDKResponseAdapter()
|
||||
|
||||
# Init
|
||||
adapter.convert_message(SystemMessage(subtype="init", data={}))
|
||||
|
||||
# Tool call
|
||||
msg1 = AssistantMessage(
|
||||
content=[ToolUseBlock(id="t1", name="WebSearch", input={"q": "foo"})],
|
||||
model="test",
|
||||
)
|
||||
adapter.convert_message(msg1)
|
||||
assert adapter.has_unresolved_tool_calls
|
||||
|
||||
# Text AssistantMessage (new turn after tools completed)
|
||||
msg2 = AssistantMessage(
|
||||
content=[TextBlock(text="Here are the results")],
|
||||
model="test",
|
||||
)
|
||||
r2 = adapter.convert_message(msg2)
|
||||
|
||||
# Flush SHOULD have happened — t1 gets empty output
|
||||
output_events = [r for r in r2 if isinstance(r, StreamToolOutputAvailable)]
|
||||
assert len(output_events) == 1
|
||||
assert output_events[0].toolCallId == "t1"
|
||||
assert "t1" in adapter.resolved_tool_calls
|
||||
|
||||
|
||||
def test_already_resolved_tool_skipped_in_user_message():
|
||||
"""A tool result in UserMessage should be skipped if already resolved by flush."""
|
||||
adapter = SDKResponseAdapter()
|
||||
|
||||
adapter.convert_message(SystemMessage(subtype="init", data={}))
|
||||
|
||||
# Tool call + flush via text message
|
||||
adapter.convert_message(
|
||||
AssistantMessage(
|
||||
content=[ToolUseBlock(id="t1", name="WebSearch", input={})],
|
||||
model="test",
|
||||
)
|
||||
)
|
||||
adapter.convert_message(
|
||||
AssistantMessage(
|
||||
content=[TextBlock(text="Done")],
|
||||
model="test",
|
||||
)
|
||||
)
|
||||
assert "t1" in adapter.resolved_tool_calls
|
||||
|
||||
# Now UserMessage arrives with the real result — should be skipped
|
||||
user_msg = UserMessage(content=[ToolResultBlock(tool_use_id="t1", content="real")])
|
||||
r = adapter.convert_message(user_msg)
|
||||
output_events = [r_ for r_ in r if isinstance(r_, StreamToolOutputAvailable)]
|
||||
assert (
|
||||
len(output_events) == 0
|
||||
), "Already-resolved tool should not emit duplicate output"
|
||||
|
||||
194
autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py
Normal file
194
autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""SDK compatibility tests — verify the claude-agent-sdk public API surface we depend on.
|
||||
|
||||
Instead of pinning to a narrow version range, these tests verify that the
|
||||
installed SDK exposes every class, function, attribute, and method the copilot
|
||||
integration relies on. If an SDK upgrade removes or renames something these
|
||||
tests will catch it immediately.
|
||||
"""
|
||||
|
||||
import inspect
|
||||
|
||||
import pytest
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public types & factories
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_sdk_exports_client_and_options():
|
||||
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
|
||||
|
||||
assert inspect.isclass(ClaudeSDKClient)
|
||||
assert inspect.isclass(ClaudeAgentOptions)
|
||||
|
||||
|
||||
def test_sdk_exports_message_types():
|
||||
from claude_agent_sdk import (
|
||||
AssistantMessage,
|
||||
Message,
|
||||
ResultMessage,
|
||||
SystemMessage,
|
||||
UserMessage,
|
||||
)
|
||||
|
||||
for cls in (AssistantMessage, ResultMessage, SystemMessage, UserMessage):
|
||||
assert inspect.isclass(cls), f"{cls.__name__} is not a class"
|
||||
# Message is a Union type alias, just verify it's importable
|
||||
assert Message is not None
|
||||
|
||||
|
||||
def test_sdk_exports_content_block_types():
|
||||
from claude_agent_sdk import TextBlock, ToolResultBlock, ToolUseBlock
|
||||
|
||||
for cls in (TextBlock, ToolResultBlock, ToolUseBlock):
|
||||
assert inspect.isclass(cls), f"{cls.__name__} is not a class"
|
||||
|
||||
|
||||
def test_sdk_exports_mcp_helpers():
|
||||
from claude_agent_sdk import create_sdk_mcp_server, tool
|
||||
|
||||
assert callable(create_sdk_mcp_server)
|
||||
assert callable(tool)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ClaudeSDKClient interface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_client_has_required_methods():
|
||||
from claude_agent_sdk import ClaudeSDKClient
|
||||
|
||||
required = ["connect", "disconnect", "query", "receive_messages"]
|
||||
for name in required:
|
||||
attr = getattr(ClaudeSDKClient, name, None)
|
||||
assert attr is not None, f"ClaudeSDKClient.{name} missing"
|
||||
assert callable(attr), f"ClaudeSDKClient.{name} is not callable"
|
||||
|
||||
|
||||
def test_client_supports_async_context_manager():
|
||||
from claude_agent_sdk import ClaudeSDKClient
|
||||
|
||||
assert hasattr(ClaudeSDKClient, "__aenter__")
|
||||
assert hasattr(ClaudeSDKClient, "__aexit__")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ClaudeAgentOptions fields
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_agent_options_accepts_required_fields():
|
||||
"""Verify ClaudeAgentOptions accepts all kwargs our code passes."""
|
||||
from claude_agent_sdk import ClaudeAgentOptions
|
||||
|
||||
opts = ClaudeAgentOptions(
|
||||
system_prompt="test",
|
||||
cwd="/tmp",
|
||||
)
|
||||
assert opts.system_prompt == "test"
|
||||
assert opts.cwd == "/tmp"
|
||||
|
||||
|
||||
def test_agent_options_accepts_all_our_fields():
|
||||
"""Comprehensive check of every field we use in service.py."""
|
||||
from claude_agent_sdk import ClaudeAgentOptions
|
||||
|
||||
fields_we_use = [
|
||||
"system_prompt",
|
||||
"mcp_servers",
|
||||
"allowed_tools",
|
||||
"disallowed_tools",
|
||||
"hooks",
|
||||
"cwd",
|
||||
"model",
|
||||
"env",
|
||||
"resume",
|
||||
"max_buffer_size",
|
||||
]
|
||||
sig = inspect.signature(ClaudeAgentOptions)
|
||||
for field in fields_we_use:
|
||||
assert field in sig.parameters, (
|
||||
f"ClaudeAgentOptions no longer accepts '{field}' — "
|
||||
f"available params: {list(sig.parameters.keys())}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Message attributes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_assistant_message_has_content_and_model():
|
||||
from claude_agent_sdk import AssistantMessage, TextBlock
|
||||
|
||||
msg = AssistantMessage(content=[TextBlock(text="hi")], model="test")
|
||||
assert hasattr(msg, "content")
|
||||
assert hasattr(msg, "model")
|
||||
|
||||
|
||||
def test_result_message_has_required_attrs():
|
||||
from claude_agent_sdk import ResultMessage
|
||||
|
||||
msg = ResultMessage(
|
||||
subtype="success",
|
||||
duration_ms=100,
|
||||
duration_api_ms=50,
|
||||
is_error=False,
|
||||
num_turns=1,
|
||||
session_id="s1",
|
||||
)
|
||||
assert msg.subtype == "success"
|
||||
assert hasattr(msg, "result")
|
||||
|
||||
|
||||
def test_system_message_has_subtype_and_data():
|
||||
from claude_agent_sdk import SystemMessage
|
||||
|
||||
msg = SystemMessage(subtype="init", data={})
|
||||
assert msg.subtype == "init"
|
||||
assert msg.data == {}
|
||||
|
||||
|
||||
def test_user_message_has_parent_tool_use_id():
|
||||
from claude_agent_sdk import UserMessage
|
||||
|
||||
msg = UserMessage(content="test")
|
||||
assert hasattr(msg, "parent_tool_use_id")
|
||||
assert hasattr(msg, "tool_use_result")
|
||||
|
||||
|
||||
def test_tool_use_block_has_id_name_input():
|
||||
from claude_agent_sdk import ToolUseBlock
|
||||
|
||||
block = ToolUseBlock(id="t1", name="test", input={"key": "val"})
|
||||
assert block.id == "t1"
|
||||
assert block.name == "test"
|
||||
assert block.input == {"key": "val"}
|
||||
|
||||
|
||||
def test_tool_result_block_has_required_attrs():
|
||||
from claude_agent_sdk import ToolResultBlock
|
||||
|
||||
block = ToolResultBlock(tool_use_id="t1", content="result")
|
||||
assert block.tool_use_id == "t1"
|
||||
assert block.content == "result"
|
||||
assert hasattr(block, "is_error")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hook types
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"hook_event",
|
||||
["PreToolUse", "PostToolUse", "Stop"],
|
||||
)
|
||||
def test_sdk_exports_hook_event_type(hook_event: str):
|
||||
"""Verify HookEvent literal includes the events our security_hooks use."""
|
||||
from claude_agent_sdk.types import HookEvent
|
||||
|
||||
# HookEvent is a Literal type — check that our events are valid values.
|
||||
# We can't easily inspect Literal at runtime, so just verify the type exists.
|
||||
assert HookEvent is not None
|
||||
@@ -246,15 +246,33 @@ def create_security_hooks(
|
||||
"""
|
||||
_ = context
|
||||
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||
logger.debug(f"[SDK] Tool success: {tool_name}, tool_use_id={tool_use_id}")
|
||||
is_builtin = not tool_name.startswith(MCP_TOOL_PREFIX)
|
||||
logger.info(
|
||||
"[SDK] PostToolUse: %s (builtin=%s, tool_use_id=%s)",
|
||||
tool_name,
|
||||
is_builtin,
|
||||
(tool_use_id or "")[:12],
|
||||
)
|
||||
|
||||
# Stash output for SDK built-in tools so the response adapter can
|
||||
# emit StreamToolOutputAvailable even when the CLI doesn't surface
|
||||
# a separate UserMessage with ToolResultBlock content.
|
||||
if not tool_name.startswith(MCP_TOOL_PREFIX):
|
||||
if is_builtin:
|
||||
tool_response = input_data.get("tool_response")
|
||||
if tool_response is not None:
|
||||
resp_preview = str(tool_response)[:100]
|
||||
logger.info(
|
||||
"[SDK] Stashing builtin output for %s (%d chars): %s...",
|
||||
tool_name,
|
||||
len(str(tool_response)),
|
||||
resp_preview,
|
||||
)
|
||||
stash_pending_tool_output(tool_name, tool_response)
|
||||
else:
|
||||
logger.warning(
|
||||
"[SDK] PostToolUse for builtin %s but tool_response is None",
|
||||
tool_name,
|
||||
)
|
||||
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ from ..response_model import (
|
||||
StreamBaseResponse,
|
||||
StreamError,
|
||||
StreamFinish,
|
||||
StreamFinishStep,
|
||||
StreamHeartbeat,
|
||||
StreamStart,
|
||||
StreamTextDelta,
|
||||
@@ -46,6 +47,7 @@ from .tool_adapter import (
|
||||
LongRunningCallback,
|
||||
create_copilot_mcp_server,
|
||||
set_execution_context,
|
||||
wait_for_stash,
|
||||
)
|
||||
from .transcript import (
|
||||
cleanup_cli_project_dir,
|
||||
@@ -344,15 +346,15 @@ async def _compress_conversation_history(
|
||||
|
||||
Returns the compressed prior messages (everything except the current message).
|
||||
"""
|
||||
prior = session.messages[:-1]
|
||||
if len(prior) < 2:
|
||||
return prior
|
||||
messages = session.messages[:-1]
|
||||
if len(messages) < 2:
|
||||
return messages
|
||||
|
||||
from backend.util.prompt import compress_context
|
||||
|
||||
# Convert ChatMessages to dicts for compress_context
|
||||
messages_dict = []
|
||||
for msg in prior:
|
||||
for msg in messages:
|
||||
msg_dict: dict[str, Any] = {"role": msg.role}
|
||||
if msg.content:
|
||||
msg_dict["content"] = msg.content
|
||||
@@ -400,7 +402,7 @@ async def _compress_conversation_history(
|
||||
for m in result.messages
|
||||
]
|
||||
|
||||
return prior
|
||||
return messages
|
||||
|
||||
|
||||
def _format_conversation_context(messages: list[ChatMessage]) -> str | None:
|
||||
@@ -442,8 +444,8 @@ def _format_conversation_context(messages: list[ChatMessage]) -> str | None:
|
||||
def _is_tool_error_or_denial(content: str | None) -> bool:
|
||||
"""Check if a tool message content indicates an error or denial.
|
||||
|
||||
We include these in conversation context so the agent doesn't
|
||||
hallucinate success for operations that actually failed.
|
||||
Currently unused — ``_format_conversation_context`` includes all tool
|
||||
results. Kept as a utility for future selective filtering.
|
||||
"""
|
||||
if not content:
|
||||
return False
|
||||
@@ -458,7 +460,7 @@ def _is_tool_error_or_denial(content: str | None) -> bool:
|
||||
"maximum", # subtask-limit denial
|
||||
"denied",
|
||||
"blocked",
|
||||
"failed", # internal tool execution failures
|
||||
"failed to", # internal tool execution failures
|
||||
'"iserror": true', # MCP protocol error flag
|
||||
)
|
||||
)
|
||||
@@ -674,7 +676,7 @@ async def stream_chat_completion_sdk(
|
||||
|
||||
options = ClaudeAgentOptions(**sdk_options_kwargs) # type: ignore[arg-type]
|
||||
|
||||
adapter = SDKResponseAdapter(message_id=message_id)
|
||||
adapter = SDKResponseAdapter(message_id=message_id, session_id=session_id)
|
||||
adapter.set_task_id(task_id)
|
||||
|
||||
async with ClaudeSDKClient(options=options) as client:
|
||||
@@ -699,10 +701,13 @@ async def stream_chat_completion_sdk(
|
||||
transcript_msg_count,
|
||||
session_id,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"[SDK] Sending query ({len(session.messages)} msgs, "
|
||||
f"resume={use_resume})"
|
||||
"[SDK] [%s] Sending query — resume=%s, "
|
||||
"total_msgs=%d, query_len=%d",
|
||||
session_id[:12],
|
||||
use_resume,
|
||||
len(session.messages),
|
||||
len(query_message),
|
||||
)
|
||||
await client.query(query_message, session_id=session_id)
|
||||
|
||||
@@ -710,98 +715,296 @@ async def stream_chat_completion_sdk(
|
||||
accumulated_tool_calls: list[dict[str, Any]] = []
|
||||
has_appended_assistant = False
|
||||
has_tool_results = False
|
||||
# Track persisted message count to skip DB count queries
|
||||
# on incremental saves. Initial save happened at line 545.
|
||||
saved_msg_count = len(session.messages)
|
||||
|
||||
# Use an explicit async iterator with timeout to send
|
||||
# heartbeats when the CLI is idle (e.g. executing tools).
|
||||
# This prevents proxies/LBs from closing the SSE connection.
|
||||
# asyncio.timeout() is preferred over asyncio.wait_for()
|
||||
# because wait_for wraps in a separate Task whose cancellation
|
||||
# can leave the async generator in a broken state.
|
||||
# Use an explicit async iterator with non-cancelling heartbeats.
|
||||
# CRITICAL: we must NOT cancel __anext__() mid-flight — doing so
|
||||
# (via asyncio.timeout or wait_for) corrupts the SDK's internal
|
||||
# anyio memory stream, causing StopAsyncIteration on the next
|
||||
# call and silently dropping all in-flight tool results.
|
||||
# Instead, wrap __anext__() in a Task and use asyncio.wait()
|
||||
# with a timeout. On timeout we emit a heartbeat but keep the
|
||||
# Task alive so it can deliver the next message.
|
||||
msg_iter = client.receive_messages().__aiter__()
|
||||
while not stream_completed:
|
||||
try:
|
||||
async with asyncio.timeout(_HEARTBEAT_INTERVAL):
|
||||
sdk_msg = await msg_iter.__anext__()
|
||||
except TimeoutError:
|
||||
yield StreamHeartbeat()
|
||||
continue
|
||||
except StopAsyncIteration:
|
||||
break
|
||||
pending_task: asyncio.Task[Any] | None = None
|
||||
try:
|
||||
while not stream_completed:
|
||||
if pending_task is None:
|
||||
|
||||
logger.debug(
|
||||
f"[SDK] Received: {type(sdk_msg).__name__} "
|
||||
f"{getattr(sdk_msg, 'subtype', '')}"
|
||||
)
|
||||
for response in adapter.convert_message(sdk_msg):
|
||||
if isinstance(response, StreamStart):
|
||||
async def _next_msg() -> Any:
|
||||
return await msg_iter.__anext__()
|
||||
|
||||
pending_task = asyncio.create_task(_next_msg())
|
||||
|
||||
done, _ = await asyncio.wait(
|
||||
{pending_task}, timeout=_HEARTBEAT_INTERVAL
|
||||
)
|
||||
|
||||
if not done:
|
||||
# Timeout — emit heartbeat but keep the task alive
|
||||
yield StreamHeartbeat()
|
||||
continue
|
||||
|
||||
# Log tool events for debugging visibility issues
|
||||
# Task completed — get result
|
||||
pending_task = None
|
||||
try:
|
||||
sdk_msg = done.pop().result()
|
||||
except StopAsyncIteration:
|
||||
logger.info(
|
||||
"[SDK] [%s] Stream ended normally "
|
||||
"(StopAsyncIteration)",
|
||||
session_id[:12],
|
||||
)
|
||||
break
|
||||
except Exception as stream_err:
|
||||
# SDK sends {"type": "error"} which raises
|
||||
# Exception in receive_messages() — capture it
|
||||
# so the session can still be saved and the
|
||||
# frontend gets a clean finish.
|
||||
logger.error(
|
||||
"[SDK] [%s] Stream error from SDK: %s",
|
||||
session_id[:12],
|
||||
stream_err,
|
||||
exc_info=True,
|
||||
)
|
||||
yield StreamError(
|
||||
errorText=f"SDK stream error: {stream_err}",
|
||||
code="sdk_stream_error",
|
||||
)
|
||||
break
|
||||
|
||||
logger.info(
|
||||
"[SDK] [%s] Received: %s %s "
|
||||
"(unresolved=%d, current=%d, resolved=%d)",
|
||||
session_id[:12],
|
||||
type(sdk_msg).__name__,
|
||||
getattr(sdk_msg, "subtype", ""),
|
||||
len(adapter.current_tool_calls)
|
||||
- len(adapter.resolved_tool_calls),
|
||||
len(adapter.current_tool_calls),
|
||||
len(adapter.resolved_tool_calls),
|
||||
)
|
||||
|
||||
# Race-condition fix: SDK hooks (PostToolUse) are
|
||||
# executed asynchronously via start_soon() — the next
|
||||
# message can arrive before the hook stashes output.
|
||||
# wait_for_stash() awaits an asyncio.Event signaled by
|
||||
# stash_pending_tool_output(), completing as soon as
|
||||
# the hook finishes (typically <1ms). The sleep(0)
|
||||
# after lets any remaining concurrent hooks complete.
|
||||
#
|
||||
# Skip for parallel tool continuations: when the SDK
|
||||
# sends parallel tool calls as separate
|
||||
# AssistantMessages (each containing only
|
||||
# ToolUseBlocks), we must NOT wait/flush — the prior
|
||||
# tools are still executing concurrently.
|
||||
from claude_agent_sdk import (
|
||||
AssistantMessage,
|
||||
ResultMessage,
|
||||
ToolUseBlock,
|
||||
)
|
||||
|
||||
is_parallel_continuation = isinstance(
|
||||
sdk_msg, AssistantMessage
|
||||
) and all(isinstance(b, ToolUseBlock) for b in sdk_msg.content)
|
||||
|
||||
if (
|
||||
adapter.has_unresolved_tool_calls
|
||||
and isinstance(sdk_msg, (AssistantMessage, ResultMessage))
|
||||
and not is_parallel_continuation
|
||||
):
|
||||
if await wait_for_stash(timeout=0.5):
|
||||
await asyncio.sleep(0)
|
||||
else:
|
||||
logger.warning(
|
||||
"[SDK] [%s] Timed out waiting for "
|
||||
"PostToolUse hook stash "
|
||||
"(%d unresolved tool calls)",
|
||||
session_id[:12],
|
||||
len(adapter.current_tool_calls)
|
||||
- len(adapter.resolved_tool_calls),
|
||||
)
|
||||
|
||||
for response in adapter.convert_message(sdk_msg):
|
||||
if isinstance(response, StreamStart):
|
||||
continue
|
||||
|
||||
# Log tool events for debugging
|
||||
if isinstance(
|
||||
response,
|
||||
(
|
||||
StreamToolInputAvailable,
|
||||
StreamToolOutputAvailable,
|
||||
),
|
||||
):
|
||||
extra = ""
|
||||
if isinstance(response, StreamToolOutputAvailable):
|
||||
out_len = len(str(response.output))
|
||||
extra = f", output_len={out_len}"
|
||||
logger.info(
|
||||
"[SDK] [%s] Tool event: %s, tool=%s%s",
|
||||
session_id[:12],
|
||||
type(response).__name__,
|
||||
getattr(response, "toolName", "N/A"),
|
||||
extra,
|
||||
)
|
||||
|
||||
yield response
|
||||
|
||||
if isinstance(response, StreamTextDelta):
|
||||
delta = response.delta or ""
|
||||
# After tool results, start a new assistant
|
||||
# message for the post-tool text.
|
||||
if has_tool_results and has_appended_assistant:
|
||||
assistant_response = ChatMessage(
|
||||
role="assistant", content=delta
|
||||
)
|
||||
accumulated_tool_calls = []
|
||||
has_appended_assistant = False
|
||||
has_tool_results = False
|
||||
session.messages.append(assistant_response)
|
||||
has_appended_assistant = True
|
||||
else:
|
||||
assistant_response.content = (
|
||||
assistant_response.content or ""
|
||||
) + delta
|
||||
if not has_appended_assistant:
|
||||
session.messages.append(assistant_response)
|
||||
has_appended_assistant = True
|
||||
|
||||
elif isinstance(response, StreamToolInputAvailable):
|
||||
accumulated_tool_calls.append(
|
||||
{
|
||||
"id": response.toolCallId,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": response.toolName,
|
||||
"arguments": json.dumps(
|
||||
response.input or {}
|
||||
),
|
||||
},
|
||||
}
|
||||
)
|
||||
assistant_response.tool_calls = accumulated_tool_calls
|
||||
if not has_appended_assistant:
|
||||
session.messages.append(assistant_response)
|
||||
has_appended_assistant = True
|
||||
# Save before tool execution starts so the
|
||||
# pending tool call is visible on refresh /
|
||||
# other devices.
|
||||
try:
|
||||
await upsert_chat_session(
|
||||
session,
|
||||
existing_message_count=saved_msg_count,
|
||||
)
|
||||
saved_msg_count = len(session.messages)
|
||||
except Exception as save_err:
|
||||
logger.warning(
|
||||
"[SDK] [%s] Incremental save " "failed: %s",
|
||||
session_id[:12],
|
||||
save_err,
|
||||
)
|
||||
|
||||
elif isinstance(response, StreamToolOutputAvailable):
|
||||
session.messages.append(
|
||||
ChatMessage(
|
||||
role="tool",
|
||||
content=(
|
||||
response.output
|
||||
if isinstance(response.output, str)
|
||||
else str(response.output)
|
||||
),
|
||||
tool_call_id=response.toolCallId,
|
||||
)
|
||||
)
|
||||
has_tool_results = True
|
||||
# Save after tool completes so the result is
|
||||
# visible on refresh / other devices.
|
||||
try:
|
||||
await upsert_chat_session(
|
||||
session,
|
||||
existing_message_count=saved_msg_count,
|
||||
)
|
||||
saved_msg_count = len(session.messages)
|
||||
except Exception as save_err:
|
||||
logger.warning(
|
||||
"[SDK] [%s] Incremental save " "failed: %s",
|
||||
session_id[:12],
|
||||
save_err,
|
||||
)
|
||||
|
||||
elif isinstance(response, StreamFinish):
|
||||
stream_completed = True
|
||||
|
||||
except asyncio.CancelledError:
|
||||
# Task/generator was cancelled (e.g. client disconnect,
|
||||
# server shutdown). Log and let the safety-net / finally
|
||||
# blocks handle cleanup.
|
||||
logger.warning(
|
||||
"[SDK] [%s] Streaming loop cancelled "
|
||||
"(asyncio.CancelledError)",
|
||||
session_id[:12],
|
||||
)
|
||||
raise
|
||||
finally:
|
||||
# Cancel the pending __anext__ task to avoid a leaked
|
||||
# coroutine. This is safe even if the task already
|
||||
# completed.
|
||||
if pending_task is not None and not pending_task.done():
|
||||
pending_task.cancel()
|
||||
try:
|
||||
await pending_task
|
||||
except (asyncio.CancelledError, StopAsyncIteration):
|
||||
pass
|
||||
|
||||
# Safety net: if tools are still unresolved after the
|
||||
# streaming loop (e.g. StopAsyncIteration before ResultMessage,
|
||||
# or SDK not sending UserMessages for built-in tools), flush
|
||||
# them now so the frontend stops showing spinners.
|
||||
if adapter.has_unresolved_tool_calls:
|
||||
logger.warning(
|
||||
"[SDK] [%s] %d unresolved tool(s) after stream loop — "
|
||||
"flushing as safety net",
|
||||
session_id[:12],
|
||||
len(adapter.current_tool_calls)
|
||||
- len(adapter.resolved_tool_calls),
|
||||
)
|
||||
safety_responses: list[StreamBaseResponse] = []
|
||||
adapter._flush_unresolved_tool_calls(safety_responses)
|
||||
for response in safety_responses:
|
||||
if isinstance(
|
||||
response,
|
||||
(StreamToolInputAvailable, StreamToolOutputAvailable),
|
||||
):
|
||||
logger.info(
|
||||
"[SDK] Tool event: %s, tool=%s",
|
||||
"[SDK] [%s] Safety flush: %s, tool=%s",
|
||||
session_id[:12],
|
||||
type(response).__name__,
|
||||
getattr(response, "toolName", "N/A"),
|
||||
)
|
||||
|
||||
yield response
|
||||
|
||||
if isinstance(response, StreamTextDelta):
|
||||
delta = response.delta or ""
|
||||
# After tool results, start a new assistant
|
||||
# message for the post-tool text.
|
||||
if has_tool_results and has_appended_assistant:
|
||||
assistant_response = ChatMessage(
|
||||
role="assistant", content=delta
|
||||
)
|
||||
accumulated_tool_calls = []
|
||||
has_appended_assistant = False
|
||||
has_tool_results = False
|
||||
session.messages.append(assistant_response)
|
||||
has_appended_assistant = True
|
||||
else:
|
||||
assistant_response.content = (
|
||||
assistant_response.content or ""
|
||||
) + delta
|
||||
if not has_appended_assistant:
|
||||
session.messages.append(assistant_response)
|
||||
has_appended_assistant = True
|
||||
|
||||
elif isinstance(response, StreamToolInputAvailable):
|
||||
accumulated_tool_calls.append(
|
||||
{
|
||||
"id": response.toolCallId,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": response.toolName,
|
||||
"arguments": json.dumps(response.input or {}),
|
||||
},
|
||||
}
|
||||
)
|
||||
assistant_response.tool_calls = accumulated_tool_calls
|
||||
if not has_appended_assistant:
|
||||
session.messages.append(assistant_response)
|
||||
has_appended_assistant = True
|
||||
|
||||
elif isinstance(response, StreamToolOutputAvailable):
|
||||
session.messages.append(
|
||||
ChatMessage(
|
||||
role="tool",
|
||||
content=(
|
||||
response.output
|
||||
if isinstance(response.output, str)
|
||||
else str(response.output)
|
||||
),
|
||||
tool_call_id=response.toolCallId,
|
||||
)
|
||||
)
|
||||
has_tool_results = True
|
||||
|
||||
elif isinstance(response, StreamFinish):
|
||||
stream_completed = True
|
||||
# If the stream ended without a ResultMessage (no
|
||||
# StreamFinish), the SDK CLI exited unexpectedly. Close
|
||||
# the open step and emit StreamFinish so the frontend
|
||||
# transitions to the "ready" state.
|
||||
if not stream_completed:
|
||||
logger.warning(
|
||||
"[SDK] [%s] Stream ended without ResultMessage "
|
||||
"(StopAsyncIteration) — emitting StreamFinish",
|
||||
session_id[:12],
|
||||
)
|
||||
if adapter.step_open:
|
||||
yield StreamFinishStep()
|
||||
adapter.step_open = False
|
||||
closing_responses: list[StreamBaseResponse] = []
|
||||
adapter._end_text_if_open(closing_responses)
|
||||
for r in closing_responses:
|
||||
yield r
|
||||
yield StreamFinish()
|
||||
stream_completed = True
|
||||
|
||||
if (
|
||||
assistant_response.content or assistant_response.tool_calls
|
||||
@@ -857,12 +1060,20 @@ async def stream_chat_completion_sdk(
|
||||
)
|
||||
|
||||
await asyncio.shield(upsert_chat_session(session))
|
||||
logger.debug(
|
||||
f"[SDK] Session {session_id} saved with {len(session.messages)} messages"
|
||||
logger.info(
|
||||
"[SDK] [%s] Session saved with %d messages",
|
||||
session_id[:12],
|
||||
len(session.messages),
|
||||
)
|
||||
if not stream_completed:
|
||||
yield StreamFinish()
|
||||
|
||||
except asyncio.CancelledError:
|
||||
# Client disconnect / server shutdown — log but re-raise so
|
||||
# the framework can clean up. The finally block still runs
|
||||
# for transcript upload.
|
||||
logger.warning("[SDK] [%s] Session cancelled (CancelledError)", session_id[:12])
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[SDK] Error: {e}", exc_info=True)
|
||||
try:
|
||||
|
||||
@@ -9,6 +9,7 @@ via a callback provided by the service layer. This avoids wasteful SDK polling
|
||||
and makes results survive page refreshes.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
@@ -44,6 +45,14 @@ _current_session: ContextVar[ChatSession | None] = ContextVar(
|
||||
_pending_tool_outputs: ContextVar[dict[str, list[str]]] = ContextVar(
|
||||
"pending_tool_outputs", default=None # type: ignore[arg-type]
|
||||
)
|
||||
# Event signaled whenever stash_pending_tool_output() adds a new entry.
|
||||
# Used by the streaming loop to wait for PostToolUse hooks to complete
|
||||
# instead of sleeping an arbitrary duration. The SDK fires hooks via
|
||||
# start_soon (fire-and-forget) so the next message can arrive before
|
||||
# the hook stashes its output — this event bridges that gap.
|
||||
_stash_event: ContextVar[asyncio.Event | None] = ContextVar(
|
||||
"_stash_event", default=None
|
||||
)
|
||||
|
||||
# Callback type for delegating long-running tools to the non-SDK infrastructure.
|
||||
# Args: (tool_name, arguments, session) → MCP-formatted response dict.
|
||||
@@ -76,6 +85,7 @@ def set_execution_context(
|
||||
_current_user_id.set(user_id)
|
||||
_current_session.set(session)
|
||||
_pending_tool_outputs.set({})
|
||||
_stash_event.set(asyncio.Event())
|
||||
_long_running_callback.set(long_running_callback)
|
||||
|
||||
|
||||
@@ -134,6 +144,43 @@ def stash_pending_tool_output(tool_name: str, output: Any) -> None:
|
||||
except (TypeError, ValueError):
|
||||
text = str(output)
|
||||
pending.setdefault(tool_name, []).append(text)
|
||||
# Signal any waiters that new output is available.
|
||||
event = _stash_event.get(None)
|
||||
if event is not None:
|
||||
event.set()
|
||||
|
||||
|
||||
async def wait_for_stash(timeout: float = 0.5) -> bool:
|
||||
"""Wait for a PostToolUse hook to stash tool output.
|
||||
|
||||
The SDK fires PostToolUse hooks asynchronously via ``start_soon()`` —
|
||||
the next message (AssistantMessage/ResultMessage) can arrive before the
|
||||
hook completes and stashes its output. This function bridges that gap
|
||||
by waiting on the ``_stash_event``, which is signaled by
|
||||
:func:`stash_pending_tool_output`.
|
||||
|
||||
After the event fires, callers should ``await asyncio.sleep(0)`` to
|
||||
give any remaining concurrent hooks a chance to complete.
|
||||
|
||||
Returns ``True`` if a stash signal was received, ``False`` on timeout.
|
||||
The timeout is a safety net — normally the stash happens within
|
||||
microseconds of yielding to the event loop.
|
||||
"""
|
||||
event = _stash_event.get(None)
|
||||
if event is None:
|
||||
return False
|
||||
# Fast path: hook already completed before we got here.
|
||||
if event.is_set():
|
||||
event.clear()
|
||||
return True
|
||||
# Slow path: wait for the hook to signal.
|
||||
try:
|
||||
async with asyncio.timeout(timeout):
|
||||
await event.wait()
|
||||
event.clear()
|
||||
return True
|
||||
except TimeoutError:
|
||||
return False
|
||||
|
||||
|
||||
async def _execute_tool_sync(
|
||||
|
||||
14
autogpt_platform/backend/poetry.lock
generated
14
autogpt_platform/backend/poetry.lock
generated
@@ -899,17 +899,17 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "claude-agent-sdk"
|
||||
version = "0.1.35"
|
||||
version = "0.1.39"
|
||||
description = "Python SDK for Claude Code"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "claude_agent_sdk-0.1.35-py3-none-macosx_11_0_arm64.whl", hash = "sha256:df67f4deade77b16a9678b3a626c176498e40417f33b04beda9628287f375591"},
|
||||
{file = "claude_agent_sdk-0.1.35-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:14963944f55ded7c8ed518feebfa5b4284aa6dd8d81aeff2e5b21a962ce65097"},
|
||||
{file = "claude_agent_sdk-0.1.35-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:84344dcc535d179c1fc8a11c6f34c37c3b583447bdf09d869effb26514fd7a65"},
|
||||
{file = "claude_agent_sdk-0.1.35-py3-none-win_amd64.whl", hash = "sha256:1b3d54b47448c93f6f372acd4d1757f047c3c1e8ef5804be7a1e3e53e2c79a5f"},
|
||||
{file = "claude_agent_sdk-0.1.35.tar.gz", hash = "sha256:0f98e2b3c71ca85abfc042e7a35c648df88e87fda41c52e6779ef7b038dcbb52"},
|
||||
{file = "claude_agent_sdk-0.1.39-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6ed6a79781f545b761b9fe467bc5ae213a103c9d3f0fe7a9dad3c01790ed58fa"},
|
||||
{file = "claude_agent_sdk-0.1.39-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:0c03b5a3772eaec42e29ea39240c7d24b760358082f2e36336db9e71dde3dda4"},
|
||||
{file = "claude_agent_sdk-0.1.39-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:d2665c9e87b6ffece590bcdd6eb9def47cde4809b0d2f66e0a61a719189be7c9"},
|
||||
{file = "claude_agent_sdk-0.1.39-py3-none-win_amd64.whl", hash = "sha256:d03324daf7076be79d2dd05944559aabf4cc11c98d3a574b992a442a7c7a26d6"},
|
||||
{file = "claude_agent_sdk-0.1.39.tar.gz", hash = "sha256:dcf0ebd5a638c9a7d9f3af7640932a9212b2705b7056e4f08bd3968a865b4268"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -8530,4 +8530,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.10,<3.14"
|
||||
content-hash = "55e095de555482f0fe47de7695f390fe93e7bcf739b31c391b2e5e3c3d938ae3"
|
||||
content-hash = "3ef62836d8321b9a3b8e897dade8dc6ca9022fd9468c53f384b0871b521ab343"
|
||||
|
||||
@@ -16,7 +16,7 @@ anthropic = "^0.79.0"
|
||||
apscheduler = "^3.11.1"
|
||||
autogpt-libs = { path = "../autogpt_libs", develop = true }
|
||||
bleach = { extras = ["css"], version = "^6.2.0" }
|
||||
claude-agent-sdk = "^0.1.0"
|
||||
claude-agent-sdk = "^0.1.39" # see copilot/sdk/sdk_compat_test.py for capability checks
|
||||
click = "^8.2.0"
|
||||
cryptography = "^46.0"
|
||||
discord-py = "^2.5.2"
|
||||
|
||||
@@ -58,6 +58,7 @@ function toToolInput(rawArguments: unknown): unknown {
|
||||
export function convertChatSessionMessagesToUiMessages(
|
||||
sessionId: string,
|
||||
rawMessages: unknown[],
|
||||
options?: { isComplete?: boolean },
|
||||
): UIMessage<unknown, UIDataTypes, UITools>[] {
|
||||
const messages = coerceSessionChatMessages(rawMessages);
|
||||
const toolOutputsByCallId = new Map<string, unknown>();
|
||||
@@ -104,6 +105,16 @@ export function convertChatSessionMessagesToUiMessages(
|
||||
input,
|
||||
output: typeof output === "string" ? safeJsonParse(output) : output,
|
||||
});
|
||||
} else if (options?.isComplete) {
|
||||
// Session is complete (no active stream) but this tool call has
|
||||
// no output in the DB — mark as completed to stop stale spinners.
|
||||
parts.push({
|
||||
type: `tool-${toolName}`,
|
||||
toolCallId,
|
||||
state: "output-available",
|
||||
input,
|
||||
output: "",
|
||||
});
|
||||
} else {
|
||||
parts.push({
|
||||
type: `tool-${toolName}`,
|
||||
|
||||
@@ -40,16 +40,6 @@ export function useChatSession() {
|
||||
}
|
||||
}, [sessionId, queryClient]);
|
||||
|
||||
// Memoize so the effect in useCopilotPage doesn't infinite-loop on a new
|
||||
// array reference every render. Re-derives only when query data changes.
|
||||
const hydratedMessages = useMemo(() => {
|
||||
if (sessionQuery.data?.status !== 200 || !sessionId) return undefined;
|
||||
return convertChatSessionMessagesToUiMessages(
|
||||
sessionId,
|
||||
sessionQuery.data.data.messages ?? [],
|
||||
);
|
||||
}, [sessionQuery.data, sessionId]);
|
||||
|
||||
// Expose active_stream info so the caller can trigger manual resume
|
||||
// after hydration completes (rather than relying on AI SDK's built-in
|
||||
// resume which fires before hydration).
|
||||
@@ -58,6 +48,19 @@ export function useChatSession() {
|
||||
return !!sessionQuery.data.data.active_stream;
|
||||
}, [sessionQuery.data]);
|
||||
|
||||
// Memoize so the effect in useCopilotPage doesn't infinite-loop on a new
|
||||
// array reference every render. Re-derives only when query data changes.
|
||||
// When the session is complete (no active stream), mark dangling tool
|
||||
// calls as completed so stale spinners don't persist after refresh.
|
||||
const hydratedMessages = useMemo(() => {
|
||||
if (sessionQuery.data?.status !== 200 || !sessionId) return undefined;
|
||||
return convertChatSessionMessagesToUiMessages(
|
||||
sessionId,
|
||||
sessionQuery.data.data.messages ?? [],
|
||||
{ isComplete: !hasActiveStream },
|
||||
);
|
||||
}, [sessionQuery.data, sessionId, hasActiveStream]);
|
||||
|
||||
const { mutateAsync: createSessionMutation, isPending: isCreatingSession } =
|
||||
usePostV2CreateSession({
|
||||
mutation: {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import {
|
||||
getGetV2GetSessionQueryKey,
|
||||
getGetV2ListSessionsQueryKey,
|
||||
postV2CancelSessionTask,
|
||||
useDeleteV2DeleteSession,
|
||||
@@ -187,11 +188,35 @@ export function useCopilotPage() {
|
||||
});
|
||||
}, [hydratedMessages, setMessages, status]);
|
||||
|
||||
// Ref: tracks whether we've already resumed for a given session.
|
||||
// Reset when the stream ends so re-resume is possible if the backend
|
||||
// task is still running (SSE dropped but executor didn't finish).
|
||||
const hasResumedRef = useRef<string | null>(null);
|
||||
|
||||
// When the stream ends (or drops), invalidate the session cache so the
|
||||
// next hydration fetches fresh messages from the backend. Without this,
|
||||
// staleTime: Infinity means the cache keeps the pre-stream data forever,
|
||||
// and any messages added during streaming are lost on remount/navigation.
|
||||
const prevStatusRef = useRef(status);
|
||||
useEffect(() => {
|
||||
const prev = prevStatusRef.current;
|
||||
prevStatusRef.current = status;
|
||||
|
||||
const wasActive = prev === "streaming" || prev === "submitted";
|
||||
const isIdle = status === "ready" || status === "error";
|
||||
if (wasActive && isIdle && sessionId) {
|
||||
queryClient.invalidateQueries({
|
||||
queryKey: getGetV2GetSessionQueryKey(sessionId),
|
||||
});
|
||||
// Allow re-resume if the backend task is still running.
|
||||
hasResumedRef.current = null;
|
||||
}
|
||||
}, [status, sessionId, queryClient]);
|
||||
|
||||
// Resume an active stream AFTER hydration completes.
|
||||
// The backend returns active_stream info when a task is still running.
|
||||
// We wait for hydration so the AI SDK has the conversation history
|
||||
// before the resumed stream appends the in-progress assistant message.
|
||||
const hasResumedRef = useRef<string | null>(null);
|
||||
useEffect(() => {
|
||||
if (!hasActiveStream || !sessionId) return;
|
||||
if (!hydratedMessages || hydratedMessages.length === 0) return;
|
||||
@@ -202,18 +227,6 @@ export function useCopilotPage() {
|
||||
resumeStream();
|
||||
}, [hasActiveStream, sessionId, hydratedMessages, status, resumeStream]);
|
||||
|
||||
// When the stream finishes, resolve any tool parts still showing spinners.
|
||||
// This can happen if the backend didn't emit StreamToolOutputAvailable for
|
||||
// a tool call before sending StreamFinish (e.g. SDK built-in tools).
|
||||
const prevStatusRef = useRef(status);
|
||||
useEffect(() => {
|
||||
const prev = prevStatusRef.current;
|
||||
prevStatusRef.current = status;
|
||||
if (prev === "streaming" && status === "ready") {
|
||||
setMessages((msgs) => resolveInProgressTools(msgs, "completed"));
|
||||
}
|
||||
}, [status, setMessages]);
|
||||
|
||||
// Poll session endpoint when a long-running tool (create_agent, edit_agent)
|
||||
// is in progress. When the backend completes, the session data will contain
|
||||
// the final tool output — this hook detects the change and updates messages.
|
||||
|
||||
Reference in New Issue
Block a user