mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
## Summary

- Remove ~1200 lines of broken/unmaintained non-SDK copilot streaming code (retry logic, parallel tool calls, context window management)
- Add `stream_chat_completion_baseline()` as a clean fallback LLM path with full tool-calling support when `CHAT_USE_CLAUDE_AGENT_SDK=false` (e.g. when Anthropic is down)
- Baseline reuses the same shared `TOOL_REGISTRY`, `get_available_tools()`, and `execute_tool()` as the SDK path
- Move baseline code to dedicated `baseline/` folder (mirrors `sdk/` structure)
- Clean up SDK service: remove unused params, fix model/env resolution, fix stream error persistence
- Clean up config: remove `max_retries`, `thinking_enabled` fields (non-SDK only)

## Changes

| File | Action |
|------|--------|
| `backend/copilot/baseline/__init__.py` | New — package export |
| `backend/copilot/baseline/service.py` | New — baseline streaming with tool-call loop |
| `backend/copilot/baseline/service_test.py` | New — multi-turn keyword recall test |
| `backend/copilot/service.py` | Remove ~1200 lines of legacy code, keep shared helpers only |
| `backend/copilot/executor/processor.py` | Simplify branching to SDK vs baseline |
| `backend/copilot/sdk/service.py` | Remove unused params, fix model/env separation, fix stream error persistence |
| `backend/copilot/config.py` | Remove `max_retries`, `thinking_enabled` |
| `backend/copilot/service_test.py` | Keep SDK test only (baseline test moved) |
| `backend/copilot/parallel_tool_calls_test.py` | Deleted (tested removed code) |

## Test plan

- [x] `poetry run format` passes
- [x] CI passes (all 3 Python versions, types, CodeQL)
- [ ] SDK path works unchanged in production
- [x] Baseline path (`CHAT_USE_CLAUDE_AGENT_SDK=false`) streams responses with tool calling
- [x] Baseline emits correct Vercel AI SDK stream protocol events
100 lines
3.2 KiB
Python
100 lines
3.2 KiB
Python
import logging
|
|
from os import getenv
|
|
|
|
import pytest
|
|
|
|
from backend.copilot.baseline import stream_chat_completion_baseline
|
|
from backend.copilot.model import (
|
|
create_chat_session,
|
|
get_chat_session,
|
|
upsert_chat_session,
|
|
)
|
|
from backend.copilot.response_model import (
|
|
StreamError,
|
|
StreamFinish,
|
|
StreamStart,
|
|
StreamTextDelta,
|
|
)
|
|
|
|
# Module-level logger named after this module; the test below uses it to
# record a truncated copy of the model's reply for each turn.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@pytest.mark.asyncio(loop_scope="session")
async def test_baseline_multi_turn(setup_test_user, test_user_id):
    """Test that the baseline LLM path streams responses and maintains history.

    Turn 1: Send a message with a unique keyword.
    Turn 2: Ask the model to recall the keyword — proving conversation history
    is correctly passed to the single-call LLM.

    The test is skipped when the ``OPEN_ROUTER_API_KEY`` environment variable
    is unset or empty.

    Args:
        setup_test_user: Fixture requested but not referenced in the body
            (presumably ensures the test user exists — confirm in conftest).
        test_user_id: Fixture providing the user id that owns the session.
    """
    api_key: str | None = getenv("OPEN_ROUTER_API_KEY")
    if not api_key:
        # pytest.skip() raises to abort the test, so no `return` is needed.
        pytest.skip("OPEN_ROUTER_API_KEY is not set, skipping test")

    session = await create_chat_session(test_user_id)
    session = await upsert_chat_session(session)

    # --- Turn 1: send a message with a unique keyword ---
    keyword = "QUASAR99"
    turn1_msg = (
        f"Please remember this special keyword: {keyword}. "
        "Just confirm you've noted it, keep your response brief."
    )
    turn1_parts: list[str] = []
    turn1_errors: list[str] = []
    got_start = False
    got_finish = False

    async for chunk in stream_chat_completion_baseline(
        session.session_id,
        turn1_msg,
        user_id=test_user_id,
    ):
        if isinstance(chunk, StreamStart):
            got_start = True
        elif isinstance(chunk, StreamTextDelta):
            turn1_parts.append(chunk.delta)
        elif isinstance(chunk, StreamError):
            turn1_errors.append(chunk.errorText)
        elif isinstance(chunk, StreamFinish):
            got_finish = True

    # Join once instead of growing a string per streamed delta.
    turn1_text = "".join(turn1_parts)

    assert got_start, "Turn 1 did not yield StreamStart"
    assert got_finish, "Turn 1 did not yield StreamFinish"
    assert not turn1_errors, f"Turn 1 errors: {turn1_errors}"
    assert turn1_text, "Turn 1 produced no text"
    logger.info(f"Turn 1 response: {turn1_text[:100]}")

    # Reload session for turn 2
    session = await get_chat_session(session.session_id, test_user_id)
    assert session, "Session not found after turn 1"

    # Verify messages were persisted (user + assistant)
    assert (
        len(session.messages) >= 2
    ), f"Expected at least 2 messages after turn 1, got {len(session.messages)}"

    # --- Turn 2: ask model to recall the keyword ---
    turn2_msg = "What was the special keyword I asked you to remember?"
    turn2_parts: list[str] = []
    turn2_errors: list[str] = []

    # Unlike turn 1, the reloaded session object is passed in explicitly.
    async for chunk in stream_chat_completion_baseline(
        session.session_id,
        turn2_msg,
        user_id=test_user_id,
        session=session,
    ):
        if isinstance(chunk, StreamTextDelta):
            turn2_parts.append(chunk.delta)
        elif isinstance(chunk, StreamError):
            turn2_errors.append(chunk.errorText)

    turn2_text = "".join(turn2_parts)

    assert not turn2_errors, f"Turn 2 errors: {turn2_errors}"
    assert turn2_text, "Turn 2 produced no text"
    assert keyword in turn2_text, (
        f"Model did not recall keyword '{keyword}' in turn 2. "
        f"Response: {turn2_text[:200]}"
    )
    logger.info(f"Turn 2 recalled keyword successfully: {turn2_text[:100]}")
|