Mirror of https://github.com/Significant-Gravitas/AutoGPT.git (synced 2026-04-30 03:00:41 -04:00)

Compare commits: test-scree ... test-scree (12 commits)
Commits:

- c50cee86d2
- f2b8f81bb1
- ce0cb1e035
- e5ea2e0d5b
- b7f242f163
- be86a911e1
- 54763b660b
- 6d60265221
- 0b8997eb01
- 2ff036b86b
- 1fc3cc74ea
- 815659d188
@@ -744,12 +744,12 @@ class LLMResponse(BaseModel):
 def convert_openai_tool_fmt_to_anthropic(
     openai_tools: list[dict] | None = None,
-) -> Iterable[ToolParam] | anthropic.Omit:
+) -> Iterable[ToolParam] | anthropic.NotGiven:
     """
     Convert OpenAI tool format to Anthropic tool format.
     """
     if not openai_tools or len(openai_tools) == 0:
-        return anthropic.omit
+        return anthropic.NOT_GIVEN

     anthropic_tools = []
     for tool in openai_tools:
@@ -972,6 +972,11 @@ async def llm_call(
     elif provider == "anthropic":

         an_tools = convert_openai_tool_fmt_to_anthropic(tools)
+        # Cache tool definitions alongside the system prompt.
+        # Placing cache_control on the last tool caches all tool schemas as a
+        # single prefix — reads cost 10% of normal input tokens.
+        if isinstance(an_tools, list) and an_tools:
+            an_tools[-1] = {**an_tools[-1], "cache_control": {"type": "ephemeral"}}
+
         system_messages = [p["content"] for p in prompt if p["role"] == "system"]
         sysprompt = " ".join(system_messages)
@@ -994,14 +999,22 @@ async def llm_call(
         client = anthropic.AsyncAnthropic(
             api_key=credentials.api_key.get_secret_value()
         )
-        resp = await client.messages.create(
+        create_kwargs: dict[str, Any] = dict(
             model=llm_model.value,
-            system=sysprompt,
             messages=messages,
             max_tokens=max_tokens,
            tools=an_tools,
             timeout=600,
         )
+        if sysprompt.strip():
+            create_kwargs["system"] = [
+                {
+                    "type": "text",
+                    "text": sysprompt,
+                    "cache_control": {"type": "ephemeral"},
+                }
+            ]
+        resp = await client.messages.create(**create_kwargs)

         if not resp.content:
             raise ValueError("No content returned from Anthropic.")
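For orientation, here is a minimal, self-contained sketch of the request shape the hunks above produce. The model id, API key, tool schema, and prompt text are placeholders rather than values from this PR; only the placement of cache_control on the system block and on the last tool follows the diff.

```python
# Minimal sketch (placeholder values, not code from the PR) of the request
# shape built above: the system prompt is sent as a text block and the last
# tool definition carries cache_control, so Anthropic can cache the shared
# prefix across calls.
import asyncio

import anthropic


async def main() -> None:
    client = anthropic.AsyncAnthropic(api_key="sk-ant-...")  # placeholder key

    tools = [
        {
            "name": "get_weather",  # illustrative tool, not from the PR
            "description": "Look up the weather for a city.",
            "input_schema": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    ]
    # Mark the last tool so all tool schemas before it are cached as one prefix.
    tools[-1] = {**tools[-1], "cache_control": {"type": "ephemeral"}}

    resp = await client.messages.create(
        model="claude-sonnet-4-5",  # placeholder model id
        max_tokens=256,
        # System prompt as a block list so cache_control can be attached.
        system=[
            {
                "type": "text",
                "text": "You are an AI automation assistant.",
                "cache_control": {"type": "ephemeral"},
            }
        ],
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=tools,
    )
    print(resp.content)


if __name__ == "__main__":
    asyncio.run(main())
```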
@@ -1111,3 +1111,181 @@ class TestExtractOpenRouterCost:
     def test_returns_none_for_negative_cost(self):
         response = self._mk_response({"x-total-cost": "-0.005"})
         assert llm.extract_openrouter_cost(response) is None
+
+
+class TestAnthropicCacheControl:
+    """Verify that llm_call attaches cache_control to the system prompt block
+    and to the last tool definition when calling the Anthropic API."""
+
+    def _make_anthropic_credentials(self) -> llm.APIKeyCredentials:
+        from pydantic import SecretStr
+
+        return llm.APIKeyCredentials(
+            id="test-anthropic-id",
+            provider="anthropic",
+            api_key=SecretStr("mock-anthropic-key"),
+            title="Mock Anthropic key",
+            expires_at=None,
+        )
+
+    @pytest.mark.asyncio
+    async def test_system_prompt_sent_as_block_with_cache_control(self):
+        """The system prompt is wrapped in a structured block with cache_control ephemeral."""
+        mock_resp = MagicMock()
+        mock_resp.content = [MagicMock(type="text", text="hello")]
+        mock_resp.usage = MagicMock(input_tokens=5, output_tokens=3)
+
+        captured_kwargs: dict = {}
+
+        async def fake_create(**kwargs):
+            captured_kwargs.update(kwargs)
+            return mock_resp
+
+        mock_client = MagicMock()
+        mock_client.messages.create = fake_create
+
+        credentials = self._make_anthropic_credentials()
+
+        with patch("anthropic.AsyncAnthropic", return_value=mock_client):
+            await llm.llm_call(
+                credentials=credentials,
+                llm_model=llm.LlmModel.CLAUDE_4_6_SONNET,
+                prompt=[
+                    {"role": "system", "content": "You are an assistant."},
+                    {"role": "user", "content": "Hello"},
+                ],
+                max_tokens=100,
+            )
+
+        system_arg = captured_kwargs.get("system")
+        assert isinstance(system_arg, list), "system should be a list of blocks"
+        assert len(system_arg) == 1
+        block = system_arg[0]
+        assert block["type"] == "text"
+        assert block["text"] == "You are an assistant."
+        assert block.get("cache_control") == {"type": "ephemeral"}
+
+    @pytest.mark.asyncio
+    async def test_last_tool_gets_cache_control(self):
+        """cache_control is placed on the last tool in the Anthropic tools list."""
+        mock_resp = MagicMock()
+        mock_resp.content = [MagicMock(type="text", text="ok")]
+        mock_resp.usage = MagicMock(input_tokens=10, output_tokens=5)
+
+        captured_kwargs: dict = {}
+
+        async def fake_create(**kwargs):
+            captured_kwargs.update(kwargs)
+            return mock_resp
+
+        mock_client = MagicMock()
+        mock_client.messages.create = fake_create
+
+        credentials = self._make_anthropic_credentials()
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "tool_a",
+                    "description": "First tool",
+                    "parameters": {"type": "object", "properties": {}, "required": []},
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "tool_b",
+                    "description": "Second tool",
+                    "parameters": {"type": "object", "properties": {}, "required": []},
+                },
+            },
+        ]
+
+        with patch("anthropic.AsyncAnthropic", return_value=mock_client):
+            await llm.llm_call(
+                credentials=credentials,
+                llm_model=llm.LlmModel.CLAUDE_4_6_SONNET,
+                prompt=[
+                    {"role": "system", "content": "System."},
+                    {"role": "user", "content": "Do something"},
+                ],
+                max_tokens=100,
+                tools=tools,
+            )
+
+        an_tools = captured_kwargs.get("tools")
+        assert isinstance(an_tools, list)
+        assert len(an_tools) == 2
+        assert (
+            an_tools[0].get("cache_control") is None
+        ), "Only last tool gets cache_control"
+        assert an_tools[-1].get("cache_control") == {"type": "ephemeral"}
+
+    @pytest.mark.asyncio
+    async def test_no_tools_no_cache_control_on_tools(self):
+        """When there are no tools, the Anthropic call receives anthropic.NOT_GIVEN for tools."""
+        mock_resp = MagicMock()
+        mock_resp.content = [MagicMock(type="text", text="ok")]
+        mock_resp.usage = MagicMock(input_tokens=5, output_tokens=2)
+
+        captured_kwargs: dict = {}
+
+        async def fake_create(**kwargs):
+            captured_kwargs.update(kwargs)
+            return mock_resp
+
+        mock_client = MagicMock()
+        mock_client.messages.create = fake_create
+
+        credentials = self._make_anthropic_credentials()
+
+        with patch("anthropic.AsyncAnthropic", return_value=mock_client):
+            await llm.llm_call(
+                credentials=credentials,
+                llm_model=llm.LlmModel.CLAUDE_4_6_SONNET,
+                prompt=[
+                    {"role": "system", "content": "System."},
+                    {"role": "user", "content": "Hello"},
+                ],
+                max_tokens=100,
+                tools=None,
+            )
+
+        tools_arg = captured_kwargs.get("tools")
+        assert tools_arg is llm.convert_openai_tool_fmt_to_anthropic(
+            None
+        ), "Empty tools should pass anthropic.NOT_GIVEN sentinel"
+
+    @pytest.mark.asyncio
+    async def test_empty_system_prompt_omits_system_key(self):
+        """When sysprompt is empty, the 'system' key must not be sent to Anthropic.
+
+        Anthropic rejects empty text blocks; the guard in llm_call must ensure
+        the system argument is omitted entirely when no system messages are present.
+        """
+        mock_resp = MagicMock()
+        mock_resp.content = [MagicMock(type="text", text="ok")]
+        mock_resp.usage = MagicMock(input_tokens=3, output_tokens=2)
+
+        captured_kwargs: dict = {}
+
+        async def fake_create(**kwargs):
+            captured_kwargs.update(kwargs)
+            return mock_resp
+
+        mock_client = MagicMock()
+        mock_client.messages.create = fake_create
+
+        credentials = self._make_anthropic_credentials()
+
+        with patch("anthropic.AsyncAnthropic", return_value=mock_client):
+            await llm.llm_call(
+                credentials=credentials,
+                llm_model=llm.LlmModel.CLAUDE_4_6_SONNET,
+                prompt=[{"role": "user", "content": "Hi"}],
+                max_tokens=50,
+            )
+
+        assert (
+            "system" not in captured_kwargs
+        ), "system must be omitted when sysprompt is empty to avoid Anthropic 400"
@@ -27,6 +27,7 @@ from opentelemetry import trace as otel_trace

 from backend.copilot.config import CopilotMode
 from backend.copilot.context import get_workspace_manager, set_execution_context
+from backend.copilot.db import update_message_content_by_sequence
 from backend.copilot.graphiti.config import is_enabled_for_user
 from backend.copilot.model import (
     ChatMessage,
@@ -52,7 +53,7 @@ from backend.copilot.response_model import (
     StreamUsage,
 )
 from backend.copilot.service import (
-    _build_system_prompt,
+    _build_cacheable_system_prompt,
     _get_openai_client,
     _update_title_async,
     config,
@@ -69,6 +70,7 @@ from backend.copilot.transcript import (
     validate_transcript,
 )
 from backend.copilot.transcript_builder import TranscriptBuilder
+from backend.data.understanding import format_understanding_for_prompt
 from backend.util.exceptions import NotFoundError
 from backend.util.prompt import (
     compress_context,
@@ -958,35 +960,34 @@ async def stream_chat_completion_baseline(
     # Build system prompt only on the first turn to avoid mid-conversation
     # changes from concurrent chats updating business understanding.
     is_first_turn = len(session.messages) <= 1
-    if is_first_turn:
-        prompt_task = _build_system_prompt(user_id, has_conversation_history=False)
+    # Gate context fetch on both first turn AND user message so that assistant-
+    # role calls (e.g. tool-result submissions) on the first turn don't trigger
+    # a needless DB lookup for user understanding.
+    should_inject_user_context = is_first_turn and is_user_message
+    if should_inject_user_context:
+        prompt_task = _build_cacheable_system_prompt(user_id)
     else:
-        prompt_task = _build_system_prompt(user_id=None, has_conversation_history=True)
+        prompt_task = _build_cacheable_system_prompt(None)

     # Run download + prompt build concurrently — both are independent I/O
     # on the request critical path.
     if user_id and len(session.messages) > 1:
-        transcript_covers_prefix, (base_system_prompt, _) = await asyncio.gather(
-            _load_prior_transcript(
-                user_id=user_id,
-                session_id=session_id,
-                session_msg_count=len(session.messages),
-                transcript_builder=transcript_builder,
-            ),
-            prompt_task,
+        transcript_covers_prefix, (base_system_prompt, understanding) = (
+            await asyncio.gather(
+                _load_prior_transcript(
+                    user_id=user_id,
+                    session_id=session_id,
+                    session_msg_count=len(session.messages),
+                    transcript_builder=transcript_builder,
+                ),
+                prompt_task,
+            )
         )
     else:
-        base_system_prompt, _ = await prompt_task
+        base_system_prompt, understanding = await prompt_task

-    # Append user message to transcript.
-    # Always append when the message is present and is from the user,
-    # even on duplicate-suppressed retries (is_new_message=False).
-    # The loaded transcript may be stale (uploaded before the previous
-    # attempt stored this message), so skipping it would leave the
-    # transcript without the user turn, creating a malformed
-    # assistant-after-assistant structure when the LLM reply is added.
-    if message and is_user_message:
-        transcript_builder.append_user(content=message)
+    # Append user message to transcript after context injection below so the
+    # transcript receives the prefixed message when user context is available.

     # Generate title for new sessions
     if is_user_message and not session.title:
@@ -1047,6 +1048,48 @@ async def stream_chat_completion_baseline(
         elif msg.role == "user" and msg.content:
             openai_messages.append({"role": msg.role, "content": msg.content})

+    # Inject user context into the first user message on first turn.
+    # Done before attachment/URL injection so the context prefix lands at
+    # the very start of the message content.
+    # The prefixed content is also stored back into session.messages and the
+    # transcript so that resumed sessions and the transcript both carry the
+    # personalisation beyond the first request.
+    user_message_for_transcript = message
+    if should_inject_user_context and understanding:
+        user_ctx = format_understanding_for_prompt(understanding)
+        prefixed: str | None = None
+        for msg in openai_messages:
+            if msg["role"] == "user":
+                prefixed = (
+                    f"<user_context>\n{user_ctx}\n</user_context>\n\n{msg['content']}"
+                )
+                msg["content"] = prefixed
+                break
+        if prefixed is not None:
+            # Persist the prefixed content so subsequent turns and --resume
+            # retain the user context.
+            # The user message was already saved to DB before context injection
+            # (at ~line 932); update the DB record so the prefixed content
+            # survives page reload.
+            for idx, session_msg in enumerate(session.messages):
+                if session_msg.role == "user":
+                    session_msg.content = prefixed
+                    await update_message_content_by_sequence(session_id, idx, prefixed)
+                    break
+            user_message_for_transcript = prefixed
+        else:
+            logger.warning("[Baseline] No user message found for context injection")
+
+    # Append user message to transcript.
+    # Always append when the message is present and is from the user,
+    # even on duplicate-suppressed retries (is_new_message=False).
+    # The loaded transcript may be stale (uploaded before the previous
+    # attempt stored this message), so skipping it would leave the
+    # transcript without the user turn, creating a malformed
+    # assistant-after-assistant structure when the LLM reply is added.
+    if message and is_user_message:
+        transcript_builder.append_user(content=user_message_for_transcript or message)
+
     # --- File attachments (feature parity with SDK path) ---
     working_dir: str | None = None
     attachment_hint = ""
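A small sketch of the first-turn rewrite performed above: the business-understanding text is wrapped in a <user_context> block and prepended to the first user message. The understanding string here is a made-up stand-in for the output of format_understanding_for_prompt.

```python
# Sketch of the first-turn message rewrite shown in the hunk above. The
# user_ctx value is a hypothetical stand-in for format_understanding_for_prompt
# output; the message text is illustrative.
def inject_user_context(messages: list[dict], user_ctx: str) -> list[dict]:
    """Prefix the first user message with a <user_context> block."""
    for msg in messages:
        if msg["role"] == "user":
            msg["content"] = (
                f"<user_context>\n{user_ctx}\n</user_context>\n\n{msg['content']}"
            )
            break
    return messages


messages = [{"role": "user", "content": "Build me a weekly report agent."}]
user_ctx = "Runs a small e-commerce store; prefers concise answers."  # assumed
print(inject_user_context(messages, user_ctx)[0]["content"])
```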
@@ -498,6 +498,42 @@ async def update_tool_message_content(
         return False


+async def update_message_content_by_sequence(
+    session_id: str,
+    sequence: int,
+    new_content: str,
+) -> bool:
+    """Update the content of a specific message by its sequence number.
+
+    Used to persist content modifications (e.g. user-context prefix injection)
+    to a message that was already saved to the DB.
+
+    Args:
+        session_id: The chat session ID.
+        sequence: The 0-based sequence number of the message to update.
+        new_content: The new content to set.
+
+    Returns:
+        True if a message was updated, False otherwise.
+    """
+    try:
+        result = await PrismaChatMessage.prisma().update_many(
+            where={"sessionId": session_id, "sequence": sequence},
+            data={"content": sanitize_string(new_content)},
+        )
+        if result == 0:
+            logger.warning(
+                f"No message found to update for session {session_id}, sequence {sequence}"
+            )
+            return False
+        return True
+    except Exception as e:
+        logger.error(
+            f"Failed to update message for session {session_id}, sequence {sequence}: {e}"
+        )
+        return False
+
+
 async def set_turn_duration(session_id: str, duration_ms: int) -> None:
     """Set durationMs on the last assistant message in a session.

@@ -14,6 +14,7 @@ from backend.copilot.db import (
     PaginatedMessages,
     get_chat_messages_paginated,
     set_turn_duration,
+    update_message_content_by_sequence,
 )
 from backend.copilot.model import ChatMessage as CopilotChatMessage
 from backend.copilot.model import ChatSession, get_chat_session, upsert_chat_session
@@ -386,3 +387,53 @@ async def test_set_turn_duration_no_assistant_message(setup_test_user, test_user
     assert cached is not None
     # User message should not have durationMs
     assert cached.messages[0].duration_ms is None
+
+
+# ---------- update_message_content_by_sequence ----------
+
+
+@pytest.mark.asyncio
+async def test_update_message_content_by_sequence_success():
+    """Returns True when update_many reports at least one row updated."""
+    with patch.object(PrismaChatMessage, "prisma") as mock_prisma:
+        mock_prisma.return_value.update_many = AsyncMock(return_value=1)
+
+        result = await update_message_content_by_sequence("sess-1", 0, "new content")
+
+        assert result is True
+        mock_prisma.return_value.update_many.assert_called_once_with(
+            where={"sessionId": "sess-1", "sequence": 0},
+            data={"content": "new content"},
+        )
+
+
+@pytest.mark.asyncio
+async def test_update_message_content_by_sequence_not_found():
+    """Returns False and logs a warning when no rows are updated."""
+    with (
+        patch.object(PrismaChatMessage, "prisma") as mock_prisma,
+        patch("backend.copilot.db.logger") as mock_logger,
+    ):
+        mock_prisma.return_value.update_many = AsyncMock(return_value=0)
+
+        result = await update_message_content_by_sequence("sess-1", 99, "content")
+
+        assert result is False
+        mock_logger.warning.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_update_message_content_by_sequence_db_error():
+    """Returns False and logs an error when the DB raises an exception."""
+    with (
+        patch.object(PrismaChatMessage, "prisma") as mock_prisma,
+        patch("backend.copilot.db.logger") as mock_logger,
+    ):
+        mock_prisma.return_value.update_many = AsyncMock(
+            side_effect=RuntimeError("db error")
+        )
+
+        result = await update_message_content_by_sequence("sess-1", 0, "content")
+
+        assert result is False
+        mock_logger.error.assert_called_once()
autogpt_platform/backend/backend/copilot/prompt_cache_test.py (new file, 146 lines)
@@ -0,0 +1,146 @@
+"""Unit tests for the cacheable system prompt building logic.
+
+These tests verify that _build_cacheable_system_prompt:
+- Returns the static _CACHEABLE_SYSTEM_PROMPT when no user_id is given
+- Returns the static prompt + understanding when user_id is given
+- Falls through to _CACHEABLE_SYSTEM_PROMPT when Langfuse is not configured
+- Returns the Langfuse-compiled prompt when Langfuse is configured
+- Handles DB errors and Langfuse errors gracefully
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+_SVC = "backend.copilot.service"
+
+
+class TestBuildCacheableSystemPrompt:
+    @pytest.mark.asyncio
+    async def test_no_user_id_returns_static_prompt(self):
+        """When user_id is None, no DB lookup happens and the static prompt is returned."""
+        with (patch(f"{_SVC}._is_langfuse_configured", return_value=False),):
+            from backend.copilot.service import (
+                _CACHEABLE_SYSTEM_PROMPT,
+                _build_cacheable_system_prompt,
+            )
+
+            prompt, understanding = await _build_cacheable_system_prompt(None)
+
+            assert prompt == _CACHEABLE_SYSTEM_PROMPT
+            assert understanding is None
+
+    @pytest.mark.asyncio
+    async def test_with_user_id_fetches_understanding(self):
+        """When user_id is provided, understanding is fetched and returned alongside prompt."""
+        fake_understanding = MagicMock()
+        mock_db = MagicMock()
+        mock_db.get_business_understanding = AsyncMock(return_value=fake_understanding)
+
+        with (
+            patch(f"{_SVC}._is_langfuse_configured", return_value=False),
+            patch(f"{_SVC}.understanding_db", return_value=mock_db),
+        ):
+            from backend.copilot.service import (
+                _CACHEABLE_SYSTEM_PROMPT,
+                _build_cacheable_system_prompt,
+            )
+
+            prompt, understanding = await _build_cacheable_system_prompt("user-123")
+
+            assert prompt == _CACHEABLE_SYSTEM_PROMPT
+            assert understanding is fake_understanding
+            mock_db.get_business_understanding.assert_called_once_with("user-123")
+
+    @pytest.mark.asyncio
+    async def test_db_error_returns_prompt_with_no_understanding(self):
+        """When the DB raises an exception, understanding is None and prompt is still returned."""
+        mock_db = MagicMock()
+        mock_db.get_business_understanding = AsyncMock(
+            side_effect=RuntimeError("db down")
+        )
+
+        with (
+            patch(f"{_SVC}._is_langfuse_configured", return_value=False),
+            patch(f"{_SVC}.understanding_db", return_value=mock_db),
+        ):
+            from backend.copilot.service import (
+                _CACHEABLE_SYSTEM_PROMPT,
+                _build_cacheable_system_prompt,
+            )
+
+            prompt, understanding = await _build_cacheable_system_prompt("user-456")
+
+            assert prompt == _CACHEABLE_SYSTEM_PROMPT
+            assert understanding is None
+
+    @pytest.mark.asyncio
+    async def test_langfuse_compiled_prompt_returned(self):
+        """When Langfuse is configured and returns a prompt, the compiled text is returned."""
+        fake_understanding = MagicMock()
+        mock_db = MagicMock()
+        mock_db.get_business_understanding = AsyncMock(return_value=fake_understanding)
+
+        langfuse_prompt_text = "You are a Langfuse-sourced assistant."
+        mock_prompt_obj = MagicMock()
+        mock_prompt_obj.compile.return_value = langfuse_prompt_text
+
+        mock_langfuse = MagicMock()
+        mock_langfuse.get_prompt.return_value = mock_prompt_obj
+
+        with (
+            patch(f"{_SVC}._is_langfuse_configured", return_value=True),
+            patch(f"{_SVC}.understanding_db", return_value=mock_db),
+            patch(f"{_SVC}._get_langfuse", return_value=mock_langfuse),
+            patch(
+                f"{_SVC}.asyncio.to_thread", new=AsyncMock(return_value=mock_prompt_obj)
+            ),
+        ):
+            from backend.copilot.service import _build_cacheable_system_prompt
+
+            prompt, understanding = await _build_cacheable_system_prompt("user-789")
+
+            assert prompt == langfuse_prompt_text
+            assert understanding is fake_understanding
+            mock_prompt_obj.compile.assert_called_once_with(users_information="")
+
+    @pytest.mark.asyncio
+    async def test_langfuse_error_falls_back_to_static_prompt(self):
+        """When Langfuse raises an error, the fallback _CACHEABLE_SYSTEM_PROMPT is used."""
+        mock_db = MagicMock()
+        mock_db.get_business_understanding = AsyncMock(return_value=None)
+
+        with (
+            patch(f"{_SVC}._is_langfuse_configured", return_value=True),
+            patch(f"{_SVC}.understanding_db", return_value=mock_db),
+            patch(
+                f"{_SVC}.asyncio.to_thread",
+                new=AsyncMock(side_effect=RuntimeError("langfuse down")),
+            ),
+        ):
+            from backend.copilot.service import (
+                _CACHEABLE_SYSTEM_PROMPT,
+                _build_cacheable_system_prompt,
+            )
+
+            prompt, understanding = await _build_cacheable_system_prompt("user-000")
+
+            assert prompt == _CACHEABLE_SYSTEM_PROMPT
+            assert understanding is None
+
+
+class TestCacheableSystemPromptContent:
+    """Smoke-test the _CACHEABLE_SYSTEM_PROMPT constant for key structural requirements."""
+
+    def test_cacheable_prompt_has_no_placeholder(self):
+        """The static cacheable prompt must not contain format placeholders."""
+        from backend.copilot.service import _CACHEABLE_SYSTEM_PROMPT
+
+        assert "{users_information}" not in _CACHEABLE_SYSTEM_PROMPT
+        assert "{" not in _CACHEABLE_SYSTEM_PROMPT
+
+    def test_cacheable_prompt_mentions_user_context(self):
+        """The prompt instructs the model to parse <user_context> blocks."""
+        from backend.copilot.service import _CACHEABLE_SYSTEM_PROMPT
+
+        assert "user_context" in _CACHEABLE_SYSTEM_PROMPT
@@ -988,7 +988,7 @@ def _make_sdk_patches(
            dict(return_value=MagicMock(__enter__=MagicMock(), __exit__=MagicMock())),
        ),
        (
-            f"{_SVC}._build_system_prompt",
+            f"{_SVC}._build_cacheable_system_prompt",
            dict(new_callable=AsyncMock, return_value=("system prompt", None)),
        ),
        (
@@ -48,6 +48,7 @@ from backend.copilot.transcript import (
 )
 from backend.copilot.transcript_builder import TranscriptBuilder
 from backend.data.redis_client import get_redis_async
+from backend.data.understanding import format_understanding_for_prompt
 from backend.executor.cluster_lock import AsyncClusterLock
 from backend.util.exceptions import NotFoundError
 from backend.util.settings import Settings
@@ -61,6 +62,7 @@ from ..constants import (
     is_transient_api_error,
 )
 from ..context import encode_cwd_for_cli
+from ..db import update_message_content_by_sequence
 from ..graphiti.config import is_enabled_for_user
 from ..model import (
     ChatMessage,
@@ -85,7 +87,11 @@ from ..response_model import (
     StreamToolOutputAvailable,
     StreamUsage,
 )
-from ..service import _build_system_prompt, _is_langfuse_configured, _update_title_async
+from ..service import (
+    _build_cacheable_system_prompt,
+    _is_langfuse_configured,
+    _update_title_async,
+)
 from ..token_tracking import persist_and_record_usage
 from ..tools.e2b_sandbox import get_or_create_sandbox, pause_sandbox_direct
 from ..tools.sandbox import WORKSPACE_PREFIX, make_session_path
@@ -2052,9 +2058,9 @@ async def stream_chat_completion_sdk(
        )
        return None

-    e2b_sandbox, (base_system_prompt, _), dl = await asyncio.gather(
+    e2b_sandbox, (base_system_prompt, understanding), dl = await asyncio.gather(
         _setup_e2b(),
-        _build_system_prompt(user_id, has_conversation_history=has_history),
+        _build_cacheable_system_prompt(user_id if not has_history else None),
         _fetch_transcript(),
     )

@@ -2285,6 +2291,30 @@ async def stream_chat_completion_sdk(
                transcript_msg_count,
                session_id,
            )
+            # On the first turn inject user context into the message instead of the
+            # system prompt — the system prompt is now static (same for all users)
+            # so the LLM can cache it across sessions.
+            # current_message is updated so the transcript and session.messages also
+            # store the prefixed content, preserving personalisation across turns and
+            # on --resume.
+            if not has_history and understanding:
+                user_ctx = format_understanding_for_prompt(understanding)
+                prefixed_message = (
+                    f"<user_context>\n{user_ctx}\n</user_context>\n\n{current_message}"
+                )
+                current_message = prefixed_message
+                query_message = prefixed_message
+                # Persist the prefixed content so resumed sessions retain the context.
+                # The user message was already saved to DB before context injection;
+                # update the DB record so the prefixed content survives page reload
+                # and --resume (the save at line ~1926 used the un-prefixed content).
+                for idx, session_msg in enumerate(session.messages):
+                    if session_msg.role == "user":
+                        session_msg.content = prefixed_message
+                        await update_message_content_by_sequence(
+                            session_id, idx, prefixed_message
+                        )
+                        break
            # If files are attached, prepare them: images become vision
            # content blocks in the user message, other files go to sdk_cwd.
            attachments = await _prepare_file_attachments(
@@ -70,6 +70,21 @@ Your goal is to help users automate tasks by:

 Be concise, proactive, and action-oriented. Bias toward showing working solutions over lengthy explanations."""

+# Static system prompt for token caching — identical for all users.
+# User-specific context is injected into the first user message instead,
+# so the system prompt never changes and can be cached across all sessions.
+_CACHEABLE_SYSTEM_PROMPT = """You are an AI automation assistant helping users build and run automations.
+
+Your goal is to help users automate tasks by:
+- Understanding their needs and business context
+- Building and running working automations
+- Delivering tangible value through action, not just explanation
+
+Be concise, proactive, and action-oriented. Bias toward showing working solutions over lengthy explanations.
+
+When the user provides a <user_context> block in their message, use it to personalise your responses.
+For users you are meeting for the first time with no context provided, greet them warmly and introduce them to the AutoGPT platform."""
+

 # ---------------------------------------------------------------------------
 # Shared helpers (used by SDK service and baseline)
@@ -150,6 +165,50 @@ async def _build_system_prompt(
     return compiled, understanding


+async def _build_cacheable_system_prompt(
+    user_id: str | None,
+) -> tuple[str, Any]:
+    """Build a fully static system prompt suitable for LLM token caching.
+
+    Unlike _build_system_prompt, user-specific context is NOT embedded here.
+    Callers must inject the returned understanding into the first user message
+    via format_understanding_for_prompt() so the system prompt stays identical
+    across all users and sessions, enabling cross-session cache hits.
+
+    Returns:
+        Tuple of (static_prompt, understanding_object_or_None)
+    """
+    understanding = None
+    if user_id:
+        try:
+            understanding = await understanding_db().get_business_understanding(user_id)
+        except Exception as e:
+            logger.warning(f"Failed to fetch business understanding: {e}")
+
+    if _is_langfuse_configured():
+        try:
+            label = (
+                None
+                if settings.config.app_env == AppEnvironment.PRODUCTION
+                else "latest"
+            )
+            prompt = await asyncio.to_thread(
+                _get_langfuse().get_prompt,
+                config.langfuse_prompt_name,
+                label=label,
+                cache_ttl_seconds=config.langfuse_prompt_cache_ttl,
+            )
+            # Pass empty string so existing Langfuse templates stay static
+            compiled = prompt.compile(users_information="")
+            return compiled, understanding
+        except Exception as e:
+            logger.warning(
+                f"Failed to fetch cacheable prompt from Langfuse, using default: {e}"
+            )
+
+    return _CACHEABLE_SYSTEM_PROMPT, understanding
+
+
 async def _generate_session_title(
     message: str,
     user_id: str | None = None,
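Sketched caller usage of the new helper, following the pattern the baseline and SDK paths use above: pass the user_id only when user context should be fetched, and inject the returned understanding into the user message rather than the system prompt. The call site and variable names here are illustrative, not code from the PR.

```python
# Illustrative caller flow: the system prompt stays static so it can be
# cached, and the returned understanding drives message-level injection.
from backend.copilot.service import _build_cacheable_system_prompt
from backend.data.understanding import format_understanding_for_prompt


async def build_turn_prompt(user_id: str | None, first_turn: bool, message: str):
    # Only fetch understanding when this is the first turn for a known user.
    prompt, understanding = await _build_cacheable_system_prompt(
        user_id if first_turn else None
    )
    if first_turn and understanding:
        user_ctx = format_understanding_for_prompt(understanding)
        message = f"<user_context>\n{user_ctx}\n</user_context>\n\n{message}"
    return prompt, message
```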
@@ -58,7 +58,7 @@ Tool and block identifiers provided in `tools` and `blocks` are validated at run
 | system_context | Optional additional context prepended to the prompt. Use this to constrain autopilot behavior, provide domain context, or set output format requirements. | str | No |
 | session_id | Session ID to continue an existing autopilot conversation. Leave empty to start a new session. Use the session_id output from a previous run to continue. | str | No |
 | max_recursion_depth | Maximum nesting depth when the autopilot calls this block recursively (sub-agent pattern). Prevents infinite loops. | int | No |
-| tools | Tool names to filter. Works with tools_exclude to form an allow-list or deny-list. Leave empty to apply no tool filter. | List["add_understanding" \| "ask_question" \| "bash_exec" \| "browser_act" \| "browser_navigate" \| "browser_screenshot" \| "connect_integration" \| "continue_run_block" \| "create_agent" \| "create_feature_request" \| "create_folder" \| "customize_agent" \| "delete_folder" \| "delete_workspace_file" \| "edit_agent" \| "find_agent" \| "find_block" \| "find_library_agent" \| "fix_agent_graph" \| "get_agent_building_guide" \| "get_doc_page" \| "get_mcp_guide" \| "list_folders" \| "list_workspace_files" \| "move_agents_to_folder" \| "move_folder" \| "read_workspace_file" \| "run_agent" \| "run_block" \| "run_mcp_tool" \| "search_docs" \| "search_feature_requests" \| "update_folder" \| "validate_agent_graph" \| "view_agent_output" \| "web_fetch" \| "write_workspace_file" \| "Agent" \| "Edit" \| "Glob" \| "Grep" \| "Read" \| "Task" \| "TodoWrite" \| "WebSearch" \| "Write"] | No |
+| tools | Tool names to filter. Works with tools_exclude to form an allow-list or deny-list. Leave empty to apply no tool filter. | List["add_understanding" \| "ask_question" \| "bash_exec" \| "browser_act" \| "browser_navigate" \| "browser_screenshot" \| "connect_integration" \| "continue_run_block" \| "create_agent" \| "create_feature_request" \| "create_folder" \| "customize_agent" \| "delete_folder" \| "delete_workspace_file" \| "edit_agent" \| "find_agent" \| "find_block" \| "find_library_agent" \| "fix_agent_graph" \| "get_agent_building_guide" \| "get_doc_page" \| "get_mcp_guide" \| "list_folders" \| "list_workspace_files" \| "memory_search" \| "memory_store" \| "move_agents_to_folder" \| "move_folder" \| "read_workspace_file" \| "run_agent" \| "run_block" \| "run_mcp_tool" \| "search_docs" \| "search_feature_requests" \| "update_folder" \| "validate_agent_graph" \| "view_agent_output" \| "web_fetch" \| "write_workspace_file" \| "Agent" \| "Edit" \| "Glob" \| "Grep" \| "Read" \| "Task" \| "TodoWrite" \| "WebSearch" \| "Write"] | No |
 | tools_exclude | Controls how the 'tools' list is interpreted. True (default): 'tools' is a deny-list — listed tools are blocked, all others are allowed. An empty 'tools' list means allow everything. False: 'tools' is an allow-list — only listed tools are permitted. | bool | No |
 | blocks | Block identifiers to filter when the copilot uses run_block. Each entry can be: a block name (e.g. 'HTTP Request'), a full block UUID, or the first 8 hex characters of the UUID (e.g. 'c069dc6b'). Works with blocks_exclude. Leave empty to apply no block filter. | List[str] | No |
 | blocks_exclude | Controls how the 'blocks' list is interpreted. True (default): 'blocks' is a deny-list — listed blocks are blocked, all others are allowed. An empty 'blocks' list means allow everything. False: 'blocks' is an allow-list — only listed blocks are permitted. | bool | No |
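To make the two filter modes in the table concrete, here is a hypothetical pair of input payloads; the tool names are taken from the list above, and the values are otherwise assumed.

```python
# Deny-list mode (the default): everything except the listed tools is allowed.
deny_list_inputs = {
    "tools": ["bash_exec"],
    "tools_exclude": True,
}

# Allow-list mode: only the listed tools may run.
allow_list_inputs = {
    "tools": ["find_block", "run_block"],
    "tools_exclude": False,
}
```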
New binary files (test screenshots, not shown):

- test-screenshots/PR-12725/01-copilot-page.png (101 KiB)
- test-screenshots/PR-12725/02-chat-response.png (67 KiB)
- test-screenshots/PR-12725/03-chat-session.png (67 KiB)
- test-screenshots/PR-12725/04-chat-with-messages.png (66 KiB)