mirror of https://github.com/Significant-Gravitas/AutoGPT.git
fix(backend/copilot): make system prompt fully static for cross-user prompt caching
The system prompt was not cacheable across sessions or users because two sources of per-session dynamic content leaked into it:

1. sdk_cwd (/tmp/copilot-<uuid>) was embedded in the storage supplement via
   get_sdk_supplement(cwd=sdk_cwd). Every session has a unique UUID, so the
   system prompt was unique per session — a cache miss on every first message.
2. Graphiti warm_ctx (user-specific memory facts) was appended directly to the
   system prompt on the first turn, making it unique per user per turn.

Fix both by keeping the system prompt fully static:

- get_sdk_supplement now ignores cwd and uses the constant placeholder
  "/tmp/copilot-<session-id>" in the supplement text. The actual cwd is still
  passed to ClaudeAgentOptions.cwd so the subprocess uses the right directory.
- warm_ctx is now injected into the first user message as a trusted
  <memory_context> block (before inject_user_context runs), so it is persisted
  to the DB alongside the <user_context> prefix and replayed correctly on
  --resume without re-fetching.

After this change all users share the same system prompt text — one cache write
globally per model, then cache reads for everyone.
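Illustrative sketch (not part of this commit): Anthropic's prompt cache keys on exact prompt content, so any session-specific substring in the system prompt defeats it. The build_system_prompt helper and the prompt text below are hypothetical stand-ins for the real prompt assembly.

import uuid

STATIC_SUPPLEMENT = "Working directory: /tmp/copilot-<session-id>"  # constant placeholder


def build_system_prompt(supplement: str) -> str:
    # Hypothetical stand-in for the real prompt assembly in the copilot backend.
    return "You are the AutoGPT copilot.\n\n" + supplement


# Before: a fresh UUID per session made every system prompt unique,
# so the first message of each session was a guaranteed cache miss.
a = build_system_prompt(f"Working directory: /tmp/copilot-{uuid.uuid4()}")
b = build_system_prompt(f"Working directory: /tmp/copilot-{uuid.uuid4()}")
assert a != b

# After: the constant placeholder keeps the prompt byte-identical for every
# session and user, so all requests share one cache entry per model.
assert build_system_prompt(STATIC_SUPPLEMENT) == build_system_prompt(STATIC_SUPPLEMENT)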
@@ -331,6 +331,9 @@ def _generate_tool_documentation() -> str:
     return docs
 
 
+_LOCAL_STORAGE_SUPPLEMENT: str | None = None
+
+
 def get_sdk_supplement(use_e2b: bool, cwd: str = "") -> str:
     """Get the supplement for SDK mode (Claude Agent SDK).
 
@@ -338,16 +341,31 @@ def get_sdk_supplement(use_e2b: bool, cwd: str = "") -> str:
     receives tool schemas from the SDK. Only includes technical notes about
     storage systems and execution environment.
 
+    The system prompt must be **identical across all sessions and users** to
+    enable cross-session LLM prompt-cache hits (Anthropic caches on exact
+    content). To preserve this invariant, the local-mode supplement uses a
+    generic placeholder for the working directory instead of the real
+    session-specific UUID path. The actual ``cwd`` is passed to the CLI
+    subprocess via ``ClaudeAgentOptions.cwd`` so the model's shell commands
+    land in the right directory; the model can run ``pwd`` to confirm the
+    exact path.
+
     Args:
         use_e2b: Whether E2B cloud sandbox is being used
-        cwd: Current working directory (only used in local_storage mode)
+        cwd: Unused — kept for call-site compatibility.
 
     Returns:
         The supplement string to append to the system prompt
     """
+    del cwd  # intentionally unused — see docstring
     if use_e2b:
         return _get_cloud_sandbox_supplement()
-    return _get_local_storage_supplement(cwd)
+    global _LOCAL_STORAGE_SUPPLEMENT
+    if _LOCAL_STORAGE_SUPPLEMENT is None:
+        _LOCAL_STORAGE_SUPPLEMENT = _get_local_storage_supplement(
+            "/tmp/copilot-<session-id>"
+        )
+    return _LOCAL_STORAGE_SUPPLEMENT
 
 
 def get_graphiti_supplement() -> str:
@@ -2172,13 +2172,15 @@ async def stream_chat_completion_sdk(
         + graphiti_supplement
     )
 
-    # Warm context: pre-load relevant facts from Graphiti on first turn
+    # Warm context: pre-load relevant facts from Graphiti on first turn.
+    # Stored here and injected into the first user message (not the system
+    # prompt) so the system prompt stays identical across all users and
+    # sessions, enabling cross-session Anthropic prompt-cache hits.
+    warm_ctx = ""
     if graphiti_enabled and user_id and len(session.messages) <= 1:
         from backend.copilot.graphiti.context import fetch_warm_context
 
-        warm_ctx = await fetch_warm_context(user_id, message or "")
-        if warm_ctx:
-            system_prompt += f"\n\n{warm_ctx}"
+        warm_ctx = await fetch_warm_context(user_id, message or "") or ""
 
     # Process transcript download result and restore CLI native session.
     # The CLI native session file (uploaded after each turn) is the
@@ -2434,11 +2436,19 @@ async def stream_chat_completion_sdk(
     # cache it across sessions.
     #
     # On resume (has_history=True) we intentionally skip re-injection: the
-    # transcript already contains the <user_context> prefix from the original
-    # turn (persisted to the DB in inject_user_context), so the SDK replay
-    # carries context continuity without us prepending it again. Adding it
-    # a second time would duplicate the block and inflate tokens.
+    # transcript already contains the <user_context> and <memory_context>
+    # prefixes from the original turn (persisted to the DB via
+    # inject_user_context), so the SDK replay carries context continuity
+    # without us prepending them again.
     if not has_history:
+        # Prepend Graphiti warm context as a trusted <memory_context> block
+        # so it reaches the LLM without polluting the (cached) system prompt.
+        # inject_user_context will persist the full prefixed message to DB.
+        if warm_ctx:
+            current_message = (
+                f"<memory_context>\n{warm_ctx}\n</memory_context>\n\n"
+                + current_message
+            )
         prefixed_message = await inject_user_context(
             understanding, current_message, session_id, session.messages
         )
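Distilled for reference, a hypothetical standalone version of the first-turn injection the last hunk adds (function name invented for illustration; the real code inlines this logic before calling inject_user_context):

def prepend_memory_context(message: str, warm_ctx: str) -> str:
    # Memory facts ride in the (uncached) first user message rather than the
    # system prompt, so the system prompt stays byte-identical across users.
    if not warm_ctx:
        return message
    return f"<memory_context>\n{warm_ctx}\n</memory_context>\n\n" + message


assert prepend_memory_context("Build me an agent", "User prefers dark mode") == (
    "<memory_context>\nUser prefers dark mode\n</memory_context>\n\n"
    "Build me an agent"
)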