From 1b67d4cf8ad8baef67885d4ee90d1246971d8cb3 Mon Sep 17 00:00:00 2001 From: majdyz Date: Wed, 15 Apr 2026 14:38:39 +0700 Subject: [PATCH] fix(backend/copilot): make system prompt fully static for cross-user prompt caching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The system prompt was not cacheable across sessions or users due to two sources of per-session dynamic content leaking into it: 1. sdk_cwd (/tmp/copilot-<session-id>) was embedded in the storage supplement via get_sdk_supplement(cwd=sdk_cwd). Every session has a unique UUID, making the system prompt unique per session — cache miss every first message. 2. Graphiti warm_ctx (user-specific memory facts) was appended directly to the system prompt on the first turn, making it unique per user per turn. Fix both by keeping the system prompt fully static: - get_sdk_supplement now ignores cwd and uses the constant placeholder "/tmp/copilot-<session-id>" in the supplement text. The actual cwd is still passed to ClaudeAgentOptions.cwd so the subprocess uses the right directory. - warm_ctx is now injected into the first user message as a trusted <graphiti-memory> block (before inject_user_context runs), so it is persisted to DB alongside the <user-context> prefix and replayed correctly on --resume without re-fetching. After this change all users share the same system prompt text — one cache write globally per model, then cache reads for everyone. 
--- .../backend/backend/copilot/prompting.py | 22 ++++++++++++++-- .../backend/backend/copilot/sdk/service.py | 26 +++++++++++++------ 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/prompting.py b/autogpt_platform/backend/backend/copilot/prompting.py index c500a2b865..d93acd52d4 100644 --- a/autogpt_platform/backend/backend/copilot/prompting.py +++ b/autogpt_platform/backend/backend/copilot/prompting.py @@ -331,6 +331,9 @@ def _generate_tool_documentation() -> str: return docs +_LOCAL_STORAGE_SUPPLEMENT: str | None = None + + def get_sdk_supplement(use_e2b: bool, cwd: str = "") -> str: """Get the supplement for SDK mode (Claude Agent SDK). @@ -338,16 +341,31 @@ def get_sdk_supplement(use_e2b: bool, cwd: str = "") -> str: receives tool schemas from the SDK. Only includes technical notes about storage systems and execution environment. + The system prompt must be **identical across all sessions and users** to + enable cross-session LLM prompt-cache hits (Anthropic caches on exact + content). To preserve this invariant, the local-mode supplement uses a + generic placeholder for the working directory instead of the real + session-specific UUID path. The actual ``cwd`` is passed to the CLI + subprocess via ``ClaudeAgentOptions.cwd`` so the model's shell commands + land in the right directory; the model can run ``pwd`` to confirm the + exact path. + Args: use_e2b: Whether E2B cloud sandbox is being used - cwd: Current working directory (only used in local_storage mode) + cwd: Unused — kept for call-site compatibility. 
Returns: The supplement string to append to the system prompt """ + del cwd # intentionally unused — see docstring if use_e2b: return _get_cloud_sandbox_supplement() - return _get_local_storage_supplement(cwd) + global _LOCAL_STORAGE_SUPPLEMENT + if _LOCAL_STORAGE_SUPPLEMENT is None: + _LOCAL_STORAGE_SUPPLEMENT = _get_local_storage_supplement( + "/tmp/copilot-<session-id>" + ) + return _LOCAL_STORAGE_SUPPLEMENT def get_graphiti_supplement() -> str: diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index f291d96431..b319316520 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -2172,13 +2172,15 @@ async def stream_chat_completion_sdk( + graphiti_supplement ) - # Warm context: pre-load relevant facts from Graphiti on first turn + # Warm context: pre-load relevant facts from Graphiti on first turn. + # Stored here and injected into the first user message (not the system + # prompt) so the system prompt stays identical across all users and + # sessions, enabling cross-session Anthropic prompt-cache hits. + warm_ctx = "" if graphiti_enabled and user_id and len(session.messages) <= 1: from backend.copilot.graphiti.context import fetch_warm_context - warm_ctx = await fetch_warm_context(user_id, message or "") - if warm_ctx: - system_prompt += f"\n\n{warm_ctx}" + warm_ctx = await fetch_warm_context(user_id, message or "") or "" # Process transcript download result and restore CLI native session. # The CLI native session file (uploaded after each turn) is the @@ -2434,11 +2436,19 @@ # cache it across sessions. # # On resume (has_history=True) we intentionally skip re-injection: the - # transcript already contains the <user-context> prefix from the original - # turn (persisted to the DB in inject_user_context), so the SDK replay - # carries context continuity without us prepending it again. 
Adding it - # a second time would duplicate the block and inflate tokens. + # transcript already contains the and + # prefixes from the original turn (persisted to the DB via + # inject_user_context), so the SDK replay carries context continuity + # without us prepending them again. if not has_history: + # Prepend Graphiti warm context as a trusted block + # so it reaches the LLM without polluting the (cached) system prompt. + # inject_user_context will persist the full prefixed message to DB. + if warm_ctx: + current_message = ( + f"\n{warm_ctx}\n\n\n" + + current_message + ) prefixed_message = await inject_user_context( understanding, current_message, session_id, session.messages )