From e17914d393fdcc42e46d1bb8818112aeb375e5a7 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Tue, 14 Apr 2026 21:30:28 +0700 Subject: [PATCH] perf(backend): enable cross-user prompt caching via SystemPromptPreset (#12758) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Use `SystemPromptPreset` with `exclude_dynamic_sections=True` in the SDK path so the Claude Code default prompt serves as a cacheable prefix shared across all users, reducing input token cost by ~90% - Add `claude_agent_cross_user_prompt_cache` config field (default `True`) to make this configurable, with fallback to raw string when disabled - Extract `_build_system_prompt_value()` helper for testability, typed with the SDK's `SystemPromptPreset` (imported from `claude_agent_sdk.types`) for proper type annotation > **Depends on #12747** — requires SDK >=0.1.58 which adds `SystemPromptPreset` with `exclude_dynamic_sections`. Must be merged after #12747. ## Changes - **`config.py`**: New `claude_agent_cross_user_prompt_cache: bool = True` field on `ChatConfig` - **`sdk/service.py`**: Imports `SystemPromptPreset` from `claude_agent_sdk.types` for type safety; `_build_system_prompt_value()` helper that constructs the preset dict or returns the raw string; both the initial call site and the retry path use the helper and disable the preset on `--resume` turns - **`sdk/sdk_compat_test.py`**: Compat tests verifying that `ClaudeAgentOptions` accepts the preset dict produced by the helper, and that the helper returns a plain string when caching is off - **`sdk/service_test.py`**: Tests exercise the production `_build_system_prompt_value()` helper directly — verifying preset dict structure (enabled), raw string fallback (disabled), default config value, and the env-var override ## How it works The Claude Code CLI supports `SystemPromptPreset`, which uses the built-in Claude Code default prompt as a static prefix. By setting `exclude_dynamic_sections=True`, per-user dynamic sections (working dir, git status, auto-memory) are stripped from that prefix so it stays identical across users and benefits from Anthropic's prompt caching. Our custom prompt (tool notes, supplements, graphiti context) is appended after the cacheable prefix. The preset is only used on non-resumed turns: CLI 2.1.97 (SDK 0.1.58) exits with code 1 when `excludeDynamicSections=True` is combined with `--resume`, so resumed turns fall back to the raw string. 
## Test plan - [x] CI passes (formatting, linting, unit tests) - [x] Verify `_build_system_prompt_value()` returns correct preset dict when enabled - [x] Verify fallback to raw string when `CHAT_CLAUDE_AGENT_CROSS_USER_PROMPT_CACHE=false` --- .gitignore | 1 + .../backend/backend/copilot/config.py | 9 +++ .../backend/copilot/sdk/sdk_compat_test.py | 34 ++++++++++ .../backend/backend/copilot/sdk/service.py | 52 ++++++++++++++- .../backend/copilot/sdk/service_test.py | 63 +++++++++++++++++++ 5 files changed, 158 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 2b209b957a..97d6b18a76 100644 --- a/.gitignore +++ b/.gitignore @@ -194,3 +194,4 @@ test.db .next # Implementation plans (generated by AI agents) plans/ +.claude/worktrees/ diff --git a/autogpt_platform/backend/backend/copilot/config.py b/autogpt_platform/backend/backend/copilot/config.py index 28fa24f868..cfbc6feef4 100644 --- a/autogpt_platform/backend/backend/copilot/config.py +++ b/autogpt_platform/backend/backend/copilot/config.py @@ -197,6 +197,15 @@ class ChatConfig(BaseSettings): description="Maximum number of retries for transient API errors " "(429, 5xx, ECONNRESET) before surfacing the error to the user.", ) + claude_agent_cross_user_prompt_cache: bool = Field( + default=True, + description="Enable cross-user prompt caching via SystemPromptPreset. " + "The Claude Code default prompt becomes a cacheable prefix shared " + "across all users, and our custom prompt is appended after it. " + "Dynamic sections (working dir, git status, auto-memory) are excluded " + "from the prefix. Set to False to fall back to passing the system " + "prompt as a raw string.", + ) claude_agent_cli_path: str | None = Field( default=None, description="Optional explicit path to a Claude Code CLI binary. 
" diff --git a/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py b/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py index c705d26c22..5d132aa94d 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py @@ -7,6 +7,7 @@ tests will catch it immediately. """ import inspect +from typing import cast import pytest @@ -90,6 +91,39 @@ def test_agent_options_accepts_required_fields(): assert opts.cwd == "/tmp" +def test_agent_options_accepts_system_prompt_preset_with_exclude_dynamic_sections(): + """Verify ClaudeAgentOptions accepts the exact preset dict _build_system_prompt_value produces. + + The production code always includes ``exclude_dynamic_sections=True`` in the preset + dict. This compat test mirrors that exact shape so any SDK version that starts + rejecting unknown keys will be caught here rather than at runtime. + """ + from claude_agent_sdk import ClaudeAgentOptions + from claude_agent_sdk.types import SystemPromptPreset + + from .service import _build_system_prompt_value + + # Call the production helper directly so this test is tied to the real + # dict shape rather than a hand-rolled copy. + preset = _build_system_prompt_value("custom system prompt", cross_user_cache=True) + assert isinstance( + preset, dict + ), "_build_system_prompt_value must return a dict when caching is on" + + sdk_preset = cast(SystemPromptPreset, preset) + opts = ClaudeAgentOptions(system_prompt=sdk_preset) + assert opts.system_prompt == sdk_preset + + +def test_build_system_prompt_value_returns_plain_string_when_cross_user_cache_off(): + """When cross_user_cache=False (e.g. 
on --resume turns), the helper must return + a plain string so the preset+resume crash is avoided.""" + from .service import _build_system_prompt_value + + result = _build_system_prompt_value("my prompt", cross_user_cache=False) + assert result == "my prompt", "Must return the raw string, not a preset dict" + + def test_agent_options_accepts_all_our_fields(): """Comprehensive check of every field we use in service.py.""" from claude_agent_sdk import ClaudeAgentOptions diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index 209b5fb056..f291d96431 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -29,6 +29,7 @@ from claude_agent_sdk import ( ToolResultBlock, ToolUseBlock, ) +from claude_agent_sdk.types import SystemPromptPreset from langfuse import propagate_attributes from langsmith.integrations.claude_agent_sdk import configure_claude_agent_sdk from opentelemetry import trace as otel_trace @@ -705,6 +706,34 @@ def _is_fallback_stderr(line: str) -> bool: return "fallback model" in line.lower() +def _build_system_prompt_value( + system_prompt: str, + cross_user_cache: bool, +) -> str | SystemPromptPreset: + """Build the ``system_prompt`` argument for :class:`ClaudeAgentOptions`. + + When *cross_user_cache* is enabled, returns a :class:`SystemPromptPreset` + dict so the Claude Code default prompt becomes a cacheable prefix shared + across all users; our custom *system_prompt* is appended after it. + + When disabled, the raw *system_prompt* string is returned unchanged. There is + no fallback for SDKs lacking ``SystemPromptPreset``; it is imported unconditionally. + + An empty *system_prompt* is accepted: the preset dict will have + ``append: ""`` which the SDK treats as no custom suffix. 
+ """ + if cross_user_cache: + logger.debug("Using SystemPromptPreset for cross-user prompt cache") + return SystemPromptPreset( + type="preset", + preset="claude_code", + append=system_prompt, + exclude_dynamic_sections=True, + ) + logger.debug("Cross-user prompt cache disabled, using raw string") + return system_prompt + + def _make_sdk_cwd(session_id: str) -> str: """Create a safe, session-specific working directory path. @@ -2290,8 +2319,19 @@ async def stream_chat_completion_sdk( sid, ) + # Use SystemPromptPreset for cross-user prompt caching. + # WORKAROUND: CLI 2.1.97 (sdk 0.1.58) exits code 1 when + # excludeDynamicSections=True is in the initialize request AND + # --resume is active. Disable the preset on resumed turns. + # Turn 1 still gets the preset (no --resume). + _cross_user = config.claude_agent_cross_user_prompt_cache and not use_resume + system_prompt_value = _build_system_prompt_value( + system_prompt, + cross_user_cache=_cross_user, + ) + sdk_options_kwargs: dict[str, Any] = { - "system_prompt": system_prompt, + "system_prompt": system_prompt_value, "mcp_servers": {"copilot": mcp_server}, "allowed_tools": allowed, "disallowed_tools": disallowed, @@ -2528,6 +2568,16 @@ async def stream_chat_completion_sdk( # The upload guard skips T2+ no-resume turns anyway. sdk_options_kwargs_retry.pop("resume", None) sdk_options_kwargs_retry.pop("session_id", None) + # Recompute system_prompt for retry — ctx.use_resume may have + # changed (context reduction enabled --resume). CLI 2.1.97 + # crashes when excludeDynamicSections=True is combined with + # --resume, so disable the cross-user preset on resumed turns. 
+ _cross_user_retry = ( + config.claude_agent_cross_user_prompt_cache and not ctx.use_resume + ) + sdk_options_kwargs_retry["system_prompt"] = _build_system_prompt_value( + system_prompt, cross_user_cache=_cross_user_retry + ) state.options = ClaudeAgentOptions(**sdk_options_kwargs_retry) # type: ignore[arg-type] # dynamic kwargs state.query_message, state.was_compacted = await _build_query_message( current_message, diff --git a/autogpt_platform/backend/backend/copilot/sdk/service_test.py b/autogpt_platform/backend/backend/copilot/sdk/service_test.py index 5eb9981c5b..caa3d1b597 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service_test.py @@ -8,7 +8,10 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest +from backend.copilot import config as cfg_mod + from .service import ( + _build_system_prompt_value, _is_sdk_disconnect_error, _normalize_model_name, _prepare_file_attachments, @@ -397,6 +400,7 @@ _CONFIG_ENV_VARS = ( "OPENAI_BASE_URL", "CHAT_USE_CLAUDE_CODE_SUBSCRIPTION", "CHAT_USE_CLAUDE_AGENT_SDK", + "CHAT_CLAUDE_AGENT_CROSS_USER_PROMPT_CACHE", ) @@ -656,3 +660,62 @@ class TestSafeCloseSdkClient: client.__aexit__ = AsyncMock(side_effect=ValueError("invalid argument")) with pytest.raises(ValueError, match="invalid argument"): await _safe_close_sdk_client(client, "[test]") + + +# --------------------------------------------------------------------------- +# SystemPromptPreset — cross-user prompt caching +# --------------------------------------------------------------------------- + + +class TestSystemPromptPreset: + """Tests for _build_system_prompt_value — cross-user prompt caching.""" + + def test_preset_dict_structure_when_enabled(self): + """When cross_user_cache is True, returns a _SystemPromptPreset dict.""" + custom_prompt = "You are a helpful assistant." 
+ result = _build_system_prompt_value(custom_prompt, cross_user_cache=True) + + assert isinstance(result, dict) + assert result["type"] == "preset" + assert result["preset"] == "claude_code" + assert result["append"] == custom_prompt + assert result["exclude_dynamic_sections"] is True + + def test_raw_string_when_disabled(self): + """When cross_user_cache is False, returns the raw string.""" + custom_prompt = "You are a helpful assistant." + result = _build_system_prompt_value(custom_prompt, cross_user_cache=False) + + assert isinstance(result, str) + assert result == custom_prompt + + def test_empty_string_with_cache_enabled(self): + """Empty system_prompt with cross_user_cache=True produces append=''.""" + result = _build_system_prompt_value("", cross_user_cache=True) + + assert isinstance(result, dict) + assert result["type"] == "preset" + assert result["preset"] == "claude_code" + assert result["append"] == "" + assert result["exclude_dynamic_sections"] is True + + def test_default_config_is_enabled(self, _clean_config_env): + """The default value for claude_agent_cross_user_prompt_cache is True.""" + cfg = cfg_mod.ChatConfig( + use_openrouter=False, + api_key=None, + base_url=None, + use_claude_code_subscription=False, + ) + assert cfg.claude_agent_cross_user_prompt_cache is True + + def test_env_var_disables_cache(self, _clean_config_env, monkeypatch): + """CHAT_CLAUDE_AGENT_CROSS_USER_PROMPT_CACHE=false disables caching.""" + monkeypatch.setenv("CHAT_CLAUDE_AGENT_CROSS_USER_PROMPT_CACHE", "false") + cfg = cfg_mod.ChatConfig( + use_openrouter=False, + api_key=None, + base_url=None, + use_claude_code_subscription=False, + ) + assert cfg.claude_agent_cross_user_prompt_cache is False