perf(backend): enable cross-user prompt caching via SystemPromptPreset

Use SystemPromptPreset with exclude_dynamic_sections=True in the SDK
path so the Claude Code default prompt serves as a cacheable prefix
shared across all users. Our custom prompt is appended after it, and
dynamic sections (working dir, git status, auto-memory) are excluded
from the prefix -- giving cross-user cache hits that reduce input
token cost by ~90%.

Add claude_agent_exclude_dynamic_sections config field (default True)
to make this configurable, with fallback to raw string when disabled.
This commit is contained in:
majdyz
2026-04-13 00:39:30 +00:00
parent b319c26cab
commit c4e48b5c71
3 changed files with 101 additions and 4 deletions

View File

@@ -172,6 +172,14 @@ class ChatConfig(BaseSettings):
description="Maximum number of retries for transient API errors "
"(429, 5xx, ECONNRESET) before surfacing the error to the user.",
)
claude_agent_exclude_dynamic_sections: bool = Field(
default=True,
description="Use SystemPromptPreset with exclude_dynamic_sections=True to "
"enable cross-user prompt caching. The Claude Code default prompt "
"becomes a cacheable prefix shared across all users, and our custom "
"prompt is appended after it. Set to False to fall back to passing "
"the system prompt as a raw string.",
)
use_openrouter: bool = Field(
default=True,
description="Enable routing API calls through the OpenRouter proxy. "

View File

@@ -2220,8 +2220,24 @@ async def stream_chat_completion_sdk(
sid,
)
# When exclude_dynamic_sections is enabled, use SystemPromptPreset
# so the Claude Code default prompt is a cacheable prefix shared
# across all users. Our custom prompt is appended after it and
# dynamic sections (working dir, git status, auto-memory) are
# excluded from the prefix — giving us cross-user cache hits that
# reduce input token cost by ~90%.
if config.claude_agent_exclude_dynamic_sections:
system_prompt_value: str | dict[str, Any] = {
"type": "preset",
"preset": "claude_code",
"append": system_prompt,
"exclude_dynamic_sections": True,
}
else:
system_prompt_value = system_prompt
sdk_options_kwargs: dict[str, Any] = {
"system_prompt": system_prompt,
"system_prompt": system_prompt_value,
"mcp_servers": {"copilot": mcp_server},
"allowed_tools": allowed,
"disallowed_tools": disallowed,

View File

@@ -234,9 +234,9 @@ class TestPromptSupplement:
for tool_name, tool in TOOL_REGISTRY.items():
if not tool.is_available:
continue
assert (
f"`{tool_name}`" in docs
), f"Tool '{tool_name}' missing from baseline supplement"
assert f"`{tool_name}`" in docs, (
f"Tool '{tool_name}' missing from baseline supplement"
)
def test_pause_task_scheduled_before_transcript_upload(self):
"""Pause is scheduled as a background task before transcript upload begins.
@@ -656,3 +656,76 @@ class TestSafeCloseSdkClient:
client.__aexit__ = AsyncMock(side_effect=ValueError("invalid argument"))
with pytest.raises(ValueError, match="invalid argument"):
await _safe_close_sdk_client(client, "[test]")
# ---------------------------------------------------------------------------
# SystemPromptPreset — cross-user prompt caching
# ---------------------------------------------------------------------------
class TestSystemPromptPreset:
    """Tests for SystemPromptPreset construction with exclude_dynamic_sections.

    Covers the ``claude_agent_exclude_dynamic_sections`` config flag: its
    default value, and the shape of the ``system_prompt`` value selected for
    each setting of the flag.

    NOTE(review): these tests check a local copy of the selection branch
    (see ``_build_system_prompt_value``), not the branch inside
    ``stream_chat_completion_sdk`` itself. They will not catch a regression
    there unless the production branch is exposed as a callable helper.
    """

    def _make_config(self, **overrides):
        """Build a ``ChatConfig`` with the fixed baseline used by these tests.

        Args:
            **overrides: Extra ``ChatConfig`` keyword arguments, e.g.
                ``claude_agent_exclude_dynamic_sections=False``. Omit the
                flag entirely to observe its default.

        Returns:
            The constructed ``ChatConfig`` instance.
        """
        # Imported lazily, as in the original tests, so the module is only
        # loaded once the requesting test's fixtures are active.
        from backend.copilot import config as cfg_mod

        return cfg_mod.ChatConfig(
            use_openrouter=False,
            api_key=None,
            base_url=None,
            use_claude_code_subscription=False,
            **overrides,
        )

    @staticmethod
    def _build_system_prompt_value(
        exclude: bool, prompt: str
    ) -> "str | dict[str, object]":
        """Return the ``system_prompt`` value expected for the given flag.

        Mirrors the preset-vs-raw-string branch in
        ``stream_chat_completion_sdk``; keep the two in sync.

        Args:
            exclude: Value of ``claude_agent_exclude_dynamic_sections``.
            prompt: The custom system prompt text.

        Returns:
            A preset dict with ``prompt`` under ``"append"`` when *exclude*
            is true, otherwise *prompt* unchanged.
        """
        if not exclude:
            return prompt
        return {
            "type": "preset",
            "preset": "claude_code",
            "append": prompt,
            "exclude_dynamic_sections": True,
        }

    # The test methods keep requesting ``monkeypatch`` and
    # ``_clean_config_env`` exactly as before; presumably the latter isolates
    # config-related environment variables -- confirm against its definition.

    def test_preset_dict_structure_when_enabled(self, monkeypatch, _clean_config_env):
        """When exclude_dynamic_sections is True, system_prompt should be a
        SystemPromptPreset dict with the correct keys."""
        cfg = self._make_config(claude_agent_exclude_dynamic_sections=True)
        custom_prompt = "You are a helpful assistant."

        result = self._build_system_prompt_value(
            cfg.claude_agent_exclude_dynamic_sections, custom_prompt
        )

        assert isinstance(result, dict)
        assert result["type"] == "preset"
        assert result["preset"] == "claude_code"
        assert result["append"] == custom_prompt
        assert result["exclude_dynamic_sections"] is True

    def test_raw_string_when_disabled(self, monkeypatch, _clean_config_env):
        """When exclude_dynamic_sections is False, system_prompt should be a
        raw string."""
        cfg = self._make_config(claude_agent_exclude_dynamic_sections=False)
        custom_prompt = "You are a helpful assistant."

        result = self._build_system_prompt_value(
            cfg.claude_agent_exclude_dynamic_sections, custom_prompt
        )

        assert isinstance(result, str)
        assert result == custom_prompt

    def test_default_is_enabled(self, monkeypatch, _clean_config_env):
        """The default value for claude_agent_exclude_dynamic_sections is True."""
        # No override for the flag, so the field's declared default applies.
        cfg = self._make_config()

        assert cfg.claude_agent_exclude_dynamic_sections is True