mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
perf(backend): enable cross-user prompt caching via SystemPromptPreset
Use SystemPromptPreset with exclude_dynamic_sections=True in the SDK path so the Claude Code default prompt serves as a cacheable prefix shared across all users. Our custom prompt is appended after it, and dynamic sections (working dir, git status, auto-memory) are excluded from the prefix -- giving cross-user cache hits that reduce input token cost by ~90%. Add claude_agent_exclude_dynamic_sections config field (default True) to make this configurable, with fallback to raw string when disabled.
This commit is contained in:
@@ -172,6 +172,14 @@ class ChatConfig(BaseSettings):
|
||||
description="Maximum number of retries for transient API errors "
|
||||
"(429, 5xx, ECONNRESET) before surfacing the error to the user.",
|
||||
)
|
||||
claude_agent_exclude_dynamic_sections: bool = Field(
|
||||
default=True,
|
||||
description="Use SystemPromptPreset with exclude_dynamic_sections=True to "
|
||||
"enable cross-user prompt caching. The Claude Code default prompt "
|
||||
"becomes a cacheable prefix shared across all users, and our custom "
|
||||
"prompt is appended after it. Set to False to fall back to passing "
|
||||
"the system prompt as a raw string.",
|
||||
)
|
||||
use_openrouter: bool = Field(
|
||||
default=True,
|
||||
description="Enable routing API calls through the OpenRouter proxy. "
|
||||
|
||||
@@ -2220,8 +2220,24 @@ async def stream_chat_completion_sdk(
|
||||
sid,
|
||||
)
|
||||
|
||||
# When exclude_dynamic_sections is enabled, use SystemPromptPreset
|
||||
# so the Claude Code default prompt is a cacheable prefix shared
|
||||
# across all users. Our custom prompt is appended after it and
|
||||
# dynamic sections (working dir, git status, auto-memory) are
|
||||
# excluded from the prefix — giving us cross-user cache hits that
|
||||
# reduce input token cost by ~90%.
|
||||
if config.claude_agent_exclude_dynamic_sections:
|
||||
system_prompt_value: str | dict[str, Any] = {
|
||||
"type": "preset",
|
||||
"preset": "claude_code",
|
||||
"append": system_prompt,
|
||||
"exclude_dynamic_sections": True,
|
||||
}
|
||||
else:
|
||||
system_prompt_value = system_prompt
|
||||
|
||||
sdk_options_kwargs: dict[str, Any] = {
|
||||
"system_prompt": system_prompt,
|
||||
"system_prompt": system_prompt_value,
|
||||
"mcp_servers": {"copilot": mcp_server},
|
||||
"allowed_tools": allowed,
|
||||
"disallowed_tools": disallowed,
|
||||
|
||||
@@ -234,9 +234,9 @@ class TestPromptSupplement:
|
||||
for tool_name, tool in TOOL_REGISTRY.items():
|
||||
if not tool.is_available:
|
||||
continue
|
||||
assert (
|
||||
f"`{tool_name}`" in docs
|
||||
), f"Tool '{tool_name}' missing from baseline supplement"
|
||||
assert f"`{tool_name}`" in docs, (
|
||||
f"Tool '{tool_name}' missing from baseline supplement"
|
||||
)
|
||||
|
||||
def test_pause_task_scheduled_before_transcript_upload(self):
|
||||
"""Pause is scheduled as a background task before transcript upload begins.
|
||||
@@ -656,3 +656,76 @@ class TestSafeCloseSdkClient:
|
||||
client.__aexit__ = AsyncMock(side_effect=ValueError("invalid argument"))
|
||||
with pytest.raises(ValueError, match="invalid argument"):
|
||||
await _safe_close_sdk_client(client, "[test]")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SystemPromptPreset — cross-user prompt caching
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSystemPromptPreset:
    """Tests for SystemPromptPreset construction with exclude_dynamic_sections.

    Covers the ``claude_agent_exclude_dynamic_sections`` config flag: when it
    is on, the system prompt is expected to be a preset dict that appends our
    custom prompt to the ``claude_code`` preset; when it is off, the system
    prompt is expected to be the raw custom prompt string.

    NOTE(review): the preset-vs-string selection is rebuilt locally in
    ``_build_system_prompt`` rather than obtained from the SDK streaming
    code, so these tests pin the expected shape and the config default but do
    not exercise the production branch itself. Consider extracting that
    branch into an importable helper and testing it directly.
    """

    CUSTOM_PROMPT = "You are a helpful assistant."

    def _make_config(self, exclude=None, monkeypatch=None, _clean_config_env=None):
        """Build a ``ChatConfig`` with OpenRouter and subscription auth off.

        Args:
            exclude: Value for ``claude_agent_exclude_dynamic_sections``.
                When ``None`` the keyword is omitted so the field's own
                default applies.
            monkeypatch: Unused; accepted only so existing positional calls
                keep working.
            _clean_config_env: Unused; accepted only so existing positional
                calls keep working.

        Returns:
            The constructed ``ChatConfig`` instance.
        """
        from backend.copilot import config as cfg_mod

        kwargs = {
            "use_openrouter": False,
            "api_key": None,
            "base_url": None,
            "use_claude_code_subscription": False,
        }
        if exclude is not None:
            kwargs["claude_agent_exclude_dynamic_sections"] = exclude
        return cfg_mod.ChatConfig(**kwargs)

    @staticmethod
    def _build_system_prompt(cfg, prompt):
        """Return the system prompt value expected for ``cfg``.

        Returns the raw ``prompt`` string when the flag is off, otherwise a
        preset dict that appends ``prompt`` to the ``claude_code`` preset
        with dynamic sections excluded.
        """
        if not cfg.claude_agent_exclude_dynamic_sections:
            return prompt
        return {
            "type": "preset",
            "preset": "claude_code",
            "append": prompt,
            "exclude_dynamic_sections": True,
        }

    def test_preset_dict_structure_when_enabled(self, monkeypatch, _clean_config_env):
        """When exclude_dynamic_sections is True, system_prompt should be a
        SystemPromptPreset dict with the correct keys."""
        cfg = self._make_config(True)

        result = self._build_system_prompt(cfg, self.CUSTOM_PROMPT)

        assert isinstance(result, dict)
        assert result["type"] == "preset"
        assert result["preset"] == "claude_code"
        assert result["append"] == self.CUSTOM_PROMPT
        assert result["exclude_dynamic_sections"] is True

    def test_raw_string_when_disabled(self, monkeypatch, _clean_config_env):
        """When exclude_dynamic_sections is False, system_prompt should be a
        raw string."""
        cfg = self._make_config(False)

        result = self._build_system_prompt(cfg, self.CUSTOM_PROMPT)

        assert isinstance(result, str)
        assert result == self.CUSTOM_PROMPT

    def test_default_is_enabled(self, monkeypatch, _clean_config_env):
        """The default value for claude_agent_exclude_dynamic_sections is True."""
        # Leave the flag unset so the assertion checks the field's default.
        cfg = self._make_config()

        assert cfg.claude_agent_exclude_dynamic_sections is True
|
||||
|
||||
Reference in New Issue
Block a user