feat(chat/sdk): Enable native SDK context compaction

- Remove manual truncation in conversation history formatting
- SDK's automatic compaction handles context limits intelligently
- Add observability hooks:
  - PreCompact: Log when SDK triggers context compaction
  - PostToolUse: Log successful tool executions
  - PostToolUseFailure: Log failed tool executions for debugging
- Update config: increase max_context_messages (SDK handles compaction)
This commit is contained in:
Zamil Majdy
2026-02-06 12:44:48 +04:00
parent 32ee7e6cf8
commit 452544530d
3 changed files with 72 additions and 28 deletions

View File

@@ -27,12 +27,20 @@ class ChatConfig(BaseSettings):
session_ttl: int = Field(default=43200, description="Session TTL in seconds")
# Streaming Configuration
# Note: When using Claude Agent SDK, context management is handled automatically
# via the SDK's built-in compaction. This is mainly used for the fallback path.
max_context_messages: int = Field(
default=50, ge=1, le=200, description="Maximum context messages"
default=100,
ge=1,
le=500,
description="Max context messages (SDK handles compaction automatically)",
)
stream_timeout: int = Field(default=300, description="Stream timeout in seconds")
max_retries: int = Field(default=3, description="Maximum number of retries")
max_retries: int = Field(
default=3,
description="Max retries for fallback path (SDK handles retries internally)",
)
max_agent_runs: int = Field(default=30, description="Maximum number of agent runs")
max_agent_schedules: int = Field(
default=30, description="Maximum number of agent schedules"

View File

@@ -109,6 +109,12 @@ def _validate_user_isolation(
def create_security_hooks(user_id: str | None) -> dict[str, Any]:
"""Create the security hooks configuration for Claude Agent SDK.
Includes security validation and observability hooks:
- PreToolUse: Security validation before tool execution
- PostToolUse: Log successful tool executions
- PostToolUseFailure: Log failed tool executions for debugging
- PreCompact: Log context compaction events (SDK handles compaction automatically)
Args:
user_id: Current user ID for isolation validation
@@ -126,7 +132,6 @@ def create_security_hooks(user_id: str | None) -> dict[str, Any]:
) -> SyncHookJSONOutput:
"""Combined pre-tool-use validation hook."""
_ = context # unused but required by signature
# Extract tool info from the typed input
tool_name = cast(str, input_data.get("tool_name", ""))
tool_input = cast(dict[str, Any], input_data.get("tool_input", {}))
@@ -140,21 +145,59 @@ def create_security_hooks(user_id: str | None) -> dict[str, Any]:
if result:
return cast(SyncHookJSONOutput, result)
# Log the usage
logger.debug(
f"[SDK Audit] Tool call: tool={tool_name}, "
logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
return cast(SyncHookJSONOutput, {})
async def post_tool_use_hook(
    input_data: HookInput,
    tool_use_id: str | None,
    context: HookContext,
) -> SyncHookJSONOutput:
    """Write a debug log entry for a successful tool execution.

    Reads ``tool_name`` from ``input_data`` (empty string when the key is
    missing), logs it together with ``tool_use_id``, and returns an empty
    hook output.
    """
    del context  # part of the hook signature, not needed here
    finished_tool = cast(str, input_data.get("tool_name", ""))
    message = f"[SDK] Tool success: {finished_tool}, tool_use_id={tool_use_id}"
    logger.debug(message)
    empty_output = {}
    return cast(SyncHookJSONOutput, empty_output)
async def post_tool_failure_hook(
    input_data: HookInput,
    tool_use_id: str | None,
    context: HookContext,
) -> SyncHookJSONOutput:
    """Write a warning log entry for a failed tool execution.

    The entry contains the ``tool_name`` and ``error`` values taken from
    ``input_data`` (``""`` and ``"Unknown error"`` when missing), the
    enclosing ``user_id`` and ``tool_use_id``. Returns an empty hook output.
    """
    del context  # part of the hook signature, not needed here
    failed_tool = cast(str, input_data.get("tool_name", ""))
    failure_reason = input_data.get("error", "Unknown error")
    details = (
        f"[SDK] Tool failed: {failed_tool}, error={failure_reason}, "
        f"user={user_id}, tool_use_id={tool_use_id}"
    )
    logger.warning(details)
    empty_output = {}
    return cast(SyncHookJSONOutput, empty_output)
async def pre_compact_hook(
    input_data: HookInput,
    tool_use_id: str | None,
    context: HookContext,
) -> SyncHookJSONOutput:
    """Write an info log entry when the SDK triggers context compaction.

    This hook does no compaction itself; it only logs the ``trigger`` value
    from ``input_data`` (``"auto"`` when missing) together with the enclosing
    ``user_id``, then returns an empty hook output.
    """
    del context, tool_use_id  # part of the hook signature, not needed here
    compaction_trigger = input_data.get("trigger", "auto")
    message = (
        f"[SDK] Context compaction triggered: {compaction_trigger}, user={user_id}"
    )
    logger.info(message)
    empty_output = {}
    return cast(SyncHookJSONOutput, empty_output)
return {
"PreToolUse": [
HookMatcher(
matcher="*",
hooks=[pre_tool_use_hook],
),
"PreToolUse": [HookMatcher(matcher="*", hooks=[pre_tool_use_hook])],
"PostToolUse": [HookMatcher(matcher="*", hooks=[post_tool_use_hook])],
"PostToolUseFailure": [
HookMatcher(matcher="*", hooks=[post_tool_failure_hook])
],
"PreCompact": [HookMatcher(matcher="*", hooks=[pre_compact_hook])],
}
except ImportError:
# Fallback for when SDK isn't available - return empty hooks

View File

@@ -137,8 +137,8 @@ async def _build_system_prompt(
def _format_conversation_history(session: ChatSession) -> str:
"""Format conversation history as a prompt context.
The Claude Agent SDK doesn't support replaying full conversation history,
so we include it as context in the prompt.
The SDK handles context compaction automatically, so we pass full history
without manual truncation. The SDK will intelligently summarize if needed.
"""
if not session.messages:
return ""
@@ -148,30 +148,23 @@ def _format_conversation_history(session: ChatSession) -> str:
if not messages:
return ""
history_parts = []
history_parts.append("<conversation_history>")
history_parts = ["<conversation_history>"]
for msg in messages:
if msg.role == "user":
history_parts.append(f"User: {msg.content or ''}")
elif msg.role == "assistant":
content = msg.content or ""
# Truncate long assistant responses
if len(content) > 500:
content = content[:500] + "..."
history_parts.append(f"Assistant: {content}")
# Include tool calls summary if any
# Pass full content - SDK handles compaction automatically
history_parts.append(f"Assistant: {msg.content or ''}")
if msg.tool_calls:
for tc in msg.tool_calls:
func = tc.get("function", {})
tool_name = func.get("name", "unknown")
history_parts.append(f" [Called tool: {tool_name}]")
history_parts.append(
f" [Called tool: {func.get('name', 'unknown')}]"
)
elif msg.role == "tool":
# Summarize tool results
result = msg.content or ""
if len(result) > 200:
result = result[:200] + "..."
history_parts.append(f" [Tool result: {result}]")
# Pass full tool results - SDK handles compaction
history_parts.append(f" [Tool result: {msg.content or ''}]")
history_parts.append("</conversation_history>")
history_parts.append("")