mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-06 04:45:10 -05:00
feat(chat/sdk): Enable native SDK context compaction
- Remove manual truncation in conversation history formatting - SDK's automatic compaction handles context limits intelligently - Add observability hooks: - PreCompact: Log when SDK triggers context compaction - PostToolUse: Log successful tool executions - PostToolUseFailure: Log and debug failed tool executions - Update config: increase max_context_messages (SDK handles compaction)
This commit is contained in:
@@ -27,12 +27,20 @@ class ChatConfig(BaseSettings):
|
||||
session_ttl: int = Field(default=43200, description="Session TTL in seconds")
|
||||
|
||||
# Streaming Configuration
|
||||
# Note: When using Claude Agent SDK, context management is handled automatically
|
||||
# via the SDK's built-in compaction. This is mainly used for the fallback path.
|
||||
max_context_messages: int = Field(
|
||||
default=50, ge=1, le=200, description="Maximum context messages"
|
||||
default=100,
|
||||
ge=1,
|
||||
le=500,
|
||||
description="Max context messages (SDK handles compaction automatically)",
|
||||
)
|
||||
|
||||
stream_timeout: int = Field(default=300, description="Stream timeout in seconds")
|
||||
max_retries: int = Field(default=3, description="Maximum number of retries")
|
||||
max_retries: int = Field(
|
||||
default=3,
|
||||
description="Max retries for fallback path (SDK handles retries internally)",
|
||||
)
|
||||
max_agent_runs: int = Field(default=30, description="Maximum number of agent runs")
|
||||
max_agent_schedules: int = Field(
|
||||
default=30, description="Maximum number of agent schedules"
|
||||
|
||||
@@ -109,6 +109,12 @@ def _validate_user_isolation(
|
||||
def create_security_hooks(user_id: str | None) -> dict[str, Any]:
|
||||
"""Create the security hooks configuration for Claude Agent SDK.
|
||||
|
||||
Includes security validation and observability hooks:
|
||||
- PreToolUse: Security validation before tool execution
|
||||
- PostToolUse: Log successful tool executions
|
||||
- PostToolUseFailure: Log and handle failed tool executions
|
||||
- PreCompact: Log context compaction events (SDK handles compaction automatically)
|
||||
|
||||
Args:
|
||||
user_id: Current user ID for isolation validation
|
||||
|
||||
@@ -126,7 +132,6 @@ def create_security_hooks(user_id: str | None) -> dict[str, Any]:
|
||||
) -> SyncHookJSONOutput:
|
||||
"""Combined pre-tool-use validation hook."""
|
||||
_ = context # unused but required by signature
|
||||
# Extract tool info from the typed input
|
||||
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||
tool_input = cast(dict[str, Any], input_data.get("tool_input", {}))
|
||||
|
||||
@@ -140,21 +145,59 @@ def create_security_hooks(user_id: str | None) -> dict[str, Any]:
|
||||
if result:
|
||||
return cast(SyncHookJSONOutput, result)
|
||||
|
||||
# Log the usage
|
||||
logger.debug(
|
||||
f"[SDK Audit] Tool call: tool={tool_name}, "
|
||||
logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
async def post_tool_use_hook(
|
||||
input_data: HookInput,
|
||||
tool_use_id: str | None,
|
||||
context: HookContext,
|
||||
) -> SyncHookJSONOutput:
|
||||
"""Log successful tool executions for observability."""
|
||||
_ = context
|
||||
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||
logger.debug(f"[SDK] Tool success: {tool_name}, tool_use_id={tool_use_id}")
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
async def post_tool_failure_hook(
|
||||
input_data: HookInput,
|
||||
tool_use_id: str | None,
|
||||
context: HookContext,
|
||||
) -> SyncHookJSONOutput:
|
||||
"""Log failed tool executions for debugging."""
|
||||
_ = context
|
||||
tool_name = cast(str, input_data.get("tool_name", ""))
|
||||
error = input_data.get("error", "Unknown error")
|
||||
logger.warning(
|
||||
f"[SDK] Tool failed: {tool_name}, error={error}, "
|
||||
f"user={user_id}, tool_use_id={tool_use_id}"
|
||||
)
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
async def pre_compact_hook(
|
||||
input_data: HookInput,
|
||||
tool_use_id: str | None,
|
||||
context: HookContext,
|
||||
) -> SyncHookJSONOutput:
|
||||
"""Log when SDK triggers context compaction.
|
||||
|
||||
The SDK automatically compacts conversation history when it grows too large.
|
||||
This hook provides visibility into when compaction happens.
|
||||
"""
|
||||
_ = context, tool_use_id
|
||||
trigger = input_data.get("trigger", "auto")
|
||||
logger.info(
|
||||
f"[SDK] Context compaction triggered: {trigger}, user={user_id}"
|
||||
)
|
||||
return cast(SyncHookJSONOutput, {})
|
||||
|
||||
return {
|
||||
"PreToolUse": [
|
||||
HookMatcher(
|
||||
matcher="*",
|
||||
hooks=[pre_tool_use_hook],
|
||||
),
|
||||
"PreToolUse": [HookMatcher(matcher="*", hooks=[pre_tool_use_hook])],
|
||||
"PostToolUse": [HookMatcher(matcher="*", hooks=[post_tool_use_hook])],
|
||||
"PostToolUseFailure": [
|
||||
HookMatcher(matcher="*", hooks=[post_tool_failure_hook])
|
||||
],
|
||||
"PreCompact": [HookMatcher(matcher="*", hooks=[pre_compact_hook])],
|
||||
}
|
||||
except ImportError:
|
||||
# Fallback for when SDK isn't available - return empty hooks
|
||||
|
||||
@@ -137,8 +137,8 @@ async def _build_system_prompt(
|
||||
def _format_conversation_history(session: ChatSession) -> str:
|
||||
"""Format conversation history as a prompt context.
|
||||
|
||||
The Claude Agent SDK doesn't support replaying full conversation history,
|
||||
so we include it as context in the prompt.
|
||||
The SDK handles context compaction automatically, so we pass full history
|
||||
without manual truncation. The SDK will intelligently summarize if needed.
|
||||
"""
|
||||
if not session.messages:
|
||||
return ""
|
||||
@@ -148,30 +148,23 @@ def _format_conversation_history(session: ChatSession) -> str:
|
||||
if not messages:
|
||||
return ""
|
||||
|
||||
history_parts = []
|
||||
history_parts.append("<conversation_history>")
|
||||
history_parts = ["<conversation_history>"]
|
||||
|
||||
for msg in messages:
|
||||
if msg.role == "user":
|
||||
history_parts.append(f"User: {msg.content or ''}")
|
||||
elif msg.role == "assistant":
|
||||
content = msg.content or ""
|
||||
# Truncate long assistant responses
|
||||
if len(content) > 500:
|
||||
content = content[:500] + "..."
|
||||
history_parts.append(f"Assistant: {content}")
|
||||
# Include tool calls summary if any
|
||||
# Pass full content - SDK handles compaction automatically
|
||||
history_parts.append(f"Assistant: {msg.content or ''}")
|
||||
if msg.tool_calls:
|
||||
for tc in msg.tool_calls:
|
||||
func = tc.get("function", {})
|
||||
tool_name = func.get("name", "unknown")
|
||||
history_parts.append(f" [Called tool: {tool_name}]")
|
||||
history_parts.append(
|
||||
f" [Called tool: {func.get('name', 'unknown')}]"
|
||||
)
|
||||
elif msg.role == "tool":
|
||||
# Summarize tool results
|
||||
result = msg.content or ""
|
||||
if len(result) > 200:
|
||||
result = result[:200] + "..."
|
||||
history_parts.append(f" [Tool result: {result}]")
|
||||
# Pass full tool results - SDK handles compaction
|
||||
history_parts.append(f" [Tool result: {msg.content or ''}]")
|
||||
|
||||
history_parts.append("</conversation_history>")
|
||||
history_parts.append("")
|
||||
|
||||
Reference in New Issue
Block a user