feat(chat/sdk): Enable native SDK context compaction

- Remove manual truncation in conversation history formatting
- SDK's automatic compaction handles context limits intelligently
- Add observability hooks:
  - PreCompact: Log when SDK triggers context compaction
  - PostToolUse: Log successful tool executions
  - PostToolUseFailure: Log and debug failed tool executions
- Update config: increase max_context_messages (SDK handles compaction)
2026-02-06 04:45:10 -05:00 · 2026-02-06 12:44:48 +04:00
parent 32ee7e6cf8
commit 452544530d
3 changed files with 72 additions and 28 deletions
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -27,12 +27,20 @@ class ChatConfig(BaseSettings):
    session_ttl: int = Field(default=43200, description="Session TTL in seconds")

    # Streaming Configuration
+    # Note: When using Claude Agent SDK, context management is handled automatically
+    # via the SDK's built-in compaction. This is mainly used for the fallback path.
    max_context_messages: int = Field(
-        default=50, ge=1, le=200, description="Maximum context messages"
+        default=100,
+        ge=1,
+        le=500,
+        description="Max context messages (SDK handles compaction automatically)",
    )

    stream_timeout: int = Field(default=300, description="Stream timeout in seconds")
-    max_retries: int = Field(default=3, description="Maximum number of retries")
+    max_retries: int = Field(
+        default=3,
+        description="Max retries for fallback path (SDK handles retries internally)",
+    )
    max_agent_runs: int = Field(default=30, description="Maximum number of agent runs")
    max_agent_schedules: int = Field(
        default=30, description="Maximum number of agent schedules"
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py
@@ -109,6 +109,12 @@ def _validate_user_isolation(
 def create_security_hooks(user_id: str | None) -> dict[str, Any]:
    """Create the security hooks configuration for Claude Agent SDK.

+    Includes security validation and observability hooks:
+    - PreToolUse: Security validation before tool execution
+    - PostToolUse: Log successful tool executions
+    - PostToolUseFailure: Log and handle failed tool executions
+    - PreCompact: Log context compaction events (SDK handles compaction automatically)
+
    Args:
        user_id: Current user ID for isolation validation

@@ -126,7 +132,6 @@ def create_security_hooks(user_id: str | None) -> dict[str, Any]:
        ) -> SyncHookJSONOutput:
            """Combined pre-tool-use validation hook."""
            _ = context  # unused but required by signature
-            # Extract tool info from the typed input
            tool_name = cast(str, input_data.get("tool_name", ""))
            tool_input = cast(dict[str, Any], input_data.get("tool_input", {}))

@@ -140,21 +145,59 @@ def create_security_hooks(user_id: str | None) -> dict[str, Any]:
            if result:
                return cast(SyncHookJSONOutput, result)

-            # Log the usage
-            logger.debug(
-                f"[SDK Audit] Tool call: tool={tool_name}, "
+            logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
+            return cast(SyncHookJSONOutput, {})
+
+        async def post_tool_use_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Log successful tool executions for observability."""
+            _ = context
+            tool_name = cast(str, input_data.get("tool_name", ""))
+            logger.debug(f"[SDK] Tool success: {tool_name}, tool_use_id={tool_use_id}")
+            return cast(SyncHookJSONOutput, {})
+
+        async def post_tool_failure_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Log failed tool executions for debugging."""
+            _ = context
+            tool_name = cast(str, input_data.get("tool_name", ""))
+            error = input_data.get("error", "Unknown error")
+            logger.warning(
+                f"[SDK] Tool failed: {tool_name}, error={error}, "
                f"user={user_id}, tool_use_id={tool_use_id}"
            )
+            return cast(SyncHookJSONOutput, {})

+        async def pre_compact_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Log when SDK triggers context compaction.
+
+            The SDK automatically compacts conversation history when it grows too large.
+            This hook provides visibility into when compaction happens.
+            """
+            _ = context, tool_use_id
+            trigger = input_data.get("trigger", "auto")
+            logger.info(
+                f"[SDK] Context compaction triggered: {trigger}, user={user_id}"
+            )
            return cast(SyncHookJSONOutput, {})

        return {
-            "PreToolUse": [
-                HookMatcher(
-                    matcher="*",
-                    hooks=[pre_tool_use_hook],
-                ),
+            "PreToolUse": [HookMatcher(matcher="*", hooks=[pre_tool_use_hook])],
+            "PostToolUse": [HookMatcher(matcher="*", hooks=[post_tool_use_hook])],
+            "PostToolUseFailure": [
+                HookMatcher(matcher="*", hooks=[post_tool_failure_hook])
            ],
+            "PreCompact": [HookMatcher(matcher="*", hooks=[pre_compact_hook])],
        }
    except ImportError:
        # Fallback for when SDK isn't available - return empty hooks
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/service.py
@@ -137,8 +137,8 @@ async def _build_system_prompt(
 def _format_conversation_history(session: ChatSession) -> str:
    """Format conversation history as a prompt context.

-    The Claude Agent SDK doesn't support replaying full conversation history,
-    so we include it as context in the prompt.
+    The SDK handles context compaction automatically, so we pass full history
+    without manual truncation. The SDK will intelligently summarize if needed.
    """
    if not session.messages:
        return ""
@@ -148,30 +148,23 @@ def _format_conversation_history(session: ChatSession) -> str:
    if not messages:
        return ""

-    history_parts = []
-    history_parts.append("<conversation_history>")
+    history_parts = ["<conversation_history>"]

    for msg in messages:
        if msg.role == "user":
            history_parts.append(f"User: {msg.content or ''}")
        elif msg.role == "assistant":
-            content = msg.content or ""
-            # Truncate long assistant responses
-            if len(content) > 500:
-                content = content[:500] + "..."
-            history_parts.append(f"Assistant: {content}")
-            # Include tool calls summary if any
+            # Pass full content - SDK handles compaction automatically
+            history_parts.append(f"Assistant: {msg.content or ''}")
            if msg.tool_calls:
                for tc in msg.tool_calls:
                    func = tc.get("function", {})
-                    tool_name = func.get("name", "unknown")
-                    history_parts.append(f"  [Called tool: {tool_name}]")
+                    history_parts.append(
+                        f"  [Called tool: {func.get('name', 'unknown')}]"
+                    )
        elif msg.role == "tool":
-            # Summarize tool results
-            result = msg.content or ""
-            if len(result) > 200:
-                result = result[:200] + "..."
-            history_parts.append(f"  [Tool result: {result}]")
+            # Pass full tool results - SDK handles compaction
+            history_parts.append(f"  [Tool result: {msg.content or ''}]")

    history_parts.append("</conversation_history>")
    history_parts.append("")