fix(backend/chat): Remove manual context truncation from SDK path, let SDK handle compaction

2026-02-10 06:45:28 -05:00 · 2026-02-10 13:52:49 +04:00
parent 587e11c60a
commit 43153a12e0
2 changed files with 11 additions and 17 deletions
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -27,13 +27,13 @@ class ChatConfig(BaseSettings):
    session_ttl: int = Field(default=43200, description="Session TTL in seconds")

    # Streaming Configuration
-    # Note: When using Claude Agent SDK, context management is handled automatically
-    # via the SDK's built-in compaction. This is mainly used for the fallback path.
+    # Only used by the non-SDK fallback path (Anthropic direct).
+    # The SDK path passes full history and relies on the SDK's built-in compaction.
    max_context_messages: int = Field(
        default=100,
        ge=1,
        le=500,
-        description="Max context messages (SDK handles compaction automatically)",
+        description="Max context messages for non-SDK fallback path only",
    )

    stream_timeout: int = Field(default=300, description="Stream timeout in seconds")
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/service.py
@@ -142,8 +142,8 @@ async def _build_system_prompt(
 def _format_conversation_history(session: ChatSession) -> str:
    """Format conversation history as a prompt context.

-    The SDK handles context compaction automatically, but we apply
-    max_context_messages as a safety guard to limit initial prompt size.
+    The full history is passed to the SDK, which handles context compaction
+    automatically when the context window approaches its limit.
    """
    if not session.messages:
        return ""
@@ -153,19 +153,12 @@ def _format_conversation_history(session: ChatSession) -> str:
    if not messages:
        return ""

-    # Apply max_context_messages limit as a safety guard
-    # (SDK handles compaction, but this prevents excessively large initial prompts)
-    max_messages = config.max_context_messages
-    if len(messages) > max_messages:
-        messages = messages[-max_messages:]
-
    history_parts = ["<conversation_history>"]

    for msg in messages:
        if msg.role == "user":
            history_parts.append(f"User: {msg.content or ''}")
        elif msg.role == "assistant":
-            # Pass full content - SDK handles compaction automatically
            history_parts.append(f"Assistant: {msg.content or ''}")
            if msg.tool_calls:
                for tc in msg.tool_calls:
@@ -174,11 +167,7 @@ def _format_conversation_history(session: ChatSession) -> str:
                        f"  [Called tool: {func.get('name', 'unknown')}]"
                    )
        elif msg.role == "tool":
-            # Truncate large tool results to avoid blowing context window
-            tool_content = msg.content or ""
-            if len(tool_content) > 500:
-                tool_content = tool_content[:500] + "... (truncated)"
-            history_parts.append(f"  [Tool result: {tool_content}]")
+            history_parts.append(f"  [Tool result: {msg.content or ''}]")

    history_parts.append("</conversation_history>")
    history_parts.append("")
@@ -333,6 +322,11 @@ async def stream_chat_completion_sdk(
                else:
                    prompt = current_message

+                logger.info(
+                    f"[SDK] Prompt built: {len(prompt)} chars, "
+                    f"{len(session.messages)} messages in session"
+                )
+
                # Guard against empty prompts
                if not prompt.strip():
                    yield StreamError(