From 90dfed68afba3bec6a4ddd897620fb2292103c45 Mon Sep 17 00:00:00 2001 From: Bentlybro Date: Mon, 26 Jan 2026 20:05:42 +0000 Subject: [PATCH] Improve chat context summarization logic Added timeout parameter to summarization client and limited conversation text length for safety. Enhanced message summarization to handle system prompts and to skip summarization when there are too few old messages, and improved logging for summarization actions. --- .../backend/api/features/chat/service.py | 83 +++++++++++++------ 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py index 8e8a549f86..5ecd2c70d1 100644 --- a/autogpt_platform/backend/backend/api/features/chat/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/service.py @@ -678,6 +678,7 @@ async def _summarize_messages( model: str = "openai/gpt-4o-mini", api_key: str | None = None, base_url: str | None = None, + timeout: float = 30.0, ) -> str: """Summarize a list of messages into concise context. 
@@ -686,6 +687,7 @@ async def _summarize_messages( model: Model to use for summarization (default: gpt-4o-mini) api_key: API key for OpenAI client base_url: Base URL for OpenAI client + timeout: Request timeout in seconds (default: 30.0) Returns: Summarized text @@ -700,10 +702,18 @@ async def _summarize_messages( conversation_text = "\n\n".join(conversation) + # Truncate conversation to fit within summarization model's context + # gpt-4o-mini has 128k context, but we limit to ~25k tokens (~100k chars) for safety + MAX_CHARS = 100_000 + if len(conversation_text) > MAX_CHARS: + conversation_text = conversation_text[:MAX_CHARS] + "\n\n[truncated]" + # Call LLM to summarize import openai - summarization_client = openai.AsyncOpenAI(api_key=api_key, base_url=base_url) + summarization_client = openai.AsyncOpenAI( + api_key=api_key, base_url=base_url, timeout=timeout + ) response = await summarization_client.chat.completions.create( model=model, @@ -784,37 +794,62 @@ async def _stream_chat_chunks( # If over threshold, summarize old messages if token_count > 120_000: KEEP_RECENT = 15 + MIN_MESSAGES_TO_SUMMARIZE = 5 # Don't summarize if too few old messages + + # Check if we have a system prompt at the start + has_system_prompt = ( + len(messages) > 0 and messages[0].get("role") == "system" + ) if len(messages) > KEEP_RECENT: - # Split messages + # Split messages based on whether system prompt exists recent_messages = messages[-KEEP_RECENT:] - old_messages_dict = messages_dict[ - 1:-KEEP_RECENT - ] # Exclude system prompt and recent - # Summarize old messages - summary_text = await _summarize_messages( - old_messages_dict, - model="openai/gpt-4o-mini", - api_key=config.api_key, - base_url=config.base_url, - ) + if has_system_prompt: + # Keep system prompt separate, summarize everything between system and recent + system_msg = messages[0] + old_messages_dict = messages_dict[1:-KEEP_RECENT] + else: + # No system prompt, summarize everything except recent + system_msg = None + 
old_messages_dict = messages_dict[:-KEEP_RECENT] - # Build new message list - from openai.types.chat import ChatCompletionSystemMessageParam + # Only summarize if we have enough old messages + if len(old_messages_dict) >= MIN_MESSAGES_TO_SUMMARIZE: + # Summarize old messages + summary_text = await _summarize_messages( + old_messages_dict, + model="openai/gpt-4o-mini", + api_key=config.api_key, + base_url=config.base_url, + ) - summary_msg = ChatCompletionSystemMessageParam( - role="system", - content=f"[Previous conversation summary]: {summary_text}", - ) + # Build new message list + from openai.types.chat import ChatCompletionSystemMessageParam - # Rebuild: system_prompt + summary + recent_messages - messages = [messages[0], summary_msg] + recent_messages + summary_msg = ChatCompletionSystemMessageParam( + role="system", + content=f"[Previous conversation summary]: {summary_text}", + ) - logger.info( - f"Context summarized: {token_count} tokens, " - f"kept last {KEEP_RECENT} messages + summary" - ) + # Rebuild messages based on whether we have a system prompt + if has_system_prompt: + # system_prompt + summary + recent_messages + messages = [system_msg, summary_msg] + recent_messages + else: + # summary + recent_messages (no original system prompt) + messages = [summary_msg] + recent_messages + + logger.info( + f"Context summarized: {token_count} tokens, " + f"summarized {len(old_messages_dict)} old messages, " + f"kept last {KEEP_RECENT} messages" + ) + else: + logger.info( + f"Skipping summarization: only {len(old_messages_dict)} old messages " + f"(minimum {MIN_MESSAGES_TO_SUMMARIZE} required)" + ) except Exception as e: logger.error(f"Context summarization failed: {e}", exc_info=True)