From dcc64d51c9441c57366c77fe48b5362f8c581b68 Mon Sep 17 00:00:00 2001
From: Bentlybro
Date: Tue, 27 Jan 2026 09:13:24 +0000
Subject: [PATCH] Improve chat message summarization and token limit handling

Removes the minimum message threshold for summarization, allowing any
non-empty set of old messages to be summarized when the token count
exceeds 120,000. Adds a fallback mechanism to progressively reduce the
number of recent messages kept if the token count remains too high
after summarization, with appropriate logging for each step and edge
case.
---
 .../backend/api/features/chat/service.py | 66 +++++++++++++++++--
 1 file changed, 60 insertions(+), 6 deletions(-)

diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py
index 0f09b9e7d5..dc4bc095c3 100644
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -805,7 +805,6 @@ async def _stream_chat_chunks(
             # If over threshold, summarize old messages
             if token_count > 120_000:
                 KEEP_RECENT = 15
-                MIN_MESSAGES_TO_SUMMARIZE = 5  # Don't summarize if too few old messages
 
                 # Check if we have a system prompt at the start
                 has_system_prompt = (
@@ -825,8 +824,9 @@
                     system_msg = None
                     old_messages_dict = messages_dict[:-KEEP_RECENT]
 
-                # Only summarize if we have enough old messages
-                if len(old_messages_dict) >= MIN_MESSAGES_TO_SUMMARIZE:
+                # Summarize any non-empty old messages (no minimum threshold)
+                # If we're over the token limit, we need to compress whatever we can
+                if old_messages_dict:
                     # Summarize old messages
                     summary_text = await _summarize_messages(
                         old_messages_dict,
@@ -861,10 +861,64 @@
                         f"summarized {len(old_messages_dict)} old messages, "
                         f"kept last {KEEP_RECENT} messages"
                     )
+
+                    # Fallback: If still over limit after summarization, progressively drop recent messages
+                    # This handles edge cases where recent messages are extremely large
+                    new_messages_dict = []
+                    for msg in messages:
+                        if isinstance(msg, dict):
+                            msg_dict = {k: v for k, v in msg.items() if v is not None}
+                        else:
+                            msg_dict = dict(msg)
+                        new_messages_dict.append(msg_dict)
+
+                    new_token_count = estimate_token_count(
+                        new_messages_dict, model=token_count_model
+                    )
+
+                    if new_token_count > 120_000:
+                        # Still over limit - progressively reduce KEEP_RECENT
+                        logger.warning(
+                            f"Still over limit after summarization: {new_token_count} tokens. "
+                            "Reducing number of recent messages kept."
+                        )
+
+                        for keep_count in [12, 10, 8, 5]:
+                            recent_messages = messages[-keep_count:]
+                            if has_system_prompt:
+                                messages = [system_msg, summary_msg] + recent_messages
+                            else:
+                                messages = [summary_msg] + recent_messages
+
+                            new_messages_dict = []
+                            for msg in messages:
+                                if isinstance(msg, dict):
+                                    msg_dict = {
+                                        k: v for k, v in msg.items() if v is not None
+                                    }
+                                else:
+                                    msg_dict = dict(msg)
+                                new_messages_dict.append(msg_dict)
+
+                            new_token_count = estimate_token_count(
+                                new_messages_dict, model=token_count_model
+                            )
+
+                            if new_token_count <= 120_000:
+                                logger.info(
+                                    f"Reduced to {keep_count} recent messages, "
+                                    f"now {new_token_count} tokens"
+                                )
+                                break
+                        else:
+                            logger.error(
+                                f"Unable to reduce token count below threshold even with 5 messages. "
" + f"Final count: {new_token_count} tokens" + ) else: - logger.info( - f"Skipping summarization: only {len(old_messages_dict)} old messages " - f"(minimum {MIN_MESSAGES_TO_SUMMARIZE} required)" + logger.warning( + f"Token count {token_count} exceeds threshold but no old messages to summarize. " + f"This may indicate recent messages are too large." ) except Exception as e: