Improve chat message summarization and token limit handling

Removes the minimum message threshold for summarization, allowing any non-empty set of old messages to be summarized when the estimated token count exceeds the 120,000-token threshold. Adds a fallback mechanism that progressively reduces the number of recent messages kept if the token count remains too high after summarization, with logging for each reduction step and edge case.
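In outline, the fallback behaves as follows. This is a minimal standalone sketch, not the committed code: the 120,000-token threshold, the initial keep count of 15, and the [12, 10, 8, 5] ladder come from the diff below, while `trim_to_budget` and the `estimate` parameter are hypothetical names used only for illustration.

```python
from typing import Any, Callable

TOKEN_LIMIT = 120_000  # threshold used by the handler in the diff below

def trim_to_budget(
    prefix: list[dict[str, Any]],  # e.g. [system_msg, summary_msg]
    recent: list[dict[str, Any]],  # recent messages, oldest first
    estimate: Callable[[list[dict[str, Any]]], int],
) -> list[dict[str, Any]]:
    # Try keeping 15 recent messages first, then step down the ladder
    # until the estimated token count fits under the limit.
    for keep_count in [15, 12, 10, 8, 5]:
        candidate = prefix + recent[-keep_count:]
        if estimate(candidate) <= TOKEN_LIMIT:
            return candidate
    # Even 5 recent messages did not fit; return the smallest candidate
    # (the committed code logs an error at this point).
    return prefix + recent[-5:]
```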
Bentlybro
2026-01-27 09:13:24 +00:00
parent 759c5b428f
commit dcc64d51c9


@@ -805,7 +805,6 @@ async def _stream_chat_chunks(
         # If over threshold, summarize old messages
         if token_count > 120_000:
             KEEP_RECENT = 15
-            MIN_MESSAGES_TO_SUMMARIZE = 5  # Don't summarize if too few old messages
 
             # Check if we have a system prompt at the start
             has_system_prompt = (
@@ -825,8 +824,9 @@ async def _stream_chat_chunks(
system_msg = None
old_messages_dict = messages_dict[:-KEEP_RECENT]
# Only summarize if we have enough old messages
if len(old_messages_dict) >= MIN_MESSAGES_TO_SUMMARIZE:
# Summarize any non-empty old messages (no minimum threshold)
# If we're over the token limit, we need to compress whatever we can
if old_messages_dict:
# Summarize old messages
summary_text = await _summarize_messages(
old_messages_dict,
@@ -861,10 +861,64 @@ async def _stream_chat_chunks(
                     f"summarized {len(old_messages_dict)} old messages, "
                     f"kept last {KEEP_RECENT} messages"
                 )
+
+                # Fallback: If still over limit after summarization, progressively drop recent messages
+                # This handles edge cases where recent messages are extremely large
+                new_messages_dict = []
+                for msg in messages:
+                    if isinstance(msg, dict):
+                        msg_dict = {k: v for k, v in msg.items() if v is not None}
+                    else:
+                        msg_dict = dict(msg)
+                    new_messages_dict.append(msg_dict)
+
+                new_token_count = estimate_token_count(
+                    new_messages_dict, model=token_count_model
+                )
+                if new_token_count > 120_000:
+                    # Still over limit - progressively reduce KEEP_RECENT
+                    logger.warning(
+                        f"Still over limit after summarization: {new_token_count} tokens. "
+                        "Reducing number of recent messages kept."
+                    )
+                    for keep_count in [12, 10, 8, 5]:
+                        recent_messages = messages[-keep_count:]
+                        if has_system_prompt:
+                            messages = [system_msg, summary_msg] + recent_messages
+                        else:
+                            messages = [summary_msg] + recent_messages
+
+                        new_messages_dict = []
+                        for msg in messages:
+                            if isinstance(msg, dict):
+                                msg_dict = {
+                                    k: v for k, v in msg.items() if v is not None
+                                }
+                            else:
+                                msg_dict = dict(msg)
+                            new_messages_dict.append(msg_dict)
+
+                        new_token_count = estimate_token_count(
+                            new_messages_dict, model=token_count_model
+                        )
+                        if new_token_count <= 120_000:
+                            logger.info(
+                                f"Reduced to {keep_count} recent messages, "
+                                f"now {new_token_count} tokens"
+                            )
+                            break
+                    else:
+                        logger.error(
+                            f"Unable to reduce token count below threshold even with 5 messages. "
+                            f"Final count: {new_token_count} tokens"
+                        )
             else:
-                logger.info(
-                    f"Skipping summarization: only {len(old_messages_dict)} old messages "
-                    f"(minimum {MIN_MESSAGES_TO_SUMMARIZE} required)"
+                logger.warning(
+                    f"Token count {token_count} exceeds threshold but no old messages to summarize. "
+                    f"This may indicate recent messages are too large."
                 )
     except Exception as e:
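Two things are worth noting about the new block. First, the `else:` on the `for keep_count in [12, 10, 8, 5]:` loop is Python's for-else: the `logger.error` call fires only when the loop finishes without `break`, i.e. when even 5 recent messages did not bring the count under the threshold. Second, the None-stripping dict normalization now appears twice in this function; a possible follow-up (a sketch only, not part of this commit, with `_to_clean_dicts` as a hypothetical name) would be to factor it into a helper:

```python
from typing import Any

def _to_clean_dicts(messages: list[Any]) -> list[dict[str, Any]]:
    # Mirrors the inline normalization in the diff: plain dicts keep only
    # non-None values; other message objects are converted with dict().
    cleaned: list[dict[str, Any]] = []
    for msg in messages:
        if isinstance(msg, dict):
            cleaned.append({k: v for k, v in msg.items() if v is not None})
        else:
            cleaned.append(dict(msg))
    return cleaned
```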