From b5697efddbd69135cc74f6c4b2cf09c3b6fea62e Mon Sep 17 00:00:00 2001
From: Bentlybro
Date: Tue, 27 Jan 2026 09:54:30 +0000
Subject: [PATCH] Add progressive truncation for large recent chat messages

Implements a fallback mechanism that progressively truncates recent chat
messages when the token count exceeds the threshold and there are no old
messages to summarize. This helps prevent exceeding token limits by
reducing the number of recent messages included in the context.
---
 .../backend/api/features/chat/service.py | 41 ++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py
index 6a04f9d822..f850caa3a4 100644
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -928,11 +928,50 @@ async def _stream_chat_chunks(
                 f"Final count: {new_token_count} tokens"
             )
         else:
+            # No old messages to summarize - all messages are "recent"
+            # Apply progressive truncation to reduce token count
             logger.warning(
                 f"Token count {token_count} exceeds threshold but no old messages to summarize. "
-                f"This may indicate recent messages are too large."
+                f"Applying progressive truncation to recent messages."
             )
+
+            # Try progressively smaller keep counts
+            for keep_count in [12, 10, 8, 5]:
+                if len(messages) <= keep_count:
+                    continue  # Skip if we don't have enough messages
+
+                recent_messages = messages[-keep_count:]
+
+                if has_system_prompt:
+                    messages = [messages[0]] + recent_messages
+                else:
+                    messages = recent_messages
+
+                new_messages_dict = []
+                for msg in messages:
+                    if isinstance(msg, dict):
+                        msg_dict = {k: v for k, v in msg.items() if v is not None}
+                    else:
+                        msg_dict = dict(msg)
+                    new_messages_dict.append(msg_dict)
+
+                new_token_count = estimate_token_count(
+                    new_messages_dict, model=token_count_model
+                )
+
+                if new_token_count <= 120_000:
+                    logger.info(
+                        f"Reduced to {keep_count} recent messages, "
+                        f"now {new_token_count} tokens"
+                    )
+                    break
+            else:
+                # Even with 5 messages still over limit
+                logger.error(
+                    f"Unable to reduce token count below threshold even with 5 messages. "
+                    f"Final count: {new_token_count} tokens. Messages may be extremely large."
+                )
 
     except Exception as e:
         logger.error(f"Context summarization failed: {e}", exc_info=True)
         # Continue with original messages (fallback)
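
Note: the added block relies on Python's for/else, where the else suite
runs only when the loop completes without hitting break. Below is a
minimal standalone sketch of the same truncation strategy; it is not the
committed code. The keep counts, the 120_000 limit, and the system-prompt
handling come from the patch, while rough_token_count and the demo
harness are assumptions standing in for the service's estimate_token_count
helper (approximating ~4 characters per token):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    TOKEN_LIMIT = 120_000  # same threshold the patch hardcodes


    def rough_token_count(messages: list[dict]) -> int:
        # Hypothetical stand-in for estimate_token_count: ~4 chars/token.
        return sum(len(str(m.get("content", ""))) for m in messages) // 4


    def truncate_recent(messages: list[dict], has_system_prompt: bool) -> list[dict]:
        """Keep progressively fewer recent messages until under the limit."""
        for keep_count in [12, 10, 8, 5]:
            if len(messages) <= keep_count:
                continue  # already at or below this size
            recent = messages[-keep_count:]
            # Re-attach a leading system prompt so truncation never drops it.
            messages = ([messages[0]] + recent) if has_system_prompt else recent
            if rough_token_count(messages) <= TOKEN_LIMIT:
                logger.info("Reduced to %d recent messages", keep_count)
                break
        else:
            # Loop exhausted without break: still over the limit at 5 messages.
            logger.error("Unable to reduce below %d tokens", TOKEN_LIMIT)
        return messages


    if __name__ == "__main__":
        msgs = [{"role": "system", "content": "sys"}]
        msgs += [{"role": "user", "content": "x" * 80_000} for _ in range(20)]
        print(len(truncate_recent(msgs, has_system_prompt=True)))  # 21 -> 6

In the demo, each pass shrinks the already-truncated list, so keep counts
of 12, 10, and 8 are still over the limit and the loop settles at 5 recent
messages plus the re-attached system prompt.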