diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py index 6a04f9d822..f850caa3a4 100644 --- a/autogpt_platform/backend/backend/api/features/chat/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/service.py @@ -928,11 +928,50 @@ async def _stream_chat_chunks( f"Final count: {new_token_count} tokens" ) else: + # No old messages to summarize - all messages are "recent" + # Apply progressive truncation to reduce token count logger.warning( f"Token count {token_count} exceeds threshold but no old messages to summarize. " - f"This may indicate recent messages are too large." + f"Applying progressive truncation to recent messages." ) + # Try progressively smaller keep counts + for keep_count in [12, 10, 8, 5]: + if len(messages) <= keep_count: + continue # Skip if we don't have enough messages + + recent_messages = messages[-keep_count:] + + if has_system_prompt: + messages = [messages[0]] + recent_messages + else: + messages = recent_messages + + new_messages_dict = [] + for msg in messages: + if isinstance(msg, dict): + msg_dict = {k: v for k, v in msg.items() if v is not None} + else: + msg_dict = dict(msg) + new_messages_dict.append(msg_dict) + + new_token_count = estimate_token_count( + new_messages_dict, model=token_count_model + ) + + if new_token_count <= 120_000: + logger.info( + f"Reduced to {keep_count} recent messages, " + f"now {new_token_count} tokens" + ) + break + else: + # Even with 5 messages still over limit + logger.error( + f"Unable to reduce token count below threshold even with 5 messages. " + f"Final count: {new_token_count} tokens. Messages may be extremely large." + ) + except Exception as e: logger.error(f"Context summarization failed: {e}", exc_info=True) # Continue with original messages (fallback)