From da5b0e667e505f903a5d1bacbcfeed62526f9b58 Mon Sep 17 00:00:00 2001
From: Bentlybro
Date: Tue, 27 Jan 2026 09:52:20 +0000
Subject: [PATCH] Improve token counting fallback for non-OpenAI models

Expanded the token counting logic to use the gpt-4o tokenizer for Claude
and other non-OpenAI models, and added error handling to fall back to
gpt-4o if token counting fails. This ensures more robust and consistent
token estimation across various LLM providers.
---
 .../backend/api/features/chat/service.py     | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py
index dc4bc095c3..6a04f9d822 100644
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -795,12 +795,24 @@ async def _stream_chat_chunks(
     # Strip provider prefix (e.g., "anthropic/claude-opus-4.5" -> "claude-opus-4.5")
     token_count_model = model.split("/")[-1]
 
-    # For Claude models, approximate with gpt-4o tokenizer
-    # Claude and GPT-4 have similar tokenization (~1 token per 4 chars)
-    if "claude" in token_count_model.lower():
+    # For Claude and other non-OpenAI models, approximate with gpt-4o tokenizer
+    # Most modern LLMs have similar tokenization (~1 token per 4 chars)
+    if "claude" in token_count_model.lower() or not any(
+        known in token_count_model.lower()
+        for known in ["gpt", "o1", "chatgpt", "text-"]
+    ):
         token_count_model = "gpt-4o"
 
-    token_count = estimate_token_count(messages_dict, model=token_count_model)
+    # Attempt token counting with error handling
+    try:
+        token_count = estimate_token_count(messages_dict, model=token_count_model)
+    except Exception as token_error:
+        # If token counting fails, use gpt-4o as fallback approximation
+        logger.warning(
+            f"Token counting failed for model {token_count_model}: {token_error}. "
+            "Using gpt-4o approximation."
+        )
+        token_count = estimate_token_count(messages_dict, model="gpt-4o")
 
     # If over threshold, summarize old messages
     if token_count > 120_000:
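
For reference, the patched fallback can be exercised outside the service with a
minimal standalone sketch of the same pattern. The patch does not show the
internals of estimate_token_count, so the tiktoken-backed stub below, the
count_with_fallback wrapper, and the sample messages are illustrative
assumptions, not the project's actual helper:

    import logging

    import tiktoken

    logger = logging.getLogger(__name__)


    def estimate_token_count(messages: list[dict], model: str = "gpt-4o") -> int:
        # Hypothetical stand-in for the helper used in service.py. tiktoken's
        # encoding_for_model() raises KeyError for model names it does not
        # recognize, which is the failure mode the patch guards against.
        encoding = tiktoken.encoding_for_model(model)
        return sum(len(encoding.encode(m.get("content") or "")) for m in messages)


    def count_with_fallback(messages: list[dict], model: str) -> int:
        # Mirror the patched logic: strip the provider prefix, map Claude and
        # other non-OpenAI models to gpt-4o, then fall back to gpt-4o if
        # counting still fails for any reason.
        token_count_model = model.split("/")[-1]
        if "claude" in token_count_model.lower() or not any(
            known in token_count_model.lower()
            for known in ["gpt", "o1", "chatgpt", "text-"]
        ):
            token_count_model = "gpt-4o"
        try:
            return estimate_token_count(messages, model=token_count_model)
        except Exception as token_error:
            logger.warning(
                f"Token counting failed for model {token_count_model}: {token_error}. "
                "Using gpt-4o approximation."
            )
            return estimate_token_count(messages, model="gpt-4o")


    # Example: a Claude model name routes through the gpt-4o tokenizer.
    messages = [{"role": "user", "content": "hello"}]
    print(count_with_fallback(messages, "anthropic/claude-opus-4.5"))

Because the model-name check is a substring match, any name that does not
contain "gpt", "o1", "chatgpt", or "text-" is treated as non-OpenAI and counted
with the gpt-4o tokenizer, which keeps the estimate approximate but consistent
across providers.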