From 17853cd5bdf25b75f8a2fcde9a6c7d5446c308c5 Mon Sep 17 00:00:00 2001
From: Graham Neubig
Date: Sun, 29 Jun 2025 21:57:34 -0400
Subject: [PATCH] Change default max_output_tokens to None and add comprehensive model tests (#9366)

Co-authored-by: openhands
---
 openhands/llm/llm.py       | 39 ++++++++--------
 openhands/memory/memory.py |  1 -
 tests/unit/test_llm.py     | 95 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 111 insertions(+), 24 deletions(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 382cfacf42..ca6c656a89 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -482,24 +482,26 @@ class LLM(RetryMixin, DebugMixin):
             )
         self.config.top_p = 0.9 if self.config.top_p == 1 else self.config.top_p
 
-        # Set the max tokens in an LM-specific way if not set
-        if self.config.max_input_tokens is None:
-            if (
-                self.model_info is not None
-                and 'max_input_tokens' in self.model_info
-                and isinstance(self.model_info['max_input_tokens'], int)
-            ):
-                self.config.max_input_tokens = self.model_info['max_input_tokens']
-            else:
-                # Safe fallback for any potentially viable model
-                self.config.max_input_tokens = 4096
+        # Set max_input_tokens from model info if not explicitly set
+        if (
+            self.config.max_input_tokens is None
+            and self.model_info is not None
+            and 'max_input_tokens' in self.model_info
+            and isinstance(self.model_info['max_input_tokens'], int)
+        ):
+            self.config.max_input_tokens = self.model_info['max_input_tokens']
 
+        # Set max_output_tokens from model info if not explicitly set
         if self.config.max_output_tokens is None:
-            # Safe default for any potentially viable model
-            self.config.max_output_tokens = 4096
-            if self.model_info is not None:
-                # max_output_tokens has precedence over max_tokens, if either exists.
-                # litellm has models with both, one or none of these 2 parameters!
+            # Special case for Claude 3.7 Sonnet models
+            if any(
+                model in self.config.model
+                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
+            ):
+                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
+            # Try to get from model info
+            elif self.model_info is not None:
+                # max_output_tokens has precedence over max_tokens
                 if 'max_output_tokens' in self.model_info and isinstance(
                     self.model_info['max_output_tokens'], int
                 ):
@@ -508,11 +510,6 @@ class LLM(RetryMixin, DebugMixin):
                         self.model_info['max_tokens'], int
                     ):
                         self.config.max_output_tokens = self.model_info['max_tokens']
-            if any(
-                model in self.config.model
-                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
-            ):
-                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
 
         # Initialize function calling capability
         # Check if model name is in our supported list
diff --git a/openhands/memory/memory.py b/openhands/memory/memory.py
index 13e58cbeae..63b5193cc1 100644
--- a/openhands/memory/memory.py
+++ b/openhands/memory/memory.py
@@ -296,7 +296,6 @@ class Memory:
                 self.knowledge_microagents[name] = agent_knowledge
             for name, agent_repo in repo_agents.items():
                 self.repo_microagents[name] = agent_repo
-
         except Exception as e:
             logger.warning(
                 f'Failed to load user microagents from {USER_MICROAGENTS_DIR}: {str(e)}'
diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index 77c24f7da1..8b5e106ce3 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -140,8 +140,8 @@ def test_llm_init_without_model_info(mock_get_model_info, default_config):
     mock_get_model_info.side_effect = Exception('Model info not available')
     llm = LLM(default_config)
     llm.init_model_info()
-    assert llm.config.max_input_tokens == 4096
-    assert llm.config.max_output_tokens == 4096
+    assert llm.config.max_input_tokens is None
+    assert llm.config.max_output_tokens is None
 
 
 def test_llm_init_with_custom_config():
@@ -981,3 +981,94 @@ def test_llm_base_url_auto_protocol_patch(mock_get):
 
     called_url = mock_get.call_args[0][0]
     assert called_url.startswith('http://') or called_url.startswith('https://')
+
+
+# Tests for max_output_tokens configuration and usage
+
+
+def test_unknown_model_token_limits():
+    """Test that models without known token limits get None for both max_output_tokens and max_input_tokens."""
+    # Create LLM instance with a non-existent model to avoid litellm having model info for it
+    config = LLMConfig(model='non-existent-model', api_key='test_key')
+    llm = LLM(config)
+
+    # Verify max_output_tokens and max_input_tokens are initialized to None (default value)
+    assert llm.config.max_output_tokens is None
+    assert llm.config.max_input_tokens is None
+
+
+def test_max_tokens_from_model_info():
+    """Test that max_output_tokens and max_input_tokens are correctly initialized from model info."""
+    # Create LLM instance with GPT-4 model which has known token limits
+    config = LLMConfig(model='gpt-4', api_key='test_key')
+    llm = LLM(config)
+
+    # GPT-4 has specific token limits
+    # These are the expected values from litellm
+    assert llm.config.max_output_tokens == 4096
+    assert llm.config.max_input_tokens == 8192
+
+
+def test_claude_3_7_sonnet_max_output_tokens():
+    """Test that Claude 3.7 Sonnet models get the special 64000 max_output_tokens value and default max_input_tokens."""
+    # Create LLM instance with Claude 3.7 Sonnet model
+    config = LLMConfig(model='claude-3-7-sonnet', api_key='test_key')
+    llm = LLM(config)
+
+    # Verify max_output_tokens is set to 64000 for Claude 3.7 Sonnet
+    assert llm.config.max_output_tokens == 64000
+    # Verify max_input_tokens is set to None (default value)
+    assert llm.config.max_input_tokens is None
+
+
+def test_claude_sonnet_4_max_output_tokens():
+    """Test that Claude Sonnet 4 models get the correct max_output_tokens and max_input_tokens values."""
+    # Create LLM instance with a Claude Sonnet 4 model
+    config = LLMConfig(model='claude-sonnet-4-20250514', api_key='test_key')
+    llm = LLM(config)
+
+    # Verify max_output_tokens is set to the expected value
+    assert llm.config.max_output_tokens == 64000
+    # Verify max_input_tokens is set to the expected value
+    # For Claude models, we expect a specific value from litellm
+    assert llm.config.max_input_tokens == 200000
+
+
+def test_sambanova_deepseek_model_max_output_tokens():
+    """Test that SambaNova DeepSeek-V3-0324 model gets the correct max_output_tokens value."""
+    # Create LLM instance with SambaNova DeepSeek model
+    config = LLMConfig(model='sambanova/DeepSeek-V3-0324', api_key='test_key')
+    llm = LLM(config)
+
+    # SambaNova DeepSeek model has specific token limits
+    # This is the expected value from litellm
+    assert llm.config.max_output_tokens == 32768
+
+
+def test_max_output_tokens_override_in_config():
+    """Test that max_output_tokens can be overridden in the config."""
+    # Create LLM instance with minimal config and overridden max_output_tokens
+    config = LLMConfig(
+        model='claude-sonnet-4-20250514', api_key='test_key', max_output_tokens=2048
+    )
+    llm = LLM(config)
+
+    # Verify the config has the overridden max_output_tokens value
+    assert llm.config.max_output_tokens == 2048
+
+
+def test_azure_model_default_max_tokens():
+    """Test that Azure models have the default max_output_tokens value."""
+    # Create minimal config for Azure model (without specifying max_output_tokens)
+    azure_config = LLMConfig(
+        model='azure/non-existent-model',  # Use a non-existent model to avoid litellm having model info for it
+        api_key='test_key',
+        base_url='https://test.openai.azure.com/',
+        api_version='2024-12-01-preview',
+    )
+
+    # Create LLM instance with Azure model
+    llm = LLM(azure_config)
+
+    # Verify the config has the default max_output_tokens value
+    assert llm.config.max_output_tokens is None  # Default value
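
Illustrative usage: a minimal sketch of how the new token-limit defaults surface to callers, based on the tests added above. It is not part of the patch; the import paths are assumptions and may not match the repository layout exactly.

    # Sketch of the new defaults; import paths are assumed, not taken from the diff.
    from openhands.core.config import LLMConfig  # assumed location of LLMConfig
    from openhands.llm.llm import LLM  # module patched above

    # Unknown model: litellm has no model info, so both limits now stay None
    # instead of falling back to 4096 as before this change.
    llm = LLM(LLMConfig(model='non-existent-model', api_key='test_key'))
    assert llm.config.max_input_tokens is None
    assert llm.config.max_output_tokens is None

    # Claude 3.7 Sonnet is special-cased to 64000 output tokens.
    llm = LLM(LLMConfig(model='claude-3-7-sonnet', api_key='test_key'))
    assert llm.config.max_output_tokens == 64000

    # An explicit value in the config still takes precedence over model info.
    llm = LLM(LLMConfig(model='gpt-4', api_key='test_key', max_output_tokens=2048))
    assert llm.config.max_output_tokens == 2048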