From 17853cd5bdf25b75f8a2fcde9a6c7d5446c308c5 Mon Sep 17 00:00:00 2001
From: Graham Neubig
Date: Sun, 29 Jun 2025 21:57:34 -0400
Subject: [PATCH] Change default max_output_tokens to None and add comprehensive model tests (#9366)

Co-authored-by: openhands
---
 openhands/llm/llm.py       | 39 ++++++++--------
 openhands/memory/memory.py |  1 -
 tests/unit/test_llm.py     | 95 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 111 insertions(+), 24 deletions(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 382cfacf42..ca6c656a89 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -482,24 +482,26 @@ class LLM(RetryMixin, DebugMixin):
             )
         self.config.top_p = 0.9 if self.config.top_p == 1 else self.config.top_p
 
-        # Set the max tokens in an LM-specific way if not set
-        if self.config.max_input_tokens is None:
-            if (
-                self.model_info is not None
-                and 'max_input_tokens' in self.model_info
-                and isinstance(self.model_info['max_input_tokens'], int)
-            ):
-                self.config.max_input_tokens = self.model_info['max_input_tokens']
-            else:
-                # Safe fallback for any potentially viable model
-                self.config.max_input_tokens = 4096
+        # Set max_input_tokens from model info if not explicitly set
+        if (
+            self.config.max_input_tokens is None
+            and self.model_info is not None
+            and 'max_input_tokens' in self.model_info
+            and isinstance(self.model_info['max_input_tokens'], int)
+        ):
+            self.config.max_input_tokens = self.model_info['max_input_tokens']
 
+        # Set max_output_tokens from model info if not explicitly set
         if self.config.max_output_tokens is None:
-            # Safe default for any potentially viable model
-            self.config.max_output_tokens = 4096
-            if self.model_info is not None:
-                # max_output_tokens has precedence over max_tokens, if either exists.
-                # litellm has models with both, one or none of these 2 parameters!
+            # Special case for Claude 3.7 Sonnet models
+            if any(
+                model in self.config.model
+                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
+            ):
+                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
+            # Try to get from model info
+            elif self.model_info is not None:
+                # max_output_tokens has precedence over max_tokens
                 if 'max_output_tokens' in self.model_info and isinstance(
                     self.model_info['max_output_tokens'], int
                 ):
@@ -508,11 +510,6 @@ class LLM(RetryMixin, DebugMixin):
                         self.model_info['max_tokens'], int
                     ):
                         self.config.max_output_tokens = self.model_info['max_tokens']
-            if any(
-                model in self.config.model
-                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
-            ):
-                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
 
         # Initialize function calling capability
         # Check if model name is in our supported list
diff --git a/openhands/memory/memory.py b/openhands/memory/memory.py
index 13e58cbeae..63b5193cc1 100644
--- a/openhands/memory/memory.py
+++ b/openhands/memory/memory.py
@@ -296,7 +296,6 @@ class Memory:
                 self.knowledge_microagents[name] = agent_knowledge
             for name, agent_repo in repo_agents.items():
                 self.repo_microagents[name] = agent_repo
-
         except Exception as e:
             logger.warning(
                 f'Failed to load user microagents from {USER_MICROAGENTS_DIR}: {str(e)}'
diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index 77c24f7da1..8b5e106ce3 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -140,8 +140,8 @@ def test_llm_init_without_model_info(mock_get_model_info, default_config):
     mock_get_model_info.side_effect = Exception('Model info not available')
     llm = LLM(default_config)
     llm.init_model_info()
-    assert llm.config.max_input_tokens == 4096
-    assert llm.config.max_output_tokens == 4096
+    assert llm.config.max_input_tokens is None
+    assert llm.config.max_output_tokens is None
 
 
 def test_llm_init_with_custom_config():
@@ -981,3 +981,94 @@ def test_llm_base_url_auto_protocol_patch(mock_get):
 
     called_url = mock_get.call_args[0][0]
     assert called_url.startswith('http://') or called_url.startswith('https://')
+
+
+# Tests for max_output_tokens configuration and usage
+
+
+def test_unknown_model_token_limits():
+    """Test that models without known token limits get None for both max_output_tokens and max_input_tokens."""
+    # Create LLM instance with a non-existent model to avoid litellm having model info for it
+    config = LLMConfig(model='non-existent-model', api_key='test_key')
+    llm = LLM(config)
+
+    # Verify max_output_tokens and max_input_tokens are initialized to None (default value)
+    assert llm.config.max_output_tokens is None
+    assert llm.config.max_input_tokens is None
+
+
+def test_max_tokens_from_model_info():
+    """Test that max_output_tokens and max_input_tokens are correctly initialized from model info."""
+    # Create LLM instance with GPT-4 model which has known token limits
+    config = LLMConfig(model='gpt-4', api_key='test_key')
+    llm = LLM(config)
+
+    # GPT-4 has specific token limits
+    # These are the expected values from litellm
+    assert llm.config.max_output_tokens == 4096
+    assert llm.config.max_input_tokens == 8192
+
+
+def test_claude_3_7_sonnet_max_output_tokens():
+    """Test that Claude 3.7 Sonnet models get the special 64000 max_output_tokens value and default max_input_tokens."""
+    # Create LLM instance with Claude 3.7 Sonnet model
+    config = LLMConfig(model='claude-3-7-sonnet', api_key='test_key')
+    llm = LLM(config)
+
+    # Verify max_output_tokens is set to 64000 for Claude 3.7 Sonnet
+    assert llm.config.max_output_tokens == 64000
+    # Verify max_input_tokens is set to None (default value)
+    assert llm.config.max_input_tokens is None
+
+
+def test_claude_sonnet_4_max_output_tokens():
+    """Test that Claude Sonnet 4 models get the correct max_output_tokens and max_input_tokens values."""
+    # Create LLM instance with a Claude Sonnet 4 model
+    config = LLMConfig(model='claude-sonnet-4-20250514', api_key='test_key')
+    llm = LLM(config)
+
+    # Verify max_output_tokens is set to the expected value
+    assert llm.config.max_output_tokens == 64000
+    # Verify max_input_tokens is set to the expected value
+    # For Claude models, we expect a specific value from litellm
+    assert llm.config.max_input_tokens == 200000
+
+
+def test_sambanova_deepseek_model_max_output_tokens():
+    """Test that SambaNova DeepSeek-V3-0324 model gets the correct max_output_tokens value."""
+    # Create LLM instance with SambaNova DeepSeek model
+    config = LLMConfig(model='sambanova/DeepSeek-V3-0324', api_key='test_key')
+    llm = LLM(config)
+
+    # SambaNova DeepSeek model has specific token limits
+    # This is the expected value from litellm
+    assert llm.config.max_output_tokens == 32768
+
+
+def test_max_output_tokens_override_in_config():
+    """Test that max_output_tokens can be overridden in the config."""
+    # Create LLM instance with minimal config and overridden max_output_tokens
+    config = LLMConfig(
+        model='claude-sonnet-4-20250514', api_key='test_key', max_output_tokens=2048
+    )
+    llm = LLM(config)
+
+    # Verify the config has the overridden max_output_tokens value
+    assert llm.config.max_output_tokens == 2048
+
+
+def test_azure_model_default_max_tokens():
+    """Test that Azure models have the default max_output_tokens value."""
+    # Create minimal config for Azure model (without specifying max_output_tokens)
+    azure_config = LLMConfig(
+        model='azure/non-existent-model',  # Use a non-existent model to avoid litellm having model info for it
+        api_key='test_key',
+        base_url='https://test.openai.azure.com/',
+        api_version='2024-12-01-preview',
+    )
+
+    # Create LLM instance with Azure model
+    llm = LLM(azure_config)
+
+    # Verify the config has the default max_output_tokens value
+    assert llm.config.max_output_tokens is None  # Default value
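
Illustrative usage: a minimal sketch of how the new token-limit defaults surface to callers, based on the tests added above. It is not part of the patch; the import paths are assumptions and may not match the repository layout exactly.

    # Sketch of the new defaults; import paths are assumed, not taken from the diff.
    from openhands.core.config import LLMConfig  # assumed location of LLMConfig
    from openhands.llm.llm import LLM  # module patched above

    # Unknown model: litellm has no model info, so both limits now stay None
    # instead of falling back to 4096 as before this change.
    llm = LLM(LLMConfig(model='non-existent-model', api_key='test_key'))
    assert llm.config.max_input_tokens is None
    assert llm.config.max_output_tokens is None

    # Claude 3.7 Sonnet is special-cased to 64000 output tokens.
    llm = LLM(LLMConfig(model='claude-3-7-sonnet', api_key='test_key'))
    assert llm.config.max_output_tokens == 64000

    # An explicit value in the config still takes precedence over model info.
    llm = LLM(LLMConfig(model='gpt-4', api_key='test_key', max_output_tokens=2048))
    assert llm.config.max_output_tokens == 2048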