diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py
index 0ff8a48060..0353fbcc37 100644
--- a/openhands/core/config/llm_config.py
+++ b/openhands/core/config/llm_config.py
@@ -28,6 +28,7 @@ class LLMConfig(BaseModel):
         max_message_chars: The approximate max number of characters in the content of an event included in the prompt to the LLM. Larger observations are truncated.
         temperature: The temperature for the API.
         top_p: The top p for the API.
+        top_k: The top k for the API.
         custom_llm_provider: The custom LLM provider to use. This is undocumented in openhands, and normally not used. It is documented on the litellm side.
         max_input_tokens: The maximum number of input tokens. Note that this is currently unused, and the value at runtime is actually the total tokens in OpenAI (e.g. 128,000 tokens for GPT-4).
         max_output_tokens: The maximum number of output tokens. This is sent to the LLM.
@@ -66,6 +67,7 @@ class LLMConfig(BaseModel):
     )  # maximum number of characters in an observation's content when sent to the llm
     temperature: float = Field(default=0.0)
     top_p: float = Field(default=1.0)
+    top_k: float | None = Field(default=None)
     custom_llm_provider: str | None = Field(default=None)
     max_input_tokens: int | None = Field(default=None)
     max_output_tokens: int | None = Field(default=None)
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 180448ad22..5c9ae0861d 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -159,6 +159,11 @@ class LLM(RetryMixin, DebugMixin):
             'temperature': self.config.temperature,
             'max_completion_tokens': self.config.max_output_tokens,
         }
+        if self.config.top_k is not None:
+            # openai doesn't expose top_k
+            # litellm will handle it a bit differently than the openai-compatible params
+            kwargs['top_k'] = self.config.top_k
+
         if (
             self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS
             or self.config.model.split('/')[-1] in REASONING_EFFORT_SUPPORTED_MODELS
diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index cb7d48accc..e050d8e42a 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -152,6 +152,7 @@ def test_llm_init_with_custom_config():
         max_output_tokens=1500,
         temperature=0.8,
         top_p=0.9,
+        top_k=None,
     )
     llm = LLM(custom_config)
     assert llm.config.model == 'custom-model'
@@ -160,6 +161,42 @@ def test_llm_init_with_custom_config():
     assert llm.config.max_output_tokens == 1500
     assert llm.config.temperature == 0.8
     assert llm.config.top_p == 0.9
+    assert llm.config.top_k is None
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_llm_top_k_in_completion_when_set(mock_litellm_completion):
+    # Create a config with top_k set
+    config_with_top_k = LLMConfig(top_k=50)
+    llm = LLM(config_with_top_k)
+
+    # Define a side effect function to check top_k
+    def side_effect(*args, **kwargs):
+        assert 'top_k' in kwargs
+        assert kwargs['top_k'] == 50
+        return {'choices': [{'message': {'content': 'Mocked response'}}]}
+
+    mock_litellm_completion.side_effect = side_effect
+
+    # Call completion
+    llm.completion(messages=[{'role': 'system', 'content': 'Test message'}])
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_llm_top_k_not_in_completion_when_none(mock_litellm_completion):
+    # Create a config with top_k set to None
+    config_without_top_k = LLMConfig(top_k=None)
+    llm = LLM(config_without_top_k)
+
+    # Define a side effect function to check top_k
+    def side_effect(*args, **kwargs):
+        assert 'top_k' not in kwargs
+        return {'choices': [{'message': {'content': 'Mocked response'}}]}
+
+    mock_litellm_completion.side_effect = side_effect
+
+    # Call completion
+    llm.completion(messages=[{'role': 'system', 'content': 'Test message'}])
 
 
 def test_llm_init_with_metrics():
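
A minimal usage sketch of the new option, for reference only and not part of the diff. It uses only the calls exercised in the tests above; the import paths are inferred from the file paths in this diff, and the message content is a placeholder:

    from openhands.core.config.llm_config import LLMConfig
    from openhands.llm.llm import LLM

    # With top_k set, the new code adds 'top_k' to the kwargs forwarded to
    # litellm_completion; with the default top_k=None, no 'top_k' key is sent.
    config = LLMConfig(top_k=50)
    llm = LLM(config)
    llm.completion(messages=[{'role': 'user', 'content': 'Hello'}])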