tests(LLM): add concurrency and provider-mapping reinit tests; fix style

Co-authored-by: openhands <openhands@all-hands.dev>
style(tests): apply pre-commit formatting fixes
2026-04-29 03:00:45 -04:00 · 2025-08-14 16:26:02 +00:00 · 2025-08-14 05:27:36 +00:00 · 2025-08-14 05:24:38 +00:00 · 2025-08-14 05:18:38 +00:00 · 2025-08-14 03:31:27 +00:00
2 changed files with 458 additions and 6 deletions
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -3,6 +3,7 @@ import os
 import time
 import warnings
 from functools import partial
+from threading import RLock
 from typing import Any, Callable

 import httpx
@@ -142,6 +143,7 @@ class LLM(RetryMixin, DebugMixin):
            metrics: The metrics to use.
        """
        self._tried_model_info = False
+        self._lock = RLock()
        self.metrics: Metrics = (
            metrics if metrics is not None else Metrics(model_name=config.model)
        )
@@ -157,11 +159,22 @@ class LLM(RetryMixin, DebugMixin):
                )
            os.makedirs(self.config.log_completions_folder, exist_ok=True)

-        # call init_model_info to initialize config.max_output_tokens
-        # which is used in partial function
+        # Initialize core internals in a single pass
+        self._initialize_core()
+
+    def _initialize_core(self) -> None:
+        """Initialize or re-initialize all components derived from config.
+
+        This centralizes initialization to avoid duplication between __init__ and reinit().
+        """
+        # call init_model_info to initialize config.max_output_tokens used in partial
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.init_model_info()
+
+        # Recompute function-calling capability regardless of model_info cache
+        self._compute_function_calling_active()
+
        if self.vision_is_active():
            logger.debug('LLM: model has vision enabled')
        if self.is_caching_prompt_active():
@@ -175,6 +188,17 @@ class LLM(RetryMixin, DebugMixin):
        else:
            self.tokenizer = None

+        # Initialize the completion function
+        self._build_completion_function()
+        # Build the completion wrapper with retry logic
+        self._rebuild_completion_wrapper()
+
+    def _build_completion_function(self) -> None:
+        """Build the completion function based on current configuration.
+
+        This method creates the partial function that will be used for LLM completions.
+        It can be called multiple times to rebuild the function when configuration changes.
+        """
        # set up the completion function
        kwargs: dict[str, Any] = {
            'temperature': self.config.temperature,
@@ -251,6 +275,78 @@ class LLM(RetryMixin, DebugMixin):

        self._completion_unwrapped = self._completion

+    def reinit(self, new_config: LLMConfig) -> None:
+        """Reinitialize the LLM with a new configuration.
+
+        This is a threadsafe operation that updates configuration and rebuilds
+        the completion pipeline (completion function + retry wrapper). It also
+        refreshes model info and tokenizer as needed.
+        """
+        with self._lock:
+            # Reset capability/cost flags so the new config gets a clean slate
+            self.cost_metric_supported = True
+            old_model = self.config.model
+            old_tokenizer = self.config.custom_tokenizer
+
+            # Update the configuration (deep copy to avoid external mutation)
+            self.config = copy.deepcopy(new_config)
+
+            # Update metrics model name if model changed and refresh model info
+            if old_model != new_config.model:
+                self.metrics.model_name = new_config.model
+                logger.debug(
+                    f'LLM model changed from {old_model} to {new_config.model}'
+                )
+                # Reset model info to force re-initialization
+                self._tried_model_info = False
+                self.model_info = None
+                with warnings.catch_warnings():
+                    warnings.simplefilter('ignore')
+                    self.init_model_info()
+                # Log new capabilities
+                if self.vision_is_active():
+                    logger.debug('LLM: model has vision enabled')
+                if self.is_caching_prompt_active():
+                    logger.debug('LLM: caching prompt enabled')
+                if self.is_function_calling_active():
+                    logger.debug('LLM: model supports function calling')
+
+            # Update tokenizer if custom_tokenizer changed
+            if old_tokenizer != new_config.custom_tokenizer:
+                if new_config.custom_tokenizer is not None:
+                    self.tokenizer = create_pretrained_tokenizer(
+                        new_config.custom_tokenizer
+                    )
+                    logger.debug(
+                        f'LLM tokenizer updated to {new_config.custom_tokenizer}'
+                    )
+                else:
+                    self.tokenizer = None
+                    logger.debug('LLM tokenizer reset to default')
+
+            # Handle log completions folder creation if needed
+            if new_config.log_completions:
+                if new_config.log_completions_folder is None:
+                    raise RuntimeError(
+                        'log_completions_folder is required when log_completions is enabled'
+                    )
+                os.makedirs(new_config.log_completions_folder, exist_ok=True)
+
+            # Re-initialize core internals (model info, tokenizer, completion & wrapper)
+            self._initialize_core()
+            logger.debug('LLM reinitialized successfully')
+
+    # Backward-compat: keep update_config as an alias
+    def update_config(self, new_config: LLMConfig) -> None:
+        self.reinit(new_config)
+
+    def _rebuild_completion_wrapper(self) -> None:
+        """Rebuild the completion wrapper with retry decorator.
+
+        This method recreates the wrapper function that includes retry logic
+        and other processing around the base completion function.
+        """
+
        @self.retry_decorator(
            num_retries=self.config.num_retries,
            retry_exceptions=LLM_RETRY_EXCEPTIONS,
@@ -553,14 +649,19 @@ class LLM(RetryMixin, DebugMixin):
                    self.config.max_output_tokens = self.model_info['max_tokens']

        # Initialize function calling capability
-        # Check if model name is in our supported list
+        self._compute_function_calling_active()
+
+    def _compute_function_calling_active(self) -> None:
+        """Compute and cache whether function calling is active for current config.
+
+        Respects user override via native_tool_calling. Otherwise bases decision on
+        supported model names.
+        """
        model_name_supported = (
            self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
            or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
            or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
        )
-
-        # Handle native_tool_calling user-defined configuration
        if self.config.native_tool_calling is None:
            self._function_calling_active = model_name_supported
        else:
--- a/tests/unit/llm/test_llm.py
+++ b/tests/unit/llm/test_llm.py
@@ -1104,7 +1104,219 @@ def test_azure_model_default_max_tokens():
    assert llm.config.max_output_tokens is None  # Default value


-# Gemini Performance Optimization Tests
+def test_llm_update_config_basic(default_config):
+    """Test basic LLM configuration update functionality."""
+    llm = LLM(default_config)
+
+    # Verify initial state
+    assert llm.config.model == 'gpt-4o'
+    assert llm.config.temperature == 0.0
+    assert llm.metrics.model_name == 'gpt-4o'
+
+    # Create new config with different settings
+    new_config = LLMConfig(
+        model='claude-3-5-sonnet-20241022',
+        api_key='new_test_key',
+        temperature=0.7,
+        max_output_tokens=2000,
+        top_p=0.9,
+    )
+
+    # Update the configuration
+    llm.update_config(new_config)
+
+    # Verify the configuration was updated
+    assert llm.config.model == 'claude-3-5-sonnet-20241022'
+    assert llm.config.api_key.get_secret_value() == 'new_test_key'
+    assert llm.config.temperature == 0.7
+    assert llm.config.max_output_tokens == 2000
+    assert llm.config.top_p == 0.9
+    assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
+
+
+def test_llm_update_config_model_change_resets_model_info(default_config):
+    """Test that changing model resets model info for re-initialization."""
+    llm = LLM(default_config)
+
+    # Set some model info to verify it gets reset
+    llm.model_info = {'test': 'info'}
+    llm._tried_model_info = True
+
+    # Create new config with different model
+    new_config = copy.deepcopy(default_config)
+    new_config.model = 'claude-3-5-sonnet-20241022'
+
+    # Update the configuration
+    llm.update_config(new_config)
+
+    # Verify model info was reset and metrics updated
+    assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
+    # _tried_model_info should be reset to False to force re-initialization
+    # (it will be set back to True after init_model_info() is called)
+
+
+def test_llm_update_config_same_model_preserves_model_info(default_config):
+    """Test that keeping the same model preserves model info."""
+    llm = LLM(default_config)
+
+    # Create new config with same model but different other settings
+    new_config = copy.deepcopy(default_config)
+    new_config.temperature = 0.5
+    new_config.max_output_tokens = 1500
+
+    # Update the configuration
+    llm.update_config(new_config)
+
+    # Verify model info was preserved but other settings changed
+    assert llm.config.temperature == 0.5
+    assert llm.config.max_output_tokens == 1500
+    assert llm.metrics.model_name == 'gpt-4o'  # Same model
+
+
+def test_llm_update_config_custom_tokenizer_update(default_config):
+    """Test updating custom tokenizer configuration."""
+    llm = LLM(default_config)
+
+    # Initially no custom tokenizer
+    assert llm.config.custom_tokenizer is None
+    assert llm.tokenizer is None
+
+    # Update config with custom tokenizer
+    new_config = copy.deepcopy(default_config)
+    new_config.custom_tokenizer = 'gpt2'
+
+    llm.update_config(new_config)
+
+    # Verify tokenizer was updated
+    assert llm.config.custom_tokenizer == 'gpt2'
+    assert llm.tokenizer is not None
+
+    # Update back to no custom tokenizer
+    new_config.custom_tokenizer = None
+    llm.update_config(new_config)
+
+    # Verify tokenizer was reset
+    assert llm.config.custom_tokenizer is None
+    assert llm.tokenizer is None
+
+
+def test_llm_update_config_log_completions_folder_creation(default_config):
+    """Test that log completions folder is created when needed."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        log_folder = Path(temp_dir) / 'test_completions'
+
+        llm = LLM(default_config)
+
+        # Update config to enable log completions
+        new_config = copy.deepcopy(default_config)
+        new_config.log_completions = True
+        new_config.log_completions_folder = str(log_folder)
+
+        # Folder shouldn't exist yet
+        assert not log_folder.exists()
+
+        # Update configuration
+        llm.update_config(new_config)
+
+        # Verify folder was created
+        assert log_folder.exists()
+        assert log_folder.is_dir()
+
+
+def test_llm_update_config_log_completions_folder_required():
+    """Test that log_completions_folder is required when log_completions is True."""
+    config = LLMConfig(model='gpt-4o', api_key='test_key')
+    llm = LLM(config)
+
+    # Create config with log_completions=True but no folder
+    new_config = copy.deepcopy(config)
+    new_config.log_completions = True
+    new_config.log_completions_folder = None
+
+    # Should raise RuntimeError
+    with pytest.raises(RuntimeError, match='log_completions_folder is required'):
+        llm.update_config(new_config)
+
+
+def test_llm_update_config_completion_function_rebuilt(default_config):
+    """Test that completion function is rebuilt after config update."""
+    llm = LLM(default_config)
+
+    # Store reference to original completion function
+
+    # Update configuration
+    new_config = copy.deepcopy(default_config)
+    new_config.temperature = 0.8
+    new_config.max_output_tokens = 1500
+
+    llm.update_config(new_config)
+
+    # Verify completion functions exist (they should be rebuilt)
+    assert llm._completion is not None
+    assert llm._completion_unwrapped is not None
+
+    # The functions should be different objects since they were rebuilt
+    # (though this is implementation detail, the important thing is they work)
+    assert callable(llm._completion)
+    assert callable(llm._completion_unwrapped)
+
+
+def test_llm_update_config_preserves_metrics_and_retry_listener(default_config):
+    """Test that metrics and retry listener are preserved during config update."""
+    # Create custom metrics and retry listener
+    custom_metrics = Metrics(model_name='initial_model')
+    retry_listener = MagicMock()
+
+    llm = LLM(default_config, metrics=custom_metrics, retry_listener=retry_listener)
+
+    # Verify initial state
+    assert llm.metrics is custom_metrics
+    assert llm.retry_listener is retry_listener
+
+    # Update configuration
+    new_config = copy.deepcopy(default_config)
+    new_config.model = 'claude-3-5-sonnet-20241022'
+
+    llm.update_config(new_config)
+
+    # Verify metrics and retry listener are preserved
+    assert llm.metrics is custom_metrics
+    assert llm.retry_listener is retry_listener
+    # But metrics model name should be updated
+    assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
+
+
+def test_llm_update_config_deep_copy_independence():
+    """Test that config update uses deep copy and doesn't affect original config."""
+    original_config = LLMConfig(
+        model='gpt-4o',
+        api_key='test_key',
+        temperature=0.0,
+    )
+
+    llm = LLM(original_config)
+
+    # Create new config
+    new_config = LLMConfig(
+        model='claude-3-5-sonnet-20241022',
+        api_key='new_key',
+        temperature=0.7,
+    )
+
+    # Update LLM config
+    llm.update_config(new_config)
+
+    # Modify the new_config after update
+    new_config.temperature = 0.9
+    new_config.model = 'different-model'
+
+    # LLM config should not be affected by external changes
+    assert llm.config.temperature == 0.7
+    assert llm.config.model == 'claude-3-5-sonnet-20241022'
+
+    # Original config should also be unchanged
+    assert original_config.temperature == 0.0
+    assert original_config.model == 'gpt-4o'


 def test_gemini_model_keeps_none_reasoning_effort():
@@ -1302,3 +1514,142 @@ def test_gemini_performance_optimization_end_to_end(mock_completion):
    # Verify temperature and top_p were removed for reasoning models
    assert 'temperature' not in call_kwargs
    assert 'top_p' not in call_kwargs
+
+
+def test_llm_reinit_basic(default_config):
+    """Reinit should update config and metrics like update_config."""
+    llm = LLM(default_config)
+    assert llm.metrics.model_name == 'gpt-4o'
+
+    new_config = LLMConfig(
+        model='claude-3-5-sonnet-20241022',
+        api_key='new_test_key',
+        temperature=0.7,
+        max_output_tokens=1234,
+    )
+
+    llm.reinit(new_config)
+
+    assert llm.config.model == 'claude-3-5-sonnet-20241022'
+    assert llm.config.api_key.get_secret_value() == 'new_test_key'
+    assert llm.config.temperature == 0.7
+    assert llm.config.max_output_tokens == 1234
+    assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
+    assert callable(llm._completion)
+    assert callable(llm._completion_unwrapped)
+
+
+def test_llm_reinit_recomputes_function_calling_capability():
+    """Reinit should recompute function-calling capability even if model doesn't change."""
+    base = LLMConfig(model='gpt-3.5-turbo', api_key='key')
+    llm = LLM(base)
+    # gpt-3.5-turbo is not in FUNCTION_CALLING_SUPPORTED_MODELS
+    assert llm.is_function_calling_active() is False
+
+    # Turn on native tool calling; same model
+    cfg_on = copy.deepcopy(base)
+    cfg_on.native_tool_calling = True
+    llm.reinit(cfg_on)
+    assert llm.is_function_calling_active() is True
+
+    # Turn off explicitly
+    cfg_off = copy.deepcopy(base)
+    cfg_off.native_tool_calling = False
+    llm.reinit(cfg_off)
+    assert llm.is_function_calling_active() is False
+
+
+def test_llm_reinit_resets_cost_flag(default_config):
+    """Reinit should reset cost_metric_supported so a new model can report cost."""
+    llm = LLM(default_config)
+    llm.cost_metric_supported = False
+
+    # Same model, but reinit should reset the flag to True
+    llm.reinit(copy.deepcopy(default_config))
+    assert llm.cost_metric_supported is True
+
+
+def test_llm_reinit_thread_safety_with_inflight_completion(default_config):
+    """Concurrent reinit during an in-flight completion should not raise,
+    and subsequent completions should use the new config.
+    """
+    import threading
+    import time
+    from unittest.mock import patch
+
+    llm = LLM(default_config)
+
+    calls = []
+
+    def fake_completion(*args, **kwargs):
+        # Simulate provider latency and record the model used
+        calls.append(kwargs.get('model'))
+        time.sleep(0.2)
+        return {
+            'id': 'test-1',
+            'choices': [{'message': {'content': 'ok'}}],
+            'usage': {'prompt_tokens': 1, 'completion_tokens': 1},
+        }
+
+    with patch('openhands.llm.llm.litellm_completion') as mock_completion:
+        mock_completion.side_effect = fake_completion
+
+        # Start an in-flight completion with the initial model
+        t = threading.Thread(
+            target=llm.completion, args=([{'role': 'user', 'content': 'hi'}],)
+        )
+        t.start()
+
+        # Reinit while the completion is in-flight
+        time.sleep(0.05)
+        new_cfg = copy.deepcopy(default_config)
+        new_cfg.model = 'claude-3-5-sonnet-20241022'
+        llm.reinit(new_cfg)
+
+        t.join()
+
+        # Subsequent completion should use the new config
+        llm.completion(messages=[{'role': 'user', 'content': 'again'}])
+
+        # Ensure the latest call used the new model and metrics reflect it
+        assert len(calls) >= 1
+        assert calls[-1] == 'claude-3-5-sonnet-20241022'
+        assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_llm_reinit_provider_mappings(mock_completion, default_config):
+    """Reinit should apply provider-specific mappings (openhands proxy, azure max_tokens)."""
+    # Return a minimal, valid response
+    mock_completion.return_value = {
+        'id': 'call',
+        'choices': [{'message': {'content': 'ok'}}],
+        'usage': {'prompt_tokens': 1, 'completion_tokens': 1},
+    }
+
+    llm = LLM(default_config)
+
+    # 1) OpenHands provider rewrite to litellm_proxy
+    cfg_proxy = copy.deepcopy(default_config)
+    cfg_proxy.model = 'openhands/qwen3-coder'
+    llm.reinit(cfg_proxy)
+    llm.completion(messages=[{'role': 'user', 'content': 'x'}])
+
+    model_arg = mock_completion.call_args.kwargs.get('model')
+    base_url_arg = mock_completion.call_args.kwargs.get('base_url')
+    assert model_arg == 'litellm_proxy/qwen3-coder'
+    assert base_url_arg == 'https://llm-proxy.app.all-hands.dev/'
+
+    # 2) Azure mapping: max_completion_tokens -> max_tokens
+    mock_completion.reset_mock()
+    cfg_azure = copy.deepcopy(default_config)
+    cfg_azure.model = 'azure/gpt-4o'
+    cfg_azure.max_output_tokens = 777
+    cfg_azure.api_version = '2024-06-01'
+    llm.reinit(cfg_azure)
+    llm.completion(messages=[{'role': 'user', 'content': 'y'}])
+
+    kwargs = mock_completion.call_args.kwargs
+    assert kwargs.get('model') == 'azure/gpt-4o'
+    assert 'max_tokens' in kwargs and kwargs['max_tokens'] == 777
+    assert 'max_completion_tokens' not in kwargs
Author	SHA1	Message	Date
Engel Nyst	c8d856cfa5	tests(LLM): add concurrency and provider-mapping reinit tests; fix style Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-14 16:26:02 +00:00
Engel Nyst	db5c0c687c	style(tests): apply pre-commit formatting fixes Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-14 05:27:36 +00:00
Engel Nyst	82d5b0388b	tests(LLM): add reinit tests (basic, capability recompute, cost flag reset) Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-14 05:24:38 +00:00
Engel Nyst	bf192688a5	LLM: add lock + reinit helper; recompute capabilities in centralized initializer; keep update_config alias Co-authored-by: openhands <openhands@all-hands.dev>	2025-08-14 05:18:38 +00:00
Engel Nyst	477d9b17c0	Merge branch 'main' into llm-init to resolve conflicts Co-authored-by: OpenHands-GPT-4.1 <openhands@all-hands.dev>	2025-08-14 03:31:27 +00:00
OpenHands Bot	6756427217	🤖 Auto-fix Python linting issues	2025-07-30 23:33:14 +00:00
Engel Nyst	1b74920509	Merge branch 'main' into llm-init	2025-07-31 01:31:31 +02:00
Engel Nyst	c933b88f36	Merge branch 'main' into llm-init	2025-07-30 02:01:52 +02:00
Engel Nyst	e0e9d3d07c	Merge branch 'main' into llm-init	2025-07-26 22:50:17 +02:00
Engel Nyst	00f9ff08c7	Merge branch 'main' into llm-init	2025-07-22 00:53:46 +02:00
Engel Nyst	f37f8fb723	Merge branch 'main' into llm-init	2025-07-21 01:57:07 +02:00
Engel Nyst	8628da0037	Refactor LLM class to support runtime configuration updates - Decouple partial function creation from LLM initialization - Extract _build_completion_function() method for reusable completion setup - Add update_config() method for hot-swapping LLM configuration at runtime - Add _rebuild_completion_wrapper() method to recreate retry decorator wrapper - Preserve metrics and retry listener instances during config updates - Handle model changes by resetting model info for re-initialization - Support custom tokenizer updates and log completion folder creation - Add comprehensive unit tests covering all update scenarios This prepares the LLM class for the upcoming unified configuration system that will enable runtime config reloading without restart. Co-authored-by: OpenHands <openhands@all-hands.dev>	2025-07-19 15:07:52 +02:00