Compare commits

...

12 Commits

Author SHA1 Message Date
Engel Nyst
c8d856cfa5 tests(LLM): add concurrency and provider-mapping reinit tests; fix style
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-14 16:26:02 +00:00
Engel Nyst
db5c0c687c style(tests): apply pre-commit formatting fixes
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-14 05:27:36 +00:00
Engel Nyst
82d5b0388b tests(LLM): add reinit tests (basic, capability recompute, cost flag reset)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-14 05:24:38 +00:00
Engel Nyst
bf192688a5 LLM: add lock + reinit helper; recompute capabilities in centralized initializer; keep update_config alias
Co-authored-by: openhands <openhands@all-hands.dev>
2025-08-14 05:18:38 +00:00
Engel Nyst
477d9b17c0 Merge branch 'main' into llm-init to resolve conflicts
Co-authored-by: OpenHands-GPT-4.1 <openhands@all-hands.dev>
2025-08-14 03:31:27 +00:00
OpenHands Bot
6756427217 🤖 Auto-fix Python linting issues 2025-07-30 23:33:14 +00:00
Engel Nyst
1b74920509 Merge branch 'main' into llm-init 2025-07-31 01:31:31 +02:00
Engel Nyst
c933b88f36 Merge branch 'main' into llm-init 2025-07-30 02:01:52 +02:00
Engel Nyst
e0e9d3d07c Merge branch 'main' into llm-init 2025-07-26 22:50:17 +02:00
Engel Nyst
00f9ff08c7 Merge branch 'main' into llm-init 2025-07-22 00:53:46 +02:00
Engel Nyst
f37f8fb723 Merge branch 'main' into llm-init 2025-07-21 01:57:07 +02:00
Engel Nyst
8628da0037 Refactor LLM class to support runtime configuration updates
- Decouple partial function creation from LLM initialization
- Extract _build_completion_function() method for reusable completion setup
- Add update_config() method for hot-swapping LLM configuration at runtime
- Add _rebuild_completion_wrapper() method to recreate retry decorator wrapper
- Preserve metrics and retry listener instances during config updates
- Handle model changes by resetting model info for re-initialization
- Support custom tokenizer updates and log completion folder creation
- Add comprehensive unit tests covering all update scenarios

This prepares the LLM class for the upcoming unified configuration system
that will enable runtime config reloading without restart.

Co-authored-by: OpenHands <openhands@all-hands.dev>
2025-07-19 15:07:52 +02:00
2 changed files with 458 additions and 6 deletions

View File

@@ -3,6 +3,7 @@ import os
import time
import warnings
from functools import partial
from threading import RLock
from typing import Any, Callable
import httpx
@@ -142,6 +143,7 @@ class LLM(RetryMixin, DebugMixin):
metrics: The metrics to use.
"""
self._tried_model_info = False
self._lock = RLock()
self.metrics: Metrics = (
metrics if metrics is not None else Metrics(model_name=config.model)
)
@@ -157,11 +159,22 @@ class LLM(RetryMixin, DebugMixin):
)
os.makedirs(self.config.log_completions_folder, exist_ok=True)
# call init_model_info to initialize config.max_output_tokens
# which is used in partial function
# Initialize core internals in a single pass
self._initialize_core()
def _initialize_core(self) -> None:
"""Initialize or re-initialize all components derived from config.
This centralizes initialization to avoid duplication between __init__ and reinit().
"""
# call init_model_info to initialize config.max_output_tokens used in partial
with warnings.catch_warnings():
warnings.simplefilter('ignore')
self.init_model_info()
# Recompute function-calling capability regardless of model_info cache
self._compute_function_calling_active()
if self.vision_is_active():
logger.debug('LLM: model has vision enabled')
if self.is_caching_prompt_active():
@@ -175,6 +188,17 @@ class LLM(RetryMixin, DebugMixin):
else:
self.tokenizer = None
# Initialize the completion function
self._build_completion_function()
# Build the completion wrapper with retry logic
self._rebuild_completion_wrapper()
def _build_completion_function(self) -> None:
"""Build the completion function based on current configuration.
This method creates the partial function that will be used for LLM completions.
It can be called multiple times to rebuild the function when configuration changes.
"""
# set up the completion function
kwargs: dict[str, Any] = {
'temperature': self.config.temperature,
@@ -251,6 +275,78 @@ class LLM(RetryMixin, DebugMixin):
self._completion_unwrapped = self._completion
def reinit(self, new_config: LLMConfig) -> None:
"""Reinitialize the LLM with a new configuration.
This is a threadsafe operation that updates configuration and rebuilds
the completion pipeline (completion function + retry wrapper). It also
refreshes model info and tokenizer as needed.
"""
with self._lock:
# Reset capability/cost flags so the new config gets a clean slate
self.cost_metric_supported = True
old_model = self.config.model
old_tokenizer = self.config.custom_tokenizer
# Update the configuration (deep copy to avoid external mutation)
self.config = copy.deepcopy(new_config)
# Update metrics model name if model changed and refresh model info
if old_model != new_config.model:
self.metrics.model_name = new_config.model
logger.debug(
f'LLM model changed from {old_model} to {new_config.model}'
)
# Reset model info to force re-initialization
self._tried_model_info = False
self.model_info = None
with warnings.catch_warnings():
warnings.simplefilter('ignore')
self.init_model_info()
# Log new capabilities
if self.vision_is_active():
logger.debug('LLM: model has vision enabled')
if self.is_caching_prompt_active():
logger.debug('LLM: caching prompt enabled')
if self.is_function_calling_active():
logger.debug('LLM: model supports function calling')
# Update tokenizer if custom_tokenizer changed
if old_tokenizer != new_config.custom_tokenizer:
if new_config.custom_tokenizer is not None:
self.tokenizer = create_pretrained_tokenizer(
new_config.custom_tokenizer
)
logger.debug(
f'LLM tokenizer updated to {new_config.custom_tokenizer}'
)
else:
self.tokenizer = None
logger.debug('LLM tokenizer reset to default')
# Handle log completions folder creation if needed
if new_config.log_completions:
if new_config.log_completions_folder is None:
raise RuntimeError(
'log_completions_folder is required when log_completions is enabled'
)
os.makedirs(new_config.log_completions_folder, exist_ok=True)
# Re-initialize core internals (model info, tokenizer, completion & wrapper)
self._initialize_core()
logger.debug('LLM reinitialized successfully')
# Backward-compat: keep update_config as an alias
def update_config(self, new_config: LLMConfig) -> None:
self.reinit(new_config)
def _rebuild_completion_wrapper(self) -> None:
"""Rebuild the completion wrapper with retry decorator.
This method recreates the wrapper function that includes retry logic
and other processing around the base completion function.
"""
@self.retry_decorator(
num_retries=self.config.num_retries,
retry_exceptions=LLM_RETRY_EXCEPTIONS,
@@ -553,14 +649,19 @@ class LLM(RetryMixin, DebugMixin):
self.config.max_output_tokens = self.model_info['max_tokens']
# Initialize function calling capability
# Check if model name is in our supported list
self._compute_function_calling_active()
def _compute_function_calling_active(self) -> None:
"""Compute and cache whether function calling is active for current config.
Respects user override via native_tool_calling. Otherwise bases decision on
supported model names.
"""
model_name_supported = (
self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
)
# Handle native_tool_calling user-defined configuration
if self.config.native_tool_calling is None:
self._function_calling_active = model_name_supported
else:

View File

@@ -1104,7 +1104,219 @@ def test_azure_model_default_max_tokens():
assert llm.config.max_output_tokens is None # Default value
# Gemini Performance Optimization Tests
def test_llm_update_config_basic(default_config):
"""Test basic LLM configuration update functionality."""
llm = LLM(default_config)
# Verify initial state
assert llm.config.model == 'gpt-4o'
assert llm.config.temperature == 0.0
assert llm.metrics.model_name == 'gpt-4o'
# Create new config with different settings
new_config = LLMConfig(
model='claude-3-5-sonnet-20241022',
api_key='new_test_key',
temperature=0.7,
max_output_tokens=2000,
top_p=0.9,
)
# Update the configuration
llm.update_config(new_config)
# Verify the configuration was updated
assert llm.config.model == 'claude-3-5-sonnet-20241022'
assert llm.config.api_key.get_secret_value() == 'new_test_key'
assert llm.config.temperature == 0.7
assert llm.config.max_output_tokens == 2000
assert llm.config.top_p == 0.9
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
def test_llm_update_config_model_change_resets_model_info(default_config):
"""Test that changing model resets model info for re-initialization."""
llm = LLM(default_config)
# Set some model info to verify it gets reset
llm.model_info = {'test': 'info'}
llm._tried_model_info = True
# Create new config with different model
new_config = copy.deepcopy(default_config)
new_config.model = 'claude-3-5-sonnet-20241022'
# Update the configuration
llm.update_config(new_config)
# Verify model info was reset and metrics updated
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
# _tried_model_info should be reset to False to force re-initialization
# (it will be set back to True after init_model_info() is called)
def test_llm_update_config_same_model_preserves_model_info(default_config):
"""Test that keeping the same model preserves model info."""
llm = LLM(default_config)
# Create new config with same model but different other settings
new_config = copy.deepcopy(default_config)
new_config.temperature = 0.5
new_config.max_output_tokens = 1500
# Update the configuration
llm.update_config(new_config)
# Verify model info was preserved but other settings changed
assert llm.config.temperature == 0.5
assert llm.config.max_output_tokens == 1500
assert llm.metrics.model_name == 'gpt-4o' # Same model
def test_llm_update_config_custom_tokenizer_update(default_config):
"""Test updating custom tokenizer configuration."""
llm = LLM(default_config)
# Initially no custom tokenizer
assert llm.config.custom_tokenizer is None
assert llm.tokenizer is None
# Update config with custom tokenizer
new_config = copy.deepcopy(default_config)
new_config.custom_tokenizer = 'gpt2'
llm.update_config(new_config)
# Verify tokenizer was updated
assert llm.config.custom_tokenizer == 'gpt2'
assert llm.tokenizer is not None
# Update back to no custom tokenizer
new_config.custom_tokenizer = None
llm.update_config(new_config)
# Verify tokenizer was reset
assert llm.config.custom_tokenizer is None
assert llm.tokenizer is None
def test_llm_update_config_log_completions_folder_creation(default_config):
"""Test that log completions folder is created when needed."""
with tempfile.TemporaryDirectory() as temp_dir:
log_folder = Path(temp_dir) / 'test_completions'
llm = LLM(default_config)
# Update config to enable log completions
new_config = copy.deepcopy(default_config)
new_config.log_completions = True
new_config.log_completions_folder = str(log_folder)
# Folder shouldn't exist yet
assert not log_folder.exists()
# Update configuration
llm.update_config(new_config)
# Verify folder was created
assert log_folder.exists()
assert log_folder.is_dir()
def test_llm_update_config_log_completions_folder_required():
"""Test that log_completions_folder is required when log_completions is True."""
config = LLMConfig(model='gpt-4o', api_key='test_key')
llm = LLM(config)
# Create config with log_completions=True but no folder
new_config = copy.deepcopy(config)
new_config.log_completions = True
new_config.log_completions_folder = None
# Should raise RuntimeError
with pytest.raises(RuntimeError, match='log_completions_folder is required'):
llm.update_config(new_config)
def test_llm_update_config_completion_function_rebuilt(default_config):
"""Test that completion function is rebuilt after config update."""
llm = LLM(default_config)
# Store reference to original completion function
# Update configuration
new_config = copy.deepcopy(default_config)
new_config.temperature = 0.8
new_config.max_output_tokens = 1500
llm.update_config(new_config)
# Verify completion functions exist (they should be rebuilt)
assert llm._completion is not None
assert llm._completion_unwrapped is not None
# The functions should be different objects since they were rebuilt
# (though this is implementation detail, the important thing is they work)
assert callable(llm._completion)
assert callable(llm._completion_unwrapped)
def test_llm_update_config_preserves_metrics_and_retry_listener(default_config):
"""Test that metrics and retry listener are preserved during config update."""
# Create custom metrics and retry listener
custom_metrics = Metrics(model_name='initial_model')
retry_listener = MagicMock()
llm = LLM(default_config, metrics=custom_metrics, retry_listener=retry_listener)
# Verify initial state
assert llm.metrics is custom_metrics
assert llm.retry_listener is retry_listener
# Update configuration
new_config = copy.deepcopy(default_config)
new_config.model = 'claude-3-5-sonnet-20241022'
llm.update_config(new_config)
# Verify metrics and retry listener are preserved
assert llm.metrics is custom_metrics
assert llm.retry_listener is retry_listener
# But metrics model name should be updated
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
def test_llm_update_config_deep_copy_independence():
"""Test that config update uses deep copy and doesn't affect original config."""
original_config = LLMConfig(
model='gpt-4o',
api_key='test_key',
temperature=0.0,
)
llm = LLM(original_config)
# Create new config
new_config = LLMConfig(
model='claude-3-5-sonnet-20241022',
api_key='new_key',
temperature=0.7,
)
# Update LLM config
llm.update_config(new_config)
# Modify the new_config after update
new_config.temperature = 0.9
new_config.model = 'different-model'
# LLM config should not be affected by external changes
assert llm.config.temperature == 0.7
assert llm.config.model == 'claude-3-5-sonnet-20241022'
# Original config should also be unchanged
assert original_config.temperature == 0.0
assert original_config.model == 'gpt-4o'
def test_gemini_model_keeps_none_reasoning_effort():
@@ -1302,3 +1514,142 @@ def test_gemini_performance_optimization_end_to_end(mock_completion):
# Verify temperature and top_p were removed for reasoning models
assert 'temperature' not in call_kwargs
assert 'top_p' not in call_kwargs
def test_llm_reinit_basic(default_config):
"""Reinit should update config and metrics like update_config."""
llm = LLM(default_config)
assert llm.metrics.model_name == 'gpt-4o'
new_config = LLMConfig(
model='claude-3-5-sonnet-20241022',
api_key='new_test_key',
temperature=0.7,
max_output_tokens=1234,
)
llm.reinit(new_config)
assert llm.config.model == 'claude-3-5-sonnet-20241022'
assert llm.config.api_key.get_secret_value() == 'new_test_key'
assert llm.config.temperature == 0.7
assert llm.config.max_output_tokens == 1234
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
assert callable(llm._completion)
assert callable(llm._completion_unwrapped)
def test_llm_reinit_recomputes_function_calling_capability():
"""Reinit should recompute function-calling capability even if model doesn't change."""
base = LLMConfig(model='gpt-3.5-turbo', api_key='key')
llm = LLM(base)
# gpt-3.5-turbo is not in FUNCTION_CALLING_SUPPORTED_MODELS
assert llm.is_function_calling_active() is False
# Turn on native tool calling; same model
cfg_on = copy.deepcopy(base)
cfg_on.native_tool_calling = True
llm.reinit(cfg_on)
assert llm.is_function_calling_active() is True
# Turn off explicitly
cfg_off = copy.deepcopy(base)
cfg_off.native_tool_calling = False
llm.reinit(cfg_off)
assert llm.is_function_calling_active() is False
def test_llm_reinit_resets_cost_flag(default_config):
"""Reinit should reset cost_metric_supported so a new model can report cost."""
llm = LLM(default_config)
llm.cost_metric_supported = False
# Same model, but reinit should reset the flag to True
llm.reinit(copy.deepcopy(default_config))
assert llm.cost_metric_supported is True
def test_llm_reinit_thread_safety_with_inflight_completion(default_config):
"""Concurrent reinit during an in-flight completion should not raise,
and subsequent completions should use the new config.
"""
import threading
import time
from unittest.mock import patch
llm = LLM(default_config)
calls = []
def fake_completion(*args, **kwargs):
# Simulate provider latency and record the model used
calls.append(kwargs.get('model'))
time.sleep(0.2)
return {
'id': 'test-1',
'choices': [{'message': {'content': 'ok'}}],
'usage': {'prompt_tokens': 1, 'completion_tokens': 1},
}
with patch('openhands.llm.llm.litellm_completion') as mock_completion:
mock_completion.side_effect = fake_completion
# Start an in-flight completion with the initial model
t = threading.Thread(
target=llm.completion, args=([{'role': 'user', 'content': 'hi'}],)
)
t.start()
# Reinit while the completion is in-flight
time.sleep(0.05)
new_cfg = copy.deepcopy(default_config)
new_cfg.model = 'claude-3-5-sonnet-20241022'
llm.reinit(new_cfg)
t.join()
# Subsequent completion should use the new config
llm.completion(messages=[{'role': 'user', 'content': 'again'}])
# Ensure the latest call used the new model and metrics reflect it
assert len(calls) >= 1
assert calls[-1] == 'claude-3-5-sonnet-20241022'
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
@patch('openhands.llm.llm.litellm_completion')
def test_llm_reinit_provider_mappings(mock_completion, default_config):
"""Reinit should apply provider-specific mappings (openhands proxy, azure max_tokens)."""
# Return a minimal, valid response
mock_completion.return_value = {
'id': 'call',
'choices': [{'message': {'content': 'ok'}}],
'usage': {'prompt_tokens': 1, 'completion_tokens': 1},
}
llm = LLM(default_config)
# 1) OpenHands provider rewrite to litellm_proxy
cfg_proxy = copy.deepcopy(default_config)
cfg_proxy.model = 'openhands/qwen3-coder'
llm.reinit(cfg_proxy)
llm.completion(messages=[{'role': 'user', 'content': 'x'}])
model_arg = mock_completion.call_args.kwargs.get('model')
base_url_arg = mock_completion.call_args.kwargs.get('base_url')
assert model_arg == 'litellm_proxy/qwen3-coder'
assert base_url_arg == 'https://llm-proxy.app.all-hands.dev/'
# 2) Azure mapping: max_completion_tokens -> max_tokens
mock_completion.reset_mock()
cfg_azure = copy.deepcopy(default_config)
cfg_azure.model = 'azure/gpt-4o'
cfg_azure.max_output_tokens = 777
cfg_azure.api_version = '2024-06-01'
llm.reinit(cfg_azure)
llm.completion(messages=[{'role': 'user', 'content': 'y'}])
kwargs = mock_completion.call_args.kwargs
assert kwargs.get('model') == 'azure/gpt-4o'
assert 'max_tokens' in kwargs and kwargs['max_tokens'] == 777
assert 'max_completion_tokens' not in kwargs