mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c8d856cfa5 | ||
|
|
db5c0c687c | ||
|
|
82d5b0388b | ||
|
|
bf192688a5 | ||
|
|
477d9b17c0 | ||
|
|
6756427217 | ||
|
|
1b74920509 | ||
|
|
c933b88f36 | ||
|
|
e0e9d3d07c | ||
|
|
00f9ff08c7 | ||
|
|
f37f8fb723 | ||
|
|
8628da0037 |
@@ -3,6 +3,7 @@ import os
|
||||
import time
|
||||
import warnings
|
||||
from functools import partial
|
||||
from threading import RLock
|
||||
from typing import Any, Callable
|
||||
|
||||
import httpx
|
||||
@@ -142,6 +143,7 @@ class LLM(RetryMixin, DebugMixin):
|
||||
metrics: The metrics to use.
|
||||
"""
|
||||
self._tried_model_info = False
|
||||
self._lock = RLock()
|
||||
self.metrics: Metrics = (
|
||||
metrics if metrics is not None else Metrics(model_name=config.model)
|
||||
)
|
||||
@@ -157,11 +159,22 @@ class LLM(RetryMixin, DebugMixin):
|
||||
)
|
||||
os.makedirs(self.config.log_completions_folder, exist_ok=True)
|
||||
|
||||
# call init_model_info to initialize config.max_output_tokens
|
||||
# which is used in partial function
|
||||
# Initialize core internals in a single pass
|
||||
self._initialize_core()
|
||||
|
||||
def _initialize_core(self) -> None:
|
||||
"""Initialize or re-initialize all components derived from config.
|
||||
|
||||
This centralizes initialization to avoid duplication between __init__ and reinit().
|
||||
"""
|
||||
# call init_model_info to initialize config.max_output_tokens used in partial
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('ignore')
|
||||
self.init_model_info()
|
||||
|
||||
# Recompute function-calling capability regardless of model_info cache
|
||||
self._compute_function_calling_active()
|
||||
|
||||
if self.vision_is_active():
|
||||
logger.debug('LLM: model has vision enabled')
|
||||
if self.is_caching_prompt_active():
|
||||
@@ -175,6 +188,17 @@ class LLM(RetryMixin, DebugMixin):
|
||||
else:
|
||||
self.tokenizer = None
|
||||
|
||||
# Initialize the completion function
|
||||
self._build_completion_function()
|
||||
# Build the completion wrapper with retry logic
|
||||
self._rebuild_completion_wrapper()
|
||||
|
||||
def _build_completion_function(self) -> None:
|
||||
"""Build the completion function based on current configuration.
|
||||
|
||||
This method creates the partial function that will be used for LLM completions.
|
||||
It can be called multiple times to rebuild the function when configuration changes.
|
||||
"""
|
||||
# set up the completion function
|
||||
kwargs: dict[str, Any] = {
|
||||
'temperature': self.config.temperature,
|
||||
@@ -251,6 +275,78 @@ class LLM(RetryMixin, DebugMixin):
|
||||
|
||||
self._completion_unwrapped = self._completion
|
||||
|
||||
def reinit(self, new_config: LLMConfig) -> None:
|
||||
"""Reinitialize the LLM with a new configuration.
|
||||
|
||||
This is a threadsafe operation that updates configuration and rebuilds
|
||||
the completion pipeline (completion function + retry wrapper). It also
|
||||
refreshes model info and tokenizer as needed.
|
||||
"""
|
||||
with self._lock:
|
||||
# Reset capability/cost flags so the new config gets a clean slate
|
||||
self.cost_metric_supported = True
|
||||
old_model = self.config.model
|
||||
old_tokenizer = self.config.custom_tokenizer
|
||||
|
||||
# Update the configuration (deep copy to avoid external mutation)
|
||||
self.config = copy.deepcopy(new_config)
|
||||
|
||||
# Update metrics model name if model changed and refresh model info
|
||||
if old_model != new_config.model:
|
||||
self.metrics.model_name = new_config.model
|
||||
logger.debug(
|
||||
f'LLM model changed from {old_model} to {new_config.model}'
|
||||
)
|
||||
# Reset model info to force re-initialization
|
||||
self._tried_model_info = False
|
||||
self.model_info = None
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('ignore')
|
||||
self.init_model_info()
|
||||
# Log new capabilities
|
||||
if self.vision_is_active():
|
||||
logger.debug('LLM: model has vision enabled')
|
||||
if self.is_caching_prompt_active():
|
||||
logger.debug('LLM: caching prompt enabled')
|
||||
if self.is_function_calling_active():
|
||||
logger.debug('LLM: model supports function calling')
|
||||
|
||||
# Update tokenizer if custom_tokenizer changed
|
||||
if old_tokenizer != new_config.custom_tokenizer:
|
||||
if new_config.custom_tokenizer is not None:
|
||||
self.tokenizer = create_pretrained_tokenizer(
|
||||
new_config.custom_tokenizer
|
||||
)
|
||||
logger.debug(
|
||||
f'LLM tokenizer updated to {new_config.custom_tokenizer}'
|
||||
)
|
||||
else:
|
||||
self.tokenizer = None
|
||||
logger.debug('LLM tokenizer reset to default')
|
||||
|
||||
# Handle log completions folder creation if needed
|
||||
if new_config.log_completions:
|
||||
if new_config.log_completions_folder is None:
|
||||
raise RuntimeError(
|
||||
'log_completions_folder is required when log_completions is enabled'
|
||||
)
|
||||
os.makedirs(new_config.log_completions_folder, exist_ok=True)
|
||||
|
||||
# Re-initialize core internals (model info, tokenizer, completion & wrapper)
|
||||
self._initialize_core()
|
||||
logger.debug('LLM reinitialized successfully')
|
||||
|
||||
# Backward-compat: keep update_config as an alias
|
||||
def update_config(self, new_config: LLMConfig) -> None:
|
||||
self.reinit(new_config)
|
||||
|
||||
def _rebuild_completion_wrapper(self) -> None:
|
||||
"""Rebuild the completion wrapper with retry decorator.
|
||||
|
||||
This method recreates the wrapper function that includes retry logic
|
||||
and other processing around the base completion function.
|
||||
"""
|
||||
|
||||
@self.retry_decorator(
|
||||
num_retries=self.config.num_retries,
|
||||
retry_exceptions=LLM_RETRY_EXCEPTIONS,
|
||||
@@ -553,14 +649,19 @@ class LLM(RetryMixin, DebugMixin):
|
||||
self.config.max_output_tokens = self.model_info['max_tokens']
|
||||
|
||||
# Initialize function calling capability
|
||||
# Check if model name is in our supported list
|
||||
self._compute_function_calling_active()
|
||||
|
||||
def _compute_function_calling_active(self) -> None:
|
||||
"""Compute and cache whether function calling is active for current config.
|
||||
|
||||
Respects user override via native_tool_calling. Otherwise bases decision on
|
||||
supported model names.
|
||||
"""
|
||||
model_name_supported = (
|
||||
self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
|
||||
or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
|
||||
or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
|
||||
)
|
||||
|
||||
# Handle native_tool_calling user-defined configuration
|
||||
if self.config.native_tool_calling is None:
|
||||
self._function_calling_active = model_name_supported
|
||||
else:
|
||||
|
||||
@@ -1104,7 +1104,219 @@ def test_azure_model_default_max_tokens():
|
||||
assert llm.config.max_output_tokens is None # Default value
|
||||
|
||||
|
||||
# Gemini Performance Optimization Tests
|
||||
def test_llm_update_config_basic(default_config):
|
||||
"""Test basic LLM configuration update functionality."""
|
||||
llm = LLM(default_config)
|
||||
|
||||
# Verify initial state
|
||||
assert llm.config.model == 'gpt-4o'
|
||||
assert llm.config.temperature == 0.0
|
||||
assert llm.metrics.model_name == 'gpt-4o'
|
||||
|
||||
# Create new config with different settings
|
||||
new_config = LLMConfig(
|
||||
model='claude-3-5-sonnet-20241022',
|
||||
api_key='new_test_key',
|
||||
temperature=0.7,
|
||||
max_output_tokens=2000,
|
||||
top_p=0.9,
|
||||
)
|
||||
|
||||
# Update the configuration
|
||||
llm.update_config(new_config)
|
||||
|
||||
# Verify the configuration was updated
|
||||
assert llm.config.model == 'claude-3-5-sonnet-20241022'
|
||||
assert llm.config.api_key.get_secret_value() == 'new_test_key'
|
||||
assert llm.config.temperature == 0.7
|
||||
assert llm.config.max_output_tokens == 2000
|
||||
assert llm.config.top_p == 0.9
|
||||
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||
|
||||
|
||||
def test_llm_update_config_model_change_resets_model_info(default_config):
|
||||
"""Test that changing model resets model info for re-initialization."""
|
||||
llm = LLM(default_config)
|
||||
|
||||
# Set some model info to verify it gets reset
|
||||
llm.model_info = {'test': 'info'}
|
||||
llm._tried_model_info = True
|
||||
|
||||
# Create new config with different model
|
||||
new_config = copy.deepcopy(default_config)
|
||||
new_config.model = 'claude-3-5-sonnet-20241022'
|
||||
|
||||
# Update the configuration
|
||||
llm.update_config(new_config)
|
||||
|
||||
# Verify model info was reset and metrics updated
|
||||
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||
# _tried_model_info should be reset to False to force re-initialization
|
||||
# (it will be set back to True after init_model_info() is called)
|
||||
|
||||
|
||||
def test_llm_update_config_same_model_preserves_model_info(default_config):
|
||||
"""Test that keeping the same model preserves model info."""
|
||||
llm = LLM(default_config)
|
||||
|
||||
# Create new config with same model but different other settings
|
||||
new_config = copy.deepcopy(default_config)
|
||||
new_config.temperature = 0.5
|
||||
new_config.max_output_tokens = 1500
|
||||
|
||||
# Update the configuration
|
||||
llm.update_config(new_config)
|
||||
|
||||
# Verify model info was preserved but other settings changed
|
||||
assert llm.config.temperature == 0.5
|
||||
assert llm.config.max_output_tokens == 1500
|
||||
assert llm.metrics.model_name == 'gpt-4o' # Same model
|
||||
|
||||
|
||||
def test_llm_update_config_custom_tokenizer_update(default_config):
|
||||
"""Test updating custom tokenizer configuration."""
|
||||
llm = LLM(default_config)
|
||||
|
||||
# Initially no custom tokenizer
|
||||
assert llm.config.custom_tokenizer is None
|
||||
assert llm.tokenizer is None
|
||||
|
||||
# Update config with custom tokenizer
|
||||
new_config = copy.deepcopy(default_config)
|
||||
new_config.custom_tokenizer = 'gpt2'
|
||||
|
||||
llm.update_config(new_config)
|
||||
|
||||
# Verify tokenizer was updated
|
||||
assert llm.config.custom_tokenizer == 'gpt2'
|
||||
assert llm.tokenizer is not None
|
||||
|
||||
# Update back to no custom tokenizer
|
||||
new_config.custom_tokenizer = None
|
||||
llm.update_config(new_config)
|
||||
|
||||
# Verify tokenizer was reset
|
||||
assert llm.config.custom_tokenizer is None
|
||||
assert llm.tokenizer is None
|
||||
|
||||
|
||||
def test_llm_update_config_log_completions_folder_creation(default_config):
|
||||
"""Test that log completions folder is created when needed."""
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
log_folder = Path(temp_dir) / 'test_completions'
|
||||
|
||||
llm = LLM(default_config)
|
||||
|
||||
# Update config to enable log completions
|
||||
new_config = copy.deepcopy(default_config)
|
||||
new_config.log_completions = True
|
||||
new_config.log_completions_folder = str(log_folder)
|
||||
|
||||
# Folder shouldn't exist yet
|
||||
assert not log_folder.exists()
|
||||
|
||||
# Update configuration
|
||||
llm.update_config(new_config)
|
||||
|
||||
# Verify folder was created
|
||||
assert log_folder.exists()
|
||||
assert log_folder.is_dir()
|
||||
|
||||
|
||||
def test_llm_update_config_log_completions_folder_required():
|
||||
"""Test that log_completions_folder is required when log_completions is True."""
|
||||
config = LLMConfig(model='gpt-4o', api_key='test_key')
|
||||
llm = LLM(config)
|
||||
|
||||
# Create config with log_completions=True but no folder
|
||||
new_config = copy.deepcopy(config)
|
||||
new_config.log_completions = True
|
||||
new_config.log_completions_folder = None
|
||||
|
||||
# Should raise RuntimeError
|
||||
with pytest.raises(RuntimeError, match='log_completions_folder is required'):
|
||||
llm.update_config(new_config)
|
||||
|
||||
|
||||
def test_llm_update_config_completion_function_rebuilt(default_config):
|
||||
"""Test that completion function is rebuilt after config update."""
|
||||
llm = LLM(default_config)
|
||||
|
||||
# Store reference to original completion function
|
||||
|
||||
# Update configuration
|
||||
new_config = copy.deepcopy(default_config)
|
||||
new_config.temperature = 0.8
|
||||
new_config.max_output_tokens = 1500
|
||||
|
||||
llm.update_config(new_config)
|
||||
|
||||
# Verify completion functions exist (they should be rebuilt)
|
||||
assert llm._completion is not None
|
||||
assert llm._completion_unwrapped is not None
|
||||
|
||||
# The functions should be different objects since they were rebuilt
|
||||
# (though this is implementation detail, the important thing is they work)
|
||||
assert callable(llm._completion)
|
||||
assert callable(llm._completion_unwrapped)
|
||||
|
||||
|
||||
def test_llm_update_config_preserves_metrics_and_retry_listener(default_config):
|
||||
"""Test that metrics and retry listener are preserved during config update."""
|
||||
# Create custom metrics and retry listener
|
||||
custom_metrics = Metrics(model_name='initial_model')
|
||||
retry_listener = MagicMock()
|
||||
|
||||
llm = LLM(default_config, metrics=custom_metrics, retry_listener=retry_listener)
|
||||
|
||||
# Verify initial state
|
||||
assert llm.metrics is custom_metrics
|
||||
assert llm.retry_listener is retry_listener
|
||||
|
||||
# Update configuration
|
||||
new_config = copy.deepcopy(default_config)
|
||||
new_config.model = 'claude-3-5-sonnet-20241022'
|
||||
|
||||
llm.update_config(new_config)
|
||||
|
||||
# Verify metrics and retry listener are preserved
|
||||
assert llm.metrics is custom_metrics
|
||||
assert llm.retry_listener is retry_listener
|
||||
# But metrics model name should be updated
|
||||
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||
|
||||
|
||||
def test_llm_update_config_deep_copy_independence():
|
||||
"""Test that config update uses deep copy and doesn't affect original config."""
|
||||
original_config = LLMConfig(
|
||||
model='gpt-4o',
|
||||
api_key='test_key',
|
||||
temperature=0.0,
|
||||
)
|
||||
|
||||
llm = LLM(original_config)
|
||||
|
||||
# Create new config
|
||||
new_config = LLMConfig(
|
||||
model='claude-3-5-sonnet-20241022',
|
||||
api_key='new_key',
|
||||
temperature=0.7,
|
||||
)
|
||||
|
||||
# Update LLM config
|
||||
llm.update_config(new_config)
|
||||
|
||||
# Modify the new_config after update
|
||||
new_config.temperature = 0.9
|
||||
new_config.model = 'different-model'
|
||||
|
||||
# LLM config should not be affected by external changes
|
||||
assert llm.config.temperature == 0.7
|
||||
assert llm.config.model == 'claude-3-5-sonnet-20241022'
|
||||
|
||||
# Original config should also be unchanged
|
||||
assert original_config.temperature == 0.0
|
||||
assert original_config.model == 'gpt-4o'
|
||||
|
||||
|
||||
def test_gemini_model_keeps_none_reasoning_effort():
|
||||
@@ -1302,3 +1514,142 @@ def test_gemini_performance_optimization_end_to_end(mock_completion):
|
||||
# Verify temperature and top_p were removed for reasoning models
|
||||
assert 'temperature' not in call_kwargs
|
||||
assert 'top_p' not in call_kwargs
|
||||
|
||||
|
||||
def test_llm_reinit_basic(default_config):
|
||||
"""Reinit should update config and metrics like update_config."""
|
||||
llm = LLM(default_config)
|
||||
assert llm.metrics.model_name == 'gpt-4o'
|
||||
|
||||
new_config = LLMConfig(
|
||||
model='claude-3-5-sonnet-20241022',
|
||||
api_key='new_test_key',
|
||||
temperature=0.7,
|
||||
max_output_tokens=1234,
|
||||
)
|
||||
|
||||
llm.reinit(new_config)
|
||||
|
||||
assert llm.config.model == 'claude-3-5-sonnet-20241022'
|
||||
assert llm.config.api_key.get_secret_value() == 'new_test_key'
|
||||
assert llm.config.temperature == 0.7
|
||||
assert llm.config.max_output_tokens == 1234
|
||||
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||
assert callable(llm._completion)
|
||||
assert callable(llm._completion_unwrapped)
|
||||
|
||||
|
||||
def test_llm_reinit_recomputes_function_calling_capability():
|
||||
"""Reinit should recompute function-calling capability even if model doesn't change."""
|
||||
base = LLMConfig(model='gpt-3.5-turbo', api_key='key')
|
||||
llm = LLM(base)
|
||||
# gpt-3.5-turbo is not in FUNCTION_CALLING_SUPPORTED_MODELS
|
||||
assert llm.is_function_calling_active() is False
|
||||
|
||||
# Turn on native tool calling; same model
|
||||
cfg_on = copy.deepcopy(base)
|
||||
cfg_on.native_tool_calling = True
|
||||
llm.reinit(cfg_on)
|
||||
assert llm.is_function_calling_active() is True
|
||||
|
||||
# Turn off explicitly
|
||||
cfg_off = copy.deepcopy(base)
|
||||
cfg_off.native_tool_calling = False
|
||||
llm.reinit(cfg_off)
|
||||
assert llm.is_function_calling_active() is False
|
||||
|
||||
|
||||
def test_llm_reinit_resets_cost_flag(default_config):
|
||||
"""Reinit should reset cost_metric_supported so a new model can report cost."""
|
||||
llm = LLM(default_config)
|
||||
llm.cost_metric_supported = False
|
||||
|
||||
# Same model, but reinit should reset the flag to True
|
||||
llm.reinit(copy.deepcopy(default_config))
|
||||
assert llm.cost_metric_supported is True
|
||||
|
||||
|
||||
def test_llm_reinit_thread_safety_with_inflight_completion(default_config):
|
||||
"""Concurrent reinit during an in-flight completion should not raise,
|
||||
and subsequent completions should use the new config.
|
||||
"""
|
||||
import threading
|
||||
import time
|
||||
from unittest.mock import patch
|
||||
|
||||
llm = LLM(default_config)
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_completion(*args, **kwargs):
|
||||
# Simulate provider latency and record the model used
|
||||
calls.append(kwargs.get('model'))
|
||||
time.sleep(0.2)
|
||||
return {
|
||||
'id': 'test-1',
|
||||
'choices': [{'message': {'content': 'ok'}}],
|
||||
'usage': {'prompt_tokens': 1, 'completion_tokens': 1},
|
||||
}
|
||||
|
||||
with patch('openhands.llm.llm.litellm_completion') as mock_completion:
|
||||
mock_completion.side_effect = fake_completion
|
||||
|
||||
# Start an in-flight completion with the initial model
|
||||
t = threading.Thread(
|
||||
target=llm.completion, args=([{'role': 'user', 'content': 'hi'}],)
|
||||
)
|
||||
t.start()
|
||||
|
||||
# Reinit while the completion is in-flight
|
||||
time.sleep(0.05)
|
||||
new_cfg = copy.deepcopy(default_config)
|
||||
new_cfg.model = 'claude-3-5-sonnet-20241022'
|
||||
llm.reinit(new_cfg)
|
||||
|
||||
t.join()
|
||||
|
||||
# Subsequent completion should use the new config
|
||||
llm.completion(messages=[{'role': 'user', 'content': 'again'}])
|
||||
|
||||
# Ensure the latest call used the new model and metrics reflect it
|
||||
assert len(calls) >= 1
|
||||
assert calls[-1] == 'claude-3-5-sonnet-20241022'
|
||||
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||
|
||||
|
||||
@patch('openhands.llm.llm.litellm_completion')
|
||||
def test_llm_reinit_provider_mappings(mock_completion, default_config):
|
||||
"""Reinit should apply provider-specific mappings (openhands proxy, azure max_tokens)."""
|
||||
# Return a minimal, valid response
|
||||
mock_completion.return_value = {
|
||||
'id': 'call',
|
||||
'choices': [{'message': {'content': 'ok'}}],
|
||||
'usage': {'prompt_tokens': 1, 'completion_tokens': 1},
|
||||
}
|
||||
|
||||
llm = LLM(default_config)
|
||||
|
||||
# 1) OpenHands provider rewrite to litellm_proxy
|
||||
cfg_proxy = copy.deepcopy(default_config)
|
||||
cfg_proxy.model = 'openhands/qwen3-coder'
|
||||
llm.reinit(cfg_proxy)
|
||||
llm.completion(messages=[{'role': 'user', 'content': 'x'}])
|
||||
|
||||
model_arg = mock_completion.call_args.kwargs.get('model')
|
||||
base_url_arg = mock_completion.call_args.kwargs.get('base_url')
|
||||
assert model_arg == 'litellm_proxy/qwen3-coder'
|
||||
assert base_url_arg == 'https://llm-proxy.app.all-hands.dev/'
|
||||
|
||||
# 2) Azure mapping: max_completion_tokens -> max_tokens
|
||||
mock_completion.reset_mock()
|
||||
cfg_azure = copy.deepcopy(default_config)
|
||||
cfg_azure.model = 'azure/gpt-4o'
|
||||
cfg_azure.max_output_tokens = 777
|
||||
cfg_azure.api_version = '2024-06-01'
|
||||
llm.reinit(cfg_azure)
|
||||
llm.completion(messages=[{'role': 'user', 'content': 'y'}])
|
||||
|
||||
kwargs = mock_completion.call_args.kwargs
|
||||
assert kwargs.get('model') == 'azure/gpt-4o'
|
||||
assert 'max_tokens' in kwargs and kwargs['max_tokens'] == 777
|
||||
assert 'max_completion_tokens' not in kwargs
|
||||
|
||||
Reference in New Issue
Block a user