mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c8d856cfa5 | |||
| db5c0c687c | |||
| 82d5b0388b | |||
| bf192688a5 | |||
| 477d9b17c0 | |||
| 6756427217 | |||
| 1b74920509 | |||
| c933b88f36 | |||
| e0e9d3d07c | |||
| 00f9ff08c7 | |||
| f37f8fb723 | |||
| 8628da0037 |
+106
-5
@@ -3,6 +3,7 @@ import os
|
|||||||
import time
|
import time
|
||||||
import warnings
|
import warnings
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
from threading import RLock
|
||||||
from typing import Any, Callable
|
from typing import Any, Callable
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
@@ -142,6 +143,7 @@ class LLM(RetryMixin, DebugMixin):
|
|||||||
metrics: The metrics to use.
|
metrics: The metrics to use.
|
||||||
"""
|
"""
|
||||||
self._tried_model_info = False
|
self._tried_model_info = False
|
||||||
|
self._lock = RLock()
|
||||||
self.metrics: Metrics = (
|
self.metrics: Metrics = (
|
||||||
metrics if metrics is not None else Metrics(model_name=config.model)
|
metrics if metrics is not None else Metrics(model_name=config.model)
|
||||||
)
|
)
|
||||||
@@ -157,11 +159,22 @@ class LLM(RetryMixin, DebugMixin):
|
|||||||
)
|
)
|
||||||
os.makedirs(self.config.log_completions_folder, exist_ok=True)
|
os.makedirs(self.config.log_completions_folder, exist_ok=True)
|
||||||
|
|
||||||
# call init_model_info to initialize config.max_output_tokens
|
# Initialize core internals in a single pass
|
||||||
# which is used in partial function
|
self._initialize_core()
|
||||||
|
|
||||||
|
def _initialize_core(self) -> None:
|
||||||
|
"""Initialize or re-initialize all components derived from config.
|
||||||
|
|
||||||
|
This centralizes initialization to avoid duplication between __init__ and reinit().
|
||||||
|
"""
|
||||||
|
# call init_model_info to initialize config.max_output_tokens used in partial
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
warnings.simplefilter('ignore')
|
warnings.simplefilter('ignore')
|
||||||
self.init_model_info()
|
self.init_model_info()
|
||||||
|
|
||||||
|
# Recompute function-calling capability regardless of model_info cache
|
||||||
|
self._compute_function_calling_active()
|
||||||
|
|
||||||
if self.vision_is_active():
|
if self.vision_is_active():
|
||||||
logger.debug('LLM: model has vision enabled')
|
logger.debug('LLM: model has vision enabled')
|
||||||
if self.is_caching_prompt_active():
|
if self.is_caching_prompt_active():
|
||||||
@@ -175,6 +188,17 @@ class LLM(RetryMixin, DebugMixin):
|
|||||||
else:
|
else:
|
||||||
self.tokenizer = None
|
self.tokenizer = None
|
||||||
|
|
||||||
|
# Initialize the completion function
|
||||||
|
self._build_completion_function()
|
||||||
|
# Build the completion wrapper with retry logic
|
||||||
|
self._rebuild_completion_wrapper()
|
||||||
|
|
||||||
|
def _build_completion_function(self) -> None:
|
||||||
|
"""Build the completion function based on current configuration.
|
||||||
|
|
||||||
|
This method creates the partial function that will be used for LLM completions.
|
||||||
|
It can be called multiple times to rebuild the function when configuration changes.
|
||||||
|
"""
|
||||||
# set up the completion function
|
# set up the completion function
|
||||||
kwargs: dict[str, Any] = {
|
kwargs: dict[str, Any] = {
|
||||||
'temperature': self.config.temperature,
|
'temperature': self.config.temperature,
|
||||||
@@ -251,6 +275,78 @@ class LLM(RetryMixin, DebugMixin):
|
|||||||
|
|
||||||
self._completion_unwrapped = self._completion
|
self._completion_unwrapped = self._completion
|
||||||
|
|
||||||
|
def reinit(self, new_config: LLMConfig) -> None:
|
||||||
|
"""Reinitialize the LLM with a new configuration.
|
||||||
|
|
||||||
|
This is a threadsafe operation that updates configuration and rebuilds
|
||||||
|
the completion pipeline (completion function + retry wrapper). It also
|
||||||
|
refreshes model info and tokenizer as needed.
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
# Reset capability/cost flags so the new config gets a clean slate
|
||||||
|
self.cost_metric_supported = True
|
||||||
|
old_model = self.config.model
|
||||||
|
old_tokenizer = self.config.custom_tokenizer
|
||||||
|
|
||||||
|
# Update the configuration (deep copy to avoid external mutation)
|
||||||
|
self.config = copy.deepcopy(new_config)
|
||||||
|
|
||||||
|
# Update metrics model name if model changed and refresh model info
|
||||||
|
if old_model != new_config.model:
|
||||||
|
self.metrics.model_name = new_config.model
|
||||||
|
logger.debug(
|
||||||
|
f'LLM model changed from {old_model} to {new_config.model}'
|
||||||
|
)
|
||||||
|
# Reset model info to force re-initialization
|
||||||
|
self._tried_model_info = False
|
||||||
|
self.model_info = None
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
warnings.simplefilter('ignore')
|
||||||
|
self.init_model_info()
|
||||||
|
# Log new capabilities
|
||||||
|
if self.vision_is_active():
|
||||||
|
logger.debug('LLM: model has vision enabled')
|
||||||
|
if self.is_caching_prompt_active():
|
||||||
|
logger.debug('LLM: caching prompt enabled')
|
||||||
|
if self.is_function_calling_active():
|
||||||
|
logger.debug('LLM: model supports function calling')
|
||||||
|
|
||||||
|
# Update tokenizer if custom_tokenizer changed
|
||||||
|
if old_tokenizer != new_config.custom_tokenizer:
|
||||||
|
if new_config.custom_tokenizer is not None:
|
||||||
|
self.tokenizer = create_pretrained_tokenizer(
|
||||||
|
new_config.custom_tokenizer
|
||||||
|
)
|
||||||
|
logger.debug(
|
||||||
|
f'LLM tokenizer updated to {new_config.custom_tokenizer}'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.tokenizer = None
|
||||||
|
logger.debug('LLM tokenizer reset to default')
|
||||||
|
|
||||||
|
# Handle log completions folder creation if needed
|
||||||
|
if new_config.log_completions:
|
||||||
|
if new_config.log_completions_folder is None:
|
||||||
|
raise RuntimeError(
|
||||||
|
'log_completions_folder is required when log_completions is enabled'
|
||||||
|
)
|
||||||
|
os.makedirs(new_config.log_completions_folder, exist_ok=True)
|
||||||
|
|
||||||
|
# Re-initialize core internals (model info, tokenizer, completion & wrapper)
|
||||||
|
self._initialize_core()
|
||||||
|
logger.debug('LLM reinitialized successfully')
|
||||||
|
|
||||||
|
# Backward-compat: keep update_config as an alias
|
||||||
|
def update_config(self, new_config: LLMConfig) -> None:
|
||||||
|
self.reinit(new_config)
|
||||||
|
|
||||||
|
def _rebuild_completion_wrapper(self) -> None:
|
||||||
|
"""Rebuild the completion wrapper with retry decorator.
|
||||||
|
|
||||||
|
This method recreates the wrapper function that includes retry logic
|
||||||
|
and other processing around the base completion function.
|
||||||
|
"""
|
||||||
|
|
||||||
@self.retry_decorator(
|
@self.retry_decorator(
|
||||||
num_retries=self.config.num_retries,
|
num_retries=self.config.num_retries,
|
||||||
retry_exceptions=LLM_RETRY_EXCEPTIONS,
|
retry_exceptions=LLM_RETRY_EXCEPTIONS,
|
||||||
@@ -553,14 +649,19 @@ class LLM(RetryMixin, DebugMixin):
|
|||||||
self.config.max_output_tokens = self.model_info['max_tokens']
|
self.config.max_output_tokens = self.model_info['max_tokens']
|
||||||
|
|
||||||
# Initialize function calling capability
|
# Initialize function calling capability
|
||||||
# Check if model name is in our supported list
|
self._compute_function_calling_active()
|
||||||
|
|
||||||
|
def _compute_function_calling_active(self) -> None:
|
||||||
|
"""Compute and cache whether function calling is active for current config.
|
||||||
|
|
||||||
|
Respects user override via native_tool_calling. Otherwise bases decision on
|
||||||
|
supported model names.
|
||||||
|
"""
|
||||||
model_name_supported = (
|
model_name_supported = (
|
||||||
self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
|
self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
|
||||||
or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
|
or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
|
||||||
or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
|
or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Handle native_tool_calling user-defined configuration
|
|
||||||
if self.config.native_tool_calling is None:
|
if self.config.native_tool_calling is None:
|
||||||
self._function_calling_active = model_name_supported
|
self._function_calling_active = model_name_supported
|
||||||
else:
|
else:
|
||||||
|
|||||||
+352
-1
@@ -1104,7 +1104,219 @@ def test_azure_model_default_max_tokens():
|
|||||||
assert llm.config.max_output_tokens is None # Default value
|
assert llm.config.max_output_tokens is None # Default value
|
||||||
|
|
||||||
|
|
||||||
# Gemini Performance Optimization Tests
|
def test_llm_update_config_basic(default_config):
|
||||||
|
"""Test basic LLM configuration update functionality."""
|
||||||
|
llm = LLM(default_config)
|
||||||
|
|
||||||
|
# Verify initial state
|
||||||
|
assert llm.config.model == 'gpt-4o'
|
||||||
|
assert llm.config.temperature == 0.0
|
||||||
|
assert llm.metrics.model_name == 'gpt-4o'
|
||||||
|
|
||||||
|
# Create new config with different settings
|
||||||
|
new_config = LLMConfig(
|
||||||
|
model='claude-3-5-sonnet-20241022',
|
||||||
|
api_key='new_test_key',
|
||||||
|
temperature=0.7,
|
||||||
|
max_output_tokens=2000,
|
||||||
|
top_p=0.9,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update the configuration
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
# Verify the configuration was updated
|
||||||
|
assert llm.config.model == 'claude-3-5-sonnet-20241022'
|
||||||
|
assert llm.config.api_key.get_secret_value() == 'new_test_key'
|
||||||
|
assert llm.config.temperature == 0.7
|
||||||
|
assert llm.config.max_output_tokens == 2000
|
||||||
|
assert llm.config.top_p == 0.9
|
||||||
|
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_update_config_model_change_resets_model_info(default_config):
|
||||||
|
"""Test that changing model resets model info for re-initialization."""
|
||||||
|
llm = LLM(default_config)
|
||||||
|
|
||||||
|
# Set some model info to verify it gets reset
|
||||||
|
llm.model_info = {'test': 'info'}
|
||||||
|
llm._tried_model_info = True
|
||||||
|
|
||||||
|
# Create new config with different model
|
||||||
|
new_config = copy.deepcopy(default_config)
|
||||||
|
new_config.model = 'claude-3-5-sonnet-20241022'
|
||||||
|
|
||||||
|
# Update the configuration
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
# Verify model info was reset and metrics updated
|
||||||
|
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||||
|
# _tried_model_info should be reset to False to force re-initialization
|
||||||
|
# (it will be set back to True after init_model_info() is called)
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_update_config_same_model_preserves_model_info(default_config):
|
||||||
|
"""Test that keeping the same model preserves model info."""
|
||||||
|
llm = LLM(default_config)
|
||||||
|
|
||||||
|
# Create new config with same model but different other settings
|
||||||
|
new_config = copy.deepcopy(default_config)
|
||||||
|
new_config.temperature = 0.5
|
||||||
|
new_config.max_output_tokens = 1500
|
||||||
|
|
||||||
|
# Update the configuration
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
# Verify model info was preserved but other settings changed
|
||||||
|
assert llm.config.temperature == 0.5
|
||||||
|
assert llm.config.max_output_tokens == 1500
|
||||||
|
assert llm.metrics.model_name == 'gpt-4o' # Same model
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_update_config_custom_tokenizer_update(default_config):
|
||||||
|
"""Test updating custom tokenizer configuration."""
|
||||||
|
llm = LLM(default_config)
|
||||||
|
|
||||||
|
# Initially no custom tokenizer
|
||||||
|
assert llm.config.custom_tokenizer is None
|
||||||
|
assert llm.tokenizer is None
|
||||||
|
|
||||||
|
# Update config with custom tokenizer
|
||||||
|
new_config = copy.deepcopy(default_config)
|
||||||
|
new_config.custom_tokenizer = 'gpt2'
|
||||||
|
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
# Verify tokenizer was updated
|
||||||
|
assert llm.config.custom_tokenizer == 'gpt2'
|
||||||
|
assert llm.tokenizer is not None
|
||||||
|
|
||||||
|
# Update back to no custom tokenizer
|
||||||
|
new_config.custom_tokenizer = None
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
# Verify tokenizer was reset
|
||||||
|
assert llm.config.custom_tokenizer is None
|
||||||
|
assert llm.tokenizer is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_update_config_log_completions_folder_creation(default_config):
|
||||||
|
"""Test that log completions folder is created when needed."""
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
log_folder = Path(temp_dir) / 'test_completions'
|
||||||
|
|
||||||
|
llm = LLM(default_config)
|
||||||
|
|
||||||
|
# Update config to enable log completions
|
||||||
|
new_config = copy.deepcopy(default_config)
|
||||||
|
new_config.log_completions = True
|
||||||
|
new_config.log_completions_folder = str(log_folder)
|
||||||
|
|
||||||
|
# Folder shouldn't exist yet
|
||||||
|
assert not log_folder.exists()
|
||||||
|
|
||||||
|
# Update configuration
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
# Verify folder was created
|
||||||
|
assert log_folder.exists()
|
||||||
|
assert log_folder.is_dir()
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_update_config_log_completions_folder_required():
|
||||||
|
"""Test that log_completions_folder is required when log_completions is True."""
|
||||||
|
config = LLMConfig(model='gpt-4o', api_key='test_key')
|
||||||
|
llm = LLM(config)
|
||||||
|
|
||||||
|
# Create config with log_completions=True but no folder
|
||||||
|
new_config = copy.deepcopy(config)
|
||||||
|
new_config.log_completions = True
|
||||||
|
new_config.log_completions_folder = None
|
||||||
|
|
||||||
|
# Should raise RuntimeError
|
||||||
|
with pytest.raises(RuntimeError, match='log_completions_folder is required'):
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_update_config_completion_function_rebuilt(default_config):
|
||||||
|
"""Test that completion function is rebuilt after config update."""
|
||||||
|
llm = LLM(default_config)
|
||||||
|
|
||||||
|
# Store reference to original completion function
|
||||||
|
|
||||||
|
# Update configuration
|
||||||
|
new_config = copy.deepcopy(default_config)
|
||||||
|
new_config.temperature = 0.8
|
||||||
|
new_config.max_output_tokens = 1500
|
||||||
|
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
# Verify completion functions exist (they should be rebuilt)
|
||||||
|
assert llm._completion is not None
|
||||||
|
assert llm._completion_unwrapped is not None
|
||||||
|
|
||||||
|
# The functions should be different objects since they were rebuilt
|
||||||
|
# (though this is implementation detail, the important thing is they work)
|
||||||
|
assert callable(llm._completion)
|
||||||
|
assert callable(llm._completion_unwrapped)
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_update_config_preserves_metrics_and_retry_listener(default_config):
|
||||||
|
"""Test that metrics and retry listener are preserved during config update."""
|
||||||
|
# Create custom metrics and retry listener
|
||||||
|
custom_metrics = Metrics(model_name='initial_model')
|
||||||
|
retry_listener = MagicMock()
|
||||||
|
|
||||||
|
llm = LLM(default_config, metrics=custom_metrics, retry_listener=retry_listener)
|
||||||
|
|
||||||
|
# Verify initial state
|
||||||
|
assert llm.metrics is custom_metrics
|
||||||
|
assert llm.retry_listener is retry_listener
|
||||||
|
|
||||||
|
# Update configuration
|
||||||
|
new_config = copy.deepcopy(default_config)
|
||||||
|
new_config.model = 'claude-3-5-sonnet-20241022'
|
||||||
|
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
# Verify metrics and retry listener are preserved
|
||||||
|
assert llm.metrics is custom_metrics
|
||||||
|
assert llm.retry_listener is retry_listener
|
||||||
|
# But metrics model name should be updated
|
||||||
|
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_update_config_deep_copy_independence():
|
||||||
|
"""Test that config update uses deep copy and doesn't affect original config."""
|
||||||
|
original_config = LLMConfig(
|
||||||
|
model='gpt-4o',
|
||||||
|
api_key='test_key',
|
||||||
|
temperature=0.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
llm = LLM(original_config)
|
||||||
|
|
||||||
|
# Create new config
|
||||||
|
new_config = LLMConfig(
|
||||||
|
model='claude-3-5-sonnet-20241022',
|
||||||
|
api_key='new_key',
|
||||||
|
temperature=0.7,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update LLM config
|
||||||
|
llm.update_config(new_config)
|
||||||
|
|
||||||
|
# Modify the new_config after update
|
||||||
|
new_config.temperature = 0.9
|
||||||
|
new_config.model = 'different-model'
|
||||||
|
|
||||||
|
# LLM config should not be affected by external changes
|
||||||
|
assert llm.config.temperature == 0.7
|
||||||
|
assert llm.config.model == 'claude-3-5-sonnet-20241022'
|
||||||
|
|
||||||
|
# Original config should also be unchanged
|
||||||
|
assert original_config.temperature == 0.0
|
||||||
|
assert original_config.model == 'gpt-4o'
|
||||||
|
|
||||||
|
|
||||||
def test_gemini_model_keeps_none_reasoning_effort():
|
def test_gemini_model_keeps_none_reasoning_effort():
|
||||||
@@ -1302,3 +1514,142 @@ def test_gemini_performance_optimization_end_to_end(mock_completion):
|
|||||||
# Verify temperature and top_p were removed for reasoning models
|
# Verify temperature and top_p were removed for reasoning models
|
||||||
assert 'temperature' not in call_kwargs
|
assert 'temperature' not in call_kwargs
|
||||||
assert 'top_p' not in call_kwargs
|
assert 'top_p' not in call_kwargs
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_reinit_basic(default_config):
|
||||||
|
"""Reinit should update config and metrics like update_config."""
|
||||||
|
llm = LLM(default_config)
|
||||||
|
assert llm.metrics.model_name == 'gpt-4o'
|
||||||
|
|
||||||
|
new_config = LLMConfig(
|
||||||
|
model='claude-3-5-sonnet-20241022',
|
||||||
|
api_key='new_test_key',
|
||||||
|
temperature=0.7,
|
||||||
|
max_output_tokens=1234,
|
||||||
|
)
|
||||||
|
|
||||||
|
llm.reinit(new_config)
|
||||||
|
|
||||||
|
assert llm.config.model == 'claude-3-5-sonnet-20241022'
|
||||||
|
assert llm.config.api_key.get_secret_value() == 'new_test_key'
|
||||||
|
assert llm.config.temperature == 0.7
|
||||||
|
assert llm.config.max_output_tokens == 1234
|
||||||
|
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||||
|
assert callable(llm._completion)
|
||||||
|
assert callable(llm._completion_unwrapped)
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_reinit_recomputes_function_calling_capability():
|
||||||
|
"""Reinit should recompute function-calling capability even if model doesn't change."""
|
||||||
|
base = LLMConfig(model='gpt-3.5-turbo', api_key='key')
|
||||||
|
llm = LLM(base)
|
||||||
|
# gpt-3.5-turbo is not in FUNCTION_CALLING_SUPPORTED_MODELS
|
||||||
|
assert llm.is_function_calling_active() is False
|
||||||
|
|
||||||
|
# Turn on native tool calling; same model
|
||||||
|
cfg_on = copy.deepcopy(base)
|
||||||
|
cfg_on.native_tool_calling = True
|
||||||
|
llm.reinit(cfg_on)
|
||||||
|
assert llm.is_function_calling_active() is True
|
||||||
|
|
||||||
|
# Turn off explicitly
|
||||||
|
cfg_off = copy.deepcopy(base)
|
||||||
|
cfg_off.native_tool_calling = False
|
||||||
|
llm.reinit(cfg_off)
|
||||||
|
assert llm.is_function_calling_active() is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_reinit_resets_cost_flag(default_config):
|
||||||
|
"""Reinit should reset cost_metric_supported so a new model can report cost."""
|
||||||
|
llm = LLM(default_config)
|
||||||
|
llm.cost_metric_supported = False
|
||||||
|
|
||||||
|
# Same model, but reinit should reset the flag to True
|
||||||
|
llm.reinit(copy.deepcopy(default_config))
|
||||||
|
assert llm.cost_metric_supported is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm_reinit_thread_safety_with_inflight_completion(default_config):
|
||||||
|
"""Concurrent reinit during an in-flight completion should not raise,
|
||||||
|
and subsequent completions should use the new config.
|
||||||
|
"""
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
llm = LLM(default_config)
|
||||||
|
|
||||||
|
calls = []
|
||||||
|
|
||||||
|
def fake_completion(*args, **kwargs):
|
||||||
|
# Simulate provider latency and record the model used
|
||||||
|
calls.append(kwargs.get('model'))
|
||||||
|
time.sleep(0.2)
|
||||||
|
return {
|
||||||
|
'id': 'test-1',
|
||||||
|
'choices': [{'message': {'content': 'ok'}}],
|
||||||
|
'usage': {'prompt_tokens': 1, 'completion_tokens': 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch('openhands.llm.llm.litellm_completion') as mock_completion:
|
||||||
|
mock_completion.side_effect = fake_completion
|
||||||
|
|
||||||
|
# Start an in-flight completion with the initial model
|
||||||
|
t = threading.Thread(
|
||||||
|
target=llm.completion, args=([{'role': 'user', 'content': 'hi'}],)
|
||||||
|
)
|
||||||
|
t.start()
|
||||||
|
|
||||||
|
# Reinit while the completion is in-flight
|
||||||
|
time.sleep(0.05)
|
||||||
|
new_cfg = copy.deepcopy(default_config)
|
||||||
|
new_cfg.model = 'claude-3-5-sonnet-20241022'
|
||||||
|
llm.reinit(new_cfg)
|
||||||
|
|
||||||
|
t.join()
|
||||||
|
|
||||||
|
# Subsequent completion should use the new config
|
||||||
|
llm.completion(messages=[{'role': 'user', 'content': 'again'}])
|
||||||
|
|
||||||
|
# Ensure the latest call used the new model and metrics reflect it
|
||||||
|
assert len(calls) >= 1
|
||||||
|
assert calls[-1] == 'claude-3-5-sonnet-20241022'
|
||||||
|
assert llm.metrics.model_name == 'claude-3-5-sonnet-20241022'
|
||||||
|
|
||||||
|
|
||||||
|
@patch('openhands.llm.llm.litellm_completion')
|
||||||
|
def test_llm_reinit_provider_mappings(mock_completion, default_config):
|
||||||
|
"""Reinit should apply provider-specific mappings (openhands proxy, azure max_tokens)."""
|
||||||
|
# Return a minimal, valid response
|
||||||
|
mock_completion.return_value = {
|
||||||
|
'id': 'call',
|
||||||
|
'choices': [{'message': {'content': 'ok'}}],
|
||||||
|
'usage': {'prompt_tokens': 1, 'completion_tokens': 1},
|
||||||
|
}
|
||||||
|
|
||||||
|
llm = LLM(default_config)
|
||||||
|
|
||||||
|
# 1) OpenHands provider rewrite to litellm_proxy
|
||||||
|
cfg_proxy = copy.deepcopy(default_config)
|
||||||
|
cfg_proxy.model = 'openhands/qwen3-coder'
|
||||||
|
llm.reinit(cfg_proxy)
|
||||||
|
llm.completion(messages=[{'role': 'user', 'content': 'x'}])
|
||||||
|
|
||||||
|
model_arg = mock_completion.call_args.kwargs.get('model')
|
||||||
|
base_url_arg = mock_completion.call_args.kwargs.get('base_url')
|
||||||
|
assert model_arg == 'litellm_proxy/qwen3-coder'
|
||||||
|
assert base_url_arg == 'https://llm-proxy.app.all-hands.dev/'
|
||||||
|
|
||||||
|
# 2) Azure mapping: max_completion_tokens -> max_tokens
|
||||||
|
mock_completion.reset_mock()
|
||||||
|
cfg_azure = copy.deepcopy(default_config)
|
||||||
|
cfg_azure.model = 'azure/gpt-4o'
|
||||||
|
cfg_azure.max_output_tokens = 777
|
||||||
|
cfg_azure.api_version = '2024-06-01'
|
||||||
|
llm.reinit(cfg_azure)
|
||||||
|
llm.completion(messages=[{'role': 'user', 'content': 'y'}])
|
||||||
|
|
||||||
|
kwargs = mock_completion.call_args.kwargs
|
||||||
|
assert kwargs.get('model') == 'azure/gpt-4o'
|
||||||
|
assert 'max_tokens' in kwargs and kwargs['max_tokens'] == 777
|
||||||
|
assert 'max_completion_tokens' not in kwargs
|
||||||
|
|||||||
Reference in New Issue
Block a user