from unittest.mock import ANY, AsyncMock, patch

import pytest
from litellm.exceptions import RateLimitError

from openhands.core.config.llm_config import LLMConfig
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.server.session.session import Session
from openhands.storage.memory import InMemoryFileStore


@pytest.fixture
def mock_status_callback():
    """Return a fresh ``AsyncMock``.

    NOTE(review): no test visible in this file requests this fixture;
    presumably it stands in for a status callback -- confirm before removing.
    """
    return AsyncMock()


@pytest.fixture
def mock_sio():
    """Return an ``AsyncMock`` that is passed to ``Session`` as its ``sio``."""
    return AsyncMock()


@pytest.fixture
def default_llm_config():
    """Return an ``LLMConfig`` with retries enabled and short wait bounds.

    The wait bounds are kept small (1 and 2; presumably seconds -- confirm
    against ``LLMConfig``) so the retry path is cheap to exercise.
    """
    return LLMConfig(
        model='gpt-4o',
        api_key='test_key',
        num_retries=2,
        retry_min_wait=1,
        retry_max_wait=2,
    )


@pytest.mark.asyncio
@patch('openhands.llm.llm.litellm_completion')
async def test_notify_on_llm_retry(
    mock_litellm_completion, mock_sio, default_llm_config
):
    """Check that one retried completion queues one retry status message.

    The patched completion function raises ``RateLimitError`` on its first
    call and returns a response on the second. The test asserts that it was
    called exactly twice and that ``session.queue_status_message`` was called
    exactly once with ``('info', 'STATUS$LLM_RETRY', ANY)``.
    """
    config = OpenHandsConfig()
    config.set_llm_config(default_llm_config)
    session = Session(
        sid='..sid..',
        file_store=InMemoryFileStore({}),
        config=config,
        sio=mock_sio,
        user_id='..uid..',
    )

    # Close the session even when an assertion (or the LLM call) fails, so a
    # failing run does not leave the session open.
    try:
        # Replace the notifier with a mock so its calls can be inspected.
        session.queue_status_message = AsyncMock()

        # First call fails with a rate limit, second call succeeds.
        mock_litellm_completion.side_effect = [
            RateLimitError(
                'Rate limit exceeded', llm_provider='test_provider', model='test_model'
            ),
            {'choices': [{'message': {'content': 'Retry successful'}}]},
        ]

        # Patch time.sleep so a blocking back-off between attempts
        # (presumably implemented with time.sleep) returns immediately.
        with patch('time.sleep'):
            llm = session._create_llm('..cls..')
            llm.completion(
                messages=[{'role': 'user', 'content': 'Hello!'}],
                stream=False,
            )

        assert mock_litellm_completion.call_count == 2
        session.queue_status_message.assert_called_once_with(
            'info', 'STATUS$LLM_RETRY', ANY
        )
    finally:
        await session.close()