[Refactor]: Add LLMRegistry for llm services (#9589)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Graham Neubig <neubig@gmail.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
Rohit Malhotra
2025-08-18 02:11:20 -04:00
committed by GitHub
parent 17b1a21296
commit 25d9cf2890
84 changed files with 2376 additions and 817 deletions

View File

@@ -21,6 +21,7 @@ from openhands.integrations.provider import (
PROVIDER_TOKEN_TYPE,
ProviderHandler,
)
from openhands.llm.llm_registry import LLMRegistry
from openhands.mcp import add_mcp_tools_to_agent
from openhands.memory.memory import Memory
from openhands.microagent.microagent import BaseMicroagent
@@ -29,6 +30,7 @@ from openhands.runtime.base import Runtime
from openhands.runtime.impl.remote.remote_runtime import RemoteRuntime
from openhands.runtime.runtime_status import RuntimeStatus
from openhands.security import SecurityAnalyzer, options
from openhands.server.services.conversation_stats import ConversationStats
from openhands.storage.data_models.user_secrets import UserSecrets
from openhands.storage.files import FileStore
from openhands.utils.async_utils import EXECUTOR, call_sync_from_async
@@ -48,6 +50,7 @@ class AgentSession:
sid: str
user_id: str | None
event_stream: EventStream
llm_registry: LLMRegistry
file_store: FileStore
controller: AgentController | None = None
runtime: Runtime | None = None
@@ -63,6 +66,8 @@ class AgentSession:
self,
sid: str,
file_store: FileStore,
llm_registry: LLMRegistry,
convo_stats: ConversationStats,
status_callback: Callable | None = None,
user_id: str | None = None,
) -> None:
@@ -80,6 +85,8 @@ class AgentSession:
self.logger = OpenHandsLoggerAdapter(
extra={'session_id': sid, 'user_id': user_id}
)
self.llm_registry = llm_registry
self.convo_stats = convo_stats
async def start(
self,
@@ -340,6 +347,7 @@ class AgentSession:
self.runtime = runtime_cls(
config=config,
event_stream=self.event_stream,
llm_registry=self.llm_registry,
sid=self.sid,
plugins=agent.sandbox_plugins,
status_callback=self._status_callback,
@@ -360,6 +368,7 @@ class AgentSession:
self.runtime = runtime_cls(
config=config,
event_stream=self.event_stream,
llm_registry=self.llm_registry,
sid=self.sid,
plugins=agent.sandbox_plugins,
status_callback=self._status_callback,
@@ -441,6 +450,7 @@ class AgentSession:
user_id=self.user_id,
file_store=self.file_store,
event_stream=self.event_stream,
convo_stats=self.convo_stats,
agent=agent,
iteration_delta=int(max_iterations),
budget_per_task_delta=max_budget_per_task,
@@ -490,6 +500,15 @@ class AgentSession:
)
return memory
def get_state(self) -> AgentState | None:
controller = self.controller
if controller:
return controller.state.agent_state
if time.time() > self._started_at + WAIT_TIME_BEFORE_CLOSE:
# If 5 minutes have elapsed and we still don't have a controller, something has gone wrong
return AgentState.ERROR
return None
def _maybe_restore_state(self) -> State | None:
"""Helper method to handle state restore logic."""
restored_state = None
@@ -510,14 +529,5 @@ class AgentSession:
self.logger.debug('No events found, no state to restore')
return restored_state
def get_state(self) -> AgentState | None:
controller = self.controller
if controller:
return controller.state.agent_state
if time.time() > self._started_at + WAIT_TIME_BEFORE_CLOSE:
# If 5 minutes have elapsed and we still don't have a controller, something has gone wrong
return AgentState.ERROR
return None
def is_closed(self) -> bool:
return self._closed

View File

@@ -2,6 +2,7 @@ import asyncio
from openhands.core.config import OpenHandsConfig
from openhands.events.stream import EventStream
from openhands.llm.llm_registry import LLMRegistry
from openhands.runtime import get_runtime_cls
from openhands.runtime.base import Runtime
from openhands.security import SecurityAnalyzer, options
@@ -45,6 +46,7 @@ class ServerConversation:
else:
runtime_cls = get_runtime_cls(self.config.runtime)
runtime = runtime_cls(
llm_registry=LLMRegistry(self.config),
config=config,
event_stream=self.event_stream,
sid=self.sid,

View File

@@ -1,6 +1,5 @@
import asyncio
import time
from copy import deepcopy
from logging import LoggerAdapter
import socketio
@@ -28,9 +27,10 @@ from openhands.events.observation.agent import RecallObservation
from openhands.events.observation.error import ErrorObservation
from openhands.events.serialization import event_from_dict, event_to_dict
from openhands.events.stream import EventStreamSubscriber
from openhands.llm.llm import LLM
from openhands.llm.llm_registry import LLMRegistry
from openhands.runtime.runtime_status import RuntimeStatus
from openhands.server.constants import ROOM_KEY
from openhands.server.services.conversation_stats import ConversationStats
from openhands.server.session.agent_session import AgentSession
from openhands.server.session.conversation_init_data import ConversationInitData
from openhands.storage.data_models.settings import Settings
@@ -45,6 +45,7 @@ class Session:
agent_session: AgentSession
loop: asyncio.AbstractEventLoop
config: OpenHandsConfig
llm_registry: LLMRegistry
file_store: FileStore
user_id: str | None
logger: LoggerAdapter
@@ -53,6 +54,8 @@ class Session:
self,
sid: str,
config: OpenHandsConfig,
llm_registry: LLMRegistry,
convo_stats: ConversationStats,
file_store: FileStore,
sio: socketio.AsyncServer | None,
user_id: str | None = None,
@@ -62,17 +65,21 @@ class Session:
self.last_active_ts = int(time.time())
self.file_store = file_store
self.logger = OpenHandsLoggerAdapter(extra={'session_id': sid})
self.llm_registry = llm_registry
self.convo_stats = convo_stats
self.agent_session = AgentSession(
sid,
file_store,
llm_registry=self.llm_registry,
convo_stats=convo_stats,
status_callback=self.queue_status_message,
user_id=user_id,
)
self.agent_session.event_stream.subscribe(
EventStreamSubscriber.SERVER, self.on_event, self.sid
)
# Copying this means that when we update variables they are not applied to the shared global configuration!
self.config = deepcopy(config)
self.config = config
# Lazy import to avoid circular dependency
from openhands.experiments.experiment_manager import ExperimentManagerImpl
@@ -140,13 +147,6 @@ class Session:
else self.config.max_budget_per_task
)
# This is a shallow copy of the default LLM config, so changes here will
# persist if we retrieve the default LLM config again when constructing
# the agent
default_llm_config = self.config.get_llm_config()
default_llm_config.model = settings.llm_model or ''
default_llm_config.api_key = settings.llm_api_key
default_llm_config.base_url = settings.llm_base_url
self.config.search_api_key = settings.search_api_key
if settings.sandbox_api_key:
self.config.sandbox.api_key = settings.sandbox_api_key.get_secret_value()
@@ -181,10 +181,9 @@ class Session:
)
# TODO: override other LLM config & agent config groups (#2075)
llm = self._create_llm(agent_cls)
agent_config = self.config.get_agent_config(agent_cls)
agent_name = agent_cls if agent_cls is not None else 'agent'
llm_config = self.config.get_llm_config_from_agent(agent_name)
if settings.enable_default_condenser:
# Default condenser chains three condensers together:
# 1. a conversation window condenser that handles explicit
@@ -200,7 +199,7 @@ class Session:
ConversationWindowCondenserConfig(),
BrowserOutputCondenserConfig(attention_window=2),
LLMSummarizingCondenserConfig(
llm_config=llm.config, keep_first=4, max_size=120
llm_config=llm_config, keep_first=4, max_size=120
),
]
)
@@ -208,12 +207,14 @@ class Session:
self.logger.info(
f'Enabling pipeline condenser with:'
f' browser_output_masking(attention_window=2), '
f' llm(model="{llm.config.model}", '
f' base_url="{llm.config.base_url}", '
f' llm(model="{llm_config.model}", '
f' base_url="{llm_config.base_url}", '
f' keep_first=4, max_size=80)'
)
agent_config.condenser = default_condenser_config
agent = Agent.get_cls(agent_cls)(llm, agent_config)
agent = Agent.get_cls(agent_cls)(agent_config, self.llm_registry)
self.llm_registry.retry_listner = self._notify_on_llm_retry
git_provider_tokens = None
selected_repository = None
@@ -269,14 +270,6 @@ class Session:
)
return
def _create_llm(self, agent_cls: str | None) -> LLM:
"""Initialize LLM, extracted for testing."""
agent_name = agent_cls if agent_cls is not None else 'agent'
return LLM(
config=self.config.get_llm_config_from_agent(agent_name),
retry_listener=self._notify_on_llm_retry,
)
def _notify_on_llm_retry(self, retries: int, max: int) -> None:
self.queue_status_message(
'info', RuntimeStatus.LLM_RETRY, f'Retrying LLM request, {retries} / {max}'