[Refactor]: Add LLMRegistry for llm services (#9589)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Graham Neubig <neubig@gmail.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2026-01-09 14:57:59 -05:00 · 2025-08-18 02:11:20 -04:00
parent 17b1a21296
commit 25d9cf2890
84 changed files with 2376 additions and 817 deletions
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -21,6 +21,7 @@ from openhands.integrations.provider import (
    PROVIDER_TOKEN_TYPE,
    ProviderHandler,
 )
+from openhands.llm.llm_registry import LLMRegistry
 from openhands.mcp import add_mcp_tools_to_agent
 from openhands.memory.memory import Memory
 from openhands.microagent.microagent import BaseMicroagent
@@ -29,6 +30,7 @@ from openhands.runtime.base import Runtime
 from openhands.runtime.impl.remote.remote_runtime import RemoteRuntime
 from openhands.runtime.runtime_status import RuntimeStatus
 from openhands.security import SecurityAnalyzer, options
+from openhands.server.services.conversation_stats import ConversationStats
 from openhands.storage.data_models.user_secrets import UserSecrets
 from openhands.storage.files import FileStore
 from openhands.utils.async_utils import EXECUTOR, call_sync_from_async
@@ -48,6 +50,7 @@ class AgentSession:
    sid: str
    user_id: str | None
    event_stream: EventStream
+    llm_registry: LLMRegistry
    file_store: FileStore
    controller: AgentController | None = None
    runtime: Runtime | None = None
@@ -63,6 +66,8 @@ class AgentSession:
        self,
        sid: str,
        file_store: FileStore,
+        llm_registry: LLMRegistry,
+        convo_stats: ConversationStats,
        status_callback: Callable | None = None,
        user_id: str | None = None,
    ) -> None:
@@ -80,6 +85,8 @@ class AgentSession:
        self.logger = OpenHandsLoggerAdapter(
            extra={'session_id': sid, 'user_id': user_id}
        )
+        self.llm_registry = llm_registry
+        self.convo_stats = convo_stats

    async def start(
        self,
@@ -340,6 +347,7 @@ class AgentSession:
            self.runtime = runtime_cls(
                config=config,
                event_stream=self.event_stream,
+                llm_registry=self.llm_registry,
                sid=self.sid,
                plugins=agent.sandbox_plugins,
                status_callback=self._status_callback,
@@ -360,6 +368,7 @@ class AgentSession:
            self.runtime = runtime_cls(
                config=config,
                event_stream=self.event_stream,
+                llm_registry=self.llm_registry,
                sid=self.sid,
                plugins=agent.sandbox_plugins,
                status_callback=self._status_callback,
@@ -441,6 +450,7 @@ class AgentSession:
            user_id=self.user_id,
            file_store=self.file_store,
            event_stream=self.event_stream,
+            convo_stats=self.convo_stats,
            agent=agent,
            iteration_delta=int(max_iterations),
            budget_per_task_delta=max_budget_per_task,
@@ -490,6 +500,15 @@ class AgentSession:
                )
        return memory

+    def get_state(self) -> AgentState | None:
+        controller = self.controller
+        if controller:
+            return controller.state.agent_state
+        if time.time() > self._started_at + WAIT_TIME_BEFORE_CLOSE:
+            # If 5 minutes have elapsed and we still don't have a controller, something has gone wrong
+            return AgentState.ERROR
+        return None
+
    def _maybe_restore_state(self) -> State | None:
        """Helper method to handle state restore logic."""
        restored_state = None
@@ -510,14 +529,5 @@ class AgentSession:
                self.logger.debug('No events found, no state to restore')
        return restored_state

-    def get_state(self) -> AgentState | None:
-        controller = self.controller
-        if controller:
-            return controller.state.agent_state
-        if time.time() > self._started_at + WAIT_TIME_BEFORE_CLOSE:
-            # If 5 minutes have elapsed and we still don't have a controller, something has gone wrong
-            return AgentState.ERROR
-        return None
-
    def is_closed(self) -> bool:
        return self._closed
--- a/openhands/server/session/conversation.py
+++ b/openhands/server/session/conversation.py
@@ -2,6 +2,7 @@ import asyncio

 from openhands.core.config import OpenHandsConfig
 from openhands.events.stream import EventStream
+from openhands.llm.llm_registry import LLMRegistry
 from openhands.runtime import get_runtime_cls
 from openhands.runtime.base import Runtime
 from openhands.security import SecurityAnalyzer, options
@@ -45,6 +46,7 @@ class ServerConversation:
        else:
            runtime_cls = get_runtime_cls(self.config.runtime)
            runtime = runtime_cls(
+                llm_registry=LLMRegistry(self.config),
                config=config,
                event_stream=self.event_stream,
                sid=self.sid,
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -1,6 +1,5 @@
 import asyncio
 import time
-from copy import deepcopy
 from logging import LoggerAdapter

 import socketio
@@ -28,9 +27,10 @@ from openhands.events.observation.agent import RecallObservation
 from openhands.events.observation.error import ErrorObservation
 from openhands.events.serialization import event_from_dict, event_to_dict
 from openhands.events.stream import EventStreamSubscriber
-from openhands.llm.llm import LLM
+from openhands.llm.llm_registry import LLMRegistry
 from openhands.runtime.runtime_status import RuntimeStatus
 from openhands.server.constants import ROOM_KEY
+from openhands.server.services.conversation_stats import ConversationStats
 from openhands.server.session.agent_session import AgentSession
 from openhands.server.session.conversation_init_data import ConversationInitData
 from openhands.storage.data_models.settings import Settings
@@ -45,6 +45,7 @@ class Session:
    agent_session: AgentSession
    loop: asyncio.AbstractEventLoop
    config: OpenHandsConfig
+    llm_registry: LLMRegistry
    file_store: FileStore
    user_id: str | None
    logger: LoggerAdapter
@@ -53,6 +54,8 @@ class Session:
        self,
        sid: str,
        config: OpenHandsConfig,
+        llm_registry: LLMRegistry,
+        convo_stats: ConversationStats,
        file_store: FileStore,
        sio: socketio.AsyncServer | None,
        user_id: str | None = None,
@@ -62,17 +65,21 @@ class Session:
        self.last_active_ts = int(time.time())
        self.file_store = file_store
        self.logger = OpenHandsLoggerAdapter(extra={'session_id': sid})
+        self.llm_registry = llm_registry
+        self.convo_stats = convo_stats
        self.agent_session = AgentSession(
            sid,
            file_store,
+            llm_registry=self.llm_registry,
+            convo_stats=convo_stats,
            status_callback=self.queue_status_message,
            user_id=user_id,
        )
        self.agent_session.event_stream.subscribe(
            EventStreamSubscriber.SERVER, self.on_event, self.sid
        )
-        # Copying this means that when we update variables they are not applied to the shared global configuration!
-        self.config = deepcopy(config)
+        self.config = config
+
        # Lazy import to avoid circular dependency
        from openhands.experiments.experiment_manager import ExperimentManagerImpl

@@ -140,13 +147,6 @@ class Session:
            else self.config.max_budget_per_task
        )

-        # This is a shallow copy of the default LLM config, so changes here will
-        # persist if we retrieve the default LLM config again when constructing
-        # the agent
-        default_llm_config = self.config.get_llm_config()
-        default_llm_config.model = settings.llm_model or ''
-        default_llm_config.api_key = settings.llm_api_key
-        default_llm_config.base_url = settings.llm_base_url
        self.config.search_api_key = settings.search_api_key
        if settings.sandbox_api_key:
            self.config.sandbox.api_key = settings.sandbox_api_key.get_secret_value()
@@ -181,10 +181,9 @@ class Session:
        )

        # TODO: override other LLM config & agent config groups (#2075)
-
-        llm = self._create_llm(agent_cls)
        agent_config = self.config.get_agent_config(agent_cls)
-
+        agent_name = agent_cls if agent_cls is not None else 'agent'
+        llm_config = self.config.get_llm_config_from_agent(agent_name)
        if settings.enable_default_condenser:
            # Default condenser chains three condensers together:
            # 1. a conversation window condenser that handles explicit
@@ -200,7 +199,7 @@ class Session:
                    ConversationWindowCondenserConfig(),
                    BrowserOutputCondenserConfig(attention_window=2),
                    LLMSummarizingCondenserConfig(
-                        llm_config=llm.config, keep_first=4, max_size=120
+                        llm_config=llm_config, keep_first=4, max_size=120
                    ),
                ]
            )
@@ -208,12 +207,14 @@ class Session:
            self.logger.info(
                f'Enabling pipeline condenser with:'
                f' browser_output_masking(attention_window=2), '
-                f' llm(model="{llm.config.model}", '
-                f' base_url="{llm.config.base_url}", '
+                f' llm(model="{llm_config.model}", '
+                f' base_url="{llm_config.base_url}", '
                f' keep_first=4, max_size=80)'
            )
            agent_config.condenser = default_condenser_config
-        agent = Agent.get_cls(agent_cls)(llm, agent_config)
+        agent = Agent.get_cls(agent_cls)(agent_config, self.llm_registry)
+
+        self.llm_registry.retry_listner = self._notify_on_llm_retry

        git_provider_tokens = None
        selected_repository = None
@@ -269,14 +270,6 @@ class Session:
            )
            return

-    def _create_llm(self, agent_cls: str | None) -> LLM:
-        """Initialize LLM, extracted for testing."""
-        agent_name = agent_cls if agent_cls is not None else 'agent'
-        return LLM(
-            config=self.config.get_llm_config_from_agent(agent_name),
-            retry_listener=self._notify_on_llm_retry,
-        )
-
    def _notify_on_llm_retry(self, retries: int, max: int) -> None:
        self.queue_status_message(
            'info', RuntimeStatus.LLM_RETRY, f'Retrying LLM request, {retries} / {max}'