[agent] Add LLM risk analyzer (#9349)

Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Graham Neubig <neubig@gmail.com> Co-authored-by: llamantino <213239228+llamantino@users.noreply.github.com> Co-authored-by: mamoodi <mamoodiha@gmail.com> Co-authored-by: Tim O'Farrell <tofarr@gmail.com> Co-authored-by: Hiep Le <69354317+hieptl@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ryan H. Tran <descience.thh10@gmail.com> Co-authored-by: Neeraj Panwar <49247372+npneeraj@users.noreply.github.com> Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com> Co-authored-by: Insop <1240382+insop@users.noreply.github.com> Co-authored-by: test <test@test.com> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Zhonghao Jiang <zhonghao.J@outlook.com> Co-authored-by: Ray Myers <ray.myers@gmail.com>
2026-01-09 06:48:02 -05:00 · 2025-08-22 10:02:36 -04:00
parent 4507a25b85
commit ca424ec15d
53 changed files with 729 additions and 563 deletions
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -5,7 +5,10 @@ import copy
 import os
 import time
 import traceback
-from typing import Callable
+from typing import TYPE_CHECKING, Callable
+
+if TYPE_CHECKING:
+    from openhands.security.analyzer import SecurityAnalyzer

 from litellm.exceptions import (  # noqa
    APIConnectionError,
@@ -49,11 +52,15 @@ from openhands.events import (
 from openhands.events.action import (
    Action,
    ActionConfirmationStatus,
+    ActionSecurityRisk,
    AgentDelegateAction,
    AgentFinishAction,
    AgentRejectAction,
+    BrowseInteractiveAction,
    ChangeAgentStateAction,
    CmdRunAction,
+    FileEditAction,
+    FileReadAction,
    IPythonRunCellAction,
    MessageAction,
    NullAction,
@@ -123,6 +130,7 @@ class AgentController:
        headless_mode: bool = True,
        status_callback: Callable | None = None,
        replay_events: list[Event] | None = None,
+        security_analyzer: 'SecurityAnalyzer | None' = None,
    ):
        """Initializes a new instance of the AgentController class.

@@ -185,9 +193,52 @@ class AgentController:
        # replay-related
        self._replay_manager = ReplayManager(replay_events)

+        # security analyzer for direct access
+        self.security_analyzer = security_analyzer
+
        # Add the system message to the event stream
        self._add_system_message()

+    async def _handle_security_analyzer(self, action: Action) -> None:
+        """Handle security risk analysis for an action.
+
+        If a security analyzer is configured, use it to analyze the action.
+        If no security analyzer is configured, set the risk to HIGH (fail-safe approach).
+
+        Args:
+            action: The action to analyze for security risks.
+        """
+        if self.security_analyzer:
+            try:
+                if (
+                    hasattr(action, 'security_risk')
+                    and action.security_risk is not None
+                ):
+                    logger.debug(
+                        f'Original security risk for {action}: {action.security_risk})'
+                    )
+                if hasattr(action, 'security_risk'):
+                    action.security_risk = await self.security_analyzer.security_risk(
+                        action
+                    )
+                    logger.debug(
+                        f'[Security Analyzer: {self.security_analyzer.__class__}] Override security risk for action {action}: {action.security_risk}'
+                    )
+            except Exception as e:
+                logger.warning(
+                    f'Failed to analyze security risk for action {action}: {e}'
+                )
+                if hasattr(action, 'security_risk'):
+                    action.security_risk = ActionSecurityRisk.UNKNOWN
+        else:
+            # When no security analyzer is configured, treat all actions as HIGH risk
+            # This is a fail-safe approach that ensures confirmation is required
+            logger.debug(
+                f'No security analyzer configured, setting HIGH risk for action: {action}'
+            )
+            if hasattr(action, 'security_risk'):
+                action.security_risk = ActionSecurityRisk.HIGH
+
    def _add_system_message(self):
        for event in self.event_stream.search_events(start_id=self.state.start_id):
            if isinstance(event, MessageAction) and event.source == EventSource.USER:
@@ -695,6 +746,7 @@ class AgentController:
            initial_state=state,
            is_delegate=True,
            headless_mode=self.headless_mode,
+            security_analyzer=self.security_analyzer,
        )

    def end_delegate(self) -> None:
@@ -862,11 +914,37 @@ class AgentController:

        if action.runnable:
            if self.state.confirmation_mode and (
-                type(action) is CmdRunAction or type(action) is IPythonRunCellAction
+                type(action) is CmdRunAction
+                or type(action) is IPythonRunCellAction
+                or type(action) is BrowseInteractiveAction
+                or type(action) is FileEditAction
+                or type(action) is FileReadAction
            ):
-                action.confirmation_state = (
-                    ActionConfirmationStatus.AWAITING_CONFIRMATION
+                # Handle security risk analysis using the dedicated method
+                await self._handle_security_analyzer(action)
+
+                # Check if the action has a security_risk attribute set by the LLM or security analyzer
+                security_risk = getattr(
+                    action, 'security_risk', ActionSecurityRisk.UNKNOWN
                )
+
+                # If security_risk is HIGH, requires confirmation
+                # UNLESS it is CLI, which will handle action risks itself
+                if self.agent.config.cli_mode:
+                    # TODO(refactor): it is not ideal to have CLI be an exception
+                    # We should refactor agent controller to consider this in the future
+                    # See issue: https://github.com/All-Hands-AI/OpenHands/issues/10464
+                    action.confirmation_state = (  # type: ignore[union-attr]
+                        ActionConfirmationStatus.AWAITING_CONFIRMATION
+                    )
+                # Only HIGH security risk actions require confirmation
+                elif security_risk == ActionSecurityRisk.HIGH:
+                    logger.debug(
+                        f'[non-CLI mode] Detected HIGH security risk in action: {action}. Ask for confirmation'
+                    )
+                    action.confirmation_state = (  # type: ignore[union-attr]
+                        ActionConfirmationStatus.AWAITING_CONFIRMATION
+                    )
            self._pending_action = action

        if not isinstance(action, NullAction):