[agent] Add LLM risk analyzer (#9349)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Graham Neubig <neubig@gmail.com>
Co-authored-by: llamantino <213239228+llamantino@users.noreply.github.com>
Co-authored-by: mamoodi <mamoodiha@gmail.com>
Co-authored-by: Tim O'Farrell <tofarr@gmail.com>
Co-authored-by: Hiep Le <69354317+hieptl@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Ryan H. Tran <descience.thh10@gmail.com>
Co-authored-by: Neeraj Panwar <49247372+npneeraj@users.noreply.github.com>
Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
Co-authored-by: Insop <1240382+insop@users.noreply.github.com>
Co-authored-by: test <test@test.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Zhonghao Jiang <zhonghao.J@outlook.com>
Co-authored-by: Ray Myers <ray.myers@gmail.com>
This commit is contained in:
Xingyao Wang
2025-08-22 10:02:36 -04:00
committed by GitHub
parent 4507a25b85
commit ca424ec15d
53 changed files with 729 additions and 563 deletions

View File

@@ -5,7 +5,10 @@ import copy
import os
import time
import traceback
from typing import Callable
from typing import TYPE_CHECKING, Callable
if TYPE_CHECKING:
from openhands.security.analyzer import SecurityAnalyzer
from litellm.exceptions import ( # noqa
APIConnectionError,
@@ -49,11 +52,15 @@ from openhands.events import (
from openhands.events.action import (
Action,
ActionConfirmationStatus,
ActionSecurityRisk,
AgentDelegateAction,
AgentFinishAction,
AgentRejectAction,
BrowseInteractiveAction,
ChangeAgentStateAction,
CmdRunAction,
FileEditAction,
FileReadAction,
IPythonRunCellAction,
MessageAction,
NullAction,
@@ -123,6 +130,7 @@ class AgentController:
headless_mode: bool = True,
status_callback: Callable | None = None,
replay_events: list[Event] | None = None,
security_analyzer: 'SecurityAnalyzer | None' = None,
):
"""Initializes a new instance of the AgentController class.
@@ -185,9 +193,52 @@ class AgentController:
# replay-related
self._replay_manager = ReplayManager(replay_events)
# security analyzer for direct access
self.security_analyzer = security_analyzer
# Add the system message to the event stream
self._add_system_message()
async def _handle_security_analyzer(self, action: Action) -> None:
    """Assign a security risk level to an action, mutating it in place.

    Actions that do not expose a ``security_risk`` attribute are never
    modified.

    If a security analyzer is configured, its verdict overrides whatever
    risk the action already carried. Any exception raised while analyzing
    is logged as a warning and the risk is set to
    ``ActionSecurityRisk.UNKNOWN``.

    If no security analyzer is configured, the risk is set to
    ``ActionSecurityRisk.HIGH`` (fail-safe approach).

    Args:
        action: The action to analyze for security risks.
    """
    # Evaluate once: nothing below adds or removes the attribute.
    supports_risk = hasattr(action, 'security_risk')

    if not self.security_analyzer:
        # When no security analyzer is configured, treat all actions as HIGH risk
        # This is a fail-safe approach that ensures confirmation is required
        logger.debug(
            f'No security analyzer configured, setting HIGH risk for action: {action}'
        )
        if supports_risk:
            action.security_risk = ActionSecurityRisk.HIGH
        return

    try:
        if supports_risk:
            if action.security_risk is not None:
                logger.debug(
                    f'Original security risk for {action}: {action.security_risk}'
                )
            action.security_risk = await self.security_analyzer.security_risk(
                action
            )
            logger.debug(
                f'[Security Analyzer: {self.security_analyzer.__class__}] Override security risk for action {action}: {action.security_risk}'
            )
    except Exception as e:
        # Analysis is best-effort: a failing analyzer must not abort the step.
        logger.warning(
            f'Failed to analyze security risk for action {action}: {e}'
        )
        if supports_risk:
            action.security_risk = ActionSecurityRisk.UNKNOWN
def _add_system_message(self):
for event in self.event_stream.search_events(start_id=self.state.start_id):
if isinstance(event, MessageAction) and event.source == EventSource.USER:
@@ -695,6 +746,7 @@ class AgentController:
initial_state=state,
is_delegate=True,
headless_mode=self.headless_mode,
security_analyzer=self.security_analyzer,
)
def end_delegate(self) -> None:
@@ -862,11 +914,37 @@ class AgentController:
if action.runnable:
if self.state.confirmation_mode and (
type(action) is CmdRunAction or type(action) is IPythonRunCellAction
type(action) is CmdRunAction
or type(action) is IPythonRunCellAction
or type(action) is BrowseInteractiveAction
or type(action) is FileEditAction
or type(action) is FileReadAction
):
action.confirmation_state = (
ActionConfirmationStatus.AWAITING_CONFIRMATION
# Handle security risk analysis using the dedicated method
await self._handle_security_analyzer(action)
# Check if the action has a security_risk attribute set by the LLM or security analyzer
security_risk = getattr(
action, 'security_risk', ActionSecurityRisk.UNKNOWN
)
# If security_risk is HIGH, the action requires confirmation,
# UNLESS we are in CLI mode, where the CLI handles action risks itself
if self.agent.config.cli_mode:
# TODO(refactor): it is not ideal for the CLI to be an exception here.
# We should refactor the agent controller to account for this in the future
# See issue: https://github.com/All-Hands-AI/OpenHands/issues/10464
action.confirmation_state = ( # type: ignore[union-attr]
ActionConfirmationStatus.AWAITING_CONFIRMATION
)
# Only HIGH security risk actions require confirmation
elif security_risk == ActionSecurityRisk.HIGH:
logger.debug(
f'[non-CLI mode] Detected HIGH security risk in action: {action}. Ask for confirmation'
)
action.confirmation_state = ( # type: ignore[union-attr]
ActionConfirmationStatus.AWAITING_CONFIRMATION
)
self._pending_action = action
if not isinstance(action, NullAction):