[agent] Add LLM risk analyzer (#9349)

Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Graham Neubig <neubig@gmail.com> Co-authored-by: llamantino <213239228+llamantino@users.noreply.github.com> Co-authored-by: mamoodi <mamoodiha@gmail.com> Co-authored-by: Tim O'Farrell <tofarr@gmail.com> Co-authored-by: Hiep Le <69354317+hieptl@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ryan H. Tran <descience.thh10@gmail.com> Co-authored-by: Neeraj Panwar <49247372+npneeraj@users.noreply.github.com> Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com> Co-authored-by: Insop <1240382+insop@users.noreply.github.com> Co-authored-by: test <test@test.com> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Zhonghao Jiang <zhonghao.J@outlook.com> Co-authored-by: Ray Myers <ray.myers@gmail.com>
2026-01-10 07:18:10 -05:00 · 2025-08-22 10:02:36 -04:00
parent 4507a25b85
commit ca424ec15d
53 changed files with 729 additions and 563 deletions
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -19,6 +19,7 @@ from openhands.agenthub.codeact_agent.tools import (
    create_cmd_run_tool,
    create_str_replace_editor_tool,
 )
+from openhands.agenthub.codeact_agent.tools.security_utils import RISK_LEVELS
 from openhands.core.exceptions import (
    FunctionCallNotExistsError,
    FunctionCallValidationError,
@@ -26,6 +27,7 @@ from openhands.core.exceptions import (
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import (
    Action,
+    ActionSecurityRisk,
    AgentDelegateAction,
    AgentFinishAction,
    AgentThinkAction,
@@ -54,6 +56,20 @@ def combine_thought(action: Action, thought: str) -> Action:
    return action


+def set_security_risk(action: Action, arguments: dict) -> None:
+    """Set the security risk level for the action."""
+
+    # Set security_risk attribute if provided
+    if 'security_risk' in arguments:
+        if arguments['security_risk'] in RISK_LEVELS:
+            if hasattr(action, 'security_risk'):
+                action.security_risk = getattr(
+                    ActionSecurityRisk, arguments['security_risk']
+                )
+        else:
+            logger.warning(f'Invalid security_risk value: {arguments["security_risk"]}')
+
+
 def response_to_actions(
    response: ModelResponse, mcp_tool_names: list[str] | None = None
 ) -> list[Action]:
@@ -103,6 +119,7 @@ def response_to_actions(
                        raise FunctionCallValidationError(
                            f"Invalid float passed to 'timeout' argument: {arguments['timeout']}"
                        ) from e
+                set_security_risk(action, arguments)

            # ================================================
            # IPythonTool (Jupyter)
@@ -113,6 +130,11 @@ def response_to_actions(
                        f'Missing required argument "code" in tool call {tool_call.function.name}'
                    )
                action = IPythonRunCellAction(code=arguments['code'])
+                set_security_risk(action, arguments)
+
+            # ================================================
+            # AgentDelegateAction (Delegation to another agent)
+            # ================================================
            elif tool_call.function.name == 'delegate_to_browsing_agent':
                action = AgentDelegateAction(
                    agent='BrowsingAgent',
@@ -178,7 +200,7 @@ def response_to_actions(
                        other_kwargs.pop('view_range')

                    # Filter out unexpected arguments
-                    valid_kwargs = {}
+                    valid_kwargs_for_editor = {}
                    # Get valid parameters from the str_replace_editor tool definition
                    str_replace_editor_tool = create_str_replace_editor_tool()
                    valid_params = set(
@@ -186,9 +208,12 @@ def response_to_actions(
                            'properties'
                        ].keys()
                    )
+
                    for key, value in other_kwargs.items():
                        if key in valid_params:
-                            valid_kwargs[key] = value
+                            # security_risk is valid but should NOT be part of editor kwargs
+                            if key != 'security_risk':
+                                valid_kwargs_for_editor[key] = value
                        else:
                            raise FunctionCallValidationError(
                                f'Unexpected argument {key} in tool call {tool_call.function.name}. Allowed arguments are: {valid_params}'
@@ -198,8 +223,10 @@ def response_to_actions(
                        path=path,
                        command=command,
                        impl_source=FileEditSource.OH_ACI,
-                        **valid_kwargs,
+                        **valid_kwargs_for_editor,
                    )
+
+                set_security_risk(action, arguments)
            # ================================================
            # AgentThinkAction
            # ================================================
@@ -221,6 +248,7 @@ def response_to_actions(
                        f'Missing required argument "code" in tool call {tool_call.function.name}'
                    )
                action = BrowseInteractiveAction(browser_actions=arguments['code'])
+                set_security_risk(action, arguments)

            # ================================================
            # TaskTrackingAction