CLI(V1): Fix confirmation mode breaking on weaker models (#11274)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Rohit Malhotra
2025-10-09 13:31:09 -04:00
committed by GitHub
parent 4285c49edd
commit 9fe4e9715a
11 changed files with 249 additions and 72 deletions

View File

@@ -8,7 +8,6 @@ import sys
from datetime import datetime
from openhands.sdk import (
BaseConversation,
Message,
TextContent,
)
@@ -17,7 +16,7 @@ from prompt_toolkit import print_formatted_text
from prompt_toolkit.formatted_text import HTML
from openhands_cli.runner import ConversationRunner
from openhands_cli.setup import MissingAgentSpec, setup_conversation
from openhands_cli.setup import MissingAgentSpec, setup_conversation, start_fresh_conversation
from openhands_cli.tui.settings.mcp_screen import MCPScreen
from openhands_cli.tui.settings.settings_screen import SettingsScreen
from openhands_cli.tui.status import display_status
@@ -29,32 +28,6 @@ from openhands_cli.user_actions import UserConfirmation, exit_session_confirmati
from openhands_cli.user_actions.utils import get_session_prompter
def _start_fresh_conversation(resume_conversation_id: str | None = None) -> BaseConversation:
"""Start a fresh conversation by creating a new conversation instance.
Handles the complete conversation setup process including settings screen
if agent configuration is missing.
Args:
resume_conversation_id: Optional conversation ID to resume
Returns:
BaseConversation: A new conversation instance
"""
conversation = None
settings_screen = SettingsScreen()
try:
conversation = setup_conversation(resume_conversation_id)
return conversation
except MissingAgentSpec:
# For first-time users, show the full settings flow with choice between basic/advanced
settings_screen.configure_settings(first_time=True)
# Try once again after settings setup attempt
return setup_conversation(resume_conversation_id)
def _restore_tty() -> None:
"""
Ensure terminal modes are reset in case prompt_toolkit cleanup didn't run.
@@ -92,7 +65,7 @@ def run_cli_entry(resume_conversation_id: str | None = None) -> None:
"""
try:
conversation = _start_fresh_conversation(resume_conversation_id)
conversation = start_fresh_conversation(resume_conversation_id)
except MissingAgentSpec:
print_formatted_text(HTML('\n<yellow>Setup is required to use OpenHands CLI.</yellow>'))
print_formatted_text(HTML('\n<yellow>Goodbye! 👋</yellow>'))
@@ -152,7 +125,7 @@ def run_cli_entry(resume_conversation_id: str | None = None) -> None:
elif command == '/new':
try:
# Start a fresh conversation (no resume ID = new conversation)
conversation = _start_fresh_conversation()
conversation = setup_conversation()
runner = ConversationRunner(conversation)
display_welcome(conversation.id, resume=False)
print_formatted_text(
@@ -176,7 +149,7 @@ def run_cli_entry(resume_conversation_id: str | None = None) -> None:
elif command == '/confirm':
runner.toggle_confirmation_mode()
new_status = (
'enabled' if runner.is_confirmation_mode_enabled else 'disabled'
'enabled' if runner.is_confirmation_mode_active else 'disabled'
)
print_formatted_text(
HTML(f'<yellow>Confirmation mode {new_status}</yellow>')

View File

@@ -11,6 +11,7 @@ from openhands.sdk.security.confirmation_policy import (
from openhands_cli.listeners.pause_listener import PauseListener, pause_listener
from openhands_cli.user_actions import ask_user_confirmation
from openhands_cli.user_actions.types import UserConfirmation
from openhands_cli.setup import setup_conversation
class ConversationRunner:
@@ -20,20 +21,30 @@ class ConversationRunner:
self.conversation = conversation
@property
def is_confirmation_mode_enabled(self):
return self.conversation.confirmation_policy_active
def is_confirmation_mode_active(self):
return self.conversation.is_confirmation_mode_active
def toggle_confirmation_mode(self):
if self.is_confirmation_mode_enabled:
self.set_confirmation_policy(NeverConfirm())
else:
new_confirmation_mode_state = not self.is_confirmation_mode_active
self.conversation = setup_conversation(
self.conversation.id,
include_security_analyzer=new_confirmation_mode_state
)
if new_confirmation_mode_state:
# Enable confirmation mode: set AlwaysConfirm policy
self.set_confirmation_policy(AlwaysConfirm())
else:
# Disable confirmation mode: set NeverConfirm policy and remove security analyzer
self.set_confirmation_policy(NeverConfirm())
def set_confirmation_policy(
self, confirmation_policy: ConfirmationPolicyBase
) -> None:
self.conversation.set_confirmation_policy(confirmation_policy)
def _start_listener(self) -> None:
self.listener = PauseListener(on_pause=self.conversation.pause)
self.listener.start()
@@ -68,7 +79,7 @@ class ConversationRunner:
if message:
self.conversation.send_message(message)
if self.is_confirmation_mode_enabled:
if self.is_confirmation_mode_active:
self._run_with_confirmation()
else:
self._run_without_confirmation()
@@ -145,7 +156,9 @@ class ConversationRunner:
'<yellow>Confirmation mode disabled. Agent will proceed without asking.</yellow>'
)
)
self.set_confirmation_policy(policy_change)
# Remove security analyzer when policy is never confirm
self.toggle_confirmation_mode()
return decision
if isinstance(policy_change, ConfirmRisky):
@@ -155,6 +168,8 @@ class ConversationRunner:
'LOW/MEDIUM risk actions will auto-confirm, HIGH risk actions will ask for confirmation.</yellow>'
)
)
# Keep security analyzer, change existing policy
self.set_confirmation_policy(policy_change)
return decision

View File

@@ -9,6 +9,11 @@ from openhands.tools.task_tracker import TaskTrackerTool
from openhands_cli.listeners import LoadingContext
from openhands_cli.locations import CONVERSATIONS_DIR, WORK_DIR
from openhands_cli.tui.settings.store import AgentStore
from openhands.sdk.security.confirmation_policy import (
AlwaysConfirm,
)
from openhands_cli.tui.settings.settings_screen import SettingsScreen
register_tool('BashTool', BashTool)
register_tool('FileEditorTool', FileEditorTool)
@@ -21,7 +26,10 @@ class MissingAgentSpec(Exception):
pass
def setup_conversation(conversation_id: str | None = None) -> BaseConversation:
def setup_conversation(
conversation_id: str | None = None,
include_security_analyzer: bool = True
) -> BaseConversation:
"""
Setup the conversation with agent.
@@ -54,8 +62,15 @@ def setup_conversation(conversation_id: str | None = None) -> BaseConversation:
'Agent specification not found. Please configure your agent settings.'
)
if not include_security_analyzer:
# Remove security analyzer from agent spec
agent = agent.model_copy(
update={"security_analyzer": None}
)
# Create conversation - agent context is now set in AgentStore.load()
conversation = Conversation(
conversation: BaseConversation = Conversation(
agent=agent,
workspace=Workspace(working_dir=WORK_DIR),
# Conversation will add /<conversation_id> to this path
@@ -63,7 +78,39 @@ def setup_conversation(conversation_id: str | None = None) -> BaseConversation:
conversation_id=conversation_id,
)
if include_security_analyzer:
conversation.set_confirmation_policy(AlwaysConfirm())
print_formatted_text(
HTML(f'<green>✓ Agent initialized with model: {agent.llm.model}</green>')
)
return conversation
def start_fresh_conversation(
resume_conversation_id: str | None = None
) -> BaseConversation:
"""Start a fresh conversation by creating a new conversation instance.
Handles the complete conversation setup process including settings screen
if agent configuration is missing.
Args:
resume_conversation_id: Optional conversation ID to resume
Returns:
BaseConversation: A new conversation instance
"""
conversation = None
settings_screen = SettingsScreen()
try:
conversation = setup_conversation(resume_conversation_id)
return conversation
except MissingAgentSpec:
# For first-time users, show the full settings flow with choice between basic/advanced
settings_screen.configure_settings(first_time=True)
# Try once again after settings setup attempt
return setup_conversation(resume_conversation_id)

View File

@@ -67,9 +67,7 @@ class SettingsScreen:
(
' Confirmation Mode',
'Enabled'
if not isinstance(
self.conversation.state.confirmation_policy, NeverConfirm
)
if self.conversation.is_confirmation_mode_active
else 'Disabled',
),
(

View File

@@ -96,5 +96,5 @@ disallow_untyped_defs = true
ignore_missing_imports = true
[tool.uv.sources]
openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands/sdk", rev = "3ce74a16565be0e3f7e7617174bd0323e866597f" }
openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands/tools", rev = "3ce74a16565be0e3f7e7617174bd0323e866597f" }
openhands-sdk = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands/sdk", rev = "189979a5013751aa86852ab41afe9a79555e62ac" }
openhands-tools = { git = "https://github.com/All-Hands-AI/agent-sdk.git", subdirectory = "openhands/tools", rev = "189979a5013751aa86852ab41afe9a79555e62ac" }

View File

@@ -0,0 +1,122 @@
#!/usr/bin/env python3
from unittest.mock import MagicMock, patch, call
import pytest
from openhands_cli.runner import ConversationRunner
from openhands.sdk.security.confirmation_policy import AlwaysConfirm, NeverConfirm
CONV_ID = "test-conversation-id"
# ---------- Helpers ----------
def make_conv(enabled: bool) -> MagicMock:
"""Return a conversation mock in enabled/disabled confirmation mode."""
m = MagicMock()
m.id = CONV_ID
m.agent.security_analyzer = MagicMock() if enabled else None
m.confirmation_policy_active = enabled
m.is_confirmation_mode_active = enabled
return m
@pytest.fixture
def runner_disabled() -> ConversationRunner:
"""Runner starting with confirmation mode disabled."""
return ConversationRunner(make_conv(enabled=False))
@pytest.fixture
def runner_enabled() -> ConversationRunner:
"""Runner starting with confirmation mode enabled."""
return ConversationRunner(make_conv(enabled=True))
# ---------- Core toggle behavior (parametrized) ----------
@pytest.mark.parametrize(
"start_enabled, include_security_analyzer, expected_enabled, expected_policy_cls",
[
# disabled -> enable
(False, True, True, AlwaysConfirm),
# enabled -> disable
(True, False, False, NeverConfirm),
],
)
def test_toggle_confirmation_mode_transitions(
start_enabled, include_security_analyzer, expected_enabled, expected_policy_cls
):
# Arrange: pick starting runner & prepare the target conversation
runner = ConversationRunner(make_conv(enabled=start_enabled))
target_conv = make_conv(enabled=expected_enabled)
with patch("openhands_cli.runner.setup_conversation", return_value=target_conv) as mock_setup:
# Act
runner.toggle_confirmation_mode()
# Assert state
assert runner.is_confirmation_mode_active is expected_enabled
assert runner.conversation is target_conv
# Assert setup called with same conversation ID + correct analyzer flag
mock_setup.assert_called_once_with(CONV_ID, include_security_analyzer=include_security_analyzer)
# Assert policy applied to the *new* conversation
target_conv.set_confirmation_policy.assert_called_once()
assert isinstance(target_conv.set_confirmation_policy.call_args.args[0], expected_policy_cls)
# ---------- Conversation ID is preserved across multiple toggles ----------
def test_maintains_conversation_id_across_toggles(runner_disabled: ConversationRunner):
enabled_conv = make_conv(enabled=True)
disabled_conv = make_conv(enabled=False)
with patch("openhands_cli.runner.setup_conversation") as mock_setup:
mock_setup.side_effect = [enabled_conv, disabled_conv]
# Toggle on, then off
runner_disabled.toggle_confirmation_mode()
runner_disabled.toggle_confirmation_mode()
assert runner_disabled.conversation.id == CONV_ID
mock_setup.assert_has_calls(
[
call(CONV_ID, include_security_analyzer=True),
call(CONV_ID, include_security_analyzer=False),
],
any_order=False,
)
# ---------- Idempotency under rapid alternating toggles ----------
def test_rapid_alternating_toggles_produce_expected_states(runner_disabled: ConversationRunner):
enabled_conv = make_conv(enabled=True)
disabled_conv = make_conv(enabled=False)
with patch("openhands_cli.runner.setup_conversation") as mock_setup:
mock_setup.side_effect = [enabled_conv, disabled_conv, enabled_conv, disabled_conv]
# Start disabled
assert runner_disabled.is_confirmation_mode_active is False
# Enable, Disable, Enable, Disable
runner_disabled.toggle_confirmation_mode()
assert runner_disabled.is_confirmation_mode_active is True
runner_disabled.toggle_confirmation_mode()
assert runner_disabled.is_confirmation_mode_active is False
runner_disabled.toggle_confirmation_mode()
assert runner_disabled.is_confirmation_mode_active is True
runner_disabled.toggle_confirmation_mode()
assert runner_disabled.is_confirmation_mode_active is False
mock_setup.assert_has_calls(
[
call(CONV_ID, include_security_analyzer=True),
call(CONV_ID, include_security_analyzer=False),
call(CONV_ID, include_security_analyzer=True),
call(CONV_ID, include_security_analyzer=False),
],
any_order=False,
)

View File

@@ -2,36 +2,34 @@
from unittest.mock import MagicMock, patch
from uuid import UUID
from openhands_cli.agent_chat import _start_fresh_conversation
from unittest.mock import MagicMock, patch
from prompt_toolkit.input.defaults import create_pipe_input
from prompt_toolkit.output.defaults import DummyOutput
from openhands_cli.setup import MissingAgentSpec
from openhands_cli.setup import MissingAgentSpec, start_fresh_conversation
from openhands_cli.user_actions import UserConfirmation
@patch('openhands_cli.agent_chat.setup_conversation')
@patch('openhands_cli.setup.setup_conversation')
def test_start_fresh_conversation_success(mock_setup_conversation):
"""Test that _start_fresh_conversation creates a new conversation successfully."""
"""Test that start_fresh_conversation creates a new conversation successfully."""
# Mock the conversation object
mock_conversation = MagicMock()
mock_conversation.id = UUID('12345678-1234-5678-9abc-123456789abc')
mock_setup_conversation.return_value = mock_conversation
# Call the function
result = _start_fresh_conversation()
result = start_fresh_conversation()
# Verify the result
assert result == mock_conversation
mock_setup_conversation.assert_called_once_with(None)
@patch('openhands_cli.agent_chat.SettingsScreen')
@patch('openhands_cli.agent_chat.setup_conversation')
@patch('openhands_cli.setup.SettingsScreen')
@patch('openhands_cli.setup.setup_conversation')
def test_start_fresh_conversation_missing_agent_spec(
mock_setup_conversation,
mock_settings_screen_class
):
"""Test that _start_fresh_conversation handles MissingAgentSpec exception."""
"""Test that start_fresh_conversation handles MissingAgentSpec exception."""
# Mock the SettingsScreen instance
mock_settings_screen = MagicMock()
mock_settings_screen_class.return_value = mock_settings_screen
@@ -45,7 +43,7 @@ def test_start_fresh_conversation_missing_agent_spec(
]
# Call the function
result = _start_fresh_conversation()
result = start_fresh_conversation()
# Verify the result
assert result == mock_conversation
@@ -61,9 +59,11 @@ def test_start_fresh_conversation_missing_agent_spec(
@patch('openhands_cli.agent_chat.exit_session_confirmation')
@patch('openhands_cli.agent_chat.get_session_prompter')
@patch('openhands_cli.agent_chat.setup_conversation')
@patch('openhands_cli.agent_chat.start_fresh_conversation')
@patch('openhands_cli.agent_chat.ConversationRunner')
def test_new_command_resets_confirmation_mode(
mock_runner_cls,
mock_start_fresh_conversation,
mock_setup_conversation,
mock_get_session_prompter,
mock_exit_confirm,
@@ -73,11 +73,12 @@ def test_new_command_resets_confirmation_mode(
conv1 = MagicMock(); conv1.id = UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa')
conv2 = MagicMock(); conv2.id = UUID('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb')
mock_setup_conversation.side_effect = [conv1, conv2]
mock_start_fresh_conversation.return_value = conv1
mock_setup_conversation.side_effect = [conv2]
# Distinct runner instances for each conversation
runner1 = MagicMock(); runner1.is_confirmation_mode_enabled = True
runner2 = MagicMock(); runner2.is_confirmation_mode_enabled = False
runner1 = MagicMock(); runner1.is_confirmation_mode_active = True
runner2 = MagicMock(); runner2.is_confirmation_mode_active = False
mock_runner_cls.side_effect = [runner1, runner2]
# Real session fed by a pipe (no interactive confirmation now)

View File

@@ -98,6 +98,7 @@ class TestConfirmationMode:
mock_conversation = MagicMock()
mock_conversation.confirmation_policy_active = False
mock_conversation.is_confirmation_mode_active = False
runner = ConversationRunner(mock_conversation)
# Test enabling confirmation mode
@@ -113,10 +114,11 @@ class TestConfirmationMode:
mock_conversation = MagicMock()
mock_conversation.confirmation_policy_active = False
mock_conversation.is_confirmation_mode_active = False
runner = ConversationRunner(mock_conversation)
# Verify initial state
assert runner.is_confirmation_mode_enabled is False
assert runner.is_confirmation_mode_active is False
def test_ask_user_confirmation_empty_actions(self) -> None:
"""Test that ask_user_confirmation returns ACCEPT for empty actions list."""
@@ -373,11 +375,12 @@ class TestConfirmationMode:
"""Test that ConversationRunner disables confirmation mode when NeverConfirm policy is returned."""
mock_conversation = MagicMock()
mock_conversation.confirmation_policy_active = True
mock_conversation.is_confirmation_mode_active = True
runner = ConversationRunner(mock_conversation)
# Enable confirmation mode first
runner.set_confirmation_policy(AlwaysConfirm())
assert runner.is_confirmation_mode_enabled is True
assert runner.is_confirmation_mode_active is True
# Mock get_unmatched_actions to return some actions
with patch(
@@ -398,14 +401,26 @@ class TestConfirmationMode:
# Mock print_formatted_text to avoid output during test
with patch('openhands_cli.runner.print_formatted_text'):
result = runner._handle_confirmation_request()
# Mock setup_conversation to avoid real conversation creation
with patch('openhands_cli.runner.setup_conversation') as mock_setup:
# Return a new mock conversation with confirmation mode disabled
new_mock_conversation = MagicMock()
new_mock_conversation.id = mock_conversation.id
new_mock_conversation.is_confirmation_mode_active = False
mock_setup.return_value = new_mock_conversation
result = runner._handle_confirmation_request()
# Verify that confirmation mode was disabled
assert result == UserConfirmation.ACCEPT
# Should have called set_confirmation_policy with NeverConfirm
mock_conversation.set_confirmation_policy.assert_called_with(
NeverConfirm()
)
# Verify that confirmation mode was disabled
assert result == UserConfirmation.ACCEPT
# Should have called setup_conversation to toggle confirmation mode
mock_setup.assert_called_once_with(
mock_conversation.id, include_security_analyzer=False
)
# Should have called set_confirmation_policy with NeverConfirm on new conversation
new_mock_conversation.set_confirmation_policy.assert_called_with(
NeverConfirm()
)
@patch('openhands_cli.user_actions.agent_action.cli_confirm')
def test_ask_user_confirmation_auto_confirm_safe(
@@ -432,11 +447,12 @@ class TestConfirmationMode:
"""Test that ConversationRunner sets ConfirmRisky policy when policy_change is provided."""
mock_conversation = MagicMock()
mock_conversation.confirmation_policy_active = True
mock_conversation.is_confirmation_mode_active = True
runner = ConversationRunner(mock_conversation)
# Enable confirmation mode first
runner.set_confirmation_policy(AlwaysConfirm())
assert runner.is_confirmation_mode_enabled is True
assert runner.is_confirmation_mode_active is True
# Mock get_unmatched_actions to return some actions
with patch(

View File

@@ -102,6 +102,11 @@ class TestConversationRunner:
"""
if final_status == AgentExecutionStatus.FINISHED:
agent.finish_on_step = 1
# Add a mock security analyzer to enable confirmation mode
from unittest.mock import MagicMock
agent.security_analyzer = MagicMock()
convo = Conversation(agent)
convo.state.agent_status = AgentExecutionStatus.WAITING_FOR_CONFIRMATION
cr = ConversationRunner(convo)

8
openhands-cli/uv.lock generated
View File

@@ -1642,8 +1642,8 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "openhands-sdk", git = "https://github.com/All-Hands-AI/agent-sdk.git?subdirectory=openhands%2Fsdk&rev=3ce74a16565be0e3f7e7617174bd0323e866597f" },
{ name = "openhands-tools", git = "https://github.com/All-Hands-AI/agent-sdk.git?subdirectory=openhands%2Ftools&rev=3ce74a16565be0e3f7e7617174bd0323e866597f" },
{ name = "openhands-sdk", git = "https://github.com/All-Hands-AI/agent-sdk.git?subdirectory=openhands%2Fsdk&rev=189979a5013751aa86852ab41afe9a79555e62ac" },
{ name = "openhands-tools", git = "https://github.com/All-Hands-AI/agent-sdk.git?subdirectory=openhands%2Ftools&rev=189979a5013751aa86852ab41afe9a79555e62ac" },
{ name = "prompt-toolkit", specifier = ">=3" },
{ name = "typer", specifier = ">=0.17.4" },
]
@@ -1667,7 +1667,7 @@ dev = [
[[package]]
name = "openhands-sdk"
version = "1.0.0"
source = { git = "https://github.com/All-Hands-AI/agent-sdk.git?subdirectory=openhands%2Fsdk&rev=3ce74a16565be0e3f7e7617174bd0323e866597f#3ce74a16565be0e3f7e7617174bd0323e866597f" }
source = { git = "https://github.com/All-Hands-AI/agent-sdk.git?subdirectory=openhands%2Fsdk&rev=189979a5013751aa86852ab41afe9a79555e62ac#189979a5013751aa86852ab41afe9a79555e62ac" }
dependencies = [
{ name = "fastmcp" },
{ name = "litellm" },
@@ -1681,7 +1681,7 @@ dependencies = [
[[package]]
name = "openhands-tools"
version = "1.0.0"
source = { git = "https://github.com/All-Hands-AI/agent-sdk.git?subdirectory=openhands%2Ftools&rev=3ce74a16565be0e3f7e7617174bd0323e866597f#3ce74a16565be0e3f7e7617174bd0323e866597f" }
source = { git = "https://github.com/All-Hands-AI/agent-sdk.git?subdirectory=openhands%2Ftools&rev=189979a5013751aa86852ab41afe9a79555e62ac#189979a5013751aa86852ab41afe9a79555e62ac" }
dependencies = [
{ name = "bashlex" },
{ name = "binaryornot" },