Implement graceful shutdown for headless mode (#11401)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Boxuan Li
2025-10-16 23:09:31 -07:00
committed by GitHub
parent 5219f85bfa
commit e1b94732a8

View File

@@ -1,6 +1,8 @@
import asyncio
import json
import os
import signal
import sys
from pathlib import Path
from typing import Callable, Protocol
@@ -174,6 +176,27 @@ async def run_controller(
f'{agent.llm.config.model}, with actions: {initial_user_action}'
)
# Set up asyncio-safe signal handler for graceful shutdown
sigint_count = 0
shutdown_event = asyncio.Event()
def signal_handler():
"""Handle SIGINT signals for graceful shutdown."""
nonlocal sigint_count
sigint_count += 1
if sigint_count == 1:
logger.info('Received SIGINT (Ctrl+C). Initiating graceful shutdown...')
logger.info('Press Ctrl+C again to force immediate exit.')
shutdown_event.set()
else:
logger.info('Received second SIGINT. Forcing immediate exit...')
sys.exit(1)
# Register the asyncio signal handler (safer for async contexts)
loop = asyncio.get_running_loop()
loop.add_signal_handler(signal.SIGINT, signal_handler)
# start event is a MessageAction with the task, either resumed or new
if initial_state is not None and initial_state.last_error:
# we're resuming the previous session
@@ -213,7 +236,52 @@ async def run_controller(
]
try:
await run_agent_until_done(controller, runtime, memory, end_states)
# Create a task for the main agent loop
agent_task = asyncio.create_task(
run_agent_until_done(controller, runtime, memory, end_states)
)
# Wait for either the agent to complete or shutdown signal
done, pending = await asyncio.wait(
[agent_task, asyncio.create_task(shutdown_event.wait())],
return_when=asyncio.FIRST_COMPLETED,
)
# Cancel any pending tasks
for task in pending:
task.cancel()
# Wait for all cancelled tasks to complete in parallel
await asyncio.gather(*pending, return_exceptions=True)
# Check if shutdown was requested
if shutdown_event.is_set():
logger.info('Graceful shutdown requested.')
# Perform graceful cleanup sequence
try:
# 1. Stop the agent controller first to prevent new LLM calls
logger.debug('Stopping agent controller...')
await controller.close()
# 2. Stop the EventStream to prevent new events from being processed
logger.debug('Stopping EventStream...')
event_stream.close()
# 3. Give time for in-flight operations to complete before closing runtime
logger.debug('Waiting for in-flight operations to complete...')
await asyncio.sleep(0.3)
# 4. Close the runtime to avoid bash session interruption errors
logger.debug('Closing runtime...')
runtime.close()
# 5. Give a brief moment for final cleanup to complete
await asyncio.sleep(0.1)
except Exception as e:
logger.warning(f'Error during graceful cleanup: {e}')
except Exception as e:
logger.error(f'Exception in main loop: {e}')