mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-10 15:28:14 -05:00
Implement graceful shutdown for headless mode (#11401)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Callable, Protocol
|
||||
|
||||
@@ -174,6 +176,27 @@ async def run_controller(
|
||||
f'{agent.llm.config.model}, with actions: {initial_user_action}'
|
||||
)
|
||||
|
||||
# Set up asyncio-safe signal handler for graceful shutdown
|
||||
sigint_count = 0
|
||||
shutdown_event = asyncio.Event()
|
||||
|
||||
def signal_handler():
|
||||
"""Handle SIGINT signals for graceful shutdown."""
|
||||
nonlocal sigint_count
|
||||
sigint_count += 1
|
||||
|
||||
if sigint_count == 1:
|
||||
logger.info('Received SIGINT (Ctrl+C). Initiating graceful shutdown...')
|
||||
logger.info('Press Ctrl+C again to force immediate exit.')
|
||||
shutdown_event.set()
|
||||
else:
|
||||
logger.info('Received second SIGINT. Forcing immediate exit...')
|
||||
sys.exit(1)
|
||||
|
||||
# Register the asyncio signal handler (safer for async contexts)
|
||||
loop = asyncio.get_running_loop()
|
||||
loop.add_signal_handler(signal.SIGINT, signal_handler)
|
||||
|
||||
# start event is a MessageAction with the task, either resumed or new
|
||||
if initial_state is not None and initial_state.last_error:
|
||||
# we're resuming the previous session
|
||||
@@ -213,7 +236,52 @@ async def run_controller(
|
||||
]
|
||||
|
||||
try:
|
||||
await run_agent_until_done(controller, runtime, memory, end_states)
|
||||
# Create a task for the main agent loop
|
||||
agent_task = asyncio.create_task(
|
||||
run_agent_until_done(controller, runtime, memory, end_states)
|
||||
)
|
||||
|
||||
# Wait for either the agent to complete or shutdown signal
|
||||
done, pending = await asyncio.wait(
|
||||
[agent_task, asyncio.create_task(shutdown_event.wait())],
|
||||
return_when=asyncio.FIRST_COMPLETED,
|
||||
)
|
||||
|
||||
# Cancel any pending tasks
|
||||
for task in pending:
|
||||
task.cancel()
|
||||
|
||||
# Wait for all cancelled tasks to complete in parallel
|
||||
await asyncio.gather(*pending, return_exceptions=True)
|
||||
|
||||
# Check if shutdown was requested
|
||||
if shutdown_event.is_set():
|
||||
logger.info('Graceful shutdown requested.')
|
||||
|
||||
# Perform graceful cleanup sequence
|
||||
try:
|
||||
# 1. Stop the agent controller first to prevent new LLM calls
|
||||
logger.debug('Stopping agent controller...')
|
||||
await controller.close()
|
||||
|
||||
# 2. Stop the EventStream to prevent new events from being processed
|
||||
logger.debug('Stopping EventStream...')
|
||||
event_stream.close()
|
||||
|
||||
# 3. Give time for in-flight operations to complete before closing runtime
|
||||
logger.debug('Waiting for in-flight operations to complete...')
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
# 4. Close the runtime to avoid bash session interruption errors
|
||||
logger.debug('Closing runtime...')
|
||||
runtime.close()
|
||||
|
||||
# 5. Give a brief moment for final cleanup to complete
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f'Error during graceful cleanup: {e}')
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f'Exception in main loop: {e}')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user