Compare commits

...

1 Commit

Author SHA1 Message Date
openhands
c92f71c8e2 fix: Properly propagate AgentRuntimeTimeoutError to evaluation loop 2025-07-26 17:01:34 +00:00
2 changed files with 24 additions and 2 deletions

View File

@@ -641,7 +641,9 @@ def process_instance(
)
)
-# if fatal error, throw EvalError to trigger re-run
+# if state is None or has a fatal error, throw EvalError to trigger re-run
+if state is None:
+raise EvalException('State is None, likely due to a runtime error')
if is_fatal_evaluation_error(state.last_error):
raise EvalException('Fatal error detected: ' + state.last_error)
@@ -671,8 +673,9 @@ def process_instance(
# If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction)
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
+# This check is redundant since we already check above, but keeping it for safety
 if state is None:
-raise ValueError('State should not be None.')
+raise EvalException('State is None, likely due to a runtime error')
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
histories = [event_to_dict(event) for event in state.history]

View File

@@ -15,6 +15,12 @@ from openhands.core.config import (
setup_config_from_args,
)
from openhands.core.config.mcp_config import OpenHandsMCPConfigImpl
from openhands.core.exceptions import (
AgentRuntimeDisconnectedError,
AgentRuntimeNotFoundError,
AgentRuntimeTimeoutError,
AgentRuntimeUnavailableError,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.loop import run_agent_until_done
from openhands.core.schema import AgentState
@@ -207,6 +213,19 @@ async def run_controller(
await run_agent_until_done(controller, runtime, memory, end_states)
except Exception as e:
logger.error(f'Exception in main loop: {e}')
# Set the error in the state so it can be detected by is_fatal_evaluation_error
controller.state.last_error = f'{type(e).__name__}: {str(e)}'
# If it's a fatal runtime error, we should re-raise it so the evaluation loop can handle it
if isinstance(
e,
(
AgentRuntimeTimeoutError,
AgentRuntimeDisconnectedError,
AgentRuntimeUnavailableError,
AgentRuntimeNotFoundError,
),
):
raise e
# save session when we're about to close
if config.file_store is not None and config.file_store != 'memory':