mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-10 07:18:10 -05:00
feat(eval): reliability improvement for SWE-Bench eval_infer (#6347)
This commit is contained in:
@@ -355,7 +355,9 @@ def _process_instance_wrapper(
|
||||
)
|
||||
# e is likely an EvalException, so we can't directly infer it from type
|
||||
# but rather check if it's a fatal error
|
||||
if is_fatal_runtime_error(str(e)):
|
||||
# But it can also be AgentRuntime**Error (e.g., swe_bench/eval_infer.py)
|
||||
_error_str = type(e).__name__ + ': ' + str(e)
|
||||
if is_fatal_runtime_error(_error_str):
|
||||
runtime_failure_count += 1
|
||||
msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'
|
||||
msg += '\n' + '-' * 10 + '\n'
|
||||
@@ -531,6 +533,7 @@ def is_fatal_runtime_error(error: str | None) -> bool:
|
||||
return False
|
||||
|
||||
FATAL_RUNTIME_ERRORS = [
|
||||
AgentRuntimeTimeoutError,
|
||||
AgentRuntimeUnavailableError,
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeNotFoundError,
|
||||
|
||||
Reference in New Issue
Block a user