feat: runtime improvements for rate-limit and 502/503/404 error (#5975)

This commit is contained in:
Xingyao Wang
2025-01-03 10:36:19 -05:00
committed by GitHub
parent ef8e04aee3
commit f14f75b064
3 changed files with 28 additions and 5 deletions

View File

@@ -376,7 +376,12 @@ def _process_instance_wrapper(
+ '\n'
)
if isinstance(
e, (AgentRuntimeDisconnectedError, AgentRuntimeUnavailableError)
e,
(
AgentRuntimeDisconnectedError,
AgentRuntimeUnavailableError,
AgentRuntimeNotFoundError,
),
):
runtime_failure_count += 1
msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'

View File

@@ -21,7 +21,6 @@ from openhands.runtime.impl.action_execution.action_execution_client import (
from openhands.runtime.plugins import PluginRequirement
from openhands.runtime.utils.command import get_remote_startup_command
from openhands.runtime.utils.request import (
RequestHTTPError,
send_request,
)
from openhands.runtime.utils.runtime_build import build_runtime_image
@@ -367,10 +366,14 @@ class RemoteRuntime(ActionExecutionClient):
except requests.Timeout:
self.log('error', 'No response received within the timeout period.')
raise
except RequestHTTPError as e:
if e.response.status_code in (404, 502):
except requests.HTTPError as e:
if e.response.status_code == 404:
raise AgentRuntimeNotFoundError(
'Runtime unavailable: System resources may be exhausted due to running commands. This may be fixed by retrying.'
) from e
elif e.response.status_code == 502:
raise AgentRuntimeDisconnectedError(
f'{e.response.status_code} error while connecting to {self.runtime_url}'
'Runtime disconnected: System resources may be exhausted due to running commands. This may be fixed by retrying.'
) from e
elif e.response.status_code == 503:
self.log('warning', 'Runtime appears to be paused. Resuming...')

View File

@@ -2,6 +2,9 @@ import json
from typing import Any
import requests
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
from openhands.utils.tenacity_stop import stop_if_should_exit
class RequestHTTPError(requests.HTTPError):
@@ -18,6 +21,18 @@ class RequestHTTPError(requests.HTTPError):
return s
def is_rate_limit_error(exception):
    """Return True when *exception* is an HTTP 429 (rate-limit) error.

    Serves as the predicate handed to tenacity's ``retry_if_exception``,
    so it must never raise: an exception escaping from the predicate
    would mask the original request failure.

    Args:
        exception: The exception raised by the wrapped call.

    Returns:
        True if ``exception`` is a ``requests.HTTPError`` whose attached
        response has status code 429; False otherwise, including when the
        error carries no response at all.
    """
    if not isinstance(exception, requests.HTTPError):
        return False
    response = exception.response
    # An HTTPError can be constructed without a response. Compare against
    # None explicitly: a Response is falsy for 4xx/5xx statuses, so a plain
    # truthiness check would wrongly reject the very 429 we are looking for.
    return response is not None and response.status_code == 429
@retry(
retry=retry_if_exception(is_rate_limit_error),
stop=stop_after_attempt(3) | stop_if_should_exit(),
wait=wait_exponential(multiplier=1, min=4, max=60),
)
def send_request(
session: requests.Session,
method: str,