Merge branch 'main' into fix-runtime-error-handling

Merge runtime error handling tests into a single file
2026-04-29 03:00:45 -04:00 · 2025-06-12 17:27:40 -04:00 · 2025-06-03 19:13:07 +00:00 · 2025-06-03 15:06:19 -04:00 · 2025-06-03 18:44:57 +00:00 · 2025-06-03 18:42:28 +00:00
3 changed files with 543 additions and 12 deletions
--- a/openhands/core/config/sandbox_config.py
+++ b/openhands/core/config/sandbox_config.py
@@ -58,6 +58,7 @@ class SandboxConfig(BaseModel):
    remote_runtime_init_timeout: int = Field(default=180)
    remote_runtime_api_timeout: int = Field(default=10)
    remote_runtime_enable_retries: bool = Field(default=True)
+    retry_on_unrecoverable_runtime_error: bool = Field(default=False)
    remote_runtime_class: str | None = Field(
        default=None
    )  # can be "None" (default to gvisor) or "sysbox" (support docker inside runtime + more stable)
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -14,10 +14,15 @@ from typing import Callable, cast
 from zipfile import ZipFile

 import httpx
+import tenacity
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed

 from openhands.core.config import OpenHandsConfig, SandboxConfig
 from openhands.core.config.mcp_config import MCPConfig, MCPStdioServerConfig
-from openhands.core.exceptions import AgentRuntimeDisconnectedError
+from openhands.core.exceptions import (
+    AgentRuntimeDisconnectedError,
+    AgentRuntimeUnavailableError,
+)
 from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventSource, EventStream, EventStreamSubscriber
 from openhands.events.action import (
@@ -333,22 +338,134 @@ class Runtime(FileEditRuntimeMixin):
                f'Failed export latest github token to runtime: {self.sid}, {e}'
            )

+    async def _handle_runtime_error(
+        self,
+        event: Action,
+        error: Exception,
+        retry_count: int,
+        max_retries: int = 3,
+        retry_delay: int = 10,
+    ) -> None:
+        """
+        Report a runtime failure for an action.
+
+        This method does not retry anything itself. It logs the failure, clears
+        the tracked MCP stdio servers (only when the runtime already has that
+        attribute) and adds an ErrorObservation carrying the retry counter to
+        the event stream.
+
+        Args:
+            event: The action that caused the error
+            error: The exception that was raised
+            retry_count: Current retry attempt number
+            max_retries: Maximum number of retry attempts
+            retry_delay: Delay in seconds between retries. Accepted for
+                signature compatibility; this method does not read it.
+
+        Returns:
+            None
+        """
+        error_message = f'{type(error).__name__}: {str(error)}'
+        self.log('error', f'Runtime error while running action: {error_message}')
+        self.log('error', f'Problematic action: {str(event)}')
+
+        # Reset MCP stdio servers tracking when error happens. Runtimes that do
+        # not define the attribute are left untouched.
+        if hasattr(self, '_last_updated_mcp_stdio_servers'):
+            self._last_updated_mcp_stdio_servers: list[MCPStdioServerConfig] = []
+            self.log(
+                'debug',
+                'Reset _last_updated_mcp_stdio_servers to empty list due to runtime error',
+            )
+
+        # Create error message for the observation
+        error_content = (
+            f'Your command may have consumed too much resources, and the previous runtime died. '
+            f'You are connected to a new runtime container, all dependencies you have installed '
+            f'outside /workspace may not be persisted. (Retry {retry_count} of {max_retries})'
+        )
+
+        # Create an error observation linked to the action that failed
+        observation = ErrorObservation(content=error_content)
+        observation._cause = event.id  # type: ignore[attr-defined]
+        observation.tool_call_metadata = event.tool_call_metadata
+
+        # Add the observation to the event stream (synchronous call, not awaited)
+        self.event_stream.add_event(observation, EventSource.ENVIRONMENT)  # type: ignore[arg-type]
+
+        # Log the retry attempt
+        self.log(
+            'warning',
+            f'Runtime error occurred. Retry {retry_count} of {max_retries}.',
+        )
+
+    async def _execute_action_core(self, event: Action) -> Observation:
+        """
+        Run a single action and hand back the observation it produced.
+
+        The latest git provider tokens are exported first. MCP actions are then
+        dispatched through ``call_tool_mcp``; every other action goes through
+        ``run_action`` wrapped by ``call_sync_from_async``.
+
+        Args:
+            event: The action to execute
+
+        Returns:
+            The observation resulting from the action
+        """
+        await self._export_latest_git_provider_tokens(event)
+
+        # Non-MCP actions take the synchronous run_action path.
+        if not isinstance(event, MCPAction):
+            return await call_sync_from_async(self.run_action, event)
+
+        mcp_observation: Observation = await self.call_tool_mcp(event)
+        return mcp_observation
+
    async def _handle_action(self, event: Action) -> None:
        if event.timeout is None:
            # We don't block the command if this is a default timeout action
            event.set_hard_timeout(self.config.sandbox.timeout, blocking=False)
        assert event.timeout is not None
+
+        # Define a before_sleep callback for tenacity
+        async def before_sleep_callback(retry_state: tenacity.RetryCallState) -> None:
+            exception = retry_state.outcome.exception()
+            if exception:
+                await self._handle_runtime_error(
+                    event,
+                    exception,
+                    retry_state.attempt_number,
+                    max_retries=3,
+                    retry_delay=10,
+                )
+
+        # Create a retry decorator based on configuration
+        if self.config.sandbox.retry_on_unrecoverable_runtime_error:
+            retry_decorator = retry(
+                retry=retry_if_exception_type(
+                    (AgentRuntimeDisconnectedError, AgentRuntimeUnavailableError)
+                ),
+                stop=stop_after_attempt(3),
+                wait=wait_fixed(10),
+                before_sleep=before_sleep_callback,
+                reraise=True,
+            )
+            execute_with_retry = retry_decorator(self._execute_action_core)
+        else:
+            # No retry if not enabled in config
+            execute_with_retry = self._execute_action_core
+
        try:
-            await self._export_latest_git_provider_tokens(event)
-            if isinstance(event, MCPAction):
-                observation: Observation = await self.call_tool_mcp(event)
-            else:
-                observation = await call_sync_from_async(self.run_action, event)
+            # Execute the action with retry if configured
+            observation: Observation = await execute_with_retry(event)
+
+            # Set observation metadata
+            observation._cause = event.id  # type: ignore[attr-defined]
+            observation.tool_call_metadata = event.tool_call_metadata
+
+        except (AgentRuntimeDisconnectedError, AgentRuntimeUnavailableError) as e:
+            # This will only be reached if retries are disabled or all retries failed
+            err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
+            error_message = f'{type(e).__name__}: {str(e)}'
+            self.log('error', f'Runtime error while running action: {error_message}')
+            self.log('error', f'Problematic action: {str(event)}')
+            self.send_error_message(err_id, error_message)
+            return
+
        except Exception as e:
+            # Handle other exceptions
            err_id = ''
-            if isinstance(e, httpx.NetworkError) or isinstance(
-                e, AgentRuntimeDisconnectedError
-            ):
+            if isinstance(e, httpx.NetworkError):
                err_id = 'STATUS$ERROR_RUNTIME_DISCONNECTED'
            error_message = f'{type(e).__name__}: {str(e)}'
            self.log('error', f'Unexpected error while running action: {error_message}')
@@ -356,9 +473,6 @@ class Runtime(FileEditRuntimeMixin):
            self.send_error_message(err_id, error_message)
            return

-        observation._cause = event.id  # type: ignore[attr-defined]
-        observation.tool_call_metadata = event.tool_call_metadata
-
        # this might be unnecessary, since source should be set by the event stream when we're here
        source = event.source if event.source else EventSource.AGENT
        if isinstance(observation, NullObservation):
--- a/tests/unit/test_runtime_error_handling.py
+++ b/tests/unit/test_runtime_error_handling.py
@@ -0,0 +1,416 @@
+from unittest.mock import AsyncMock, Mock, PropertyMock, patch
+
+import httpx
+import pytest
+
+from openhands.core.config.mcp_config import MCPStdioServerConfig
+from openhands.core.exceptions import (
+    AgentRuntimeDisconnectedError,
+    AgentRuntimeTimeoutError,
+)
+from openhands.events.action import CmdRunAction, MCPAction
+from openhands.events.event import EventSource
+from openhands.events.observation import ErrorObservation, Observation
+from openhands.runtime.base import Runtime
+
+
+class TestRuntimeErrorHandling:
+    """Tests for runtime error handling functionality."""
+
+    # Debug line logged by Runtime._handle_runtime_error when it clears MCP tracking.
+    MCP_RESET_LOG = (
+        'Reset _last_updated_mcp_stdio_servers to empty list due to runtime error'
+    )
+
+    @pytest.fixture
+    def mock_runtime(self):
+        """Create a mock Runtime with necessary attributes."""
+        runtime = Mock(spec=Runtime)
+        runtime._last_updated_mcp_stdio_servers = [
+            MCPStdioServerConfig(name='test-server-1', command='test-command-1'),
+            MCPStdioServerConfig(name='test-server-2', command='test-command-2'),
+        ]
+        runtime.log = Mock()
+        runtime.event_stream = Mock()
+        # add_event is called without ``await`` by the code under test, so it has
+        # to be a plain Mock; an AsyncMock would leave a never-awaited coroutine.
+        runtime.event_stream.add_event = Mock()
+        runtime.send_error_message = Mock()
+        runtime.config = Mock()
+        runtime.config.sandbox = Mock()
+        return runtime
+
+    @staticmethod
+    def _make_action() -> CmdRunAction:
+        """Build the command action used by the tests, with a hard timeout set."""
+        action = CmdRunAction(command='test command')
+        action.set_hard_timeout(120)
+        return action
+
+    async def _assert_error_reported(self, mock_runtime, error, retry_count):
+        """Call ``_handle_runtime_error`` and verify everything it is expected to do.
+
+        Args:
+            mock_runtime: The mocked Runtime the method is invoked on
+            error: The exception handed to ``_handle_runtime_error``
+            retry_count: The retry counter expected in the observation text
+        """
+        action = self._make_action()
+
+        # Patch the id property so that ``action.id`` evaluates to a real integer.
+        # PropertyMock is required: a plain Mock installed as a class attribute is
+        # returned as-is on attribute access instead of yielding its return_value.
+        with patch(
+            'openhands.events.action.commands.CmdRunAction.id',
+            new_callable=PropertyMock,
+            return_value=12345,
+        ):
+            # Call the error handling method directly
+            await Runtime._handle_runtime_error(
+                mock_runtime,
+                action,
+                error,
+                retry_count=retry_count,
+                max_retries=3,
+            )
+
+        # Verify _last_updated_mcp_stdio_servers was reset
+        assert mock_runtime._last_updated_mcp_stdio_servers == []
+
+        # Verify log message was called
+        mock_runtime.log.assert_any_call('debug', self.MCP_RESET_LOG)
+
+        # Verify exactly one observation was added to the event stream
+        mock_runtime.event_stream.add_event.assert_called_once()
+        observation, source = mock_runtime.event_stream.add_event.call_args[0]
+
+        # Verify it's an ErrorObservation with the right source and cause
+        assert isinstance(observation, ErrorObservation)
+        assert source == EventSource.ENVIRONMENT
+        assert observation._cause == 12345
+
+        # Verify the error message contains the standard runtime error text
+        assert (
+            'Your command may have consumed too much resources' in observation.content
+        )
+        assert f'Retry {retry_count} of 3' in observation.content
+
+    @pytest.mark.asyncio
+    async def test_handle_runtime_error_resets_mcp_servers(self, mock_runtime):
+        """Test that _handle_runtime_error resets _last_updated_mcp_stdio_servers."""
+        await self._assert_error_reported(
+            mock_runtime,
+            AgentRuntimeTimeoutError('Runtime timeout'),
+            retry_count=1,
+        )
+
+    @pytest.mark.asyncio
+    async def test_handle_runtime_error_on_disconnected(self, mock_runtime):
+        """Test that _handle_runtime_error handles disconnected errors correctly."""
+        await self._assert_error_reported(
+            mock_runtime,
+            AgentRuntimeDisconnectedError('Runtime disconnected'),
+            retry_count=2,
+        )
+
+    @pytest.mark.asyncio
+    async def test_handle_runtime_error_on_http_error(self, mock_runtime):
+        """Test that _handle_runtime_error handles HTTP errors correctly."""
+        # Create a mock response with a 502 status code
+        mock_response = Mock()
+        mock_response.status_code = 502
+
+        await self._assert_error_reported(
+            mock_runtime,
+            httpx.HTTPStatusError(
+                'Bad Gateway', request=Mock(), response=mock_response
+            ),
+            retry_count=1,
+        )
+
+    @pytest.mark.asyncio
+    async def test_handle_runtime_error_on_max_retries(self, mock_runtime):
+        """Test that _handle_runtime_error handles max retries correctly."""
+        await self._assert_error_reported(
+            mock_runtime,
+            Exception('Generic error'),
+            retry_count=3,  # Same as max_retries
+        )
+
+    @pytest.mark.asyncio
+    async def test_execute_action_core(self, mock_runtime):
+        """Test the _execute_action_core method."""
+        # Create a command action
+        action = CmdRunAction(command='test command')
+
+        # Mock the run_action method
+        mock_observation = Mock(spec=Observation)
+        mock_runtime.run_action = Mock(return_value=mock_observation)
+
+        # Patch the call_sync_from_async function
+        with patch(
+            'openhands.runtime.base.call_sync_from_async',
+            return_value=mock_observation,
+        ) as mock_call_sync:
+            # Call the method
+            result = await Runtime._execute_action_core(mock_runtime, action)
+
+        # Verify the result
+        assert result == mock_observation
+
+        # Verify _export_latest_git_provider_tokens was called
+        mock_runtime._export_latest_git_provider_tokens.assert_called_once_with(action)
+
+        # Verify run_action was handed to call_sync_from_async along with the action
+        mock_call_sync.assert_called_once_with(mock_runtime.run_action, action)
+
+    @pytest.mark.asyncio
+    async def test_execute_action_core_with_mcp_action(self, mock_runtime):
+        """Test the _execute_action_core method with an MCP action."""
+        # Create an MCP action
+        action = Mock(spec=MCPAction)
+
+        # Mock the call_tool_mcp method
+        mock_observation = Mock(spec=Observation)
+        mock_runtime.call_tool_mcp = AsyncMock(return_value=mock_observation)
+
+        # Call the method
+        result = await Runtime._execute_action_core(mock_runtime, action)
+
+        # Verify the result
+        assert result == mock_observation
+
+        # Verify _export_latest_git_provider_tokens was called
+        mock_runtime._export_latest_git_provider_tokens.assert_called_once_with(action)
+
+        # Verify call_tool_mcp was called
+        mock_runtime.call_tool_mcp.assert_called_once_with(action)
+
+    @pytest.mark.asyncio
+    async def test_handle_action_with_retry_enabled(self, mock_runtime):
+        """Test _handle_action with retry enabled."""
+        # Configure the mock runtime
+        mock_runtime.config.sandbox.retry_on_unrecoverable_runtime_error = True
+
+        action = self._make_action()
+
+        # Mock the _execute_action_core method
+        mock_observation = Mock(spec=Observation)
+        mock_runtime._execute_action_core = AsyncMock(return_value=mock_observation)
+
+        # Since we can't easily mock the tenacity.retry decorator directly,
+        # we only exercise the success path here: the wrapped call must run
+        # exactly once when the first attempt succeeds.
+        await Runtime._handle_action(mock_runtime, action)
+
+        # Verify _execute_action_core was called
+        mock_runtime._execute_action_core.assert_called_once_with(action)
+
+        # Verify the observation metadata was copied from the action
+        assert mock_observation._cause == action.id
+        assert mock_observation.tool_call_metadata == action.tool_call_metadata
+
+    @pytest.mark.asyncio
+    async def test_handle_action_with_retry_disabled(self, mock_runtime):
+        """Test _handle_action with retry disabled."""
+        # Configure the mock runtime
+        mock_runtime.config.sandbox.retry_on_unrecoverable_runtime_error = False
+
+        action = self._make_action()
+
+        # Mock the _execute_action_core method
+        mock_observation = Mock(spec=Observation)
+        mock_runtime._execute_action_core = AsyncMock(return_value=mock_observation)
+
+        # Call the method
+        await Runtime._handle_action(mock_runtime, action)
+
+        # Verify _execute_action_core was called
+        mock_runtime._execute_action_core.assert_called_once_with(action)
+
+        # Verify the observation metadata was copied from the action
+        assert mock_observation._cause == action.id
+        assert mock_observation.tool_call_metadata == action.tool_call_metadata
+
+    @pytest.mark.asyncio
+    async def test_handle_action_with_runtime_error(self, mock_runtime):
+        """Test _handle_action when a runtime error occurs."""
+        # Configure the mock runtime
+        mock_runtime.config.sandbox.retry_on_unrecoverable_runtime_error = False
+
+        action = self._make_action()
+
+        # Mock the _execute_action_core method to raise an error
+        error = AgentRuntimeDisconnectedError('Runtime disconnected')
+        mock_runtime._execute_action_core = AsyncMock(side_effect=error)
+
+        # Call the method
+        await Runtime._handle_action(mock_runtime, action)
+
+        # Verify _execute_action_core was called
+        mock_runtime._execute_action_core.assert_called_once_with(action)
+
+        # Verify send_error_message was called
+        mock_runtime.send_error_message.assert_called_once_with(
+            'STATUS$ERROR_RUNTIME_DISCONNECTED',
+            'AgentRuntimeDisconnectedError: Runtime disconnected',
+        )
+
+    @pytest.mark.asyncio
+    async def test_handle_action_with_other_exception(self, mock_runtime):
+        """Test _handle_action when a non-runtime error occurs."""
+        # Configure the mock runtime
+        mock_runtime.config.sandbox.retry_on_unrecoverable_runtime_error = False
+
+        action = self._make_action()
+
+        # Mock the _execute_action_core method to raise an error
+        error = ValueError('Invalid value')
+        mock_runtime._execute_action_core = AsyncMock(side_effect=error)
+
+        # Call the method
+        await Runtime._handle_action(mock_runtime, action)
+
+        # Verify _execute_action_core was called
+        mock_runtime._execute_action_core.assert_called_once_with(action)
+
+        # Verify send_error_message was called
+        mock_runtime.send_error_message.assert_called_once_with(
+            '', 'ValueError: Invalid value'
+        )
+
+    @pytest.mark.asyncio
+    async def test_handle_action_with_network_error(self, mock_runtime):
+        """Test _handle_action when a network error occurs."""
+        # Configure the mock runtime
+        mock_runtime.config.sandbox.retry_on_unrecoverable_runtime_error = False
+
+        action = self._make_action()
+
+        # Mock the _execute_action_core method to raise an error
+        error = httpx.NetworkError('Connection error')
+        mock_runtime._execute_action_core = AsyncMock(side_effect=error)
+
+        # Call the method
+        await Runtime._handle_action(mock_runtime, action)
+
+        # Verify _execute_action_core was called
+        mock_runtime._execute_action_core.assert_called_once_with(action)
+
+        # Verify send_error_message was called
+        mock_runtime.send_error_message.assert_called_once_with(
+            'STATUS$ERROR_RUNTIME_DISCONNECTED', 'NetworkError: Connection error'
+        )
Author	SHA1	Message	Date
Xingyao Wang	fd1414f7d6	Merge branch 'main' into fix-runtime-error-handling	2025-06-12 17:27:40 -04:00
openhands	9d4b0cc29b	Merge runtime error handling tests into a single file	2025-06-03 19:13:07 +00:00
Xingyao Wang	c3e272cdf5	Merge branch 'main' into fix-runtime-error-handling	2025-06-03 15:06:19 -04:00
openhands	4c9c501ad0	Add simplified tests for resetting _last_updated_mcp_stdio_servers on runtime errors	2025-06-03 18:44:57 +00:00
openhands	1277b5a67c	Add comprehensive tests for resetting _last_updated_mcp_stdio_servers on runtime errors	2025-06-03 18:42:28 +00:00
openhands	22e29885a1	Add tests for resetting _last_updated_mcp_stdio_servers on runtime errors	2025-06-03 18:39:34 +00:00
openhands	b0a53e6ab5	Reset _last_updated_mcp_stdio_servers to empty list when error happens	2025-06-03 18:15:17 +00:00
openhands	716c1ec5b7	Refactor runtime error handling to use tenacity instead of while loop	2025-06-03 15:20:37 +00:00
openhands	7ea2763fa2	Simplify runtime error handling in base.py and remove agent controller changes	2025-06-03 14:39:59 +00:00
openhands	ca2d9dece1	Move runtime error handling logic from agent_controller to runtime/base.py	2025-06-03 14:24:13 +00:00
Xingyao Wang	e450e126f9	Merge branch 'main' into fix-runtime-error-handling	2025-06-03 10:11:22 -04:00
Xingyao Wang	3c79f06dfa	Merge branch 'main' into fix-runtime-error-handling	2025-05-23 23:54:22 +08:00
openhands	79816cf582	Merge main into fix-runtime-error-handling	2025-05-19 02:02:56 +00:00
openhands	93cce89313	Fix runtime error handling and linter issues	2025-04-29 14:28:05 +00:00
openhands	6cb7066900	Merge main into fix-runtime-error-handling	2025-04-29 02:13:25 +00:00
openhands	531603c391	Fix linting issues in test_agent_controller.py	2025-03-19 21:26:51 +00:00
openhands	20d51944a2	Merge main into fix-runtime-error-handling and resolve merge conflicts	2025-03-19 21:20:01 +00:00
openhands	044cd4fbab	Fix docstring linting issues	2025-03-18 17:14:34 +00:00
openhands	3aa9f40fd3	Fix linting issues in agent_controller.py	2025-03-18 17:12:42 +00:00
openhands	cd12e465cd	Merge main into fix-runtime-error-handling, resolving conflicts	2025-03-18 17:02:58 +00:00
Xingyao Wang	1c0d800041	Merge branch 'main' into fix-runtime-error-handling	2025-03-17 21:05:13 -04:00
openhands	d113abbd8b	Fix linting issues with docstrings and formatting	2025-03-17 18:33:40 +00:00
openhands	bd05a4b2e1	Fix tests to match updated error message	2025-03-17 17:15:14 +00:00
openhands	20cc2538e9	Remove redundant test_runtime_error_handling.py file	2025-03-17 16:57:40 +00:00
Xingyao Wang	dbfc471490	Update openhands/controller/agent_controller.py	2025-03-18 00:56:31 +08:00
openhands	922341c3f1	Simplify test by removing counter reset test	2025-03-17 16:55:18 +00:00
openhands	129989dd09	Fix test for counter reset functionality	2025-03-17 16:54:40 +00:00
openhands	0a39bb83b1	Fix test assertions for event source	2025-03-17 16:53:51 +00:00
openhands	dcf9e9f559	Fix EventSource.SYSTEM to EventSource.ENVIRONMENT	2025-03-17 16:52:57 +00:00
openhands	8246d6bcb8	Add missing imports for runtime error exceptions	2025-03-17 16:51:54 +00:00
openhands	301ddeb4e9	Add proper unit tests for runtime error handling in agent controller	2025-03-17 16:51:01 +00:00
openhands	649acd3d9c	Remove _try_migrate_workspace method as requested	2025-03-17 16:48:26 +00:00
openhands	4f33f0e35f	Fix #6032 : Better handling of Critical Runtime Error - Add retry mechanism for runtime errors with a maximum of 3 retries - Add workspace migration functionality to recover from failed runtimes - Add informative error messages to the agent with retry count information - Reset retry counter on successful steps - Add comprehensive tests for the implementation	2025-03-17 16:45:59 +00:00