Enhance dead-loop recovery by pausing agent and reprompting (#11439)

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: openhands <openhands@all-hands.dev>
2026-01-10 07:18:10 -05:00 · 2025-10-24 19:25:14 +08:00
parent 4b303ec9b4
commit 17e32af6fe
21 changed files with 932 additions and 43 deletions
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -1,10 +1,13 @@
+from dataclasses import dataclass
+from typing import Optional
+
 from openhands.controller.state.state import State
 from openhands.core.logger import openhands_logger as logger
+from openhands.events import Event, EventSource
 from openhands.events.action.action import Action
 from openhands.events.action.commands import IPythonRunCellAction
 from openhands.events.action.empty import NullAction
 from openhands.events.action.message import MessageAction
-from openhands.events.event import Event, EventSource
 from openhands.events.observation import (
    CmdOutputObservation,
    IPythonRunCellObservation,
@@ -22,8 +25,15 @@ class StuckDetector:
        'SyntaxError: incomplete input',
    ]

+    @dataclass
+    class StuckAnalysis:
+        loop_type: str
+        loop_repeat_times: int
+        loop_start_idx: int  # in filtered_history
+
    def __init__(self, state: State):
        self.state = state
+        self.stuck_analysis: Optional[StuckDetector.StuckAnalysis] = None

    def is_stuck(self, headless_mode: bool = True) -> bool:
        """Checks if the agent is stuck in a loop.
@@ -36,6 +46,7 @@ class StuckDetector:
        Returns:
            bool: True if the agent is stuck in a loop, False otherwise.
        """
+        filtered_history_offset = 0
        if not headless_mode:
            # In interactive mode, only look at history after the last user message
            last_user_msg_idx = -1
@@ -46,7 +57,7 @@ class StuckDetector:
                ):
                    last_user_msg_idx = len(self.state.history) - i - 1
                    break
-
+            filtered_history_offset = last_user_msg_idx + 1
            history_to_check = self.state.history[last_user_msg_idx + 1 :]
        else:
            # In headless mode, look at all history
@@ -86,31 +97,45 @@ class StuckDetector:
                break

        # scenario 1: same action, same observation
-        if self._is_stuck_repeating_action_observation(last_actions, last_observations):
+        if self._is_stuck_repeating_action_observation(
+            last_actions, last_observations, filtered_history, filtered_history_offset
+        ):
            return True

        # scenario 2: same action, errors
-        if self._is_stuck_repeating_action_error(last_actions, last_observations):
+        if self._is_stuck_repeating_action_error(
+            last_actions, last_observations, filtered_history, filtered_history_offset
+        ):
            return True

        # scenario 3: monologue
-        if self._is_stuck_monologue(filtered_history):
+        if self._is_stuck_monologue(filtered_history, filtered_history_offset):
            return True

        # scenario 4: action, observation pattern on the last six steps
        if len(filtered_history) >= 6:
-            if self._is_stuck_action_observation_pattern(filtered_history):
+            if self._is_stuck_action_observation_pattern(
+                filtered_history, filtered_history_offset
+            ):
                return True

        # scenario 5: context window error loop
        if len(filtered_history) >= 10:
-            if self._is_stuck_context_window_error(filtered_history):
+            if self._is_stuck_context_window_error(
+                filtered_history, filtered_history_offset
+            ):
                return True

+        # Empty stuck_analysis when not stuck
+        self.stuck_analysis = None
        return False

    def _is_stuck_repeating_action_observation(
-        self, last_actions: list[Event], last_observations: list[Event]
+        self,
+        last_actions: list[Event],
+        last_observations: list[Event],
+        filtered_history: list[Event],
+        filtered_history_offset: int = 0,
    ) -> bool:
        # scenario 1: same action, same observation
        # it takes 4 actions and 4 observations to detect a loop
@@ -128,12 +153,22 @@ class StuckDetector:

            if actions_equal and observations_equal:
                logger.warning('Action, Observation loop detected')
+                self.stuck_analysis = StuckDetector.StuckAnalysis(
+                    loop_type='repeating_action_observation',
+                    loop_repeat_times=4,
+                    loop_start_idx=filtered_history.index(last_actions[-1])
+                    + filtered_history_offset,
+                )
                return True

        return False

    def _is_stuck_repeating_action_error(
-        self, last_actions: list[Event], last_observations: list[Event]
+        self,
+        last_actions: list[Event],
+        last_observations: list[Event],
+        filtered_history: list[Event],
+        filtered_history_offset: int = 0,
    ) -> bool:
        # scenario 2: same action, errors
        # it takes 3 actions and 3 observations to detect a loop
@@ -147,6 +182,12 @@ class StuckDetector:
            # and the last three observations are all errors?
            if all(isinstance(obs, ErrorObservation) for obs in last_observations[:3]):
                logger.warning('Action, ErrorObservation loop detected')
+                self.stuck_analysis = StuckDetector.StuckAnalysis(
+                    loop_type='repeating_action_error',
+                    loop_repeat_times=3,
+                    loop_start_idx=filtered_history.index(last_actions[-1])
+                    + filtered_history_offset,
+                )
                return True
            # or, are the last three observations all IPythonRunCellObservation with SyntaxError?
            elif all(
@@ -167,6 +208,12 @@ class StuckDetector:
                            error_message,
                        ):
                            logger.warning(warning)
+                            self.stuck_analysis = StuckDetector.StuckAnalysis(
+                                loop_type='repeating_action_error',
+                                loop_repeat_times=3,
+                                loop_start_idx=filtered_history.index(last_actions[-1])
+                                + filtered_history_offset,
+                            )
                            return True
                    elif error_message in (
                        'SyntaxError: invalid syntax. Perhaps you forgot a comma?',
@@ -180,6 +227,12 @@ class StuckDetector:
                        error_message,
                    ):
                        logger.warning(warning)
+                        self.stuck_analysis = StuckDetector.StuckAnalysis(
+                            loop_type='repeating_action_error',
+                            loop_repeat_times=3,
+                            loop_start_idx=filtered_history.index(last_actions[-1])
+                            + filtered_history_offset,
+                        )
                        return True
        return False

@@ -255,7 +308,9 @@ class StuckDetector:
        # and the 3rd-to-last line is identical across all occurrences
        return len(error_lines) == 3 and len(set(error_lines)) == 1

-    def _is_stuck_monologue(self, filtered_history: list[Event]) -> bool:
+    def _is_stuck_monologue(
+        self, filtered_history: list[Event], filtered_history_offset: int = 0
+    ) -> bool:
        # scenario 3: monologue
        # check for repeated MessageActions with source=AGENT
        # see if the agent is engaged in a good old monologue, telling itself the same thing over and over
@@ -286,11 +341,16 @@ class StuckDetector:

                if not has_observation_between:
                    logger.warning('Repeated MessageAction with source=AGENT detected')
+                    self.stuck_analysis = StuckDetector.StuckAnalysis(
+                        loop_type='monologue',
+                        loop_repeat_times=3,
+                        loop_start_idx=start_index + filtered_history_offset,
+                    )
                    return True
        return False

    def _is_stuck_action_observation_pattern(
-        self, filtered_history: list[Event]
+        self, filtered_history: list[Event], filtered_history_offset: int = 0
    ) -> bool:
        # scenario 4: action, observation pattern on the last six steps
        # check if the agent repeats the same (Action, Observation)
@@ -330,10 +390,18 @@ class StuckDetector:

            if actions_equal and observations_equal:
                logger.warning('Action, Observation pattern detected')
+                self.stuck_analysis = StuckDetector.StuckAnalysis(
+                    loop_type='repeating_action_observation_pattern',
+                    loop_repeat_times=3,
+                    loop_start_idx=filtered_history.index(last_six_actions[-1])
+                    + filtered_history_offset,
+                )
                return True
        return False

-    def _is_stuck_context_window_error(self, filtered_history: list[Event]) -> bool:
+    def _is_stuck_context_window_error(
+        self, filtered_history: list[Event], filtered_history_offset: int = 0
+    ) -> bool:
        """Detects if we're stuck in a loop of context window errors.

        This happens when we repeatedly get context window errors and try to trim,
@@ -377,6 +445,11 @@ class StuckDetector:
                logger.warning(
                    'Context window error loop detected - repeated condensation events'
                )
+                self.stuck_analysis = StuckDetector.StuckAnalysis(
+                    loop_type='context_window_error',
+                    loop_repeat_times=2,
+                    loop_start_idx=start_idx + filtered_history_offset,
+                )
                return True

        return False