fix(classic): register finish result before task continuation

AgentFinished is caught before execute() registers a result, leaving the finish episode with result=None. The interaction loop interprets the missing result as "episode in progress" and reuses the old finish proposal instead of calling the LLM for the new task. Register a success result before continuing, so that the loop calls propose_action() for the new task.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
feat(classic): preserve action history across task continuations
2026-04-08 03:00:28 -04:00 · 2026-04-04 17:45:30 +02:00 · 2026-04-03 18:36:23 +02:00
2 changed files with 55 additions and 55 deletions
--- a/classic/forge/tests/test_action_history_cursor.py
+++ b/classic/forge/tests/test_action_history_cursor.py
@@ -1,10 +1,8 @@
-"""Test for cursor reset bug when clearing episode history between tasks.
+"""Tests for EpisodicActionHistory cursor safety and task continuation.

-Reproduces: IndexError in EpisodicActionHistory.current_episode when
-episodes.clear() is called without resetting cursor to 0.
-
-This is the exact crash from run_interaction_loop when the user starts a
-second task after finishing the first one.
+Covers:
+- Cursor >= len guard in current_episode (prevents IndexError)
+- History preserved across task changes (no clearing)
 """

 from unittest.mock import MagicMock
@@ -16,42 +14,14 @@ def _make_history_with_episodes(n: int) -> EpisodicActionHistory:
    """Create a history with n completed episodes (cursor advanced past all)."""
    history = EpisodicActionHistory()
    for i in range(n):
-        # Directly append mock episodes and advance cursor,
-        # simulating what register_action + register_result does
        ep = MagicMock()
-        ep.result = MagicMock()  # has a result = completed
+        ep.result = MagicMock()
        history.episodes.append(ep)
        history.cursor += 1
    return history


-class TestEpisodicActionHistoryCursorReset:
-    def test_current_episode_after_clear_without_cursor_reset_crashes(self):
-        """REPRODUCER: This is the exact bug.
-
-        After completing a task, the interaction loop clears episodes but
-        doesn't reset cursor. On the next task, current_episode does
-        `self[self.cursor]` where cursor > len(episodes) -> IndexError.
-        """
-        history = _make_history_with_episodes(2)
-        assert history.cursor == 2
-        assert len(history.episodes) == 2
-
-        # This is what main.py line 759 does between tasks:
-        history.episodes.clear()
-
-        # cursor is still 2, but episodes is empty
-        assert history.cursor == 2
-        assert len(history.episodes) == 0
-
-        # This is what main.py line 687 calls at the start of the next task.
-        # BUG: cursor (2) != len(episodes) (0), so it falls through to
-        # self.episodes[2] on an empty list -> IndexError
-        #
-        # After the fix, this should return None (no current episode).
-        result = history.current_episode
-        assert result is None
-
+class TestEpisodicActionHistoryCursor:
    def test_current_episode_returns_none_on_empty_history(self):
        history = EpisodicActionHistory()
        assert history.current_episode is None
@@ -64,26 +34,48 @@ class TestEpisodicActionHistoryCursorReset:
    def test_current_episode_returns_episode_when_cursor_valid(self):
        history = EpisodicActionHistory()
        ep = MagicMock()
-        ep.result = None  # not yet completed
+        ep.result = None
        history.episodes.append(ep)
        history.cursor = 0
        assert history.current_episode is ep

-    def test_clear_and_reset_allows_new_task(self):
-        """After properly clearing episodes AND resetting cursor,
-        the history should work correctly for a new task."""
-        history = _make_history_with_episodes(3)
-
-        # Clean reset between tasks
-        history.episodes.clear()
-        history.cursor = 0
-
-        assert history.current_episode is None
-        assert len(history) == 0
-
    def test_cursor_beyond_episodes_returns_none(self):
-        """Any cursor value beyond the episode list should return None,
-        not raise IndexError."""
+        """Any cursor value beyond the episode list should return None."""
        history = EpisodicActionHistory()
-        history.cursor = 100  # way past empty list
+        history.cursor = 100
        assert history.current_episode is None
+
+    def test_cursor_safe_after_clear(self):
+        """Even if episodes are cleared without resetting cursor,
+        current_episode must not crash (>= guard)."""
+        history = _make_history_with_episodes(2)
+        history.episodes.clear()
+        assert history.cursor == 2
+        assert history.current_episode is None
+
+
+class TestHistoryPreservedAcrossTasks:
+    def test_episodes_survive_task_change(self):
+        """When user starts a new task, episodes from the previous task
+        should still be present — the compression system handles overflow."""
+        history = _make_history_with_episodes(3)
+        assert len(history.episodes) == 3
+        assert history.cursor == 3
+
+        # Simulate what main.py does on task change (no clearing)
+        # history is untouched — episodes remain
+
+        assert len(history.episodes) == 3
+        assert history.current_episode is None  # cursor at end
+
+    def test_new_episode_appends_after_previous(self):
+        """New task actions append to existing history."""
+        history = _make_history_with_episodes(2)
+
+        # New task starts — add a new episode
+        new_ep = MagicMock()
+        new_ep.result = None
+        history.episodes.append(new_ep)
+        # cursor still at 2, which is now the new episode
+        assert history.current_episode is new_ep
+        assert len(history.episodes) == 3
--- a/classic/original_autogpt/autogpt/app/main.py
+++ b/classic/original_autogpt/autogpt/app/main.py
@@ -754,10 +754,18 @@ async def run_interaction_loop(
                logger.info("User chose to exit after task completion.")
                return

-            # Start new task in same workspace
+            # Close the finish episode so the loop doesn't reuse it.
+            # AgentFinished is caught before execute() can register
+            # a result, leaving result=None — which the loop
+            # interprets as "episode in progress, reuse proposal".
+            from forge.models.action import ActionSuccessResult
+
+            agent.event_history.register_result(
+                ActionSuccessResult(outputs=e.message)
+            )
+
+            # Start new task in same workspace, keeping prior context
            agent.state.task = next_task
-            agent.event_history.episodes.clear()  # Clear history for fresh context
-            agent.event_history.cursor = 0

            # Reset cycle budget for new task
            cycles_remaining = _get_cycle_budget(