feat(classic): preserve action history across task continuations

Stop clearing episodes when the user enters a new task after finishing the previous one. The compression system (the 4 most recent episodes kept in full, older ones summarized, 1024-token budget) already handles context overflow. Keeping the history lets the agent build on prior work instead of starting from zero. To start from a clean slate, restart the process. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 03:00:28 -04:00 · 2026-04-03 18:36:23 +02:00
parent 09e42041ce
commit 17e1578c46
2 changed files with 45 additions and 55 deletions
--- a/classic/forge/tests/test_action_history_cursor.py
+++ b/classic/forge/tests/test_action_history_cursor.py
@@ -1,10 +1,8 @@
-"""Test for cursor reset bug when clearing episode history between tasks.
+"""Tests for EpisodicActionHistory cursor safety and task continuation.

-Reproduces: IndexError in EpisodicActionHistory.current_episode when
-episodes.clear() is called without resetting cursor to 0.
-
-This is the exact crash from run_interaction_loop when the user starts a
-second task after finishing the first one.
+Covers:
+- Cursor >= len guard in current_episode (prevents IndexError)
+- History preserved across task changes (no clearing)
 """

 from unittest.mock import MagicMock
@@ -16,42 +14,14 @@ def _make_history_with_episodes(n: int) -> EpisodicActionHistory:
    """Create a history with n completed episodes (cursor advanced past all)."""
    history = EpisodicActionHistory()
    for i in range(n):
-        # Directly append mock episodes and advance cursor,
-        # simulating what register_action + register_result does
        ep = MagicMock()
-        ep.result = MagicMock()  # has a result = completed
+        ep.result = MagicMock()
        history.episodes.append(ep)
        history.cursor += 1
    return history


-class TestEpisodicActionHistoryCursorReset:
-    def test_current_episode_after_clear_without_cursor_reset_crashes(self):
-        """REPRODUCER: This is the exact bug.
-
-        After completing a task, the interaction loop clears episodes but
-        doesn't reset cursor. On the next task, current_episode does
-        `self[self.cursor]` where cursor > len(episodes) -> IndexError.
-        """
-        history = _make_history_with_episodes(2)
-        assert history.cursor == 2
-        assert len(history.episodes) == 2
-
-        # This is what main.py line 759 does between tasks:
-        history.episodes.clear()
-
-        # cursor is still 2, but episodes is empty
-        assert history.cursor == 2
-        assert len(history.episodes) == 0
-
-        # This is what main.py line 687 calls at the start of the next task.
-        # BUG: cursor (2) != len(episodes) (0), so it falls through to
-        # self.episodes[2] on an empty list -> IndexError
-        #
-        # After the fix, this should return None (no current episode).
-        result = history.current_episode
-        assert result is None
-
+class TestEpisodicActionHistoryCursor:
    def test_current_episode_returns_none_on_empty_history(self):
        history = EpisodicActionHistory()
        assert history.current_episode is None
@@ -64,26 +34,48 @@ class TestEpisodicActionHistoryCursorReset:
    def test_current_episode_returns_episode_when_cursor_valid(self):
        history = EpisodicActionHistory()
        ep = MagicMock()
-        ep.result = None  # not yet completed
+        ep.result = None
        history.episodes.append(ep)
        history.cursor = 0
        assert history.current_episode is ep

-    def test_clear_and_reset_allows_new_task(self):
-        """After properly clearing episodes AND resetting cursor,
-        the history should work correctly for a new task."""
-        history = _make_history_with_episodes(3)
-
-        # Clean reset between tasks
-        history.episodes.clear()
-        history.cursor = 0
-
-        assert history.current_episode is None
-        assert len(history) == 0
-
    def test_cursor_beyond_episodes_returns_none(self):
-        """Any cursor value beyond the episode list should return None,
-        not raise IndexError."""
+        """Any cursor value beyond the episode list should return None."""
        history = EpisodicActionHistory()
-        history.cursor = 100  # way past empty list
+        history.cursor = 100
        assert history.current_episode is None
+
+    def test_cursor_safe_after_clear(self):
+        """Even if episodes are cleared without resetting cursor,
+        current_episode must not crash (>= guard)."""
+        history = _make_history_with_episodes(2)
+        history.episodes.clear()
+        assert history.cursor == 2
+        assert history.current_episode is None
+
+
+class TestHistoryPreservedAcrossTasks:
+    def test_episodes_survive_task_change(self):
+        """Episodes from the previous task must still be present after the
+        user starts a new task; overflow is left to the compression system."""
+        history = _make_history_with_episodes(3)
+        assert len(history.episodes) == 3
+        assert history.cursor == 3
+
+        # main.py no longer clears event_history or resets its cursor on a
+        # task change, so there is no step to simulate; re-check the state.
+
+        assert len(history.episodes) == 3
+        assert history.current_episode is None  # cursor == len(episodes)
+
+    def test_new_episode_appends_after_previous(self):
+        """An episode appended for a new task becomes the current episode."""
+        history = _make_history_with_episodes(2)
+
+        # New task starts: append an episode that has no result yet
+        new_ep = MagicMock()
+        new_ep.result = None
+        history.episodes.append(new_ep)
+        # cursor is still 2, which now indexes the newly appended episode
+        assert history.current_episode is new_ep
+        assert len(history.episodes) == 3
--- a/classic/original_autogpt/autogpt/app/main.py
+++ b/classic/original_autogpt/autogpt/app/main.py
@@ -754,10 +754,8 @@ async def run_interaction_loop(
                logger.info("User chose to exit after task completion.")
                return

-            # Start new task in same workspace
+            # Start new task in same workspace, keeping prior context
            agent.state.task = next_task
-            agent.event_history.episodes.clear()  # Clear history for fresh context
-            agent.event_history.cursor = 0

            # Reset cycle budget for new task
            cycles_remaining = _get_cycle_budget(