fix(classic): register finish result before task continuation

AgentFinished is caught before execute() registers a result, leaving the finish episode with result=None. The interaction loop interprets this as "episode in progress" and reuses the old finish proposal instead of calling the LLM for the new task. Register a success result before continuing so that the loop calls propose_action() for the new task.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
feat(classic): preserve action history across task continuations
2026-04-08 03:00:28 -04:00 · 2026-04-04 17:45:30 +02:00 · 2026-04-03 18:36:23 +02:00
2 changed files with 55 additions and 55 deletions
--- a/classic/forge/tests/test_action_history_cursor.py
+++ b/classic/forge/tests/test_action_history_cursor.py
@@ -1,10 +1,8 @@
-"""Test for cursor reset bug when clearing episode history between tasks.
+"""Tests for EpisodicActionHistory cursor safety and task continuation.
-Reproduces: IndexError in EpisodicActionHistory.current_episode when
+Covers:
-episodes.clear() is called without resetting cursor to 0.
+- Cursor >= len guard in current_episode (prevents IndexError)
-
+- History preserved across task changes (no clearing)
 This is the exact crash from run_interaction_loop when the user starts a
 second task after finishing the first one.
 """
 from unittest.mock import MagicMock
@@ -16,42 +14,14 @@ def _make_history_with_episodes(n: int) -> EpisodicActionHistory:
    """Create a history with n completed episodes (cursor advanced past all)."""
    history = EpisodicActionHistory()
    for i in range(n):
        # Directly append mock episodes and advance cursor,
        # simulating what register_action + register_result does
        ep = MagicMock()
-        ep.result = MagicMock()  # has a result = completed
+        ep.result = MagicMock()
        history.episodes.append(ep)
        history.cursor += 1
    return history
-class TestEpisodicActionHistoryCursorReset:
+class TestEpisodicActionHistoryCursor:
    def test_current_episode_after_clear_without_cursor_reset_crashes(self):
        """REPRODUCER: This is the exact bug.
        After completing a task, the interaction loop clears episodes but
        doesn't reset cursor. On the next task, current_episode does
        `self[self.cursor]` where cursor > len(episodes) -> IndexError.
        """
        history = _make_history_with_episodes(2)
        assert history.cursor == 2
        assert len(history.episodes) == 2
        # This is what main.py line 759 does between tasks:
        history.episodes.clear()
        # cursor is still 2, but episodes is empty
        assert history.cursor == 2
        assert len(history.episodes) == 0
        # This is what main.py line 687 calls at the start of the next task.
        # BUG: cursor (2) != len(episodes) (0), so it falls through to
        # self.episodes[2] on an empty list -> IndexError
        #
        # After the fix, this should return None (no current episode).
        result = history.current_episode
        assert result is None
    def test_current_episode_returns_none_on_empty_history(self):
        history = EpisodicActionHistory()
        assert history.current_episode is None
@@ -64,26 +34,48 @@ class TestEpisodicActionHistoryCursorReset:
    def test_current_episode_returns_episode_when_cursor_valid(self):
        history = EpisodicActionHistory()
        ep = MagicMock()
-        ep.result = None  # not yet completed
+        ep.result = None
        history.episodes.append(ep)
        history.cursor = 0
        assert history.current_episode is ep
    def test_clear_and_reset_allows_new_task(self):
        """After properly clearing episodes AND resetting cursor,
        the history should work correctly for a new task."""
        history = _make_history_with_episodes(3)
        # Clean reset between tasks
        history.episodes.clear()
        history.cursor = 0
        assert history.current_episode is None
        assert len(history) == 0
    def test_cursor_beyond_episodes_returns_none(self):
-        """Any cursor value beyond the episode list should return None,
+        """Any cursor value beyond the episode list should return None."""
-        not raise IndexError."""
        history = EpisodicActionHistory()
-        history.cursor = 100  # way past empty list
+        history.cursor = 100
        assert history.current_episode is None
    def test_cursor_safe_after_clear(self):
        """Even if episodes are cleared without resetting cursor,
        current_episode must not crash (>= guard)."""
        history = _make_history_with_episodes(2)
        history.episodes.clear()
        assert history.cursor == 2
        assert history.current_episode is None
 class TestHistoryPreservedAcrossTasks:
    def test_episodes_survive_task_change(self):
        """When user starts a new task, episodes from the previous task
        should still be present — the compression system handles overflow."""
        history = _make_history_with_episodes(3)
        assert len(history.episodes) == 3
        assert history.cursor == 3
        # Simulate what main.py does on task change (no clearing)
        # history is untouched — episodes remain
        assert len(history.episodes) == 3
        assert history.current_episode is None  # cursor at end
    def test_new_episode_appends_after_previous(self):
        """New task actions append to existing history."""
        history = _make_history_with_episodes(2)
        # New task starts — add a new episode
        new_ep = MagicMock()
        new_ep.result = None
        history.episodes.append(new_ep)
        # cursor still at 2, which is now the new episode
        assert history.current_episode is new_ep
        assert len(history.episodes) == 3
--- a/classic/original_autogpt/autogpt/app/main.py
+++ b/classic/original_autogpt/autogpt/app/main.py
@@ -754,10 +754,18 @@ async def run_interaction_loop(
                logger.info("User chose to exit after task completion.")
                return
-            # Start new task in same workspace
+            # Close the finish episode so the loop doesn't reuse it.
            # AgentFinished is caught before execute() can register
            # a result, leaving result=None — which the loop
            # interprets as "episode in progress, reuse proposal".
            from forge.models.action import ActionSuccessResult
            agent.event_history.register_result(
                ActionSuccessResult(outputs=e.message)
            )
            # Start new task in same workspace, keeping prior context
            agent.state.task = next_task
-            agent.event_history.episodes.clear()  # Clear history for fresh context
-            agent.event_history.cursor = 0
            # Reset cycle budget for new task
            cycles_remaining = _get_cycle_budget(