Compare commits

...

2 Commits

Author SHA1 Message Date
Nicholas Tindle
6aed43d708 fix(classic): register finish result before task continuation
AgentFinished is caught before execute() registers a result, leaving
the finish episode with result=None. The interaction loop sees this as
"episode in progress" and reuses the old finish proposal instead of
calling the LLM for the new task. Register a success result before
continuing so the loop calls propose_action() for the new task.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 17:45:30 +02:00
Nicholas Tindle
17e1578c46 feat(classic): preserve action history across task continuations
Stop clearing episodes when the user enters a new task after finishing.
The compression system (the 4 most recent episodes kept in full, older
ones summarized, 1024-token budget) already handles context overflow.
Keeping history lets the agent build on prior work instead of starting from zero.

To start from a clean slate, restart the process.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 18:36:23 +02:00
2 changed files with 55 additions and 55 deletions

View File

@@ -1,10 +1,8 @@
"""Test for cursor reset bug when clearing episode history between tasks.
"""Tests for EpisodicActionHistory cursor safety and task continuation.
Reproduces: IndexError in EpisodicActionHistory.current_episode when
episodes.clear() is called without resetting cursor to 0.
This is the exact crash from run_interaction_loop when the user starts a
second task after finishing the first one.
Covers:
- Cursor >= len guard in current_episode (prevents IndexError)
- History preserved across task changes (no clearing)
"""
from unittest.mock import MagicMock
@@ -16,42 +14,14 @@ def _make_history_with_episodes(n: int) -> EpisodicActionHistory:
"""Create a history with n completed episodes (cursor advanced past all)."""
history = EpisodicActionHistory()
for i in range(n):
# Directly append mock episodes and advance cursor,
# simulating what register_action + register_result does
ep = MagicMock()
ep.result = MagicMock() # has a result = completed
ep.result = MagicMock()
history.episodes.append(ep)
history.cursor += 1
return history
class TestEpisodicActionHistoryCursorReset:
def test_current_episode_after_clear_without_cursor_reset_crashes(self):
"""REPRODUCER: This is the exact bug.
After completing a task, the interaction loop clears episodes but
doesn't reset cursor. On the next task, current_episode does
`self[self.cursor]` where cursor > len(episodes) -> IndexError.
"""
history = _make_history_with_episodes(2)
assert history.cursor == 2
assert len(history.episodes) == 2
# This is what main.py line 759 does between tasks:
history.episodes.clear()
# cursor is still 2, but episodes is empty
assert history.cursor == 2
assert len(history.episodes) == 0
# This is what main.py line 687 calls at the start of the next task.
# BUG: cursor (2) != len(episodes) (0), so it falls through to
# self.episodes[2] on an empty list -> IndexError
#
# After the fix, this should return None (no current episode).
result = history.current_episode
assert result is None
class TestEpisodicActionHistoryCursor:
def test_current_episode_returns_none_on_empty_history(self):
history = EpisodicActionHistory()
assert history.current_episode is None
@@ -64,26 +34,48 @@ class TestEpisodicActionHistoryCursorReset:
def test_current_episode_returns_episode_when_cursor_valid(self):
history = EpisodicActionHistory()
ep = MagicMock()
ep.result = None # not yet completed
ep.result = None
history.episodes.append(ep)
history.cursor = 0
assert history.current_episode is ep
def test_clear_and_reset_allows_new_task(self):
"""After properly clearing episodes AND resetting cursor,
the history should work correctly for a new task."""
history = _make_history_with_episodes(3)
# Clean reset between tasks
history.episodes.clear()
history.cursor = 0
assert history.current_episode is None
assert len(history) == 0
def test_cursor_beyond_episodes_returns_none(self):
"""Any cursor value beyond the episode list should return None,
not raise IndexError."""
"""Any cursor value beyond the episode list should return None."""
history = EpisodicActionHistory()
history.cursor = 100 # way past empty list
history.cursor = 100
assert history.current_episode is None
def test_cursor_safe_after_clear(self):
"""Even if episodes are cleared without resetting cursor,
current_episode must not crash (>= guard)."""
history = _make_history_with_episodes(2)
history.episodes.clear()
assert history.cursor == 2
assert history.current_episode is None
class TestHistoryPreservedAcrossTasks:
def test_episodes_survive_task_change(self):
"""When user starts a new task, episodes from the previous task
should still be present — the compression system handles overflow."""
history = _make_history_with_episodes(3)
assert len(history.episodes) == 3
assert history.cursor == 3
# Simulate what main.py does on task change (no clearing)
# history is untouched — episodes remain
assert len(history.episodes) == 3
assert history.current_episode is None # cursor at end
def test_new_episode_appends_after_previous(self):
"""New task actions append to existing history."""
history = _make_history_with_episodes(2)
# New task starts — add a new episode
new_ep = MagicMock()
new_ep.result = None
history.episodes.append(new_ep)
# cursor still at 2, which is now the new episode
assert history.current_episode is new_ep
assert len(history.episodes) == 3

View File

@@ -754,10 +754,18 @@ async def run_interaction_loop(
logger.info("User chose to exit after task completion.")
return
# Start new task in same workspace
# Close the finish episode so the loop doesn't reuse it.
# AgentFinished is caught before execute() can register
# a result, leaving result=None — which the loop
# interprets as "episode in progress, reuse proposal".
from forge.models.action import ActionSuccessResult
agent.event_history.register_result(
ActionSuccessResult(outputs=e.message)
)
# Start new task in same workspace, keeping prior context
agent.state.task = next_task
agent.event_history.episodes.clear() # Clear history for fresh context
agent.event_history.cursor = 0
# Reset cycle budget for new task
cycles_remaining = _get_cycle_budget(