mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
Compare commits
2 Commits
ntindle/sy
...
feat/keep-
| Author | SHA1 | Date | |
|---|---|---|---|
| | 6aed43d708 | | |
| | 17e1578c46 | | |
@@ -1,10 +1,8 @@
|
|||||||
"""Test for cursor reset bug when clearing episode history between tasks.
|
"""Tests for EpisodicActionHistory cursor safety and task continuation.
|
||||||
|
|
||||||
Reproduces: IndexError in EpisodicActionHistory.current_episode when
|
Covers:
|
||||||
episodes.clear() is called without resetting cursor to 0.
|
- Cursor >= len guard in current_episode (prevents IndexError)
|
||||||
|
- History preserved across task changes (no clearing)
|
||||||
This is the exact crash from run_interaction_loop when the user starts a
|
|
||||||
second task after finishing the first one.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
@@ -16,42 +14,14 @@ def _make_history_with_episodes(n: int) -> EpisodicActionHistory:
|
|||||||
"""Create a history with n completed episodes (cursor advanced past all)."""
|
"""Create a history with n completed episodes (cursor advanced past all)."""
|
||||||
history = EpisodicActionHistory()
|
history = EpisodicActionHistory()
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
# Directly append mock episodes and advance cursor,
|
|
||||||
# simulating what register_action + register_result does
|
|
||||||
ep = MagicMock()
|
ep = MagicMock()
|
||||||
ep.result = MagicMock() # has a result = completed
|
ep.result = MagicMock()
|
||||||
history.episodes.append(ep)
|
history.episodes.append(ep)
|
||||||
history.cursor += 1
|
history.cursor += 1
|
||||||
return history
|
return history
|
||||||
|
|
||||||
|
|
||||||
class TestEpisodicActionHistoryCursorReset:
|
class TestEpisodicActionHistoryCursor:
|
||||||
def test_current_episode_after_clear_without_cursor_reset_crashes(self):
|
|
||||||
"""REPRODUCER: This is the exact bug.
|
|
||||||
|
|
||||||
After completing a task, the interaction loop clears episodes but
|
|
||||||
doesn't reset cursor. On the next task, current_episode does
|
|
||||||
`self[self.cursor]` where cursor > len(episodes) -> IndexError.
|
|
||||||
"""
|
|
||||||
history = _make_history_with_episodes(2)
|
|
||||||
assert history.cursor == 2
|
|
||||||
assert len(history.episodes) == 2
|
|
||||||
|
|
||||||
# This is what main.py line 759 does between tasks:
|
|
||||||
history.episodes.clear()
|
|
||||||
|
|
||||||
# cursor is still 2, but episodes is empty
|
|
||||||
assert history.cursor == 2
|
|
||||||
assert len(history.episodes) == 0
|
|
||||||
|
|
||||||
# This is what main.py line 687 calls at the start of the next task.
|
|
||||||
# BUG: cursor (2) != len(episodes) (0), so it falls through to
|
|
||||||
# self.episodes[2] on an empty list -> IndexError
|
|
||||||
#
|
|
||||||
# After the fix, this should return None (no current episode).
|
|
||||||
result = history.current_episode
|
|
||||||
assert result is None
|
|
||||||
|
|
||||||
def test_current_episode_returns_none_on_empty_history(self):
|
def test_current_episode_returns_none_on_empty_history(self):
|
||||||
history = EpisodicActionHistory()
|
history = EpisodicActionHistory()
|
||||||
assert history.current_episode is None
|
assert history.current_episode is None
|
||||||
@@ -64,26 +34,48 @@ class TestEpisodicActionHistoryCursorReset:
|
|||||||
def test_current_episode_returns_episode_when_cursor_valid(self):
|
def test_current_episode_returns_episode_when_cursor_valid(self):
|
||||||
history = EpisodicActionHistory()
|
history = EpisodicActionHistory()
|
||||||
ep = MagicMock()
|
ep = MagicMock()
|
||||||
ep.result = None # not yet completed
|
ep.result = None
|
||||||
history.episodes.append(ep)
|
history.episodes.append(ep)
|
||||||
history.cursor = 0
|
history.cursor = 0
|
||||||
assert history.current_episode is ep
|
assert history.current_episode is ep
|
||||||
|
|
||||||
def test_clear_and_reset_allows_new_task(self):
|
|
||||||
"""After properly clearing episodes AND resetting cursor,
|
|
||||||
the history should work correctly for a new task."""
|
|
||||||
history = _make_history_with_episodes(3)
|
|
||||||
|
|
||||||
# Clean reset between tasks
|
|
||||||
history.episodes.clear()
|
|
||||||
history.cursor = 0
|
|
||||||
|
|
||||||
assert history.current_episode is None
|
|
||||||
assert len(history) == 0
|
|
||||||
|
|
||||||
def test_cursor_beyond_episodes_returns_none(self):
|
def test_cursor_beyond_episodes_returns_none(self):
|
||||||
"""Any cursor value beyond the episode list should return None,
|
"""Any cursor value beyond the episode list should return None."""
|
||||||
not raise IndexError."""
|
|
||||||
history = EpisodicActionHistory()
|
history = EpisodicActionHistory()
|
||||||
history.cursor = 100 # way past empty list
|
history.cursor = 100
|
||||||
assert history.current_episode is None
|
assert history.current_episode is None
|
||||||
|
|
||||||
|
def test_cursor_safe_after_clear(self):
|
||||||
|
"""Even if episodes are cleared without resetting cursor,
|
||||||
|
current_episode must not crash (>= guard)."""
|
||||||
|
history = _make_history_with_episodes(2)
|
||||||
|
history.episodes.clear()
|
||||||
|
assert history.cursor == 2
|
||||||
|
assert history.current_episode is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestHistoryPreservedAcrossTasks:
|
||||||
|
def test_episodes_survive_task_change(self):
|
||||||
|
"""When user starts a new task, episodes from the previous task
|
||||||
|
should still be present — the compression system handles overflow."""
|
||||||
|
history = _make_history_with_episodes(3)
|
||||||
|
assert len(history.episodes) == 3
|
||||||
|
assert history.cursor == 3
|
||||||
|
|
||||||
|
# Simulate what main.py does on task change (no clearing)
|
||||||
|
# history is untouched — episodes remain
|
||||||
|
|
||||||
|
assert len(history.episodes) == 3
|
||||||
|
assert history.current_episode is None # cursor at end
|
||||||
|
|
||||||
|
def test_new_episode_appends_after_previous(self):
|
||||||
|
"""New task actions append to existing history."""
|
||||||
|
history = _make_history_with_episodes(2)
|
||||||
|
|
||||||
|
# New task starts — add a new episode
|
||||||
|
new_ep = MagicMock()
|
||||||
|
new_ep.result = None
|
||||||
|
history.episodes.append(new_ep)
|
||||||
|
# cursor still at 2, which is now the new episode
|
||||||
|
assert history.current_episode is new_ep
|
||||||
|
assert len(history.episodes) == 3
|
||||||
|
|||||||
@@ -754,10 +754,18 @@ async def run_interaction_loop(
|
|||||||
logger.info("User chose to exit after task completion.")
|
logger.info("User chose to exit after task completion.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Start new task in same workspace
|
# Close the finish episode so the loop doesn't reuse it.
|
||||||
|
# AgentFinished is caught before execute() can register
|
||||||
|
# a result, leaving result=None — which the loop
|
||||||
|
# interprets as "episode in progress, reuse proposal".
|
||||||
|
from forge.models.action import ActionSuccessResult
|
||||||
|
|
||||||
|
agent.event_history.register_result(
|
||||||
|
ActionSuccessResult(outputs=e.message)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Start new task in same workspace, keeping prior context
|
||||||
agent.state.task = next_task
|
agent.state.task = next_task
|
||||||
agent.event_history.episodes.clear() # Clear history for fresh context
|
|
||||||
agent.event_history.cursor = 0
|
|
||||||
|
|
||||||
# Reset cycle budget for new task
|
# Reset cycle budget for new task
|
||||||
cycles_remaining = _get_cycle_budget(
|
cycles_remaining = _get_cycle_budget(
|
||||||
|
|||||||
Reference in New Issue
Block a user