mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-10 07:18:10 -05:00
[Refactor, Fix]: Agent controller state/metrics management (#9012)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
104
openhands/controller/state/control_flags.py
Normal file
104
openhands/controller/state/control_flags.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Generic, TypeVar
|
||||
|
||||
T = TypeVar(
|
||||
'T', int, float
|
||||
) # Type for the value (int for iterations, float for budget)
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class ControlFlag(Generic[T]):
|
||||
"""Base class for control flags that manage limits and state transitions."""
|
||||
|
||||
limit_increase_amount: T
|
||||
current_value: T
|
||||
max_value: T
|
||||
headless_mode: bool = False
|
||||
_hit_limit: bool = False
|
||||
|
||||
def reached_limit(self) -> bool:
|
||||
"""Check if the limit has been reached.
|
||||
|
||||
Returns:
|
||||
bool: True if the limit has been reached, False otherwise.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def increase_limit(self, headless_mode: bool) -> None:
|
||||
"""Expand the limit when needed."""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def step(self):
|
||||
"""Determine the next state based on the current state and mode.
|
||||
|
||||
Returns:
|
||||
ControlFlagState: The next state.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@dataclass
|
||||
class IterationControlFlag(ControlFlag[int]):
|
||||
"""Control flag for managing iteration limits."""
|
||||
|
||||
def reached_limit(self) -> bool:
|
||||
"""Check if the iteration limit has been reached."""
|
||||
self._hit_limit = self.current_value >= self.max_value
|
||||
return self._hit_limit
|
||||
|
||||
def increase_limit(self, headless_mode: bool) -> None:
|
||||
"""Expand the iteration limit by adding the initial value."""
|
||||
if not headless_mode and self._hit_limit:
|
||||
self.max_value += self.limit_increase_amount
|
||||
self._hit_limit = False
|
||||
|
||||
|
||||
def step(self):
|
||||
if self.reached_limit():
|
||||
raise RuntimeError(
|
||||
f'Agent reached maximum iteration. '
|
||||
f'Current iteration: {self.current_value}, max iteration: {self.max_value}'
|
||||
)
|
||||
|
||||
# Increment the current value
|
||||
self.current_value += 1
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class BudgetControlFlag(ControlFlag[float]):
|
||||
"""Control flag for managing budget limits."""
|
||||
|
||||
def reached_limit(self) -> bool:
|
||||
"""Check if the budget limit has been reached."""
|
||||
self._hit_limit = self.current_value >= self.max_value
|
||||
return self._hit_limit
|
||||
|
||||
def increase_limit(self, headless_mode) -> None:
|
||||
"""Expand the budget limit by adding the initial value to the current value."""
|
||||
if self._hit_limit:
|
||||
self.max_value = self.current_value + self.limit_increase_amount
|
||||
self._hit_limit = False
|
||||
|
||||
def step(self):
|
||||
"""Check if we've reached the limit and update state accordingly.
|
||||
|
||||
Note: Unlike IterationControlFlag, this doesn't increment the value
|
||||
as the budget is updated externally.
|
||||
"""
|
||||
if self.reached_limit():
|
||||
current_str = f'{self.current_value:.2f}'
|
||||
max_str = f'{self.max_value:.2f}'
|
||||
raise RuntimeError(
|
||||
f'Agent reached maximum budget for conversation.'
|
||||
f'Current budget: {current_str}, max budget: {max_str}'
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -8,6 +8,10 @@ from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
import openhands
|
||||
from openhands.controller.state.control_flags import (
|
||||
BudgetControlFlag,
|
||||
IterationControlFlag,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.schema import AgentState
|
||||
from openhands.events.action import (
|
||||
@@ -20,7 +24,15 @@ from openhands.memory.view import View
|
||||
from openhands.storage.files import FileStore
|
||||
from openhands.storage.locations import get_conversation_agent_state_filename
|
||||
|
||||
RESUMABLE_STATES = [
|
||||
AgentState.RUNNING,
|
||||
AgentState.PAUSED,
|
||||
AgentState.AWAITING_USER_INPUT,
|
||||
AgentState.FINISHED,
|
||||
]
|
||||
|
||||
|
||||
# NOTE: this is deprecated
|
||||
class TrafficControlState(str, Enum):
|
||||
# default state, no rate limiting
|
||||
NORMAL = 'normal'
|
||||
@@ -32,14 +44,6 @@ class TrafficControlState(str, Enum):
|
||||
PAUSED = 'paused'
|
||||
|
||||
|
||||
RESUMABLE_STATES = [
|
||||
AgentState.RUNNING,
|
||||
AgentState.PAUSED,
|
||||
AgentState.AWAITING_USER_INPUT,
|
||||
AgentState.FINISHED,
|
||||
]
|
||||
|
||||
|
||||
@dataclass
|
||||
class State:
|
||||
"""
|
||||
@@ -75,35 +79,43 @@ class State:
|
||||
"""
|
||||
|
||||
session_id: str = ''
|
||||
# global iteration for the current task
|
||||
iteration: int = 0
|
||||
# local iteration for the current subtask
|
||||
local_iteration: int = 0
|
||||
# max number of iterations for the current task
|
||||
max_iterations: int = 100
|
||||
iteration_flag: IterationControlFlag = field(
|
||||
default_factory=lambda: IterationControlFlag(
|
||||
limit_increase_amount=100, current_value=0, max_value=100
|
||||
)
|
||||
)
|
||||
budget_flag: BudgetControlFlag | None = None
|
||||
confirmation_mode: bool = False
|
||||
history: list[Event] = field(default_factory=list)
|
||||
inputs: dict = field(default_factory=dict)
|
||||
outputs: dict = field(default_factory=dict)
|
||||
agent_state: AgentState = AgentState.LOADING
|
||||
resume_state: AgentState | None = None
|
||||
traffic_control_state: TrafficControlState = TrafficControlState.NORMAL
|
||||
# global metrics for the current task
|
||||
metrics: Metrics = field(default_factory=Metrics)
|
||||
# local metrics for the current subtask
|
||||
local_metrics: Metrics = field(default_factory=Metrics)
|
||||
# root agent has level 0, and every delegate increases the level by one
|
||||
delegate_level: int = 0
|
||||
# start_id and end_id track the range of events in history
|
||||
start_id: int = -1
|
||||
end_id: int = -1
|
||||
|
||||
delegates: dict[tuple[int, int], tuple[str, str]] = field(default_factory=dict)
|
||||
# NOTE: This will never be used by the controller, but it can be used by different
|
||||
parent_metrics_snapshot: Metrics | None = None
|
||||
parent_iteration: int = 100
|
||||
|
||||
# NOTE: this is used by the controller to track parent's metrics snapshot before delegation
|
||||
# evaluation tasks to store extra data needed to track the progress/state of the task.
|
||||
extra_data: dict[str, Any] = field(default_factory=dict)
|
||||
last_error: str = ''
|
||||
|
||||
# NOTE: deprecated args, kept here temporarily for backwards compatability
|
||||
# Will be remove in 30 days
|
||||
iteration: int | None = None
|
||||
local_iteration: int | None = None
|
||||
max_iterations: int | None = None
|
||||
traffic_control_state: TrafficControlState | None = None
|
||||
local_metrics: Metrics | None = None
|
||||
delegates: dict[tuple[int, int], tuple[str, str]] | None = None
|
||||
|
||||
def save_to_session(
|
||||
self, sid: str, file_store: FileStore, user_id: str | None
|
||||
) -> None:
|
||||
@@ -165,6 +177,10 @@ class State:
|
||||
|
||||
# first state after restore
|
||||
state.agent_state = AgentState.LOADING
|
||||
|
||||
# We don't need to clean up deprecated fields here
|
||||
# They will be handled by __getstate__ when the state is saved again
|
||||
|
||||
return state
|
||||
|
||||
def __getstate__(self) -> dict:
|
||||
@@ -177,15 +193,52 @@ class State:
|
||||
state.pop('_history_checksum', None)
|
||||
state.pop('_view', None)
|
||||
|
||||
# Remove deprecated fields before pickling
|
||||
state.pop('iteration', None)
|
||||
state.pop('local_iteration', None)
|
||||
state.pop('max_iterations', None)
|
||||
state.pop('traffic_control_state', None)
|
||||
state.pop('local_metrics', None)
|
||||
state.pop('delegates', None)
|
||||
|
||||
return state
|
||||
|
||||
def __setstate__(self, state: dict) -> None:
|
||||
# Check if we're restoring from an older version (before control flags)
|
||||
is_old_version = 'iteration' in state
|
||||
|
||||
# Convert old iteration tracking to new iteration_flag if needed
|
||||
if is_old_version:
|
||||
# Create iteration_flag from old values
|
||||
max_iterations = state.get('max_iterations', 100)
|
||||
current_iteration = state.get('iteration', 0)
|
||||
|
||||
# Add the iteration_flag to the state
|
||||
state['iteration_flag'] = IterationControlFlag(
|
||||
limit_increase_amount=max_iterations,
|
||||
current_value=current_iteration,
|
||||
max_value=max_iterations,
|
||||
)
|
||||
|
||||
# Update the state
|
||||
self.__dict__.update(state)
|
||||
|
||||
# We keep the deprecated fields for backward compatibility
|
||||
# They will be removed by __getstate__ when the state is saved again
|
||||
|
||||
# make sure we always have the attribute history
|
||||
if not hasattr(self, 'history'):
|
||||
self.history = []
|
||||
|
||||
# Ensure we have default values for new fields if they're missing
|
||||
if not hasattr(self, 'iteration_flag'):
|
||||
self.iteration_flag = IterationControlFlag(
|
||||
limit_increase_amount=100, current_value=0, max_value=100
|
||||
)
|
||||
|
||||
if not hasattr(self, 'budget_flag'):
|
||||
self.budget_flag = None
|
||||
|
||||
def get_current_user_intent(self) -> tuple[str | None, list[str] | None]:
|
||||
"""Returns the latest user message and image(if provided) that appears after a FinishAction, or the first (the task) if nothing was finished yet."""
|
||||
last_user_message = None
|
||||
@@ -223,6 +276,17 @@ class State:
|
||||
],
|
||||
}
|
||||
|
||||
def get_local_step(self):
|
||||
if not self.parent_iteration:
|
||||
return self.iteration_flag.current_value
|
||||
|
||||
return self.iteration_flag.current_value - self.parent_iteration
|
||||
|
||||
def get_local_metrics(self):
|
||||
if not self.parent_metrics_snapshot:
|
||||
return self.metrics
|
||||
return self.metrics.diff(self.parent_metrics_snapshot)
|
||||
|
||||
@property
|
||||
def view(self) -> View:
|
||||
# Compute a simple checksum from the history to see if we can re-use any
|
||||
|
||||
282
openhands/controller/state/state_tracker.py
Normal file
282
openhands/controller/state/state_tracker.py
Normal file
@@ -0,0 +1,282 @@
|
||||
from openhands.controller.agent import Agent
|
||||
from openhands.controller.state.control_flags import BudgetControlFlag, IterationControlFlag
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action.agent import AgentDelegateAction, ChangeAgentStateAction
|
||||
from openhands.events.action.empty import NullAction
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.event_filter import EventFilter
|
||||
from openhands.events.observation.agent import AgentStateChangedObservation
|
||||
from openhands.events.observation.delegate import AgentDelegateObservation
|
||||
from openhands.events.observation.empty import NullObservation
|
||||
from openhands.events.serialization.event import event_to_trajectory
|
||||
from openhands.events.stream import EventStream
|
||||
from openhands.llm.metrics import Metrics
|
||||
from openhands.storage.files import FileStore
|
||||
|
||||
|
||||
class StateTracker:
|
||||
"""Manages and synchronizes the state of an agent throughout its lifecycle.
|
||||
|
||||
It is responsible for:
|
||||
1. Maintaining agent state persistence across sessions
|
||||
2. Managing agent history by filtering and tracking relevant events (previously done in the agent controller)
|
||||
3. Synchronizing metrics between the controller and LLM components
|
||||
4. Updating control flags for budget and iteration limits
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, sid: str | None, file_store: FileStore | None, user_id: str | None
|
||||
):
|
||||
self.sid = sid
|
||||
self.file_store = file_store
|
||||
self.user_id = user_id
|
||||
|
||||
# filter out events that are not relevant to the agent
|
||||
# so they will not be included in the agent history
|
||||
self.agent_history_filter = EventFilter(
|
||||
exclude_types=(
|
||||
NullAction,
|
||||
NullObservation,
|
||||
ChangeAgentStateAction,
|
||||
AgentStateChangedObservation,
|
||||
),
|
||||
exclude_hidden=True,
|
||||
)
|
||||
|
||||
def set_initial_state(
|
||||
self,
|
||||
id: str,
|
||||
agent: Agent,
|
||||
state: State | None,
|
||||
max_iterations: int,
|
||||
max_budget_per_task: float | None,
|
||||
confirmation_mode: bool = False,
|
||||
) -> None:
|
||||
"""Sets the initial state for the agent, either from the previous session, or from a parent agent, or by creating a new one.
|
||||
|
||||
Args:
|
||||
state: The state to initialize with, or None to create a new state.
|
||||
max_iterations: The maximum number of iterations allowed for the task.
|
||||
confirmation_mode: Whether to enable confirmation mode.
|
||||
"""
|
||||
# state can come from:
|
||||
# - the previous session, in which case it has history
|
||||
# - from a parent agent, in which case it has no history
|
||||
# - None / a new state
|
||||
|
||||
# If state is None, we create a brand new state and still load the event stream so we can restore the history
|
||||
if state is None:
|
||||
self.state = State(
|
||||
session_id=id.removesuffix('-delegate'),
|
||||
inputs={},
|
||||
iteration_flag=IterationControlFlag(limit_increase_amount=max_iterations, current_value=0, max_value= max_iterations),
|
||||
budget_flag=None if not max_budget_per_task else BudgetControlFlag(limit_increase_amount=max_budget_per_task, current_value=0, max_value=max_budget_per_task),
|
||||
confirmation_mode=confirmation_mode
|
||||
)
|
||||
self.state.start_id = 0
|
||||
|
||||
logger.info(
|
||||
f'AgentController {id} - created new state. start_id: {self.state.start_id}'
|
||||
)
|
||||
else:
|
||||
self.state = state
|
||||
if self.state.start_id <= -1:
|
||||
self.state.start_id = 0
|
||||
|
||||
logger.info(
|
||||
f'AgentController {id} initializing history from event {self.state.start_id}',
|
||||
)
|
||||
|
||||
|
||||
# Share the state metrics with the agent's LLM metrics
|
||||
# This ensures that all accumulated metrics are always in sync between controller and llm
|
||||
agent.llm.metrics = self.state.metrics
|
||||
|
||||
def _init_history(self, event_stream: EventStream) -> None:
|
||||
"""Initializes the agent's history from the event stream.
|
||||
|
||||
The history is a list of events that:
|
||||
- Excludes events of types listed in self.filter_out
|
||||
- Excludes events with hidden=True attribute
|
||||
- For delegate events (between AgentDelegateAction and AgentDelegateObservation):
|
||||
- Excludes all events between the action and observation
|
||||
- Includes the delegate action and observation themselves
|
||||
"""
|
||||
# define range of events to fetch
|
||||
# delegates start with a start_id and initially won't find any events
|
||||
# otherwise we're restoring a previous session
|
||||
start_id = self.state.start_id if self.state.start_id >= 0 else 0
|
||||
end_id = (
|
||||
self.state.end_id
|
||||
if self.state.end_id >= 0
|
||||
else event_stream.get_latest_event_id()
|
||||
)
|
||||
|
||||
# sanity check
|
||||
if start_id > end_id + 1:
|
||||
logger.warning(
|
||||
f'start_id {start_id} is greater than end_id + 1 ({end_id + 1}). History will be empty.',
|
||||
)
|
||||
self.state.history = []
|
||||
return
|
||||
|
||||
events: list[Event] = []
|
||||
|
||||
# Get rest of history
|
||||
events_to_add = list(
|
||||
event_stream.search_events(
|
||||
start_id=start_id,
|
||||
end_id=end_id,
|
||||
reverse=False,
|
||||
filter=self.agent_history_filter,
|
||||
)
|
||||
)
|
||||
events.extend(events_to_add)
|
||||
|
||||
# Find all delegate action/observation pairs
|
||||
delegate_ranges: list[tuple[int, int]] = []
|
||||
delegate_action_ids: list[int] = [] # stack of unmatched delegate action IDs
|
||||
|
||||
for event in events:
|
||||
if isinstance(event, AgentDelegateAction):
|
||||
delegate_action_ids.append(event.id)
|
||||
# Note: we can get agent=event.agent and task=event.inputs.get('task','')
|
||||
# if we need to track these in the future
|
||||
|
||||
elif isinstance(event, AgentDelegateObservation):
|
||||
# Match with most recent unmatched delegate action
|
||||
if not delegate_action_ids:
|
||||
logger.warning(
|
||||
f'Found AgentDelegateObservation without matching action at id={event.id}',
|
||||
)
|
||||
continue
|
||||
|
||||
action_id = delegate_action_ids.pop()
|
||||
delegate_ranges.append((action_id, event.id))
|
||||
|
||||
# Filter out events between delegate action/observation pairs
|
||||
if delegate_ranges:
|
||||
filtered_events: list[Event] = []
|
||||
current_idx = 0
|
||||
|
||||
for start_id, end_id in sorted(delegate_ranges):
|
||||
# Add events before delegate range
|
||||
filtered_events.extend(
|
||||
event for event in events[current_idx:] if event.id < start_id
|
||||
)
|
||||
|
||||
# Add delegate action and observation
|
||||
filtered_events.extend(
|
||||
event for event in events if event.id in (start_id, end_id)
|
||||
)
|
||||
|
||||
# Update index to after delegate range
|
||||
current_idx = next(
|
||||
(i for i, e in enumerate(events) if e.id > end_id), len(events)
|
||||
)
|
||||
|
||||
# Add any remaining events after last delegate range
|
||||
filtered_events.extend(events[current_idx:])
|
||||
|
||||
self.state.history = filtered_events
|
||||
else:
|
||||
self.state.history = events
|
||||
|
||||
# make sure history is in sync
|
||||
self.state.start_id = start_id
|
||||
|
||||
def close(self, event_stream: EventStream):
|
||||
# we made history, now is the time to rewrite it!
|
||||
# the final state.history will be used by external scripts like evals, tests, etc.
|
||||
# history will need to be complete WITH delegates events
|
||||
# like the regular agent history, it does not include:
|
||||
# - 'hidden' events, events with hidden=True
|
||||
# - backend events (the default 'filtered out' types, types in self.filter_out)
|
||||
start_id = self.state.start_id if self.state.start_id >= 0 else 0
|
||||
end_id = (
|
||||
self.state.end_id
|
||||
if self.state.end_id >= 0
|
||||
else event_stream.get_latest_event_id()
|
||||
)
|
||||
|
||||
self.state.history = list(
|
||||
event_stream.search_events(
|
||||
start_id=start_id,
|
||||
end_id=end_id,
|
||||
reverse=False,
|
||||
filter=self.agent_history_filter,
|
||||
)
|
||||
)
|
||||
|
||||
def add_history(self, event: Event):
|
||||
# if the event is not filtered out, add it to the history
|
||||
if self.agent_history_filter.include(event):
|
||||
self.state.history.append(event)
|
||||
|
||||
def get_trajectory(self, include_screenshots: bool = False) -> list[dict]:
|
||||
return [
|
||||
event_to_trajectory(event, include_screenshots)
|
||||
for event in self.state.history
|
||||
]
|
||||
|
||||
def maybe_increase_control_flags_limits(
|
||||
self, headless_mode: bool
|
||||
):
|
||||
# Iteration and budget extensions are independent of each other
|
||||
# An error will be thrown if any one of the control flags have reached or exceeded its limit
|
||||
self.state.iteration_flag.increase_limit(headless_mode)
|
||||
if self.state.budget_flag:
|
||||
self.state.budget_flag.increase_limit(headless_mode)
|
||||
|
||||
def get_metrics_snapshot(self):
|
||||
"""
|
||||
Deep copy of metrics
|
||||
This serves as a snapshot for the parent's metrics at the time a delegate is created
|
||||
It will be stored and used to compute local metrics for the delegate
|
||||
(since delegates now accumulate metrics from where its parent left off)
|
||||
"""
|
||||
|
||||
return self.state.metrics.copy()
|
||||
|
||||
def save_state(self):
|
||||
"""
|
||||
Save's current state to persistent store
|
||||
"""
|
||||
if self.sid and self.file_store:
|
||||
self.state.save_to_session(self.sid, self.file_store, self.user_id)
|
||||
|
||||
|
||||
def run_control_flags(self):
|
||||
"""
|
||||
Performs one step of the control flags
|
||||
"""
|
||||
self.state.iteration_flag.step()
|
||||
if self.state.budget_flag:
|
||||
self.state.budget_flag.step()
|
||||
|
||||
|
||||
def sync_budget_flag_with_metrics(self):
|
||||
"""
|
||||
Ensures that budget flag is up to date with accumulated costs from llm completions
|
||||
Budget flag will monitor for when budget is exceeded
|
||||
"""
|
||||
if self.state.budget_flag:
|
||||
self.state.budget_flag.current_value = self.state.metrics.accumulated_cost
|
||||
|
||||
def merge_metrics(self, metrics: Metrics):
|
||||
"""
|
||||
Merges metrics with the state metrics
|
||||
|
||||
NOTE: this should be refactored in the future. We should have services (draft llm, title autocomplete, condenser, etc)
|
||||
use their own LLMs, but the metrics object should be shared. This way we have one source of truth for accumulated costs from
|
||||
all services
|
||||
|
||||
This would prevent having fragmented stores for metrics, and we don't have the burden of deciding where and how to store them
|
||||
if we decide introduce more specialized services that require llm completions
|
||||
|
||||
"""
|
||||
self.state.metrics.merge(metrics)
|
||||
if self.state.budget_flag:
|
||||
self.state.budget_flag.current_value = self.state.metrics.accumulated_cost
|
||||
Reference in New Issue
Block a user