Compare commits

...

23 Commits

Author SHA1 Message Date
Engel Nyst 2a041e70a4 add condenser as cli argument 2025-04-30 15:41:47 +02:00
openhands 0c85ef8a55 Merge remote-tracking branch 'upstream/main' into condenser_experiment 2025-04-29 08:36:26 +02:00
openhands 0e58611dc2 keep_first=4, like LLMSummaryCondenser before 2025-04-24 17:15:01 +02:00
openhands cdbbc1e4e6 Merge branch 'upstream-main' into condenser_experiment 2025-04-24 13:03:31 +02:00
openhands 068f86b102 added config for keep_first 2025-04-24 12:58:11 +02:00
openhands dee7305826 made the keep-user-messages mechanism configurable.
also only look for real-user messages, not any event with source=USER
2025-04-24 12:58:11 +02:00
openhands 1e6e817c53 disable cache on the last message. 2025-04-24 12:57:58 +02:00
openhands 7b08b89460 Merge branch 'condenser_experiment' of https://github.com/happyherp/OpenHands into condenser_experiment 2025-04-23 16:00:27 +02:00
openhands f7ad303842 Implement condenser improvements: keep first user message and disable cache for condensation 2025-04-23 13:20:41 +00:00
Carlos Freund 0642ddd16d Merge branch 'main' into condenser_experiment 2025-04-23 14:53:23 +02:00
openhands 0c55c9acec log condensation stats 2025-04-23 13:15:54 +02:00
openhands b0fa78ed6a Fix import order in test_llm_agent_cache_condenser.py 2025-04-23 11:04:28 +00:00
openhands 59de045b26 example for new condenser configuration via .toml 2025-04-23 12:51:02 +02:00
openhands 8497b8d6d2 renamed CACHE_PROMPT_SUPPORTED_MODELS to EXPLICIT_CACHE_MODELS and removed gpt-4o-mini. from it. also removed the check that the used llm must be one of these. 2025-04-23 12:43:09 +02:00
openhands e2c6dfb0ab rename Agent._get_messages to get_messages to indicate its a public method. 2025-04-23 12:28:30 +02:00
openhands d86cb1e4cd Update condenser files to use Python 3.12+ builtins instead of typing module classes 2025-04-23 12:28:21 +02:00
openhands b51fe287e9 Improve test_condensation_triggered_by_user_message_in_context assertion to be more robust 2025-04-23 10:14:12 +00:00
openhands 5d42adcb42 tests for condensation trigger message in context. 2025-04-22 12:26:47 +02:00
openhands 6a1f5fd812 DROP ME: LLMAgentCacheCondenser as default 2025-04-22 11:59:26 +02:00
openhands 8cde944f1a New condenser LLMAgentCacheCondenser that uses the agents cache. 2025-04-21 20:54:37 +02:00
openhands 5aa64e64d8 allow CondensationAction.summary_offset to be None, if summary is set. 2025-04-21 20:50:14 +02:00
openhands 6b8cd2025d additional agent interface LLMCompletionProvider 2025-04-21 20:45:07 +02:00
openhands 797acd021e Condenser.condense: added parameters state and agent 2025-04-21 20:32:48 +02:00
30 changed files with 1419 additions and 95 deletions
+10 -1
View File
@@ -201,7 +201,6 @@ model = "gpt-4o"
#native_tool_calling = None
[llm.gpt4o-mini]
api_key = ""
model = "gpt-4o"
@@ -386,6 +385,16 @@ type = "noop"
# Maximum size of history before triggering attention mechanism
#max_size = 100
# 7. LLM Agent Cache Condenser
#type = "agentcache"
# Maximum number of events before condensation is triggered
#max_size = 100
# Word that triggers condensation when found in user messages
#trigger_word = "CONDENSE!"
# Number of initial events to always keep (typically includes task description)
#keep_first = 1
# Note: This condenser should only be used for LLM models that make use of caching.
# Example of a custom LLM configuration for condensers that require an LLM
# If not provided, it falls back to the default LLM
#[llm.condenser]
@@ -44,6 +44,8 @@ from openhands.core.config import (
get_llm_config_arg,
get_parser,
)
from openhands.core.config.utils import get_condenser_config_arg
from openhands.core.config.condenser_config import NoOpCondenserConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.critic import AgentFinishedCritic
@@ -744,6 +746,12 @@ if __name__ == '__main__':
choices=['swe', 'swt', 'swt-ci'],
help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
)
parser.add_argument(
'--condenser-config',
type=str,
default=None,
help='Name of the condenser config to use, e.g., "default_4_20" for [condenser.default_4_20] section in config.toml',
)
args, _ = parser.parse_known_args()
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
@@ -780,6 +788,18 @@ if __name__ == '__main__':
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
condenser_config = None
if args.condenser_config:
condenser_config = get_condenser_config_arg(args.condenser_config)
if condenser_config is None:
raise ValueError(
f'Could not find Condenser config: --condenser-config {args.condenser_config}'
)
else:
# If no specific condenser config is provided via args, default to NoOpCondenser
condenser_config = NoOpCondenserConfig()
logger.warning('No Condenser config provided via --condenser-config, using NoOpCondenser.')
details = {'mode': args.mode}
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
@@ -794,6 +814,7 @@ if __name__ == '__main__':
args.eval_note,
args.eval_output_dir,
details=details,
condenser_config=condenser_config,
)
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
@@ -11,8 +11,10 @@ MAX_ITER=$5
NUM_WORKERS=$6
DATASET=$7
SPLIT=$8
N_RUNS=$9
MODE=${10}
CONDENSER_CONFIG=$9
N_RUNS=${10}
MODE=${11}
if [ -z "$NUM_WORKERS" ]; then
NUM_WORKERS=1
@@ -51,6 +53,12 @@ if [ -z "$MODE" ]; then
echo "MODE not specified, use default $MODE"
fi
if [ -n "$CONDENSER_CONFIG" ]; then
echo "Using Condenser Config: $CONDENSER_CONFIG"
else
echo "No Condenser Config provided, use default (NoOpCondenser)."
fi
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
@@ -91,7 +99,19 @@ fi
function run_eval() {
local eval_note="${1}"
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
local base_command="evaluation/benchmarks/swe_bench/run_infer.py"
# Check if DEBUG_EVAL environment variable is set to true
if [[ "${DEBUG_EVAL}" == "true" ]]; then
echo "Running in DEBUG mode with debugpy, listening on port 5678"
# Prepend DEBUG=true to set the environment variable for the python process
# Use standard debugpy port 5678
COMMAND="DEBUG=true poetry run debugpy --listen 0.0.0.0:5678 --wait-for-client $base_command"
else
COMMAND="poetry run python $base_command"
fi
COMMAND="$COMMAND \
--agent-cls $AGENT \
--llm-config $MODEL_CONFIG \
--max-iterations $MAX_ITER \
@@ -101,6 +121,12 @@ function run_eval() {
--split $SPLIT \
--mode $MODE"
# Conditionally add the condenser config argument.
# The Python script defaults to NoOpCondenserConfig if this argument is not provided.
if [ -n "$CONDENSER_CONFIG" ]; then
COMMAND="$COMMAND --condenser-config $CONDENSER_CONFIG"
fi
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
@@ -1,6 +1,7 @@
import copy
import os
from collections import deque
from typing import Any
from litellm import ChatCompletionToolParam
@@ -15,7 +16,7 @@ from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
)
from openhands.agenthub.codeact_agent.tools.think import ThinkTool
from openhands.agenthub.codeact_agent.tools.web_read import WebReadTool
from openhands.controller.agent import Agent
from openhands.controller.agent import Agent, LLMCompletionProvider
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.logger import openhands_logger as logger
@@ -37,7 +38,7 @@ from openhands.runtime.plugins import (
from openhands.utils.prompt import PromptManager
class CodeActAgent(Agent):
class CodeActAgent(Agent, LLMCompletionProvider):
VERSION = '2.2'
"""
The Code Act Agent is a minimalist agent.
@@ -162,7 +163,7 @@ class CodeActAgent(Agent):
# event we'll just return that instead of an action. The controller will
# immediately ask the agent to step again with the new view.
condensed_history: list[Event] = []
match self.condenser.condensed_history(state):
match self.condenser.condensed_history(state, self):
case View(events=events):
condensed_history = events
@@ -173,7 +174,19 @@ class CodeActAgent(Agent):
f'Processing {len(condensed_history)} events from a total of {len(state.history)} events'
)
messages = self._get_messages(condensed_history)
params = self.build_llm_completion_params(condensed_history, state)
response = self.llm.completion(**params)
logger.debug(f'Response from LLM: {response}')
actions = self.response_to_actions_fn(response)
logger.debug(f'Actions after response_to_actions: {actions}')
for action in actions:
self.pending_actions.append(action)
return self.pending_actions.popleft()
def build_llm_completion_params(
self, condensed_history: list[Event], state: State
) -> dict[str, Any]:
messages = self.get_messages(condensed_history)
params: dict = {
'messages': self.llm.format_messages_for_llm(messages),
}
@@ -208,15 +221,9 @@ class CodeActAgent(Agent):
params['tools'] += unique_mcp_tools
# log to litellm proxy if possible
params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)}
response = self.llm.completion(**params)
logger.debug(f'Response from LLM: {response}')
actions = self.response_to_actions_fn(response)
logger.debug(f'Actions after response_to_actions: {actions}')
for action in actions:
self.pending_actions.append(action)
return self.pending_actions.popleft()
return params
def _get_messages(self, events: list[Event]) -> list[Message]:
def get_messages(self, events: list[Event]) -> list[Message]:
"""Constructs the message history for the LLM conversation.
This method builds a structured conversation history by processing events from the state
+55 -15
View File
@@ -1,10 +1,12 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Type
from typing import TYPE_CHECKING, Any, TypedDict
from openhands.controller.state.state import State
from openhands.core.message import Message
if TYPE_CHECKING:
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.events.action import Action
from openhands.events.action.message import SystemMessageAction
@@ -13,7 +15,7 @@ from openhands.core.exceptions import (
AgentNotRegisteredError,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.event import EventSource
from openhands.events.event import Event, EventSource
from openhands.llm.llm import LLM
from openhands.runtime.plugins import PluginRequirement
@@ -30,7 +32,7 @@ class Agent(ABC):
It tracks the execution status and maintains a history of interactions.
"""
_registry: dict[str, Type['Agent']] = {}
_registry: dict[str, type['Agent']] = {}
sandbox_plugins: list[PluginRequirement] = []
def __init__(
@@ -46,8 +48,8 @@ class Agent(ABC):
self.tools: list = []
def get_system_message(self) -> 'SystemMessageAction | None':
"""
Returns a SystemMessageAction containing the system message and tools.
"""Returns a SystemMessageAction containing the system message and tools.
This will be added to the event stream as the first message.
Returns:
@@ -91,15 +93,16 @@ class Agent(ABC):
@abstractmethod
def step(self, state: 'State') -> 'Action':
"""Starts the execution of the assigned instruction. This method should
be implemented by subclasses to define the specific execution logic.
"""Starts the execution of the assigned instruction.
This method should be implemented by subclasses to define the specific execution logic.
"""
pass
def reset(self) -> None:
"""Resets the agent's execution status and clears the history. This method can be used
to prepare the agent for restarting the instruction or cleaning up before destruction.
"""Resets the agent's execution status and clears the history.
This method can be used to prepare the agent for restarting the instruction or cleaning up before destruction.
"""
# TODO clear history
self._complete = False
@@ -112,12 +115,12 @@ class Agent(ABC):
return self.__class__.__name__
@classmethod
def register(cls, name: str, agent_cls: Type['Agent']) -> None:
def register(cls, name: str, agent_cls: type['Agent']) -> None:
"""Registers an agent class in the registry.
Parameters:
- name (str): The name to register the class under.
- agent_cls (Type['Agent']): The class to register.
- agent_cls (type['Agent']): The class to register.
Raises:
- AgentAlreadyRegisteredError: If name already registered
@@ -127,14 +130,14 @@ class Agent(ABC):
cls._registry[name] = agent_cls
@classmethod
def get_cls(cls, name: str) -> Type['Agent']:
def get_cls(cls, name: str) -> type['Agent']:
"""Retrieves an agent class from the registry.
Parameters:
- name (str): The name of the class to retrieve
Returns:
- agent_cls (Type['Agent']): The class registered under the specified name.
- agent_cls (type['Agent']): The class registered under the specified name.
Raises:
- AgentNotRegisteredError: If name not registered
@@ -158,6 +161,43 @@ class Agent(ABC):
"""Sets the list of MCP tools for the agent.
Args:
- mcp_tools (list[dict]): The list of MCP tools.
mcp_tools: The list of MCP tools.
"""
self.mcp_tools = mcp_tools
class LLMCompletionParams(TypedDict, total=False):
    """Typed description of a set of LLM completion keyword arguments.

    ``total=False`` makes every key optional.

    NOTE(review): no visible code references this type;
    ``build_llm_completion_params`` is annotated as returning ``dict[str, Any]``.
    Presumably this is meant to describe that dict -- confirm before relying on it.
    """

    # Messages of the conversation to send.
    messages: list[Message]
    # Tool definitions offered to the model, if any.
    tools: list[Any] | None
    # Extra request body; presumably the litellm metadata payload -- TODO confirm.
    extra_body: dict[str, Any] | None
    # Purpose not evident from the visible code -- TODO confirm.
    extra: dict[str, Any] | None
class LLMCompletionProvider(ABC):
    """Mixin interface for agents able to expose how they build their LLM calls.

    Condensers that want to issue a request consistent with the agent's own
    requests (so that prompt caching is shared between agent and condenser)
    rely on this interface to obtain the messages and completion parameters.
    """

    # The LLM instance that implementers make available to callers.
    llm: LLM

    @abstractmethod
    def get_messages(self, condensed_history: list[Event]) -> list[Message]:
        """Translate the given events into the messages sent to the LLM.

        Args:
            condensed_history: events to convert into messages

        Returns:
            The messages built from the events
        """

    @abstractmethod
    def build_llm_completion_params(
        self, condensed_history: list[Event], state: State
    ) -> dict[str, Any]:
        """Assemble the keyword arguments for an LLM completion call.

        Args:
            condensed_history: events to convert to messages for the LLM
            state: the current state

        Returns:
            A dict of parameters for the LLM completion
        """
+24
View File
@@ -58,6 +58,28 @@ class RecentEventsCondenserConfig(BaseModel):
model_config = {'extra': 'forbid'}
class LLMAgentCacheCondenserConfig(BaseModel):
    """Settings for the condenser selected with ``type = "agentcache"``.

    Keys other than the ones declared here are rejected (``extra = 'forbid'``).
    """

    # Discriminator value identifying this condenser in configuration files.
    type: Literal['agentcache'] = Field(default='agentcache')

    # Must be at least 1.
    max_size: int = Field(
        ge=1,
        default=100,
        description='Maximum number of events before condensation is triggered.',
    )

    trigger_word: str = Field(
        description='Word that triggers condensation when found in user messages.',
        default='CONDENSE!',
    )

    # Zero is allowed; negative values are rejected.
    keep_first: int = Field(
        ge=0,
        default=1,
        description='Number of initial events to always keep in history.',
    )

    model_config = {'extra': 'forbid'}
class LLMSummarizingCondenserConfig(BaseModel):
"""Configuration for LLMCondenser."""
@@ -181,6 +203,7 @@ CondenserConfig = (
| LLMAttentionCondenserConfig
| StructuredSummaryCondenserConfig
| CondenserPipelineConfig
| LLMAgentCacheCondenserConfig
)
@@ -284,6 +307,7 @@ def create_condenser_config(condenser_type: str, data: dict) -> CondenserConfig:
'amortized': AmortizedForgettingCondenserConfig,
'llm_attention': LLMAttentionCondenserConfig,
'structured': StructuredSummaryCondenserConfig,
'agentcache': LLMAgentCacheCondenserConfig,
}
if condenser_type not in condenser_classes:
+117 -1
View File
@@ -16,7 +16,11 @@ from openhands import __version__
from openhands.core import logger
from openhands.core.config.agent_config import AgentConfig
from openhands.core.config.app_config import AppConfig
from openhands.core.config.condenser_config import condenser_config_from_toml_section
from openhands.core.config.condenser_config import (
CondenserConfig,
condenser_config_from_toml_section,
create_condenser_config,
)
from openhands.core.config.config_utils import (
OH_DEFAULT_AGENT,
OH_MAX_ITERATIONS,
@@ -436,6 +440,118 @@ def get_llm_config_arg(
return None
def get_condenser_config_arg(
    condenser_config_arg: str, toml_file: str = 'config.toml'
) -> CondenserConfig | None:
    """Get a group of condenser settings from the config file by name.

    A group in config.toml can look like this:

    ```
    [condenser.my_summarizer]
    type = 'llm'
    llm_config = 'gpt-4o'  # References [llm.gpt-4o]
    max_size = 50
    ...
    ```

    The user-defined group name, like "my_summarizer", is the argument to this
    function. Surrounding brackets and a leading "condenser." prefix are
    tolerated and removed, so "[condenser.my_summarizer]" works as well.

    For the condenser types 'llm', 'llm_attention' and 'structured', a string
    `llm_config` value is treated as the name of an `[llm.<name>]` group and is
    replaced by the loaded LLM config before the condenser config is created.

    Every failure (missing or unparsable file, missing group, entry that is not
    a table, missing `type`, unresolved LLM reference, invalid settings) is
    logged and reported by returning None; nothing is raised for these cases.

    Args:
        condenser_config_arg: The group of condenser settings to get from the config.toml file.
        toml_file: Path to the configuration file to read from. Defaults to 'config.toml'.

    Returns:
        CondenserConfig: The CondenserConfig object with the settings from the config file, or None if not found/error.
    """
    # keep only the bare group name: drop brackets and the "condenser." prefix
    condenser_config_arg = condenser_config_arg.strip('[]').removeprefix('condenser.')

    logger.openhands_logger.debug(
        f'Loading condenser config [{condenser_config_arg}] from {toml_file}'
    )

    # load the toml file
    try:
        with open(toml_file, 'r', encoding='utf-8') as toml_contents:
            toml_config = toml.load(toml_contents)
    except FileNotFoundError as e:
        logger.openhands_logger.error(f'Config file not found: {toml_file}. Error: {e}')
        return None
    except toml.TomlDecodeError as e:
        logger.openhands_logger.error(
            f'Cannot parse condenser group [{condenser_config_arg}] from {toml_file}. Exception: {e}'
        )
        return None

    # Check if the condenser section and the specific config exist
    condenser_section = toml_config.get('condenser')
    if (
        not isinstance(condenser_section, dict)
        or condenser_config_arg not in condenser_section
    ):
        logger.openhands_logger.error(
            f'Condenser config section [condenser.{condenser_config_arg}] not found in {toml_file}'
        )
        return None

    # A plain key of the top-level [condenser] table (e.g. `type = "noop"`) is
    # a scalar, not a group of settings; reject it instead of crashing on it.
    group = condenser_section[condenser_config_arg]
    if not isinstance(group, dict):
        logger.openhands_logger.error(
            f'[condenser.{condenser_config_arg}] in {toml_file} is not a table of condenser settings'
        )
        return None

    # Work on a shallow copy so the parsed TOML data is not modified
    condenser_data = dict(group)

    # Determine the type and handle potential LLM dependency
    condenser_type = condenser_data.get('type')
    if not condenser_type:
        logger.openhands_logger.error(
            f'Missing "type" field in [condenser.{condenser_config_arg}] section of {toml_file}'
        )
        return None

    # Handle LLM config reference if needed, using get_llm_config_arg
    if (
        condenser_type in ('llm', 'llm_attention', 'structured')
        and 'llm_config' in condenser_data
        and isinstance(condenser_data['llm_config'], str)
    ):
        llm_config_name = condenser_data['llm_config']
        logger.openhands_logger.debug(
            f'Condenser [{condenser_config_arg}] requires LLM config [{llm_config_name}]. Loading it...'
        )
        # Use the existing function to load the specific LLM config
        referenced_llm_config = get_llm_config_arg(llm_config_name, toml_file=toml_file)
        if not referenced_llm_config:
            logger.openhands_logger.error(
                f"Failed to load required LLM config '{llm_config_name}' for condenser '{condenser_config_arg}'."
            )
            return None
        # Replace the string reference with the actual LLMConfig object
        condenser_data['llm_config'] = referenced_llm_config

    # Create the condenser config instance
    try:
        config = create_condenser_config(condenser_type, condenser_data)
    except (ValidationError, ValueError) as e:
        logger.openhands_logger.error(
            f'Invalid condenser configuration for [{condenser_config_arg}]: {e}.'
        )
        return None

    logger.openhands_logger.info(
        f'Successfully loaded condenser config [{condenser_config_arg}] from {toml_file}'
    )
    return config
# Command line arguments
def get_parser() -> argparse.ArgumentParser:
"""Get the argument parser."""
+3 -4
View File
@@ -158,11 +158,10 @@ class CondensationAction(Action):
# Either way, we can only have one of the two valid configurations.
forgotten_event_configuration = using_event_ids ^ using_event_range
# We also need to check that if the summary is provided, so is the
# offset (and vice versa).
# Check that if we have a summary_offset, we also have a summary
summary_configuration = (
self.summary is None and self.summary_offset is None
) or (self.summary is not None and self.summary_offset is not None)
self.summary is not None if self.summary_offset is not None else True
)
return forgotten_event_configuration and summary_configuration
+15 -9
View File
@@ -40,14 +40,14 @@ __all__ = ['LLM']
# tuple of exceptions to retry on
LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
RateLimitError,
litellm.Timeout,
litellm.InternalServerError,
litellm.Timeout, # type: ignore
litellm.InternalServerError, # type: ignore
LLMNoResponseError,
)
# cache prompt supporting models
# remove this when we gemini and deepseek are supported
CACHE_PROMPT_SUPPORTED_MODELS = [
# these models require special treatment so that caching
# works
EXPLICIT_CACHE_MODELS = [
'claude-3-7-sonnet-20250219',
'claude-3-5-sonnet-20241022',
'claude-3-5-sonnet-20240620',
@@ -249,11 +249,17 @@ class LLM(RetryMixin, DebugMixin):
kwargs.pop('tool_choice', None)
# if we have no messages, something went very wrong
if not messages:
if not messages or len(messages) < 1:
raise ValueError(
'The messages list is empty. At least one message is required.'
)
# anthropic requires at least one user message.
if not any(message.get('role') == 'user' for message in messages):
raise ValueError(
'At least one message with role "user" is required for the completion.'
)
# log the entire LLM prompt
self.log_prompt(messages)
@@ -523,8 +529,8 @@ class LLM(RetryMixin, DebugMixin):
return (
self.config.caching_prompt is True
and (
self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
self.config.model in EXPLICIT_CACHE_MODELS
or self.config.model.split('/')[-1] in EXPLICIT_CACHE_MODELS
)
# We don't need to look-up model_info, because only Anthropic models needs the explicit caching breakpoint
)
@@ -662,7 +668,7 @@ class LLM(RetryMixin, DebugMixin):
boolean: True if executing a local model.
"""
if self.config.base_url is not None:
for substring in ['localhost', '127.0.0.1' '0.0.0.0']:
for substring in ['localhost', '127.0.0.1', '0.0.0.0']:
if substring in self.config.base_url:
return True
elif self.config.model is not None:
+3 -4
View File
@@ -1,9 +1,8 @@
import openhands.memory.condenser.impl # noqa F401 (we import this to get the condensers registered)
from openhands.memory.condenser.condenser import (
Condenser,
get_condensation_metadata,
View,
Condensation,
Condenser,
View,
get_condensation_metadata,
)
__all__ = [
+21 -8
View File
@@ -87,23 +87,33 @@ class Condenser(ABC):
self.write_metadata(state)
@abstractmethod
def condense(self, View) -> View | Condensation:
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
"""Condense a sequence of events into a potentially smaller list.
New condenser strategies should override this method to implement their own condensation logic. Call `self.add_metadata` in the implementation to record any relevant per-condensation diagnostic information.
Args:
View: A view of the history containing all events that should be condensed.
view: A view of the history containing all events that should be condensed.
state: Optional state for context.
agent: Optional agent for agent-aware condensation.
Returns:
View | Condensation: A condensed view of the events or an event indicating the history has been condensed.
"""
def condensed_history(self, state: State) -> View | Condensation:
"""Condense the state's history."""
def condensed_history(self, state: State, agent=None) -> View | Condensation:
"""Condense the state's history.
Args:
state: The current state.
agent: Optional agent to use for agent-aware condensation.
Returns:
A View or Condensation object.
"""
self._llm_metadata = state.to_llm_metadata('condenser')
with self.metadata_batch(state):
return self.condense(state.view)
return self.condense(state.view, state, agent)
@classmethod
def register_config(cls, configuration_type: type[CondenserConfig]) -> None:
@@ -136,6 +146,9 @@ class Condenser(ABC):
Raises:
ValueError: If the condenser type is not recognized.
"""
# trigger the condenser implementations to register themselves
import openhands.memory.condenser.impl # noqa: F401
try:
condenser_class = CONDENSER_REGISTRY[type(config)]
return condenser_class.from_config(config)
@@ -156,14 +169,14 @@ class RollingCondenser(Condenser, ABC):
"""Determine if a view should be condensed."""
@abstractmethod
def get_condensation(self, view: View) -> Condensation:
def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
"""Get the condensation from a view."""
def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
# If we trigger the condenser-specific condensation threshold, compute and return
# the condensation.
if self.should_condense(view):
return self.get_condensation(view)
return self.get_condensation(view, state, agent)
# Otherwise we're safe to just return the view.
else:
@@ -4,6 +4,9 @@ from openhands.memory.condenser.impl.amortized_forgetting_condenser import (
from openhands.memory.condenser.impl.browser_output_condenser import (
BrowserOutputCondenser,
)
from openhands.memory.condenser.impl.llm_agent_cache_condenser import (
LLMAgentCacheCondenser,
)
from openhands.memory.condenser.impl.llm_attention_condenser import (
ImportantEventSelection,
LLMAttentionCondenser,
@@ -25,6 +28,7 @@ from openhands.memory.condenser.impl.structured_summary_condenser import (
__all__ = [
'AmortizedForgettingCondenser',
'LLMAgentCacheCondenser',
'LLMAttentionCondenser',
'ImportantEventSelection',
'LLMSummarizingCondenser',
@@ -1,5 +1,6 @@
from __future__ import annotations
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import AmortizedForgettingCondenserConfig
from openhands.events.action.agent import CondensationAction
from openhands.memory.condenser.condenser import (
@@ -36,7 +37,7 @@ class AmortizedForgettingCondenser(RollingCondenser):
super().__init__()
def get_condensation(self, view: View) -> Condensation:
def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
target_size = self.max_size // 2
head = view[: self.keep_first]
@@ -1,5 +1,6 @@
from __future__ import annotations
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import BrowserOutputCondenserConfig
from openhands.events.event import Event
from openhands.events.observation import BrowserOutputObservation
@@ -17,7 +18,7 @@ class BrowserOutputCondenser(Condenser):
self.attention_window = attention_window
super().__init__()
def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
"""Replace the content of browser observations outside of the attention window with a placeholder."""
results: list[Event] = []
cnt: int = 0
@@ -0,0 +1,158 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any
from openhands.controller.agent import LLMCompletionProvider
from openhands.controller.state.state import State
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import Message
from openhands.events.event import Event
from openhands.memory.condenser.condenser import Condensation, Condenser, View
class CachingCondenser(Condenser, ABC):
    """Abstract base class for condensers that use prompt caching.

    This class provides a framework for condensers that begin their prompt with the
    whole current prompt, so they can use caching. They then add their own message
    at the end to instruct the LLM.

    Subclasses need to implement:
    - should_condense: Decide whether a view has to be condensed
    - createCondensationPrompt: Create the prompt for condensation
    - processResponse: Process the LLM response to create a Condensation
    """

    def condense(self, view: View, state: State, agent=None) -> View | Condensation:
        """Condense the events in the view using the agent's LLM.

        This implementation requires an agent that implements the LLMCompletionProvider
        interface to provide access to the agent's LLM and message formatting.
        The arguments are validated before `should_condense` is consulted, so a
        missing or unsuitable agent is reported even if no condensation is due.

        Args:
            view: The view to condense
            state: The current state
            agent: The agent to use for condensation

        Returns:
            The unchanged view if no condensation is needed, otherwise the
            result of the condensation (a Condensation or View object)

        Raises:
            ValueError: If state or agent is missing, or if the agent does not
                implement LLMCompletionProvider
        """
        if not state:
            raise ValueError('CachingCondenser: No state provided, cannot condense')

        if not agent:
            raise ValueError('CachingCondenser: No agent provided, cannot condense')

        # Check if the agent implements the LLMCompletionProvider interface
        if not isinstance(agent, LLMCompletionProvider):
            raise ValueError(
                f'CachingCondenser: Agent {agent.__class__.__name__} does not implement '
                'LLMCompletionProvider interface, cannot condense'
            )

        # Check if we should condense
        if not self.should_condense(view):
            return view

        # Do the condensation
        return self._do_condensation(view.events, state, agent)

    def _do_condensation(
        self, events: list[Event], state: State, agent: LLMCompletionProvider
    ) -> Condensation | View:
        """Do a condensation for the given events.

        Args:
            events: The events to condense
            state: The current state
            agent: The agent to use for condensation

        Returns:
            A Condensation or View object
        """
        # Use the agent's method to build the parameters
        # This ensures that the parameters are consistent with the agent's LLM
        params = agent.build_llm_completion_params(events, state)

        # Convert events to messages using the agent's method
        messages = agent.get_messages(events)

        # Now we add our own prompt at the end
        messages.append(self.createCondensationPrompt(events, state, messages))

        # Replace the agent's messages with ours, then drop the cache marker from
        # the appended prompt (it has to happen on the formatted messages).
        params['messages'] = agent.llm.format_messages_for_llm(messages)
        self._disable_cache(params['messages'])

        # Get the LLM response
        response = agent.llm.completion(**params)
        self.add_metadata('response', response.model_dump())
        logger.info(f'Summarized {len(events)} events. Usage:{response}')
        self.add_metadata('metrics', agent.llm.metrics.get())

        # Process the response
        return self.processResponse(events, state, response, messages)

    def _disable_cache(self, messages: list[dict]) -> None:
        """Disable the cache marker on the last of the given messages.

        The condensation prompt is a one-off: this conversation does not continue,
        so a cache entry written for it could never be read again. Only the last
        message (our new prompt) is touched, so the rest of the conversation can
        still be served from the cache.

        Effectively reversing ConversationMemory.apply_prompt_caching for that
        message. The messages are modified in place.

        Args:
            messages: The formatted messages of the request
        """
        if not messages:
            return

        content = messages[-1].get('content')
        if isinstance(content, list):
            if not content:
                # nothing that could carry a cache marker
                return
            # the marker, if any, is looked for on the last content part
            content = content[-1]
        if isinstance(content, dict) and content.get('cache_control') is not None:
            content['cache_control'] = None

    @abstractmethod
    def createCondensationPrompt(
        self, events: list[Event], state: State, base_messages: list[Message]
    ) -> Message:
        """Create the prompt for condensation.

        Args:
            events: The events to condense
            state: The current state
            base_messages: The messages that are already in the prompt (cached)

        Returns:
            The message with condensation instructions
        """

    @abstractmethod
    def processResponse(
        self, events: list[Event], state: State, response: Any, messages: list[Message]
    ) -> Condensation | View:
        """Process the LLM response to create a Condensation.

        Args:
            events: The events that were condensed
            state: The current state
            response: The LLM response
            messages: The messages that were sent, ending with the condensation prompt

        Returns:
            A Condensation or View object
        """

    @abstractmethod
    def should_condense(self, view: View) -> bool:
        """Determine if a view should be condensed.

        Args:
            view: The view to check

        Returns:
            True if the view should be condensed, False otherwise
        """
@@ -0,0 +1,217 @@
from __future__ import annotations
from typing import Any
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import LLMAgentCacheCondenserConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import Message, TextContent
from openhands.core.schema.action import ActionType
from openhands.events.action.agent import CondensationAction
from openhands.events.action.message import MessageAction
from openhands.events.event import Event, EventSource
from openhands.memory.condenser.condenser import Condensation, View
from openhands.memory.condenser.impl.caching_condenser import CachingCondenser
class LLMAgentCacheCondenser(CachingCondenser):
    """A summarizing condenser, similar to LLMSummarizingCondenser, built on CachingCondenser.

    Condensation is requested either when the view grows beyond ``max_size``
    events or when the most recent user message contains ``trigger_word``.
    """

    def __init__(
        self,
        max_size: int = 100,
        trigger_word: str = 'CONDENSE!',
        keep_user_messages: bool = False,
        keep_first: int = 1,
    ):
        """Initialize the condenser.

        Args:
            max_size: Maximum number of events before condensation is triggered
            trigger_word: Word that triggers condensation when found in user messages
            keep_user_messages: If True, the first and the most recent user
                message are never put on the list of forgotten events
            keep_first: Number of initial events to always retain

        Raises:
            ValueError: If ``keep_first`` is negative or not smaller than ``max_size``.
        """
        if keep_first >= max_size:
            raise ValueError(
                f'keep_first ({keep_first}) must be less than max_size ({max_size})'
            )
        if keep_first < 0:
            raise ValueError(f'keep_first ({keep_first}) cannot be negative')

        self.keep_first = keep_first
        self.max_size = max_size
        self.trigger_word = trigger_word
        self.keep_user_messages = keep_user_messages

        super().__init__()

    def createCondensationPrompt(
        self, events: list[Event], state: State, base_messages: list[Message]
    ) -> Message:
        """Create the prompt for condensation using a similar approach to LLMSummarizingCondenser.

        This method is required by the CachingCondenser abstract base class.

        Args:
            events: The events to condense
            state: The current state
            base_messages: The messages that are already in the prompt (cached)

        Returns:
            The message with condensation instructions
        """
        # Create the condensation instructions similar to LLMSummarizingCondenser
        prompt = """You are maintaining a context-aware state summary for an interactive agent.
The whole conversation above will be removed from the context window. Therefore you need to track:
USER_CONTEXT: (Preserve essential user requirements, goals, and clarifications in concise form)
COMPLETED: (Tasks completed so far, with brief results)
PENDING: (Tasks that still need to be done)
CURRENT_STATE: (Current variables, data structures, or relevant state)
For code-specific tasks, also include:
CODE_STATE: {File paths, function signatures, data structures}
TESTS: {Failing cases, error messages, outputs}
CHANGES: {Code edits, variable updates}
DEPS: {Dependencies, imports, external calls}
VERSION_CONTROL_STATUS: {Repository state, current branch, PR status, commit history}
PRIORITIZE:
1. Adapt tracking format to match the actual task type
2. Capture key user requirements and goals
3. Distinguish between completed and pending tasks
4. Keep all sections concise and relevant
SKIP: Tracking irrelevant details for the current task type
Example formats:
For code tasks:
USER_CONTEXT: Fix FITS card float representation issue
COMPLETED: Modified mod_float() in card.py, all tests passing
PENDING: Create PR, update documentation
CODE_STATE: mod_float() in card.py updated
TESTS: test_format() passed
CHANGES: str(val) replaces f"{val:.16G}"
DEPS: None modified
VERSION_CONTROL_STATUS: Branch: fix-float-precision, Latest commit: a1b2c3d
For other tasks:
USER_CONTEXT: Write 20 haikus based on coin flip results
COMPLETED: 15 haikus written for results [T,H,T,H,T,H,T,T,H,T,H,T,H,T,H]
PENDING: 5 more haikus needed
CURRENT_STATE: Last flip: Heads, Haiku count: 15/20"""
        prompt += '\n\n'

        # Create a message with the condensation instructions
        return Message(
            role='user',
            content=[TextContent(text=prompt)],
        )

    def processResponse(
        self, events: list[Event], state: State, response: Any, messages: list[Message]
    ) -> Condensation | View:
        """Turn the LLM summary into a Condensation, or a View if nothing is forgotten.

        Args:
            events: The events that were condensed
            state: The current state
            response: The LLM response; the summary is read from its first choice
            messages: The messages that were already in the prompt (cached)

        Returns:
            A Condensation listing the forgotten event ids together with the
            summary, or a View of all events when there is nothing to forget.
        """
        # Extract the summary from the response
        summary = response.choices[0].message.content

        # Everything after the first `keep_first` events is a candidate for forgetting
        events_to_forget = events[self.keep_first :]

        # Ensure essential user messages are not forgotten
        if self.keep_user_messages:
            self._filter_user_messages_to_keep(events, events_to_forget)

        if not events_to_forget:
            # Nothing is forgotten, so every event stays visible. This includes
            # user messages that were just taken off the forget list; returning
            # only the head would silently drop them.
            return View(events=list(events))

        return Condensation(
            action=CondensationAction(
                forgotten_event_ids=[event.id for event in events_to_forget],
                summary=summary,
            )
        )

    def should_condense(self, view: View) -> bool:
        """Determine if the view should be condensed.

        Condensation is triggered in two cases:
        1. When the number of events exceeds max_size
        2. When the most recent user message contains the trigger word

        Args:
            view: The view to check

        Returns:
            True if the view should be condensed, False otherwise
        """
        events = view.events

        # Check if the number of events exceeds max_size
        if len(events) > self.max_size:
            logger.info(f'Condensing events due to max size({self.max_size}) limit.')
            return True

        # Check if the most recent user message contains the trigger word
        if self._contains_trigger_word(events):
            logger.info(f"Condensing events due to trigger word '{self.trigger_word}'.")
            return True

        return False

    def _contains_trigger_word(self, events: list[Event]) -> bool:
        """Check if the most recent user message contains the trigger word.

        Args:
            events: The events to check

        Returns:
            True if the most recent user message contains the trigger word, False otherwise
        """
        if not events or len(events) < 2:  # Need at least 2 events to condense
            return False

        # Iterate through events in reverse order to find the last user message
        for event in reversed(events):
            if (
                hasattr(event, 'source')
                and event.source == EventSource.USER
                and hasattr(event, 'action')
                and event.action == ActionType.MESSAGE
                and event.message is not None
            ):
                return self.trigger_word in event.message
            # If we did a condensation, stop looking
            if hasattr(event, 'action') and event.action == ActionType.CONDENSATION:
                return False

        return False

    def _filter_user_messages_to_keep(
        self, events: list[Event], events_to_forget: list[Event]
    ) -> None:
        """Ensure essential user messages are not forgotten.

        Removes the first and the most recent user message from
        ``events_to_forget`` (modified in place).

        Args:
            events: All events under consideration
            events_to_forget: Candidate events to forget; mutated in place
        """
        # Only genuine user messages count: a MessageAction sent by the agent
        # must not be mistaken for user input.
        user_messages = [
            event
            for event in events
            if isinstance(event, MessageAction)
            and getattr(event, 'source', None) == EventSource.USER
        ]
        if not user_messages:
            return

        # Always keep the first user message to maintain context, and the most
        # recent one as well (both are the same object if there is only one).
        protected = (user_messages[0], user_messages[-1])

        # Compare by identity: events with identical content may compare equal,
        # which would make list.remove() drop the wrong occurrence.
        events_to_forget[:] = [
            event
            for event in events_to_forget
            if not any(event is kept for kept in protected)
        ]

    @classmethod
    def from_config(
        cls, config: LLMAgentCacheCondenserConfig
    ) -> LLMAgentCacheCondenser:
        """Build a condenser from its configuration object.

        Args:
            config: The condenser configuration

        Returns:
            A configured LLMAgentCacheCondenser
        """
        return LLMAgentCacheCondenser(
            max_size=config.max_size,
            trigger_word=config.trigger_word,
            # The config class is not visible here; fall back to the
            # constructor default if it does not define this option.
            keep_user_messages=getattr(config, 'keep_user_messages', False),
            keep_first=config.keep_first,
        )
LLMAgentCacheCondenser.register_config(LLMAgentCacheCondenserConfig)
@@ -3,6 +3,7 @@ from __future__ import annotations
from litellm import supports_response_schema
from pydantic import BaseModel
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import LLMAttentionCondenserConfig
from openhands.events.action.agent import CondensationAction
from openhands.llm.llm import LLM
@@ -47,7 +48,7 @@ class LLMAttentionCondenser(RollingCondenser):
super().__init__()
def get_condensation(self, view: View) -> Condensation:
def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
target_size = self.max_size // 2
head_event_ids = [event.id for event in view.events[: self.keep_first]]
@@ -1,5 +1,6 @@
from __future__ import annotations
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import LLMSummarizingCondenserConfig
from openhands.core.message import Message, TextContent
from openhands.events.action.agent import CondensationAction
@@ -48,7 +49,7 @@ class LLMSummarizingCondenser(RollingCondenser):
"""Truncate the content to fit within the specified maximum event length."""
return truncate_content(content, max_chars=self.max_event_length)
def get_condensation(self, view: View) -> Condensation:
def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
head = view[: self.keep_first]
target_size = self.max_size // 2
# Number of events to keep from the tail -- target size, minus however many
@@ -1,5 +1,6 @@
from __future__ import annotations
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import NoOpCondenserConfig
from openhands.memory.condenser.condenser import Condensation, Condenser, View
@@ -7,7 +8,7 @@ from openhands.memory.condenser.condenser import Condensation, Condenser, View
class NoOpCondenser(Condenser):
"""A condenser that does nothing to the event sequence."""
def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
"""Returns the list of events unchanged."""
return view
@@ -1,5 +1,6 @@
from __future__ import annotations
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import ObservationMaskingCondenserConfig
from openhands.events.event import Event
from openhands.events.observation import Observation
@@ -15,7 +16,7 @@ class ObservationMaskingCondenser(Condenser):
super().__init__()
def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
"""Replace the content of observations outside of the attention window with a placeholder."""
results: list[Event] = []
for i, event in enumerate(view):
+2 -2
View File
@@ -30,10 +30,10 @@ class CondenserPipeline(Condenser):
for condenser in self.condensers:
condenser.write_metadata(state)
def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
result: View | Condensation = view
for condenser in self.condensers:
result = condenser.condense(result)
result = condenser.condense(result, state, agent)
if isinstance(result, Condensation):
break
return result
@@ -1,5 +1,6 @@
from __future__ import annotations
from openhands.controller.state.state import State
from openhands.core.config.condenser_config import RecentEventsCondenserConfig
from openhands.memory.condenser.condenser import Condensation, Condenser, View
@@ -13,7 +14,7 @@ class RecentEventsCondenser(Condenser):
super().__init__()
def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
"""Keep only the most recent events (up to `max_events`)."""
head = view[: self.keep_first]
tail_length = max(0, self.max_events - len(head))
@@ -196,7 +196,7 @@ class StructuredSummaryCondenser(RollingCondenser):
"""Truncate the content to fit within the specified maximum event length."""
return truncate_content(content, max_chars=self.max_event_length)
def get_condensation(self, view: View) -> Condensation:
def get_condensation(self, view: View, state=None, agent=None) -> Condensation:
head = view[: self.keep_first]
target_size = self.max_size // 2
# Number of events to keep from the tail -- target size, minus however many
+7 -5
View File
@@ -59,14 +59,16 @@ class View(BaseModel):
# The relevant summary is always in the last condensation event (i.e., the most recent one).
for event in reversed(events):
if isinstance(event, CondensationAction):
if event.summary is not None and event.summary_offset is not None:
if event.summary is not None:
summary = event.summary
summary_offset = event.summary_offset
break
if summary is not None and summary_offset is not None:
kept_events.insert(
summary_offset, AgentCondensationObservation(content=summary)
)
if summary is not None:
summary_obs = AgentCondensationObservation(content=summary)
if summary_offset is not None:
kept_events.insert(summary_offset, summary_obs)
else:
kept_events.append(summary_obs)
return View(events=kept_events)
+5 -3
View File
@@ -10,7 +10,7 @@ from openhands.core.config import AppConfig
from openhands.core.config.condenser_config import (
BrowserOutputCondenserConfig,
CondenserPipelineConfig,
LLMSummarizingCondenserConfig,
LLMAgentCacheCondenserConfig,
)
from openhands.core.logger import OpenHandsLoggerAdapter
from openhands.core.schema import AgentState
@@ -138,8 +138,10 @@ class Session:
default_condenser_config = CondenserPipelineConfig(
condensers=[
BrowserOutputCondenserConfig(),
LLMSummarizingCondenserConfig(
llm_config=llm.config, keep_first=4, max_size=80
LLMAgentCacheCondenserConfig(
max_size=100, # Default max size
trigger_word='CONDENSE!', # Default trigger word
keep_first=4,
),
]
)
+4 -4
View File
@@ -341,7 +341,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
# 2. The action message
# 3. The observation message
mock_state.history = [action, observation]
messages = agent._get_messages(mock_state.history)
messages = agent.get_messages(mock_state.history)
assert len(messages) == 3
assert messages[0].role == 'system' # First message should be the system message
assert messages[1].role == 'assistant' # Second message should be the action
@@ -349,21 +349,21 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
# The same should hold if the events are presented out-of-order
mock_state.history = [observation, action]
messages = agent._get_messages(mock_state.history)
messages = agent.get_messages(mock_state.history)
assert len(messages) == 3
assert messages[0].role == 'system' # First message should be the system message
# If only one of the two events is present, then we should just get the system message
# plus any valid message from the event
mock_state.history = [action]
messages = agent._get_messages(mock_state.history)
messages = agent.get_messages(mock_state.history)
assert (
len(messages) == 1
) # Only system message, action is waiting for its observation
assert messages[0].role == 'system'
mock_state.history = [observation]
messages = agent._get_messages(mock_state.history)
messages = agent.get_messages(mock_state.history)
assert len(messages) == 1 # Only system message, observation has no matching action
assert messages[0].role == 'system'
@@ -0,0 +1,612 @@
from typing import cast
from unittest.mock import MagicMock, Mock, patch
import pytest
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.controller.state.state import State
from openhands.core.config.agent_config import AgentConfig
from openhands.core.config.llm_config import LLMConfig
from openhands.core.message import Message
from openhands.events.action.agent import ChangeAgentStateAction, RecallAction
from openhands.events.action.files import FileReadAction
from openhands.events.action.message import MessageAction, SystemMessageAction
from openhands.events.event import Event, EventSource, RecallType
from openhands.events.observation.agent import (
RecallObservation,
)
from openhands.events.observation.files import FileReadObservation
from openhands.llm import LLM
from openhands.llm.metrics import Metrics
from openhands.memory.condenser.condenser import Condensation, View
from openhands.memory.condenser.impl.llm_agent_cache_condenser import (
LLMAgentCacheCondenser,
)
def format_messages_for_llm(messages: Message | list[Message]) -> list[dict]:
    """Dump one message or a list of messages into a list of plain dicts."""
    batch = [messages] if isinstance(messages, Message) else messages
    return [item.model_dump() for item in batch]
@pytest.fixture
def agent() -> CodeActAgent:
    """Provide a CodeActAgent whose LLM is replaced by a mock with prompt caching reported active."""
    code_act_agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig())

    # Configure the stand-in LLM completely before attaching it to the agent.
    fake_llm = Mock(LLM)
    fake_llm.config = Mock()
    fake_llm.config.max_message_chars = 1000
    fake_llm.is_caching_prompt_active.return_value = True
    fake_llm.format_messages_for_llm = format_messages_for_llm
    fake_llm.metrics = Metrics()

    code_act_agent.llm = fake_llm
    return code_act_agent
def set_next_llm_response(agent, response: str):
    """Make the agent's (mocked) LLM completion return `response` as its only choice."""
    only_choice = MagicMock()
    only_choice.message.content = response

    canned_completion = MagicMock()
    canned_completion.choices = [only_choice]

    agent.llm.completion.return_value = canned_completion
def test_contains_trigger_word():
    """Test that _contains_trigger_word correctly identifies the CONDENSE! keyword."""

    def make_message(text: str, source: str | None = None) -> MessageAction:
        # Build a MessageAction, optionally tagging it with a source.
        message = MessageAction(text)
        if source is not None:
            message._source = source  # type: ignore [attr-defined]
        return message

    condenser = LLMAgentCacheCondenser(max_size=10)

    # Case 1: no events at all
    assert not condenser._contains_trigger_word([])

    # Case 2: a single event is not enough to condense
    lone_event = make_message('Please CONDENSE! the conversation history.')
    assert not condenser._contains_trigger_word([lone_event])

    # Case 3: user message carrying the keyword
    user_event = make_message('Please CONDENSE! the conversation history.', 'user')
    agent_event = make_message('Agent response', 'agent')
    assert condenser._contains_trigger_word([user_event, agent_event])

    # Case 4: user message without the keyword
    user_event.content = 'Please summarize the conversation history.'
    assert not condenser._contains_trigger_word([user_event, agent_event])

    # Case 5: a RecallObservation after the keyword-carrying user message
    user_event.content = 'Please CONDENSE! the conversation history.'
    recall_event = RecallObservation(
        recall_type=RecallType.KNOWLEDGE, content='saw a thing'
    )
    assert condenser._contains_trigger_word([agent_event, user_event, recall_event])

    # Case 6: several user messages, only the most recent one matters
    plain_user_event = make_message('First message without keyword', 'user')
    keyword_user_event = make_message(
        'Please CONDENSE! the conversation history.', 'user'
    )
    assert condenser._contains_trigger_word(
        [plain_user_event, agent_event, keyword_user_event]
    )

    # Case 7: several user messages, the most recent one lacks the keyword
    assert not condenser._contains_trigger_word(
        [keyword_user_event, agent_event, plain_user_event]
    )
def test_no_condensation(agent: CodeActAgent):
    """Test that the LLMAgentCacheCondenser returns a View when no condensation is needed."""
    condenser = LLMAgentCacheCondenser(max_size=10)

    # Five real events, well below max_size
    history = []
    for index in range(5):
        message = MessageAction(f'Message {index}')
        message._id = index  # type: ignore [attr-defined]
        history.append(message)

    outcome = condenser.condensed_history(
        State(history=cast(list[Event], history)), agent
    )

    # A plain View with all events is expected
    assert isinstance(outcome, View)
    assert len(outcome.events) == 5
def test_condense(agent: CodeActAgent):
    """Test that the condenser uses the LLM to condense events."""
    expected_summary = """
USER_CONTEXT: Testing file read operations
COMPLETED: Read 4 files with varying content
PENDING: None
CURRENT_STATE: Files read: 0.txt, 1.txt, 2.txt, 3.txt
"""
    set_next_llm_response(agent, expected_summary)

    condenser = LLMAgentCacheCondenser(max_size=5, keep_user_messages=True)
    agent.condenser = condenser

    system_message = SystemMessageAction(content='System Message')
    system_message._source = EventSource.AGENT  # type: ignore
    user_message = MessageAction('User message')
    user_message._source = EventSource.USER  # type: ignore

    file_reads = [FileReadObservation(f'{i}.txt', 'content.' * i) for i in range(4)]
    events = [system_message, user_message, *file_reads]
    assert len(events) == 6

    # Ids start at 1
    for event_id, event in enumerate(events, start=1):
        event._id = event_id  # type: ignore [attr-defined]

    state = State(history=cast(list[Event], events))
    result = condenser.condensed_history(state, agent)

    assert isinstance(result, Condensation)
    assert hasattr(result, 'action')

    # 1(system-prompt) is not forgotten
    # 2(user-message) is not forgotten
    assert result.action.forgotten_event_ids == [3, 4, 5, 6]
    assert result.action.summary == expected_summary
    assert result.action.summary_offset is None
def test_llm_agent_cache_condenser_with_state_with_rewrite(agent: CodeActAgent):
    """Test that the condenser correctly handles summaries."""
    canned_summary = """
USER_CONTEXT: File exploration task
COMPLETED: Read 6 files with varying content
PENDING: None
CODE_STATE: Files read: 0.txt, 1.txt, 2.txt, 3.txt, 4.txt, 5.txt
CHANGES: User asked about database schema and agent explained the tables and relationships.
"""
    set_next_llm_response(agent, canned_summary)

    condenser = LLMAgentCacheCondenser(max_size=5)
    agent.condenser = condenser

    # Six observations exceed max_size=5
    observations = []
    for index in range(6):
        observation = FileReadObservation(f'{index}.txt', 'content.' * index)
        observation._id = index  # type: ignore [attr-defined]
        observations.append(observation)

    result = condenser.condensed_history(
        State(history=cast(list[Event], observations)), agent
    )

    # A Condensation carrying the LLM summary is expected
    assert isinstance(result, Condensation)
    assert hasattr(result, 'action')
    assert result.action.summary is not None
    assert 'User asked about database schema' in result.action.summary
def test_should_condense_max_size():
    """Test that the LLMAgentCacheCondenser correctly determines when to condense based on size."""
    condenser = LLMAgentCacheCondenser(max_size=10)

    def view_with(count: int) -> View:
        # A view holding `count` plain message events.
        return View(events=[MessageAction(f'Message {i}') for i in range(count)])

    small_view = view_with(5)
    large_view = view_with(11)

    # Below the limit: no condensation
    assert not condenser.should_condense(small_view)

    # Above the limit: condensation requested
    assert condenser.should_condense(large_view)
def test_llm_agent_cache_condenser_simulated_mixed_condensation(agent: CodeActAgent):
    """Test simulated condensation with a mix of file-read actions and observations."""
    from tests.unit.testing_utils import create_tool_call_metadata

    canned_summary = """
USER_CONTEXT: Mixed file and message operations
COMPLETED: Processed 7 events (messages and file reads)
PENDING: None
CURRENT_STATE: Last message: Test message 6, Last file: 7.txt
CHANGES: Summary <mention content of message 4,5>
"""
    set_next_llm_response(agent, canned_summary)

    condenser = LLMAgentCacheCondenser(max_size=5)
    agent.condenser = condenser

    history = []
    for event_id in range(1, 8):
        if event_id % 2:
            # Odd ids: observations
            event = FileReadObservation(
                f'File content for event {event_id}', f'{event_id}.txt'
            )
        else:
            # Even ids: agent actions carrying proper tool_call_metadata
            event = FileReadAction(f'{event_id}.txt')
            event._source = 'agent'
            event.tool_call_metadata = create_tool_call_metadata(
                tool_call_id=f'tool_call_{event_id}',
                model_response_id=f'model_response_{event_id}',
                function_name='str_replace_editor',
            )
        event._id = event_id  # type: ignore [attr-defined]
        history.append(event)

    result = condenser.condensed_history(
        State(history=cast(list[Event], history)), agent
    )

    # A Condensation that forgets something is expected
    assert isinstance(result, Condensation)
    assert len(result.action.forgotten_event_ids) > 0

    # The summary is the one the LLM returned
    assert 'Mixed file and message operations' in result.action.summary
def test_llm_agent_cache_condenser_always_keep_system_prompt(agent: CodeActAgent):
    """Test that the system prompt is preserved in the final messages."""
    canned_summary = """
USER_CONTEXT: Simple greeting exchange
COMPLETED: User greeted agent, agent responded
PENDING: None
CURRENT_STATE: Conversation in progress
"""
    set_next_llm_response(agent, canned_summary)

    # max_size is small enough to force condensation, yet large enough that the
    # follow-up state below does not trigger it again
    condenser = LLMAgentCacheCondenser(max_size=5)
    agent.condenser = condenser

    # Ten alternating user/agent messages exceed max_size
    history = []
    for index in range(10):
        message = MessageAction(f'Message {index}')
        message._source = 'agent' if index % 2 else 'user'  # type: ignore [attr-defined]
        message._id = index + 1  # type: ignore [attr-defined]
        history.append(message)

    result = condenser.condensed_history(
        State(history=cast(list[Event], history)), agent
    )

    # A Condensation is expected
    assert isinstance(result, Condensation)
    result.action._id = 20  # type: ignore [attr-defined]

    # Follow-up state: only the last event plus the condensation action, so
    # that condensation is not triggered a second time
    followup_state = State(history=[history[-1], result.action])
    view = condenser.condensed_history(followup_state, agent)
    assert isinstance(view, View)

    # The system prompt must still lead the messages
    messages = agent.get_messages(view.events)
    first_message = messages[0]
    assert first_message.role == 'system'
    assert 'You are OpenHands' in first_message.content[0].text
def test_llm_agent_cache_condenser_first_message_user_message(agent: CodeActAgent):
    """Test that at least one user message is preserved."""
    canned_summary = """
USER_CONTEXT: Initial greeting
COMPLETED: User said hello, agent responded
PENDING: None
CURRENT_STATE: Conversation started
"""
    set_next_llm_response(agent, canned_summary)

    # max_size is small enough to force condensation, yet large enough that the
    # follow-up state below does not trigger it again
    condenser = LLMAgentCacheCondenser(max_size=5, keep_user_messages=True)
    agent.condenser = condenser

    # One single user message ...
    user_message = MessageAction('Hello, how are you?')
    user_message._source = 'user'  # type: ignore [attr-defined]
    user_message._id = 1  # type: ignore [attr-defined]

    # ... followed by enough agent messages to exceed max_size
    history = [user_message]
    for index in range(10):
        reply = MessageAction(f'Agent response {index}')
        reply._source = 'agent'  # type: ignore [attr-defined]
        reply._id = index + 2  # type: ignore [attr-defined]
        history.append(reply)

    result = condenser.condensed_history(
        State(history=cast(list[Event], history)), agent
    )

    # A Condensation is expected
    assert isinstance(result, Condensation)
    result.action._id = 20  # type: ignore [attr-defined]

    # Follow-up state: just the user message and the condensation action, so
    # that condensation is not triggered a second time
    followup_state = State(history=[user_message, result.action])
    view = condenser.condensed_history(followup_state, agent)
    assert isinstance(view, View)

    # At least one user message must survive in the view
    surviving_user_messages = [
        event
        for event in view.events
        if hasattr(event, '_source') and event._source == 'user'
    ]
    assert len(surviving_user_messages) > 0

    # The system prompt must still lead the messages
    messages = agent.get_messages(view.events)
    first_message = messages[0]
    assert first_message.role == 'system'
    assert 'You are OpenHands' in first_message.content[0].text
def test_llm_agent_cache_condenser_full_rewrite(agent: CodeActAgent):
    """Test a complete condensation of the conversation."""
    canned_summary = """
USER_CONTEXT: Simple greeting
COMPLETED: User and AI greeted each other
PENDING: None
CURRENT_STATE: Conversation initialized
"""
    set_next_llm_response(agent, canned_summary)

    # max_size is small enough to force condensation, yet large enough that the
    # follow-up state below does not trigger it again
    condenser = LLMAgentCacheCondenser(max_size=5)
    agent.condenser = condenser

    # Ten alternating user/agent messages exceed max_size
    history = []
    for index in range(10):
        message = MessageAction(f'Message {index}')
        message._source = 'agent' if index % 2 else 'user'  # type: ignore [attr-defined]
        message._id = index + 1  # type: ignore [attr-defined]
        history.append(message)

    result = condenser.condensed_history(
        State(history=cast(list[Event], history)), agent
    )

    # A Condensation is expected
    assert isinstance(result, Condensation)
    result.action._id = 20  # type: ignore [attr-defined]

    # Some events were forgotten and the summary is the LLM's
    assert len(result.action.forgotten_event_ids) > 0
    assert 'User and AI greeted each other' in result.action.summary

    # Follow-up state: only the condensation action, so that condensation is
    # not triggered a second time
    view = condenser.condensed_history(State(history=[result.action]), agent)
    assert isinstance(view, View)

    # The condensation action itself is part of the view
    assert result.action in view.events

    # The system prompt must still lead the messages
    messages = agent.get_messages(view.events)
    first_message = messages[0]
    assert first_message.role == 'system'
    assert 'You are OpenHands' in first_message.content[0].text
def test_condensation_triggered_by_user_message_in_context(agent):
    """Test that the user message triggering condensation is part of the context passed to the LLM."""
    condenser = LLMAgentCacheCondenser(trigger_word='CONDENSE!', max_size=500)
    agent.condenser = condenser

    # Opening user message stating the goal
    goal_message = MessageAction('I want you to do some things for me.')
    goal_message._source = 'user'  # type: ignore [attr-defined]
    goal_message._id = 1  # type: ignore [attr-defined]

    # Three agent replies
    agent_replies = []
    for index in range(3):
        reply = MessageAction(f'Agent response {index}')
        reply._source = 'agent'  # type: ignore [attr-defined]
        reply._id = index + 2  # type: ignore [attr-defined]
        agent_replies.append(reply)

    # Closing user message carrying the trigger word
    trigger_message = MessageAction('Please CONDENSE! the conversation history.')
    trigger_message._source = 'user'  # type: ignore [attr-defined]
    trigger_message._id = 5  # type: ignore [attr-defined]

    history = [goal_message, *agent_replies, trigger_message]
    state = State(history=cast(list[Event], history))

    canned_completion = MagicMock()
    canned_completion.choices = [MagicMock()]
    canned_completion.choices[0].message.content = """
USER_CONTEXT: Simple greeting
COMPLETED: User and AI greeted each other
PENDING: None
CURRENT_STATE: Conversation initialized
"""

    with patch.object(
        agent.llm, 'completion', return_value=canned_completion
    ) as mock_completion:
        # Perform condensation
        condenser.condensed_history(state, agent)

        # The LLM must have been asked exactly once
        mock_completion.assert_called_once()

        # Inspect the keyword arguments handed to the LLM
        sent_messages = mock_completion.call_args.kwargs.get('messages', [])

        # Both the first user message and the trigger message must be in the context
        goal_present = any(
            'I want you to do some things for me.' in message['content']
            for message in sent_messages
        )
        assert goal_present, 'First user message should be preserved in the context'

        trigger_present = any(
            'Please CONDENSE! the conversation history.' in message['content']
            for message in sent_messages
        )
        assert trigger_present, 'Trigger message should be included in the context'
def test_condensation_with_followup_events(agent):
    """Test that the user message triggering condensation and follow-up events are part of the context passed to the LLM."""
    condenser = LLMAgentCacheCondenser(
        trigger_word='CONDENSE!', max_size=500, keep_user_messages=True
    )
    agent.condenser = condenser

    # Opening user message stating the goal
    goal_message = MessageAction('I want you to do some things for me.')
    goal_message._source = EventSource.USER  # type: ignore [attr-defined]
    goal_message._id = 1  # type: ignore [attr-defined]

    # Three agent replies
    agent_replies = []
    for index in range(3):
        reply = MessageAction(f'Agent response {index}')
        reply._source = EventSource.AGENT  # type: ignore [attr-defined]
        reply._id = index + 2  # type: ignore [attr-defined]
        agent_replies.append(reply)

    # User message carrying the trigger word
    trigger_message = MessageAction('Please CONDENSE! the conversation history.')
    trigger_message._source = EventSource.USER  # type: ignore [attr-defined]
    trigger_message._id = 5  # type: ignore [attr-defined]

    # Events that follow the trigger message
    state_change = ChangeAgentStateAction(
        agent_state='running',
        thought='',
    )
    state_change._id = 6  # type: ignore [attr-defined]
    state_change._source = EventSource.ENVIRONMENT  # type: ignore [attr-defined]

    recall = RecallAction(
        recall_type=RecallType.WORKSPACE_CONTEXT,
        query='hi',
        thought='',
    )
    recall._id = 7  # type: ignore [attr-defined]
    recall._source = EventSource.USER  # type: ignore [attr-defined]

    history = [goal_message, *agent_replies, trigger_message, state_change, recall]
    state = State(history=cast(list[Event], history))

    canned_completion = MagicMock()
    canned_completion.choices = [MagicMock()]
    canned_completion.choices[0].message.content = """
USER_CONTEXT: Simple greeting
COMPLETED: User and AI greeted each other
PENDING: None
CURRENT_STATE: Conversation initialized
"""

    with patch.object(
        agent.llm, 'completion', return_value=canned_completion
    ) as mock_completion:
        # Perform condensation
        condensation = condenser.condensed_history(state, agent)

        # The LLM must have been asked exactly once
        mock_completion.assert_called_once()

        # Inspect the keyword arguments handed to the LLM
        sent_messages = mock_completion.call_args.kwargs.get('messages', [])

        # The trigger message must be part of the context
        trigger_present = any(
            'Please CONDENSE! the conversation history.' in message['content']
            for message in sent_messages
        )
        assert trigger_present, 'Trigger message should be included in the context'

        assert isinstance(condensation, Condensation)
        assert hasattr(condensation, 'action')

        # Both user messages are kept; the agent replies and follow-up events are forgotten
        expected_forgotten = [
            event.id for event in [*agent_replies, state_change, recall]
        ]
        assert condensation.action.forgotten_event_ids == expected_forgotten
        assert (
            condensation.action.summary
            == canned_completion.choices[0].message.content
        )
        assert condensation.action.summary_offset is None
def test_keep_first_functionality(agent: CodeActAgent):
    """Test that the LLMAgentCacheCondenser keeps the first `keep_first` events.

    Ten alternating user/agent messages (ids 1..10) are handed to a condenser
    configured with ``max_size=5`` and ``keep_first=2``. The resulting
    condensation must forget at least one event, must not forget any of the
    first ``keep_first`` events, and must carry the mocked LLM summary.
    """
    keep_first = 2
    condenser = LLMAgentCacheCondenser(max_size=5, keep_first=keep_first)
    agent.condenser = condenser

    # Create events exceeding max_size
    events = []
    for i in range(10):
        event = MessageAction(f'Message {i}')
        event._source = 'user' if i % 2 == 0 else 'agent'  # type: ignore [attr-defined]
        event._id = i + 1  # type: ignore [attr-defined]
        events.append(event)

    state = State(history=cast(list[Event], events))

    set_next_llm_response(
        agent,
        """
USER_CONTEXT: Simple greeting
COMPLETED: User and AI greeted each other
PENDING: None
CURRENT_STATE: Conversation initialized
""",
    )

    result = condenser.condensed_history(state, agent)

    # Verify that a Condensation is returned
    assert isinstance(result, Condensation)
    result.action._id = 20  # type: ignore [attr-defined]

    forgotten_event_ids = set(result.action.forgotten_event_ids)
    preserved_event_ids = {event._id for event in events[:keep_first]}  # type: ignore [attr-defined]

    # Something must actually have been condensed away; otherwise the
    # disjointness check below would pass vacuously.
    assert forgotten_event_ids, 'Condensation should forget at least one event'

    # Check that the first `keep_first` events are preserved. The previous
    # `all(...) is False` form only proved that *some* forgotten id lay outside
    # the preserved range, so it still passed when the first events were
    # forgotten as well.
    assert preserved_event_ids.isdisjoint(forgotten_event_ids), (
        f'First {keep_first} events must not be forgotten, '
        f'but forgotten ids were {sorted(forgotten_event_ids)}'
    )

    # Check that the summary contains the greeting information
    assert 'User and AI greeted each other' in result.action.summary
+19 -12
View File
@@ -5,6 +5,7 @@ from openhands.core.message_utils import (
from openhands.events.event import Event
from openhands.events.tool import ToolCallMetadata
from openhands.llm.metrics import Metrics, TokenUsage
from tests.unit.testing_utils import create_tool_call_metadata
def test_get_token_usage_for_event():
@@ -28,15 +29,14 @@ def test_get_token_usage_for_event():
# Create an event referencing that response_id
event = Event()
mock_tool_call_metadata = ToolCallMetadata(
# Use our utility function to create tool_call_metadata
mock_tool_call_metadata = create_tool_call_metadata(
tool_call_id='test-tool-call',
function_name='fake_function',
model_response={'id': 'test-response-id'},
model_response_id='test-response-id',
total_calls_in_response=1,
)
event._tool_call_metadata = (
mock_tool_call_metadata # normally you'd do event.tool_call_metadata = ...
)
event._tool_call_metadata = mock_tool_call_metadata
# We should find that usage record
found = get_token_usage_for_event(event, metrics)
@@ -45,7 +45,14 @@ def test_get_token_usage_for_event():
assert found.response_id == 'test-response-id'
# If we change the event's response ID, we won't find anything
mock_tool_call_metadata.model_response.id = 'some-other-id'
# Create a new tool_call_metadata with a different response ID
mock_tool_call_metadata = create_tool_call_metadata(
tool_call_id='test-tool-call',
function_name='fake_function',
model_response_id='some-other-id',
total_calls_in_response=1,
)
event._tool_call_metadata = mock_tool_call_metadata
found2 = get_token_usage_for_event(event, metrics)
assert found2 is None
@@ -87,17 +94,17 @@ def test_get_token_usage_for_event_id():
e._id = i
# We'll attach usage_1 to event 1, usage_2 to event 3
if i == 1:
e._tool_call_metadata = ToolCallMetadata(
e._tool_call_metadata = create_tool_call_metadata(
tool_call_id='tid1',
function_name='fn1',
model_response={'id': 'resp-1'},
model_response_id='resp-1',
total_calls_in_response=1,
)
elif i == 3:
e._tool_call_metadata = ToolCallMetadata(
e._tool_call_metadata = create_tool_call_metadata(
tool_call_id='tid2',
function_name='fn2',
model_response={'id': 'resp-2'},
model_response_id='resp-2',
total_calls_in_response=1,
)
events.append(e)
@@ -141,10 +148,10 @@ def test_get_token_usage_for_event_fallback():
event = Event()
# Provide some mismatched tool_call_metadata:
event._tool_call_metadata = ToolCallMetadata(
event._tool_call_metadata = create_tool_call_metadata(
tool_call_id='irrelevant-tool-call',
function_name='fake_function',
model_response={'id': 'not-matching-any-usage'},
model_response_id='not-matching-any-usage',
total_calls_in_response=1,
)
# But also set event.response_id to the actual usage ID
+4 -4
View File
@@ -52,7 +52,7 @@ def response_mock(content: str, tool_call_id: str):
return ModelResponse(**MockModelResponse(content, tool_call_id).model_dump())
def test_get_messages(codeact_agent: CodeActAgent):
def testget_messages(codeact_agent: CodeActAgent):
# Add some events to history
history = list()
# Add system message action
@@ -76,7 +76,7 @@ def test_get_messages(codeact_agent: CodeActAgent):
history.append(message_action_5)
codeact_agent.reset()
messages = codeact_agent._get_messages(history)
messages = codeact_agent.get_messages(history)
assert (
len(messages) == 6
@@ -99,7 +99,7 @@ def test_get_messages(codeact_agent: CodeActAgent):
assert messages[5].content[0].cache_prompt
def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
def testget_messages_prompt_caching(codeact_agent: CodeActAgent):
history = list()
# Add system message action
system_message_action = codeact_agent.get_system_message()
@@ -115,7 +115,7 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
history.append(message_action_agent)
codeact_agent.reset()
messages = codeact_agent._get_messages(history)
messages = codeact_agent.get_messages(history)
# Check that only the last two user messages have cache_prompt=True
cached_user_messages = [
+55
View File
@@ -0,0 +1,55 @@
"""Utility functions for tests."""
from openhands.events.tool import ToolCallMetadata
def create_tool_call_metadata(
    tool_call_id: str = 'tool_call_0',
    function_name: str = 'str_replace_editor',
    model_response_id: str = 'model_response_0',
    total_calls_in_response: int = 1,
) -> ToolCallMetadata:
    """
    Build a ToolCallMetadata instance for use in tests.

    The ``model_response`` handed to ToolCallMetadata is a plain dictionary
    shaped as a single-choice assistant reply containing exactly one function
    tool call with empty JSON arguments.

    Args:
        tool_call_id: Used both as the metadata's tool call ID and as the ID of
            the tool call embedded in the model response
        function_name: Used both as the metadata's function name and as the
            name of the function in the embedded tool call
        model_response_id: The ``id`` field of the model response dictionary
        total_calls_in_response: Forwarded unchanged to ToolCallMetadata; the
            embedded response always lists one tool call regardless of this value

    Returns:
        The ToolCallMetadata built from the arguments above
    """
    # Assemble the response from the innermost piece outwards.
    function_payload = {
        'name': function_name,
        'arguments': '{}',  # Empty JSON object as string
    }
    tool_call_entry = {
        'id': tool_call_id,
        'type': 'function',
        'function': function_payload,
    }
    assistant_message = {
        'role': 'assistant',
        'content': '',
        'tool_calls': [tool_call_entry],
    }
    response_payload = {
        'id': model_response_id,
        'choices': [{'message': assistant_message}],
    }

    return ToolCallMetadata(
        tool_call_id=tool_call_id,
        function_name=function_name,
        model_response=response_payload,
        total_calls_in_response=total_calls_in_response,
    )