mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
23 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2a041e70a4 | |||
| 0c85ef8a55 | |||
| 0e58611dc2 | |||
| cdbbc1e4e6 | |||
| 068f86b102 | |||
| dee7305826 | |||
| 1e6e817c53 | |||
| 7b08b89460 | |||
| f7ad303842 | |||
| 0642ddd16d | |||
| 0c55c9acec | |||
| b0fa78ed6a | |||
| 59de045b26 | |||
| 8497b8d6d2 | |||
| e2c6dfb0ab | |||
| d86cb1e4cd | |||
| b51fe287e9 | |||
| 5d42adcb42 | |||
| 6a1f5fd812 | |||
| 8cde944f1a | |||
| 5aa64e64d8 | |||
| 6b8cd2025d | |||
| 797acd021e |
+10
-1
@@ -201,7 +201,6 @@ model = "gpt-4o"
|
||||
#native_tool_calling = None
|
||||
|
||||
|
||||
|
||||
[llm.gpt4o-mini]
|
||||
api_key = ""
|
||||
model = "gpt-4o"
|
||||
@@ -386,6 +385,16 @@ type = "noop"
|
||||
# Maximum size of history before triggering attention mechanism
|
||||
#max_size = 100
|
||||
|
||||
# 7. LLM Agent Cache Condenser
|
||||
#type = "agentcache"
|
||||
# Maximum number of events before condensation is triggered
|
||||
#max_size = 100
|
||||
# Word that triggers condensation when found in user messages
|
||||
#trigger_word = "CONDENSE!"
|
||||
# Number of initial events to always keep (typically includes task description)
|
||||
#keep_first = 1
|
||||
# Note: This condenser should only be used for LLM models that make use of caching.
|
||||
|
||||
# Example of a custom LLM configuration for condensers that require an LLM
|
||||
# If not provided, it falls back to the default LLM
|
||||
#[llm.condenser]
|
||||
|
||||
@@ -44,6 +44,8 @@ from openhands.core.config import (
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.config.utils import get_condenser_config_arg
|
||||
from openhands.core.config.condenser_config import NoOpCondenserConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime, run_controller
|
||||
from openhands.critic import AgentFinishedCritic
|
||||
@@ -744,6 +746,12 @@ if __name__ == '__main__':
|
||||
choices=['swe', 'swt', 'swt-ci'],
|
||||
help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--condenser-config',
|
||||
type=str,
|
||||
default=None,
|
||||
help='Name of the condenser config to use, e.g., "default_4_20" for [condenser.default_4_20] section in config.toml',
|
||||
)
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
|
||||
@@ -780,6 +788,18 @@ if __name__ == '__main__':
|
||||
if llm_config is None:
|
||||
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
|
||||
|
||||
condenser_config = None
|
||||
if args.condenser_config:
|
||||
condenser_config = get_condenser_config_arg(args.condenser_config)
|
||||
if condenser_config is None:
|
||||
raise ValueError(
|
||||
f'Could not find Condenser config: --condenser-config {args.condenser_config}'
|
||||
)
|
||||
else:
|
||||
# If no specific condenser config is provided via args, default to NoOpCondenser
|
||||
condenser_config = NoOpCondenserConfig()
|
||||
logger.warning('No Condenser config provided via --condenser-config, using NoOpCondenser.')
|
||||
|
||||
details = {'mode': args.mode}
|
||||
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
|
||||
|
||||
@@ -794,6 +814,7 @@ if __name__ == '__main__':
|
||||
args.eval_note,
|
||||
args.eval_output_dir,
|
||||
details=details,
|
||||
condenser_config=condenser_config,
|
||||
)
|
||||
|
||||
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
|
||||
|
||||
@@ -11,8 +11,10 @@ MAX_ITER=$5
|
||||
NUM_WORKERS=$6
|
||||
DATASET=$7
|
||||
SPLIT=$8
|
||||
N_RUNS=$9
|
||||
MODE=${10}
|
||||
CONDENSER_CONFIG=$9
|
||||
N_RUNS=${10}
|
||||
MODE=${11}
|
||||
|
||||
|
||||
if [ -z "$NUM_WORKERS" ]; then
|
||||
NUM_WORKERS=1
|
||||
@@ -51,6 +53,12 @@ if [ -z "$MODE" ]; then
|
||||
echo "MODE not specified, use default $MODE"
|
||||
fi
|
||||
|
||||
if [ -n "$CONDENSER_CONFIG" ]; then
|
||||
echo "Using Condenser Config: $CONDENSER_CONFIG"
|
||||
else
|
||||
echo "No Condenser Config provided, use default (NoOpCondenser)."
|
||||
fi
|
||||
|
||||
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
|
||||
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
|
||||
|
||||
@@ -91,7 +99,19 @@ fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note="${1}"
|
||||
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
|
||||
local base_command="evaluation/benchmarks/swe_bench/run_infer.py"
|
||||
|
||||
# Check if DEBUG_EVAL environment variable is set to true
|
||||
if [[ "${DEBUG_EVAL}" == "true" ]]; then
|
||||
echo "Running in DEBUG mode with debugpy, listening on port 5678"
|
||||
# Prepend DEBUG=true to set the environment variable for the python process
|
||||
# Use standard debugpy port 5678
|
||||
COMMAND="DEBUG=true poetry run debugpy --listen 0.0.0.0:5678 --wait-for-client $base_command"
|
||||
else
|
||||
COMMAND="poetry run python $base_command"
|
||||
fi
|
||||
|
||||
COMMAND="$COMMAND \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
@@ -101,6 +121,12 @@ function run_eval() {
|
||||
--split $SPLIT \
|
||||
--mode $MODE"
|
||||
|
||||
# Conditionally add the condenser config argument.
|
||||
# The Python script defaults to NoOpCondenserConfig if this argument is not provided.
|
||||
if [ -n "$CONDENSER_CONFIG" ]; then
|
||||
COMMAND="$COMMAND --condenser-config $CONDENSER_CONFIG"
|
||||
fi
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import copy
|
||||
import os
|
||||
from collections import deque
|
||||
from typing import Any
|
||||
|
||||
from litellm import ChatCompletionToolParam
|
||||
|
||||
@@ -15,7 +16,7 @@ from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
|
||||
)
|
||||
from openhands.agenthub.codeact_agent.tools.think import ThinkTool
|
||||
from openhands.agenthub.codeact_agent.tools.web_read import WebReadTool
|
||||
from openhands.controller.agent import Agent
|
||||
from openhands.controller.agent import Agent, LLMCompletionProvider
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import AgentConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
@@ -37,7 +38,7 @@ from openhands.runtime.plugins import (
|
||||
from openhands.utils.prompt import PromptManager
|
||||
|
||||
|
||||
class CodeActAgent(Agent):
|
||||
class CodeActAgent(Agent, LLMCompletionProvider):
|
||||
VERSION = '2.2'
|
||||
"""
|
||||
The Code Act Agent is a minimalist agent.
|
||||
@@ -162,7 +163,7 @@ class CodeActAgent(Agent):
|
||||
# event we'll just return that instead of an action. The controller will
|
||||
# immediately ask the agent to step again with the new view.
|
||||
condensed_history: list[Event] = []
|
||||
match self.condenser.condensed_history(state):
|
||||
match self.condenser.condensed_history(state, self):
|
||||
case View(events=events):
|
||||
condensed_history = events
|
||||
|
||||
@@ -173,7 +174,19 @@ class CodeActAgent(Agent):
|
||||
f'Processing {len(condensed_history)} events from a total of {len(state.history)} events'
|
||||
)
|
||||
|
||||
messages = self._get_messages(condensed_history)
|
||||
params = self.build_llm_completion_params(condensed_history, state)
|
||||
response = self.llm.completion(**params)
|
||||
logger.debug(f'Response from LLM: {response}')
|
||||
actions = self.response_to_actions_fn(response)
|
||||
logger.debug(f'Actions after response_to_actions: {actions}')
|
||||
for action in actions:
|
||||
self.pending_actions.append(action)
|
||||
return self.pending_actions.popleft()
|
||||
|
||||
def build_llm_completion_params(
|
||||
self, condensed_history: list[Event], state: State
|
||||
) -> dict[str, Any]:
|
||||
messages = self.get_messages(condensed_history)
|
||||
params: dict = {
|
||||
'messages': self.llm.format_messages_for_llm(messages),
|
||||
}
|
||||
@@ -208,15 +221,9 @@ class CodeActAgent(Agent):
|
||||
params['tools'] += unique_mcp_tools
|
||||
# log to litellm proxy if possible
|
||||
params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)}
|
||||
response = self.llm.completion(**params)
|
||||
logger.debug(f'Response from LLM: {response}')
|
||||
actions = self.response_to_actions_fn(response)
|
||||
logger.debug(f'Actions after response_to_actions: {actions}')
|
||||
for action in actions:
|
||||
self.pending_actions.append(action)
|
||||
return self.pending_actions.popleft()
|
||||
return params
|
||||
|
||||
def _get_messages(self, events: list[Event]) -> list[Message]:
|
||||
def get_messages(self, events: list[Event]) -> list[Message]:
|
||||
"""Constructs the message history for the LLM conversation.
|
||||
|
||||
This method builds a structured conversation history by processing events from the state
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING, Type
|
||||
from typing import TYPE_CHECKING, Any, TypedDict
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.message import Message
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import AgentConfig
|
||||
from openhands.events.action import Action
|
||||
from openhands.events.action.message import SystemMessageAction
|
||||
@@ -13,7 +15,7 @@ from openhands.core.exceptions import (
|
||||
AgentNotRegisteredError,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.event import EventSource
|
||||
from openhands.events.event import Event, EventSource
|
||||
from openhands.llm.llm import LLM
|
||||
from openhands.runtime.plugins import PluginRequirement
|
||||
|
||||
@@ -30,7 +32,7 @@ class Agent(ABC):
|
||||
It tracks the execution status and maintains a history of interactions.
|
||||
"""
|
||||
|
||||
_registry: dict[str, Type['Agent']] = {}
|
||||
_registry: dict[str, type['Agent']] = {}
|
||||
sandbox_plugins: list[PluginRequirement] = []
|
||||
|
||||
def __init__(
|
||||
@@ -46,8 +48,8 @@ class Agent(ABC):
|
||||
self.tools: list = []
|
||||
|
||||
def get_system_message(self) -> 'SystemMessageAction | None':
|
||||
"""
|
||||
Returns a SystemMessageAction containing the system message and tools.
|
||||
"""Returns a SystemMessageAction containing the system message and tools.
|
||||
|
||||
This will be added to the event stream as the first message.
|
||||
|
||||
Returns:
|
||||
@@ -91,15 +93,16 @@ class Agent(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def step(self, state: 'State') -> 'Action':
|
||||
"""Starts the execution of the assigned instruction. This method should
|
||||
be implemented by subclasses to define the specific execution logic.
|
||||
"""Starts the execution of the assigned instruction.
|
||||
|
||||
This method should be implemented by subclasses to define the specific execution logic.
|
||||
"""
|
||||
pass
|
||||
|
||||
def reset(self) -> None:
|
||||
"""Resets the agent's execution status and clears the history. This method can be used
|
||||
to prepare the agent for restarting the instruction or cleaning up before destruction.
|
||||
"""Resets the agent's execution status and clears the history.
|
||||
|
||||
This method can be used to prepare the agent for restarting the instruction or cleaning up before destruction.
|
||||
"""
|
||||
# TODO clear history
|
||||
self._complete = False
|
||||
@@ -112,12 +115,12 @@ class Agent(ABC):
|
||||
return self.__class__.__name__
|
||||
|
||||
@classmethod
|
||||
def register(cls, name: str, agent_cls: Type['Agent']) -> None:
|
||||
def register(cls, name: str, agent_cls: type['Agent']) -> None:
|
||||
"""Registers an agent class in the registry.
|
||||
|
||||
Parameters:
|
||||
- name (str): The name to register the class under.
|
||||
- agent_cls (Type['Agent']): The class to register.
|
||||
- agent_cls (type['Agent']): The class to register.
|
||||
|
||||
Raises:
|
||||
- AgentAlreadyRegisteredError: If name already registered
|
||||
@@ -127,14 +130,14 @@ class Agent(ABC):
|
||||
cls._registry[name] = agent_cls
|
||||
|
||||
@classmethod
|
||||
def get_cls(cls, name: str) -> Type['Agent']:
|
||||
def get_cls(cls, name: str) -> type['Agent']:
|
||||
"""Retrieves an agent class from the registry.
|
||||
|
||||
Parameters:
|
||||
- name (str): The name of the class to retrieve
|
||||
|
||||
Returns:
|
||||
- agent_cls (Type['Agent']): The class registered under the specified name.
|
||||
- agent_cls (type['Agent']): The class registered under the specified name.
|
||||
|
||||
Raises:
|
||||
- AgentNotRegisteredError: If name not registered
|
||||
@@ -158,6 +161,43 @@ class Agent(ABC):
|
||||
"""Sets the list of MCP tools for the agent.
|
||||
|
||||
Args:
|
||||
- mcp_tools (list[dict]): The list of MCP tools.
|
||||
mcp_tools: The list of MCP tools.
|
||||
"""
|
||||
self.mcp_tools = mcp_tools
|
||||
|
||||
|
||||
class LLMCompletionParams(TypedDict, total=False):
    """Typed shape of a keyword-argument dict for an LLM completion call.

    Declared with ``total=False``, so every key is optional.
    """

    # NOTE(review): presumably mirrors the dict produced by
    # `build_llm_completion_params` (which is annotated `dict[str, Any]`);
    # confirm before relying on it.
    messages: list[Message]
    tools: list[Any] | None
    extra_body: dict[str, Any] | None
    extra: dict[str, Any] | None
|
||||
|
||||
|
||||
class LLMCompletionProvider(ABC):
    """Mixin contract for agents that expose how they build their LLM calls.

    A condenser that wants to share the agent's prompt cache needs to issue a
    request that starts exactly like the agent's own request. Agents that
    implement this interface give such a condenser access to their LLM, their
    event-to-message conversion, and their completion parameters.
    """

    # The LLM instance the agent itself uses for completions.
    llm: LLM

    @abstractmethod
    def get_messages(self, condensed_history: list[Event]) -> list[Message]:
        """Turn a list of events into the messages sent to the LLM.

        Args:
            condensed_history: events to convert.

        Returns:
            The corresponding list of messages.
        """

    @abstractmethod
    def build_llm_completion_params(
        self, condensed_history: list[Event], state: State
    ) -> dict[str, Any]:
        """Assemble the keyword arguments for an LLM completion call.

        Args:
            condensed_history: events to convert to messages for the LLM.
            state: the current state.

        Returns:
            A dict of parameters for the LLM completion.
        """
|
||||
|
||||
@@ -58,6 +58,28 @@ class RecentEventsCondenserConfig(BaseModel):
|
||||
model_config = {'extra': 'forbid'}
|
||||
|
||||
|
||||
class LLMAgentCacheCondenserConfig(BaseModel):
    """Configuration for LLMAgentCacheCondenser."""

    # Discriminator value selecting this condenser in the config.
    type: Literal['agentcache'] = Field('agentcache')

    # Event-count threshold; must be at least 1.
    max_size: int = Field(
        ge=1,
        default=100,
        description='Maximum number of events before condensation is triggered.',
    )

    # Marker in user messages that triggers condensation.
    trigger_word: str = Field(
        description='Word that triggers condensation when found in user messages.',
        default='CONDENSE!',
    )

    # Leading events that are always retained; zero is allowed.
    keep_first: int = Field(
        ge=0,
        default=1,
        description='Number of initial events to always keep in history.',
    )

    # Unknown keys in the config section are rejected.
    model_config = {'extra': 'forbid'}
|
||||
|
||||
|
||||
class LLMSummarizingCondenserConfig(BaseModel):
|
||||
"""Configuration for LLMCondenser."""
|
||||
|
||||
@@ -181,6 +203,7 @@ CondenserConfig = (
|
||||
| LLMAttentionCondenserConfig
|
||||
| StructuredSummaryCondenserConfig
|
||||
| CondenserPipelineConfig
|
||||
| LLMAgentCacheCondenserConfig
|
||||
)
|
||||
|
||||
|
||||
@@ -284,6 +307,7 @@ def create_condenser_config(condenser_type: str, data: dict) -> CondenserConfig:
|
||||
'amortized': AmortizedForgettingCondenserConfig,
|
||||
'llm_attention': LLMAttentionCondenserConfig,
|
||||
'structured': StructuredSummaryCondenserConfig,
|
||||
'agentcache': LLMAgentCacheCondenserConfig,
|
||||
}
|
||||
|
||||
if condenser_type not in condenser_classes:
|
||||
|
||||
@@ -16,7 +16,11 @@ from openhands import __version__
|
||||
from openhands.core import logger
|
||||
from openhands.core.config.agent_config import AgentConfig
|
||||
from openhands.core.config.app_config import AppConfig
|
||||
from openhands.core.config.condenser_config import condenser_config_from_toml_section
|
||||
from openhands.core.config.condenser_config import (
|
||||
CondenserConfig,
|
||||
condenser_config_from_toml_section,
|
||||
create_condenser_config,
|
||||
)
|
||||
from openhands.core.config.config_utils import (
|
||||
OH_DEFAULT_AGENT,
|
||||
OH_MAX_ITERATIONS,
|
||||
@@ -436,6 +440,118 @@ def get_llm_config_arg(
|
||||
return None
|
||||
|
||||
|
||||
def get_condenser_config_arg(
    condenser_config_arg: str, toml_file: str = 'config.toml'
) -> CondenserConfig | None:
    """Get a group of condenser settings from the config file by name.

    A group in config.toml can look like this:

    ```
    [condenser.my_summarizer]
    type = 'llm'
    llm_config = 'gpt-4o'  # References [llm.gpt-4o]
    max_size = 50
    ...
    ```

    The user-defined group name, like "my_summarizer", is the argument to this
    function. Surrounding square brackets and a leading "condenser." prefix are
    accepted and stripped, so "[condenser.my_summarizer]" works as well.

    Every failure (missing or unparsable file, missing or malformed section,
    missing "type", unresolved LLM reference, invalid settings) is logged and
    reported as ``None``; no exception is raised for those cases.

    Args:
        condenser_config_arg: The group of condenser settings to get from the config.toml file.
        toml_file: Path to the configuration file to read from. Defaults to 'config.toml'.

    Returns:
        The CondenserConfig built from the section, or None if not found/error.
    """
    # Keep only the bare group name: drop "[...]" and the "condenser." prefix.
    section_name = condenser_config_arg.strip('[]').removeprefix('condenser.')

    logger.openhands_logger.debug(
        f'Loading condenser config [{section_name}] from {toml_file}'
    )

    # Load the toml file.
    try:
        with open(toml_file, 'r', encoding='utf-8') as toml_contents:
            toml_config = toml.load(toml_contents)
    except FileNotFoundError as e:
        logger.openhands_logger.error(f'Config file not found: {toml_file}. Error: {e}')
        return None
    except toml.TomlDecodeError as e:
        logger.openhands_logger.error(
            f'Cannot parse condenser group [{section_name}] from {toml_file}. Exception: {e}'
        )
        return None

    # Check that the condenser table and the requested group exist. A
    # non-table "condenser" value is treated the same as a missing one.
    condenser_sections = toml_config.get('condenser')
    if not isinstance(condenser_sections, dict) or section_name not in condenser_sections:
        logger.openhands_logger.error(
            f'Condenser config section [condenser.{section_name}] not found in {toml_file}'
        )
        return None

    section = condenser_sections[section_name]
    if not isinstance(section, dict):
        # e.g. `[condenser]\ntype = "noop"` looked up with the name "type":
        # the entry exists but is a scalar, not a settings table.
        logger.openhands_logger.error(
            f'Condenser config entry [condenser.{section_name}] in {toml_file} is not a table'
        )
        return None

    # Shallow copy so the LLM reference can be replaced without touching the
    # parsed document.
    condenser_data = dict(section)

    condenser_type = condenser_data.get('type')
    if not condenser_type:
        logger.openhands_logger.error(
            f'Missing "type" field in [condenser.{section_name}] section of {toml_file}'
        )
        return None

    # LLM-backed condensers may reference an [llm.<name>] group by name;
    # resolve that reference into an actual LLM config object.
    llm_reference = condenser_data.get('llm_config')
    if condenser_type in ('llm', 'llm_attention', 'structured') and isinstance(
        llm_reference, str
    ):
        logger.openhands_logger.debug(
            f'Condenser [{section_name}] requires LLM config [{llm_reference}]. Loading it...'
        )
        referenced_llm_config = get_llm_config_arg(llm_reference, toml_file=toml_file)
        if not referenced_llm_config:
            logger.openhands_logger.error(
                f"Failed to load required LLM config '{llm_reference}' for condenser '{section_name}'."
            )
            return None
        condenser_data['llm_config'] = referenced_llm_config

    # Create the condenser config instance.
    try:
        config = create_condenser_config(condenser_type, condenser_data)
    except (ValidationError, ValueError) as e:
        logger.openhands_logger.error(
            f'Invalid condenser configuration for [{section_name}]: {e}.'
        )
        return None
    else:
        logger.openhands_logger.info(
            f'Successfully loaded condenser config [{section_name}] from {toml_file}'
        )
        return config
|
||||
|
||||
|
||||
# Command line arguments
|
||||
def get_parser() -> argparse.ArgumentParser:
|
||||
"""Get the argument parser."""
|
||||
|
||||
@@ -158,11 +158,10 @@ class CondensationAction(Action):
|
||||
# Either way, we can only have one of the two valid configurations.
|
||||
forgotten_event_configuration = using_event_ids ^ using_event_range
|
||||
|
||||
# We also need to check that if the summary is provided, so is the
|
||||
# offset (and vice versa).
|
||||
# Check that if we have a summary_offset, we also have a summary
|
||||
summary_configuration = (
|
||||
self.summary is None and self.summary_offset is None
|
||||
) or (self.summary is not None and self.summary_offset is not None)
|
||||
self.summary is not None if self.summary_offset is not None else True
|
||||
)
|
||||
|
||||
return forgotten_event_configuration and summary_configuration
|
||||
|
||||
|
||||
+15
-9
@@ -40,14 +40,14 @@ __all__ = ['LLM']
|
||||
# tuple of exceptions to retry on
|
||||
LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
|
||||
RateLimitError,
|
||||
litellm.Timeout,
|
||||
litellm.InternalServerError,
|
||||
litellm.Timeout, # type: ignore
|
||||
litellm.InternalServerError, # type: ignore
|
||||
LLMNoResponseError,
|
||||
)
|
||||
|
||||
# cache prompt supporting models
|
||||
# remove this when we gemini and deepseek are supported
|
||||
CACHE_PROMPT_SUPPORTED_MODELS = [
|
||||
# these models require special treatment so that caching
|
||||
# works
|
||||
EXPLICIT_CACHE_MODELS = [
|
||||
'claude-3-7-sonnet-20250219',
|
||||
'claude-3-5-sonnet-20241022',
|
||||
'claude-3-5-sonnet-20240620',
|
||||
@@ -249,11 +249,17 @@ class LLM(RetryMixin, DebugMixin):
|
||||
kwargs.pop('tool_choice', None)
|
||||
|
||||
# if we have no messages, something went very wrong
|
||||
if not messages:
|
||||
if not messages or len(messages) < 1:
|
||||
raise ValueError(
|
||||
'The messages list is empty. At least one message is required.'
|
||||
)
|
||||
|
||||
# anthropic requires at least one user message.
|
||||
if not any(message.get('role') == 'user' for message in messages):
|
||||
raise ValueError(
|
||||
'At least one message with role "user" is required for the completion.'
|
||||
)
|
||||
|
||||
# log the entire LLM prompt
|
||||
self.log_prompt(messages)
|
||||
|
||||
@@ -523,8 +529,8 @@ class LLM(RetryMixin, DebugMixin):
|
||||
return (
|
||||
self.config.caching_prompt is True
|
||||
and (
|
||||
self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
|
||||
or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
|
||||
self.config.model in EXPLICIT_CACHE_MODELS
|
||||
or self.config.model.split('/')[-1] in EXPLICIT_CACHE_MODELS
|
||||
)
|
||||
# We don't need to look-up model_info, because only Anthropic models needs the explicit caching breakpoint
|
||||
)
|
||||
@@ -662,7 +668,7 @@ class LLM(RetryMixin, DebugMixin):
|
||||
boolean: True if executing a local model.
|
||||
"""
|
||||
if self.config.base_url is not None:
|
||||
for substring in ['localhost', '127.0.0.1' '0.0.0.0']:
|
||||
for substring in ['localhost', '127.0.0.1', '0.0.0.0']:
|
||||
if substring in self.config.base_url:
|
||||
return True
|
||||
elif self.config.model is not None:
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import openhands.memory.condenser.impl # noqa F401 (we import this to get the condensers registered)
|
||||
from openhands.memory.condenser.condenser import (
|
||||
Condenser,
|
||||
get_condensation_metadata,
|
||||
View,
|
||||
Condensation,
|
||||
Condenser,
|
||||
View,
|
||||
get_condensation_metadata,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
|
||||
@@ -87,23 +87,33 @@ class Condenser(ABC):
|
||||
self.write_metadata(state)
|
||||
|
||||
@abstractmethod
|
||||
def condense(self, View) -> View | Condensation:
|
||||
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
|
||||
"""Condense a sequence of events into a potentially smaller list.
|
||||
|
||||
New condenser strategies should override this method to implement their own condensation logic. Call `self.add_metadata` in the implementation to record any relevant per-condensation diagnostic information.
|
||||
|
||||
Args:
|
||||
View: A view of the history containing all events that should be condensed.
|
||||
view: A view of the history containing all events that should be condensed.
|
||||
state: The current state, for context.
|
||||
agent: Optional agent for agent-aware condensation.
|
||||
|
||||
Returns:
|
||||
View | Condensation: A condensed view of the events or an event indicating the history has been condensed.
|
||||
"""
|
||||
|
||||
def condensed_history(self, state: State) -> View | Condensation:
|
||||
"""Condense the state's history."""
|
||||
def condensed_history(self, state: State, agent=None) -> View | Condensation:
|
||||
"""Condense the state's history.
|
||||
|
||||
Args:
|
||||
state: The current state.
|
||||
agent: Optional agent to use for agent-aware condensation.
|
||||
|
||||
Returns:
|
||||
A View or Condensation object.
|
||||
"""
|
||||
self._llm_metadata = state.to_llm_metadata('condenser')
|
||||
with self.metadata_batch(state):
|
||||
return self.condense(state.view)
|
||||
return self.condense(state.view, state, agent)
|
||||
|
||||
@classmethod
|
||||
def register_config(cls, configuration_type: type[CondenserConfig]) -> None:
|
||||
@@ -136,6 +146,9 @@ class Condenser(ABC):
|
||||
Raises:
|
||||
ValueError: If the condenser type is not recognized.
|
||||
"""
|
||||
# trigger the condenser implementations to register themselves
|
||||
import openhands.memory.condenser.impl # noqa: F401
|
||||
|
||||
try:
|
||||
condenser_class = CONDENSER_REGISTRY[type(config)]
|
||||
return condenser_class.from_config(config)
|
||||
@@ -156,14 +169,14 @@ class RollingCondenser(Condenser, ABC):
|
||||
"""Determine if a view should be condensed."""
|
||||
|
||||
@abstractmethod
|
||||
def get_condensation(self, view: View) -> Condensation:
|
||||
def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
|
||||
"""Get the condensation from a view."""
|
||||
|
||||
def condense(self, view: View) -> View | Condensation:
|
||||
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
|
||||
# If we trigger the condenser-specific condensation threshold, compute and return
|
||||
# the condensation.
|
||||
if self.should_condense(view):
|
||||
return self.get_condensation(view)
|
||||
return self.get_condensation(view, state, agent)
|
||||
|
||||
# Otherwise we're safe to just return the view.
|
||||
else:
|
||||
|
||||
@@ -4,6 +4,9 @@ from openhands.memory.condenser.impl.amortized_forgetting_condenser import (
|
||||
from openhands.memory.condenser.impl.browser_output_condenser import (
|
||||
BrowserOutputCondenser,
|
||||
)
|
||||
from openhands.memory.condenser.impl.llm_agent_cache_condenser import (
|
||||
LLMAgentCacheCondenser,
|
||||
)
|
||||
from openhands.memory.condenser.impl.llm_attention_condenser import (
|
||||
ImportantEventSelection,
|
||||
LLMAttentionCondenser,
|
||||
@@ -25,6 +28,7 @@ from openhands.memory.condenser.impl.structured_summary_condenser import (
|
||||
|
||||
__all__ = [
|
||||
'AmortizedForgettingCondenser',
|
||||
'LLMAgentCacheCondenser',
|
||||
'LLMAttentionCondenser',
|
||||
'ImportantEventSelection',
|
||||
'LLMSummarizingCondenser',
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config.condenser_config import AmortizedForgettingCondenserConfig
|
||||
from openhands.events.action.agent import CondensationAction
|
||||
from openhands.memory.condenser.condenser import (
|
||||
@@ -36,7 +37,7 @@ class AmortizedForgettingCondenser(RollingCondenser):
|
||||
|
||||
super().__init__()
|
||||
|
||||
def get_condensation(self, view: View) -> Condensation:
|
||||
def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
|
||||
target_size = self.max_size // 2
|
||||
head = view[: self.keep_first]
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config.condenser_config import BrowserOutputCondenserConfig
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.observation import BrowserOutputObservation
|
||||
@@ -17,7 +18,7 @@ class BrowserOutputCondenser(Condenser):
|
||||
self.attention_window = attention_window
|
||||
super().__init__()
|
||||
|
||||
def condense(self, view: View) -> View | Condensation:
|
||||
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
|
||||
"""Replace the content of browser observations outside of the attention window with a placeholder."""
|
||||
results: list[Event] = []
|
||||
cnt: int = 0
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
from openhands.controller.agent import LLMCompletionProvider
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.message import Message
|
||||
from openhands.events.event import Event
|
||||
from openhands.memory.condenser.condenser import Condensation, Condenser, View
|
||||
|
||||
|
||||
class CachingCondenser(Condenser, ABC):
|
||||
"""Abstract base class for condensers that use prompt caching.
|
||||
|
||||
This class provides a framework for condensers that begin their prompt with the
|
||||
whole current prompt, so they can use caching. They then add their own messages
|
||||
to instruct the LLM.
|
||||
|
||||
Subclasses need to implement:
|
||||
- createCondensationPrompt: Create the prompt for condensation
|
||||
- processResponse: Process the LLM response to create a Condensation
|
||||
"""
|
||||
|
||||
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
    """Condense the view's events through the agent's own LLM.

    The agent must implement the LLMCompletionProvider interface, which is
    how this condenser reaches the agent's LLM and message formatting.

    Args:
        view: The view to condense
        state: The current state
        agent: The agent to use for condensation

    Returns:
        The unchanged view when no condensation is due, otherwise whatever
        `_do_condensation` produces (a Condensation or a View).

    Raises:
        ValueError: if `state` or `agent` is missing, or if the agent does
            not implement LLMCompletionProvider.
    """
    # Validate the collaborators before looking at the view at all.
    if not state:
        raise ValueError('CachingCondenser: No state provided, cannot condense')
    if not agent:
        raise ValueError('CachingCondenser: No agent provided, cannot condense')
    if not isinstance(agent, LLMCompletionProvider):
        agent_type_name = agent.__class__.__name__
        raise ValueError(
            f'CachingCondenser: Agent {agent_type_name} does not implement '
            'LLMCompletionProvider interface, cannot condense'
        )

    # Only pay for an LLM call when the subclass says condensation is due.
    if self.should_condense(view):
        return self._do_condensation(view.events, state, agent)
    return view
|
||||
|
||||
def _do_condensation(
|
||||
self, events: list[Event], state: State, agent: LLMCompletionProvider
|
||||
) -> Condensation | View:
|
||||
"""Do a condensation for the given events.
|
||||
|
||||
Args:
|
||||
events: The events to condense
|
||||
state: The current state
|
||||
agent: The agent to use for condensation
|
||||
|
||||
Returns:
|
||||
A Condensation or View object
|
||||
"""
|
||||
|
||||
# Use the agent's method to build the parameters
|
||||
# This ensures that the parameters are consistent with the agent's LLM
|
||||
params = agent.build_llm_completion_params(events, state)
|
||||
|
||||
# Convert events to messages using the agent's method
|
||||
messages = agent.get_messages(events)
|
||||
|
||||
# Now we add our own prompt at the end
|
||||
messages.append(self.createCondensationPrompt(events, state, messages))
|
||||
|
||||
params['messages'] = agent.llm.format_messages_for_llm(messages)
|
||||
self._disable_cache(params['messages'])
|
||||
|
||||
# Get the LLM response
|
||||
response = agent.llm.completion(**params)
|
||||
self.add_metadata('response', response.model_dump())
|
||||
logger.info(f'Summarized {len(events)} events. Usage:{response}')
|
||||
self.add_metadata('metrics', agent.llm.metrics.get())
|
||||
|
||||
# Process the response
|
||||
return self.processResponse(events, state, response, messages)
|
||||
|
||||
def _disable_cache(self, messages: list[dict]) -> None:
|
||||
"""Disable the cache for the given messages. We need to do this because
|
||||
this conversation will not continue as we are just doing a condensation. So there
|
||||
is no way to cache could be used, so we save a little money this way.
|
||||
Effectively reversing ConversationMemory.apply_prompt_caching
|
||||
"""
|
||||
if len(messages) == 0:
|
||||
return
|
||||
|
||||
# only disable the cache for the last message(our new prompt), so
|
||||
# we can have a cache read for rest of the conversation.
|
||||
content = messages[-1]['content']
|
||||
|
||||
if content is not None:
|
||||
if isinstance(content, list):
|
||||
content = content[-1]
|
||||
if isinstance(content, dict) and content.get('cache_control') is not None:
|
||||
content['cache_control'] = None
|
||||
|
||||
@abstractmethod
|
||||
def createCondensationPrompt(
|
||||
self, events: list[Event], state: State, base_messages: list[Message]
|
||||
) -> Message:
|
||||
"""Create the prompt for condensation.
|
||||
|
||||
Args:
|
||||
events: The events to condense
|
||||
state: The current state
|
||||
messages: the messages that are already in the prompt(cached)
|
||||
|
||||
Returns:
|
||||
The message with condensation instructions
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def processResponse(
|
||||
self, events: list[Event], state: State, response: Any, messages: list[Message]
|
||||
) -> Condensation | View:
|
||||
"""Process the LLM response to create a Condensation.
|
||||
|
||||
Args:
|
||||
events: The events that were condensed
|
||||
state: The current state
|
||||
response: The LLM response
|
||||
messages: The messages that were already in the prompt(cached)
|
||||
|
||||
Returns:
|
||||
A Condensation or View object
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def should_condense(self, view: View) -> bool:
|
||||
"""Determine if a view should be condensed.
|
||||
|
||||
Args:
|
||||
view: The view to check
|
||||
|
||||
Returns:
|
||||
True if the view should be condensed, False otherwise
|
||||
"""
|
||||
pass
|
||||
@@ -0,0 +1,217 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config.condenser_config import LLMAgentCacheCondenserConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.message import Message, TextContent
|
||||
from openhands.core.schema.action import ActionType
|
||||
from openhands.events.action.agent import CondensationAction
|
||||
from openhands.events.action.message import MessageAction
|
||||
from openhands.events.event import Event, EventSource
|
||||
from openhands.memory.condenser.condenser import Condensation, View
|
||||
from openhands.memory.condenser.impl.caching_condenser import CachingCondenser
|
||||
|
||||
|
||||
class LLMAgentCacheCondenser(CachingCondenser):
    """A version of LLMSummarizingCondenser that uses prompt caching."""

    def __init__(
        self,
        max_size: int = 100,
        trigger_word: str = 'CONDENSE!',
        keep_user_messages: bool = False,
        keep_first: int = 1,
    ):
        """Initialize the condenser.

        Args:
            max_size: Maximum number of events before condensation is triggered
            trigger_word: Word that triggers condensation when found in user messages
            keep_user_messages: If True, the first and the most recent user message
                are never forgotten
            keep_first: Number of initial events to always retain

        Raises:
            ValueError: If keep_first is negative or not smaller than max_size
        """
        if keep_first >= max_size:
            raise ValueError(
                f'keep_first ({keep_first}) must be less than max_size ({max_size})'
            )
        if keep_first < 0:
            raise ValueError(f'keep_first ({keep_first}) cannot be negative')

        self.keep_first = keep_first
        self.max_size = max_size
        self.trigger_word = trigger_word
        self.keep_user_messages = keep_user_messages
        super().__init__()

    def createCondensationPrompt(
        self, events: list[Event], state: State, base_messages: list[Message]
    ) -> Message:
        """Create the prompt for condensation using a similar approach to LLMSummarizingCondenser.

        This method is required by the CachingCondenser abstract base class.

        Args:
            events: The events to condense
            state: The current state
            base_messages: The messages that are already in the prompt (cached)

        Returns:
            The message with condensation instructions
        """
        # Create the condensation instructions similar to LLMSummarizingCondenser
        prompt = """You are maintaining a context-aware state summary for an interactive agent.
The whole conversation above will be removed from the context window. Therefore you need to track:

USER_CONTEXT: (Preserve essential user requirements, goals, and clarifications in concise form)

COMPLETED: (Tasks completed so far, with brief results)
PENDING: (Tasks that still need to be done)
CURRENT_STATE: (Current variables, data structures, or relevant state)

For code-specific tasks, also include:
CODE_STATE: {File paths, function signatures, data structures}
TESTS: {Failing cases, error messages, outputs}
CHANGES: {Code edits, variable updates}
DEPS: {Dependencies, imports, external calls}
VERSION_CONTROL_STATUS: {Repository state, current branch, PR status, commit history}

PRIORITIZE:
1. Adapt tracking format to match the actual task type
2. Capture key user requirements and goals
3. Distinguish between completed and pending tasks
4. Keep all sections concise and relevant

SKIP: Tracking irrelevant details for the current task type

Example formats:

For code tasks:
USER_CONTEXT: Fix FITS card float representation issue
COMPLETED: Modified mod_float() in card.py, all tests passing
PENDING: Create PR, update documentation
CODE_STATE: mod_float() in card.py updated
TESTS: test_format() passed
CHANGES: str(val) replaces f"{val:.16G}"
DEPS: None modified
VERSION_CONTROL_STATUS: Branch: fix-float-precision, Latest commit: a1b2c3d

For other tasks:
USER_CONTEXT: Write 20 haikus based on coin flip results
COMPLETED: 15 haikus written for results [T,H,T,H,T,H,T,T,H,T,H,T,H,T,H]
PENDING: 5 more haikus needed
CURRENT_STATE: Last flip: Heads, Haiku count: 15/20"""

        prompt += '\n\n'

        # Create a message with the condensation instructions
        return Message(
            role='user',
            content=[TextContent(text=prompt)],
        )

    def processResponse(
        self, events: list[Event], state: State, response: Any, messages: list[Message]
    ) -> Condensation | View:
        """Turn the LLM summary into a Condensation that forgets the old events.

        Args:
            events: The events that were condensed
            state: The current state
            response: The LLM response; the summary is read from its first choice
            messages: The messages that were sent to the LLM

        Returns:
            A Condensation listing the forgotten event ids together with the summary,
            or a View over all events when there is nothing to forget
        """
        # Extract the summary from the response
        summary = response.choices[0].message.content

        # Everything after the first `keep_first` events (e.g., system messages)
        # is a candidate for forgetting. Slicing copies, so `events` is untouched.
        events_to_forget = events[self.keep_first :]

        # Ensure essential user messages are not forgotten
        if self.keep_user_messages:
            self._filter_user_messages_to_keep(events, events_to_forget)

        if not events_to_forget:
            # Nothing is forgotten, so every event (including user messages that
            # were just protected) must stay in the view.
            return View(events=events)

        return Condensation(
            action=CondensationAction(
                forgotten_event_ids=[event.id for event in events_to_forget],
                summary=summary,
            )
        )

    def should_condense(self, view: View) -> bool:
        """Determine if the view should be condensed.

        Condensation is triggered in two cases:
        1. When the number of events exceeds max_size
        2. When the most recent user message (looking back no further than the
           last condensation) contains the trigger word

        Args:
            view: The view to check

        Returns:
            True if the view should be condensed, False otherwise
        """
        events = view.events

        # Check if the number of events exceeds max_size
        if len(events) > self.max_size:
            logger.info(f'Condensing events due to max size({self.max_size}) limit.')
            return True

        # Check if the most recent user message contains the trigger word
        if self._contains_trigger_word(events):
            logger.info(f"Condensing events due to trigger word '{self.trigger_word}'.")
            return True

        return False

    def _contains_trigger_word(self, events: list[Event]) -> bool:
        """Check if the most recent user message contains the trigger word.

        Args:
            events: The events to check

        Returns:
            True if the most recent user message contains the trigger word, False otherwise
        """
        if len(events) < 2:  # Need at least 2 events to condense
            return False

        # Iterate through events in reverse order to find the last user message
        for event in reversed(events):
            if (
                hasattr(event, 'source')
                and event.source == EventSource.USER
                and hasattr(event, 'action')
                and event.action == ActionType.MESSAGE
                and event.message is not None
            ):
                return self.trigger_word in event.message

            # If we did a condensation, stop looking
            if hasattr(event, 'action') and event.action == ActionType.CONDENSATION:
                return False

        return False

    def _filter_user_messages_to_keep(
        self, events: list[Event], events_to_forget: list[Event]
    ) -> None:
        """Remove the first and the most recent user message from `events_to_forget`.

        The list is modified in place.

        Args:
            events: All events that were condensed
            events_to_forget: Candidate events to forget
        """
        user_messages = [
            event
            for event in events
            if isinstance(event, MessageAction) and event.source == EventSource.USER
        ]
        if not user_messages:
            return

        # Compare by identity: events are compared field-by-field with `==`, so two
        # different messages with the same text would otherwise be mixed up.
        protected = (user_messages[0], user_messages[-1])
        events_to_forget[:] = [
            event
            for event in events_to_forget
            if all(event is not kept for kept in protected)
        ]

    @classmethod
    def from_config(
        cls, config: LLMAgentCacheCondenserConfig
    ) -> LLMAgentCacheCondenser:
        """Build a condenser from its configuration object.

        Args:
            config: Supplies max_size, trigger_word and keep_first

        Returns:
            A new condenser instance
        """
        return cls(
            max_size=config.max_size,
            trigger_word=config.trigger_word,
            keep_first=config.keep_first,
        )
|
||||
|
||||
|
||||
# Associate this condenser with its config class via `register_config`
# (presumably so it can be built from an LLMAgentCacheCondenserConfig;
# confirm against Condenser.register_config).
LLMAgentCacheCondenser.register_config(LLMAgentCacheCondenserConfig)
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
from litellm import supports_response_schema
|
||||
from pydantic import BaseModel
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config.condenser_config import LLMAttentionCondenserConfig
|
||||
from openhands.events.action.agent import CondensationAction
|
||||
from openhands.llm.llm import LLM
|
||||
@@ -47,7 +48,7 @@ class LLMAttentionCondenser(RollingCondenser):
|
||||
|
||||
super().__init__()
|
||||
|
||||
def get_condensation(self, view: View) -> Condensation:
|
||||
def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
|
||||
target_size = self.max_size // 2
|
||||
head_event_ids = [event.id for event in view.events[: self.keep_first]]
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config.condenser_config import LLMSummarizingCondenserConfig
|
||||
from openhands.core.message import Message, TextContent
|
||||
from openhands.events.action.agent import CondensationAction
|
||||
@@ -48,7 +49,7 @@ class LLMSummarizingCondenser(RollingCondenser):
|
||||
"""Truncate the content to fit within the specified maximum event length."""
|
||||
return truncate_content(content, max_chars=self.max_event_length)
|
||||
|
||||
def get_condensation(self, view: View) -> Condensation:
|
||||
def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
|
||||
head = view[: self.keep_first]
|
||||
target_size = self.max_size // 2
|
||||
# Number of events to keep from the tail -- target size, minus however many
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config.condenser_config import NoOpCondenserConfig
|
||||
from openhands.memory.condenser.condenser import Condensation, Condenser, View
|
||||
|
||||
@@ -7,7 +8,7 @@ from openhands.memory.condenser.condenser import Condensation, Condenser, View
|
||||
class NoOpCondenser(Condenser):
|
||||
"""A condenser that does nothing to the event sequence."""
|
||||
|
||||
def condense(self, view: View) -> View | Condensation:
|
||||
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
|
||||
"""Returns the list of events unchanged."""
|
||||
return view
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config.condenser_config import ObservationMaskingCondenserConfig
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.observation import Observation
|
||||
@@ -15,7 +16,7 @@ class ObservationMaskingCondenser(Condenser):
|
||||
|
||||
super().__init__()
|
||||
|
||||
def condense(self, view: View) -> View | Condensation:
|
||||
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
|
||||
"""Replace the content of observations outside of the attention window with a placeholder."""
|
||||
results: list[Event] = []
|
||||
for i, event in enumerate(view):
|
||||
|
||||
@@ -30,10 +30,10 @@ class CondenserPipeline(Condenser):
|
||||
for condenser in self.condensers:
|
||||
condenser.write_metadata(state)
|
||||
|
||||
def condense(self, view: View) -> View | Condensation:
|
||||
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
|
||||
result: View | Condensation = view
|
||||
for condenser in self.condensers:
|
||||
result = condenser.condense(result)
|
||||
result = condenser.condense(result, state, agent)
|
||||
if isinstance(result, Condensation):
|
||||
break
|
||||
return result
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config.condenser_config import RecentEventsCondenserConfig
|
||||
from openhands.memory.condenser.condenser import Condensation, Condenser, View
|
||||
|
||||
@@ -13,7 +14,7 @@ class RecentEventsCondenser(Condenser):
|
||||
|
||||
super().__init__()
|
||||
|
||||
def condense(self, view: View) -> View | Condensation:
|
||||
def condense(self, view: View, state: State, agent=None) -> View | Condensation:
|
||||
"""Keep only the most recent events (up to `max_events`)."""
|
||||
head = view[: self.keep_first]
|
||||
tail_length = max(0, self.max_events - len(head))
|
||||
|
||||
@@ -196,7 +196,7 @@ class StructuredSummaryCondenser(RollingCondenser):
|
||||
"""Truncate the content to fit within the specified maximum event length."""
|
||||
return truncate_content(content, max_chars=self.max_event_length)
|
||||
|
||||
def get_condensation(self, view: View) -> Condensation:
|
||||
def get_condensation(self, view: View, state=None, agent=None) -> Condensation:
|
||||
head = view[: self.keep_first]
|
||||
target_size = self.max_size // 2
|
||||
# Number of events to keep from the tail -- target size, minus however many
|
||||
|
||||
@@ -59,14 +59,16 @@ class View(BaseModel):
|
||||
# The relevant summary is always in the last condensation event (i.e., the most recent one).
|
||||
for event in reversed(events):
|
||||
if isinstance(event, CondensationAction):
|
||||
if event.summary is not None and event.summary_offset is not None:
|
||||
if event.summary is not None:
|
||||
summary = event.summary
|
||||
summary_offset = event.summary_offset
|
||||
break
|
||||
|
||||
if summary is not None and summary_offset is not None:
|
||||
kept_events.insert(
|
||||
summary_offset, AgentCondensationObservation(content=summary)
|
||||
)
|
||||
if summary is not None:
|
||||
summary_obs = AgentCondensationObservation(content=summary)
|
||||
if summary_offset is not None:
|
||||
kept_events.insert(summary_offset, summary_obs)
|
||||
else:
|
||||
kept_events.append(summary_obs)
|
||||
|
||||
return View(events=kept_events)
|
||||
|
||||
@@ -10,7 +10,7 @@ from openhands.core.config import AppConfig
|
||||
from openhands.core.config.condenser_config import (
|
||||
BrowserOutputCondenserConfig,
|
||||
CondenserPipelineConfig,
|
||||
LLMSummarizingCondenserConfig,
|
||||
LLMAgentCacheCondenserConfig,
|
||||
)
|
||||
from openhands.core.logger import OpenHandsLoggerAdapter
|
||||
from openhands.core.schema import AgentState
|
||||
@@ -138,8 +138,10 @@ class Session:
|
||||
default_condenser_config = CondenserPipelineConfig(
|
||||
condensers=[
|
||||
BrowserOutputCondenserConfig(),
|
||||
LLMSummarizingCondenserConfig(
|
||||
llm_config=llm.config, keep_first=4, max_size=80
|
||||
LLMAgentCacheCondenserConfig(
|
||||
max_size=100, # Default max size
|
||||
trigger_word='CONDENSE!', # Default trigger word
|
||||
keep_first=4,
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -341,7 +341,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
|
||||
# 2. The action message
|
||||
# 3. The observation message
|
||||
mock_state.history = [action, observation]
|
||||
messages = agent._get_messages(mock_state.history)
|
||||
messages = agent.get_messages(mock_state.history)
|
||||
assert len(messages) == 3
|
||||
assert messages[0].role == 'system' # First message should be the system message
|
||||
assert messages[1].role == 'assistant' # Second message should be the action
|
||||
@@ -349,21 +349,21 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
|
||||
|
||||
# The same should hold if the events are presented out-of-order
|
||||
mock_state.history = [observation, action]
|
||||
messages = agent._get_messages(mock_state.history)
|
||||
messages = agent.get_messages(mock_state.history)
|
||||
assert len(messages) == 3
|
||||
assert messages[0].role == 'system' # First message should be the system message
|
||||
|
||||
# If only one of the two events is present, then we should just get the system message
|
||||
# plus any valid message from the event
|
||||
mock_state.history = [action]
|
||||
messages = agent._get_messages(mock_state.history)
|
||||
messages = agent.get_messages(mock_state.history)
|
||||
assert (
|
||||
len(messages) == 1
|
||||
) # Only system message, action is waiting for its observation
|
||||
assert messages[0].role == 'system'
|
||||
|
||||
mock_state.history = [observation]
|
||||
messages = agent._get_messages(mock_state.history)
|
||||
messages = agent.get_messages(mock_state.history)
|
||||
assert len(messages) == 1 # Only system message, observation has no matching action
|
||||
assert messages[0].role == 'system'
|
||||
|
||||
|
||||
@@ -0,0 +1,612 @@
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config.agent_config import AgentConfig
|
||||
from openhands.core.config.llm_config import LLMConfig
|
||||
from openhands.core.message import Message
|
||||
from openhands.events.action.agent import ChangeAgentStateAction, RecallAction
|
||||
from openhands.events.action.files import FileReadAction
|
||||
from openhands.events.action.message import MessageAction, SystemMessageAction
|
||||
from openhands.events.event import Event, EventSource, RecallType
|
||||
from openhands.events.observation.agent import (
|
||||
RecallObservation,
|
||||
)
|
||||
from openhands.events.observation.files import FileReadObservation
|
||||
from openhands.llm import LLM
|
||||
from openhands.llm.metrics import Metrics
|
||||
from openhands.memory.condenser.condenser import Condensation, View
|
||||
from openhands.memory.condenser.impl.llm_agent_cache_condenser import (
|
||||
LLMAgentCacheCondenser,
|
||||
)
|
||||
|
||||
|
||||
def format_messages_for_llm(messages: Message | list[Message]) -> list[dict]:
    """Dump one message or a list of messages to plain dicts via `model_dump`."""
    batch = [messages] if isinstance(messages, Message) else messages
    return [item.model_dump() for item in batch]
|
||||
|
||||
|
||||
@pytest.fixture
def agent() -> CodeActAgent:
    """Provide a CodeActAgent whose LLM is replaced by a mock with caching enabled."""
    code_act_agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig())

    # Swap the real LLM for a mock and give it just what these tests touch
    mocked_llm = Mock(LLM)
    code_act_agent.llm = mocked_llm
    mocked_llm.config = Mock()
    mocked_llm.config.max_message_chars = 1000
    mocked_llm.is_caching_prompt_active.return_value = True
    mocked_llm.format_messages_for_llm = format_messages_for_llm
    mocked_llm.metrics = Metrics()
    return code_act_agent
|
||||
|
||||
|
||||
def set_next_llm_response(agent, response: str):
    """Make the agent's mocked `llm.completion` return a reply whose first choice holds `response`."""
    first_choice = MagicMock()
    first_choice.message.content = response

    canned_reply = MagicMock()
    canned_reply.choices = [first_choice]

    agent.llm.completion.return_value = canned_reply
|
||||
|
||||
|
||||
def test_contains_trigger_word():
    """Test that _contains_trigger_word looks only at the most recent user message."""
    condenser = LLMAgentCacheCondenser(max_size=10)
    check = condenser._contains_trigger_word

    # Case 1: no events at all
    assert not check([])

    # Case 2: a single event is never enough
    lone_event = MessageAction('Please CONDENSE! the conversation history.')
    assert not check([lone_event])

    # Case 3: the user message carries the keyword
    user_event = MessageAction('Please CONDENSE! the conversation history.')
    user_event._source = 'user'  # type: ignore [attr-defined]
    agent_event = MessageAction('Agent response')
    agent_event._source = 'agent'  # type: ignore [attr-defined]
    assert check([user_event, agent_event])

    # Case 4: the user message does not carry the keyword
    user_event.content = 'Please summarize the conversation history.'
    assert not check([user_event, agent_event])

    # Case 5: a RecallObservation after the keyword message does not hide it
    user_event.content = 'Please CONDENSE! the conversation history.'
    recall_event = RecallObservation(
        recall_type=RecallType.KNOWLEDGE, content='saw a thing'
    )
    assert check([agent_event, user_event, recall_event])

    # Case 6: several user messages, the latest one has the keyword
    plain_user_event = MessageAction('First message without keyword')
    plain_user_event._source = 'user'  # type: ignore [attr-defined]
    keyword_user_event = MessageAction('Please CONDENSE! the conversation history.')
    keyword_user_event._source = 'user'  # type: ignore [attr-defined]
    assert check([plain_user_event, agent_event, keyword_user_event])

    # Case 7: several user messages, the latest one lacks the keyword
    assert not check([keyword_user_event, agent_event, plain_user_event])
|
||||
|
||||
|
||||
def test_no_condensation(agent: CodeActAgent):
    """Test that a history below max_size comes back as an unchanged View."""
    condenser = LLMAgentCacheCondenser(max_size=10)

    # Five real events, ids 0..4
    events = []
    for index in range(5):
        message = MessageAction(f'Message {index}')
        message._id = index  # type: ignore [attr-defined]
        events.append(message)

    state = State(history=cast(list[Event], events))
    result = condenser.condensed_history(state, agent)

    # No condensation expected, so we get a View with every event
    assert isinstance(result, View)
    assert len(result.events) == 5
|
||||
|
||||
|
||||
def test_condense(agent: CodeActAgent):
    """Test that exceeding max_size yields a Condensation built from the LLM summary."""
    llm_summary = """
USER_CONTEXT: Testing file read operations
COMPLETED: Read 4 files with varying content
PENDING: None
CURRENT_STATE: Files read: 0.txt, 1.txt, 2.txt, 3.txt
"""
    set_next_llm_response(agent, llm_summary)

    condenser = LLMAgentCacheCondenser(max_size=5, keep_user_messages=True)
    agent.condenser = condenser

    system_message = SystemMessageAction(content='System Message')
    system_message._source = EventSource.AGENT  # type: ignore
    user_message = MessageAction('User message')
    user_message._source = EventSource.USER  # type: ignore

    events = [
        system_message,
        user_message,
        *(FileReadObservation(f'{i}.txt', 'content.' * i) for i in range(4)),
    ]
    assert len(events) == 6
    for event_id, event in enumerate(events, start=1):
        event._id = event_id  # type: ignore [attr-defined]

    history_state = State(history=cast(list[Event], events))
    result = condenser.condensed_history(history_state, agent)

    assert isinstance(result, Condensation)
    assert hasattr(result, 'action')
    # Event 1 (system prompt) and event 2 (user message) must survive
    assert result.action.forgotten_event_ids == [3, 4, 5, 6]
    assert result.action.summary == llm_summary
    assert result.action.summary_offset is None
|
||||
|
||||
|
||||
def test_llm_agent_cache_condenser_with_state_with_rewrite(agent: CodeActAgent):
    """Test that the LLM summary ends up in the returned Condensation."""
    canned_summary = """
USER_CONTEXT: File exploration task
COMPLETED: Read 6 files with varying content
PENDING: None
CODE_STATE: Files read: 0.txt, 1.txt, 2.txt, 3.txt, 4.txt, 5.txt
CHANGES: User asked about database schema and agent explained the tables and relationships.
"""
    set_next_llm_response(agent, canned_summary)

    condenser = LLMAgentCacheCondenser(max_size=5)
    agent.condenser = condenser

    # Six observations, ids 0..5, which is one more than max_size
    events = []
    for index in range(6):
        observation = FileReadObservation(f'{index}.txt', 'content.' * index)
        observation._id = index  # type: ignore [attr-defined]
        events.append(observation)

    state = State(history=cast(list[Event], events))
    result = condenser.condensed_history(state, agent)

    # A Condensation carrying the summary is expected
    assert isinstance(result, Condensation)
    assert hasattr(result, 'action')
    assert result.action.summary is not None
    assert 'User asked about database schema' in result.action.summary
|
||||
|
||||
|
||||
def test_should_condense_max_size():
    """Test that should_condense fires only once the event count exceeds max_size."""
    condenser = LLMAgentCacheCondenser(max_size=10)

    def view_of(count):
        # Build a view holding `count` plain messages
        return View(events=[MessageAction(f'Message {i}') for i in range(count)])

    small_view = view_of(5)
    large_view = view_of(11)

    # Below the limit: leave the view alone
    assert not condenser.should_condense(small_view)

    # Above the limit: condensation is due
    assert condenser.should_condense(large_view)
|
||||
|
||||
|
||||
def test_llm_agent_cache_condenser_simulated_mixed_condensation(agent: CodeActAgent):
    """Test condensation over alternating file-read observations and actions."""
    from tests.unit.testing_utils import create_tool_call_metadata

    canned_summary = """
USER_CONTEXT: Mixed file and message operations
COMPLETED: Processed 7 events (messages and file reads)
PENDING: None
CURRENT_STATE: Last message: Test message 6, Last file: 7.txt
CHANGES: Summary <mention content of message 4,5>
"""
    set_next_llm_response(agent, canned_summary)

    condenser = LLMAgentCacheCondenser(max_size=5)
    agent.condenser = condenser

    events = []
    for i in range(1, 8):
        if i % 2:
            # Odd ids are observations
            event = FileReadObservation(f'File content for event {i}', f'{i}.txt')
        else:
            # Even ids are agent file reads carrying tool_call_metadata
            event = FileReadAction(f'{i}.txt')
            event._source = 'agent'
            event.tool_call_metadata = create_tool_call_metadata(
                tool_call_id=f'tool_call_{i}',
                model_response_id=f'model_response_{i}',
                function_name='str_replace_editor',
            )

        event._id = i  # type: ignore [attr-defined]
        events.append(event)

    state = State(history=cast(list[Event], events))
    result = condenser.condensed_history(state, agent)

    # Seven events exceed max_size, so a Condensation is expected
    assert isinstance(result, Condensation)
    assert len(result.action.forgotten_event_ids) > 0
    # The summary must be the one the mocked LLM returned
    assert 'Mixed file and message operations' in result.action.summary
|
||||
|
||||
|
||||
def test_llm_agent_cache_condenser_always_keep_system_prompt(agent: CodeActAgent):
    """Check the system prompt still leads the messages after a condensation."""
    set_next_llm_response(
        agent,
        """
USER_CONTEXT: Simple greeting exchange
COMPLETED: User greeted agent, agent responded
PENDING: None
CURRENT_STATE: Conversation in progress
""",
    )

    # max_size is small enough that ten events force a condensation, yet large
    # enough that the short follow-up history below does not trigger another.
    cache_condenser = LLMAgentCacheCondenser(max_size=5)
    agent.condenser = cache_condenser

    # Ten alternating user/agent messages with ids 1..10.
    speakers = ('user', 'agent')
    history = []
    for position in range(10):
        message = MessageAction(f'Message {position}')
        message._source = speakers[position % 2]  # type: ignore [attr-defined]
        message._id = position + 1  # type: ignore [attr-defined]
        history.append(message)

    first_pass = cache_condenser.condensed_history(
        State(history=cast(list[Event], history)), agent
    )

    assert isinstance(first_pass, Condensation)
    first_pass.action._id = 20  # type: ignore [attr-defined]

    # Second pass: only the newest event plus the condensation action, so the
    # condenser should hand back a plain View.
    follow_up = State(history=[history[-1], first_pass.action])
    view = cache_condenser.condensed_history(follow_up, agent)
    assert isinstance(view, View)

    # The agent must still emit its system prompt as the first message.
    leading_message = agent.get_messages(view.events)[0]
    assert leading_message.role == 'system'
    assert 'You are OpenHands' in leading_message.content[0].text
|
||||
|
||||
|
||||
def test_llm_agent_cache_condenser_first_message_user_message(agent: CodeActAgent):
    """Check a user message survives in the view built after condensation."""
    # max_size is small enough that eleven events force a condensation, yet
    # large enough that the two-event follow-up history does not trigger another.
    cache_condenser = LLMAgentCacheCondenser(max_size=5, keep_user_messages=True)
    agent.condenser = cache_condenser

    # The single user message in the whole conversation.
    opening_message = MessageAction('Hello, how are you?')
    opening_message._source = 'user'  # type: ignore [attr-defined]
    opening_message._id = 1  # type: ignore [attr-defined]

    # Ten agent replies (ids 2..11) push the history past max_size.
    history = [opening_message]
    for offset in range(10):
        reply = MessageAction(f'Agent response {offset}')
        reply._source = 'agent'  # type: ignore [attr-defined]
        reply._id = offset + 2  # type: ignore [attr-defined]
        history.append(reply)

    initial_state = State(history=cast(list[Event], history))

    set_next_llm_response(
        agent,
        """
USER_CONTEXT: Initial greeting
COMPLETED: User said hello, agent responded
PENDING: None
CURRENT_STATE: Conversation started
""",
    )

    first_pass = cache_condenser.condensed_history(initial_state, agent)

    assert isinstance(first_pass, Condensation)
    first_pass.action._id = 20  # type: ignore [attr-defined]

    # Second pass: the user message plus the condensation action only, which
    # should yield a View rather than another Condensation.
    follow_up = State(history=[opening_message, first_pass.action])
    view = cache_condenser.condensed_history(follow_up, agent)
    assert isinstance(view, View)

    # At least one event sourced from the user must remain in the view.
    from_user = [
        candidate
        for candidate in view.events
        if getattr(candidate, '_source', None) == 'user'
    ]
    assert len(from_user) > 0

    # The agent must still emit its system prompt as the first message.
    leading_message = agent.get_messages(view.events)[0]
    assert leading_message.role == 'system'
    assert 'You are OpenHands' in leading_message.content[0].text
|
||||
|
||||
|
||||
def test_llm_agent_cache_condenser_full_rewrite(agent: CodeActAgent):
    """Run a condensation and then rebuild a view from its action alone."""
    # max_size is small enough that ten events force a condensation, yet large
    # enough that a history holding only the condensation action does not.
    cache_condenser = LLMAgentCacheCondenser(max_size=5)
    agent.condenser = cache_condenser

    # Ten alternating user/agent messages with ids 1..10.
    speakers = ('user', 'agent')
    history = []
    for position in range(10):
        message = MessageAction(f'Message {position}')
        message._source = speakers[position % 2]  # type: ignore [attr-defined]
        message._id = position + 1  # type: ignore [attr-defined]
        history.append(message)

    initial_state = State(history=cast(list[Event], history))

    set_next_llm_response(
        agent,
        """
USER_CONTEXT: Simple greeting
COMPLETED: User and AI greeted each other
PENDING: None
CURRENT_STATE: Conversation initialized
""",
    )

    first_pass = cache_condenser.condensed_history(initial_state, agent)

    assert isinstance(first_pass, Condensation)
    condensation_action = first_pass.action
    condensation_action._id = 20  # type: ignore [attr-defined]

    # Something was dropped, and the summary is the mocked LLM reply.
    assert len(condensation_action.forgotten_event_ids) > 0
    assert 'User and AI greeted each other' in condensation_action.summary

    # Second pass: only the condensation action, so no further condensation.
    view = cache_condenser.condensed_history(
        State(history=[condensation_action]), agent
    )
    assert isinstance(view, View)

    # The condensation action itself must be part of the resulting view.
    assert condensation_action in view.events

    # The agent must still emit its system prompt as the first message.
    leading_message = agent.get_messages(view.events)[0]
    assert leading_message.role == 'system'
    assert 'You are OpenHands' in leading_message.content[0].text
|
||||
|
||||
|
||||
def test_condensation_triggered_by_user_message_in_context(agent):
    """Check the LLM prompt contains both the goal and the trigger message.

    ``max_size`` is far above the event count, so the only thing that can
    start condensation here is the trigger word in the last user message.
    """
    cache_condenser = LLMAgentCacheCondenser(trigger_word='CONDENSE!', max_size=500)
    agent.condenser = cache_condenser

    goal_text = 'I want you to do some things for me.'
    trigger_text = 'Please CONDENSE! the conversation history.'

    # First user message (id 1) states the goal.
    goal_message = MessageAction(goal_text)
    goal_message._source = 'user'  # type: ignore [attr-defined]
    goal_message._id = 1  # type: ignore [attr-defined]

    # Three agent replies with ids 2..4.
    replies = []
    for offset in range(3):
        reply = MessageAction(f'Agent response {offset}')
        reply._source = 'agent'  # type: ignore [attr-defined]
        reply._id = offset + 2  # type: ignore [attr-defined]
        replies.append(reply)

    # Final user message (id 5) carries the trigger word.
    trigger_message = MessageAction(trigger_text)
    trigger_message._source = 'user'  # type: ignore [attr-defined]
    trigger_message._id = 5  # type: ignore [attr-defined]

    history = [goal_message, *replies, trigger_message]
    initial_state = State(history=cast(list[Event], history))

    canned_reply = MagicMock()
    canned_reply.choices = [MagicMock()]
    canned_reply.choices[0].message.content = """
USER_CONTEXT: Simple greeting
COMPLETED: User and AI greeted each other
PENDING: None
CURRENT_STATE: Conversation initialized
"""

    with patch.object(
        agent.llm, 'completion', return_value=canned_reply
    ) as completion_spy:
        cache_condenser.condensed_history(initial_state, agent)

    # Exactly one LLM call should have been made for the condensation.
    completion_spy.assert_called_once()

    # Inspect the keyword arguments of that call for the prompt messages.
    prompt_messages = completion_spy.call_args.kwargs.get('messages', [])

    def _prompt_mentions(fragment):
        return any(fragment in message['content'] for message in prompt_messages)

    assert _prompt_mentions(
        goal_text
    ), 'First user message should be preserved in the context'
    assert _prompt_mentions(
        trigger_text
    ), 'Trigger message should be included in the context'
|
||||
|
||||
|
||||
def test_condensation_with_followup_events(agent):
    """Check trigger-word condensation when events follow the trigger message.

    The history ends with a state-change action and a recall action that come
    after the user's trigger message. The test verifies the trigger message is
    in the LLM prompt and pins exactly which event ids are forgotten.
    """
    cache_condenser = LLMAgentCacheCondenser(
        trigger_word='CONDENSE!', max_size=500, keep_user_messages=True
    )
    agent.condenser = cache_condenser

    trigger_text = 'Please CONDENSE! the conversation history.'

    # First user message (id 1) states the goal.
    goal_message = MessageAction('I want you to do some things for me.')
    goal_message._source = EventSource.USER  # type: ignore [attr-defined]
    goal_message._id = 1  # type: ignore [attr-defined]

    # Three agent replies with ids 2..4.
    replies = []
    for offset in range(3):
        reply = MessageAction(f'Agent response {offset}')
        reply._source = EventSource.AGENT  # type: ignore [attr-defined]
        reply._id = offset + 2  # type: ignore [attr-defined]
        replies.append(reply)

    # User message (id 5) carrying the trigger word.
    trigger_message = MessageAction(trigger_text)
    trigger_message._source = EventSource.USER  # type: ignore [attr-defined]
    trigger_message._id = 5  # type: ignore [attr-defined]

    # Two events recorded after the trigger message (ids 6 and 7).
    state_change = ChangeAgentStateAction(
        agent_state='running',
        thought='',
    )
    state_change._id = 6  # type: ignore [attr-defined]
    state_change._source = EventSource.ENVIRONMENT  # type: ignore [attr-defined]

    recall = RecallAction(
        recall_type=RecallType.WORKSPACE_CONTEXT,
        query='hi',
        thought='',
    )
    recall._id = 7  # type: ignore [attr-defined]
    recall._source = EventSource.USER  # type: ignore [attr-defined]

    history = [goal_message] + replies + [trigger_message, state_change, recall]
    initial_state = State(history=cast(list[Event], history))

    canned_reply = MagicMock()
    canned_reply.choices = [MagicMock()]
    canned_reply.choices[0].message.content = """
USER_CONTEXT: Simple greeting
COMPLETED: User and AI greeted each other
PENDING: None
CURRENT_STATE: Conversation initialized
"""

    with patch.object(
        agent.llm, 'completion', return_value=canned_reply
    ) as completion_spy:
        condensation = cache_condenser.condensed_history(initial_state, agent)

    # Exactly one LLM call should have been made for the condensation.
    completion_spy.assert_called_once()

    # Inspect the keyword arguments of that call for the prompt messages.
    prompt_messages = completion_spy.call_args.kwargs.get('messages', [])
    assert any(
        trigger_text in message['content'] for message in prompt_messages
    ), 'Trigger message should be included in the context'

    assert isinstance(condensation, Condensation)
    assert hasattr(condensation, 'action')

    # The agent replies and both follow-up events are forgotten; neither of the
    # two user messages (ids 1 and 5) appears in the forgotten list.
    expected_forgotten = [dropped.id for dropped in (*replies, state_change, recall)]
    assert condensation.action.forgotten_event_ids == expected_forgotten
    assert condensation.action.summary == canned_reply.choices[0].message.content
    assert condensation.action.summary_offset is None
|
||||
|
||||
|
||||
def test_keep_first_functionality(agent: CodeActAgent):
    """Test that the LLMAgentCacheCondenser keeps the first `keep_first` events.

    Ten alternating user/agent messages (ids 1..10) exceed ``max_size=5`` and
    force a condensation. None of the first ``keep_first`` event ids may then
    appear among the forgotten ids.
    """
    keep_first = 2
    condenser = LLMAgentCacheCondenser(max_size=5, keep_first=keep_first)
    agent.condenser = condenser

    # Create events exceeding max_size
    events = []
    for i in range(10):
        event = MessageAction(f'Message {i}')
        event._source = 'user' if i % 2 == 0 else 'agent'  # type: ignore [attr-defined]
        event._id = i + 1  # type: ignore [attr-defined]
        events.append(event)

    state = State(history=cast(list[Event], events))

    set_next_llm_response(
        agent,
        """
USER_CONTEXT: Simple greeting
COMPLETED: User and AI greeted each other
PENDING: None
CURRENT_STATE: Conversation initialized
""",
    )

    result = condenser.condensed_history(state, agent)

    # Verify that a Condensation is returned
    assert isinstance(result, Condensation)
    result.action._id = 20  # type: ignore [attr-defined]

    forgotten_event_ids = result.action.forgotten_event_ids

    # Something must actually have been condensed away. The previous
    # `all(...) is False` check enforced this only by accident, because
    # all() of an empty iterable is True.
    assert forgotten_event_ids, 'Condensation should forget at least one event'

    # Check that the first `keep_first` events are preserved. Every protected
    # id must be absent from the forgotten list; the earlier assertion only
    # required that at least one forgotten id was unprotected, so it could not
    # detect a protected event being dropped.
    preserved_event_ids = {event._id for event in events[:keep_first]}  # type: ignore [attr-defined]
    assert preserved_event_ids.isdisjoint(
        forgotten_event_ids
    ), 'The first keep_first events must not be forgotten'

    # Check that the summary contains the greeting information
    assert 'User and AI greeted each other' in result.action.summary
|
||||
@@ -5,6 +5,7 @@ from openhands.core.message_utils import (
|
||||
from openhands.events.event import Event
|
||||
from openhands.events.tool import ToolCallMetadata
|
||||
from openhands.llm.metrics import Metrics, TokenUsage
|
||||
from tests.unit.testing_utils import create_tool_call_metadata
|
||||
|
||||
|
||||
def test_get_token_usage_for_event():
|
||||
@@ -28,15 +29,14 @@ def test_get_token_usage_for_event():
|
||||
|
||||
# Create an event referencing that response_id
|
||||
event = Event()
|
||||
mock_tool_call_metadata = ToolCallMetadata(
|
||||
# Use our utility function to create tool_call_metadata
|
||||
mock_tool_call_metadata = create_tool_call_metadata(
|
||||
tool_call_id='test-tool-call',
|
||||
function_name='fake_function',
|
||||
model_response={'id': 'test-response-id'},
|
||||
model_response_id='test-response-id',
|
||||
total_calls_in_response=1,
|
||||
)
|
||||
event._tool_call_metadata = (
|
||||
mock_tool_call_metadata # normally you'd do event.tool_call_metadata = ...
|
||||
)
|
||||
event._tool_call_metadata = mock_tool_call_metadata
|
||||
|
||||
# We should find that usage record
|
||||
found = get_token_usage_for_event(event, metrics)
|
||||
@@ -45,7 +45,14 @@ def test_get_token_usage_for_event():
|
||||
assert found.response_id == 'test-response-id'
|
||||
|
||||
# If we change the event's response ID, we won't find anything
|
||||
mock_tool_call_metadata.model_response.id = 'some-other-id'
|
||||
# Create a new tool_call_metadata with a different response ID
|
||||
mock_tool_call_metadata = create_tool_call_metadata(
|
||||
tool_call_id='test-tool-call',
|
||||
function_name='fake_function',
|
||||
model_response_id='some-other-id',
|
||||
total_calls_in_response=1,
|
||||
)
|
||||
event._tool_call_metadata = mock_tool_call_metadata
|
||||
found2 = get_token_usage_for_event(event, metrics)
|
||||
assert found2 is None
|
||||
|
||||
@@ -87,17 +94,17 @@ def test_get_token_usage_for_event_id():
|
||||
e._id = i
|
||||
# We'll attach usage_1 to event 1, usage_2 to event 3
|
||||
if i == 1:
|
||||
e._tool_call_metadata = ToolCallMetadata(
|
||||
e._tool_call_metadata = create_tool_call_metadata(
|
||||
tool_call_id='tid1',
|
||||
function_name='fn1',
|
||||
model_response={'id': 'resp-1'},
|
||||
model_response_id='resp-1',
|
||||
total_calls_in_response=1,
|
||||
)
|
||||
elif i == 3:
|
||||
e._tool_call_metadata = ToolCallMetadata(
|
||||
e._tool_call_metadata = create_tool_call_metadata(
|
||||
tool_call_id='tid2',
|
||||
function_name='fn2',
|
||||
model_response={'id': 'resp-2'},
|
||||
model_response_id='resp-2',
|
||||
total_calls_in_response=1,
|
||||
)
|
||||
events.append(e)
|
||||
@@ -141,10 +148,10 @@ def test_get_token_usage_for_event_fallback():
|
||||
|
||||
event = Event()
|
||||
# Provide some mismatched tool_call_metadata:
|
||||
event._tool_call_metadata = ToolCallMetadata(
|
||||
event._tool_call_metadata = create_tool_call_metadata(
|
||||
tool_call_id='irrelevant-tool-call',
|
||||
function_name='fake_function',
|
||||
model_response={'id': 'not-matching-any-usage'},
|
||||
model_response_id='not-matching-any-usage',
|
||||
total_calls_in_response=1,
|
||||
)
|
||||
# But also set event.response_id to the actual usage ID
|
||||
|
||||
@@ -52,7 +52,7 @@ def response_mock(content: str, tool_call_id: str):
|
||||
return ModelResponse(**MockModelResponse(content, tool_call_id).model_dump())
|
||||
|
||||
|
||||
def test_get_messages(codeact_agent: CodeActAgent):
|
||||
def testget_messages(codeact_agent: CodeActAgent):
|
||||
# Add some events to history
|
||||
history = list()
|
||||
# Add system message action
|
||||
@@ -76,7 +76,7 @@ def test_get_messages(codeact_agent: CodeActAgent):
|
||||
history.append(message_action_5)
|
||||
|
||||
codeact_agent.reset()
|
||||
messages = codeact_agent._get_messages(history)
|
||||
messages = codeact_agent.get_messages(history)
|
||||
|
||||
assert (
|
||||
len(messages) == 6
|
||||
@@ -99,7 +99,7 @@ def test_get_messages(codeact_agent: CodeActAgent):
|
||||
assert messages[5].content[0].cache_prompt
|
||||
|
||||
|
||||
def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
|
||||
def testget_messages_prompt_caching(codeact_agent: CodeActAgent):
|
||||
history = list()
|
||||
# Add system message action
|
||||
system_message_action = codeact_agent.get_system_message()
|
||||
@@ -115,7 +115,7 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
|
||||
history.append(message_action_agent)
|
||||
|
||||
codeact_agent.reset()
|
||||
messages = codeact_agent._get_messages(history)
|
||||
messages = codeact_agent.get_messages(history)
|
||||
|
||||
# Check that only the last two user messages have cache_prompt=True
|
||||
cached_user_messages = [
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
"""Utility functions for tests."""
|
||||
|
||||
from openhands.events.tool import ToolCallMetadata
|
||||
|
||||
|
||||
def create_tool_call_metadata(
    tool_call_id: str = 'tool_call_0',
    function_name: str = 'str_replace_editor',
    model_response_id: str = 'model_response_0',
    total_calls_in_response: int = 1,
) -> ToolCallMetadata:
    """
    Build a ToolCallMetadata whose model_response has the nested layout tests need.

    The model response is a plain dictionary with one choice. That choice holds
    an assistant message with empty content and a single function tool call.

    Args:
        tool_call_id: The ID of the tool call
        function_name: The name of the function being called
        model_response_id: The ID of the model response
        total_calls_in_response: The total number of calls in the response

    Returns:
        A ToolCallMetadata wrapping the generated model response dictionary
    """
    # Assemble the payload from the innermost piece outwards.
    tool_call = {
        'id': tool_call_id,
        'type': 'function',
        'function': {
            'name': function_name,
            'arguments': '{}',  # Empty JSON object as string
        },
    }
    assistant_message = {
        'role': 'assistant',
        'content': '',
        'tool_calls': [tool_call],
    }
    response_payload = {
        'id': model_response_id,
        'choices': [{'message': assistant_message}],
    }

    return ToolCallMetadata(
        tool_call_id=tool_call_id,
        function_name=function_name,
        model_response=response_payload,
        total_calls_in_response=total_calls_in_response,
    )
|
||||
Reference in New Issue
Block a user