add condenser as cli argument

Merge remote-tracking branch 'upstream/main' into condenser_experiment
keep_first=4, like LLMSummaryCondenser before
2026-04-29 03:00:45 -04:00 · 2025-04-30 15:41:47 +02:00 · 2025-04-29 08:36:26 +02:00 · 2025-04-24 17:15:01 +02:00 · 2025-04-24 13:03:31 +02:00 · 2025-04-24 12:58:11 +02:00
30 changed files with 1419 additions and 95 deletions
@@ -201,7 +201,6 @@ model = "gpt-4o"
 #native_tool_calling = None


-
 [llm.gpt4o-mini]
 api_key = ""
 model = "gpt-4o"
@@ -386,6 +385,16 @@ type = "noop"
 # Maximum size of history before triggering attention mechanism
 #max_size = 100

+# 7. LLM Agent Cache Condenser
+#type = "agentcache"
+# Maximum number of events before condensation is triggered
+#max_size = 100
+# Word that triggers condensation when found in user messages
+#trigger_word = "CONDENSE!"
+# Number of initial events to always keep (typically includes task description)
+#keep_first = 1
+# Note: This condenser should only be used with models that make use of prompt caching.
+
 # Example of a custom LLM configuration for condensers that require an LLM
 # If not provided, it falls back to the default LLM
 #[llm.condenser]
@@ -44,6 +44,8 @@ from openhands.core.config import (
    get_llm_config_arg,
    get_parser,
 )
+from openhands.core.config.utils import get_condenser_config_arg
+from openhands.core.config.condenser_config import NoOpCondenserConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.critic import AgentFinishedCritic
@@ -744,6 +746,12 @@ if __name__ == '__main__':
        choices=['swe', 'swt', 'swt-ci'],
        help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
    )
+    parser.add_argument(
+        '--condenser-config',
+        type=str,
+        default=None,
+        help='Name of the condenser config to use, e.g., "default_4_20" for [condenser.default_4_20] section in config.toml',
+    )
    args, _ = parser.parse_known_args()

    # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
@@ -780,6 +788,18 @@ if __name__ == '__main__':
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

+    condenser_config = None
+    if args.condenser_config:
+        condenser_config = get_condenser_config_arg(args.condenser_config)
+        if condenser_config is None:
+            raise ValueError(
+                f'Could not find Condenser config: --condenser-config {args.condenser_config}'
+            )
+    else:
+        # If no specific condenser config is provided via args, default to NoOpCondenser
+        condenser_config = NoOpCondenserConfig()
+        logger.warning('No Condenser config provided via --condenser-config, using NoOpCondenser.')
+
    details = {'mode': args.mode}
    _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)

@@ -794,6 +814,7 @@ if __name__ == '__main__':
        args.eval_note,
        args.eval_output_dir,
        details=details,
+        condenser_config=condenser_config,
    )

    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
@@ -11,8 +11,10 @@ MAX_ITER=$5
 NUM_WORKERS=$6
 DATASET=$7
 SPLIT=$8
-N_RUNS=$9
-MODE=${10}
+CONDENSER_CONFIG=$9
+N_RUNS=${10}
+MODE=${11}
+

 if [ -z "$NUM_WORKERS" ]; then
  NUM_WORKERS=1
@@ -51,6 +53,12 @@ if [ -z "$MODE" ]; then
  echo "MODE not specified, use default $MODE"
 fi

+if [ -n "$CONDENSER_CONFIG" ]; then
+  echo "Using Condenser Config: $CONDENSER_CONFIG"
+else
+  echo "No Condenser Config provided, use default (NoOpCondenser)."
+fi
+
 export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
 echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"

@@ -91,7 +99,19 @@ fi

 function run_eval() {
  local eval_note="${1}"
-  COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
+  local base_command="evaluation/benchmarks/swe_bench/run_infer.py"
+
+  # Check if DEBUG_EVAL environment variable is set to true
+  if [[ "${DEBUG_EVAL}" == "true" ]]; then
+    echo "Running in DEBUG mode with debugpy, listening on port 5678"
+    # Prepend DEBUG=true to set the environment variable for the python process
+    # Use standard debugpy port 5678
+    COMMAND="DEBUG=true poetry run debugpy --listen 0.0.0.0:5678 --wait-for-client $base_command"
+  else
+    COMMAND="poetry run python $base_command"
+  fi
+
+  COMMAND="$COMMAND \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
@@ -101,6 +121,12 @@ function run_eval() {
    --split $SPLIT \
    --mode $MODE"

+  # Conditionally add the condenser config argument.
+  # The Python script defaults to NoOpCondenserConfig if this argument is not provided.
+  if [ -n "$CONDENSER_CONFIG" ]; then
+    COMMAND="$COMMAND --condenser-config $CONDENSER_CONFIG"
+  fi
+
  if [ -n "$EVAL_LIMIT" ]; then
    echo "EVAL_LIMIT: $EVAL_LIMIT"
    COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
@@ -1,6 +1,7 @@
 import copy
 import os
 from collections import deque
+from typing import Any

 from litellm import ChatCompletionToolParam

@@ -15,7 +16,7 @@ from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
 )
 from openhands.agenthub.codeact_agent.tools.think import ThinkTool
 from openhands.agenthub.codeact_agent.tools.web_read import WebReadTool
-from openhands.controller.agent import Agent
+from openhands.controller.agent import Agent, LLMCompletionProvider
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
 from openhands.core.logger import openhands_logger as logger
@@ -37,7 +38,7 @@ from openhands.runtime.plugins import (
 from openhands.utils.prompt import PromptManager


-class CodeActAgent(Agent):
+class CodeActAgent(Agent, LLMCompletionProvider):
    VERSION = '2.2'
    """
    The Code Act Agent is a minimalist agent.
@@ -162,7 +163,7 @@ class CodeActAgent(Agent):
        # event we'll just return that instead of an action. The controller will
        # immediately ask the agent to step again with the new view.
        condensed_history: list[Event] = []
-        match self.condenser.condensed_history(state):
+        match self.condenser.condensed_history(state, self):
            case View(events=events):
                condensed_history = events

@@ -173,7 +174,19 @@ class CodeActAgent(Agent):
            f'Processing {len(condensed_history)} events from a total of {len(state.history)} events'
        )

-        messages = self._get_messages(condensed_history)
+        params = self.build_llm_completion_params(condensed_history, state)
+        response = self.llm.completion(**params)
+        logger.debug(f'Response from LLM: {response}')
+        actions = self.response_to_actions_fn(response)
+        logger.debug(f'Actions after response_to_actions: {actions}')
+        for action in actions:
+            self.pending_actions.append(action)
+        return self.pending_actions.popleft()
+
+    def build_llm_completion_params(
+        self, condensed_history: list[Event], state: State
+    ) -> dict[str, Any]:
+        messages = self.get_messages(condensed_history)
        params: dict = {
            'messages': self.llm.format_messages_for_llm(messages),
        }
@@ -208,15 +221,9 @@ class CodeActAgent(Agent):
            params['tools'] += unique_mcp_tools
        # log to litellm proxy if possible
        params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)}
-        response = self.llm.completion(**params)
-        logger.debug(f'Response from LLM: {response}')
-        actions = self.response_to_actions_fn(response)
-        logger.debug(f'Actions after response_to_actions: {actions}')
-        for action in actions:
-            self.pending_actions.append(action)
-        return self.pending_actions.popleft()
+        return params

-    def _get_messages(self, events: list[Event]) -> list[Message]:
+    def get_messages(self, events: list[Event]) -> list[Message]:
        """Constructs the message history for the LLM conversation.

        This method builds a structured conversation history by processing events from the state
@@ -1,10 +1,12 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Type
+from typing import TYPE_CHECKING, Any, TypedDict
+
+from openhands.controller.state.state import State
+from openhands.core.message import Message

 if TYPE_CHECKING:
-    from openhands.controller.state.state import State
    from openhands.core.config import AgentConfig
    from openhands.events.action import Action
    from openhands.events.action.message import SystemMessageAction
@@ -13,7 +15,7 @@ from openhands.core.exceptions import (
    AgentNotRegisteredError,
 )
 from openhands.core.logger import openhands_logger as logger
-from openhands.events.event import EventSource
+from openhands.events.event import Event, EventSource
 from openhands.llm.llm import LLM
 from openhands.runtime.plugins import PluginRequirement

@@ -30,7 +32,7 @@ class Agent(ABC):
    It tracks the execution status and maintains a history of interactions.
    """

-    _registry: dict[str, Type['Agent']] = {}
+    _registry: dict[str, type['Agent']] = {}
    sandbox_plugins: list[PluginRequirement] = []

    def __init__(
@@ -46,8 +48,8 @@ class Agent(ABC):
        self.tools: list = []

    def get_system_message(self) -> 'SystemMessageAction | None':
-        """
-        Returns a SystemMessageAction containing the system message and tools.
+        """Returns a SystemMessageAction containing the system message and tools.
+
        This will be added to the event stream as the first message.

        Returns:
@@ -91,15 +93,16 @@ class Agent(ABC):

    @abstractmethod
    def step(self, state: 'State') -> 'Action':
-        """Starts the execution of the assigned instruction. This method should
-        be implemented by subclasses to define the specific execution logic.
+        """Starts the execution of the assigned instruction.
+
+        This method should be implemented by subclasses to define the specific execution logic.
        """
        pass

    def reset(self) -> None:
-        """Resets the agent's execution status and clears the history. This method can be used
-        to prepare the agent for restarting the instruction or cleaning up before destruction.
+        """Resets the agent's execution status and clears the history.

+        This method can be used to prepare the agent for restarting the instruction or cleaning up before destruction.
        """
        # TODO clear history
        self._complete = False
@@ -112,12 +115,12 @@ class Agent(ABC):
        return self.__class__.__name__

    @classmethod
-    def register(cls, name: str, agent_cls: Type['Agent']) -> None:
+    def register(cls, name: str, agent_cls: type['Agent']) -> None:
        """Registers an agent class in the registry.

        Parameters:
        - name (str): The name to register the class under.
-        - agent_cls (Type['Agent']): The class to register.
+        - agent_cls (type['Agent']): The class to register.

        Raises:
        - AgentAlreadyRegisteredError: If name already registered
@@ -127,14 +130,14 @@ class Agent(ABC):
        cls._registry[name] = agent_cls

    @classmethod
-    def get_cls(cls, name: str) -> Type['Agent']:
+    def get_cls(cls, name: str) -> type['Agent']:
        """Retrieves an agent class from the registry.

        Parameters:
        - name (str): The name of the class to retrieve

        Returns:
-        - agent_cls (Type['Agent']): The class registered under the specified name.
+        - agent_cls (type['Agent']): The class registered under the specified name.

        Raises:
        - AgentNotRegisteredError: If name not registered
@@ -158,6 +161,43 @@ class Agent(ABC):
        """Sets the list of MCP tools for the agent.

        Args:
-        - mcp_tools (list[dict]): The list of MCP tools.
+            mcp_tools: The list of MCP tools.
        """
        self.mcp_tools = mcp_tools
+
+
+class LLMCompletionParams(TypedDict, total=False):
+    messages: list[Message]
+    tools: list[Any] | None
+    extra_body: dict[str, Any] | None
+    extra: dict[str, Any] | None
+
+
+class LLMCompletionProvider(ABC):
+    """Mixin interface for agents that can expose their LLM call generation details.
+
+    This interface is used by condensers that need to use the agent's LLM completion
+    parameters to ensure consistent caching between the agent and condenser.
+    """
+
+    llm: LLM
+
+    @abstractmethod
+    def get_messages(self, condensed_history: list[Event]) -> list[Message]:
+        """Convert events to messages for the LLM."""
+        pass
+
+    @abstractmethod
+    def build_llm_completion_params(
+        self, condensed_history: list[Event], state: State
+    ) -> dict[str, Any]:
+        """Build parameters for LLM completion.
+
+        Args:
+            condensed_history: list of events to convert to messages for the LLM
+            state: Current state
+
+        Returns:
+            dict of parameters for LLM completion
+        """
+        pass
@@ -58,6 +58,28 @@ class RecentEventsCondenserConfig(BaseModel):
    model_config = {'extra': 'forbid'}


+class LLMAgentCacheCondenserConfig(BaseModel):
+    """Configuration for LLMAgentCacheCondenser."""
+
+    type: Literal['agentcache'] = Field('agentcache')
+    max_size: int = Field(
+        default=100,
+        description='Maximum number of events before condensation is triggered.',
+        ge=1,
+    )
+    trigger_word: str = Field(
+        default='CONDENSE!',
+        description='Word that triggers condensation when found in user messages.',
+    )
+    keep_first: int = Field(
+        default=1,
+        description='Number of initial events to always keep in history.',
+        ge=0,
+    )
+
+    model_config = {'extra': 'forbid'}
+
+
 class LLMSummarizingCondenserConfig(BaseModel):
    """Configuration for LLMCondenser."""

@@ -181,6 +203,7 @@ CondenserConfig = (
    | LLMAttentionCondenserConfig
    | StructuredSummaryCondenserConfig
    | CondenserPipelineConfig
+    | LLMAgentCacheCondenserConfig
 )


@@ -284,6 +307,7 @@ def create_condenser_config(condenser_type: str, data: dict) -> CondenserConfig:
        'amortized': AmortizedForgettingCondenserConfig,
        'llm_attention': LLMAttentionCondenserConfig,
        'structured': StructuredSummaryCondenserConfig,
+        'agentcache': LLMAgentCacheCondenserConfig,
    }

    if condenser_type not in condenser_classes:
@@ -16,7 +16,11 @@ from openhands import __version__
 from openhands.core import logger
 from openhands.core.config.agent_config import AgentConfig
 from openhands.core.config.app_config import AppConfig
-from openhands.core.config.condenser_config import condenser_config_from_toml_section
+from openhands.core.config.condenser_config import (
+    CondenserConfig,
+    condenser_config_from_toml_section,
+    create_condenser_config,
+)
 from openhands.core.config.config_utils import (
    OH_DEFAULT_AGENT,
    OH_MAX_ITERATIONS,
@@ -436,6 +440,118 @@ def get_llm_config_arg(
    return None


+def get_condenser_config_arg(
+    condenser_config_arg: str, toml_file: str = 'config.toml'
+) -> CondenserConfig | None:
+    """Get a group of condenser settings from the config file by name.
+
+    A group in config.toml can look like this:
+
+    ```
+    [condenser.my_summarizer]
+    type = 'llm'
+    llm_config = 'gpt-4o' # References [llm.gpt-4o]
+    max_size = 50
+    ...
+    ```
+
+    The user-defined group name, like "my_summarizer", is the argument to this function.
+    The function will load the CondenserConfig object with the settings of this group,
+    from the config file.
+
+    Note that the group must be under the "condenser" group, or in other words,
+    the group name must start with "condenser.".
+
+    Args:
+        condenser_config_arg: The group of condenser settings to get from the config.toml file.
+        toml_file: Path to the configuration file to read from. Defaults to 'config.toml'.
+
+    Returns:
+        CondenserConfig: The CondenserConfig object with the settings from the config file, or None if not found/error.
+    """
+    # keep only the name, just in case
+    condenser_config_arg = condenser_config_arg.strip('[]')
+
+    # truncate the prefix, just in case
+    if condenser_config_arg.startswith('condenser.'):
+        condenser_config_arg = condenser_config_arg[10:]
+
+    logger.openhands_logger.debug(
+        f'Loading condenser config [{condenser_config_arg}] from {toml_file}'
+    )
+
+    # load the toml file
+    try:
+        with open(toml_file, 'r', encoding='utf-8') as toml_contents:
+            toml_config = toml.load(toml_contents)
+    except FileNotFoundError as e:
+        logger.openhands_logger.error(f'Config file not found: {toml_file}. Error: {e}')
+        return None
+    except toml.TomlDecodeError as e:
+        logger.openhands_logger.error(
+            f'Cannot parse condenser group [{condenser_config_arg}] from {toml_file}. Exception: {e}'
+        )
+        return None
+
+    # Check if the condenser section and the specific config exist
+    if (
+        'condenser' not in toml_config
+        or condenser_config_arg not in toml_config['condenser']
+    ):
+        logger.openhands_logger.error(
+            f'Condenser config section [condenser.{condenser_config_arg}] not found in {toml_file}'
+        )
+        return None
+
+    condenser_data = toml_config['condenser'][
+        condenser_config_arg
+    ].copy()  # Use copy to modify
+
+    # Determine the type and handle potential LLM dependency
+    condenser_type = condenser_data.get('type')
+    if not condenser_type:
+        logger.openhands_logger.error(
+            f'Missing "type" field in [condenser.{condenser_config_arg}] section of {toml_file}'
+        )
+        return None
+
+    # Handle LLM config reference if needed, using get_llm_config_arg
+    if (
+        condenser_type in ('llm', 'llm_attention', 'structured')
+        and 'llm_config' in condenser_data
+        and isinstance(condenser_data['llm_config'], str)
+    ):
+        llm_config_name = condenser_data['llm_config']
+        logger.openhands_logger.debug(
+            f'Condenser [{condenser_config_arg}] requires LLM config [{llm_config_name}]. Loading it...'
+        )
+        # Use the existing function to load the specific LLM config
+        referenced_llm_config = get_llm_config_arg(llm_config_name, toml_file=toml_file)
+
+        if referenced_llm_config:
+            # Replace the string reference with the actual LLMConfig object
+            condenser_data['llm_config'] = referenced_llm_config
+        else:
+            # get_llm_config_arg already logs the error if not found
+            logger.openhands_logger.error(
+                f"Failed to load required LLM config '{llm_config_name}' for condenser '{condenser_config_arg}'."
+            )
+            return None
+
+    # Create the condenser config instance
+    try:
+        config = create_condenser_config(condenser_type, condenser_data)
+        logger.openhands_logger.info(
+            f'Successfully loaded condenser config [{condenser_config_arg}] from {toml_file}'
+        )
+        return config
+    except (ValidationError, ValueError) as e:
+        logger.openhands_logger.error(
+            f'Invalid condenser configuration for [{condenser_config_arg}]: {e}.'
+        )
+        return None
+
+
 # Command line arguments
 def get_parser() -> argparse.ArgumentParser:
    """Get the argument parser."""
@@ -158,11 +158,10 @@ class CondensationAction(Action):
        # Either way, we can only have one of the two valid configurations.
        forgotten_event_configuration = using_event_ids ^ using_event_range

-        # We also need to check that if the summary is provided, so is the
-        # offset (and vice versa).
+        # Check that if we have a summary_offset, we also have a summary
        summary_configuration = (
-            self.summary is None and self.summary_offset is None
-        ) or (self.summary is not None and self.summary_offset is not None)
+            self.summary is not None if self.summary_offset is not None else True
+        )

        return forgotten_event_configuration and summary_configuration

@@ -40,14 +40,14 @@ __all__ = ['LLM']
 # tuple of exceptions to retry on
 LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
    RateLimitError,
-    litellm.Timeout,
-    litellm.InternalServerError,
+    litellm.Timeout,  # type: ignore
+    litellm.InternalServerError,  # type: ignore
    LLMNoResponseError,
 )

-# cache prompt supporting models
-# remove this when we gemini and deepseek are supported
-CACHE_PROMPT_SUPPORTED_MODELS = [
+# these models require special treatment so that caching
+# works
+EXPLICIT_CACHE_MODELS = [
    'claude-3-7-sonnet-20250219',
    'claude-3-5-sonnet-20241022',
    'claude-3-5-sonnet-20240620',
@@ -249,11 +249,17 @@ class LLM(RetryMixin, DebugMixin):
                    kwargs.pop('tool_choice', None)

            # if we have no messages, something went very wrong
-            if not messages:
+            if not messages or len(messages) < 1:
                raise ValueError(
                    'The messages list is empty. At least one message is required.'
                )

+            # Anthropic requires at least one user message.
+            if not any(message.get('role') == 'user' for message in messages):
+                raise ValueError(
+                    'At least one message with role "user" is required for the completion.'
+                )
+
            # log the entire LLM prompt
            self.log_prompt(messages)

@@ -523,8 +529,8 @@ class LLM(RetryMixin, DebugMixin):
        return (
            self.config.caching_prompt is True
            and (
-                self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
-                or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
+                self.config.model in EXPLICIT_CACHE_MODELS
+                or self.config.model.split('/')[-1] in EXPLICIT_CACHE_MODELS
            )
            # We don't need to look-up model_info, because only Anthropic models needs the explicit caching breakpoint
        )
@@ -662,7 +668,7 @@ class LLM(RetryMixin, DebugMixin):
            boolean: True if executing a local model.
        """
        if self.config.base_url is not None:
-            for substring in ['localhost', '127.0.0.1' '0.0.0.0']:
+            for substring in ['localhost', '127.0.0.1', '0.0.0.0']:
                if substring in self.config.base_url:
                    return True
        elif self.config.model is not None:
@@ -1,9 +1,8 @@
-import openhands.memory.condenser.impl  # noqa F401 (we import this to get the condensers registered)
 from openhands.memory.condenser.condenser import (
-    Condenser,
-    get_condensation_metadata,
-    View,
    Condensation,
+    Condenser,
+    View,
+    get_condensation_metadata,
 )

 __all__ = [
@@ -87,23 +87,33 @@ class Condenser(ABC):
            self.write_metadata(state)

    @abstractmethod
-    def condense(self, View) -> View | Condensation:
+    def condense(self, view: View, state: State, agent=None) -> View | Condensation:
        """Condense a sequence of events into a potentially smaller list.

        New condenser strategies should override this method to implement their own condensation logic. Call `self.add_metadata` in the implementation to record any relevant per-condensation diagnostic information.

        Args:
-            View: A view of the history containing all events that should be condensed.
+            view: A view of the history containing all events that should be condensed.
+            state: The current state, passed along for context.
+            agent: Optional agent for agent-aware condensation.

        Returns:
            View | Condensation: A condensed view of the events or an event indicating the history has been condensed.
        """

-    def condensed_history(self, state: State) -> View | Condensation:
-        """Condense the state's history."""
+    def condensed_history(self, state: State, agent=None) -> View | Condensation:
+        """Condense the state's history.
+
+        Args:
+            state: The current state.
+            agent: Optional agent to use for agent-aware condensation.
+
+        Returns:
+            A View or Condensation object.
+        """
        self._llm_metadata = state.to_llm_metadata('condenser')
        with self.metadata_batch(state):
-            return self.condense(state.view)
+            return self.condense(state.view, state, agent)

    @classmethod
    def register_config(cls, configuration_type: type[CondenserConfig]) -> None:
@@ -136,6 +146,9 @@ class Condenser(ABC):
        Raises:
            ValueError: If the condenser type is not recognized.
        """
+        # trigger the condenser implementations to register themselves
+        import openhands.memory.condenser.impl  # noqa: F401
+
        try:
            condenser_class = CONDENSER_REGISTRY[type(config)]
            return condenser_class.from_config(config)
@@ -156,14 +169,14 @@ class RollingCondenser(Condenser, ABC):
        """Determine if a view should be condensed."""

    @abstractmethod
-    def get_condensation(self, view: View) -> Condensation:
+    def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
        """Get the condensation from a view."""

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, state: State, agent=None) -> View | Condensation:
        # If we trigger the condenser-specific condensation threshold, compute and return
        # the condensation.
        if self.should_condense(view):
-            return self.get_condensation(view)
+            return self.get_condensation(view, state, agent)

        # Otherwise we're safe to just return the view.
        else:
@@ -4,6 +4,9 @@ from openhands.memory.condenser.impl.amortized_forgetting_condenser import (
 from openhands.memory.condenser.impl.browser_output_condenser import (
    BrowserOutputCondenser,
 )
+from openhands.memory.condenser.impl.llm_agent_cache_condenser import (
+    LLMAgentCacheCondenser,
+)
 from openhands.memory.condenser.impl.llm_attention_condenser import (
    ImportantEventSelection,
    LLMAttentionCondenser,
@@ -25,6 +28,7 @@ from openhands.memory.condenser.impl.structured_summary_condenser import (

 __all__ = [
    'AmortizedForgettingCondenser',
+    'LLMAgentCacheCondenser',
    'LLMAttentionCondenser',
    'ImportantEventSelection',
    'LLMSummarizingCondenser',
@@ -1,5 +1,6 @@
 from __future__ import annotations

+from openhands.controller.state.state import State
 from openhands.core.config.condenser_config import AmortizedForgettingCondenserConfig
 from openhands.events.action.agent import CondensationAction
 from openhands.memory.condenser.condenser import (
@@ -36,7 +37,7 @@ class AmortizedForgettingCondenser(RollingCondenser):

        super().__init__()

-    def get_condensation(self, view: View) -> Condensation:
+    def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
        target_size = self.max_size // 2
        head = view[: self.keep_first]

@@ -1,5 +1,6 @@
 from __future__ import annotations

+from openhands.controller.state.state import State
 from openhands.core.config.condenser_config import BrowserOutputCondenserConfig
 from openhands.events.event import Event
 from openhands.events.observation import BrowserOutputObservation
@@ -17,7 +18,7 @@ class BrowserOutputCondenser(Condenser):
        self.attention_window = attention_window
        super().__init__()

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, state: State, agent=None) -> View | Condensation:
        """Replace the content of browser observations outside of the attention window with a placeholder."""
        results: list[Event] = []
        cnt: int = 0
@@ -0,0 +1,158 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from openhands.controller.agent import LLMCompletionProvider
+from openhands.controller.state.state import State
+from openhands.core.logger import openhands_logger as logger
+from openhands.core.message import Message
+from openhands.events.event import Event
+from openhands.memory.condenser.condenser import Condensation, Condenser, View
+
+
+class CachingCondenser(Condenser, ABC):
+    """Abstract base class for condensers that use prompt caching.
+
+    This class provides a framework for condensers that begin their prompt with the
+    whole current prompt, so they can use caching. They then add their own messages
+    to instruct the LLM.
+
+    Subclasses need to implement:
+    - createCondensationPrompt and processResponse: Create the condensation prompt and process the LLM response to create a Condensation
+    - should_condense: Determine if a view should be condensed
+    """
+
+    def condense(self, view: View, state: State, agent=None) -> View | Condensation:
+        """Condense the events in the view using the agent's LLM.
+
+        This implementation requires an agent that implements the LLMCompletionProvider
+        interface to provide access to the agent's LLM and message formatting.
+
+        Args:
+            view: The view to condense
+            state: The current state
+            agent: The agent to use for condensation
+
+        Returns:
+            A View or Condensation object
+        """
+        if not state:
+            raise ValueError('CachingCondenser: No state provided, cannot condense')
+
+        if not agent:
+            raise ValueError('CachingCondenser: No agent provided, cannot condense')
+
+        # Check if the agent implements the LLMCompletionProvider interface
+        if not isinstance(agent, LLMCompletionProvider):
+            raise ValueError(
+                f'CachingCondenser: Agent {agent.__class__.__name__} does not implement '
+                'LLMCompletionProvider interface, cannot condense'
+            )
+
+        # Check if we should condense
+        if not self.should_condense(view):
+            return view
+
+        # Do the condensation
+        return self._do_condensation(view.events, state, agent)
+
+    def _do_condensation(
+        self, events: list[Event], state: State, agent: LLMCompletionProvider
+    ) -> Condensation | View:
+        """Do a condensation for the given events.
+
+        Args:
+            events: The events to condense
+            state: The current state
+            agent: The agent to use for condensation
+
+        Returns:
+            A Condensation or View object
+        """
+
+        # Use the agent's method to build the parameters
+        # This ensures that the parameters are consistent with the agent's LLM
+        params = agent.build_llm_completion_params(events, state)
+
+        # Convert events to messages using the agent's method
+        messages = agent.get_messages(events)
+
+        # Now we add our own prompt at the end
+        messages.append(self.createCondensationPrompt(events, state, messages))
+
+        params['messages'] = agent.llm.format_messages_for_llm(messages)
+        self._disable_cache(params['messages'])
+
+        # Get the LLM response
+        response = agent.llm.completion(**params)
+        self.add_metadata('response', response.model_dump())
+        logger.info(f'Summarized {len(events)} events. Usage:{response}')
+        self.add_metadata('metrics', agent.llm.metrics.get())
+
+        # Process the response
+        return self.processResponse(events, state, response, messages)
+
+    def _disable_cache(self, messages: list[dict]) -> None:
+        """Disable the cache marker on the last of the given messages. We need to do this because
+        this conversation will not continue, as we are just doing a condensation. So there
+        is no way the cache could be used, and we save a little money this way.
+        Effectively reverses ConversationMemory.apply_prompt_caching.
+        """
+        if len(messages) == 0:
+            return
+
+        # only disable the cache for the last message (our new prompt), so
+        # we can still have a cache read for the rest of the conversation.
+        content = messages[-1]['content']
+
+        if content is not None:
+            if isinstance(content, list):
+                content = content[-1]
+            if isinstance(content, dict) and content.get('cache_control') is not None:
+                content['cache_control'] = None
+
+    @abstractmethod
+    def createCondensationPrompt(
+        self, events: list[Event], state: State, base_messages: list[Message]
+    ) -> Message:
+        """Create the prompt for condensation.
+
+        Args:
+            events: The events to condense
+            state: The current state
+            base_messages: The messages that are already in the prompt (cached)
+
+        Returns:
+            The message with condensation instructions
+        """
+        pass
+
+    @abstractmethod
+    def processResponse(
+        self, events: list[Event], state: State, response: Any, messages: list[Message]
+    ) -> Condensation | View:
+        """Process the LLM response to create a Condensation.
+
+        Args:
+            events: The events that were condensed
+            state: The current state
+            response: The LLM response
+            messages: The messages that were already in the prompt (cached)
+
+        Returns:
+            A Condensation or View object
+        """
+        pass
+
+    @abstractmethod
+    def should_condense(self, view: View) -> bool:
+        """Determine if a view should be condensed.
+
+        Args:
+            view: The view to check
+
+        Returns:
+            True if the view should be condensed, False otherwise
+        """
+        pass
@@ -0,0 +1,217 @@
+from __future__ import annotations
+
+from typing import Any
+
+from openhands.controller.state.state import State
+from openhands.core.config.condenser_config import LLMAgentCacheCondenserConfig
+from openhands.core.logger import openhands_logger as logger
+from openhands.core.message import Message, TextContent
+from openhands.core.schema.action import ActionType
+from openhands.events.action.agent import CondensationAction
+from openhands.events.action.message import MessageAction
+from openhands.events.event import Event, EventSource
+from openhands.memory.condenser.condenser import Condensation, View
+from openhands.memory.condenser.impl.caching_condenser import CachingCondenser
+
+
+class LLMAgentCacheCondenser(CachingCondenser):
+    """A version of LLMSummarizingCondenser that uses caching."""
+
+    def __init__(
+        self,
+        max_size: int = 100,
+        trigger_word: str = 'CONDENSE!',
+        keep_user_messages: bool = False,
+        keep_first: int = 1,
+    ):
+        """Validate and store the condenser settings.
+
+        Args:
+            max_size: Maximum number of events before condensation is triggered
+            trigger_word: Word that triggers condensation when found in user messages
+            keep_user_messages: When True, the first and the most recent message are
+                exempted from being forgotten during condensation
+            keep_first: Number of initial events to always retain
+
+        Raises:
+            ValueError: If keep_first is negative or not strictly below max_size
+        """
+        # keep_first has to be strictly smaller than max_size.
+        if max_size <= keep_first:
+            too_large = (
+                f'keep_first ({keep_first}) must be less than max_size ({max_size})'
+            )
+            raise ValueError(too_large)
+        if 0 > keep_first:
+            raise ValueError(f'keep_first ({keep_first}) cannot be negative')
+
+        self.max_size = max_size
+        self.keep_first = keep_first
+        self.keep_user_messages = keep_user_messages
+        self.trigger_word = trigger_word
+        super().__init__()
+
+    def createCondensationPrompt(
+        self, events: list[Event], state: State, base_messages: list[Message]
+    ) -> Message:
+        """Create the prompt for condensation using a similar approach to LLMSummarizingCondenser.
+
+        This method is required by the CachingCondenser abstract base class.
+        The instructions are a fixed text: none of the arguments is read by this
+        implementation.
+
+        Args:
+            events: The events to condense (unused here)
+            state: The current state (unused here)
+            base_messages: The messages that are already in the prompt (cached; unused here)
+
+        Returns:
+            A 'user' message whose single text content holds the condensation instructions
+        """
+        # Create the condensation instructions similar to LLMSummarizingCondenser
+        prompt = """You are maintaining a context-aware state summary for an interactive agent. 
+The whole conversation above will be removed from the context window. Therefore you need to track:
+
+USER_CONTEXT: (Preserve essential user requirements, goals, and clarifications in concise form)
+
+COMPLETED: (Tasks completed so far, with brief results)
+PENDING: (Tasks that still need to be done)
+CURRENT_STATE: (Current variables, data structures, or relevant state)
+
+For code-specific tasks, also include:
+CODE_STATE: {File paths, function signatures, data structures}
+TESTS: {Failing cases, error messages, outputs}
+CHANGES: {Code edits, variable updates}
+DEPS: {Dependencies, imports, external calls}
+VERSION_CONTROL_STATUS: {Repository state, current branch, PR status, commit history}
+
+PRIORITIZE:
+1. Adapt tracking format to match the actual task type
+2. Capture key user requirements and goals
+3. Distinguish between completed and pending tasks
+4. Keep all sections concise and relevant
+
+SKIP: Tracking irrelevant details for the current task type
+
+Example formats:
+
+For code tasks:
+USER_CONTEXT: Fix FITS card float representation issue
+COMPLETED: Modified mod_float() in card.py, all tests passing
+PENDING: Create PR, update documentation
+CODE_STATE: mod_float() in card.py updated
+TESTS: test_format() passed
+CHANGES: str(val) replaces f"{val:.16G}"
+DEPS: None modified
+VERSION_CONTROL_STATUS: Branch: fix-float-precision, Latest commit: a1b2c3d
+
+For other tasks:
+USER_CONTEXT: Write 20 haikus based on coin flip results
+COMPLETED: 15 haikus written for results [T,H,T,H,T,H,T,T,H,T,H,T,H,T,H]
+PENDING: 5 more haikus needed
+CURRENT_STATE: Last flip: Heads, Haiku count: 15/20"""
+
+        # Terminate the instructions with a blank line.
+        prompt += '\n\n'
+
+        # Create a message with the condensation instructions
+        return Message(
+            role='user',
+            content=[TextContent(text=prompt)],
+        )
+
+    def processResponse(
+        self, events: list[Event], state: State, response: Any, messages: list[Message]
+    ) -> Condensation | View:
+        """Turn the LLM summary into a Condensation that forgets the summarized events.
+
+        The first `keep_first` events are always retained. When `keep_user_messages`
+        is set, `_filter_user_messages_to_keep` additionally exempts messages from
+        being forgotten.
+
+        Args:
+            events: The events that were condensed
+            state: The current state (not read by this implementation)
+            response: The LLM response; the summary is read from
+                `response.choices[0].message.content`
+            messages: The messages that were already in the prompt (not read by
+                this implementation)
+
+        Returns:
+            A Condensation listing the ids of the forgotten events together with
+            the summary, or a View of the complete history when nothing is forgotten
+        """
+        # Extract the summary from the response
+        summary = response.choices[0].message.content
+
+        # Everything after the first `keep_first` events is a candidate for forgetting
+        events_to_forget = events[self.keep_first :]
+
+        # Ensure essential user messages are not forgotten (mutates the list in place)
+        if self.keep_user_messages:
+            self._filter_user_messages_to_keep(events, events_to_forget)
+
+        if not events_to_forget:
+            # Nothing gets forgotten, so return the whole history. Returning only the
+            # first `keep_first` events would silently drop any message that the
+            # keep_user_messages filter has just exempted from forgetting.
+            return View(events=list(events))
+
+        return Condensation(
+            action=CondensationAction(
+                forgotten_event_ids=[event.id for event in events_to_forget],
+                summary=summary,
+            )
+        )
+
+    def should_condense(self, view: View) -> bool:
+        """Report whether the given view needs to be condensed.
+
+        Two conditions trigger condensation:
+        1. The view holds more than `max_size` events
+        2. The most recent user message contains the trigger word
+
+        Args:
+            view: The view to check
+
+        Returns:
+            True if the view should be condensed, False otherwise
+        """
+        history = view.events
+
+        # Size limit is checked first; the trigger word is only looked up afterwards.
+        over_limit = len(history) > self.max_size
+        if over_limit:
+            logger.info(f'Condensing events due to max size({self.max_size}) limit.')
+            return True
+
+        if not self._contains_trigger_word(history):
+            return False
+
+        logger.info(f"Condensing events due to trigger word '{self.trigger_word}'.")
+        return True
+
+    def _contains_trigger_word(self, events: list[Event]) -> bool:
+        """Tell whether the latest user message carries the trigger word.
+
+        Events are scanned from newest to oldest. The scan ends at the first user
+        message, or at a condensation action if one is reached before any user message.
+
+        Args:
+            events: The events to check
+
+        Returns:
+            True if the most recent user message contains the trigger word, False otherwise
+        """
+        # Fewer than two events: there is nothing worth condensing.
+        if not events or len(events) == 1:
+            return False
+
+        for candidate in reversed(events):
+            action_type = getattr(candidate, 'action', None)
+            is_user_message = (
+                getattr(candidate, 'source', None) == EventSource.USER
+                and action_type == ActionType.MESSAGE
+                and candidate.message is not None
+            )
+            if is_user_message:
+                return self.trigger_word in candidate.message
+
+            # A previous condensation marks the end of the search window.
+            if action_type == ActionType.CONDENSATION:
+                return False
+
+        return False
+
+    def _filter_user_messages_to_keep(
+        self, events: list[Event], events_to_forget: list[Event]
+    ) -> None:
+        """Ensure essential user messages are not forgotten.
+
+        The first and the most recent user message found in `events` are taken out
+        of `events_to_forget`. The list is modified in place; nothing is returned.
+
+        Args:
+            events: The full event history that is searched for user messages
+            events_to_forget: The forget candidates, edited in place
+        """
+        # Only messages that actually come from the user qualify; agent-authored
+        # MessageActions must not be protected as if they were user input.
+        user_messages = [
+            event
+            for event in events
+            if isinstance(event, MessageAction)
+            and getattr(event, 'source', None) == EventSource.USER
+        ]
+        if not user_messages:
+            return
+
+        # Compare by identity so a different event that merely compares equal
+        # (e.g. a repeated message text) is never removed by mistake.
+        protected_ids = {id(user_messages[0]), id(user_messages[-1])}
+
+        # Slice assignment keeps the caller's list object and updates its contents.
+        events_to_forget[:] = [
+            event for event in events_to_forget if id(event) not in protected_ids
+        ]
+
+    @classmethod
+    def from_config(
+        cls, config: LLMAgentCacheCondenserConfig
+    ) -> LLMAgentCacheCondenser:
+        """Build a condenser from its configuration object.
+
+        Args:
+            config: Supplies `max_size`, `trigger_word` and `keep_first`
+
+        Returns:
+            A new condenser; instantiated through `cls` so subclasses get their own type
+        """
+        return cls(
+            max_size=config.max_size,
+            trigger_word=config.trigger_word,
+            keep_first=config.keep_first,
+        )
+
+
+LLMAgentCacheCondenser.register_config(LLMAgentCacheCondenserConfig)
@@ -3,6 +3,7 @@ from __future__ import annotations
 from litellm import supports_response_schema
 from pydantic import BaseModel

+from openhands.controller.state.state import State
 from openhands.core.config.condenser_config import LLMAttentionCondenserConfig
 from openhands.events.action.agent import CondensationAction
 from openhands.llm.llm import LLM
@@ -47,7 +48,7 @@ class LLMAttentionCondenser(RollingCondenser):

        super().__init__()

-    def get_condensation(self, view: View) -> Condensation:
+    def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
        target_size = self.max_size // 2
        head_event_ids = [event.id for event in view.events[: self.keep_first]]

@@ -1,5 +1,6 @@
 from __future__ import annotations

+from openhands.controller.state.state import State
 from openhands.core.config.condenser_config import LLMSummarizingCondenserConfig
 from openhands.core.message import Message, TextContent
 from openhands.events.action.agent import CondensationAction
@@ -48,7 +49,7 @@ class LLMSummarizingCondenser(RollingCondenser):
        """Truncate the content to fit within the specified maximum event length."""
        return truncate_content(content, max_chars=self.max_event_length)

-    def get_condensation(self, view: View) -> Condensation:
+    def get_condensation(self, view: View, state: State, agent=None) -> Condensation:
        head = view[: self.keep_first]
        target_size = self.max_size // 2
        # Number of events to keep from the tail -- target size, minus however many
@@ -1,5 +1,6 @@
 from __future__ import annotations

+from openhands.controller.state.state import State
 from openhands.core.config.condenser_config import NoOpCondenserConfig
 from openhands.memory.condenser.condenser import Condensation, Condenser, View

@@ -7,7 +8,7 @@ from openhands.memory.condenser.condenser import Condensation, Condenser, View
 class NoOpCondenser(Condenser):
    """A condenser that does nothing to the event sequence."""

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, state: State, agent=None) -> View | Condensation:
        """Returns the list of events unchanged."""
        return view

@@ -1,5 +1,6 @@
 from __future__ import annotations

+from openhands.controller.state.state import State
 from openhands.core.config.condenser_config import ObservationMaskingCondenserConfig
 from openhands.events.event import Event
 from openhands.events.observation import Observation
@@ -15,7 +16,7 @@ class ObservationMaskingCondenser(Condenser):

        super().__init__()

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, state: State, agent=None) -> View | Condensation:
        """Replace the content of observations outside of the attention window with a placeholder."""
        results: list[Event] = []
        for i, event in enumerate(view):
@@ -30,10 +30,10 @@ class CondenserPipeline(Condenser):
            for condenser in self.condensers:
                condenser.write_metadata(state)

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, state: State, agent=None) -> View | Condensation:
        result: View | Condensation = view
        for condenser in self.condensers:
-            result = condenser.condense(result)
+            result = condenser.condense(result, state, agent)
            if isinstance(result, Condensation):
                break
        return result
@@ -1,5 +1,6 @@
 from __future__ import annotations

+from openhands.controller.state.state import State
 from openhands.core.config.condenser_config import RecentEventsCondenserConfig
 from openhands.memory.condenser.condenser import Condensation, Condenser, View

@@ -13,7 +14,7 @@ class RecentEventsCondenser(Condenser):

        super().__init__()

-    def condense(self, view: View) -> View | Condensation:
+    def condense(self, view: View, state: State, agent=None) -> View | Condensation:
        """Keep only the most recent events (up to `max_events`)."""
        head = view[: self.keep_first]
        tail_length = max(0, self.max_events - len(head))
@@ -196,7 +196,7 @@ class StructuredSummaryCondenser(RollingCondenser):
        """Truncate the content to fit within the specified maximum event length."""
        return truncate_content(content, max_chars=self.max_event_length)

-    def get_condensation(self, view: View) -> Condensation:
+    def get_condensation(self, view: View, state=None, agent=None) -> Condensation:
        head = view[: self.keep_first]
        target_size = self.max_size // 2
        # Number of events to keep from the tail -- target size, minus however many
@@ -59,14 +59,16 @@ class View(BaseModel):
        # The relevant summary is always in the last condensation event (i.e., the most recent one).
        for event in reversed(events):
            if isinstance(event, CondensationAction):
-                if event.summary is not None and event.summary_offset is not None:
+                if event.summary is not None:
                    summary = event.summary
                    summary_offset = event.summary_offset
                    break

-        if summary is not None and summary_offset is not None:
-            kept_events.insert(
-                summary_offset, AgentCondensationObservation(content=summary)
-            )
+        if summary is not None:
+            summary_obs = AgentCondensationObservation(content=summary)
+            if summary_offset is not None:
+                kept_events.insert(summary_offset, summary_obs)
+            else:
+                kept_events.append(summary_obs)

        return View(events=kept_events)
@@ -10,7 +10,7 @@ from openhands.core.config import AppConfig
 from openhands.core.config.condenser_config import (
    BrowserOutputCondenserConfig,
    CondenserPipelineConfig,
-    LLMSummarizingCondenserConfig,
+    LLMAgentCacheCondenserConfig,
 )
 from openhands.core.logger import OpenHandsLoggerAdapter
 from openhands.core.schema import AgentState
@@ -138,8 +138,10 @@ class Session:
            default_condenser_config = CondenserPipelineConfig(
                condensers=[
                    BrowserOutputCondenserConfig(),
-                    LLMSummarizingCondenserConfig(
-                        llm_config=llm.config, keep_first=4, max_size=80
+                    LLMAgentCacheCondenserConfig(
+                        max_size=100,  # Default max size
+                        trigger_word='CONDENSE!',  # Default trigger word
+                        keep_first=4,
                    ),
                ]
            )
@@ -341,7 +341,7 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(
    # 2. The action message
    # 3. The observation message
    mock_state.history = [action, observation]
-    messages = agent._get_messages(mock_state.history)
+    messages = agent.get_messages(mock_state.history)
    assert len(messages) == 3
    assert messages[0].role == 'system'  # First message should be the system message
    assert messages[1].role == 'assistant'  # Second message should be the action
@@ -349,21 +349,21 @@ def test_mismatched_tool_call_events_and_auto_add_system_message(

    # The same should hold if the events are presented out-of-order
    mock_state.history = [observation, action]
-    messages = agent._get_messages(mock_state.history)
+    messages = agent.get_messages(mock_state.history)
    assert len(messages) == 3
    assert messages[0].role == 'system'  # First message should be the system message

    # If only one of the two events is present, then we should just get the system message
    # plus any valid message from the event
    mock_state.history = [action]
-    messages = agent._get_messages(mock_state.history)
+    messages = agent.get_messages(mock_state.history)
    assert (
        len(messages) == 1
    )  # Only system message, action is waiting for its observation
    assert messages[0].role == 'system'

    mock_state.history = [observation]
-    messages = agent._get_messages(mock_state.history)
+    messages = agent.get_messages(mock_state.history)
    assert len(messages) == 1  # Only system message, observation has no matching action
    assert messages[0].role == 'system'

@@ -0,0 +1,612 @@
+from typing import cast
+from unittest.mock import MagicMock, Mock, patch
+
+import pytest
+
+from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
+from openhands.controller.state.state import State
+from openhands.core.config.agent_config import AgentConfig
+from openhands.core.config.llm_config import LLMConfig
+from openhands.core.message import Message
+from openhands.events.action.agent import ChangeAgentStateAction, RecallAction
+from openhands.events.action.files import FileReadAction
+from openhands.events.action.message import MessageAction, SystemMessageAction
+from openhands.events.event import Event, EventSource, RecallType
+from openhands.events.observation.agent import (
+    RecallObservation,
+)
+from openhands.events.observation.files import FileReadObservation
+from openhands.llm import LLM
+from openhands.llm.metrics import Metrics
+from openhands.memory.condenser.condenser import Condensation, View
+from openhands.memory.condenser.impl.llm_agent_cache_condenser import (
+    LLMAgentCacheCondenser,
+)
+
+
+def format_messages_for_llm(messages: Message | list[Message]) -> list[dict]:
+    """Dump one message or a list of messages into a list of plain dicts."""
+    batch = [messages] if isinstance(messages, Message) else messages
+    dumped = []
+    for entry in batch:
+        dumped.append(entry.model_dump())
+    return dumped
+
+
+@pytest.fixture
+def agent() -> CodeActAgent:
+    """Provide a CodeActAgent whose LLM is a mock that reports prompt caching as active."""
+    config = AgentConfig()
+    agent = CodeActAgent(llm=LLM(LLMConfig()), config=config)
+    # Replace the real LLM with a mock once the agent has been constructed.
+    agent.llm = Mock(LLM)
+    agent.llm.config = Mock()
+    agent.llm.config.max_message_chars = 1000
+    agent.llm.is_caching_prompt_active.return_value = True
+    # Use the module-level helper instead of a mocked formatter.
+    agent.llm.format_messages_for_llm = format_messages_for_llm
+    agent.llm.metrics = Metrics()
+    return agent
+
+
+def set_next_llm_response(agent, response: str):
+    """Make the agent's mocked `llm.completion` return `response` as its only choice."""
+    choice = MagicMock()
+    choice.message.content = response
+
+    llm_reply = MagicMock()
+    llm_reply.choices = [choice]
+    agent.llm.completion.return_value = llm_reply
+
+
+def test_contains_trigger_word():
+    """Test that _contains_trigger_word correctly identifies the CONDENSE! keyword."""
+    # Create the condenser
+    condenser = LLMAgentCacheCondenser(max_size=10)
+
+    # Test case 1: Empty events list
+    assert not condenser._contains_trigger_word([])
+
+    # Test case 2: Single event (not enough events)
+    event = MessageAction('Please CONDENSE! the conversation history.')
+    assert not condenser._contains_trigger_word([event])
+
+    # Test case 3: User message with CONDENSE! keyword
+    user_event = MessageAction('Please CONDENSE! the conversation history.')
+    user_event._source = 'user'  # type: ignore [attr-defined]
+    agent_event = MessageAction('Agent response')
+    agent_event._source = 'agent'  # type: ignore [attr-defined]
+    assert condenser._contains_trigger_word([user_event, agent_event])
+
+    # Test case 4: User message without CONDENSE! keyword
+    # (the same user_event object is reused with new content)
+    user_event.content = 'Please summarize the conversation history.'
+    assert not condenser._contains_trigger_word([user_event, agent_event])
+
+    # Test case 5: User message with CONDENSE! keyword followed by a RecallObservation
+    user_event.content = 'Please CONDENSE! the conversation history.'
+    recall_event = RecallObservation(
+        recall_type=RecallType.KNOWLEDGE, content='saw a thing'
+    )
+    events = [agent_event, user_event, recall_event]
+    assert condenser._contains_trigger_word(events)
+
+    # Test case 6: Multiple user messages, only the most recent one matters
+    user_event1 = MessageAction('First message without keyword')
+    user_event1._source = 'user'  # type: ignore [attr-defined]
+    user_event2 = MessageAction('Please CONDENSE! the conversation history.')
+    user_event2._source = 'user'  # type: ignore [attr-defined]
+    events = [user_event1, agent_event, user_event2]
+    assert condenser._contains_trigger_word(events)
+
+    # Test case 7: Multiple user messages, most recent one doesn't have keyword
+    events = [user_event2, agent_event, user_event1]
+    assert not condenser._contains_trigger_word(events)
+
+
+def test_no_condensation(agent: CodeActAgent):
+    """A history below max_size without trigger word comes back as a plain View."""
+    condenser = LLMAgentCacheCondenser(max_size=10)
+
+    # Five messages with ids 0..4 stay well under the limit of 10.
+    history = []
+    for index in range(5):
+        message = MessageAction(f'Message {index}')
+        message._id = index  # type: ignore [attr-defined]
+        history.append(message)
+
+    outcome = condenser.condensed_history(
+        State(history=cast(list[Event], history)), agent
+    )
+
+    # No condensation happened: the result is a View holding all five events.
+    assert isinstance(outcome, View)
+    assert len(outcome.events) == 5
+
+
+def test_condense(agent: CodeActAgent):
+    """Test that the condenser uses the LLM to condense events."""
+    llm_summary = """
+USER_CONTEXT: Testing file read operations
+COMPLETED: Read 4 files with varying content
+PENDING: None
+CURRENT_STATE: Files read: 0.txt, 1.txt, 2.txt, 3.txt
+    """
+    set_next_llm_response(agent, llm_summary)
+
+    condenser = LLMAgentCacheCondenser(max_size=5, keep_user_messages=True)
+    agent.condenser = condenser
+
+    system_message = SystemMessageAction(content='System Message')
+    system_message._source = EventSource.AGENT  # type: ignore
+    user_message = MessageAction('User message')
+    user_message._source = EventSource.USER  # type: ignore
+    events = [system_message, user_message]
+    # Keyword arguments keep file content and path from being mixed up; the other
+    # positional FileReadObservation call in this module passes the content first.
+    events += [
+        FileReadObservation(content='content.' * i, path=f'{i}.txt') for i in range(4)
+    ]
+    assert len(events) == 6
+    # Ids start at 1, so the four observations carry ids 3..6.
+    for i, event in enumerate(events):
+        event._id = i + 1  # type: ignore [attr-defined]
+
+    result = condenser.condensed_history(
+        State(history=cast(list[Event], events)), agent
+    )
+
+    assert isinstance(result, Condensation)
+    assert hasattr(result, 'action')
+    # 1(system-prompt) is not forgotten
+    # 2(user-message) is not forgotten
+    assert result.action.forgotten_event_ids == [3, 4, 5, 6]
+    assert result.action.summary == llm_summary
+    assert result.action.summary_offset is None
+
+
+def test_llm_agent_cache_condenser_with_state_with_rewrite(agent: CodeActAgent):
+    """Test that the condenser correctly handles summaries."""
+    set_next_llm_response(
+        agent,
+        """
+USER_CONTEXT: File exploration task
+COMPLETED: Read 6 files with varying content
+PENDING: None
+CODE_STATE: Files read: 0.txt, 1.txt, 2.txt, 3.txt, 4.txt, 5.txt
+CHANGES: User asked about database schema and agent explained the tables and relationships.
+        """,
+    )
+
+    condenser = LLMAgentCacheCondenser(max_size=5)
+    agent.condenser = condenser
+
+    # Six observations exceed max_size=5. Keyword arguments keep file content and
+    # path from being mixed up; the other positional FileReadObservation call in
+    # this module passes the content first.
+    events = [
+        FileReadObservation(content='content.' * i, path=f'{i}.txt') for i in range(6)
+    ]
+    for i, event in enumerate(events):
+        event._id = i  # type: ignore [attr-defined]
+
+    state = State(history=cast(list[Event], events))
+
+    result = condenser.condensed_history(state, agent)
+
+    # Verify that a Condensation is returned with a summary
+    assert isinstance(result, Condensation)
+    assert hasattr(result, 'action')
+    assert result.action.summary is not None
+    assert 'User asked about database schema' in result.action.summary
+
+
+def test_should_condense_max_size():
+    """should_condense is False at 5 events and True at 11 events for max_size=10."""
+    condenser = LLMAgentCacheCondenser(max_size=10)
+
+    def view_of(count: int) -> View:
+        # Build a view holding `count` plain message events.
+        return View(events=[MessageAction(f'Message {i}') for i in range(count)])
+
+    below_limit = view_of(5)
+    above_limit = view_of(11)
+
+    assert not condenser.should_condense(below_limit)
+    assert condenser.should_condense(above_limit)
+
+
+def test_llm_agent_cache_condenser_simulated_mixed_condensation(agent: CodeActAgent):
+    """Test simulated condensation with a mix of file-read actions and observations."""
+    from tests.unit.testing_utils import create_tool_call_metadata
+
+    set_next_llm_response(
+        agent,
+        """
+USER_CONTEXT: Mixed file and message operations
+COMPLETED: Processed 7 events (messages and file reads)
+PENDING: None
+CURRENT_STATE: Last message: Test message 6, Last file: 7.txt
+CHANGES: Summary <mention content of message 4,5>
+        """,
+    )
+
+    condenser = LLMAgentCacheCondenser(max_size=5)
+    agent.condenser = condenser
+
+    events = []
+
+    # Ids 1..7: even ids become FileReadActions, odd ids FileReadObservations.
+    for i in range(1, 8):
+        if i % 2 == 0:
+            # Create a FileReadAction with proper tool_call_metadata
+            event = FileReadAction(f'{i}.txt')
+            event._source = 'agent'
+
+            # Use the utility function to create tool_call_metadata
+            event.tool_call_metadata = create_tool_call_metadata(
+                tool_call_id=f'tool_call_{i}',
+                model_response_id=f'model_response_{i}',
+                function_name='str_replace_editor',
+            )
+        else:
+            event = FileReadObservation(f'File content for event {i}', f'{i}.txt')
+
+        event._id = i  # type: ignore [attr-defined]
+        events.append(event)
+
+    # Seven events exceed max_size=5.
+    state = State(history=cast(list[Event], events))
+    result = condenser.condensed_history(state, agent)
+
+    # Verify that a Condensation is returned
+    assert isinstance(result, Condensation)
+    assert len(result.action.forgotten_event_ids) > 0
+    # Check that the summary contains the expected content
+    assert 'Mixed file and message operations' in result.action.summary
+
+
+def test_llm_agent_cache_condenser_always_keep_system_prompt(agent: CodeActAgent):
+    """Test that the system prompt is preserved in the final messages.
+
+    A first pass condenses ten messages; a second pass over a short history that
+    contains the resulting condensation action must yield a View whose messages
+    start with the system prompt.
+    """
+    set_next_llm_response(
+        agent,
+        """
+USER_CONTEXT: Simple greeting exchange
+COMPLETED: User greeted agent, agent responded
+PENDING: None
+CURRENT_STATE: Conversation in progress
+    """,
+    )
+
+    # Create a condenser with a small max_size to ensure condensation
+    # but large enough to not trigger again after adding the condensation action
+    condenser = LLMAgentCacheCondenser(max_size=5)
+    agent.condenser = condenser
+
+    # Create a lot of events to ensure we exceed max_size
+    # (sources alternate between user and agent, ids run from 1 to 10)
+    events = []
+    for i in range(10):
+        event = MessageAction(f'Message {i}')
+        event._source = 'user' if i % 2 == 0 else 'agent'  # type: ignore [attr-defined]
+        event._id = i + 1  # type: ignore [attr-defined]
+        events.append(event)
+
+    state = State(history=cast(list[Event], events))
+
+    result = condenser.condensed_history(state, agent)
+
+    # Verify that a Condensation is returned
+    assert isinstance(result, Condensation)
+    # Give the condensation action an id of its own before adding it to a history.
+    result.action._id = 20  # type: ignore [attr-defined]
+
+    # Create a new state with just a few events and the condensation action
+    # to avoid triggering condensation again
+    new_state = State(
+        history=[
+            events[-1],  # Keep the last event
+            result.action,  # Add the condensation action
+        ]
+    )
+
+    view = condenser.condensed_history(new_state, agent)
+    assert isinstance(view, View)
+
+    # Check that the system prompt is preserved in the messages
+    messages = agent.get_messages(view.events)
+    assert messages[0].role == 'system'
+    assert 'You are OpenHands' in messages[0].content[0].text
+
+
+def test_llm_agent_cache_condenser_first_message_user_message(agent: CodeActAgent):
+    """Test that at least one user message is preserved.
+
+    The history holds a single user message followed by ten agent messages and is
+    condensed with keep_user_messages=True.
+    """
+    # Create a condenser with a small max_size to ensure condensation
+    # but large enough to not trigger again after adding the condensation action
+    condenser = LLMAgentCacheCondenser(max_size=5, keep_user_messages=True)
+    agent.condenser = condenser
+
+    # Create events with only one user message
+    user_message = MessageAction('Hello, how are you?')
+    user_message._source = 'user'  # type: ignore [attr-defined]
+    user_message._id = 1  # type: ignore [attr-defined]
+
+    # Add many agent messages to exceed max_size (ids 2..11)
+    events = [user_message]
+    for i in range(10):
+        event = MessageAction(f'Agent response {i}')
+        event._source = 'agent'  # type: ignore [attr-defined]
+        event._id = i + 2  # type: ignore [attr-defined]
+        events.append(event)
+
+    state = State(history=cast(list[Event], events))
+
+    set_next_llm_response(
+        agent,
+        """
+USER_CONTEXT: Initial greeting
+COMPLETED: User said hello, agent responded
+PENDING: None
+CURRENT_STATE: Conversation started
+    """,
+    )
+
+    result = condenser.condensed_history(state, agent)
+
+    # Verify that a Condensation is returned
+    assert isinstance(result, Condensation)
+    # Give the condensation action an id of its own before adding it to a history.
+    result.action._id = 20  # type: ignore [attr-defined]
+
+    # Create a new state with just the user message and the condensation action
+    # to avoid triggering condensation again
+    new_state = State(
+        history=[
+            user_message,  # Keep the user message
+            result.action,  # Add the condensation action
+        ]
+    )
+
+    view = condenser.condensed_history(new_state, agent)
+    assert isinstance(view, View)
+
+    # Check that at least one user message is preserved in the view
+    user_messages = [
+        event
+        for event in view.events
+        if hasattr(event, '_source') and event._source == 'user'
+    ]
+    assert len(user_messages) > 0
+
+    # Check that the system prompt is preserved in the messages
+    messages = agent.get_messages(view.events)
+    assert messages[0].role == 'system'
+    assert 'You are OpenHands' in messages[0].content[0].text
+
+
+def test_llm_agent_cache_condenser_full_rewrite(agent: CodeActAgent):
+    """Test a complete condensation of the conversation.
+
+    After condensing ten messages, a history that consists of nothing but the
+    condensation action must still produce a View and a leading system message.
+    """
+    # Create a condenser with a small max_size to ensure condensation
+    # but large enough to not trigger again after adding the condensation action
+    condenser = LLMAgentCacheCondenser(max_size=5)
+    agent.condenser = condenser
+
+    # Create many events to exceed max_size
+    # (sources alternate between user and agent, ids run from 1 to 10)
+    events = []
+    for i in range(10):
+        event = MessageAction(f'Message {i}')
+        event._source = 'user' if i % 2 == 0 else 'agent'  # type: ignore [attr-defined]
+        event._id = i + 1  # type: ignore [attr-defined]
+        events.append(event)
+
+    state = State(history=cast(list[Event], events))
+
+    set_next_llm_response(
+        agent,
+        """
+USER_CONTEXT: Simple greeting
+COMPLETED: User and AI greeted each other
+PENDING: None
+CURRENT_STATE: Conversation initialized
+        """,
+    )
+
+    result = condenser.condensed_history(state, agent)
+
+    # Verify that a Condensation is returned
+    assert isinstance(result, Condensation)
+    # Give the condensation action an id of its own before adding it to a history.
+    result.action._id = 20  # type: ignore [attr-defined]
+
+    # Check that we've forgotten some events
+    assert len(result.action.forgotten_event_ids) > 0
+
+    # Check that the summary contains the greeting information
+    assert 'User and AI greeted each other' in result.action.summary
+
+    # Create a new state with just the condensation action
+    # to avoid triggering condensation again
+    new_state = State(history=[result.action])
+
+    view = condenser.condensed_history(new_state, agent)
+    assert isinstance(view, View)
+
+    # Check that the condensation action is in the view
+    assert result.action in view.events
+
+    # Check that the system prompt is preserved in the messages
+    messages = agent.get_messages(view.events)
+    assert messages[0].role == 'system'
+    assert 'You are OpenHands' in messages[0].content[0].text
+
+
+def test_condensation_triggered_by_user_message_in_context(agent):
+    """Check that trigger-word condensation sends both user messages to the LLM.
+
+    The history is far below ``max_size``, so condensation here relies on the
+    trigger word in the last user message. The LLM call is patched, and the
+    ``messages`` keyword argument it receives must contain both the initial
+    goal message and the trigger message.
+    """
+    goal_text = 'I want you to do some things for me.'
+    trigger_text = 'Please CONDENSE! the conversation history.'
+
+    cache_condenser = LLMAgentCacheCondenser(trigger_word='CONDENSE!', max_size=500)
+    agent.condenser = cache_condenser
+
+    # Initial user message stating the goal (ID 1).
+    goal_message = MessageAction(goal_text)
+    goal_message._source = 'user'  # type: ignore [attr-defined]
+    goal_message._id = 1  # type: ignore [attr-defined]
+
+    # Three agent replies (IDs 2..4).
+    agent_replies = []
+    for index, event_id in enumerate(range(2, 5)):
+        reply = MessageAction(f'Agent response {index}')
+        reply._source = 'agent'  # type: ignore [attr-defined]
+        reply._id = event_id  # type: ignore [attr-defined]
+        agent_replies.append(reply)
+
+    # Final user message carrying the trigger word (ID 5).
+    trigger_message = MessageAction(trigger_text)
+    trigger_message._source = 'user'  # type: ignore [attr-defined]
+    trigger_message._id = 5  # type: ignore [attr-defined]
+
+    history = [goal_message, *agent_replies, trigger_message]
+    state = State(history=cast(list[Event], history))
+
+    # Canned LLM response exposing the summary at choices[0].message.content.
+    choice = MagicMock()
+    choice.message.content = """
+USER_CONTEXT: Simple greeting
+COMPLETED: User and AI greeted each other
+PENDING: None
+CURRENT_STATE: Conversation initialized
+        """
+    llm_response = MagicMock()
+    llm_response.choices = [choice]
+
+    with patch.object(
+        agent.llm, 'completion', return_value=llm_response
+    ) as completion_mock:
+        cache_condenser.condensed_history(state, agent)
+
+        # Exactly one LLM call is expected for the condensation.
+        completion_mock.assert_called_once()
+
+        # Inspect the keyword arguments of that call.
+        sent_messages = completion_mock.call_args.kwargs.get('messages', [])
+
+        # The goal message must be part of the context...
+        goal_in_context = any(
+            goal_text in sent['content'] for sent in sent_messages
+        )
+        assert goal_in_context, 'First user message should be preserved in the context'
+
+        # ...and so must the message that triggered the condensation.
+        trigger_in_context = any(
+            trigger_text in sent['content'] for sent in sent_messages
+        )
+        assert trigger_in_context, 'Trigger message should be included in the context'
+
+
+def test_condensation_with_followup_events(agent):
+    """Check condensation when non-message events follow the trigger message.
+
+    With ``keep_user_messages=True`` the expected Condensation forgets the
+    three agent replies and the two follow-up events, but neither of the two
+    user messages. The trigger message must also appear in the messages sent
+    to the (patched) LLM, and the summary must equal the mocked response.
+    """
+    trigger_text = 'Please CONDENSE! the conversation history.'
+
+    cache_condenser = LLMAgentCacheCondenser(
+        trigger_word='CONDENSE!', max_size=500, keep_user_messages=True
+    )
+    agent.condenser = cache_condenser
+
+    # Initial user message stating the goal (ID 1).
+    goal_message = MessageAction('I want you to do some things for me.')
+    goal_message._id = 1  # type: ignore [attr-defined]
+    goal_message._source = EventSource.USER  # type: ignore [attr-defined]
+
+    # Three agent replies (IDs 2..4).
+    agent_replies = []
+    for index, event_id in enumerate(range(2, 5)):
+        reply = MessageAction(f'Agent response {index}')
+        reply._id = event_id  # type: ignore [attr-defined]
+        reply._source = EventSource.AGENT  # type: ignore [attr-defined]
+        agent_replies.append(reply)
+
+    # User message carrying the trigger word (ID 5).
+    trigger_message = MessageAction(trigger_text)
+    trigger_message._id = 5  # type: ignore [attr-defined]
+    trigger_message._source = EventSource.USER  # type: ignore [attr-defined]
+
+    # Follow-up events recorded after the trigger message (IDs 6 and 7). The
+    # recall action has a USER source although it is not a user message.
+    state_change = ChangeAgentStateAction(
+        agent_state='running',
+        thought='',
+    )
+    state_change._source = EventSource.ENVIRONMENT  # type: ignore [attr-defined]
+    state_change._id = 6  # type: ignore [attr-defined]
+
+    recall = RecallAction(
+        recall_type=RecallType.WORKSPACE_CONTEXT,
+        query='hi',
+        thought='',
+    )
+    recall._source = EventSource.USER  # type: ignore [attr-defined]
+    recall._id = 7  # type: ignore [attr-defined]
+
+    history = [goal_message, *agent_replies, trigger_message, state_change, recall]
+    state = State(history=cast(list[Event], history))
+
+    # Canned LLM response exposing the summary at choices[0].message.content.
+    choice = MagicMock()
+    choice.message.content = """
+USER_CONTEXT: Simple greeting
+COMPLETED: User and AI greeted each other
+PENDING: None
+CURRENT_STATE: Conversation initialized
+    """
+    llm_response = MagicMock()
+    llm_response.choices = [choice]
+
+    with patch.object(
+        agent.llm, 'completion', return_value=llm_response
+    ) as completion_mock:
+        condensation = cache_condenser.condensed_history(state, agent)
+
+        # Exactly one LLM call is expected for the condensation.
+        completion_mock.assert_called_once()
+
+        # Inspect the keyword arguments of that call.
+        sent_messages = completion_mock.call_args.kwargs.get('messages', [])
+
+        # The trigger message must be part of the context given to the LLM.
+        trigger_in_context = any(
+            trigger_text in sent['content'] for sent in sent_messages
+        )
+        assert trigger_in_context, 'Trigger message should be included in the context'
+
+        assert isinstance(condensation, Condensation)
+        assert hasattr(condensation, 'action')
+        # Agent replies and follow-up events are forgotten; the goal message
+        # and the trigger message are not.
+        expected_forgotten = [
+            event.id for event in (*agent_replies, state_change, recall)
+        ]
+        assert condensation.action.forgotten_event_ids == expected_forgotten
+        assert condensation.action.summary == llm_response.choices[0].message.content
+        assert condensation.action.summary_offset is None
+
+
+def test_keep_first_functionality(agent: CodeActAgent):
+    """Test that the LLMAgentCacheCondenser keeps the first `keep_first` events.
+
+    Ten alternating user/agent messages exceed ``max_size=5``, so condensation
+    is expected. None of the first ``keep_first`` event IDs may appear among
+    the forgotten event IDs.
+    """
+    keep_first = 2
+    condenser = LLMAgentCacheCondenser(max_size=5, keep_first=keep_first)
+    agent.condenser = condenser
+
+    # Create events exceeding max_size (IDs 1..10, alternating user/agent)
+    events = []
+    for i in range(10):
+        event = MessageAction(f'Message {i}')
+        event._source = 'user' if i % 2 == 0 else 'agent'  # type: ignore [attr-defined]
+        event._id = i + 1  # type: ignore [attr-defined]
+        events.append(event)
+
+    state = State(history=cast(list[Event], events))
+
+    set_next_llm_response(
+        agent,
+        """
+USER_CONTEXT: Simple greeting
+COMPLETED: User and AI greeted each other
+PENDING: None
+CURRENT_STATE: Conversation initialized
+        """,
+    )
+
+    result = condenser.condensed_history(state, agent)
+
+    # Verify that a Condensation is returned
+    assert isinstance(result, Condensation)
+    result.action._id = 20  # type: ignore [attr-defined]
+
+    # Something must have been forgotten, otherwise the check below is vacuous.
+    forgotten_event_ids = set(result.action.forgotten_event_ids)
+    assert forgotten_event_ids, 'Condensation should forget at least one event'
+
+    # Check that none of the first `keep_first` events was forgotten. The
+    # previous form (`all(...) is False`) only proved that not every forgotten
+    # ID was a kept one, so it passed even when a kept event had been dropped
+    # together with any later event.
+    preserved_event_ids = {event._id for event in events[:keep_first]}  # type: ignore [attr-defined]
+    assert forgotten_event_ids.isdisjoint(
+        preserved_event_ids
+    ), 'The first `keep_first` events must not be forgotten'
+
+    # Check that the summary contains the greeting information
+    assert 'User and AI greeted each other' in result.action.summary
@@ -5,6 +5,7 @@ from openhands.core.message_utils import (
 from openhands.events.event import Event
 from openhands.events.tool import ToolCallMetadata
 from openhands.llm.metrics import Metrics, TokenUsage
+from tests.unit.testing_utils import create_tool_call_metadata


 def test_get_token_usage_for_event():
@@ -28,15 +29,14 @@ def test_get_token_usage_for_event():

    # Create an event referencing that response_id
    event = Event()
-    mock_tool_call_metadata = ToolCallMetadata(
+    # Use our utility function to create tool_call_metadata
+    mock_tool_call_metadata = create_tool_call_metadata(
        tool_call_id='test-tool-call',
        function_name='fake_function',
-        model_response={'id': 'test-response-id'},
+        model_response_id='test-response-id',
        total_calls_in_response=1,
    )
-    event._tool_call_metadata = (
-        mock_tool_call_metadata  # normally you'd do event.tool_call_metadata = ...
-    )
+    event._tool_call_metadata = mock_tool_call_metadata

    # We should find that usage record
    found = get_token_usage_for_event(event, metrics)
@@ -45,7 +45,14 @@ def test_get_token_usage_for_event():
    assert found.response_id == 'test-response-id'

    # If we change the event's response ID, we won't find anything
-    mock_tool_call_metadata.model_response.id = 'some-other-id'
+    # Create a new tool_call_metadata with a different response ID
+    mock_tool_call_metadata = create_tool_call_metadata(
+        tool_call_id='test-tool-call',
+        function_name='fake_function',
+        model_response_id='some-other-id',
+        total_calls_in_response=1,
+    )
+    event._tool_call_metadata = mock_tool_call_metadata
    found2 = get_token_usage_for_event(event, metrics)
    assert found2 is None

@@ -87,17 +94,17 @@ def test_get_token_usage_for_event_id():
        e._id = i
        # We'll attach usage_1 to event 1, usage_2 to event 3
        if i == 1:
-            e._tool_call_metadata = ToolCallMetadata(
+            e._tool_call_metadata = create_tool_call_metadata(
                tool_call_id='tid1',
                function_name='fn1',
-                model_response={'id': 'resp-1'},
+                model_response_id='resp-1',
                total_calls_in_response=1,
            )
        elif i == 3:
-            e._tool_call_metadata = ToolCallMetadata(
+            e._tool_call_metadata = create_tool_call_metadata(
                tool_call_id='tid2',
                function_name='fn2',
-                model_response={'id': 'resp-2'},
+                model_response_id='resp-2',
                total_calls_in_response=1,
            )
        events.append(e)
@@ -141,10 +148,10 @@ def test_get_token_usage_for_event_fallback():

    event = Event()
    # Provide some mismatched tool_call_metadata:
-    event._tool_call_metadata = ToolCallMetadata(
+    event._tool_call_metadata = create_tool_call_metadata(
        tool_call_id='irrelevant-tool-call',
        function_name='fake_function',
-        model_response={'id': 'not-matching-any-usage'},
+        model_response_id='not-matching-any-usage',
        total_calls_in_response=1,
    )
    # But also set event.response_id to the actual usage ID
@@ -52,7 +52,7 @@ def response_mock(content: str, tool_call_id: str):
    return ModelResponse(**MockModelResponse(content, tool_call_id).model_dump())


-def test_get_messages(codeact_agent: CodeActAgent):
+def testget_messages(codeact_agent: CodeActAgent):
    # Add some events to history
    history = list()
    # Add system message action
@@ -76,7 +76,7 @@ def test_get_messages(codeact_agent: CodeActAgent):
    history.append(message_action_5)

    codeact_agent.reset()
-    messages = codeact_agent._get_messages(history)
+    messages = codeact_agent.get_messages(history)

    assert (
        len(messages) == 6
@@ -99,7 +99,7 @@ def test_get_messages(codeact_agent: CodeActAgent):
    assert messages[5].content[0].cache_prompt


-def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
+def testget_messages_prompt_caching(codeact_agent: CodeActAgent):
    history = list()
    # Add system message action
    system_message_action = codeact_agent.get_system_message()
@@ -115,7 +115,7 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
        history.append(message_action_agent)

    codeact_agent.reset()
-    messages = codeact_agent._get_messages(history)
+    messages = codeact_agent.get_messages(history)

    # Check that only the last two user messages have cache_prompt=True
    cached_user_messages = [
@@ -0,0 +1,55 @@
+"""Utility functions for tests."""
+
+from openhands.events.tool import ToolCallMetadata
+
+
+def create_tool_call_metadata(
+    tool_call_id: str = 'tool_call_0',
+    function_name: str = 'str_replace_editor',
+    model_response_id: str = 'model_response_0',
+    total_calls_in_response: int = 1,
+) -> ToolCallMetadata:
+    """
+    Build a ToolCallMetadata test fixture backed by a minimal model response.
+
+    The ``model_response`` handed to ToolCallMetadata is a plain dictionary
+    with the given response ID and a single choice: an assistant message with
+    empty content and one function tool call whose arguments are the empty
+    JSON object.
+
+    Args:
+        tool_call_id: ID used for the metadata and for the embedded tool call
+        function_name: Function name used for the metadata and the tool call
+        model_response_id: Value of the ``id`` field of the model response
+        total_calls_in_response: Passed through to ToolCallMetadata unchanged
+
+    Returns:
+        The ToolCallMetadata object constructed from these values
+    """
+    # The single tool call embedded in the assistant message.
+    tool_call = {
+        'id': tool_call_id,
+        'type': 'function',
+        # Arguments are a JSON-encoded string; '{}' is the empty object.
+        'function': {'name': function_name, 'arguments': '{}'},
+    }
+    assistant_message = {
+        'role': 'assistant',
+        'content': '',
+        'tool_calls': [tool_call],
+    }
+
+    return ToolCallMetadata(
+        tool_call_id=tool_call_id,
+        function_name=function_name,
+        model_response={
+            'id': model_response_id,
+            'choices': [{'message': assistant_message}],
+        },
+        total_calls_in_response=total_calls_in_response,
+    )
Author	SHA1	Message	Date
Engel Nyst	2a041e70a4	add condenser as cli argument	2025-04-30 15:41:47 +02:00
openhands	0c85ef8a55	Merge remote-tracking branch 'upstream/main' into condenser_experiment	2025-04-29 08:36:26 +02:00
openhands	0e58611dc2	keep_first=4, like LLMSummaryCondenser before	2025-04-24 17:15:01 +02:00
openhands	cdbbc1e4e6	Merge branch 'upstream-main' into condenser_experiment	2025-04-24 13:03:31 +02:00
openhands	068f86b102	added config for keep_first	2025-04-24 12:58:11 +02:00
openhands	dee7305826	made the keep-user-messages mechanism configurable. also only look for real-user messages, not any event with source=USER	2025-04-24 12:58:11 +02:00
openhands	1e6e817c53	disable cache on the last message.	2025-04-24 12:57:58 +02:00
openhands	7b08b89460	Merge branch 'condenser_experiment' of https://github.com/happyherp/OpenHands into condenser_experiment	2025-04-23 16:00:27 +02:00
openhands	f7ad303842	Implement condenser improvements: keep first user message and disable cache for condensation	2025-04-23 13:20:41 +00:00
Carlos Freund	0642ddd16d	Merge branch 'main' into condenser_experiment	2025-04-23 14:53:23 +02:00
openhands	0c55c9acec	log condensation stats	2025-04-23 13:15:54 +02:00
openhands	b0fa78ed6a	Fix import order in test_llm_agent_cache_condenser.py	2025-04-23 11:04:28 +00:00
openhands	59de045b26	example for new condenser configuration via .toml	2025-04-23 12:51:02 +02:00
openhands	8497b8d6d2	renamed CACHE_PROMPT_SUPPORTED_MODELS to EXPLICIT_CACHE_MODELS and removed gpt-4o-mini. from it. also removed the check that the used llm must be one of these.	2025-04-23 12:43:09 +02:00
openhands	e2c6dfb0ab	rename Agent._get_messages to get_messages to indicate its a public method.	2025-04-23 12:28:30 +02:00
openhands	d86cb1e4cd	Update condenser files to use Python 3.12+ builtins instead of typing module classes	2025-04-23 12:28:21 +02:00
openhands	b51fe287e9	Improve test_condensation_triggered_by_user_message_in_context assertion to be more robust	2025-04-23 10:14:12 +00:00
openhands	5d42adcb42	tests for condensation trigger message in context.	2025-04-22 12:26:47 +02:00
openhands	6a1f5fd812	DROP ME: LLMAgentCacheCondenser as default	2025-04-22 11:59:26 +02:00
openhands	8cde944f1a	New condenser LLMAgentCacheCondenser that uses the agents cache.	2025-04-21 20:54:37 +02:00
openhands	5aa64e64d8	allow CondensationAction.summary_offset to be None, if summary is set.	2025-04-21 20:50:14 +02:00
openhands	6b8cd2025d	additional agent interface LLMCompletionProvider	2025-04-21 20:45:07 +02:00
openhands	797acd021e	Condenser.condense: added parameters state and agent	2025-04-21 20:32:48 +02:00