Revert "Refactor monologue to use the messages in state history"

This reverts commit 76b4b765ef.
2026-01-10 07:18:10 -05:00 · 2024-05-18 22:02:00 +02:00
parent eb6f68a61b
commit 6d1963d7ae
2 changed files with 94 additions and 112 deletions
--- a/agenthub/monologue_agent/agent.py
+++ b/agenthub/monologue_agent/agent.py
@@ -1,5 +1,4 @@
 import agenthub.monologue_agent.utils.prompts as prompts
-from agenthub.monologue_agent.utils.prompts import INITIAL_THOUGHTS
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
 from opendevin.core.config import config
@@ -26,6 +25,7 @@ from opendevin.events.observation import (
 from opendevin.events.serialization.event import event_to_memory
 from opendevin.llm.llm import LLM
 from opendevin.memory.condenser import MemoryCondenser
+from opendevin.memory.history import ShortTermHistory

 if config.agent.memory_enabled:
    from opendevin.memory.memory import LongTermMemory
@@ -33,33 +33,50 @@ if config.agent.memory_enabled:
 MAX_TOKEN_COUNT_PADDING = 512
 MAX_OUTPUT_LENGTH = 5000

-
-def truncate_output(
-    observation: dict, max_chars: int = MAX_OUTPUT_LENGTH
-) -> dict[str, str]:
-    """
-    Truncates the output of an observation to a maximum number of characters.
-
-    Parameters:
-    - output (str): The observation whose output to truncate
-    - max_chars (int): The maximum number of characters to allow
-
-    Returns:
-    - str: The truncated output
-    """
-    if (
-        'args' in observation
-        and 'output' in observation['args']
-        and len(observation['args']['output']) > max_chars
-    ):
-        output = observation['args']['output']
-        half = max_chars // 2
-        observation['args']['output'] = (
-            output[:half]
-            + '\n[... Output truncated due to length...]\n'
-            + output[-half:]
-        )
-    return observation
+INITIAL_THOUGHTS = [
+    'I exist!',
+    'Hmm...looks like I can type in a command line prompt',
+    'Looks like I have a web browser too!',
+    "Here's what I want to do: $TASK",
+    'How am I going to get there though?',
+    'It seems like I have some kind of short term memory.',
+    'Each of my thoughts seems to be stored in a JSON array.',
+    'It seems whatever I say next will be added as an object to the list.',
+    'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
+    'Fortunately I have long term memory!',
+    'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
+    "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
+    "Let's try it out!",
+    'RECALL what it is I want to do',
+    "Here's what I want to do: $TASK",
+    'How am I going to get there though?',
+    "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
+    'RUN echo "hello world"',
+    'hello world',
+    'Cool! I bet I can write files too using the write action.',
+    'WRITE echo "console.log(\'hello world\')" > test.js',
+    '',
+    "I just created test.js. I'll try and run it now.",
+    'RUN node test.js',
+    'hello world',
+    'It works!',
+    "I'm going to try reading it now using the read action.",
+    'READ test.js',
+    "console.log('hello world')",
+    'Nice! I can read files too!',
+    'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
+    "Let's try that...",
+    'BROWSE google.com',
+    '<form><input type="text"></input><button type="submit"></button></form>',
+    'I can browse the web too!',
+    'And once I have completed my task, I can use the finish action to stop working.',
+    "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
+    'Very cool. Now to accomplish my task.',
+    "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
+    'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
+    "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
+    'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
+]


 class MonologueAgent(Agent):
@@ -70,7 +87,7 @@ class MonologueAgent(Agent):
    """

    _initialized = False
-    monologue: list[dict[str, str]]  # initial thoughts ONLY
+    monologue: ShortTermHistory
    memory: 'LongTermMemory | None'
    memory_condenser: MemoryCondenser

@@ -83,6 +100,44 @@ class MonologueAgent(Agent):
        """
        super().__init__(llm)

+    def _add_event(self, event_dict: dict):
+        """
+        Adds a new event to the agent's monologue and memory.
+        Monologue automatically condenses when it gets too large.
+
+        Parameters:
+        - event_dict (dict): The event that will be added to monologue and memory
+        """
+
+        if (
+            'args' in event_dict
+            and 'output' in event_dict['args']
+            and len(event_dict['args']['output']) > MAX_OUTPUT_LENGTH
+        ):
+            event_dict['args']['output'] = (
+                event_dict['args']['output'][:MAX_OUTPUT_LENGTH] + '...'
+            )
+
+        self.monologue.add_event(event_dict)
+        if self.memory is not None:
+            self.memory.add_event(event_dict)
+
+        # Test monologue token length
+        prompt = prompts.get_request_action_prompt(
+            '',
+            self.monologue.get_events(),
+            [],
+        )
+        messages = [{'content': prompt, 'role': 'user'}]
+        token_count = self.llm.get_token_count(messages)
+
+        if token_count + MAX_TOKEN_COUNT_PADDING > self.llm.max_input_tokens:
+            prompt = prompts.get_summarize_monologue_prompt(self.monologue.events)
+            summary_response = self.memory_condenser.condense(
+                summarize_prompt=prompt, llm=self.llm
+            )
+            self.monologue.events = prompts.parse_summary_response(summary_response)
+
    def _initialize(self, task: str):
        """
        Utilizes the INITIAL_THOUGHTS list to give the agent a context for its capabilities
@@ -103,7 +158,7 @@ class MonologueAgent(Agent):
        if task is None or task == '':
            raise AgentNoInstructionError()

-        self.monologue = []
+        self.monologue = ShortTermHistory()
        if config.agent.memory_enabled:
            self.memory = LongTermMemory()
        else:
@@ -132,7 +187,7 @@ class MonologueAgent(Agent):
                    observation = BrowserOutputObservation(
                        content=thought, url='', screenshot=''
                    )
-                self.monologue.append(event_to_memory(observation))
+                self._add_event(event_to_memory(observation))
                previous_action = ''
            else:
                action: Action = NullAction()
@@ -159,7 +214,7 @@ class MonologueAgent(Agent):
                    previous_action = ActionType.BROWSE
                else:
                    action = MessageAction(thought)
-                self.monologue.append(event_to_memory(action))
+                self._add_event(event_to_memory(action))

    def step(self, state: State) -> Action:
        """
@@ -174,49 +229,21 @@ class MonologueAgent(Agent):

        goal = state.get_current_user_intent()
        self._initialize(goal)
+        for prev_action, obs in state.updated_info:
+            self._add_event(event_to_memory(prev_action))
+            self._add_event(event_to_memory(obs))
+
+        state.updated_info = []

-        # the action prompt starts with the initial thoughts
        prompt = prompts.get_request_action_prompt(
            goal,
-            self.monologue,
+            self.monologue.get_events(),
            state.background_commands_obs,
        )
-        messages: list[dict[str, str]] = [
-            {'role': 'system', 'content': prompt},
-        ]
-
-        # add the messages from state.history
-        for prev_action, obs in state.history:
-            messages.append(event_to_memory(prev_action))
-            messages.append(truncate_output(event_to_memory(obs)))
-
-        # add the last messages to long term memory
-        if self.memory is not None and state.history and len(state.history) > 0:
-            self.memory.add_event(event_to_memory(state.history[-1][0]))
-            self.memory.add_event(
-                truncate_output(event_to_memory(state.history[-1][1]))
-            )
-
-        # FIXME this has to go
-        # Test monologue token length
-        token_count = self.llm.get_token_count(messages[1:])
-        if token_count + MAX_TOKEN_COUNT_PADDING > self.llm.max_input_tokens:
-            prompt = prompts.get_summarize_monologue_prompt(messages[1:])
-            summary_response = self.memory_condenser.condense(
-                summarize_prompt=prompt, llm=self.llm
-            )
-            prompts.parse_summary_response(summary_response)
-        # end FIXME
-
-        # request the next action from the LLM
+        messages = [{'content': prompt, 'role': 'user'}]
        resp = self.llm.completion(messages=messages)
-
-        # get the next action from the response
        action_resp = resp['choices'][0]['message']['content']
-
-        # keep track of a fallback option to stop the prompting when it gets too large
        state.num_of_chars += len(prompt) + len(action_resp)
-
        action = prompts.parse_action_response(action_resp)
        self.latest_action = action
        return action
--- a/agenthub/monologue_agent/utils/prompts.py
+++ b/agenthub/monologue_agent/utils/prompts.py
@@ -92,51 +92,6 @@ The action key may be `summarize`, and `args.summary` should contain the summary
 You can also use the same action and args from the source monologue.
 """

-INITIAL_THOUGHTS = [
-    'I exist!',
-    'Hmm...looks like I can type in a command line prompt',
-    'Looks like I have a web browser too!',
-    "Here's what I want to do: $TASK",
-    'How am I going to get there though?',
-    'It seems like I have some kind of short term memory.',
-    'Each of my thoughts seems to be stored in a JSON array.',
-    'It seems whatever I say next will be added as an object to the list.',
-    'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
-    'Fortunately I have long term memory!',
-    'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
-    "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
-    "Let's try it out!",
-    'RECALL what it is I want to do',
-    "Here's what I want to do: $TASK",
-    'How am I going to get there though?',
-    "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
-    'RUN echo "hello world"',
-    'hello world',
-    'Cool! I bet I can write files too using the write action.',
-    'WRITE echo "console.log(\'hello world\')" > test.js',
-    '',
-    "I just created test.js. I'll try and run it now.",
-    'RUN node test.js',
-    'hello world',
-    'It works!',
-    "I'm going to try reading it now using the read action.",
-    'READ test.js',
-    "console.log('hello world')",
-    'Nice! I can read files too!',
-    'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
-    "Let's try that...",
-    'BROWSE google.com',
-    '<form><input type="text"></input><button type="submit"></button></form>',
-    'I can browse the web too!',
-    'And once I have completed my task, I can use the finish action to stop working.',
-    "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
-    'Very cool. Now to accomplish my task.',
-    "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
-    'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
-    "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
-    'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
-]
-

 def get_summarize_monologue_prompt(thoughts: list[dict]):
    """