Revert "Refactor monologue to use the messages in state history"

This reverts commit 76b4b765ef.
This commit is contained in:
Engel Nyst
2024-05-18 22:02:00 +02:00
parent eb6f68a61b
commit 6d1963d7ae
2 changed files with 94 additions and 112 deletions

View File

@@ -1,5 +1,4 @@
import agenthub.monologue_agent.utils.prompts as prompts
from agenthub.monologue_agent.utils.prompts import INITIAL_THOUGHTS
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.core.config import config
@@ -26,6 +25,7 @@ from opendevin.events.observation import (
from opendevin.events.serialization.event import event_to_memory
from opendevin.llm.llm import LLM
from opendevin.memory.condenser import MemoryCondenser
from opendevin.memory.history import ShortTermHistory
if config.agent.memory_enabled:
from opendevin.memory.memory import LongTermMemory
@@ -33,33 +33,50 @@ if config.agent.memory_enabled:
MAX_TOKEN_COUNT_PADDING = 512
MAX_OUTPUT_LENGTH = 5000


def truncate_output(
    observation: dict, max_chars: int = MAX_OUTPUT_LENGTH
) -> dict:
    """
    Truncates the output of an observation to a maximum number of characters.

    When `observation['args']['output']` is a string longer than `max_chars`,
    it is replaced by its first and last `max_chars // 2` characters joined by
    a truncation marker. Anything else (no 'args', no 'output', a non-string
    output, or an output that already fits) is left untouched.

    The observation is modified in place and the same object is returned, so
    existing callers that rely on either the mutation or the return value keep
    working.

    Parameters:
    - observation (dict): The observation whose `args.output` to truncate
    - max_chars (int): The maximum number of characters of the original output
      to keep; negative values are treated as 0

    Returns:
    - dict: The same observation object, with its output possibly truncated
    """
    args = observation.get('args')
    if not isinstance(args, dict):
        return observation

    output = args.get('output')
    # Clamp so that a negative limit behaves like "keep nothing".
    limit = max(max_chars, 0)
    if not isinstance(output, str) or len(output) <= limit:
        return observation

    half = limit // 2
    # Slice the tail from an explicit start index: `output[-half:]` would
    # return the WHOLE string when half == 0 (because -0 == 0).
    tail_start = len(output) - half
    args['output'] = (
        output[:half]
        + '\n[... Output truncated due to length...]\n'
        + output[tail_start:]
    )
    return observation
# Scripted first-person monologue used to seed the agent before it starts working.
# The list is ordered: plain entries are thoughts, while entries that begin with
# RECALL / RUN / WRITE / READ / BROWSE read as demonstration actions, each followed
# by an entry holding that action's sample result (the empty string follows WRITE).
# NOTE(review): presumably the consumer depends on this action-then-result ordering;
# confirm before reordering or inserting entries.
# `$TASK` occurs in three entries and looks like a placeholder for the user's task.
# NOTE(review): the substitution itself is not visible here; confirm where it happens.
INITIAL_THOUGHTS = [
'I exist!',
'Hmm...looks like I can type in a command line prompt',
'Looks like I have a web browser too!',
"Here's what I want to do: $TASK",
'How am I going to get there though?',
'It seems like I have some kind of short term memory.',
'Each of my thoughts seems to be stored in a JSON array.',
'It seems whatever I say next will be added as an object to the list.',
'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
'Fortunately I have long term memory!',
'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"Let's try it out!",
'RECALL what it is I want to do',
"Here's what I want to do: $TASK",
'How am I going to get there though?',
"Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
'RUN echo "hello world"',
'hello world',
'Cool! I bet I can write files too using the write action.',
'WRITE echo "console.log(\'hello world\')" > test.js',
'',
"I just created test.js. I'll try and run it now.",
'RUN node test.js',
'hello world',
'It works!',
"I'm going to try reading it now using the read action.",
'READ test.js',
"console.log('hello world')",
'Nice! I can read files too!',
'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
"Let's try that...",
'BROWSE google.com',
'<form><input type="text"></input><button type="submit"></button></form>',
'I can browse the web too!',
'And once I have completed my task, I can use the finish action to stop working.',
"But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
'Very cool. Now to accomplish my task.',
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
]
class MonologueAgent(Agent):
@@ -70,7 +87,7 @@ class MonologueAgent(Agent):
"""
_initialized = False
monologue: list[dict[str, str]] # initial thoughts ONLY
monologue: ShortTermHistory
memory: 'LongTermMemory | None'
memory_condenser: MemoryCondenser
@@ -83,6 +100,44 @@ class MonologueAgent(Agent):
"""
super().__init__(llm)
def _add_event(self, event_dict: dict):
    """
    Records an event in the agent's monologue and, when present, its long-term memory.

    An `args.output` value longer than MAX_OUTPUT_LENGTH is cut to that length
    (with '...' appended) directly on the passed-in dict before it is stored.
    After storing, the monologue is rendered into an action prompt to measure
    its token count; when that count plus MAX_TOKEN_COUNT_PADDING exceeds the
    LLM's input limit, the monologue events are replaced by a condensed summary.

    Parameters:
    - event_dict (dict): The event that will be added to monologue and memory
    """
    # Cap oversized command/browser output before it reaches any store.
    if 'args' in event_dict and 'output' in event_dict['args']:
        raw_output = event_dict['args']['output']
        if len(raw_output) > MAX_OUTPUT_LENGTH:
            event_dict['args']['output'] = raw_output[:MAX_OUTPUT_LENGTH] + '...'

    self.monologue.add_event(event_dict)
    if self.memory is not None:
        self.memory.add_event(event_dict)

    # Render the monologue the way an action request would, purely to size it.
    probe_prompt = prompts.get_request_action_prompt(
        '',
        self.monologue.get_events(),
        [],
    )
    probe_messages = [{'content': probe_prompt, 'role': 'user'}]
    token_count = self.llm.get_token_count(probe_messages)
    if token_count + MAX_TOKEN_COUNT_PADDING <= self.llm.max_input_tokens:
        return

    # Too large for the model: swap the events for their condensed summary.
    summarize_prompt = prompts.get_summarize_monologue_prompt(
        self.monologue.events
    )
    summary_response = self.memory_condenser.condense(
        summarize_prompt=summarize_prompt, llm=self.llm
    )
    self.monologue.events = prompts.parse_summary_response(summary_response)
def _initialize(self, task: str):
"""
Utilizes the INITIAL_THOUGHTS list to give the agent a context for its capabilities
@@ -103,7 +158,7 @@ class MonologueAgent(Agent):
if task is None or task == '':
raise AgentNoInstructionError()
self.monologue = []
self.monologue = ShortTermHistory()
if config.agent.memory_enabled:
self.memory = LongTermMemory()
else:
@@ -132,7 +187,7 @@ class MonologueAgent(Agent):
observation = BrowserOutputObservation(
content=thought, url='', screenshot=''
)
self.monologue.append(event_to_memory(observation))
self._add_event(event_to_memory(observation))
previous_action = ''
else:
action: Action = NullAction()
@@ -159,7 +214,7 @@ class MonologueAgent(Agent):
previous_action = ActionType.BROWSE
else:
action = MessageAction(thought)
self.monologue.append(event_to_memory(action))
self._add_event(event_to_memory(action))
def step(self, state: State) -> Action:
"""
@@ -174,49 +229,21 @@ class MonologueAgent(Agent):
goal = state.get_current_user_intent()
self._initialize(goal)
for prev_action, obs in state.updated_info:
self._add_event(event_to_memory(prev_action))
self._add_event(event_to_memory(obs))
state.updated_info = []
# the action prompt starts with the initial thoughts
prompt = prompts.get_request_action_prompt(
goal,
self.monologue,
self.monologue.get_events(),
state.background_commands_obs,
)
messages: list[dict[str, str]] = [
{'role': 'system', 'content': prompt},
]
# add the messages from state.history
for prev_action, obs in state.history:
messages.append(event_to_memory(prev_action))
messages.append(truncate_output(event_to_memory(obs)))
# add the last messages to long term memory
if self.memory is not None and state.history and len(state.history) > 0:
self.memory.add_event(event_to_memory(state.history[-1][0]))
self.memory.add_event(
truncate_output(event_to_memory(state.history[-1][1]))
)
# FIXME this has to go
# Test monologue token length
token_count = self.llm.get_token_count(messages[1:])
if token_count + MAX_TOKEN_COUNT_PADDING > self.llm.max_input_tokens:
prompt = prompts.get_summarize_monologue_prompt(messages[1:])
summary_response = self.memory_condenser.condense(
summarize_prompt=prompt, llm=self.llm
)
prompts.parse_summary_response(summary_response)
# end FIXME
# request the next action from the LLM
messages = [{'content': prompt, 'role': 'user'}]
resp = self.llm.completion(messages=messages)
# get the next action from the response
action_resp = resp['choices'][0]['message']['content']
# keep track of a fallback option to stop the prompting when it gets too large
state.num_of_chars += len(prompt) + len(action_resp)
action = prompts.parse_action_response(action_resp)
self.latest_action = action
return action

View File

@@ -92,51 +92,6 @@ The action key may be `summarize`, and `args.summary` should contain the summary
You can also use the same action and args from the source monologue.
"""
# Scripted first-person monologue used to seed the agent before it starts working.
# The list is ordered: plain entries are thoughts, while entries that begin with
# RECALL / RUN / WRITE / READ / BROWSE read as demonstration actions, each followed
# by an entry holding that action's sample result (the empty string follows WRITE).
# NOTE(review): presumably the consumer depends on this action-then-result ordering;
# confirm before reordering or inserting entries.
# `$TASK` occurs in three entries and looks like a placeholder for the user's task.
# NOTE(review): the substitution itself is not visible here; confirm where it happens.
INITIAL_THOUGHTS = [
'I exist!',
'Hmm...looks like I can type in a command line prompt',
'Looks like I have a web browser too!',
"Here's what I want to do: $TASK",
'How am I going to get there though?',
'It seems like I have some kind of short term memory.',
'Each of my thoughts seems to be stored in a JSON array.',
'It seems whatever I say next will be added as an object to the list.',
'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
'Fortunately I have long term memory!',
'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"Let's try it out!",
'RECALL what it is I want to do',
"Here's what I want to do: $TASK",
'How am I going to get there though?',
"Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
'RUN echo "hello world"',
'hello world',
'Cool! I bet I can write files too using the write action.',
'WRITE echo "console.log(\'hello world\')" > test.js',
'',
"I just created test.js. I'll try and run it now.",
'RUN node test.js',
'hello world',
'It works!',
"I'm going to try reading it now using the read action.",
'READ test.js',
"console.log('hello world')",
'Nice! I can read files too!',
'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
"Let's try that...",
'BROWSE google.com',
'<form><input type="text"></input><button type="submit"></button></form>',
'I can browse the web too!',
'And once I have completed my task, I can use the finish action to stop working.',
"But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
'Very cool. Now to accomplish my task.',
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
]
def get_summarize_monologue_prompt(thoughts: list[dict]):
"""