mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-09 14:57:59 -05:00
Refactor monologue to use the messages in state history
Remove the now-unused method
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import agenthub.monologue_agent.utils.prompts as prompts
|
||||
from agenthub.monologue_agent.utils.prompts import INITIAL_THOUGHTS
|
||||
from opendevin.controller.agent import Agent
|
||||
from opendevin.controller.state.state import State
|
||||
from opendevin.core.config import config
|
||||
@@ -25,7 +26,6 @@ from opendevin.events.observation import (
|
||||
from opendevin.events.serialization.event import event_to_memory
|
||||
from opendevin.llm.llm import LLM
|
||||
from opendevin.memory.condenser import MemoryCondenser
|
||||
from opendevin.memory.history import ShortTermHistory
|
||||
|
||||
if config.agent.memory_enabled:
|
||||
from opendevin.memory.memory import LongTermMemory
|
||||
@@ -33,50 +33,33 @@ if config.agent.memory_enabled:
|
||||
MAX_TOKEN_COUNT_PADDING = 512
|
||||
MAX_OUTPUT_LENGTH = 5000
|
||||
|
||||
INITIAL_THOUGHTS = [
|
||||
'I exist!',
|
||||
'Hmm...looks like I can type in a command line prompt',
|
||||
'Looks like I have a web browser too!',
|
||||
"Here's what I want to do: $TASK",
|
||||
'How am I going to get there though?',
|
||||
'It seems like I have some kind of short term memory.',
|
||||
'Each of my thoughts seems to be stored in a JSON array.',
|
||||
'It seems whatever I say next will be added as an object to the list.',
|
||||
'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
|
||||
'Fortunately I have long term memory!',
|
||||
'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
|
||||
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
|
||||
"Let's try it out!",
|
||||
'RECALL what it is I want to do',
|
||||
"Here's what I want to do: $TASK",
|
||||
'How am I going to get there though?',
|
||||
"Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
'RUN echo "hello world"',
|
||||
'hello world',
|
||||
'Cool! I bet I can write files too using the write action.',
|
||||
'WRITE echo "console.log(\'hello world\')" > test.js',
|
||||
'',
|
||||
"I just created test.js. I'll try and run it now.",
|
||||
'RUN node test.js',
|
||||
'hello world',
|
||||
'It works!',
|
||||
"I'm going to try reading it now using the read action.",
|
||||
'READ test.js',
|
||||
"console.log('hello world')",
|
||||
'Nice! I can read files too!',
|
||||
'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
|
||||
"Let's try that...",
|
||||
'BROWSE google.com',
|
||||
'<form><input type="text"></input><button type="submit"></button></form>',
|
||||
'I can browse the web too!',
|
||||
'And once I have completed my task, I can use the finish action to stop working.',
|
||||
"But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
'Very cool. Now to accomplish my task.',
|
||||
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
|
||||
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
|
||||
'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
|
||||
]
|
||||
|
||||
def truncate_output(observation: dict, max_chars: int = MAX_OUTPUT_LENGTH) -> dict:
    """
    Truncates the output of an observation to a maximum number of characters.

    When ``observation['args']['output']`` is a string longer than
    ``max_chars``, it is replaced by its first and last ``max_chars // 2``
    characters joined by a truncation marker. Anything else (no ``args``,
    no ``output``, a non-string output, or an output that already fits) is
    left untouched.

    The observation is modified in place; the same object is returned so the
    call can be used inline.

    Parameters:
    - observation (dict): The serialized observation whose output to truncate
    - max_chars (int): The maximum number of characters of the original output to keep

    Returns:
    - dict: The same observation, with its output truncated if it was too long
    """
    args = observation.get('args')
    if not isinstance(args, dict):
        return observation

    output = args.get('output')
    if not isinstance(output, str) or len(output) <= max_chars:
        return observation

    # Clamp so a negative limit behaves like "keep nothing" instead of
    # producing slices counted from the wrong end.
    half = max(max_chars, 0) // 2

    # The tail is sliced from an explicit start index: `output[-half:]` would
    # return the WHOLE string when half == 0, because -0 == 0.
    args['output'] = (
        output[:half]
        + '\n[... Output truncated due to length...]\n'
        + output[len(output) - half:]
    )
    return observation
|
||||
|
||||
|
||||
class MonologueAgent(Agent):
|
||||
@@ -87,7 +70,7 @@ class MonologueAgent(Agent):
|
||||
"""
|
||||
|
||||
_initialized = False
|
||||
monologue: ShortTermHistory
|
||||
monologue: list[dict[str, str]] # initial thoughts ONLY
|
||||
memory: 'LongTermMemory | None'
|
||||
memory_condenser: MemoryCondenser
|
||||
|
||||
@@ -100,44 +83,6 @@ class MonologueAgent(Agent):
|
||||
"""
|
||||
super().__init__(llm)
|
||||
|
||||
def _add_event(self, event_dict: dict):
    """
    Records an event in the monologue and, when enabled, in long term memory.

    Over-long outputs are clipped before the event is stored. After storing,
    the monologue is measured against the LLM's input limit and replaced by a
    condensed summary when it no longer fits.

    Parameters:
    - event_dict (dict): The event that will be added to monologue and memory
    """
    # Clip the output in place so monologue and memory both see the short form.
    if 'args' in event_dict:
        event_args = event_dict['args']
        if 'output' in event_args and len(event_args['output']) > MAX_OUTPUT_LENGTH:
            event_args['output'] = event_args['output'][:MAX_OUTPUT_LENGTH] + '...'

    self.monologue.add_event(event_dict)
    if self.memory is not None:
        self.memory.add_event(event_dict)

    # Build a probe prompt (empty task, no background commands) to measure
    # how many tokens the monologue currently occupies.
    probe_prompt = prompts.get_request_action_prompt(
        '',
        self.monologue.get_events(),
        [],
    )
    used_tokens = self.llm.get_token_count(
        [{'content': probe_prompt, 'role': 'user'}]
    )
    if used_tokens + MAX_TOKEN_COUNT_PADDING <= self.llm.max_input_tokens:
        return

    # Too large: ask the condenser for a summary and swap it in.
    summarize_prompt = prompts.get_summarize_monologue_prompt(self.monologue.events)
    condensed = self.memory_condenser.condense(
        summarize_prompt=summarize_prompt, llm=self.llm
    )
    self.monologue.events = prompts.parse_summary_response(condensed)
|
||||
|
||||
def _initialize(self, task: str):
|
||||
"""
|
||||
Utilizes the INITIAL_THOUGHTS list to give the agent a context for its capabilities
|
||||
@@ -158,7 +103,7 @@ class MonologueAgent(Agent):
|
||||
if task is None or task == '':
|
||||
raise AgentNoInstructionError()
|
||||
|
||||
self.monologue = ShortTermHistory()
|
||||
self.monologue = []
|
||||
if config.agent.memory_enabled:
|
||||
self.memory = LongTermMemory()
|
||||
else:
|
||||
@@ -187,7 +132,7 @@ class MonologueAgent(Agent):
|
||||
observation = BrowserOutputObservation(
|
||||
content=thought, url='', screenshot=''
|
||||
)
|
||||
self._add_event(event_to_memory(observation))
|
||||
self.monologue.append(event_to_memory(observation))
|
||||
previous_action = ''
|
||||
else:
|
||||
action: Action = NullAction()
|
||||
@@ -214,7 +159,7 @@ class MonologueAgent(Agent):
|
||||
previous_action = ActionType.BROWSE
|
||||
else:
|
||||
action = MessageAction(thought)
|
||||
self._add_event(event_to_memory(action))
|
||||
self.monologue.append(event_to_memory(action))
|
||||
|
||||
def step(self, state: State) -> Action:
|
||||
"""
|
||||
@@ -229,21 +174,49 @@ class MonologueAgent(Agent):
|
||||
|
||||
goal = state.get_current_user_intent()
|
||||
self._initialize(goal)
|
||||
for prev_action, obs in state.updated_info:
|
||||
self._add_event(event_to_memory(prev_action))
|
||||
self._add_event(event_to_memory(obs))
|
||||
|
||||
state.updated_info = []
|
||||
|
||||
# the action prompt starts with the initial thoughts
|
||||
prompt = prompts.get_request_action_prompt(
|
||||
goal,
|
||||
self.monologue.get_events(),
|
||||
self.monologue,
|
||||
state.background_commands_obs,
|
||||
)
|
||||
messages = [{'content': prompt, 'role': 'user'}]
|
||||
messages: list[dict[str, str]] = [
|
||||
{'role': 'system', 'content': prompt},
|
||||
]
|
||||
|
||||
# add the messages from state.history
|
||||
for prev_action, obs in state.history:
|
||||
messages.append(event_to_memory(prev_action))
|
||||
messages.append(truncate_output(event_to_memory(obs)))
|
||||
|
||||
# add the last messages to long term memory
|
||||
if self.memory is not None and state.history and len(state.history) > 0:
|
||||
self.memory.add_event(event_to_memory(state.history[-1][0]))
|
||||
self.memory.add_event(
|
||||
truncate_output(event_to_memory(state.history[-1][1]))
|
||||
)
|
||||
|
||||
# FIXME this has to go
|
||||
# Test monologue token length
|
||||
token_count = self.llm.get_token_count(messages[1:])
|
||||
if token_count + MAX_TOKEN_COUNT_PADDING > self.llm.max_input_tokens:
|
||||
prompt = prompts.get_summarize_monologue_prompt(messages[1:])
|
||||
summary_response = self.memory_condenser.condense(
|
||||
summarize_prompt=prompt, llm=self.llm
|
||||
)
|
||||
prompts.parse_summary_response(summary_response)
|
||||
# end FIXME
|
||||
|
||||
# request the next action from the LLM
|
||||
resp = self.llm.completion(messages=messages)
|
||||
|
||||
# get the next action from the response
|
||||
action_resp = resp['choices'][0]['message']['content']
|
||||
|
||||
# keep track of a fallback option to stop the prompting when it gets too large
|
||||
state.num_of_chars += len(prompt) + len(action_resp)
|
||||
|
||||
action = prompts.parse_action_response(action_resp)
|
||||
self.latest_action = action
|
||||
return action
|
||||
|
||||
@@ -92,6 +92,51 @@ The action key may be `summarize`, and `args.summary` should contain the summary
|
||||
You can also use the same action and args from the source monologue.
|
||||
"""
|
||||
|
||||
INITIAL_THOUGHTS = [
|
||||
'I exist!',
|
||||
'Hmm...looks like I can type in a command line prompt',
|
||||
'Looks like I have a web browser too!',
|
||||
"Here's what I want to do: $TASK",
|
||||
'How am I going to get there though?',
|
||||
'It seems like I have some kind of short term memory.',
|
||||
'Each of my thoughts seems to be stored in a JSON array.',
|
||||
'It seems whatever I say next will be added as an object to the list.',
|
||||
'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
|
||||
'Fortunately I have long term memory!',
|
||||
'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
|
||||
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
|
||||
"Let's try it out!",
|
||||
'RECALL what it is I want to do',
|
||||
"Here's what I want to do: $TASK",
|
||||
'How am I going to get there though?',
|
||||
"Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
'RUN echo "hello world"',
|
||||
'hello world',
|
||||
'Cool! I bet I can write files too using the write action.',
|
||||
'WRITE echo "console.log(\'hello world\')" > test.js',
|
||||
'',
|
||||
"I just created test.js. I'll try and run it now.",
|
||||
'RUN node test.js',
|
||||
'hello world',
|
||||
'It works!',
|
||||
"I'm going to try reading it now using the read action.",
|
||||
'READ test.js',
|
||||
"console.log('hello world')",
|
||||
'Nice! I can read files too!',
|
||||
'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
|
||||
"Let's try that...",
|
||||
'BROWSE google.com',
|
||||
'<form><input type="text"></input><button type="submit"></button></form>',
|
||||
'I can browse the web too!',
|
||||
'And once I have completed my task, I can use the finish action to stop working.',
|
||||
"But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
'Very cool. Now to accomplish my task.',
|
||||
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
|
||||
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
|
||||
'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
|
||||
]
|
||||
|
||||
|
||||
def get_summarize_monologue_prompt(thoughts: list[dict]):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user