mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-07 22:14:03 -05:00
@@ -14,12 +14,10 @@ from . import ( # noqa: E402
|
||||
codeact_swe_agent,
|
||||
delegator_agent,
|
||||
dummy_agent,
|
||||
monologue_agent,
|
||||
planner_agent,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'monologue_agent',
|
||||
'codeact_agent',
|
||||
'codeact_swe_agent',
|
||||
'planner_agent',
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
.envrc
|
||||
workspace
|
||||
@@ -1,8 +0,0 @@
|
||||
# LLM control loop
|
||||
This is currently a standalone utility. It will need to be integrated into OpenDevin's backend.
|
||||
|
||||
## Usage
|
||||
```bash
|
||||
# Run this in project root
|
||||
./agenthub/monologue_agent/build-and-run.sh "write a bash script that prints 'hello world'"
|
||||
```
|
||||
@@ -1,8 +0,0 @@
|
||||
# TODO
|
||||
There's a lot of low-hanging fruit for this agent:
|
||||
|
||||
* Strip `<script>`, `<style>`, and other non-text tags from the HTML before sending it to the LLM
|
||||
* Keep track of the working directory when the agent uses `cd`
|
||||
* Improve memory condensing--condense earlier memories more aggressively
|
||||
* Limit how long `run` can wait (in case the agent runs an interactive command that hangs)
|
||||
* Figure out how to run background processes, e.g. `node server.js` to start a server
|
||||
@@ -1,5 +0,0 @@
|
||||
from opendevin.controller.agent import Agent
|
||||
|
||||
from .agent import MonologueAgent
|
||||
|
||||
Agent.register('MonologueAgent', MonologueAgent)
|
||||
@@ -1,187 +0,0 @@
|
||||
import agenthub.monologue_agent.utils.prompts as prompts
|
||||
from agenthub.monologue_agent.response_parser import MonologueResponseParser
|
||||
from agenthub.monologue_agent.utils.prompts import INITIAL_THOUGHTS
|
||||
from opendevin.controller.agent import Agent
|
||||
from opendevin.controller.state.state import State
|
||||
from opendevin.core.config import config
|
||||
from opendevin.core.exceptions import AgentNoInstructionError
|
||||
from opendevin.core.schema import ActionType
|
||||
from opendevin.events.action import (
|
||||
Action,
|
||||
BrowseURLAction,
|
||||
CmdRunAction,
|
||||
FileReadAction,
|
||||
FileWriteAction,
|
||||
MessageAction,
|
||||
NullAction,
|
||||
)
|
||||
from opendevin.events.observation import (
|
||||
BrowserOutputObservation,
|
||||
CmdOutputObservation,
|
||||
FileReadObservation,
|
||||
NullObservation,
|
||||
Observation,
|
||||
)
|
||||
from opendevin.events.serialization.event import event_to_memory
|
||||
from opendevin.llm.llm import LLM
|
||||
from opendevin.memory.condenser import MemoryCondenser
|
||||
from opendevin.runtime.tools import RuntimeTool
|
||||
|
||||
if config.get_agent_config('MonologueAgent').memory_enabled:
|
||||
from opendevin.memory.memory import LongTermMemory
|
||||
|
||||
|
||||
class MonologueAgent(Agent):
    """Agent that asks the LLM for its next action based on a running monologue.

    On every step the agent builds a single prompt from the user's goal, a
    fixed list of bootstrapped "initial thoughts", and the events found in the
    state history, then parses the LLM response into an Action.

    When `memory_enabled` is set in the agent config, the last action and
    observation of each step are additionally stored in a LongTermMemory
    object; otherwise `memory` is None.
    """

    # NOTE: the docstring must be the first statement of the class body;
    # placed after an assignment it is not stored as `__doc__`.
    VERSION = '1.0'

    _initialized = False
    initial_thoughts: list[dict[str, str]]
    memory: 'LongTermMemory | None'
    memory_condenser: MemoryCondenser
    runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
    response_parser = MonologueResponseParser()

    def __init__(self, llm: LLM):
        """Initializes the Monologue Agent with an llm.

        Parameters:
        - llm (LLM): The llm to be used by this agent
        """
        super().__init__(llm)

    def _initialize(self, task: str):
        """Utilizes the INITIAL_THOUGHTS list to give the agent a context for its capabilities.

        Short circuited to return when already initialized.
        Will execute again when called after reset.

        Parameters:
        - task: The initial goal statement provided by the user

        Raises:
        - AgentNoInstructionError: If task is not provided
        """
        if self._initialized:
            return

        if task is None or task == '':
            raise AgentNoInstructionError()

        self.initial_thoughts = []
        # LongTermMemory is only imported at module level when memory is
        # enabled, so it must only be referenced behind the same check.
        if config.get_agent_config('MonologueAgent').memory_enabled:
            self.memory = LongTermMemory()
        else:
            self.memory = None

        self.memory_condenser = MemoryCondenser()

        self._add_initial_thoughts(task)
        self._initialized = True

    def _add_initial_thoughts(self, task: str):
        """Converts INITIAL_THOUGHTS into serialized events in `self.initial_thoughts`.

        Each thought has `$TASK` replaced by the task. Thoughts starting with
        RUN, WRITE, READ or BROWSE become the matching action; any other
        thought becomes a MessageAction. The thought that directly follows a
        RUN, READ or BROWSE thought is recorded as that action's observation
        instead of as an action.

        Parameters:
        - task: The initial goal statement provided by the user
        """
        previous_action = ''
        for thought in INITIAL_THOUGHTS:
            thought = thought.replace('$TASK', task)
            if previous_action != '':
                # This thought is the output of the action recorded just before it.
                observation: Observation = NullObservation(content='')
                if previous_action in {ActionType.RUN, ActionType.PUSH}:
                    observation = CmdOutputObservation(
                        content=thought, command_id=0, command=''
                    )
                elif previous_action == ActionType.READ:
                    observation = FileReadObservation(content=thought, path='')
                elif previous_action == ActionType.BROWSE:
                    observation = BrowserOutputObservation(
                        content=thought, url='', screenshot=''
                    )
                self.initial_thoughts.append(
                    event_to_memory(observation, self.llm.config.max_message_chars)
                )
                previous_action = ''
            else:
                action: Action = NullAction()
                if thought.startswith('RUN'):
                    command = thought.split('RUN ')[1]
                    action = CmdRunAction(command)
                    previous_action = ActionType.RUN
                elif thought.startswith('WRITE'):
                    # Format is "WRITE <content> > <path>".
                    parts = thought.split('WRITE ')[1].split(' > ')
                    path = parts[1]
                    content = parts[0]
                    action = FileWriteAction(path=path, content=content)
                    # NOTE(review): previous_action is not set here, so the
                    # thought after a WRITE is recorded as a message action,
                    # not as an observation - confirm this is intended.
                elif thought.startswith('READ'):
                    path = thought.split('READ ')[1]
                    action = FileReadAction(path=path)
                    previous_action = ActionType.READ
                elif thought.startswith('BROWSE'):
                    url = thought.split('BROWSE ')[1]
                    action = BrowseURLAction(url=url)
                    previous_action = ActionType.BROWSE
                else:
                    action = MessageAction(thought)
                self.initial_thoughts.append(
                    event_to_memory(action, self.llm.config.max_message_chars)
                )

    def step(self, state: State) -> Action:
        """Collects the events from the state history, then prompts the model to think about its next action to take.

        Parameters:
        - state (State): The current state based on previous steps taken

        Returns:
        - Action: The next action to take based on LLM response
        """
        goal = state.get_current_user_intent()
        self._initialize(goal)

        # add the events from state.history
        recent_events: list[dict[str, str]] = [
            event_to_memory(event, self.llm.config.max_message_chars)
            for event in state.history.get_events()
        ]

        # add the last messages to long term memory
        if self.memory is not None:
            last_action = state.history.get_last_action()
            last_observation = state.history.get_last_observation()

            # this should still work
            # we will need to do this differently: find out if there really is an action or an observation in this step
            if last_action:
                self.memory.add_event(
                    event_to_memory(last_action, self.llm.config.max_message_chars)
                )
            if last_observation:
                self.memory.add_event(
                    event_to_memory(last_observation, self.llm.config.max_message_chars)
                )

        # the action prompt with initial thoughts and recent events
        prompt = prompts.get_request_action_prompt(
            goal, self.initial_thoughts, recent_events
        )

        # format all as a single message, a monologue
        messages: list[dict[str, str]] = [
            {'role': 'user', 'content': prompt},
        ]
        resp = self.llm.completion(messages=messages)

        action = self.response_parser.parse(resp)
        self.latest_action = action
        return action

    def reset(self) -> None:
        """Resets the agent so the next step re-runs `_initialize`."""
        super().reset()

        # Reset the initial monologue and memory
        self._initialized = False
|
||||
@@ -1,207 +0,0 @@
|
||||
from opendevin.core.config import config
|
||||
from opendevin.core.utils import json
|
||||
from opendevin.events.action import (
|
||||
Action,
|
||||
)
|
||||
from opendevin.events.serialization.action import action_from_dict
|
||||
|
||||
ACTION_PROMPT = """
|
||||
You're a thoughtful robot. Your main task is this:
|
||||
%(task)s
|
||||
|
||||
Don't expand the scope of your task--just complete it as written.
|
||||
|
||||
This is your internal monologue, in JSON format:
|
||||
|
||||
%(monologue)s
|
||||
|
||||
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
|
||||
What is your next single thought or action? Your response must be in JSON format.
|
||||
It must be a single object, and it must contain two fields:
|
||||
* `action`, which is one of the actions below
|
||||
* `args`, which is a map of key-value pairs, specifying the arguments for that action
|
||||
|
||||
Here are the possible actions:
|
||||
* `read` - reads the content of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the content to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `content` - the content to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `push` - Push a branch from the current repo to github:
|
||||
* `owner` - the owner of the repo to push to
|
||||
* `repo` - the name of the repo to push to
|
||||
* `branch` - the name of the branch to push
|
||||
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
|
||||
* `content` - the message to record
|
||||
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
You MUST take time to think in between read, write, run, browse, and push actions--do this with the `message` action.
|
||||
You should never act twice in a row without thinking. But if your last several
|
||||
actions are all `message` actions, you should consider taking a different action.
|
||||
|
||||
Notes:
|
||||
* you are logged in as %(user)s, but sudo will always work without a password.
|
||||
* all non-background commands will be forcibly stopped if they remain running for over %(timeout)s seconds.
|
||||
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
|
||||
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
|
||||
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
|
||||
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
|
||||
%(hint)s
|
||||
"""
|
||||
|
||||
MONOLOGUE_SUMMARY_PROMPT = """
|
||||
Below is the internal monologue of an automated LLM agent. Each
|
||||
thought is an item in a JSON array. The thoughts may be memories,
|
||||
actions taken by the agent, or outputs from those actions.
|
||||
Please return a new, smaller JSON array, which summarizes the
|
||||
internal monologue. You can summarize individual thoughts, and
|
||||
you can condense related thoughts together with a description
|
||||
of their content.
|
||||
|
||||
%(monologue)s
|
||||
|
||||
Make the summaries as pithy and informative as possible.
|
||||
Be specific about what happened and what was learned. The summary
|
||||
will be used as keywords for searching for the original memory.
|
||||
Be sure to preserve any key words or important information.
|
||||
|
||||
Your response must be in JSON format. It must be an object with the
|
||||
key `new_monologue`, which is a JSON array containing the summarized monologue.
|
||||
Each entry in the array must have an `action` key, and an `args` key.
|
||||
The action key may be `summarize`, and `args.summary` should contain the summary.
|
||||
You can also use the same action and args from the source monologue.
|
||||
"""
|
||||
|
||||
INITIAL_THOUGHTS = [
|
||||
'I exist!',
|
||||
'Hmm...looks like I can type in a command line prompt',
|
||||
'Looks like I have a web browser too!',
|
||||
"Here's what I want to do: $TASK",
|
||||
'How am I going to get there though?',
|
||||
'It seems like I have some kind of short term memory.',
|
||||
'Each of my thoughts seems to be stored in a JSON array.',
|
||||
'It seems whatever I say next will be added as an object to the list.',
|
||||
"It looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
'RUN echo "hello world"',
|
||||
'hello world',
|
||||
'Cool! I bet I can write files too using the write action.',
|
||||
'WRITE echo "console.log(\'hello world\')" > test.js',
|
||||
'',
|
||||
"I just created test.js. I'll try and run it now.",
|
||||
'RUN node test.js',
|
||||
'hello world',
|
||||
'It works!',
|
||||
"I'm going to try reading it now using the read action.",
|
||||
'READ test.js',
|
||||
"console.log('hello world')",
|
||||
'Nice! I can read files too!',
|
||||
'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
|
||||
"Let's try that...",
|
||||
'BROWSE google.com',
|
||||
'<form><input type="text"></input><button type="submit"></button></form>',
|
||||
'I can browse the web too!',
|
||||
'And once I have completed my task, I can use the finish action to stop working.',
|
||||
"But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
'Very cool. Now to accomplish my task.',
|
||||
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
|
||||
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
|
||||
'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
|
||||
]
|
||||
|
||||
|
||||
def get_summarize_monologue_prompt(thoughts: list[dict]):
    """Builds the prompt that asks the model to summarize the monologue.

    Parameters:
    - thoughts (list[dict]): The monologue entries to be summarized

    Returns:
    - str: MONOLOGUE_SUMMARY_PROMPT with the serialized monologue filled in
    """
    # The monologue is wrapped under the `old_monologue` key before serializing.
    serialized = json.dumps({'old_monologue': thoughts}, indent=2)
    return MONOLOGUE_SUMMARY_PROMPT % {'monologue': serialized}
|
||||
|
||||
|
||||
def get_request_action_prompt(
    task: str,
    thoughts: list[dict],
    recent_events: list[dict],
):
    """Fills ACTION_PROMPT with the task, the monologue, and a hint.

    Parameters:
    - task (str): The current task the agent is trying to accomplish
    - thoughts (list[dict]): The agent's initial thoughts
    - recent_events (list[dict]): Serialized events, appended after the thoughts

    Returns:
    - str: Formatted prompt string with task, monologue, hint, user and timeout included
    """
    if len(recent_events) == 0:
        hint = "You're just getting started! What should you do first?"
    else:
        hint = ''
        last_event = recent_events[-1]
        # Only events that carry an `action` key can produce a hint.
        last_kind = last_event['action'] if 'action' in last_event else None
        if last_kind == 'message' and last_event.get('source') == 'agent':
            hint = "You've been thinking a lot lately. Maybe it's time to take action?"
        elif last_kind == 'error':
            hint = 'Looks like that last command failed. Maybe you need to fix it, or try something else.'

    if config.run_as_devin:
        user = 'opendevin'
    else:
        user = 'root'

    full_monologue = thoughts + recent_events

    substitutions = {
        'task': task,
        'monologue': json.dumps(full_monologue, indent=2),
        'hint': hint,
        'user': user,
        'timeout': config.sandbox.timeout,
        'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.workspace_mount_path_in_sandbox,
    }
    return ACTION_PROMPT % substitutions
|
||||
|
||||
|
||||
def parse_action_response(orig_response: str) -> Action:
    """Turns a JSON response string into an Action.

    Parameters:
    - orig_response (str): The JSON string to be parsed

    Returns:
    - Action: The action built from the parsed dictionary
    """
    # attempt to load the JSON dict from the response
    payload = json.loads(orig_response)

    # The LLM gets confused here. Might as well be robust:
    # a top-level `content` key is moved to `contents`.
    if 'content' in payload:
        misplaced = payload.pop('content')
        payload['contents'] = misplaced

    return action_from_dict(payload)
|
||||
|
||||
|
||||
def parse_summary_response(response: str) -> list[dict]:
    """Extracts the summarized monologue from a JSON response string.

    Parameters:
    - response (str): The JSON string to be parsed

    Returns:
    - list[dict]: The value stored under the `new_monologue` key
    """
    return json.loads(response)['new_monologue']
|
||||
@@ -1,4 +1,4 @@
|
||||
from agenthub.monologue_agent.response_parser import MonologueResponseParser
|
||||
from agenthub.planner_agent.response_parser import PlannerResponseParser
|
||||
from opendevin.controller.agent import Agent
|
||||
from opendevin.controller.state.state import State
|
||||
from opendevin.events.action import Action, AgentFinishAction
|
||||
@@ -15,7 +15,7 @@ class PlannerAgent(Agent):
|
||||
The agent is given its previous action-observation pairs, current task, and hint based on last action taken at every step.
|
||||
"""
|
||||
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
|
||||
response_parser = MonologueResponseParser()
|
||||
response_parser = PlannerResponseParser()
|
||||
|
||||
def __init__(self, llm: LLM):
|
||||
"""Initialize the Planner Agent with an LLM
|
||||
|
||||
@@ -6,7 +6,7 @@ from opendevin.events.action import (
|
||||
from opendevin.events.serialization.action import action_from_dict
|
||||
|
||||
|
||||
class MonologueResponseParser(ResponseParser):
|
||||
class PlannerResponseParser(ResponseParser):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
@@ -7,5 +7,3 @@ warn_unreachable = True
|
||||
warn_redundant_casts = True
|
||||
no_implicit_optional = True
|
||||
strict_optional = True
|
||||
|
||||
exclude = agenthub/monologue_agent/regression
|
||||
|
||||
@@ -1,7 +1,3 @@
|
||||
exclude = [
|
||||
"agenthub/monologue_agent/regression/",
|
||||
]
|
||||
|
||||
[lint]
|
||||
select = [
|
||||
"E",
|
||||
|
||||
@@ -61,42 +61,6 @@ _Exemple de CodeActAgent avec `gpt-4-turbo-2024-04-09` effectuant une tâche de
|
||||
[] Support de la navigation sur le web
|
||||
[] Compléter le workflow pour l'agent CodeAct afin de soumettre des PRs Github
|
||||
|
||||
## Agent Monologue
|
||||
|
||||
### Description
|
||||
|
||||
L'agent Monologue utilise la mémoire à long terme et à court terme pour accomplir des tâches.
|
||||
La mémoire à long terme est stockée en tant qu'objet LongTermMemory et le modèle l'utilise pour rechercher des exemples du passé.
|
||||
La mémoire à court terme est stockée en tant qu'objet Monologue et le modèle peut la condenser si nécessaire.
|
||||
|
||||
### Actions
|
||||
|
||||
`Action`,
|
||||
`NullAction`,
|
||||
`CmdRunAction`,
|
||||
`FileWriteAction`,
|
||||
`FileReadAction`,
|
||||
`BrowseURLAction`,
|
||||
`GithubPushAction`,
|
||||
`AgentThinkAction`
|
||||
|
||||
### Observations
|
||||
|
||||
`Observation`,
|
||||
`NullObservation`,
|
||||
`CmdOutputObservation`,
|
||||
`FileReadObservation`,
|
||||
`BrowserOutputObservation`
|
||||
|
||||
### Méthodes
|
||||
|
||||
| Méthode | Description |
|
||||
| --------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `__init__` | Initialise l'agent avec une mémoire à long terme et un monologue interne |
|
||||
| `_add_event` | Ajoute des événements au monologue de l'agent et condense avec un résumé automatiquement si le monologue est trop long |
|
||||
| `_initialize` | Utilise la liste `INITIAL_THOUGHTS` pour donner à l'agent un contexte pour ses capacités et comment naviguer dans le `/workspace` |
|
||||
| `step` | Modifie l'état actuel en ajoutant les actions et observations les plus récentes, puis invite le modèle à réfléchir à la prochaine action à entreprendre. |
|
||||
|
||||
## Agent Planificateur
|
||||
|
||||
### Description
|
||||
|
||||
@@ -61,42 +61,6 @@ _CodeActAgent使用`gpt-4-turbo-2024-04-09`执行数据科学任务(线性回
|
||||
[] 支持Web浏览
|
||||
[] 完成CodeAct agent提交Github PR的工作流程
|
||||
|
||||
## Monologue Agent
|
||||
|
||||
### 描述
|
||||
|
||||
Monologue Agent利用长短期记忆来完成任务。
|
||||
长期记忆存储为LongTermMemory对象,模型使用它来搜索过去的示例。
|
||||
短期记忆存储为Monologue对象,模型可以根据需要进行压缩。
|
||||
|
||||
### 动作
|
||||
|
||||
`Action`,
|
||||
`NullAction`,
|
||||
`CmdRunAction`,
|
||||
`FileWriteAction`,
|
||||
`FileReadAction`,
|
||||
`BrowseURLAction`,
|
||||
`GithubPushAction`,
|
||||
`AgentThinkAction`
|
||||
|
||||
### 观测
|
||||
|
||||
`Observation`,
|
||||
`NullObservation`,
|
||||
`CmdOutputObservation`,
|
||||
`FileReadObservation`,
|
||||
`BrowserOutputObservation`
|
||||
|
||||
### 方法
|
||||
|
||||
| 方法 | 描述 |
|
||||
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `__init__` | 使用长期记忆和内部独白初始化Agent |
|
||||
| `_add_event` | 将事件附加到Agent的独白中,如独白过长自动与摘要一起压缩 |
|
||||
| `_initialize` | 使用`INITIAL_THOUGHTS`列表为agent提供其能力的上下文以及如何导航`/workspace` |
|
||||
| `step` | 通过添加最近的动作和观测修改当前状态,然后提示模型考虑其接下来的动作。 |
|
||||
|
||||
## Planner Agent
|
||||
|
||||
### 描述
|
||||
|
||||
@@ -56,42 +56,6 @@ _Example of CodeActAgent with `gpt-4-turbo-2024-04-09` performing a data science
|
||||
| `__init__` | Initializes an agent with `llm` and a list of messages `list[Mapping[str, str]]` |
|
||||
| `step` | Performs one step using the CodeAct Agent. This includes gathering info on previous steps and prompting the model to make a command to execute. |
|
||||
|
||||
## Monologue Agent
|
||||
|
||||
### Description
|
||||
|
||||
The Monologue Agent utilizes long and short term memory to complete tasks.
|
||||
Long term memory is stored as a LongTermMemory object and the model uses it to search for examples from the past.
|
||||
Short term memory is stored as a Monologue object and the model can condense it as necessary.
|
||||
|
||||
### Actions
|
||||
|
||||
`Action`,
|
||||
`NullAction`,
|
||||
`CmdRunAction`,
|
||||
`FileWriteAction`,
|
||||
`FileReadAction`,
|
||||
`BrowseURLAction`,
|
||||
`GithubPushAction`,
|
||||
`AgentThinkAction`
|
||||
|
||||
### Observations
|
||||
|
||||
`Observation`,
|
||||
`NullObservation`,
|
||||
`CmdOutputObservation`,
|
||||
`FileReadObservation`,
|
||||
`BrowserOutputObservation`
|
||||
|
||||
### Methods
|
||||
|
||||
| Method | Description |
|
||||
| --------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `__init__` | Initializes the agent with a long term memory, and an internal monologue |
|
||||
| `_add_event` | Appends events to the monologue of the agent and condenses with summary automatically if the monologue is too long |
|
||||
| `_initialize` | Utilizes the `INITIAL_THOUGHTS` list to give the agent a context for its capabilities and how to navigate the `/workspace` |
|
||||
| `step` | Modifies the current state by adding the most recent actions and observations, then prompts the model to think about its next action to take. |
|
||||
|
||||
## Planner Agent
|
||||
|
||||
### Description
|
||||
|
||||
@@ -11,7 +11,6 @@ from evaluation.EDA.game import Q20Game, Q20GameCelebrity
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -48,7 +47,6 @@ def codeact_user_response_eda(state: State) -> str:
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response_eda,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -13,7 +13,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
codeact_user_response,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -29,7 +28,6 @@ AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': partial(
|
||||
codeact_user_response, encapsulate_solution=True, try_parse=None
|
||||
),
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -51,13 +51,8 @@ def codeact_user_response(state: State) -> str:
|
||||
return msg
|
||||
|
||||
|
||||
def monologue_user_response(state: State) -> str:
    """Fake user response function for MonologueAgent; it never returns.

    Raises:
    - NotImplementedError: Always, regardless of `state`
    """
    reason = 'MonologueAgent should never ask for user responses.'
    raise NotImplementedError(reason)
|
||||
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -15,7 +15,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
codeact_user_response,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -34,7 +33,6 @@ DATASET_CACHE_DIR = os.path.expanduser(DATASET_CACHE_DIR)
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': partial(codeact_user_response, encapsulate_solution=True),
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -53,13 +53,8 @@ def codeact_user_response(state: State) -> str:
|
||||
return msg
|
||||
|
||||
|
||||
def monologue_user_response(state: State) -> str:
    """Fake user response function for MonologueAgent; it never returns.

    Raises:
    - NotImplementedError: Always, regardless of `state`
    """
    reason = 'MonologueAgent should never ask for user responses.'
    raise NotImplementedError(reason)
|
||||
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -12,7 +12,7 @@ Further references:
|
||||
- https://github.com/idavidrein/gpqa
|
||||
|
||||
TODOs:
|
||||
- Add evaluation on other Agent classes (e.g., MonologueAgent)
|
||||
- Add evaluation on other Agent classes
|
||||
- Batch inference and evaluation of agents on the GPQA Benchmark.
|
||||
"""
|
||||
|
||||
@@ -30,7 +30,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
codeact_user_response,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -44,7 +43,6 @@ from opendevin.llm.llm import LLM
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -21,7 +21,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
codeact_user_response,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -64,7 +63,6 @@ LANGUAGE_TO_NUM_WORKERS = {
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -12,7 +12,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
codeact_user_response,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -26,7 +25,6 @@ from opendevin.llm.llm import LLM
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -11,7 +11,6 @@ from evaluation.swe_bench.swe_env_box import DockerSSHBox
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -55,7 +54,6 @@ def codeact_user_response_mint(state: State, task: Task, task_config: Dict[str,
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response_mint,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -25,7 +25,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
codeact_user_response,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -40,7 +39,6 @@ from opendevin.runtime.docker.ssh_box import DockerSSHBox
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -29,9 +29,6 @@ cases/
|
||||
├── hello-world/
|
||||
│ ├── task.txt
|
||||
│ ├── outputs/
|
||||
│ │ ├── monologue_agent/
|
||||
│ │ │ └── workspace/
|
||||
│ │ │ ├── hello_world.sh
|
||||
│ │ └── codeact_agent/
|
||||
│ │ └── workspace/
|
||||
│ │ ├── hello_world.sh
|
||||
@@ -39,12 +36,6 @@ cases/
|
||||
├── create_web_app/
|
||||
│ ├── task.txt
|
||||
│ ├── outputs/
|
||||
│ │ ├── monologue_agent/
|
||||
│ │ │ └── workspace/
|
||||
│ │ │ ├── app.py
|
||||
│ │ │ ├── requirements.txt
|
||||
│ │ │ ├── static/
|
||||
│ │ │ └── templates/
|
||||
│ │ └── codeact_agent/
|
||||
│ │ └── workspace/
|
||||
│ │ ├── app.py
|
||||
|
||||
@@ -125,7 +125,6 @@ def run_test_case(test_cases_dir, workspace_dir, request):
|
||||
else:
|
||||
os.makedirs(os.path.join(agent_dir, 'workspace'))
|
||||
agents_ref = {
|
||||
'monologue_agent': 'MonologueAgent',
|
||||
'codeact_agent': 'CodeActAgent',
|
||||
}
|
||||
process = subprocess.Popen(
|
||||
|
||||
@@ -14,7 +14,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
codeact_user_response,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -32,7 +31,6 @@ USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'false') == 'true'
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
'CodeActSWEAgent': codeact_user_response,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -10,7 +10,6 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
codeact_user_response,
|
||||
make_metadata,
|
||||
monologue_user_response,
|
||||
prepare_dataset,
|
||||
run_evaluation,
|
||||
)
|
||||
@@ -26,7 +25,6 @@ from .utils import download_data, download_tools, encode_question, eval_answer,
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
'MonologueAgent': monologue_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
|
||||
@@ -81,10 +81,6 @@ def codeact_user_response(
|
||||
return msg
|
||||
|
||||
|
||||
def monologue_user_response(state: State) -> str:
|
||||
raise NotImplementedError('MonologueAgent should never ask for user responses.')
|
||||
|
||||
|
||||
def cleanup():
|
||||
print('Cleaning up child processes...')
|
||||
for process in mp.active_children():
|
||||
|
||||
@@ -86,7 +86,7 @@ export const saveSettings = (settings: Partial<Settings>) => {
|
||||
* Useful for notifying the user of exact changes.
|
||||
*
|
||||
* @example
|
||||
* // Assuming the current settings are: { LLM_MODEL: "gpt-4o", AGENT: "MonologueAgent", LANGUAGE: "en" }
|
||||
* // Assuming the current settings are: { LLM_MODEL: "gpt-4o", AGENT: "CodeActAgent", LANGUAGE: "en" }
|
||||
* const updatedSettings = getSettingsDifference({ LLM_MODEL: "gpt-4o", AGENT: "OTHER_AGENT", LANGUAGE: "en" });
|
||||
* // updatedSettings = { AGENT: "OTHER_AGENT" }
|
||||
*
|
||||
|
||||
@@ -4,7 +4,7 @@ from opendevin.llm.llm import LLM
|
||||
|
||||
class MemoryCondenser:
|
||||
def condense(self, summarize_prompt: str, llm: LLM):
|
||||
"""Attempts to condense the monologue by using the llm
|
||||
"""Attempts to condense the memory by using the llm
|
||||
|
||||
Parameters:
|
||||
- llm (LLM): llm to be used for summarization
|
||||
@@ -20,5 +20,5 @@ class MemoryCondenser:
|
||||
except Exception as e:
|
||||
logger.error('Error condensing thoughts: %s', str(e), exc_info=False)
|
||||
|
||||
# TODO If the llm fails with ContextWindowExceededError, we can try to condense the monologue chunk by chunk
|
||||
# TODO If the llm fails with ContextWindowExceededError, we can try to condense the memory chunk by chunk
|
||||
raise
|
||||
|
||||
@@ -98,8 +98,8 @@ class EmbeddingsLoader:
|
||||
)
|
||||
elif (strategy is not None) and (strategy.lower() == 'none'):
|
||||
# TODO: this works but is not elegant enough. The incentive is when
|
||||
# monologue agent is not used, there is no reason we need to initialize an
|
||||
# embedding model
|
||||
# an agent using embeddings is not used, there is no reason we need to
|
||||
# initialize an embedding model
|
||||
return None
|
||||
else:
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
|
||||
@@ -46,7 +46,6 @@ def read_llm_models():
|
||||
@app.get('/api/options/agents')
|
||||
def read_llm_agents():
|
||||
return [
|
||||
'MonologueAgent',
|
||||
'CodeActAgent',
|
||||
'PlannerAgent',
|
||||
]
|
||||
|
||||
@@ -1,313 +0,0 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
You're a thoughtful robot. Your main task is this:
|
||||
Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
|
||||
|
||||
Don't expand the scope of your task--just complete it as written.
|
||||
|
||||
This is your internal monologue, in JSON format:
|
||||
|
||||
[
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I exist!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Hmm...looks like I can type in a command line prompt",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Looks like I have a web browser too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Here's what I want to do: Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "How am I going to get there though?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like I have some kind of short term memory.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Each of my thoughts seems to be stored in a JSON array.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems whatever I say next will be added as an object to the list.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "echo \"hello world\"",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Cool! I bet I can write files too using the write action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"content": "echo \"console.log('hello world')\"",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I just created test.js. I'll try and run it now.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "node test.js",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It works!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'm going to try reading it now using the read action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "read",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "read",
|
||||
"content": "console.log('hello world')",
|
||||
"extras": {
|
||||
"path": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Nice! I can read files too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Let's try that...",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "browse",
|
||||
"args": {
|
||||
"url": "google.com",
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "browse",
|
||||
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
|
||||
"extras": {
|
||||
"url": "",
|
||||
"status_code": 200,
|
||||
"error": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I can browse the web too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And once I have completed my task, I can use the finish action to stop working.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Very cool. Now to accomplish my task.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "OK so my task is to Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "user",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
|
||||
What is your next single thought or action? Your response must be in JSON format.
|
||||
It must be a single object, and it must contain two fields:
|
||||
* `action`, which is one of the actions below
|
||||
* `args`, which is a map of key-value pairs, specifying the arguments for that action
|
||||
|
||||
Here are the possible actions:
|
||||
* `read` - reads the content of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the content to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `content` - the content to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `push` - Push a branch from the current repo to github:
|
||||
* `owner` - the owner of the repo to push to
|
||||
* `repo` - the name of the repo to push to
|
||||
* `branch` - the name of the branch to push
|
||||
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
|
||||
* `content` - the message to record
|
||||
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
You MUST take time to think in between read, write, run, browse, and push actions--do this with the `message` action.
|
||||
You should never act twice in a row without thinking. But if your last several
|
||||
actions are all `message` actions, you should consider taking a different action.
|
||||
|
||||
Notes:
|
||||
* you are logged in as opendevin, but sudo will always work without a password.
|
||||
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
|
||||
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
|
||||
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
|
||||
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
|
||||
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
@@ -1,332 +0,0 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
You're a thoughtful robot. Your main task is this:
|
||||
Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
|
||||
|
||||
Don't expand the scope of your task--just complete it as written.
|
||||
|
||||
This is your internal monologue, in JSON format:
|
||||
|
||||
[
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I exist!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Hmm...looks like I can type in a command line prompt",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Looks like I have a web browser too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Here's what I want to do: Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "How am I going to get there though?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like I have some kind of short term memory.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Each of my thoughts seems to be stored in a JSON array.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems whatever I say next will be added as an object to the list.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "echo \"hello world\"",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Cool! I bet I can write files too using the write action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"content": "echo \"console.log('hello world')\"",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I just created test.js. I'll try and run it now.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "node test.js",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It works!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'm going to try reading it now using the read action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "read",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "read",
|
||||
"content": "console.log('hello world')",
|
||||
"extras": {
|
||||
"path": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Nice! I can read files too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Let's try that...",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "browse",
|
||||
"args": {
|
||||
"url": "google.com",
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "browse",
|
||||
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
|
||||
"extras": {
|
||||
"url": "",
|
||||
"status_code": 200,
|
||||
"error": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I can browse the web too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And once I have completed my task, I can use the finish action to stop working.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Very cool. Now to accomplish my task.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "OK so my task is to Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "user",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "pwd",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "/workspace",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "pwd",
|
||||
"exit_code": 0
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
|
||||
What is your next single thought or action? Your response must be in JSON format.
|
||||
It must be a single object, and it must contain two fields:
|
||||
* `action`, which is one of the actions below
|
||||
* `args`, which is a map of key-value pairs, specifying the arguments for that action
|
||||
|
||||
Here are the possible actions:
|
||||
* `read` - reads the content of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the content to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `content` - the content to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `push` - Push a branch from the current repo to github:
|
||||
* `owner` - the owner of the repo to push to
|
||||
* `repo` - the name of the repo to push to
|
||||
* `branch` - the name of the branch to push
|
||||
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
|
||||
* `content` - the message to record
|
||||
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
You MUST take time to think in between read, write, run, browse, and push actions--do this with the `message` action.
|
||||
You should never act twice in a row without thinking. But if your last several
|
||||
actions are all `message` actions, you should consider taking a different action.
|
||||
|
||||
Notes:
|
||||
* you are logged in as opendevin, but sudo will always work without a password.
|
||||
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
|
||||
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
|
||||
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
|
||||
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
|
||||
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
@@ -1,351 +0,0 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
You're a thoughtful robot. Your main task is this:
|
||||
Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
|
||||
|
||||
Don't expand the scope of your task--just complete it as written.
|
||||
|
||||
This is your internal monologue, in JSON format:
|
||||
|
||||
[
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I exist!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Hmm...looks like I can type in a command line prompt",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Looks like I have a web browser too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Here's what I want to do: Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "How am I going to get there though?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like I have some kind of short term memory.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Each of my thoughts seems to be stored in a JSON array.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems whatever I say next will be added as an object to the list.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "echo \"hello world\"",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Cool! I bet I can write files too using the write action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"content": "echo \"console.log('hello world')\"",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I just created test.js. I'll try and run it now.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "node test.js",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It works!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'm going to try reading it now using the read action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "read",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "read",
|
||||
"content": "console.log('hello world')",
|
||||
"extras": {
|
||||
"path": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Nice! I can read files too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Let's try that...",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "browse",
|
||||
"args": {
|
||||
"url": "google.com",
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "browse",
|
||||
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
|
||||
"extras": {
|
||||
"url": "",
|
||||
"status_code": 200,
|
||||
"error": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I can browse the web too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And once I have completed my task, I can use the finish action to stop working.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Very cool. Now to accomplish my task.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "OK so my task is to Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "user",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "pwd",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "/workspace",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "pwd",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "ls",
|
||||
"thought": "I need to see the contents of the current directory to ensure there are no conflicts and to understand the environment better.",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "ls",
|
||||
"exit_code": 0
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
|
||||
What is your next single thought or action? Your response must be in JSON format.
|
||||
It must be a single object, and it must contain two fields:
|
||||
* `action`, which is one of the actions below
|
||||
* `args`, which is a map of key-value pairs, specifying the arguments for that action
|
||||
|
||||
Here are the possible actions:
|
||||
* `read` - reads the content of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the content to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `content` - the content to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `push` - Push a branch from the current repo to github:
|
||||
* `owner` - the owner of the repo to push to
|
||||
* `repo` - the name of the repo to push to
|
||||
* `branch` - the name of the branch to push
|
||||
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
|
||||
* `content` - the message to record
|
||||
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
You MUST take time to think in between read, write, run, browse, and push actions--do this with the `message` action.
|
||||
You should never act twice in a row without thinking. But if your last several
|
||||
actions are all `message` actions, you should consider taking a different action.
|
||||
|
||||
Notes:
|
||||
* you are logged in as opendevin, but sudo will always work without a password.
|
||||
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
|
||||
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
|
||||
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
|
||||
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
|
||||
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
@@ -1,361 +0,0 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
You're a thoughtful robot. Your main task is this:
|
||||
Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
|
||||
|
||||
Don't expand the scope of your task--just complete it as written.
|
||||
|
||||
This is your internal monologue, in JSON format:
|
||||
|
||||
[
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I exist!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Hmm...looks like I can type in a command line prompt",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Looks like I have a web browser too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Here's what I want to do: Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "How am I going to get there though?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like I have some kind of short term memory.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Each of my thoughts seems to be stored in a JSON array.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems whatever I say next will be added as an object to the list.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "echo \"hello world\"",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Cool! I bet I can write files too using the write action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"content": "echo \"console.log('hello world')\"",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I just created test.js. I'll try and run it now.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "node test.js",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It works!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'm going to try reading it now using the read action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "read",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "read",
|
||||
"content": "console.log('hello world')",
|
||||
"extras": {
|
||||
"path": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Nice! I can read files too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Let's try that...",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "browse",
|
||||
"args": {
|
||||
"url": "google.com",
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "browse",
|
||||
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
|
||||
"extras": {
|
||||
"url": "",
|
||||
"status_code": 200,
|
||||
"error": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I can browse the web too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And once I have completed my task, I can use the finish action to stop working.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Very cool. Now to accomplish my task.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "OK so my task is to Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "user",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "pwd",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "/workspace",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "pwd",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "ls",
|
||||
"thought": "I need to see the contents of the current directory to ensure there are no conflicts and to understand the environment better.",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "ls",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "The current directory is /workspace and it appears to be empty. This is a good place to create the 'hello.sh' script without any conflicts.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
|
||||
What is your next single thought or action? Your response must be in JSON format.
|
||||
It must be a single object, and it must contain two fields:
|
||||
* `action`, which is one of the actions below
|
||||
* `args`, which is a map of key-value pairs, specifying the arguments for that action
|
||||
|
||||
Here are the possible actions:
|
||||
* `read` - reads the content of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the content to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `content` - the content to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `push` - Push a branch from the current repo to github:
|
||||
* `owner` - the owner of the repo to push to
|
||||
* `repo` - the name of the repo to push to
|
||||
* `branch` - the name of the branch to push
|
||||
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
|
||||
* `content` - the message to record
|
||||
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
You MUST take time to think in between read, write, run, browse, and push actions--do this with the `message` action.
|
||||
You should never act twice in a row without thinking. But if your last several
|
||||
actions are all `message` actions, you should consider taking a different action.
|
||||
|
||||
Notes:
|
||||
* you are logged in as opendevin, but sudo will always work without a password.
|
||||
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
|
||||
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
|
||||
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
|
||||
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
|
||||
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
|
||||
You've been thinking a lot lately. Maybe it's time to take action?
|
||||
@@ -1,378 +0,0 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
You're a thoughtful robot. Your main task is this:
|
||||
Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
|
||||
|
||||
Don't expand the scope of your task--just complete it as written.
|
||||
|
||||
This is your internal monologue, in JSON format:
|
||||
|
||||
[
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I exist!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Hmm...looks like I can type in a command line prompt",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Looks like I have a web browser too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Here's what I want to do: Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "How am I going to get there though?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like I have some kind of short term memory.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Each of my thoughts seems to be stored in a JSON array.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems whatever I say next will be added as an object to the list.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "echo \"hello world\"",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Cool! I bet I can write files too using the write action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"content": "echo \"console.log('hello world')\"",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I just created test.js. I'll try and run it now.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "node test.js",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It works!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'm going to try reading it now using the read action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "read",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "read",
|
||||
"content": "console.log('hello world')",
|
||||
"extras": {
|
||||
"path": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Nice! I can read files too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Let's try that...",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "browse",
|
||||
"args": {
|
||||
"url": "google.com",
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "browse",
|
||||
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
|
||||
"extras": {
|
||||
"url": "",
|
||||
"status_code": 200,
|
||||
"error": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I can browse the web too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And once I have completed my task, I can use the finish action to stop working.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Very cool. Now to accomplish my task.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "OK so my task is to Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "user",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "pwd",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "/workspace",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "pwd",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "ls",
|
||||
"thought": "I need to see the contents of the current directory to ensure there are no conflicts and to understand the environment better.",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "ls",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "The current directory is /workspace and it appears to be empty. This is a good place to create the 'hello.sh' script without any conflicts.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "hello.sh",
|
||||
"content": "#!/bin/bash\necho 'hello'",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "write",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"path": "hello.sh"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
|
||||
What is your next single thought or action? Your response must be in JSON format.
|
||||
It must be a single object, and it must contain two fields:
|
||||
* `action`, which is one of the actions below
|
||||
* `args`, which is a map of key-value pairs, specifying the arguments for that action
|
||||
|
||||
Here are the possible actions:
|
||||
* `read` - reads the content of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the content to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `content` - the content to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `push` - Push a branch from the current repo to github:
|
||||
* `owner` - the owner of the repo to push to
|
||||
* `repo` - the name of the repo to push to
|
||||
* `branch` - the name of the branch to push
|
||||
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
|
||||
* `content` - the message to record
|
||||
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
You MUST take time to think in between read, write, run, browse, and push actions--do this with the `message` action.
|
||||
You should never act twice in a row without thinking. But if your last several
|
||||
actions are all `message` actions, you should consider taking a different action.
|
||||
|
||||
Notes:
|
||||
* you are logged in as opendevin, but sudo will always work without a password.
|
||||
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
|
||||
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
|
||||
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
|
||||
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
|
||||
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
@@ -1,388 +0,0 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
You're a thoughtful robot. Your main task is this:
|
||||
Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
|
||||
|
||||
Don't expand the scope of your task--just complete it as written.
|
||||
|
||||
This is your internal monologue, in JSON format:
|
||||
|
||||
[
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I exist!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Hmm...looks like I can type in a command line prompt",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Looks like I have a web browser too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Here's what I want to do: Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "How am I going to get there though?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like I have some kind of short term memory.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Each of my thoughts seems to be stored in a JSON array.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems whatever I say next will be added as an object to the list.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "echo \"hello world\"",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Cool! I bet I can write files too using the write action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"content": "echo \"console.log('hello world')\"",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I just created test.js. I'll try and run it now.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "node test.js",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It works!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'm going to try reading it now using the read action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "read",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "read",
|
||||
"content": "console.log('hello world')",
|
||||
"extras": {
|
||||
"path": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Nice! I can read files too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Let's try that...",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "browse",
|
||||
"args": {
|
||||
"url": "google.com",
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "browse",
|
||||
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
|
||||
"extras": {
|
||||
"url": "",
|
||||
"status_code": 200,
|
||||
"error": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I can browse the web too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And once I have completed my task, I can use the finish action to stop working.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Very cool. Now to accomplish my task.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "OK so my task is to Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "user",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "pwd",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "/workspace",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "pwd",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "ls",
|
||||
"thought": "I need to see the contents of the current directory to ensure there are no conflicts and to understand the environment better.",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "ls",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "The current directory is /workspace and it appears to be empty. This is a good place to create the 'hello.sh' script without any conflicts.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "hello.sh",
|
||||
"content": "#!/bin/bash\necho 'hello'",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "write",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"path": "hello.sh"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I have written the 'hello.sh' script. Now, I need to make sure it is executable and then run it to verify that it prints 'hello'.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
|
||||
What is your next single thought or action? Your response must be in JSON format.
|
||||
It must be a single object, and it must contain two fields:
|
||||
* `action`, which is one of the actions below
|
||||
* `args`, which is a map of key-value pairs, specifying the arguments for that action
|
||||
|
||||
Here are the possible actions:
|
||||
* `read` - reads the content of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the content to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `content` - the content to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `push` - Push a branch from the current repo to github:
|
||||
* `owner` - the owner of the repo to push to
|
||||
* `repo` - the name of the repo to push to
|
||||
* `branch` - the name of the branch to push
|
||||
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
|
||||
* `content` - the message to record
|
||||
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
You MUST take time to think in between read, write, run, browse, and push actions--do this with the `message` action.
|
||||
You should never act twice in a row without thinking. But if your last several
|
||||
actions are all `message` actions, you should consider taking a different action.
|
||||
|
||||
Notes:
|
||||
* you are logged in as opendevin, but sudo will always work without a password.
|
||||
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
|
||||
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
|
||||
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
|
||||
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
|
||||
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
|
||||
You've been thinking a lot lately. Maybe it's time to take action?
|
||||
@@ -1,405 +0,0 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
You're a thoughtful robot. Your main task is this:
|
||||
Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
|
||||
|
||||
Don't expand the scope of your task--just complete it as written.
|
||||
|
||||
This is your internal monologue, in JSON format:
|
||||
|
||||
[
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I exist!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Hmm...looks like I can type in a command line prompt",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Looks like I have a web browser too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Here's what I want to do: Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "How am I going to get there though?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like I have some kind of short term memory.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Each of my thoughts seems to be stored in a JSON array.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems whatever I say next will be added as an object to the list.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "echo \"hello world\"",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Cool! I bet I can write files too using the write action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"content": "echo \"console.log('hello world')\"",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I just created test.js. I'll try and run it now.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "node test.js",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It works!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'm going to try reading it now using the read action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "read",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "read",
|
||||
"content": "console.log('hello world')",
|
||||
"extras": {
|
||||
"path": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Nice! I can read files too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Let's try that...",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "browse",
|
||||
"args": {
|
||||
"url": "google.com",
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "browse",
|
||||
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
|
||||
"extras": {
|
||||
"url": "",
|
||||
"status_code": 200,
|
||||
"error": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I can browse the web too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And once I have completed my task, I can use the finish action to stop working.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Very cool. Now to accomplish my task.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "OK so my task is to Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "user",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "pwd",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "/workspace",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "pwd",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "ls",
|
||||
"thought": "I need to see the contents of the current directory to ensure there are no conflicts and to understand the environment better.",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "ls",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "The current directory is /workspace and it appears to be empty. This is a good place to create the 'hello.sh' script without any conflicts.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "hello.sh",
|
||||
"content": "#!/bin/bash\necho 'hello'",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "write",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"path": "hello.sh"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I have written the 'hello.sh' script. Now, I need to make sure it is executable and then run it to verify that it prints 'hello'.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "chmod +x hello.sh",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "chmod +x hello.sh",
|
||||
"exit_code": 0
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
|
||||
What is your next single thought or action? Your response must be in JSON format.
|
||||
It must be a single object, and it must contain two fields:
|
||||
* `action`, which is one of the actions below
|
||||
* `args`, which is a map of key-value pairs, specifying the arguments for that action
|
||||
|
||||
Here are the possible actions:
|
||||
* `read` - reads the content of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the content to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `content` - the content to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `push` - Push a branch from the current repo to github:
|
||||
* `owner` - the owner of the repo to push to
|
||||
* `repo` - the name of the repo to push to
|
||||
* `branch` - the name of the branch to push
|
||||
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
|
||||
* `content` - the message to record
|
||||
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
You MUST take time to think in between read, write, run, browse, and push actions--do this with the `message` action.
|
||||
You should never act twice in a row without thinking. But if your last several
|
||||
actions are all `message` actions, you should consider taking a different action.
|
||||
|
||||
Notes:
|
||||
* you are logged in as opendevin, but sudo will always work without a password.
|
||||
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
|
||||
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
|
||||
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
|
||||
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
|
||||
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
@@ -1,424 +0,0 @@
|
||||
|
||||
|
||||
----------
|
||||
|
||||
|
||||
You're a thoughtful robot. Your main task is this:
|
||||
Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.
|
||||
|
||||
Don't expand the scope of your task--just complete it as written.
|
||||
|
||||
This is your internal monologue, in JSON format:
|
||||
|
||||
[
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I exist!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Hmm...looks like I can type in a command line prompt",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Looks like I have a web browser too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Here's what I want to do: Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "How am I going to get there though?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like I have some kind of short term memory.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Each of my thoughts seems to be stored in a JSON array.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems whatever I say next will be added as an object to the list.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "echo \"hello world\"",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Cool! I bet I can write files too using the write action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"content": "echo \"console.log('hello world')\"",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I just created test.js. I'll try and run it now.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "node test.js",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "run",
|
||||
"content": "hello world",
|
||||
"extras": {
|
||||
"command_id": 0,
|
||||
"command": "",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It works!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'm going to try reading it now using the read action.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "read",
|
||||
"args": {
|
||||
"path": "test.js",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "read",
|
||||
"content": "console.log('hello world')",
|
||||
"extras": {
|
||||
"path": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Nice! I can read files too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Let's try that...",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "browse",
|
||||
"args": {
|
||||
"url": "google.com",
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"observation": "browse",
|
||||
"content": "<form><input type=\"text\"></input><button type=\"submit\"></button></form>",
|
||||
"extras": {
|
||||
"url": "",
|
||||
"status_code": 200,
|
||||
"error": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I can browse the web too!",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "And once I have completed my task, I can use the finish action to stop working.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Very cool. Now to accomplish my task.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "OK so my task is to Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.. I haven't made any progress yet. Where should I start?",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "user",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "pwd",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "/workspace",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "pwd",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "ls",
|
||||
"thought": "I need to see the contents of the current directory to ensure there are no conflicts and to understand the environment better.",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "ls",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "The current directory is /workspace and it appears to be empty. This is a good place to create the 'hello.sh' script without any conflicts.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "hello.sh",
|
||||
"content": "#!/bin/bash\necho 'hello'",
|
||||
"start": 0,
|
||||
"end": -1,
|
||||
"thought": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "write",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"path": "hello.sh"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I have written the 'hello.sh' script. Now, I need to make sure it is executable and then run it to verify that it prints 'hello'.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "chmod +x hello.sh",
|
||||
"thought": "",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "chmod +x hello.sh",
|
||||
"exit_code": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "./hello.sh",
|
||||
"thought": "I need to run the 'hello.sh' script to verify that it prints 'hello'.",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
},
|
||||
{
|
||||
"source": "agent",
|
||||
"observation": "run",
|
||||
"content": "hello",
|
||||
"extras": {
|
||||
"command_id": -1,
|
||||
"command": "./hello.sh",
|
||||
"exit_code": 0
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
|
||||
What is your next single thought or action? Your response must be in JSON format.
|
||||
It must be a single object, and it must contain two fields:
|
||||
* `action`, which is one of the actions below
|
||||
* `args`, which is a map of key-value pairs, specifying the arguments for that action
|
||||
|
||||
Here are the possible actions:
|
||||
* `read` - reads the content of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the content to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `content` - the content to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `push` - Push a branch from the current repo to github:
|
||||
* `owner` - the owner of the repo to push to
|
||||
* `repo` - the name of the repo to push to
|
||||
* `branch` - the name of the branch to push
|
||||
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
|
||||
* `content` - the message to record
|
||||
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
You MUST take time to think in between read, write, run, browse, and push actions--do this with the `message` action.
|
||||
You should never act twice in a row without thinking. But if your last several
|
||||
actions are all `message` actions, you should consider taking a different action.
|
||||
|
||||
Notes:
|
||||
* you are logged in as opendevin, but sudo will always work without a password.
|
||||
* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
|
||||
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
|
||||
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
|
||||
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
|
||||
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
|
||||
* whenever an action fails, always send a `message` about why it may have happened before acting again.
|
||||
|
||||
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
|
||||
@@ -1,8 +0,0 @@
|
||||
```json
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "pwd"
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -1,10 +0,0 @@
|
||||
```json
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "ls",
|
||||
"thought": "I need to see the contents of the current directory to ensure there are no conflicts and to understand the environment better.",
|
||||
"is_confirmed": "confirmed"
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -1,9 +0,0 @@
|
||||
```json
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "The current directory is /workspace and it appears to be empty. This is a good place to create the 'hello.sh' script without any conflicts.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -1,9 +0,0 @@
|
||||
```json
|
||||
{
|
||||
"action": "write",
|
||||
"args": {
|
||||
"path": "hello.sh",
|
||||
"content": "#!/bin/bash\necho 'hello'"
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -1,9 +0,0 @@
|
||||
```json
|
||||
{
|
||||
"action": "message",
|
||||
"args": {
|
||||
"content": "I have written the 'hello.sh' script. Now, I need to make sure it is executable and then run it to verify that it prints 'hello'.",
|
||||
"wait_for_response": false
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -1,8 +0,0 @@
|
||||
```json
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "chmod +x hello.sh"
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -1,9 +0,0 @@
|
||||
```json
|
||||
{
|
||||
"action": "run",
|
||||
"args": {
|
||||
"command": "./hello.sh",
|
||||
"thought": "I need to run the 'hello.sh' script to verify that it prints 'hello'."
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -1,6 +0,0 @@
|
||||
```json
|
||||
{
|
||||
"action": "finish",
|
||||
"args": {}
|
||||
}
|
||||
```
|
||||
@@ -60,7 +60,6 @@ agents=(
|
||||
"DelegatorAgent"
|
||||
"ManagerAgent"
|
||||
"BrowsingAgent"
|
||||
"MonologueAgent"
|
||||
"CodeActAgent"
|
||||
"PlannerAgent"
|
||||
"CodeActSWEAgent"
|
||||
|
||||
@@ -96,9 +96,8 @@ def test_write_simple_script() -> None:
|
||||
reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('DEFAULT_AGENT') == 'MonologueAgent'
|
||||
or os.getenv('DEFAULT_AGENT') == 'PlannerAgent',
|
||||
reason='We only keep basic tests for MonologueAgent and PlannerAgent',
|
||||
os.getenv('DEFAULT_AGENT') == 'PlannerAgent',
|
||||
reason='We only keep basic tests for PlannerAgent',
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
os.getenv('SANDBOX_BOX_TYPE') == 'local',
|
||||
|
||||
@@ -365,10 +365,8 @@ def test_defaults_dict_after_updates(default_config):
|
||||
updated_config.get_llm_config().api_key = 'updated-api-key'
|
||||
updated_config.get_llm_config('llm').api_key = 'updated-api-key'
|
||||
updated_config.get_llm_config_from_agent('agent').api_key = 'updated-api-key'
|
||||
updated_config.get_llm_config_from_agent(
|
||||
'MonologueAgent'
|
||||
).api_key = 'updated-api-key'
|
||||
updated_config.default_agent = 'MonologueAgent'
|
||||
updated_config.get_llm_config_from_agent('PlannerAgent').api_key = 'updated-api-key'
|
||||
updated_config.default_agent = 'PlannerAgent'
|
||||
|
||||
defaults_after_updates = updated_config.defaults_dict
|
||||
assert defaults_after_updates['default_agent']['default'] == 'CodeActAgent'
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from agenthub.micro.agent import parse_response as parse_response_micro
|
||||
from agenthub.monologue_agent.utils.prompts import (
|
||||
parse_action_response as parse_response_monologue,
|
||||
)
|
||||
from agenthub.planner_agent.prompt import parse_response as parse_response_planner
|
||||
from opendevin.core.exceptions import LLMResponseError
|
||||
from opendevin.core.utils.json import loads as custom_loads
|
||||
@@ -15,7 +12,7 @@ from opendevin.events.action import (
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'parse_response_module',
|
||||
[parse_response_micro, parse_response_planner, parse_response_monologue],
|
||||
[parse_response_micro, parse_response_planner],
|
||||
)
|
||||
def test_parse_single_complete_json(parse_response_module):
|
||||
input_response = """
|
||||
@@ -35,7 +32,7 @@ def test_parse_single_complete_json(parse_response_module):
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'parse_response_module',
|
||||
[parse_response_micro, parse_response_planner, parse_response_monologue],
|
||||
[parse_response_micro, parse_response_planner],
|
||||
)
|
||||
def test_parse_json_with_surrounding_text(parse_response_module):
|
||||
input_response = """
|
||||
@@ -58,7 +55,7 @@ def test_parse_json_with_surrounding_text(parse_response_module):
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'parse_response_module',
|
||||
[parse_response_micro, parse_response_planner, parse_response_monologue],
|
||||
[parse_response_micro, parse_response_planner],
|
||||
)
|
||||
def test_parse_first_of_multiple_jsons(parse_response_module):
|
||||
input_response = """
|
||||
|
||||
Reference in New Issue
Block a user