From 9b371b1b5fa004a10db8fffb07edb5cd538bc29b Mon Sep 17 00:00:00 2001 From: Boxuan Li Date: Tue, 28 May 2024 20:01:16 -0700 Subject: [PATCH] Refactor agent delegation and tweak micro agents (#1910) This PR fixes #1897. In addition, this PR fixes and tweaks a few micro-agents. For the first time, I am able to use ManagerAgent to complete test_write_simple_script and test_edits tasks in integration tests, so this PR also adds ManagerAgent as part of integration tests. test_write_simple_script involves delegation to CoderAgent while test_edits involves delegation to TypoFixerAgent. Also for the first time, I am able to use DelegatorAgent to complete test_write_simple_script and test_edits tasks in integration tests, so this PR also adds DelegatorAgent as part of integration tests. It involves delegation to StudyRepoForTaskAgent, CoderAgent and VerifierAgent. This PR is a blocker for #1735 and likely #1945. --- .../micro/_instructions/actions/finish.md | 2 +- agenthub/micro/agent.py | 3 +- agenthub/micro/coder/agent.yaml | 2 +- agenthub/micro/coder/prompt.md | 2 +- agenthub/micro/math_agent/prompt.md | 2 +- agenthub/micro/postgres_agent/prompt.md | 2 +- agenthub/micro/registry.py | 5 +- agenthub/micro/study_repo_for_task/prompt.md | 50 ++++++++- agenthub/micro/typo_fixer_agent/agent.yaml | 3 +- agenthub/micro/typo_fixer_agent/prompt.md | 14 ++- agenthub/micro/verifier/prompt.md | 5 +- opendevin/controller/agent_controller.py | 103 ++++++++++++++---- opendevin/controller/state/state.py | 2 + opendevin/events/stream.py | 22 ++-- .../DelegatorAgent/test_edits/prompt_001.log | 86 +++++++++++++++ .../DelegatorAgent/test_edits/prompt_002.log | 86 +++++++++++++++ .../DelegatorAgent/test_edits/prompt_003.log | 86 +++++++++++++++ .../DelegatorAgent/test_edits/prompt_004.log | 59 ++++++++++ .../DelegatorAgent/test_edits/prompt_005.log | 59 ++++++++++ .../DelegatorAgent/test_edits/prompt_006.log | 59 ++++++++++ .../DelegatorAgent/test_edits/prompt_007.log | 50 +++++++++ 
.../DelegatorAgent/test_edits/prompt_008.log | 50 +++++++++ .../test_edits/response_001.log | 7 ++ .../test_edits/response_002.log | 6 + .../test_edits/response_003.log | 8 ++ .../test_edits/response_004.log | 8 ++ .../test_edits/response_005.log | 9 ++ .../test_edits/response_006.log | 8 ++ .../test_edits/response_007.log | 8 ++ .../test_edits/response_008.log | 10 ++ .../test_write_simple_script/prompt_001.log | 86 +++++++++++++++ .../test_write_simple_script/prompt_002.log | 86 +++++++++++++++ .../test_write_simple_script/prompt_003.log | 52 +++++++++ .../test_write_simple_script/prompt_004.log | 52 +++++++++ .../test_write_simple_script/prompt_005.log | 52 +++++++++ .../test_write_simple_script/prompt_006.log | 50 +++++++++ .../test_write_simple_script/prompt_007.log | 50 +++++++++ .../test_write_simple_script/prompt_008.log | 50 +++++++++ .../test_write_simple_script/response_001.log | 7 ++ .../test_write_simple_script/response_002.log | 8 ++ .../test_write_simple_script/response_003.log | 7 ++ .../test_write_simple_script/response_004.log | 7 ++ .../test_write_simple_script/response_005.log | 8 ++ .../test_write_simple_script/response_006.log | 6 + .../test_write_simple_script/response_007.log | 7 ++ .../test_write_simple_script/response_008.log | 23 ++++ .../ManagerAgent/test_edits/prompt_001.log | 79 ++++++++++++++ .../ManagerAgent/test_edits/prompt_002.log | 80 ++++++++++++++ .../ManagerAgent/test_edits/prompt_003.log | 80 ++++++++++++++ .../ManagerAgent/test_edits/prompt_004.log | 80 ++++++++++++++ .../ManagerAgent/test_edits/prompt_005.log | 80 ++++++++++++++ .../ManagerAgent/test_edits/prompt_006.log | 79 ++++++++++++++ .../ManagerAgent/test_edits/response_001.log | 11 ++ .../ManagerAgent/test_edits/response_002.log | 6 + .../ManagerAgent/test_edits/response_003.log | 7 ++ .../ManagerAgent/test_edits/response_004.log | 7 ++ .../ManagerAgent/test_edits/response_005.log | 20 ++++ .../ManagerAgent/test_edits/response_006.log | 20 ++++ 
.../test_write_simple_script/prompt_001.log | 79 ++++++++++++++ .../test_write_simple_script/prompt_002.log | 48 ++++++++ .../test_write_simple_script/prompt_003.log | 48 ++++++++ .../test_write_simple_script/prompt_004.log | 48 ++++++++ .../test_write_simple_script/prompt_005.log | 48 ++++++++ .../test_write_simple_script/prompt_006.log | 79 ++++++++++++++ .../test_write_simple_script/prompt_007.log | 50 +++++++++ .../test_write_simple_script/prompt_008.log | 50 +++++++++ .../test_write_simple_script/prompt_009.log | 50 +++++++++ .../test_write_simple_script/prompt_010.log | 79 ++++++++++++++ .../test_write_simple_script/response_001.log | 12 ++ .../test_write_simple_script/response_002.log | 9 ++ .../test_write_simple_script/response_003.log | 6 + .../test_write_simple_script/response_004.log | 7 ++ .../test_write_simple_script/response_005.log | 8 ++ .../test_write_simple_script/response_006.log | 11 ++ .../test_write_simple_script/response_007.log | 8 ++ .../test_write_simple_script/response_008.log | 8 ++ .../test_write_simple_script/response_009.log | 10 ++ .../test_write_simple_script/response_010.log | 8 ++ tests/integration/regenerate.sh | 2 +- 79 files changed, 2534 insertions(+), 50 deletions(-) create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/prompt_001.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/prompt_002.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/prompt_003.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/prompt_004.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/prompt_005.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/prompt_006.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/prompt_007.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/prompt_008.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/response_001.log create mode 100644 
tests/integration/mock/DelegatorAgent/test_edits/response_002.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/response_003.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/response_004.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/response_005.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/response_006.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/response_007.log create mode 100644 tests/integration/mock/DelegatorAgent/test_edits/response_008.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_001.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_002.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_003.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_004.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_005.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_006.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_007.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_008.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/response_001.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/response_002.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/response_003.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/response_004.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/response_005.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/response_006.log create mode 100644 
tests/integration/mock/DelegatorAgent/test_write_simple_script/response_007.log create mode 100644 tests/integration/mock/DelegatorAgent/test_write_simple_script/response_008.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/prompt_001.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/prompt_002.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/prompt_003.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/prompt_004.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/prompt_005.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/prompt_006.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/response_001.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/response_002.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/response_003.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/response_004.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/response_005.log create mode 100644 tests/integration/mock/ManagerAgent/test_edits/response_006.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_001.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_002.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_003.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_004.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_005.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_006.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_007.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_008.log create mode 100644 
tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_009.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_010.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_001.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_002.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_003.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_004.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_005.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_006.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_007.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_008.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_009.log create mode 100644 tests/integration/mock/ManagerAgent/test_write_simple_script/response_010.log diff --git a/agenthub/micro/_instructions/actions/finish.md b/agenthub/micro/_instructions/actions/finish.md index 3591093928..ee3063c420 100644 --- a/agenthub/micro/_instructions/actions/finish.md +++ b/agenthub/micro/_instructions/actions/finish.md @@ -1,2 +1,2 @@ -* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working. Arguments: +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. 
Arguments: * `outputs` - a dictionary representing the outputs of your task, if any diff --git a/agenthub/micro/agent.py b/agenthub/micro/agent.py index e4ac0cceb6..c44dc882ae 100644 --- a/agenthub/micro/agent.py +++ b/agenthub/micro/agent.py @@ -55,14 +55,13 @@ class MicroAgent(Agent): del self.delegates[self.agent_definition['name']] def step(self, state: State) -> Action: - latest_user_message = state.get_current_user_intent() prompt = self.prompt_template.render( state=state, instructions=instructions, to_json=to_json, history_to_json=history_to_json, delegates=self.delegates, - latest_user_message=latest_user_message, + latest_user_message=state.get_current_user_intent(), ) messages = [{'content': prompt, 'role': 'user'}] resp = self.llm.do_completion(messages=messages) diff --git a/agenthub/micro/coder/agent.yaml b/agenthub/micro/coder/agent.yaml index 21eb9b6184..3e65a204d8 100644 --- a/agenthub/micro/coder/agent.yaml +++ b/agenthub/micro/coder/agent.yaml @@ -2,5 +2,5 @@ name: CoderAgent description: Given a particular task, and a detailed description of the codebase, accomplishes the task inputs: task: string - codebase_summary: string + summary: string outputs: {} diff --git a/agenthub/micro/coder/prompt.md b/agenthub/micro/coder/prompt.md index 49c24123f3..5b8d3195f3 100644 --- a/agenthub/micro/coder/prompt.md +++ b/agenthub/micro/coder/prompt.md @@ -2,7 +2,7 @@ You are a software engineer. You've inherited an existing codebase, which you need to modify to complete this task: -{{ latest_user_message }} +{{ state.inputs.task }} {% if state.inputs.summary %} Here's a summary of the codebase, as it relates to this task: diff --git a/agenthub/micro/math_agent/prompt.md b/agenthub/micro/math_agent/prompt.md index 831dab8978..c286407160 100644 --- a/agenthub/micro/math_agent/prompt.md +++ b/agenthub/micro/math_agent/prompt.md @@ -1,7 +1,7 @@ # Task You are a brilliant mathematician and programmer. 
You've been given the following problem to solve: -{{ latest_user_message }} +`{{ state.inputs.task }}` Please write a python script that solves this problem, and prints the answer to stdout. ONLY print the answer to stdout, nothing else. diff --git a/agenthub/micro/postgres_agent/prompt.md b/agenthub/micro/postgres_agent/prompt.md index 707f8240d3..5ddbfcb78a 100644 --- a/agenthub/micro/postgres_agent/prompt.md +++ b/agenthub/micro/postgres_agent/prompt.md @@ -2,7 +2,7 @@ You are a database engineer. You are working on an existing Postgres project, and have been given the following task: -{{ latest_user_message }} +{{ state.inputs.task }} You must: * Investigate the existing migrations to understand the current schema diff --git a/agenthub/micro/registry.py b/agenthub/micro/registry.py index ecc65e4684..cc16e4d26d 100644 --- a/agenthub/micro/registry.py +++ b/agenthub/micro/registry.py @@ -4,7 +4,10 @@ import yaml all_microagents = {} -for dir in os.listdir(os.path.dirname(__file__)): +# Get the list of directories and sort them to preserve determinism +dirs = sorted(os.listdir(os.path.dirname(__file__))) + +for dir in dirs: base = os.path.dirname(__file__) + '/' + dir if os.path.isfile(base): continue diff --git a/agenthub/micro/study_repo_for_task/prompt.md b/agenthub/micro/study_repo_for_task/prompt.md index 7fa2a0c6e3..3199181d1d 100644 --- a/agenthub/micro/study_repo_for_task/prompt.md +++ b/agenthub/micro/study_repo_for_task/prompt.md @@ -1,9 +1,11 @@ # Task -You are a software engineer. You've inherited an existing codebase, which you're -learning about for the first time. You need to study the codebase to find all -the information needed to complete this task: +You are a software architect. Your team has inherited an existing codebase, and +need to finish a project: -{{ latest_user_message }} +{{ state.inputs.task }} + +As an architect, you need to study the codebase to find all the information that +might be helpful for your software engineering team. 
## Available Actions {{ instructions.actions.run }} @@ -11,11 +13,14 @@ the information needed to complete this task: {{ instructions.actions.message }} {{ instructions.actions.finish }} -You must ONLY `run` commands that have no side-effects, like `ls` and `grep`. +You must ONLY `run` commands that have no side-effects, like `ls` and `grep`. You +MUST NOT modify or write to any file. Do NOT finish until you have a complete understanding of which parts of the -codebase are relevant to the task, including particular files, functions, and classes. +codebase are relevant to the project, including particular files, functions, and classes. When you're done, put your summary in `outputs.summary` in the `finish` action. +Remember, your task is to explore and study the current repository, not actually +implement the solution. If the codebase is empty, you shoud call the `finish` action. ## History {{ instructions.history_truncated }} @@ -23,3 +28,36 @@ When you're done, put your summary in `outputs.summary` in the `finish` action. ## Format {{ instructions.format.action }} + +## Examples + +Here is an example of how you can interact with the environment for task solving: + +--- START OF EXAMPLE --- + +USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? + +ASSISTANT: +{ + "action": "run", + "args": { + "command": "ls", + "background": false + } +} + +USER: +OBSERVATION: +[] + +ASSISTANT: +{ + "action": "finish", + "args": { + "outputs": { + "summary": "The codebase appears to be empty. Engineers should start everything from scratch." 
+ } + } +} + +--- END OF EXAMPLE --- diff --git a/agenthub/micro/typo_fixer_agent/agent.yaml b/agenthub/micro/typo_fixer_agent/agent.yaml index a96263e82b..8647ddadfd 100644 --- a/agenthub/micro/typo_fixer_agent/agent.yaml +++ b/agenthub/micro/typo_fixer_agent/agent.yaml @@ -1,5 +1,6 @@ name: TypoFixerAgent description: Fixes typos in files in the current working directory -inputs: {} +inputs: + task: string outputs: summary: string diff --git a/agenthub/micro/typo_fixer_agent/prompt.md b/agenthub/micro/typo_fixer_agent/prompt.md index 5b297daa8d..a4d04769b3 100644 --- a/agenthub/micro/typo_fixer_agent/prompt.md +++ b/agenthub/micro/typo_fixer_agent/prompt.md @@ -1,5 +1,13 @@ # Task -You are a proofreader tasked with fixing typos in the files in your current working directory. Your goal is to: +You are a proofreader tasked with fixing typos in the files in your current working directory. + +{% if state.inputs.task %} +Specifically, your task is: +{{ state.inputs.task }} +{% endif %} + +To achieve this goal, you should: + 1. Scan the files for typos 2. Overwrite the files with the typos fixed 3. Provide a summary of the typos fixed @@ -13,10 +21,10 @@ You are a proofreader tasked with fixing typos in the files in your current work To complete this task: 1. Use the `read` action to read the contents of the files in your current working directory. Make sure to provide the file path in the format `'./file_name.ext'`. -2. Use the `think` action to analyze the contents and identify typos. +2. Use the `message` action to analyze the contents and identify typos. 3. Use the `write` action to create new versions of the files with the typos fixed. - Overwrite the original files with the corrected content. Make sure to provide the file path in the format `'./file_name.ext'`. -4. Use the `think` action to generate a summary of the typos fixed, including the original and fixed versions of each typo, and the file(s) they were found in. +4. 
Use the `message` action to generate a summary of the typos fixed, including the original and fixed versions of each typo, and the file(s) they were found in. 5. Use the `finish` action to return the summary in the `outputs.summary` field. Do NOT finish until you have fixed all the typos and generated a summary. diff --git a/agenthub/micro/verifier/prompt.md b/agenthub/micro/verifier/prompt.md index 7a6821c665..7e5ef79a2d 100644 --- a/agenthub/micro/verifier/prompt.md +++ b/agenthub/micro/verifier/prompt.md @@ -2,9 +2,10 @@ You are a quality assurance engineer. Another engineer has made changes to the codebase which are supposed to solve this task: -{{ latest_user_message }} +{{ state.inputs.task }} -Your goal is to verify that the changes are correct and bug-free. +Note the changes might have already been applied in-line. You should focus on +validating if the task is solved, nothing else. ## Available Actions {{ instructions.actions.run }} diff --git a/opendevin/controller/agent_controller.py b/opendevin/controller/agent_controller.py index a34c2f978c..380fc04497 100644 --- a/opendevin/controller/agent_controller.py +++ b/opendevin/controller/agent_controller.py @@ -47,6 +47,7 @@ class AgentController: event_stream: EventStream state: State agent_task: Optional[asyncio.Task] = None + parent: 'AgentController | None' = None delegate: 'AgentController | None' = None _pending_action: Action | None = None @@ -58,7 +59,8 @@ class AgentController: max_iterations: int = MAX_ITERATIONS, max_chars: int = MAX_CHARS, max_budget_per_task: float | None = MAX_BUDGET_PER_TASK, - inputs: dict | None = None, + initial_state: State | None = None, + is_delegate: bool = False, ): """Initializes a new instance of the AgentController class. @@ -69,25 +71,30 @@ class AgentController: max_iterations: The maximum number of iterations the agent can run. max_chars: The maximum number of characters the agent can output. 
max_budget_per_task: The maximum budget (in USD) allowed per task, beyond which the agent will stop. - inputs: The initial inputs to the agent. + initial_state: The initial state of the controller. + is_delegate: Whether this controller is a delegate. """ + self._step_lock = asyncio.Lock() self.id = sid self.agent = agent - self.state = State(inputs=inputs or {}, max_iterations=max_iterations) + self.max_chars = max_chars + if initial_state is None: + self.state = State(inputs={}, max_iterations=max_iterations) + else: + self.state = initial_state self.event_stream = event_stream self.event_stream.subscribe( - EventStreamSubscriber.AGENT_CONTROLLER, self.on_event + EventStreamSubscriber.AGENT_CONTROLLER, self.on_event, append=is_delegate ) - self.max_iterations = max_iterations - self.max_chars = max_chars self.max_budget_per_task = max_budget_per_task - self.agent_task = asyncio.create_task(self._start_step_loop()) + if not is_delegate: + self.agent_task = asyncio.create_task(self._start_step_loop()) async def close(self): if self.agent_task is not None: self.agent_task.cancel() - self.event_stream.unsubscribe(EventStreamSubscriber.AGENT_CONTROLLER) await self.set_agent_state_to(AgentState.STOPPED) + self.event_stream.unsubscribe(EventStreamSubscriber.AGENT_CONTROLLER) def update_state_before_step(self): self.state.iteration += 1 @@ -117,6 +124,7 @@ class AgentController: self.state.updated_info.append((action, observation)) async def _start_step_loop(self): + logger.info(f'[Agent Controller {self.id}] Starting step loop...') while True: try: await self._step() @@ -164,13 +172,16 @@ class AgentController: elif isinstance(event, CmdOutputObservation): await self.add_history(NullAction(), event) logger.info(event, extra={'msg_type': 'OBSERVATION'}) + elif isinstance(event, AgentDelegateObservation): + await self.add_history(NullAction(), event) + logger.info(event, extra={'msg_type': 'OBSERVATION'}) def reset_task(self): self.agent.reset() async def 
set_agent_state_to(self, new_state: AgentState): logger.info( - f'Setting agent({type(self.agent).__name__}) state from {self.state.agent_state} to {new_state}' + f'[Agent Controller {self.id}] Setting agent({type(self.agent).__name__}) state from {self.state.agent_state} to {new_state}' ) if new_state == self.state.agent_state: @@ -195,45 +206,85 @@ class AgentController: async def start_delegate(self, action: AgentDelegateAction): AgentCls: Type[Agent] = Agent.get_cls(action.agent) agent = AgentCls(llm=self.agent.llm) + state = State( + inputs=action.inputs or {}, + iteration=0, + max_iterations=self.state.max_iterations, + num_of_chars=self.state.num_of_chars, + delegate_level=self.state.delegate_level + 1, + ) + logger.info(f'[Agent Controller {self.id}]: start delegate') self.delegate = AgentController( sid=self.id + '-delegate', agent=agent, event_stream=self.event_stream, - max_iterations=self.max_iterations, + max_iterations=self.state.max_iterations, max_chars=self.max_chars, - inputs=action.inputs, + initial_state=state, + is_delegate=True, ) + await self.delegate.set_agent_state_to(AgentState.RUNNING) async def _step(self): + logger.debug(f'[Agent Controller {self.id}] Entering step method') if self.get_agent_state() != AgentState.RUNNING: - logger.debug('waiting for agent to run...') + logger.info(f'[Agent Controller {self.id}] waiting for agent to run...') await asyncio.sleep(1) return if self._pending_action: - logger.debug('waiting for pending action: ' + str(self._pending_action)) + logger.info( + f'[Agent Controller {self.id}] waiting for pending action: {self._pending_action}' + ) await asyncio.sleep(1) return - logger.info(f'STEP {self.state.iteration}', extra={'msg_type': 'STEP'}) - if self.state.iteration >= self.max_iterations: - await self.report_error('Agent reached maximum number of iterations') - await self.set_agent_state_to(AgentState.ERROR) - return - if self.delegate is not None: - delegate_done = await self.delegate._step() + 
logger.debug(f'[Agent Controller {self.id}] Delegate not none, awaiting...') + assert self.delegate != self + await self.delegate._step() + logger.debug(f'[Agent Controller {self.id}] Delegate step done') + assert self.delegate is not None + delegate_state = self.delegate.get_agent_state() + if delegate_state == AgentState.ERROR: + # close the delegate upon error + await self.delegate.close() + await self.report_error('Delegator agent encounters an error') + # propagate error state until an agent or user can handle it + await self.set_agent_state_to(AgentState.ERROR) + return + delegate_done = delegate_state == AgentState.FINISHED if delegate_done: + logger.info( + f'[Agent Controller {self.id}] Delegate agent has finished execution' + ) + # retrieve delegate result outputs = self.delegate.state.outputs if self.delegate.state else {} - obs: Observation = AgentDelegateObservation(content='', outputs=outputs) - await self.event_stream.add_event(obs, EventSource.AGENT) + + # close delegate controller: we must close the delegate controller before adding new events + await self.delegate.close() + + # clean up delegate status self.delegate = None self.delegateAction = None + + # update delegate result observation + obs: Observation = AgentDelegateObservation(outputs=outputs, content='') + await self.event_stream.add_event(obs, EventSource.AGENT) return if self.state.num_of_chars > self.max_chars: raise MaxCharsExceedError(self.state.num_of_chars, self.max_chars) + logger.info( + f'{type(self.agent).__name__} LEVEL {self.state.delegate_level} STEP {self.state.iteration}', + extra={'msg_type': 'STEP'}, + ) + if self.state.iteration >= self.state.max_iterations: + await self.report_error('Agent reached maximum number of iterations') + await self.set_agent_state_to(AgentState.ERROR) + return + self.update_state_before_step() action: Action = NullAction() try: @@ -335,6 +386,14 @@ class AgentController: return False + def __repr__(self): + return ( + 
f'AgentController(id={self.id}, agent={self.agent!r}, ' + f'event_stream={self.event_stream!r}, ' + f'state={self.state!r}, agent_task={self.agent_task!r}, ' + f'delegate={self.delegate!r}, _pending_action={self._pending_action!r})' + ) + def _eq_no_pid(self, obj1, obj2): if isinstance(obj1, CmdOutputObservation) and isinstance( obj2, CmdOutputObservation diff --git a/opendevin/controller/state/state.py b/opendevin/controller/state/state.py index 492e6e38f3..76eb05d9f6 100644 --- a/opendevin/controller/state/state.py +++ b/opendevin/controller/state/state.py @@ -40,6 +40,8 @@ class State: agent_state: AgentState = AgentState.LOADING resume_state: AgentState | None = None metrics: Metrics = Metrics() + # root agent has level 0, and every delegate increases the level by one + delegate_level: int = 0 def save_to_session(self, sid: str): fs = get_file_store() diff --git a/opendevin/events/stream.py b/opendevin/events/stream.py index bdfe90d7c6..7a5394663b 100644 --- a/opendevin/events/stream.py +++ b/opendevin/events/stream.py @@ -21,7 +21,9 @@ class EventStreamSubscriber(str, Enum): class EventStream: sid: str - _subscribers: dict[str, Callable] + # For each subscriber ID, there is a stack of callback functions - useful + # when there are agent delegates + _subscribers: dict[str, list[Callable]] _cur_id: int _lock: asyncio.Lock _file_store: FileStore @@ -69,17 +71,22 @@ class EventStream: data = json.loads(content) return event_from_dict(data) - def subscribe(self, id: EventStreamSubscriber, callback: Callable): + def subscribe(self, id: EventStreamSubscriber, callback: Callable, append=False): if id in self._subscribers: - raise ValueError('Subscriber already exists: ' + id) + if append: + self._subscribers[id].append(callback) + else: + raise ValueError('Subscriber already exists: ' + id) else: - self._subscribers[id] = callback + self._subscribers[id] = [callback] def unsubscribe(self, id: EventStreamSubscriber): if id not in self._subscribers: 
logger.warning('Subscriber not found during unsubscribe: ' + id) else: - del self._subscribers[id] + self._subscribers[id].pop() + if len(self._subscribers[id]) == 0: + del self._subscribers[id] # TODO: make this not async async def add_event(self, event: Event, source: EventSource): @@ -93,5 +100,6 @@ class EventStream: self._file_store.write( self._get_filename_for_id(event.id), json.dumps(data) ) - for key, fn in self._subscribers.items(): - await fn(event) + for key, stack in self._subscribers.items(): + callback = stack[-1] + await callback(event) diff --git a/tests/integration/mock/DelegatorAgent/test_edits/prompt_001.log b/tests/integration/mock/DelegatorAgent/test_edits/prompt_001.log new file mode 100644 index 0000000000..375a73b70a --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/prompt_001.log @@ -0,0 +1,86 @@ + + +---------- + +# Task +You are a software architect. Your team has inherited an existing codebase, and +need to finish a project: + +Fix typos in bad.txt. Do not ask me for confirmation at any point. + +As an architect, you need to study the codebase to find all the information that +might be helpful for your software engineering team. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. 
Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls` and `grep`. You +MUST NOT modify or write to any file. + +Do NOT finish until you have a complete understanding of which parts of the +codebase are relevant to the project, including particular files, functions, and classes. +When you're done, put your summary in `outputs.summary` in the `finish` action. +Remember, your task is to explore and study the current repository, not actually +implement the solution. If the codebase is empty, you shoud call the `finish` action. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response + + +## Examples + +Here is an example of how you can interact with the environment for task solving: + +--- START OF EXAMPLE --- + +USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? + +ASSISTANT: +{ + "action": "run", + "args": { + "command": "ls", + "background": false + } +} + +USER: +OBSERVATION: +[] + +ASSISTANT: +{ + "action": "finish", + "args": { + "outputs": { + "summary": "The codebase appears to be empty. 
Engineers should start everything from scratch." + } + } +} + +--- END OF EXAMPLE --- diff --git a/tests/integration/mock/DelegatorAgent/test_edits/prompt_002.log b/tests/integration/mock/DelegatorAgent/test_edits/prompt_002.log new file mode 100644 index 0000000000..cffca888f1 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/prompt_002.log @@ -0,0 +1,86 @@ + + +---------- + +# Task +You are a software architect. Your team has inherited an existing codebase, and +need to finish a project: + +Fix typos in bad.txt. Do not ask me for confirmation at any point. + +As an architect, you need to study the codebase to find all the information that +might be helpful for your software engineering team. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls` and `grep`. You +MUST NOT modify or write to any file. + +Do NOT finish until you have a complete understanding of which parts of the +codebase are relevant to the project, including particular files, functions, and classes. 
+When you're done, put your summary in `outputs.summary` in the `finish` action. +Remember, your task is to explore and study the current repository, not actually +implement the solution. If the codebase is empty, you shoud call the `finish` action. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "run", "args": {"command": "ls", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response + + +## Examples + +Here is an example of how you can interact with the environment for task solving: + +--- START OF EXAMPLE --- + +USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? + +ASSISTANT: +{ + "action": "run", + "args": { + "command": "ls", + "background": false + } +} + +USER: +OBSERVATION: +[] + +ASSISTANT: +{ + "action": "finish", + "args": { + "outputs": { + "summary": "The codebase appears to be empty. Engineers should start everything from scratch." 
+ } + } +} + +--- END OF EXAMPLE --- diff --git a/tests/integration/mock/DelegatorAgent/test_edits/prompt_003.log b/tests/integration/mock/DelegatorAgent/test_edits/prompt_003.log new file mode 100644 index 0000000000..a58dd072b8 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/prompt_003.log @@ -0,0 +1,86 @@ + + +---------- + +# Task +You are a software architect. Your team has inherited an existing codebase, and +need to finish a project: + +Fix typos in bad.txt. Do not ask me for confirmation at any point. + +As an architect, you need to study the codebase to find all the information that +might be helpful for your software engineering team. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls` and `grep`. You +MUST NOT modify or write to any file. + +Do NOT finish until you have a complete understanding of which parts of the +codebase are relevant to the project, including particular files, functions, and classes. +When you're done, put your summary in `outputs.summary` in the `finish` action. 
+Remember, your task is to explore and study the current repository, not actually +implement the solution. If the codebase is empty, you shoud call the `finish` action. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "run", "args": {"command": "ls", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}], [{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response + + +## Examples + +Here is an example of how you can interact with the environment for task solving: + +--- START OF EXAMPLE --- + +USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? + +ASSISTANT: +{ + "action": "run", + "args": { + "command": "ls", + "background": false + } +} + +USER: +OBSERVATION: +[] + +ASSISTANT: +{ + "action": "finish", + "args": { + "outputs": { + "summary": "The codebase appears to be empty. Engineers should start everything from scratch." 
+ } + } +} + +--- END OF EXAMPLE --- diff --git a/tests/integration/mock/DelegatorAgent/test_edits/prompt_004.log b/tests/integration/mock/DelegatorAgent/test_edits/prompt_004.log new file mode 100644 index 0000000000..9b35d163a3 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/prompt_004.log @@ -0,0 +1,59 @@ + + +---------- + +# Task +You are a software engineer. You've inherited an existing codebase, which you +need to modify to complete this task: + +Fix typos in bad.txt. Do not ask me for confirmation at any point. + + +Here's a summary of the codebase, as it relates to this task: + +The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is: + +This is a stupid typoo. +Really? +No mor typos! +Enjoy! + +The engineering team needs to correct the typos in this file. + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. 
+ +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_edits/prompt_005.log b/tests/integration/mock/DelegatorAgent/test_edits/prompt_005.log new file mode 100644 index 0000000000..052239afa9 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/prompt_005.log @@ -0,0 +1,59 @@ + + +---------- + +# Task +You are a software engineer. You've inherited an existing codebase, which you +need to modify to complete this task: + +Fix typos in bad.txt. Do not ask me for confirmation at any point. + + +Here's a summary of the codebase, as it relates to this task: + +The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is: + +This is a stupid typoo. +Really? +No mor typos! +Enjoy! + +The engineering team needs to correct the typos in this file. + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. 
Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_edits/prompt_006.log b/tests/integration/mock/DelegatorAgent/test_edits/prompt_006.log new file mode 100644 index 0000000000..5db66cff32 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/prompt_006.log @@ -0,0 +1,59 @@ + + +---------- + +# Task +You are a software engineer. You've inherited an existing codebase, which you +need to modify to complete this task: + +Fix typos in bad.txt. Do not ask me for confirmation at any point. 
+ + +Here's a summary of the codebase, as it relates to this task: + +The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is: + +This is a stupid typoo. +Really? +No mor typos! +Enjoy! + +The engineering team needs to correct the typos in this file. + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. 
+ +[[{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}], [{"source": "agent", "action": "write", "args": {"path": "bad.txt", "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "bad.txt"}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_edits/prompt_007.log b/tests/integration/mock/DelegatorAgent/test_edits/prompt_007.log new file mode 100644 index 0000000000..0162453fd4 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/prompt_007.log @@ -0,0 +1,50 @@ + + +---------- + +# Task +You are a quality assurance engineer. Another engineer has made changes to the +codebase which are supposed to solve this task: + +Fix typos in bad.txt. Do not ask me for confirmation at any point. + +Note the changes might have already been applied in-line. You should focus on +validating if the task is solved, nothing else. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. 
Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. + +Do NOT finish until you know whether the task is complete and correct. +When you're done, add a `completed` boolean to the `outputs` of the `finish` action. +If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action +explaining what the problem is. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_edits/prompt_008.log b/tests/integration/mock/DelegatorAgent/test_edits/prompt_008.log new file mode 100644 index 0000000000..f3d8369b14 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/prompt_008.log @@ -0,0 +1,50 @@ + + +---------- + +# Task +You are a quality assurance engineer. Another engineer has made changes to the +codebase which are supposed to solve this task: + +Fix typos in bad.txt. 
Do not ask me for confirmation at any point. + +Note the changes might have already been applied in-line. You should focus on +validating if the task is solved, nothing else. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. + +Do NOT finish until you know whether the task is complete and correct. +When you're done, add a `completed` boolean to the `outputs` of the `finish` action. +If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action +explaining what the problem is. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. 
+ +[[{"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n\n", "extras": {"path": "bad.txt"}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_edits/response_001.log b/tests/integration/mock/DelegatorAgent/test_edits/response_001.log new file mode 100644 index 0000000000..e068bf0622 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/response_001.log @@ -0,0 +1,7 @@ +{ + "action": "run", + "args": { + "command": "ls", + "background": false + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_edits/response_002.log b/tests/integration/mock/DelegatorAgent/test_edits/response_002.log new file mode 100644 index 0000000000..ac56b4ad06 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/response_002.log @@ -0,0 +1,6 @@ +{ + "action": "read", + "args": { + "path": "bad.txt" + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_edits/response_003.log b/tests/integration/mock/DelegatorAgent/test_edits/response_003.log new file mode 100644 index 0000000000..9c7158338d --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/response_003.log @@ -0,0 +1,8 @@ +{ + "action": "finish", + "args": { + "outputs": { + "summary": "The codebase contains a single file named 'bad.txt' with some typos. The content of 'bad.txt' is:\n\nThis is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n\nThe engineering team needs to correct the typos in this file." 
+ } + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_edits/response_004.log b/tests/integration/mock/DelegatorAgent/test_edits/response_004.log new file mode 100644 index 0000000000..c2a150634e --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/response_004.log @@ -0,0 +1,8 @@ +```json +{ + "action": "read", + "args": { + "path": "bad.txt" + } +} +``` diff --git a/tests/integration/mock/DelegatorAgent/test_edits/response_005.log b/tests/integration/mock/DelegatorAgent/test_edits/response_005.log new file mode 100644 index 0000000000..b5c342ef85 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/response_005.log @@ -0,0 +1,9 @@ +```json +{ + "action": "write", + "args": { + "path": "bad.txt", + "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n" + } +} +``` diff --git a/tests/integration/mock/DelegatorAgent/test_edits/response_006.log b/tests/integration/mock/DelegatorAgent/test_edits/response_006.log new file mode 100644 index 0000000000..762f9fe834 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/response_006.log @@ -0,0 +1,8 @@ +```json +{ + "action": "finish", + "args": { + "outputs": {} + } +} +``` diff --git a/tests/integration/mock/DelegatorAgent/test_edits/response_007.log b/tests/integration/mock/DelegatorAgent/test_edits/response_007.log new file mode 100644 index 0000000000..c2a150634e --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/response_007.log @@ -0,0 +1,8 @@ +```json +{ + "action": "read", + "args": { + "path": "bad.txt" + } +} +``` diff --git a/tests/integration/mock/DelegatorAgent/test_edits/response_008.log b/tests/integration/mock/DelegatorAgent/test_edits/response_008.log new file mode 100644 index 0000000000..fdeae4780c --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_edits/response_008.log @@ -0,0 +1,10 @@ +```json +{ + "action": "finish", + "args": { + "outputs": { + "completed": true + } + } +} +``` diff --git 
a/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_001.log new file mode 100644 index 0000000000..14c93cc8fd --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_001.log @@ -0,0 +1,86 @@ + + +---------- + +# Task +You are a software architect. Your team has inherited an existing codebase, and +need to finish a project: + +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. + +As an architect, you need to study the codebase to find all the information that +might be helpful for your software engineering team. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls` and `grep`. You +MUST NOT modify or write to any file. + +Do NOT finish until you have a complete understanding of which parts of the +codebase are relevant to the project, including particular files, functions, and classes. +When you're done, put your summary in `outputs.summary` in the `finish` action. 
+Remember, your task is to explore and study the current repository, not actually +implement the solution. If the codebase is empty, you shoud call the `finish` action. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response + + +## Examples + +Here is an example of how you can interact with the environment for task solving: + +--- START OF EXAMPLE --- + +USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? + +ASSISTANT: +{ + "action": "run", + "args": { + "command": "ls", + "background": false + } +} + +USER: +OBSERVATION: +[] + +ASSISTANT: +{ + "action": "finish", + "args": { + "outputs": { + "summary": "The codebase appears to be empty. Engineers should start everything from scratch." + } + } +} + +--- END OF EXAMPLE --- diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_002.log new file mode 100644 index 0000000000..f113fdb048 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_002.log @@ -0,0 +1,86 @@ + + +---------- + +# Task +You are a software architect. Your team has inherited an existing codebase, and +need to finish a project: + +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. 
+ +As an architect, you need to study the codebase to find all the information that +might be helpful for your software engineering team. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls` and `grep`. You +MUST NOT modify or write to any file. + +Do NOT finish until you have a complete understanding of which parts of the +codebase are relevant to the project, including particular files, functions, and classes. +When you're done, put your summary in `outputs.summary` in the `finish` action. +Remember, your task is to explore and study the current repository, not actually +implement the solution. If the codebase is empty, you shoud call the `finish` action. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. 
+ +[[{"source": "agent", "action": "run", "args": {"command": "ls", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response + + +## Examples + +Here is an example of how you can interact with the environment for task solving: + +--- START OF EXAMPLE --- + +USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? + +ASSISTANT: +{ + "action": "run", + "args": { + "command": "ls", + "background": false + } +} + +USER: +OBSERVATION: +[] + +ASSISTANT: +{ + "action": "finish", + "args": { + "outputs": { + "summary": "The codebase appears to be empty. Engineers should start everything from scratch." + } + } +} + +--- END OF EXAMPLE --- diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_003.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_003.log new file mode 100644 index 0000000000..7db9f237c4 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_003.log @@ -0,0 +1,52 @@ + + +---------- + +# Task +You are a software engineer. You've inherited an existing codebase, which you +need to modify to complete this task: + +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. + + +Here's a summary of the codebase, as it relates to this task: + +The codebase appears to be empty. Engineers should start everything from scratch. + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. 
Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[] + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_004.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_004.log new file mode 100644 index 0000000000..5568792342 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_004.log @@ -0,0 +1,52 @@ + + +---------- + +# Task +You are a software engineer. You've inherited an existing codebase, which you +need to modify to complete this task: + +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. + + +Here's a summary of the codebase, as it relates to this task: + +The codebase appears to be empty. Engineers should start everything from scratch. + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. 
Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_005.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_005.log new file mode 100644 index 0000000000..cfee430c35 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_005.log @@ -0,0 +1,52 @@ + + +---------- + +# Task +You are a software engineer. You've inherited an existing codebase, which you +need to modify to complete this task: + +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. + + +Here's a summary of the codebase, as it relates to this task: + +The codebase appears to be empty. Engineers should start everything from scratch. + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. 
starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]] + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_006.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_006.log new file mode 100644 index 0000000000..c15bd41762 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_006.log @@ -0,0 +1,50 @@ + + +---------- + +# Task +You are a quality assurance engineer. Another engineer has made changes to the +codebase which are supposed to solve this task: + +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. + +Note the changes might have already been applied in-line. You should focus on +validating if the task is solved, nothing else. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. 
Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. + +Do NOT finish until you know whether the task is complete and correct. +When you're done, add a `completed` boolean to the `outputs` of the `finish` action. +If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action +explaining what the problem is. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_007.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_007.log new file mode 100644 index 0000000000..c7c767593f --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_007.log @@ -0,0 +1,50 @@ + + +---------- + +# Task +You are a quality assurance engineer. Another engineer has made changes to the +codebase which are supposed to solve this task: + +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. + +Note the changes might have already been applied in-line. You should focus on +validating if the task is solved, nothing else. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. 
Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. + +Do NOT finish until you know whether the task is complete and correct. +When you're done, add a `completed` boolean to the `outputs` of the `finish` action. +If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action +explaining what the problem is. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\necho 'hello'\n", "extras": {"path": "hello.sh"}}]] + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_008.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_008.log new file mode 100644 index 0000000000..5c9314ea4c --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/prompt_008.log @@ -0,0 +1,50 @@ + + +---------- + +# Task +You are a quality assurance engineer. Another engineer has made changes to the +codebase which are supposed to solve this task: + +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. + +Note the changes might have already been applied in-line. You should focus on +validating if the task is solved, nothing else. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. 
Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. + +Do NOT finish until you know whether the task is complete and correct. +When you're done, add a `completed` boolean to the `outputs` of the `finish` action. +If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action +explaining what the problem is. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\necho 'hello'\n", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]] + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_001.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_001.log new file mode 100644 index 0000000000..e068bf0622 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_001.log @@ -0,0 +1,7 @@ +{ + "action": "run", + "args": { + "command": "ls", + "background": false + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_002.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_002.log new file mode 100644 index 0000000000..26e314361d --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_002.log @@ -0,0 +1,8 @@ +{ + "action": "finish", + "args": { + "outputs": { + "summary": "The codebase appears to be empty. Engineers should start everything from scratch." 
+ } + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_003.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_003.log new file mode 100644 index 0000000000..f97464936b --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_003.log @@ -0,0 +1,7 @@ +{ + "action": "write", + "args": { + "path": "hello.sh", + "content": "#!/bin/bash\necho 'hello'" + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_004.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_004.log new file mode 100644 index 0000000000..7055344e27 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_004.log @@ -0,0 +1,7 @@ +{ + "action": "run", + "args": { + "command": "bash hello.sh", + "background": false + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_005.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_005.log new file mode 100644 index 0000000000..d260ee3b2f --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_005.log @@ -0,0 +1,8 @@ +{ + "action": "finish", + "args": { + "outputs": { + "result": "hello" + } + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_006.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_006.log new file mode 100644 index 0000000000..9ccc3df9c5 --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_006.log @@ -0,0 +1,6 @@ +{ + "action": "read", + "args": { + "path": "hello.sh" + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_007.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_007.log new file mode 100644 index 0000000000..7055344e27 --- /dev/null +++ 
b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_007.log @@ -0,0 +1,7 @@ +{ + "action": "run", + "args": { + "command": "bash hello.sh", + "background": false + } +} diff --git a/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_008.log b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_008.log new file mode 100644 index 0000000000..5a88f936cb --- /dev/null +++ b/tests/integration/mock/DelegatorAgent/test_write_simple_script/response_008.log @@ -0,0 +1,23 @@ +The task was to create a shell script named `hello.sh` that prints "hello" without asking for any confirmation. Based on the history: + +1. The content of `hello.sh` was read and confirmed to contain: + ```bash + #!/bin/bash + echo 'hello' + ``` + This script correctly prints "hello" when executed. + +2. The script was executed using the command `bash hello.sh`, and the output was "hello", which matches the expected output. + +Since the script behaves as expected and meets the requirements of the task, the task is completed successfully. + +```json +{ + "action": "finish", + "args": { + "outputs": { + "completed": true + } + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_edits/prompt_001.log b/tests/integration/mock/ManagerAgent/test_edits/prompt_001.log new file mode 100644 index 0000000000..ebd8e635b1 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/prompt_001.log @@ -0,0 +1,79 @@ + + +---------- + +# Task +You are in charge of accomplishing the following task: +Fix typos in bad.txt. Do not ask me for confirmation at any point. + +In order to accomplish this goal, you must delegate tasks to one or more agents, who +can do the actual work. A description of each agent is provided below. You MUST +select one of the delegates below to move towards accomplishing the task, and you MUST +provide the correct inputs for the delegate you select. 
+ +## Agents + +### CoderAgent +Given a particular task, and a detailed description of the codebase, accomplishes the task +#### Inputs +{"task": "string", "summary": "string"} + +### CommitWriterAgent +Write a git commit message for files in the git staging area +#### Inputs +{} + +### MathAgent +Solves simple and complex math problems using python +#### Inputs +{"task": "string"} + +### PostgresAgent +Writes and maintains PostgreSQL migrations +#### Inputs +{"task": "string"} + +### RepoExplorerAgent +Generates a detailed summary of an existing codebase +#### Inputs +{} + +### StudyRepoForTaskAgent +Given a particular task, finds and describes all relevant parts of the codebase +#### Inputs +{"task": "string"} + +### TypoFixerAgent +Fixes typos in files in the current working directory +#### Inputs +{"task": "string"} + +### VerifierAgent +Given a particular task, verifies that the task has been completed +#### Inputs +{"task": "string"} + + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "user", "action": "message", "args": {"content": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", "wait_for_response": false}}, {"observation": "null", "content": "", "extras": {}}]] + +## Available Actions +* `delegate` - send a task to another agent from the list provided. Arguments: + * `agent` - the agent to which the task is delegated. MUST match a name in the list of agents provided. + * `inputs` - a dictionary of input parameters to the agent, as specified in the list + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. 
Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_edits/prompt_002.log b/tests/integration/mock/ManagerAgent/test_edits/prompt_002.log new file mode 100644 index 0000000000..31025ee532 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/prompt_002.log @@ -0,0 +1,80 @@ + + +---------- + +# Task +You are a proofreader tasked with fixing typos in the files in your current working directory. + + +Specifically, your task is: +Fix typos in bad.txt + + +To achieve this goal, you should: + +1. Scan the files for typos +2. Overwrite the files with the typos fixed +3. Provide a summary of the typos fixed + +## Available Actions +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. 
Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +To complete this task: +1. Use the `read` action to read the contents of the files in your current working directory. Make sure to provide the file path in the format `'./file_name.ext'`. +2. Use the `message` action to analyze the contents and identify typos. +3. Use the `write` action to create new versions of the files with the typos fixed. + - Overwrite the original files with the corrected content. Make sure to provide the file path in the format `'./file_name.ext'`. +4. Use the `message` action to generate a summary of the typos fixed, including the original and fixed versions of each typo, and the file(s) they were found in. +5. Use the `finish` action to return the summary in the `outputs.summary` field. + +Do NOT finish until you have fixed all the typos and generated a summary. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[] + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response + + +For example, if you want to use the read action to read the contents of a file named example.txt, your response should look like this: +{ + "action": "read", + "args": { + "path": "./example.txt" + } +} + +Similarly, if you want to use the write action to write content to a file named output.txt, your response should look like this: +{ + "action": "write", + "args": { + "path": "./output.txt", + "content": "This is the content to be written to the file." + } +} diff --git a/tests/integration/mock/ManagerAgent/test_edits/prompt_003.log b/tests/integration/mock/ManagerAgent/test_edits/prompt_003.log new file mode 100644 index 0000000000..273bd8dec9 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/prompt_003.log @@ -0,0 +1,80 @@ + + +---------- + +# Task +You are a proofreader tasked with fixing typos in the files in your current working directory. + + +Specifically, your task is: +Fix typos in bad.txt + + +To achieve this goal, you should: + +1. Scan the files for typos +2. Overwrite the files with the typos fixed +3. Provide a summary of the typos fixed + +## Available Actions +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 
+ +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +To complete this task: +1. Use the `read` action to read the contents of the files in your current working directory. Make sure to provide the file path in the format `'./file_name.ext'`. +2. Use the `message` action to analyze the contents and identify typos. +3. Use the `write` action to create new versions of the files with the typos fixed. + - Overwrite the original files with the corrected content. Make sure to provide the file path in the format `'./file_name.ext'`. +4. Use the `message` action to generate a summary of the typos fixed, including the original and fixed versions of each typo, and the file(s) they were found in. +5. Use the `finish` action to return the summary in the `outputs.summary` field. + +Do NOT finish until you have fixed all the typos and generated a summary. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "read", "args": {"path": "./bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "./bad.txt"}}]] + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response + + +For example, if you want to use the read action to read the contents of a file named example.txt, your response should look like this: +{ + "action": "read", + "args": { + "path": "./example.txt" + } +} + +Similarly, if you want to use the write action to write content to a file named output.txt, your response should look like this: +{ + "action": "write", + "args": { + "path": "./output.txt", + "content": "This is the content to be written to the file." + } +} diff --git a/tests/integration/mock/ManagerAgent/test_edits/prompt_004.log b/tests/integration/mock/ManagerAgent/test_edits/prompt_004.log new file mode 100644 index 0000000000..9ac378a76c --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/prompt_004.log @@ -0,0 +1,80 @@ + + +---------- + +# Task +You are a proofreader tasked with fixing typos in the files in your current working directory. + + +Specifically, your task is: +Fix typos in bad.txt + + +To achieve this goal, you should: + +1. Scan the files for typos +2. Overwrite the files with the typos fixed +3. Provide a summary of the typos fixed + +## Available Actions +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. 
+ +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +To complete this task: +1. Use the `read` action to read the contents of the files in your current working directory. Make sure to provide the file path in the format `'./file_name.ext'`. +2. Use the `message` action to analyze the contents and identify typos. +3. Use the `write` action to create new versions of the files with the typos fixed. + - Overwrite the original files with the corrected content. Make sure to provide the file path in the format `'./file_name.ext'`. +4. Use the `message` action to generate a summary of the typos fixed, including the original and fixed versions of each typo, and the file(s) they were found in. +5. Use the `finish` action to return the summary in the `outputs.summary` field. + +Do NOT finish until you have fixed all the typos and generated a summary. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "read", "args": {"path": "./bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "./bad.txt"}}], [{"source": "agent", "action": "message", "args": {"content": "The content of 'bad.txt' has been read. 
The next step is to identify and fix the typos.", "wait_for_response": false}}, {"observation": "null", "content": "", "extras": {}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response + + +For example, if you want to use the read action to read the contents of a file named example.txt, your response should look like this: +{ + "action": "read", + "args": { + "path": "./example.txt" + } +} + +Similarly, if you want to use the write action to write content to a file named output.txt, your response should look like this: +{ + "action": "write", + "args": { + "path": "./output.txt", + "content": "This is the content to be written to the file." + } +} diff --git a/tests/integration/mock/ManagerAgent/test_edits/prompt_005.log b/tests/integration/mock/ManagerAgent/test_edits/prompt_005.log new file mode 100644 index 0000000000..bba53b817c --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/prompt_005.log @@ -0,0 +1,80 @@ + + +---------- + +# Task +You are a proofreader tasked with fixing typos in the files in your current working directory. + + +Specifically, your task is: +Fix typos in bad.txt + + +To achieve this goal, you should: + +1. Scan the files for typos +2. Overwrite the files with the typos fixed +3. Provide a summary of the typos fixed + +## Available Actions +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `run` - runs a command on the command line in a Linux shell. 
Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +To complete this task: +1. Use the `read` action to read the contents of the files in your current working directory. Make sure to provide the file path in the format `'./file_name.ext'`. +2. Use the `message` action to analyze the contents and identify typos. +3. Use the `write` action to create new versions of the files with the typos fixed. + - Overwrite the original files with the corrected content. Make sure to provide the file path in the format `'./file_name.ext'`. +4. Use the `message` action to generate a summary of the typos fixed, including the original and fixed versions of each typo, and the file(s) they were found in. +5. Use the `finish` action to return the summary in the `outputs.summary` field. + +Do NOT finish until you have fixed all the typos and generated a summary. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. 
+ +[[{"source": "agent", "action": "read", "args": {"path": "./bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "./bad.txt"}}], [{"source": "agent", "action": "message", "args": {"content": "The content of 'bad.txt' has been read. The next step is to identify and fix the typos.", "wait_for_response": false}}, {"observation": "null", "content": "", "extras": {}}], [{"source": "agent", "action": "write", "args": {"path": "./bad.txt", "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "./bad.txt"}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response + + +For example, if you want to use the read action to read the contents of a file named example.txt, your response should look like this: +{ + "action": "read", + "args": { + "path": "./example.txt" + } +} + +Similarly, if you want to use the write action to write content to a file named output.txt, your response should look like this: +{ + "action": "write", + "args": { + "path": "./output.txt", + "content": "This is the content to be written to the file." + } +} diff --git a/tests/integration/mock/ManagerAgent/test_edits/prompt_006.log b/tests/integration/mock/ManagerAgent/test_edits/prompt_006.log new file mode 100644 index 0000000000..b64083b4fc --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/prompt_006.log @@ -0,0 +1,79 @@ + + +---------- + +# Task +You are in charge of accomplishing the following task: +Fix typos in bad.txt. Do not ask me for confirmation at any point. 
+ +In order to accomplish this goal, you must delegate tasks to one or more agents, who +can do the actual work. A description of each agent is provided below. You MUST +select one of the delegates below to move towards accomplishing the task, and you MUST +provide the correct inputs for the delegate you select. + +## Agents + +### CoderAgent +Given a particular task, and a detailed description of the codebase, accomplishes the task +#### Inputs +{"task": "string", "summary": "string"} + +### CommitWriterAgent +Write a git commit message for files in the git staging area +#### Inputs +{} + +### MathAgent +Solves simple and complex math problems using python +#### Inputs +{"task": "string"} + +### PostgresAgent +Writes and maintains PostgreSQL migrations +#### Inputs +{"task": "string"} + +### RepoExplorerAgent +Generates a detailed summary of an existing codebase +#### Inputs +{} + +### StudyRepoForTaskAgent +Given a particular task, finds and describes all relevant parts of the codebase +#### Inputs +{"task": "string"} + +### TypoFixerAgent +Fixes typos in files in the current working directory +#### Inputs +{"task": "string"} + +### VerifierAgent +Given a particular task, verifies that the task has been completed +#### Inputs +{"task": "string"} + + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "user", "action": "message", "args": {"content": "Fix typos in bad.txt. 
Do not ask me for confirmation at any point.", "wait_for_response": false}}, {"observation": "null", "content": "", "extras": {}}], [{"source": "agent", "action": "delegate", "args": {"agent": "TypoFixerAgent", "inputs": {"task": "Fix typos in bad.txt"}, "thought": ""}}, {"observation": "null", "content": "", "extras": {}}], [{"action": "null", "args": {}}, {"source": "agent", "observation": "delegate", "content": "", "extras": {"outputs": {"summary": {"file": "./bad.txt", "typos_fixed": [{"original": "typoo", "fixed": "typo"}, {"original": "mor", "fixed": "more"}]}}}}]] + +## Available Actions +* `delegate` - send a task to another agent from the list provided. Arguments: + * `agent` - the agent to which the task is delegated. MUST match a name in the list of agents provided. + * `inputs` - a dictionary of input parameters to the agent, as specified in the list + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_edits/response_001.log b/tests/integration/mock/ManagerAgent/test_edits/response_001.log new file mode 100644 index 0000000000..932f74fba0 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/response_001.log @@ -0,0 +1,11 @@ +```json +{ + "action": "delegate", + "args": { + "agent": "TypoFixerAgent", + "inputs": { + "task": "Fix typos in bad.txt" + } + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_edits/response_002.log b/tests/integration/mock/ManagerAgent/test_edits/response_002.log new file mode 100644 index 0000000000..4a15313c94 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/response_002.log @@ -0,0 +1,6 @@ +{ + "action": "read", + "args": { + "path": "./bad.txt" + } +} diff --git a/tests/integration/mock/ManagerAgent/test_edits/response_003.log b/tests/integration/mock/ManagerAgent/test_edits/response_003.log new file mode 100644 index 0000000000..cad32c39c8 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/response_003.log @@ -0,0 +1,7 @@ +{ + "action": "message", + "args": { + "content": "The content of 'bad.txt' has been read. 
The next step is to identify and fix the typos.", + "wait_for_response": false + } +} diff --git a/tests/integration/mock/ManagerAgent/test_edits/response_004.log b/tests/integration/mock/ManagerAgent/test_edits/response_004.log new file mode 100644 index 0000000000..0437106907 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/response_004.log @@ -0,0 +1,7 @@ +{ + "action": "write", + "args": { + "path": "./bad.txt", + "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n" + } +} diff --git a/tests/integration/mock/ManagerAgent/test_edits/response_005.log b/tests/integration/mock/ManagerAgent/test_edits/response_005.log new file mode 100644 index 0000000000..9cdbe74465 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/response_005.log @@ -0,0 +1,20 @@ +{ + "action": "finish", + "args": { + "outputs": { + "summary": { + "file": "./bad.txt", + "typos_fixed": [ + { + "original": "typoo", + "fixed": "typo" + }, + { + "original": "mor", + "fixed": "more" + } + ] + } + } + } +} diff --git a/tests/integration/mock/ManagerAgent/test_edits/response_006.log b/tests/integration/mock/ManagerAgent/test_edits/response_006.log new file mode 100644 index 0000000000..633517fbfe --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_edits/response_006.log @@ -0,0 +1,20 @@ +```json +{ + "action": "finish", + "args": { + "outputs": { + "file": "./bad.txt", + "typos_fixed": [ + { + "original": "typoo", + "fixed": "typo" + }, + { + "original": "mor", + "fixed": "more" + } + ] + } + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_001.log new file mode 100644 index 0000000000..9b0b4df49a --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_001.log @@ -0,0 +1,79 @@ + + +---------- + +# Task +You are in charge of accomplishing the following task: +Write a shell script 'hello.sh' 
that prints 'hello'. Do not ask me for confirmation at any point. + +In order to accomplish this goal, you must delegate tasks to one or more agents, who +can do the actual work. A description of each agent is provided below. You MUST +select one of the delegates below to move towards accomplishing the task, and you MUST +provide the correct inputs for the delegate you select. + +## Agents + +### CoderAgent +Given a particular task, and a detailed description of the codebase, accomplishes the task +#### Inputs +{"task": "string", "summary": "string"} + +### CommitWriterAgent +Write a git commit message for files in the git staging area +#### Inputs +{} + +### MathAgent +Solves simple and complex math problems using python +#### Inputs +{"task": "string"} + +### PostgresAgent +Writes and maintains PostgreSQL migrations +#### Inputs +{"task": "string"} + +### RepoExplorerAgent +Generates a detailed summary of an existing codebase +#### Inputs +{} + +### StudyRepoForTaskAgent +Given a particular task, finds and describes all relevant parts of the codebase +#### Inputs +{"task": "string"} + +### TypoFixerAgent +Fixes typos in files in the current working directory +#### Inputs +{"task": "string"} + +### VerifierAgent +Given a particular task, verifies that the task has been completed +#### Inputs +{"task": "string"} + + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "user", "action": "message", "args": {"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.", "wait_for_response": false}}, {"observation": "null", "content": "", "extras": {}}]] + +## Available Actions +* `delegate` - send a task to another agent from the list provided. 
Arguments: + * `agent` - the agent to which the task is delegated. MUST match a name in the list of agents provided. + * `inputs` - a dictionary of input parameters to the agent, as specified in the list + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_002.log new file mode 100644 index 0000000000..27ff858aed --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_002.log @@ -0,0 +1,48 @@ + + +---------- + +# Task +You are a software engineer. You've inherited an existing codebase, which you +need to modify to complete this task: + +Write a shell script 'hello.sh' that prints 'hello'. + + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. 
Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_003.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_003.log new file mode 100644 index 0000000000..54ae396741 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_003.log @@ -0,0 +1,48 @@ + + +---------- + +# Task +You are a software engineer. You've inherited an existing codebase, which you +need to modify to complete this task: + +Write a shell script 'hello.sh' that prints 'hello'. + + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. 
Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_004.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_004.log new file mode 100644 index 0000000000..5bc5b3e102 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_004.log @@ -0,0 +1,48 @@ + + +---------- + +# Task +You are a software engineer. 
You've inherited an existing codebase, which you +need to modify to complete this task: + +Write a shell script 'hello.sh' that prints 'hello'. + + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. 
+ +[[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_005.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_005.log new file mode 100644 index 0000000000..b0bf89e29b --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_005.log @@ -0,0 +1,48 @@ + + +---------- + +# Task +You are a software engineer. You've inherited an existing codebase, which you +need to modify to complete this task: + +Write a shell script 'hello.sh' that prints 'hello'. + + + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `write` - writes the content to a file. Arguments: + * `path` - the path of the file to write + * `content` - the content to write to the file + +* `read` - reads the content of a file. 
Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +Do NOT finish until you have completed the tasks. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}], [{"source": "agent", "action": "run", "args": {"command": "./hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]] + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_006.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_006.log new file mode 100644 index 0000000000..5877182c22 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_006.log @@ -0,0 +1,79 @@ + + +---------- + +# Task +You are in charge of accomplishing the following task: +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. + +In order to accomplish this goal, you must delegate tasks to one or more agents, who +can do the actual work. A description of each agent is provided below. You MUST +select one of the delegates below to move towards accomplishing the task, and you MUST +provide the correct inputs for the delegate you select. 
+ +## Agents + +### CoderAgent +Given a particular task, and a detailed description of the codebase, accomplishes the task +#### Inputs +{"task": "string", "summary": "string"} + +### CommitWriterAgent +Write a git commit message for files in the git staging area +#### Inputs +{} + +### MathAgent +Solves simple and complex math problems using python +#### Inputs +{"task": "string"} + +### PostgresAgent +Writes and maintains PostgreSQL migrations +#### Inputs +{"task": "string"} + +### RepoExplorerAgent +Generates a detailed summary of an existing codebase +#### Inputs +{} + +### StudyRepoForTaskAgent +Given a particular task, finds and describes all relevant parts of the codebase +#### Inputs +{"task": "string"} + +### TypoFixerAgent +Fixes typos in files in the current working directory +#### Inputs +{"task": "string"} + +### VerifierAgent +Given a particular task, verifies that the task has been completed +#### Inputs +{"task": "string"} + + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "user", "action": "message", "args": {"content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.", "wait_for_response": false}}, {"observation": "null", "content": "", "extras": {}}], [{"source": "agent", "action": "delegate", "args": {"agent": "CoderAgent", "inputs": {"task": "Write a shell script 'hello.sh' that prints 'hello'.", "summary": ""}, "thought": ""}}, {"observation": "null", "content": "", "extras": {}}], [{"action": "null", "args": {}}, {"source": "agent", "observation": "delegate", "content": "", "extras": {"outputs": {}}}]] + +## Available Actions +* `delegate` - send a task to another agent from the list provided. 
Arguments: + * `agent` - the agent to which the task is delegated. MUST match a name in the list of agents provided. + * `inputs` - a dictionary of input parameters to the agent, as specified in the list + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_007.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_007.log new file mode 100644 index 0000000000..07e8fd919c --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_007.log @@ -0,0 +1,50 @@ + + +---------- + +# Task +You are a quality assurance engineer. Another engineer has made changes to the +codebase which are supposed to solve this task: + +Verify that the shell script 'hello.sh' prints 'hello'. + +Note the changes might have already been applied in-line. You should focus on +validating if the task is solved, nothing else. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. 
Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. + +Do NOT finish until you know whether the task is complete and correct. +When you're done, add a `completed` boolean to the `outputs` of the `finish` action. +If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action +explaining what the problem is. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_008.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_008.log new file mode 100644 index 0000000000..e4f6f55aeb --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_008.log @@ -0,0 +1,50 @@ + + +---------- + +# Task +You are a quality assurance engineer. Another engineer has made changes to the +codebase which are supposed to solve this task: + +Verify that the shell script 'hello.sh' prints 'hello'. + +Note the changes might have already been applied in-line. 
You should focus on +validating if the task is solved, nothing else. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. + +Do NOT finish until you know whether the task is complete and correct. +When you're done, add a `completed` boolean to the `outputs` of the `finish` action. +If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action +explaining what the problem is. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'\n", "extras": {"path": "hello.sh"}}]] + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_009.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_009.log new file mode 100644 index 0000000000..62475ca12a --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_009.log @@ -0,0 +1,50 @@ + + +---------- + +# Task +You are a quality assurance engineer. Another engineer has made changes to the +codebase which are supposed to solve this task: + +Verify that the shell script 'hello.sh' prints 'hello'. + +Note the changes might have already been applied in-line. You should focus on +validating if the task is solved, nothing else. + +## Available Actions +* `run` - runs a command on the command line in a Linux shell. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. + +* `read` - reads the content of a file. Arguments: + * `path` - the path of the file to read + +* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments: + * `content` - the thought to record + * `wait_for_response` - set to `true` to wait for the user to respond before proceeding + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +You must ONLY `run` commands that have no side-effects, like `ls`, `grep`, and test scripts. 
+ +Do NOT finish until you know whether the task is complete and correct. +When you're done, add a `completed` boolean to the `outputs` of the `finish` action. +If `completed` is `false`, you MUST also provide a `summary` in the `outputs` of the `finish` action +explaining what the problem is. + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'\n", "extras": {"path": "hello.sh"}}], [{"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "background": false, "thought": ""}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]] + +## Format +Your response MUST be in JSON format. It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_010.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_010.log new file mode 100644 index 0000000000..daac6cb7ab --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/prompt_010.log @@ -0,0 +1,79 @@ + + +---------- + +# Task +You are in charge of accomplishing the following task: +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. 
+ +In order to accomplish this goal, you must delegate tasks to one or more agents, who +can do the actual work. A description of each agent is provided below. You MUST +select one of the delegates below to move towards accomplishing the task, and you MUST +provide the correct inputs for the delegate you select. + +## Agents + +### CoderAgent +Given a particular task, and a detailed description of the codebase, accomplishes the task +#### Inputs +{"task": "string", "summary": "string"} + +### CommitWriterAgent +Write a git commit message for files in the git staging area +#### Inputs +{} + +### MathAgent +Solves simple and complex math problems using python +#### Inputs +{"task": "string"} + +### PostgresAgent +Writes and maintains PostgreSQL migrations +#### Inputs +{"task": "string"} + +### RepoExplorerAgent +Generates a detailed summary of an existing codebase +#### Inputs +{} + +### StudyRepoForTaskAgent +Given a particular task, finds and describes all relevant parts of the codebase +#### Inputs +{"task": "string"} + +### TypoFixerAgent +Fixes typos in files in the current working directory +#### Inputs +{"task": "string"} + +### VerifierAgent +Given a particular task, verifies that the task has been completed +#### Inputs +{"task": "string"} + + +## History +Here is a recent history of actions you've taken in service of this plan, +as well as observations you've made. This only includes the MOST RECENT +actions and observations--more may have happened before that. +They are time-ordered, with your most recent action at the bottom. + +[[{"source": "user", "action": "message", "args": {"content": "Write a shell script 'hello.sh' that prints 'hello'. 
Do not ask me for confirmation at any point.", "wait_for_response": false}}, {"observation": "null", "content": "", "extras": {}}], [{"source": "agent", "action": "delegate", "args": {"agent": "CoderAgent", "inputs": {"task": "Write a shell script 'hello.sh' that prints 'hello'.", "summary": ""}, "thought": ""}}, {"observation": "null", "content": "", "extras": {}}], [{"action": "null", "args": {}}, {"source": "agent", "observation": "delegate", "content": "", "extras": {"outputs": {}}}], [{"source": "agent", "action": "delegate", "args": {"agent": "VerifierAgent", "inputs": {"task": "Verify that the shell script 'hello.sh' prints 'hello'."}, "thought": ""}}, {"observation": "null", "content": "", "extras": {}}], [{"action": "null", "args": {}}, {"source": "agent", "observation": "delegate", "content": "", "extras": {"outputs": {"completed": true}}}]] + +## Available Actions +* `delegate` - send a task to another agent from the list provided. Arguments: + * `agent` - the agent to which the task is delegated. MUST match a name in the list of agents provided. + * `inputs` - a dictionary of input parameters to the agent, as specified in the list + +* `finish` - if you're absolutely certain that you've completed your task, use the finish action to stop working. Arguments: + * `outputs` - a dictionary representing the outputs of your task, if any + + +## Format +Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: +* `action`, which is one of the actions specified here +* `args`, which is a map of key-value pairs, specifying the arguments for that action + +You MUST NOT include any other text besides the JSON response diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_001.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_001.log new file mode 100644 index 0000000000..102f15a222 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_001.log @@ -0,0 +1,12 @@ +```json +{ + "action": "delegate", + "args": { + "agent": "CoderAgent", + "inputs": { + "task": "Write a shell script 'hello.sh' that prints 'hello'.", + "summary": "" + } + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_002.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_002.log new file mode 100644 index 0000000000..9177d1343d --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_002.log @@ -0,0 +1,9 @@ +```json +{ + "action": "write", + "args": { + "path": "hello.sh", + "content": "#!/bin/bash\n\n# Print 'hello'\necho 'hello'" + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_003.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_003.log new file mode 100644 index 0000000000..e9113919bd --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_003.log @@ -0,0 +1,6 @@ +{ + "action": "run", + "args": { + "command": "chmod +x hello.sh" + } +} diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_004.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_004.log new file mode 100644 index 0000000000..a517fa4942 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_004.log @@ -0,0 +1,7 
@@ +{ + "action": "run", + "args": { + "command": "./hello.sh", + "background": false + } +} diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_005.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_005.log new file mode 100644 index 0000000000..762f9fe834 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_005.log @@ -0,0 +1,8 @@ +```json +{ + "action": "finish", + "args": { + "outputs": {} + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_006.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_006.log new file mode 100644 index 0000000000..0258c45d00 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_006.log @@ -0,0 +1,11 @@ +```json +{ + "action": "delegate", + "args": { + "agent": "VerifierAgent", + "inputs": { + "task": "Verify that the shell script 'hello.sh' prints 'hello'." + } + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_007.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_007.log new file mode 100644 index 0000000000..58119a3819 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_007.log @@ -0,0 +1,8 @@ +```json +{ + "action": "read", + "args": { + "path": "hello.sh" + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_008.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_008.log new file mode 100644 index 0000000000..67183065b0 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_008.log @@ -0,0 +1,8 @@ +```json +{ + "action": "run", + "args": { + "command": "bash hello.sh" + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_009.log 
b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_009.log new file mode 100644 index 0000000000..fdeae4780c --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_009.log @@ -0,0 +1,10 @@ +```json +{ + "action": "finish", + "args": { + "outputs": { + "completed": true + } + } +} +``` diff --git a/tests/integration/mock/ManagerAgent/test_write_simple_script/response_010.log b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_010.log new file mode 100644 index 0000000000..762f9fe834 --- /dev/null +++ b/tests/integration/mock/ManagerAgent/test_write_simple_script/response_010.log @@ -0,0 +1,8 @@ +```json +{ + "action": "finish", + "args": { + "outputs": {} + } +} +``` diff --git a/tests/integration/regenerate.sh b/tests/integration/regenerate.sh index 3d6f6097de..eeba358e24 100755 --- a/tests/integration/regenerate.sh +++ b/tests/integration/regenerate.sh @@ -20,7 +20,7 @@ WORKSPACE_MOUNT_PATH_IN_SANDBOX="/workspace" SANDBOX_TYPE="${SANDBOX_TYPE:-ssh}" MAX_ITERATIONS=10 -agents=("BrowsingAgent" "MonologueAgent" "CodeActAgent" "PlannerAgent") +agents=("DelegatorAgent" "ManagerAgent" "BrowsingAgent" "MonologueAgent" "CodeActAgent" "PlannerAgent") tasks=( "Fix typos in bad.txt." "Write a shell script 'hello.sh' that prints 'hello'."