diff --git a/.github/.codecov.yml b/.github/.codecov.yml new file mode 100644 index 0000000000..b5e047ba27 --- /dev/null +++ b/.github/.codecov.yml @@ -0,0 +1,14 @@ +codecov: + notify: + wait_for_ci: true + +coverage: + status: + patch: + default: + threshold: 10% # allow patch coverage to be lower than project coverage by at most 10% + project: + default: + threshold: 5% # allow project coverage to drop at most 5% + +comment: false diff --git a/agenthub/README.md b/agenthub/README.md index c0b151eda9..6dd6b54c83 100644 --- a/agenthub/README.md +++ b/agenthub/README.md @@ -21,7 +21,7 @@ The `state` contains: - A history of actions taken by the agent, as well as any observations (e.g. file content, command output) from those actions - A list of actions/observations that have happened since the most recent step -- A [`plan`](https://github.com/OpenDevin/OpenDevin/blob/main/opendevin/plan.py), which contains the main goal +- A [`root_task`](https://github.com/OpenDevin/OpenDevin/blob/main/opendevin/controller/state/task.py), which contains a plan of action - The agent can add and modify subtasks through the `AddTaskAction` and `ModifyTaskAction` ## Actions diff --git a/agenthub/SWE_agent/agent.py b/agenthub/SWE_agent/agent.py index 0efb9112b8..8fbe21ab0d 100644 --- a/agenthub/SWE_agent/agent.py +++ b/agenthub/SWE_agent/agent.py @@ -69,7 +69,8 @@ class SWEAgent(Agent): for prev_action, obs in state.updated_info: self._remember(prev_action, obs) - prompt = STEP_PROMPT(state.plan.main_goal, self.cur_file, self.cur_line) + goal = state.get_current_user_intent() + prompt = STEP_PROMPT(goal, self.cur_file, self.cur_line) msgs = [ {'content': SYSTEM_MESSAGE, 'role': 'system'}, diff --git a/agenthub/SWE_agent/prompts.py b/agenthub/SWE_agent/prompts.py index e4ed57396f..9022367836 100644 --- a/agenthub/SWE_agent/prompts.py +++ b/agenthub/SWE_agent/prompts.py @@ -1,4 +1,3 @@ - DEFAULT_COMMANDS_DICT = { 'exit': 'Executed when task is complete', 'read [] []': "Shows a given file's contents starting from up to . Default: start_line = 0, end_line = -1. By default the whole file will be read.", @@ -6,12 +5,12 @@ DEFAULT_COMMANDS_DICT = { 'browse ': 'Returns the text version of any url, this can be useful to look up documentation or finding issues on github', 'scroll_up': 'Takes no arguments. This will scroll up and show you the 100 lines above your current lines', 'scroll_down': 'Takes no arguments. This will scroll down and show you the 100 lines below your current lines', - 'edit ': 'This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes', + 'edit ': 'This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file', 'goto ': 'This will take you directly to a line and show you the 100 lines below it.', ' ': 'You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included', 'pip install ': 'You can use this to import python packages. Make sure you include the correct package name when using this command.', 'ls': 'Use the ls command to view all the files in your current directory, this is a good starting point.', - 'NOT ALLOWED': 'You cannot use interactive commands like python or node' + 'NOT ALLOWED': 'You cannot use interactive commands like python or node', } COMMAND_USAGE = { @@ -25,8 +24,7 @@ COMMAND_USAGE = { 'browse': 'Args:\n\nUsage:\n```\nbrowse https://github.com/OpenDevin/OpenDevin\n```\nThis will fetch the Text elements from the given url and show them to you.', } -DEFAULT_COMMANDS = '\n'.join( - [k + ' - ' + v for k, v in DEFAULT_COMMANDS_DICT.items()]) +DEFAULT_COMMANDS = '\n'.join([k + ' - ' + v for k, v in DEFAULT_COMMANDS_DICT.items()]) # from opendevin.parse_commands import parse_command_file # USE parse_command_file(filepath) to get the custom commands @@ -126,7 +124,8 @@ You have access to a variety of tools and commands that you can use to help you """.strip() -def NO_ACTION(latest): return f""" +def NO_ACTION(latest): + return f""" You did not include any action to take in your most recent output: ===== Output ====== @@ -154,7 +153,8 @@ def file_info(file: str, line: int): """ -def STEP_PROMPT(task, file, line_num): return f""" +def STEP_PROMPT(task, file, line_num): + return f""" {RESPONSE_FORMAT} You are currently trying to complete this task: {task} @@ -185,7 +185,8 @@ def unpack_dict(data: dict, restrict: list[str] = []): return '\n'.join(lines) -def MEMORY_FORMAT(act, obs): return f""" +def MEMORY_FORMAT(act, obs): + return f""" Previous Action: {unpack_dict(act, ["content"])} diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index ad54484e46..c8f002f0b2 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ b/agenthub/codeact_agent/codeact_agent.py @@ -132,6 +132,7 @@ class CodeActAgent(Agent): IPythonRunCellObservation, NullObservation, ) + messages: list[dict] = [] def __init__( self, @@ -144,7 +145,20 @@ class CodeActAgent(Agent): - llm (LLM): The llm to be used by this agent """ super().__init__(llm) - self.messages: list[Mapping[str, str]] = [] + self.reset() + + def reset(self) -> None: + """ + Resets the CodeAct Agent. + """ + super().reset() + self.messages: list[Mapping[str, str]] = [ + {'role': 'system', 'content': SYSTEM_MESSAGE}, + { + 'role': 'user', + 'content': f"Here is an example of how you can interact with the environment for task solving:\n{EXAMPLES}\n\nNOW, LET'S START!", + }, + ] self.cost_accumulator = 0 def step(self, state: State) -> Action: @@ -162,18 +176,6 @@ class CodeActAgent(Agent): - AgentFinishAction() - end the interaction """ - if len(self.messages) == 0: - assert state.plan.main_goal, 'Expecting instruction to be set' - self.messages = [ - {'role': 'system', 'content': SYSTEM_MESSAGE}, - { - 'role': 'user', - 'content': ( - f'Here is an example of how you can interact with the environment for task solving:\n{EXAMPLES}\n\n' - f"NOW, LET'S START!\n\n{state.plan.main_goal}" - ), - }, - ] updated_info = state.updated_info if updated_info: for prev_action, obs in updated_info: @@ -237,6 +239,9 @@ class CodeActAgent(Agent): ) + len(action_str) self.messages.append({'role': 'assistant', 'content': action_str}) + if finish_command := re.search(r'.*', action_str, re.DOTALL): + thought = action_str.replace(finish_command.group(0), '').strip() + return AgentFinishAction(thought=thought) if bash_command := re.search( r'(.*)', action_str, re.DOTALL ): diff --git a/agenthub/delegator_agent/agent.py b/agenthub/delegator_agent/agent.py index 2d150f5025..42e8c5cedd 100644 --- a/agenthub/delegator_agent/agent.py +++ b/agenthub/delegator_agent/agent.py @@ -36,20 +36,22 @@ class DelegatorAgent(Agent): """ if self.current_delegate == '': self.current_delegate = 'study' + task = state.get_current_user_intent() return AgentDelegateAction( - agent='StudyRepoForTaskAgent', inputs={'task': state.plan.main_goal} + agent='StudyRepoForTaskAgent', inputs={'task': task} ) last_observation = state.history[-1][1] if not isinstance(last_observation, AgentDelegateObservation): raise Exception('Last observation is not an AgentDelegateObservation') + goal = state.get_current_user_intent() if self.current_delegate == 'study': self.current_delegate = 'coder' return AgentDelegateAction( agent='CoderAgent', inputs={ - 'task': state.plan.main_goal, + 'task': goal, 'summary': last_observation.outputs['summary'], }, ) @@ -58,7 +60,7 @@ class DelegatorAgent(Agent): return AgentDelegateAction( agent='VerifierAgent', inputs={ - 'task': state.plan.main_goal, + 'task': goal, }, ) elif self.current_delegate == 'verifier': @@ -72,7 +74,7 @@ class DelegatorAgent(Agent): return AgentDelegateAction( agent='CoderAgent', inputs={ - 'task': state.plan.main_goal, + 'task': goal, 'summary': last_observation.outputs['summary'], }, ) diff --git a/agenthub/micro/agent.py b/agenthub/micro/agent.py index 36e579ad78..c575484c56 100644 --- a/agenthub/micro/agent.py +++ b/agenthub/micro/agent.py @@ -38,11 +38,13 @@ class MicroAgent(Agent): del self.delegates[self.agent_definition['name']] def step(self, state: State) -> Action: + latest_user_message = state.get_current_user_intent() prompt = self.prompt_template.render( state=state, instructions=instructions, to_json=to_json, delegates=self.delegates, + latest_user_message=latest_user_message, ) messages = [{'content': prompt, 'role': 'user'}] resp = self.llm.completion(messages=messages) diff --git a/agenthub/micro/coder/prompt.md b/agenthub/micro/coder/prompt.md index ca39eca61d..28a196628f 100644 --- a/agenthub/micro/coder/prompt.md +++ b/agenthub/micro/coder/prompt.md @@ -2,7 +2,7 @@ You are a software engineer. You've inherited an existing codebase, which you need to modify to complete this task: -{{ state.plan.main_goal }} +{{ latest_user_message }} {% if state.inputs.summary %} Here's a summary of the codebase, as it relates to this task: diff --git a/agenthub/micro/manager/prompt.md b/agenthub/micro/manager/prompt.md index 419002a284..283bd40473 100644 --- a/agenthub/micro/manager/prompt.md +++ b/agenthub/micro/manager/prompt.md @@ -1,6 +1,6 @@ # Task You are in charge of accomplishing the following task: -{{ state.plan.main_goal }} +{{ latest_user_message }} In order to accomplish this goal, you must delegate tasks to one or more agents, who can do the actual work. A description of each agent is provided below. You MUST diff --git a/agenthub/micro/math_agent/prompt.md b/agenthub/micro/math_agent/prompt.md index 39ab08bc35..fc362fc644 100644 --- a/agenthub/micro/math_agent/prompt.md +++ b/agenthub/micro/math_agent/prompt.md @@ -1,7 +1,7 @@ # Task You are a brilliant mathematician and programmer. You've been given the following problem to solve: -{{ state.plan.main_goal }} +{{ latest_user_message }} Please write a python script that solves this problem, and prints the answer to stdout. ONLY print the answer to stdout, nothing else. diff --git a/agenthub/micro/postgres_agent/prompt.md b/agenthub/micro/postgres_agent/prompt.md index 79acd530f1..9250bdd54f 100644 --- a/agenthub/micro/postgres_agent/prompt.md +++ b/agenthub/micro/postgres_agent/prompt.md @@ -2,7 +2,7 @@ You are a database engineer. You are working on an existing Postgres project, and have been given the following task: -{{ state.plan.main_goal }} +{{ latest_user_message }} You must: * Investigate the existing migrations to understand the current schema diff --git a/agenthub/micro/study_repo_for_task/prompt.md b/agenthub/micro/study_repo_for_task/prompt.md index 820898e003..cee45798cd 100644 --- a/agenthub/micro/study_repo_for_task/prompt.md +++ b/agenthub/micro/study_repo_for_task/prompt.md @@ -3,7 +3,7 @@ You are a software engineer. You've inherited an existing codebase, which you're learning about for the first time. You need to study the codebase to find all the information needed to complete this task: -{{ state.plan.main_goal }} +{{ latest_user_message }} ## Available Actions {{ instructions.actions.run }} diff --git a/agenthub/micro/verifier/prompt.md b/agenthub/micro/verifier/prompt.md index 1a43f8600c..ba991cbe10 100644 --- a/agenthub/micro/verifier/prompt.md +++ b/agenthub/micro/verifier/prompt.md @@ -2,7 +2,7 @@ You are a quality assurance engineer. Another engineer has made changes to the codebase which are supposed to solve this task: -{{ state.plan.main_goal }} +{{ latest_user_message }} Your goal is to verify that the changes are correct and bug-free. diff --git a/agenthub/monologue_agent/agent.py b/agenthub/monologue_agent/agent.py index e1d71599ed..73892946d1 100644 --- a/agenthub/monologue_agent/agent.py +++ b/agenthub/monologue_agent/agent.py @@ -225,7 +225,9 @@ class MonologueAgent(Agent): Returns: - Action: The next action to take based on LLM response """ - self._initialize(state.plan.main_goal) + + goal = state.get_current_user_intent() + self._initialize(goal) for prev_action, obs in state.updated_info: self._add_event(prev_action.to_memory()) self._add_event(obs.to_memory()) @@ -233,7 +235,7 @@ class MonologueAgent(Agent): state.updated_info = [] prompt = prompts.get_request_action_prompt( - state.plan.main_goal, + goal, self.monologue.get_events(), state.background_commands_obs, ) diff --git a/agenthub/planner_agent/agent.py b/agenthub/planner_agent/agent.py index adc565bc0f..0d78b479e1 100644 --- a/agenthub/planner_agent/agent.py +++ b/agenthub/planner_agent/agent.py @@ -34,9 +34,13 @@ class PlannerAgent(Agent): - Action: The next action to take based on llm response """ - if state.plan.task.state in ['completed', 'verified', 'abandoned']: + if state.root_task.state in [ + 'completed', + 'verified', + 'abandoned', + ]: return AgentFinishAction() - prompt = get_prompt(state.plan, state.history) + prompt = get_prompt(state) messages = [{'content': prompt, 'role': 'user'}] resp = self.llm.completion(messages=messages) action_resp = resp['choices'][0]['message']['content'] diff --git a/agenthub/planner_agent/prompt.py b/agenthub/planner_agent/prompt.py index f37e3cda65..8363ec46f5 100644 --- a/agenthub/planner_agent/prompt.py +++ b/agenthub/planner_agent/prompt.py @@ -1,4 +1,4 @@ -from opendevin.controller.state.plan import Plan +from opendevin.controller.state.state import State from opendevin.core.logger import opendevin_logger as logger from opendevin.core.schema import ActionType from opendevin.core.utils import json @@ -9,7 +9,6 @@ from opendevin.events.action import ( ) from opendevin.events.observation import ( NullObservation, - Observation, ) HISTORY_SIZE = 10 @@ -85,7 +84,7 @@ It must be an object, and it must contain two fields: * `message` - make a plan, set a goal, or record your thoughts. Arguments: * `content` - the message to record * `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task + * `parent` - the ID of the parent task (leave empty if it should go at the top level) * `goal` - the goal of the task * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. * `modify_task` - close a task. Arguments: @@ -122,21 +121,20 @@ def get_hint(latest_action_id: str) -> str: return hints.get(latest_action_id, '') -def get_prompt(plan: Plan, history: list[tuple[Action, Observation]]) -> str: +def get_prompt(state: State) -> str: """ Gets the prompt for the planner agent. Formatted with the most recent action-observation pairs, current task, and hint based on last action Parameters: - - plan (Plan): The original plan outlined by the user with LLM defined tasks - - history (list[tuple[Action, Observation]]): list of corresponding action-observation pairs + - state (State): The state of the current agent Returns: - str: The formatted string prompt with historical values """ - plan_str = json.dumps(plan.task.to_dict(), indent=2) - sub_history = history[-HISTORY_SIZE:] + plan_str = json.dumps(state.root_task.to_dict(), indent=2) + sub_history = state.history[-HISTORY_SIZE:] history_dicts = [] latest_action: Action = NullAction() for action, observation in sub_history: @@ -147,7 +145,7 @@ def get_prompt(plan: Plan, history: list[tuple[Action, Observation]]) -> str: observation_dict = observation.to_memory() history_dicts.append(observation_dict) history_str = json.dumps(history_dicts, indent=2) - current_task = plan.get_current_task() + current_task = state.root_task.get_current_task() if current_task is not None: plan_status = f"You're currently working on this task:\n{current_task.goal}." if len(current_task.subtasks) == 0: @@ -156,8 +154,9 @@ def get_prompt(plan: Plan, history: list[tuple[Action, Observation]]) -> str: plan_status = "You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress." hint = get_hint(latest_action.to_dict()['action']) logger.info('HINT:\n' + hint, extra={'msg_type': 'INFO'}) + task = state.get_current_user_intent() return prompt % { - 'task': plan.main_goal, + 'task': task, 'plan': plan_str, 'history': history_str, 'hint': hint, diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx index e92fa826ab..5e3eb6060d 100644 --- a/frontend/src/components/AgentStatusBar.tsx +++ b/frontend/src/components/AgentStatusBar.tsx @@ -23,6 +23,10 @@ const AgentStatusMap: { [k: string]: { message: string; indicator: string } } = message: "Agent has paused.", indicator: "bg-yellow-500", }, + [AgentState.LOADING]: { + message: "Agent is initializing...", + indicator: "bg-yellow-500", + }, [AgentState.STOPPED]: { message: "Agent has stopped.", indicator: "bg-red-500", diff --git a/frontend/src/components/Planner.tsx b/frontend/src/components/Planner.tsx index c7f41919e7..dbb497a572 100644 --- a/frontend/src/components/Planner.tsx +++ b/frontend/src/components/Planner.tsx @@ -11,7 +11,7 @@ import { import { VscListOrdered } from "react-icons/vsc"; import { useSelector } from "react-redux"; import { I18nKey } from "#/i18n/declaration"; -import { Plan, Task, TaskState } from "#/services/planService"; +import { Task, TaskState } from "#/services/taskService"; import { RootState } from "#/store"; function StatusIcon({ status }: { status: TaskState }): JSX.Element { @@ -53,14 +53,11 @@ function TaskCard({ task, level }: { task: Task; level: number }): JSX.Element { ); } -interface PlanProps { - plan: Plan; -} - -function PlanContainer({ plan }: PlanProps): JSX.Element { +function Planner(): JSX.Element { const { t } = useTranslation(); + const task = useSelector((state: RootState) => state.task.task); - if (plan.mainGoal === undefined) { + if (!task || !task.subtasks?.length) { return (
@@ -68,19 +65,14 @@ function PlanContainer({ plan }: PlanProps): JSX.Element {
); } - return ( -
- -
- ); -} - -function Planner(): JSX.Element { - const plan = useSelector((state: RootState) => state.plan.plan); return (
- +
+ {task.subtasks.map((subtask) => ( + + ))} +
); } diff --git a/frontend/src/components/Workspace.tsx b/frontend/src/components/Workspace.tsx index b9b3aa8b78..4b5f5f76f5 100644 --- a/frontend/src/components/Workspace.tsx +++ b/frontend/src/components/Workspace.tsx @@ -17,7 +17,7 @@ import { getSettings } from "#/services/settings"; function Workspace() { const { t } = useTranslation(); - const plan = useSelector((state: RootState) => state.plan.plan); + const task = useSelector((state: RootState) => state.task.task); const code = useSelector((state: RootState) => state.code.code); const { AGENT } = getSettings(); @@ -69,18 +69,18 @@ function Workspace() { ); useEffect(() => { - if (activeTab !== TabOption.PLANNER && plan.mainGoal !== undefined) { + if (activeTab !== TabOption.PLANNER && task) { setChanges((prev) => ({ ...prev, [TabOption.PLANNER]: true })); } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [plan]); + }, [task]); useEffect(() => { if (activeTab !== TabOption.CODE && code !== initialCodeState.code) { setChanges((prev) => ({ ...prev, [TabOption.CODE]: true })); } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [plan]); + }, [task]); useEffect(() => { if ( diff --git a/frontend/src/components/chat/ChatInterface.test.tsx b/frontend/src/components/chat/ChatInterface.test.tsx index e0a654e710..ff4013dc38 100644 --- a/frontend/src/components/chat/ChatInterface.test.tsx +++ b/frontend/src/components/chat/ChatInterface.test.tsx @@ -30,9 +30,6 @@ describe("ChatInterface", () => { it("should render the new message the user has typed", async () => { renderWithProviders(, { preloadedState: { - task: { - completed: false, - }, agent: { curAgentState: AgentState.INIT, }, @@ -82,7 +79,10 @@ describe("ChatInterface", () => { userEvent.type(input, "my message{enter}"); }); - const event = { action: ActionType.START, args: { task: "my message" } }; + const event = { + action: ActionType.MESSAGE, + args: { content: "my message" }, + }; expect(socketSpy).toHaveBeenCalledWith(JSON.stringify(event)); }); diff --git a/frontend/src/components/chat/ChatInterface.tsx b/frontend/src/components/chat/ChatInterface.tsx index c11c138d4d..b1142f7e49 100644 --- a/frontend/src/components/chat/ChatInterface.tsx +++ b/frontend/src/components/chat/ChatInterface.tsx @@ -18,11 +18,8 @@ function ChatInterface() { const { curAgentState } = useSelector((state: RootState) => state.agent); const handleSendMessage = (content: string) => { - const isTask = - curAgentState === AgentState.INIT || - curAgentState === AgentState.FINISHED; dispatch(addUserMessage(content)); - sendChatMessage(content, isTask); + sendChatMessage(content); }; const { t } = useTranslation(); diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts index c44ca8fe79..ea095ca3e4 100644 --- a/frontend/src/services/actions.ts +++ b/frontend/src/services/actions.ts @@ -3,13 +3,13 @@ import { addAssistantMessage } from "#/state/chatSlice"; import { setCode, updatePath } from "#/state/codeSlice"; import { appendInput } from "#/state/commandSlice"; import { appendJupyterInput } from "#/state/jupyterSlice"; -import { setPlan } from "#/state/planSlice"; +import { setRootTask } from "#/state/taskSlice"; import store from "#/store"; import ActionType from "#/types/ActionType"; import { ActionMessage } from "#/types/Message"; import { SocketMessage } from "#/types/ResponseType"; import { handleObservationMessage } from "./observations"; -import { getPlan } from "./planService"; +import { getRootTask } from "./taskService"; const messageActions = { [ActionType.BROWSE]: (message: ActionMessage) => { @@ -41,10 +41,14 @@ const messageActions = { store.dispatch(appendJupyterInput(message.args.code)); }, [ActionType.ADD_TASK]: () => { - getPlan().then((fetchedPlan) => store.dispatch(setPlan(fetchedPlan))); + getRootTask().then((fetchedRootTask) => + store.dispatch(setRootTask(fetchedRootTask)), + ); }, [ActionType.MODIFY_TASK]: () => { - getPlan().then((fetchedPlan) => store.dispatch(setPlan(fetchedPlan))); + getRootTask().then((fetchedRootTask) => + store.dispatch(setRootTask(fetchedRootTask)), + ); }, }; diff --git a/frontend/src/services/chatService.ts b/frontend/src/services/chatService.ts index e3d160e5c1..52cb4821e1 100644 --- a/frontend/src/services/chatService.ts +++ b/frontend/src/services/chatService.ts @@ -5,13 +5,8 @@ import { ActionMessage } from "#/types/Message"; import Socket from "./socket"; import { addUserMessage } from "#/state/chatSlice"; -export function sendChatMessage(message: string, isTask: boolean = true): void { - let event; - if (isTask) { - event = { action: ActionType.START, args: { task: message } }; - } else { - event = { action: ActionType.MESSAGE, args: { content: message } }; - } +export function sendChatMessage(message: string): void { + const event = { action: ActionType.MESSAGE, args: { content: message } }; const eventString = JSON.stringify(event); Socket.send(eventString); } diff --git a/frontend/src/services/planService.ts b/frontend/src/services/taskService.ts similarity index 64% rename from frontend/src/services/planService.ts rename to frontend/src/services/taskService.ts index b910c3c2e8..88b877abf3 100644 --- a/frontend/src/services/planService.ts +++ b/frontend/src/services/taskService.ts @@ -1,12 +1,6 @@ -export type Plan = { - mainGoal: string | undefined; - task: Task; -}; - export type Task = { id: string; goal: string; - parent: "Task | None"; subtasks: Task[]; state: TaskState; }; @@ -19,15 +13,15 @@ export enum TaskState { VERIFIED_STATE = "verified", } -export async function getPlan(): Promise { +export async function getRootTask(): Promise { const headers = new Headers({ "Content-Type": "application/json", Authorization: `Bearer ${localStorage.getItem("token")}`, }); - const res = await fetch("/api/plan", { headers }); + const res = await fetch("/api/root_task", { headers }); if (res.status !== 200 && res.status !== 204) { return undefined; } - const data = await res.json(); - return JSON.parse(data) as Plan; + const data = (await res.json()) as Task; + return data; } diff --git a/frontend/src/state/planSlice.ts b/frontend/src/state/planSlice.ts deleted file mode 100644 index 77a23ba49c..0000000000 --- a/frontend/src/state/planSlice.ts +++ /dev/null @@ -1,27 +0,0 @@ -import { createSlice } from "@reduxjs/toolkit"; -import { Plan, TaskState } from "#/services/planService"; - -export const planSlice = createSlice({ - name: "plan", - initialState: { - plan: { - mainGoal: undefined, - task: { - id: "", - goal: "", - parent: "Task | None", - subtasks: [], - state: TaskState.OPEN_STATE, - }, - } as Plan, - }, - reducers: { - setPlan: (state, action) => { - state.plan = action.payload as Plan; - }, - }, -}); - -export const { setPlan } = planSlice.actions; - -export default planSlice.reducer; diff --git a/frontend/src/state/taskSlice.ts b/frontend/src/state/taskSlice.ts index 29c6f1cf76..9726318c56 100644 --- a/frontend/src/state/taskSlice.ts +++ b/frontend/src/state/taskSlice.ts @@ -1,17 +1,23 @@ import { createSlice } from "@reduxjs/toolkit"; +import { Task, TaskState } from "#/services/taskService"; export const taskSlice = createSlice({ name: "task", initialState: { - completed: false, + task: { + id: "", + goal: "", + subtasks: [], + state: TaskState.OPEN_STATE, + } as Task, }, reducers: { - setCompleted: (state, action) => { - state.completed = action.payload; + setRootTask: (state, action) => { + state.task = action.payload as Task; }, }, }); -export const { setCompleted } = taskSlice.actions; +export const { setRootTask } = taskSlice.actions; export default taskSlice.reducer; diff --git a/frontend/src/store.ts b/frontend/src/store.ts index f675f929ec..9b3c20099b 100644 --- a/frontend/src/store.ts +++ b/frontend/src/store.ts @@ -5,7 +5,6 @@ import chatReducer from "./state/chatSlice"; import codeReducer from "./state/codeSlice"; import commandReducer from "./state/commandSlice"; import errorsReducer from "./state/errorsSlice"; -import planReducer from "./state/planSlice"; import taskReducer from "./state/taskSlice"; import jupyterReducer from "./state/jupyterSlice"; @@ -14,9 +13,8 @@ export const rootReducer = combineReducers({ chat: chatReducer, code: codeReducer, cmd: commandReducer, - task: taskReducer, errors: errorsReducer, - plan: planReducer, + task: taskReducer, agent: agentReducer, jupyter: jupyterReducer, }); diff --git a/frontend/src/types/ActionType.tsx b/frontend/src/types/ActionType.tsx index 1b8e8b4333..f6e170ea1d 100644 --- a/frontend/src/types/ActionType.tsx +++ b/frontend/src/types/ActionType.tsx @@ -2,9 +2,6 @@ enum ActionType { // Initializes the agent. Only sent by client. INIT = "initialize", - // Starts a new development task. - START = "start", - // Represents a message from the user or agent. MESSAGE = "message", diff --git a/opendevin/controller/agent_controller.py b/opendevin/controller/agent_controller.py index d4bfb6513d..1a7a485341 100644 --- a/opendevin/controller/agent_controller.py +++ b/opendevin/controller/agent_controller.py @@ -3,7 +3,6 @@ from typing import Optional, Type from agenthub.codeact_agent.codeact_agent import CodeActAgent from opendevin.controller.agent import Agent -from opendevin.controller.state.plan import Plan from opendevin.controller.state.state import State from opendevin.core.config import config from opendevin.core.exceptions import ( @@ -47,9 +46,9 @@ class AgentController: max_iterations: int runtime: Runtime event_stream: EventStream + state: State agent_task: Optional[asyncio.Task] = None delegate: 'AgentController | None' = None - state: State | None = None _agent_state: AgentState = AgentState.LOADING _cur_step: int = 0 @@ -60,6 +59,7 @@ class AgentController: sid: str = 'default', max_iterations: int = MAX_ITERATIONS, max_chars: int = MAX_CHARS, + inputs: dict | None = None, sandbox: Optional[Sandbox] = None, remind_iterations: bool = config.remind_iterations, ): @@ -67,14 +67,17 @@ class AgentController: Args: agent: The agent instance to control. + event_stream: The event stream to publish events to. sid: The session ID of the agent. max_iterations: The maximum number of iterations the agent can run. max_chars: The maximum number of characters the agent can output. + inputs: The initial inputs to the agent. sandbox: An optional initialized sandbox to run the agent in. If not provided, a default sandbox will be created based on config. remind_iterations: A boolean value indicating whether to remind the agent its remaining budget of interaction. """ self.id = sid self.agent = agent + self.state = State(inputs=inputs or {}) self.event_stream = event_stream self.event_stream.subscribe( EventStreamSubscriber.AGENT_CONTROLLER, self.on_event @@ -108,14 +111,10 @@ class AgentController: await self.set_agent_state_to(AgentState.STOPPED) def update_state_for_step(self, i): - if self.state is None: - return self.state.iteration = i self.state.background_commands_obs = self.runtime.get_background_obs() def update_state_after_step(self): - if self.state is None: - return self.state.updated_info = [] async def add_error_to_history(self, message: str): @@ -124,8 +123,6 @@ class AgentController: async def add_history( self, action: Action, observation: Observation, add_to_stream=True ): - if self.state is None: - raise ValueError('Added history while state was None') if not isinstance(action, Action): raise TypeError( f'action must be an instance of Action, got {type(action).__name__} instead' @@ -141,9 +138,6 @@ class AgentController: await self.event_stream.add_event(observation, EventSource.AGENT) async def _run(self): - if self.state is None: - return - if self._agent_state != AgentState.RUNNING: raise ValueError('Task is not in running state') @@ -176,24 +170,18 @@ class AgentController: if final_state == AgentState.RUNNING: await self.set_agent_state_to(AgentState.PAUSED) - async def setup_task(self, task: str, inputs: dict = {}): - """Sets up the agent controller with a task.""" - await self.set_agent_state_to(AgentState.INIT) - self.state = State(Plan(task)) - self.state.inputs = inputs - async def on_event(self, event: Event): if isinstance(event, ChangeAgentStateAction): await self.set_agent_state_to(event.agent_state) # type: ignore elif isinstance(event, MessageAction) and event.source == EventSource.USER: await self.add_history(event, NullObservation(''), add_to_stream=False) - if self.get_agent_state() == AgentState.AWAITING_USER_INPUT: + if self.get_agent_state() != AgentState.RUNNING: await self.set_agent_state_to(AgentState.RUNNING) async def reset_task(self): if self.agent_task is not None: self.agent_task.cancel() - self.state = None + self.state = State() self._cur_step = 0 self.agent.reset() @@ -214,11 +202,7 @@ class AgentController: self._cur_step += 1 if self.agent_task is not None: self.agent_task.cancel() - elif ( - new_state == AgentState.STOPPED - or new_state == AgentState.ERROR - or new_state == AgentState.FINISHED - ): + elif new_state == AgentState.STOPPED or new_state == AgentState.ERROR: await self.reset_task() await self.event_stream.add_event( @@ -238,9 +222,8 @@ class AgentController: event_stream=self.event_stream, max_iterations=self.max_iterations, max_chars=self.max_chars, + inputs=action.inputs, ) - task = action.inputs.get('task') or '' - await self.delegate.setup_task(task, action.inputs) def add_iteration_reminder_when_needed(self, i: int, obs: Observation): """Add iteration reminder to the observation if needed. @@ -254,8 +237,6 @@ class AgentController: return obs async def step(self, i: int) -> bool: - if self.state is None: - raise ValueError('No task to run') if self.delegate is not None: delegate_done = await self.delegate.step(i) if delegate_done: @@ -267,8 +248,6 @@ class AgentController: return False logger.info(f'STEP {i}', extra={'msg_type': 'STEP'}) - if i == 0: - logger.info(self.state.plan.main_goal, extra={'msg_type': 'PLAN'}) if self.state.num_of_chars > self.max_chars: raise MaxCharsExceedError(self.state.num_of_chars, self.max_chars) @@ -303,9 +282,11 @@ class AgentController: elif isinstance(action, AgentDelegateAction): await self.start_delegate(action) elif isinstance(action, AddTaskAction): - self.state.plan.add_subtask(action.parent, action.goal, action.subtasks) + self.state.root_task.add_subtask( + action.parent, action.goal, action.subtasks + ) elif isinstance(action, ModifyTaskAction): - self.state.plan.set_subtask_state(action.id, action.state) + self.state.root_task.set_subtask_state(action.id, action.state) elif not isinstance(observation, ErrorObservation): observation = await self.runtime.run_action(action) @@ -322,11 +303,7 @@ class AgentController: # check if delegate stuck if self.delegate and self.delegate._is_stuck(): return True - if ( - self.state is None - or self.state.history is None - or len(self.state.history) < 3 - ): + if len(self.state.history) < 3: return False # if the last three (Action, Observation) tuples are too repetitive diff --git a/opendevin/controller/state/state.py b/opendevin/controller/state/state.py index ec9475add0..25b77fb312 100644 --- a/opendevin/controller/state/state.py +++ b/opendevin/controller/state/state.py @@ -1,8 +1,9 @@ from dataclasses import dataclass, field -from opendevin.controller.state.plan import Plan +from opendevin.controller.state.task import RootTask from opendevin.events.action import ( Action, + MessageAction, ) from opendevin.events.observation import ( CmdOutputObservation, @@ -12,7 +13,7 @@ from opendevin.events.observation import ( @dataclass class State: - plan: Plan + root_task: RootTask = field(default_factory=RootTask) iteration: int = 0 # number of characters we have sent to and received from LLM so far for current task num_of_chars: int = 0 @@ -21,3 +22,11 @@ class State: updated_info: list[tuple[Action, Observation]] = field(default_factory=list) inputs: dict = field(default_factory=dict) outputs: dict = field(default_factory=dict) + + def get_current_user_intent(self): + # TODO: this is used to understand the user's main goal, but it's possible + # the latest message is an interruption. We should look for a space where + # the agent goes to FINISHED, and then look for the next user message. + for action, obs in reversed(self.history): + if isinstance(action, MessageAction) and action.source == 'user': + return action.content diff --git a/opendevin/controller/state/plan.py b/opendevin/controller/state/task.py similarity index 73% rename from opendevin/controller/state/plan.py rename to opendevin/controller/state/task.py index 73b58b40e4..a47a2a01da 100644 --- a/opendevin/controller/state/plan.py +++ b/opendevin/controller/state/task.py @@ -1,4 +1,7 @@ -from opendevin.core.exceptions import PlanInvalidStateError +from opendevin.core.exceptions import ( + AgentMalformedActionError, + TaskInvalidStateError, +) from opendevin.core.logger import opendevin_logger as logger OPEN_STATE = 'open' @@ -23,7 +26,7 @@ class Task: def __init__( self, - parent: 'Task | None', + parent: 'Task', goal: str, state: str = OPEN_STATE, subtasks: list = [], @@ -36,10 +39,10 @@ class Task: state: The initial state of the task. subtasks: A list of subtasks associated with this task. """ - if parent is None: - self.id = '0' - else: + if parent.id: self.id = parent.id + '.' + str(len(parent.subtasks)) + else: + self.id = str(len(parent.subtasks)) self.parent = parent self.goal = goal self.subtasks = [] @@ -98,11 +101,11 @@ class Task: Args: state: The new state of the task. Raises: - PlanInvalidStateError: If the provided state is invalid. + TaskInvalidStateError: If the provided state is invalid. """ if state not in STATES: logger.error('Invalid state: %s', state) - raise PlanInvalidStateError(state) + raise TaskInvalidStateError(state) self.state = state if ( state == COMPLETED_STATE @@ -130,33 +133,35 @@ class Task: return None -class Plan: - """Represents a plan consisting of tasks. +class RootTask(Task): + """Serves as the root node in a tree of tasks. + Because we want the top-level of the root_task to be a list of tasks (1, 2, 3, etc.), + the "root node" of the data structure is kind of invisible--it just + holds references to the top-level tasks. Attributes: - main_goal: The main goal of the plan. - task: The root task of the plan. + id: Kept blank for root_task + goal: Kept blank for root_task + parent: None for root_task + subtasks: The top-level list of tasks associated with the root_task. + state: The state of the root_task. """ - main_goal: str - task: Task + id: str = '' + goal: str = '' + parent: None = None - def __init__(self, task: str): - """Initializes a new instance of the Plan class. - - Args: - task: The main goal of the plan. - """ - self.main_goal = task - self.task = Task(parent=None, goal=task, subtasks=[]) + def __init__(self): + self.subtasks = [] + self.state = OPEN_STATE def __str__(self): - """Returns a string representation of the plan. + """Returns a string representation of the root_task. Returns: - A string representation of the plan. + A string representation of the root_task. """ - return self.task.to_string() + return self.to_string() def get_task_by_id(self, id: str) -> Task: """Retrieves a task by its ID. @@ -168,19 +173,20 @@ class Plan: The task with the specified ID. Raises: - ValueError: If the provided task ID is invalid or does not exist. + AgentMalformedActionError: If the provided task ID is invalid or does not exist. """ + if id == '': + return self + if len(self.subtasks) == 0: + raise AgentMalformedActionError('Task does not exist:' + id) try: parts = [int(p) for p in id.split('.')] except ValueError: - raise ValueError('Invalid task id, non-integer:' + id) - if parts[0] != 0: - raise ValueError('Invalid task id, must start with 0:' + id) - parts = parts[1:] - task = self.task + raise AgentMalformedActionError('Invalid task id:' + id) + task: Task = self for part in parts: if part >= len(task.subtasks): - raise ValueError('Task does not exist:' + id) + raise AgentMalformedActionError('Task does not exist:' + id) task = task.subtasks[part] return task @@ -205,11 +211,10 @@ class Plan: """ task = self.get_task_by_id(id) task.set_state(state) - - def get_current_task(self): - """Retrieves the current task in progress. - - Returns: - The current task in progress, or None if no task is in progress. - """ - return self.task.get_current_task() + unfinished_tasks = [ + t + for t in self.subtasks + if t.state not in [COMPLETED_STATE, VERIFIED_STATE, ABANDONED_STATE] + ] + if len(unfinished_tasks) == 0: + self.set_state(COMPLETED_STATE) diff --git a/opendevin/core/exceptions.py b/opendevin/core/exceptions.py index 39bc7f9582..61b0fb1d7c 100644 --- a/opendevin/core/exceptions.py +++ b/opendevin/core/exceptions.py @@ -49,7 +49,7 @@ class SandboxInvalidBackgroundCommandError(Exception): super().__init__(message) -class PlanInvalidStateError(Exception): +class TaskInvalidStateError(Exception): def __init__(self, state=None): if state is not None: message = f'Invalid state {state}' diff --git a/opendevin/core/main.py b/opendevin/core/main.py index 20557b7685..1f47bddbc1 100644 --- a/opendevin/core/main.py +++ b/opendevin/core/main.py @@ -76,7 +76,7 @@ async def main(task_str: str = '', exit_on_message: bool = False) -> None: event_stream=event_stream, ) - await controller.setup_task(task) + await event_stream.add_event(MessageAction(content=task), EventSource.USER) await event_stream.add_event( ChangeAgentStateAction(agent_state=AgentState.RUNNING), EventSource.USER ) diff --git a/opendevin/server/agent/agent.py b/opendevin/server/agent/agent.py index 93ac80e75d..745a905635 100644 --- a/opendevin/server/agent/agent.py +++ b/opendevin/server/agent/agent.py @@ -75,16 +75,6 @@ class AgentUnit: ChangeAgentStateAction(AgentState.INIT), EventSource.USER ) return - elif action == ActionType.START: - if self.controller is None: - await self.send_error('No agent started.') - return - task = data['args']['task'] - await self.controller.setup_task(task) - await self.event_stream.add_event( - ChangeAgentStateAction(agent_state=AgentState.RUNNING), EventSource.USER - ) - return action_dict = data.copy() action_dict['action'] = action diff --git a/opendevin/server/listen.py b/opendevin/server/listen.py index 4fa81f7c99..540b108ca1 100644 --- a/opendevin/server/listen.py +++ b/opendevin/server/listen.py @@ -1,4 +1,3 @@ -import json import shutil import uuid import warnings @@ -82,11 +81,11 @@ async def websocket_endpoint(websocket: WebSocket): ```json {"action": "recall", "args": {"query": "past projects"}} ``` - - Add a task to the plan: + - Add a task to the root_task: ```json {"action": "add_task", "args": {"task": "Implement feature X"}} ``` - - Update a task in the plan: + - Update a task in the root_task: ```json {"action": "modify_task", "args": {"id": "0", "state": "in_progress", "thought": ""}} ``` @@ -293,16 +292,16 @@ async def upload_file(file: UploadFile): return {'filename': file.filename, 'location': str(file_path)} -@app.get('/api/plan') -def get_plan( +@app.get('/api/root_task') +def get_root_task( credentials: HTTPAuthorizationCredentials = Depends(security_scheme), ): """ - Get plan. + Get root_task. - To get the plan: + To get the root_task: ```sh - curl -H "Authorization: Bearer " http://localhost:3000/api/plan + curl -H "Authorization: Bearer " http://localhost:3000/api/root_task ``` """ sid = get_sid_from_token(credentials.credentials) @@ -310,15 +309,10 @@ def get_plan( controller = agent.controller if controller is not None: state = controller.get_state() - if state is not None: + if state: return JSONResponse( status_code=status.HTTP_200_OK, - content=json.dumps( - { - 'mainGoal': state.plan.main_goal, - 'task': state.plan.task.to_dict(), - } - ), + content=state.root_task.to_dict(), ) return Response(status_code=status.HTTP_204_NO_CONTENT) diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log index be32b3b752..3dfd747c66 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log @@ -216,4 +216,6 @@ The server is running on port 5000 with PID 126. You can access the list of numb NOW, LET'S START! +---------- + Fix typos in bad.txt. Do not ask me for confirmation at any point. diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log index f19c856c77..3fdbb03939 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log @@ -216,11 +216,14 @@ The server is running on port 5000 with PID 126. You can access the list of numb NOW, LET'S START! +---------- + Fix typos in bad.txt. Do not ask me for confirmation at any point. ---------- -Sure! Let's first take a look at the content of the file `bad.txt` to identify the typos that need to be fixed. Let me display the content of the file: +Let's start by examining the contents of `bad.txt` to identify the typos. I'll display the content first. + cat bad.txt diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log index 4fda82d811..a755ab7cf0 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log @@ -216,11 +216,14 @@ The server is running on port 5000 with PID 126. You can access the list of numb NOW, LET'S START! +---------- + Fix typos in bad.txt. Do not ask me for confirmation at any point. ---------- -Sure! Let's first take a look at the content of the file `bad.txt` to identify the typos that need to be fixed. Let me display the content of the file: +Let's start by examining the contents of `bad.txt` to identify the typos. I'll display the content first. + cat bad.txt @@ -238,19 +241,26 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task. ---------- -I see the typos that need to be fixed in the file `bad.txt`. Let's correct them without asking for confirmation. Let me open the file for editing: +I've identified the typos in the file. Here are the corrections: +- "typoo" should be "typo" +- "mor" should be "more" + +I will now proceed to correct these typos in `bad.txt`. + -open bad.txt +edit 1:1 +This is a stupid typo. +end_of_edit +edit 3:3 +No more typos! +end_of_edit ---------- OBSERVATION: -[File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typoo. -2:Really? -3:No mor typos! -4:Enjoy! +> > No file open. Use the `open` command first. +> > No file open. Use the `open` command first. ENVIRONMENT REMINDER: You have 8 turns left to complete the task. [Command -1 finished with exit code 0]] diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log index 202eb00373..000739e717 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log @@ -216,11 +216,14 @@ The server is running on port 5000 with PID 126. You can access the list of numb NOW, LET'S START! +---------- + Fix typos in bad.txt. Do not ask me for confirmation at any point. ---------- -Sure! Let's first take a look at the content of the file `bad.txt` to identify the typos that need to be fixed. Let me display the content of the file: +Let's start by examining the contents of `bad.txt` to identify the typos. I'll display the content first. + cat bad.txt @@ -238,9 +241,42 @@ ENVIRONMENT REMINDER: You have 9 turns left to complete the task. ---------- -I see the typos that need to be fixed in the file `bad.txt`. Let's correct them without asking for confirmation. Let me open the file for editing: +I've identified the typos in the file. Here are the corrections: +- "typoo" should be "typo" +- "mor" should be "more" + +I will now proceed to correct these typos in `bad.txt`. + + +edit 1:1 +This is a stupid typo. +end_of_edit +edit 3:3 +No more typos! +end_of_edit + + +---------- + +OBSERVATION: +> > No file open. Use the `open` command first. +> > No file open. Use the `open` command first. + +ENVIRONMENT REMINDER: You have 8 turns left to complete the task. +[Command -1 finished with exit code 0]] + +---------- + +It seems I forgot to open the file before attempting to edit it. Let me open `bad.txt` and then proceed with the corrections. + open bad.txt +edit 1:1 +This is a stupid typo. +end_of_edit +edit 3:3 +No more typos! +end_of_edit ---------- @@ -250,26 +286,14 @@ OBSERVATION: 1:This is a stupid typoo. 2:Really? 3:No mor typos! -4:Enjoy! - -ENVIRONMENT REMINDER: You have 8 turns left to complete the task. -[Command -1 finished with exit code 0]] - ----------- - -I have opened the file `bad.txt` for editing. Let's correct the typos without asking for confirmation. Let me update the file: - -edit 1:3 -This is a stupid typo. -Really? -No more typos! -end_of_edit - - ----------- - -OBSERVATION: -> > > > [File: /workspace/bad.txt (4 lines total)] +4:Enjoy! +> > [File: /workspace/bad.txt (4 lines total)] +1:This is a stupid typo. +2:Really? +3:No mor typos! +4:Enjoy! +File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +> > [File: /workspace/bad.txt (4 lines total)] 1:This is a stupid typo. 2:Really? 3:No more typos! diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_001.log b/tests/integration/mock/CodeActAgent/test_edits/response_001.log index 103b8ea402..609a0227da 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_001.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_001.log @@ -1,3 +1,4 @@ -Sure! Let's first take a look at the content of the file `bad.txt` to identify the typos that need to be fixed. Let me display the content of the file: +Let's start by examining the contents of `bad.txt` to identify the typos. I'll display the content first. + cat bad.txt diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_002.log b/tests/integration/mock/CodeActAgent/test_edits/response_002.log index b306a50a46..2f7aaf6d3a 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_002.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_002.log @@ -1,3 +1,13 @@ -I see the typos that need to be fixed in the file `bad.txt`. Let's correct them without asking for confirmation. Let me open the file for editing: +I've identified the typos in the file. Here are the corrections: +- "typoo" should be "typo" +- "mor" should be "more" + +I will now proceed to correct these typos in `bad.txt`. + -open bad.txt +edit 1:1 +This is a stupid typo. +end_of_edit +edit 3:3 +No more typos! +end_of_edit diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_003.log b/tests/integration/mock/CodeActAgent/test_edits/response_003.log index 7572e0c3e1..1be8d844ad 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_003.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_003.log @@ -1,7 +1,10 @@ -I have opened the file `bad.txt` for editing. Let's correct the typos without asking for confirmation. Let me update the file: +It seems I forgot to open the file before attempting to edit it. Let me open `bad.txt` and then proceed with the corrections. + -edit 1:3 +open bad.txt +edit 1:1 This is a stupid typo. -Really? +end_of_edit +edit 3:3 No more typos! end_of_edit diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_004.log b/tests/integration/mock/CodeActAgent/test_edits/response_004.log index 2d332728b8..3f57dd4365 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_004.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_004.log @@ -1 +1,8 @@ -The typos in the file `bad.txt` have been corrected. Let's save the changes and close the file. Now, the file should be free of typos. If you have any more tasks or questions, feel free to let me know! +The typos in `bad.txt` have been successfully corrected. Here's the final content of the file: + +1. This is a stupid typo. +2. Really? +3. No more typos! +4. Enjoy! + +The task is completed. If you have any more requests or need further assistance, feel free to let me know! diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log index 472299b5ca..72ac7ddfb1 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log @@ -216,4 +216,6 @@ The server is running on port 5000 with PID 126. You can access the list of numb NOW, LET'S START! +---------- + Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point. diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log index 4427beecd6..939ebae5ac 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log @@ -216,19 +216,21 @@ The server is running on port 5000 with PID 126. You can access the list of numb NOW, LET'S START! +---------- + Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point. ---------- -Sure! Let me create a text file with 'hello world' content in '/workspace/test.txt': - -echo 'hello world' > /workspace/test.txt - +I will write the text 'hello world' to a file named 'test.txt' in the '/workspace' directory without asking for confirmation. Let me execute the code to create the file: + +with open('/workspace/test.txt', 'w') as f: + f.write('hello world') + ---------- OBSERVATION: - +[Code executed successfully with no output] ENVIRONMENT REMINDER: You have 9 turns left to complete the task. -[Command -1 finished with exit code 0]] diff --git a/tests/integration/mock/CodeActAgent/test_ipython/response_001.log b/tests/integration/mock/CodeActAgent/test_ipython/response_001.log index 90d6188528..6e7da22e2e 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/response_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/response_001.log @@ -1,3 +1,4 @@ -Sure! Let me create a text file with 'hello world' content in '/workspace/test.txt': - -echo 'hello world' > /workspace/test.txt +I will write the text 'hello world' to a file named 'test.txt' in the '/workspace' directory without asking for confirmation. Let me execute the code to create the file: + +with open('/workspace/test.txt', 'w') as f: + f.write('hello world') diff --git a/tests/integration/mock/CodeActAgent/test_ipython/response_002.log b/tests/integration/mock/CodeActAgent/test_ipython/response_002.log index b8218f2b6e..819ef4a830 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/response_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/response_002.log @@ -1 +1 @@ -Great! The text file with 'hello world' content has been successfully created in '/workspace/test.txt'. If you need any further assistance or have any more tasks in mind, feel free to let me know! +The text file 'test.txt' with the content 'hello world' has been successfully created in the '/workspace' directory. If you have any more tasks or questions, feel free to let me know! diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log index fb5fa43f38..63251612f7 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log @@ -216,4 +216,6 @@ The server is running on port 5000 with PID 126. You can access the list of numb NOW, LET'S START! +---------- + Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log index 74bf9caebd..024a20029f 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log @@ -216,19 +216,24 @@ The server is running on port 5000 with PID 126. You can access the list of numb NOW, LET'S START! +---------- + Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. ---------- -I will create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. Let's proceed with creating the script: +Sure! I will create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. Let me write the script for you: -echo "echo 'hello'" > hello.sh +cat > hello.sh << 'EOF' +#!/bin/bash +echo "hello" +EOF ---------- OBSERVATION: - +> > > ENVIRONMENT REMINDER: You have 9 turns left to complete the task. [Command -1 finished with exit code 0]] diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log index 9f0aabd275..d895db3db6 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log @@ -1,3 +1,6 @@ -I will create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. Let's proceed with creating the script: +Sure! I will create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. Let me write the script for you: -echo "echo 'hello'" > hello.sh +cat > hello.sh << 'EOF' +#!/bin/bash +echo "hello" +EOF diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log index f7127cfa36..df44c01302 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log @@ -1 +1 @@ -The shell script 'hello.sh' has been successfully created. You can run it to print 'hello'. If you need any further assistance or have any more requests, feel free to let me know! +The shell script 'hello.sh' has been successfully created. You can run it to see the output "hello". If you have any more requests or questions, feel free to ask! diff --git a/tests/integration/mock/MonologueAgent/test_edits/prompt_001.log b/tests/integration/mock/MonologueAgent/test_edits/prompt_001.log index e5fe419e1b..81e4038957 100644 --- a/tests/integration/mock/MonologueAgent/test_edits/prompt_001.log +++ b/tests/integration/mock/MonologueAgent/test_edits/prompt_001.log @@ -318,6 +318,18 @@ This is your internal monologue, in JSON format: "content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.", "wait_for_response": false } + }, + { + "action": "message", + "args": { + "content": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", + "wait_for_response": false + } + }, + { + "observation": "null", + "content": "", + "extras": {} } ] @@ -367,5 +379,3 @@ Notes: * whenever an action fails, always send a `message` about why it may have happened before acting again. What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object. - -You've been thinking a lot lately. Maybe it's time to take action? diff --git a/tests/integration/mock/MonologueAgent/test_edits/prompt_002.log b/tests/integration/mock/MonologueAgent/test_edits/prompt_002.log index 6c605aebfa..630150ae7e 100644 --- a/tests/integration/mock/MonologueAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/MonologueAgent/test_edits/prompt_002.log @@ -319,6 +319,18 @@ This is your internal monologue, in JSON format: "wait_for_response": false } }, + { + "action": "message", + "args": { + "content": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", + "wait_for_response": false + } + }, + { + "observation": "null", + "content": "", + "extras": {} + }, { "action": "read", "args": { diff --git a/tests/integration/mock/MonologueAgent/test_edits/prompt_003.log b/tests/integration/mock/MonologueAgent/test_edits/prompt_003.log index cb27078320..e23264009d 100644 --- a/tests/integration/mock/MonologueAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/MonologueAgent/test_edits/prompt_003.log @@ -319,6 +319,18 @@ This is your internal monologue, in JSON format: "wait_for_response": false } }, + { + "action": "message", + "args": { + "content": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", + "wait_for_response": false + } + }, + { + "observation": "null", + "content": "", + "extras": {} + }, { "action": "read", "args": { diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log index 8ac0724752..8bba25b530 100644 --- a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log @@ -318,6 +318,18 @@ This is your internal monologue, in JSON format: "content": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.", "wait_for_response": false } + }, + { + "action": "message", + "args": { + "content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.", + "wait_for_response": false + } + }, + { + "observation": "null", + "content": "", + "extras": {} } ] @@ -367,5 +379,3 @@ Notes: * whenever an action fails, always send a `message` about why it may have happened before acting again. What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object. - -You've been thinking a lot lately. Maybe it's time to take action? diff --git a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log index 08cac84825..7fcc17773d 100644 --- a/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log @@ -319,6 +319,18 @@ This is your internal monologue, in JSON format: "wait_for_response": false } }, + { + "action": "message", + "args": { + "content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.", + "wait_for_response": false + } + }, + { + "observation": "null", + "content": "", + "extras": {} + }, { "action": "write", "args": { diff --git a/tests/integration/mock/PlannerAgent/test_edits/prompt_001.log b/tests/integration/mock/PlannerAgent/test_edits/prompt_001.log index a03bee38bf..7ac2573270 100644 --- a/tests/integration/mock/PlannerAgent/test_edits/prompt_001.log +++ b/tests/integration/mock/PlannerAgent/test_edits/prompt_001.log @@ -16,8 +16,8 @@ As you complete this task, you're building a plan and keeping track of your progress. Here's a JSON representation of your plan: { - "id": "0", - "goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", + "id": "", + "goal": "", "state": "open", "subtasks": [] } @@ -51,7 +51,15 @@ Here is a recent history of actions you've taken in service of this plan, as well as observations you've made. This only includes the MOST RECENT ten actions--more happened before that. -[] +[ + { + "action": "message", + "args": { + "content": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", + "wait_for_response": false + } + } +] Your most recent action is at the bottom of that history. @@ -78,7 +86,7 @@ It must be an object, and it must contain two fields: * `message` - make a plan, set a goal, or record your thoughts. Arguments: * `content` - the message to record * `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task + * `parent` - the ID of the parent task (leave empty if it should go at the top level) * `goal` - the goal of the task * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. * `modify_task` - close a task. Arguments: @@ -91,3 +99,6 @@ You should never act twice in a row without thinking. But if your last several actions are all `message` actions, you should consider taking a different action. What is your next thought or action? Again, you must reply with JSON, and only with JSON. + +Look at your last thought in the history above. What does it suggest? Don't think anymore--take action. + diff --git a/tests/integration/mock/PlannerAgent/test_edits/prompt_002.log b/tests/integration/mock/PlannerAgent/test_edits/prompt_002.log index 004e5a5ca7..f8360a9dd8 100644 --- a/tests/integration/mock/PlannerAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/PlannerAgent/test_edits/prompt_002.log @@ -16,16 +16,14 @@ As you complete this task, you're building a plan and keeping track of your progress. Here's a JSON representation of your plan: { - "id": "0", - "goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", - "state": "in_progress", + "id": "", + "goal": "", + "state": "open", "subtasks": [] } -You're currently working on this task: -Fix typos in bad.txt. Do not ask me for confirmation at any point.. -If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW. +You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress. You're responsible for managing this plan and the status of tasks in it, by using the `add_task` and `modify_task` actions described below. @@ -55,12 +53,27 @@ ten actions--more happened before that. [ { - "action": "modify_task", + "action": "message", "args": { - "id": "0", - "state": "in_progress", + "content": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", + "wait_for_response": false + } + }, + { + "action": "read", + "args": { + "path": "bad.txt", + "start": 0, + "end": -1, "thought": "" } + }, + { + "observation": "read", + "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", + "extras": { + "path": "bad.txt" + } } ] @@ -89,7 +102,7 @@ It must be an object, and it must contain two fields: * `message` - make a plan, set a goal, or record your thoughts. Arguments: * `content` - the message to record * `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task + * `parent` - the ID of the parent task (leave empty if it should go at the top level) * `goal` - the goal of the task * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. * `modify_task` - close a task. Arguments: @@ -103,4 +116,4 @@ actions are all `message` actions, you should consider taking a different action What is your next thought or action? Again, you must reply with JSON, and only with JSON. -You should think about the next action to take. +You should think about the file you just read, what you learned from it, and how that affects your plan. diff --git a/tests/integration/mock/PlannerAgent/test_edits/prompt_003.log b/tests/integration/mock/PlannerAgent/test_edits/prompt_003.log index 156f36dbb9..94448a8f9e 100644 --- a/tests/integration/mock/PlannerAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/PlannerAgent/test_edits/prompt_003.log @@ -16,16 +16,14 @@ As you complete this task, you're building a plan and keeping track of your progress. Here's a JSON representation of your plan: { - "id": "0", - "goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", - "state": "in_progress", + "id": "", + "goal": "", + "state": "open", "subtasks": [] } -You're currently working on this task: -Fix typos in bad.txt. Do not ask me for confirmation at any point.. -If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW. +You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress. You're responsible for managing this plan and the status of tasks in it, by using the `add_task` and `modify_task` actions described below. @@ -55,18 +53,43 @@ ten actions--more happened before that. [ { - "action": "modify_task", + "action": "message", "args": { - "id": "0", - "state": "in_progress", + "content": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", + "wait_for_response": false + } + }, + { + "action": "read", + "args": { + "path": "bad.txt", + "start": 0, + "end": -1, "thought": "" } }, { - "action": "message", + "observation": "read", + "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", + "extras": { + "path": "bad.txt" + } + }, + { + "action": "write", "args": { - "content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.", - "wait_for_response": false + "path": "bad.txt", + "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", + "start": 0, + "end": -1, + "thought": "" + } + }, + { + "observation": "write", + "content": "", + "extras": { + "path": "bad.txt" } } ] @@ -96,7 +119,7 @@ It must be an object, and it must contain two fields: * `message` - make a plan, set a goal, or record your thoughts. Arguments: * `content` - the message to record * `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task + * `parent` - the ID of the parent task (leave empty if it should go at the top level) * `goal` - the goal of the task * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. * `modify_task` - close a task. Arguments: @@ -110,4 +133,4 @@ actions are all `message` actions, you should consider taking a different action What is your next thought or action? Again, you must reply with JSON, and only with JSON. -Look at your last thought in the history above. What does it suggest? Don't think anymore--take action. +You just changed a file. You should think about how it affects your plan. diff --git a/tests/integration/mock/PlannerAgent/test_edits/prompt_005.log b/tests/integration/mock/PlannerAgent/test_edits/prompt_005.log deleted file mode 100644 index b16c456e2d..0000000000 --- a/tests/integration/mock/PlannerAgent/test_edits/prompt_005.log +++ /dev/null @@ -1,146 +0,0 @@ - - ----------- - - -# Task -You're a diligent software engineer AI. You can't see, draw, or interact with a -browser, but you can read and write files, and you can run commands, and you can think. - -You've been given the following task: - -Fix typos in bad.txt. Do not ask me for confirmation at any point. - -## Plan -As you complete this task, you're building a plan and keeping -track of your progress. Here's a JSON representation of your plan: - -{ - "id": "0", - "goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", - "state": "in_progress", - "subtasks": [] -} - - -You're currently working on this task: -Fix typos in bad.txt. Do not ask me for confirmation at any point.. -If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW. - -You're responsible for managing this plan and the status of tasks in -it, by using the `add_task` and `modify_task` actions described below. - -If the History below contradicts the state of any of these tasks, you -MUST modify the task using the `modify_task` action described below. - -Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for -a task that's already represented. Every task must be represented only once. - -Tasks that are sequential MUST be siblings. They must be added in order -to their parent task. - -If you mark a task as 'completed', 'verified', or 'abandoned', -all non-abandoned subtasks will be marked the same way. -So before closing a task this way, you MUST not only be sure that it has -been completed successfully--you must ALSO be sure that all its subtasks -are ready to be marked the same way. - -If, and only if, ALL tasks have already been marked verified, -you MUST respond with the `finish` action. - -## History -Here is a recent history of actions you've taken in service of this plan, -as well as observations you've made. This only includes the MOST RECENT -ten actions--more happened before that. - -[ - { - "action": "modify_task", - "args": { - "id": "0", - "state": "in_progress", - "thought": "" - } - }, - { - "action": "message", - "args": { - "content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.", - "wait_for_response": false - } - }, - { - "action": "read", - "args": { - "path": "bad.txt", - "start": 0, - "end": -1, - "thought": "" - } - }, - { - "observation": "read", - "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", - "extras": { - "path": "bad.txt" - } - }, - { - "action": "write", - "args": { - "path": "bad.txt", - "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", - "start": 0, - "end": -1, - "thought": "" - } - }, - { - "observation": "write", - "content": "", - "extras": { - "path": "bad.txt" - } - } -] - - -Your most recent action is at the bottom of that history. - -## Action -What is your next thought or action? Your response must be in JSON format. - -It must be an object, and it must contain two fields: -* `action`, which is one of the actions below -* `args`, which is a map of key-value pairs, specifying the arguments for that action - -* `read` - reads the content of a file. Arguments: - * `path` - the path of the file to read -* `write` - writes the content to a file. Arguments: - * `path` - the path of the file to write - * `content` - the content to write to the file -* `run` - runs a command on the command line in a Linux shell. Arguments: - * `command` - the command to run - * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. -* `kill` - kills a background command - * `id` - the ID of the background command to kill -* `browse` - opens a web page. Arguments: - * `url` - the URL to open -* `message` - make a plan, set a goal, or record your thoughts. Arguments: - * `content` - the message to record -* `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task - * `goal` - the goal of the task - * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. -* `modify_task` - close a task. Arguments: - * `id` - the ID of the task to close - * `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now. -* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working. - -You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action. -You should never act twice in a row without thinking. But if your last several -actions are all `message` actions, you should consider taking a different action. - -What is your next thought or action? Again, you must reply with JSON, and only with JSON. - -You just changed a file. You should think about how it affects your plan. diff --git a/tests/integration/mock/PlannerAgent/test_edits/prompt_006.log b/tests/integration/mock/PlannerAgent/test_edits/prompt_006.log deleted file mode 100644 index fa69a2e95d..0000000000 --- a/tests/integration/mock/PlannerAgent/test_edits/prompt_006.log +++ /dev/null @@ -1,153 +0,0 @@ - - ----------- - - -# Task -You're a diligent software engineer AI. You can't see, draw, or interact with a -browser, but you can read and write files, and you can run commands, and you can think. - -You've been given the following task: - -Fix typos in bad.txt. Do not ask me for confirmation at any point. - -## Plan -As you complete this task, you're building a plan and keeping -track of your progress. Here's a JSON representation of your plan: - -{ - "id": "0", - "goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", - "state": "in_progress", - "subtasks": [] -} - - -You're currently working on this task: -Fix typos in bad.txt. Do not ask me for confirmation at any point.. -If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW. - -You're responsible for managing this plan and the status of tasks in -it, by using the `add_task` and `modify_task` actions described below. - -If the History below contradicts the state of any of these tasks, you -MUST modify the task using the `modify_task` action described below. - -Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for -a task that's already represented. Every task must be represented only once. - -Tasks that are sequential MUST be siblings. They must be added in order -to their parent task. - -If you mark a task as 'completed', 'verified', or 'abandoned', -all non-abandoned subtasks will be marked the same way. -So before closing a task this way, you MUST not only be sure that it has -been completed successfully--you must ALSO be sure that all its subtasks -are ready to be marked the same way. - -If, and only if, ALL tasks have already been marked verified, -you MUST respond with the `finish` action. - -## History -Here is a recent history of actions you've taken in service of this plan, -as well as observations you've made. This only includes the MOST RECENT -ten actions--more happened before that. - -[ - { - "action": "modify_task", - "args": { - "id": "0", - "state": "in_progress", - "thought": "" - } - }, - { - "action": "message", - "args": { - "content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.", - "wait_for_response": false - } - }, - { - "action": "read", - "args": { - "path": "bad.txt", - "start": 0, - "end": -1, - "thought": "" - } - }, - { - "observation": "read", - "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", - "extras": { - "path": "bad.txt" - } - }, - { - "action": "write", - "args": { - "path": "bad.txt", - "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", - "start": 0, - "end": -1, - "thought": "" - } - }, - { - "observation": "write", - "content": "", - "extras": { - "path": "bad.txt" - } - }, - { - "action": "message", - "args": { - "content": "I have corrected the typos in bad.txt. I should now verify that the corrections were made accurately by reading the file again.", - "wait_for_response": false - } - } -] - - -Your most recent action is at the bottom of that history. - -## Action -What is your next thought or action? Your response must be in JSON format. - -It must be an object, and it must contain two fields: -* `action`, which is one of the actions below -* `args`, which is a map of key-value pairs, specifying the arguments for that action - -* `read` - reads the content of a file. Arguments: - * `path` - the path of the file to read -* `write` - writes the content to a file. Arguments: - * `path` - the path of the file to write - * `content` - the content to write to the file -* `run` - runs a command on the command line in a Linux shell. Arguments: - * `command` - the command to run - * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. -* `kill` - kills a background command - * `id` - the ID of the background command to kill -* `browse` - opens a web page. Arguments: - * `url` - the URL to open -* `message` - make a plan, set a goal, or record your thoughts. Arguments: - * `content` - the message to record -* `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task - * `goal` - the goal of the task - * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. -* `modify_task` - close a task. Arguments: - * `id` - the ID of the task to close - * `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now. -* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working. - -You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action. -You should never act twice in a row without thinking. But if your last several -actions are all `message` actions, you should consider taking a different action. - -What is your next thought or action? Again, you must reply with JSON, and only with JSON. - -Look at your last thought in the history above. What does it suggest? Don't think anymore--take action. diff --git a/tests/integration/mock/PlannerAgent/test_edits/prompt_007.log b/tests/integration/mock/PlannerAgent/test_edits/prompt_007.log deleted file mode 100644 index 9429067cdf..0000000000 --- a/tests/integration/mock/PlannerAgent/test_edits/prompt_007.log +++ /dev/null @@ -1,169 +0,0 @@ - - ----------- - - -# Task -You're a diligent software engineer AI. You can't see, draw, or interact with a -browser, but you can read and write files, and you can run commands, and you can think. - -You've been given the following task: - -Fix typos in bad.txt. Do not ask me for confirmation at any point. - -## Plan -As you complete this task, you're building a plan and keeping -track of your progress. Here's a JSON representation of your plan: - -{ - "id": "0", - "goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", - "state": "in_progress", - "subtasks": [] -} - - -You're currently working on this task: -Fix typos in bad.txt. Do not ask me for confirmation at any point.. -If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW. - -You're responsible for managing this plan and the status of tasks in -it, by using the `add_task` and `modify_task` actions described below. - -If the History below contradicts the state of any of these tasks, you -MUST modify the task using the `modify_task` action described below. - -Be sure NOT to duplicate any tasks. Do NOT use the `add_task` action for -a task that's already represented. Every task must be represented only once. - -Tasks that are sequential MUST be siblings. They must be added in order -to their parent task. - -If you mark a task as 'completed', 'verified', or 'abandoned', -all non-abandoned subtasks will be marked the same way. -So before closing a task this way, you MUST not only be sure that it has -been completed successfully--you must ALSO be sure that all its subtasks -are ready to be marked the same way. - -If, and only if, ALL tasks have already been marked verified, -you MUST respond with the `finish` action. - -## History -Here is a recent history of actions you've taken in service of this plan, -as well as observations you've made. This only includes the MOST RECENT -ten actions--more happened before that. - -[ - { - "action": "modify_task", - "args": { - "id": "0", - "state": "in_progress", - "thought": "" - } - }, - { - "action": "message", - "args": { - "content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.", - "wait_for_response": false - } - }, - { - "action": "read", - "args": { - "path": "bad.txt", - "start": 0, - "end": -1, - "thought": "" - } - }, - { - "observation": "read", - "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", - "extras": { - "path": "bad.txt" - } - }, - { - "action": "write", - "args": { - "path": "bad.txt", - "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", - "start": 0, - "end": -1, - "thought": "" - } - }, - { - "observation": "write", - "content": "", - "extras": { - "path": "bad.txt" - } - }, - { - "action": "message", - "args": { - "content": "I have corrected the typos in bad.txt. I should now verify that the corrections were made accurately by reading the file again.", - "wait_for_response": false - } - }, - { - "action": "read", - "args": { - "path": "bad.txt", - "start": 0, - "end": -1, - "thought": "" - } - }, - { - "observation": "read", - "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n\n", - "extras": { - "path": "bad.txt" - } - } -] - - -Your most recent action is at the bottom of that history. - -## Action -What is your next thought or action? Your response must be in JSON format. - -It must be an object, and it must contain two fields: -* `action`, which is one of the actions below -* `args`, which is a map of key-value pairs, specifying the arguments for that action - -* `read` - reads the content of a file. Arguments: - * `path` - the path of the file to read -* `write` - writes the content to a file. Arguments: - * `path` - the path of the file to write - * `content` - the content to write to the file -* `run` - runs a command on the command line in a Linux shell. Arguments: - * `command` - the command to run - * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. -* `kill` - kills a background command - * `id` - the ID of the background command to kill -* `browse` - opens a web page. Arguments: - * `url` - the URL to open -* `message` - make a plan, set a goal, or record your thoughts. Arguments: - * `content` - the message to record -* `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task - * `goal` - the goal of the task - * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. -* `modify_task` - close a task. Arguments: - * `id` - the ID of the task to close - * `state` - set to 'in_progress' to start the task, 'completed' to finish it, 'verified' to assert that it was successful, 'abandoned' to give up on it permanently, or `open` to stop working on it for now. -* `finish` - if ALL of your tasks and subtasks have been verified or abandoned, and you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working. - -You MUST take time to think in between read, write, run, browse, and recall actions--do this with the `message` action. -You should never act twice in a row without thinking. But if your last several -actions are all `message` actions, you should consider taking a different action. - -What is your next thought or action? Again, you must reply with JSON, and only with JSON. - -You should think about the file you just read, what you learned from it, and how that affects your plan. diff --git a/tests/integration/mock/PlannerAgent/test_edits/response_001.log b/tests/integration/mock/PlannerAgent/test_edits/response_001.log index f4f1a41bf1..ac56b4ad06 100644 --- a/tests/integration/mock/PlannerAgent/test_edits/response_001.log +++ b/tests/integration/mock/PlannerAgent/test_edits/response_001.log @@ -1,7 +1,6 @@ { - "action": "modify_task", + "action": "read", "args": { - "id": "0", - "state": "in_progress" + "path": "bad.txt" } } diff --git a/tests/integration/mock/PlannerAgent/test_edits/response_002.log b/tests/integration/mock/PlannerAgent/test_edits/response_002.log index 790f1d01eb..64308f91f8 100644 --- a/tests/integration/mock/PlannerAgent/test_edits/response_002.log +++ b/tests/integration/mock/PlannerAgent/test_edits/response_002.log @@ -1,6 +1,7 @@ { - "action": "message", + "action": "write", "args": { - "content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections." + "path": "bad.txt", + "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n" } } diff --git a/tests/integration/mock/PlannerAgent/test_edits/response_003.log b/tests/integration/mock/PlannerAgent/test_edits/response_003.log index ac56b4ad06..4c19964bc6 100644 --- a/tests/integration/mock/PlannerAgent/test_edits/response_003.log +++ b/tests/integration/mock/PlannerAgent/test_edits/response_003.log @@ -1,6 +1,3 @@ { - "action": "read", - "args": { - "path": "bad.txt" - } + "action": "finish" } diff --git a/tests/integration/mock/PlannerAgent/test_edits/response_004.log b/tests/integration/mock/PlannerAgent/test_edits/response_004.log deleted file mode 100644 index 64308f91f8..0000000000 --- a/tests/integration/mock/PlannerAgent/test_edits/response_004.log +++ /dev/null @@ -1,7 +0,0 @@ -{ - "action": "write", - "args": { - "path": "bad.txt", - "content": "This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n" - } -} diff --git a/tests/integration/mock/PlannerAgent/test_edits/response_005.log b/tests/integration/mock/PlannerAgent/test_edits/response_005.log deleted file mode 100644 index 8ec80d11d5..0000000000 --- a/tests/integration/mock/PlannerAgent/test_edits/response_005.log +++ /dev/null @@ -1,6 +0,0 @@ -{ - "action": "message", - "args": { - "content": "I have corrected the typos in bad.txt. I should now verify that the corrections were made accurately by reading the file again." - } -} diff --git a/tests/integration/mock/PlannerAgent/test_edits/response_006.log b/tests/integration/mock/PlannerAgent/test_edits/response_006.log deleted file mode 100644 index ac56b4ad06..0000000000 --- a/tests/integration/mock/PlannerAgent/test_edits/response_006.log +++ /dev/null @@ -1,6 +0,0 @@ -{ - "action": "read", - "args": { - "path": "bad.txt" - } -} diff --git a/tests/integration/mock/PlannerAgent/test_edits/response_007.log b/tests/integration/mock/PlannerAgent/test_edits/response_007.log deleted file mode 100644 index cf05f4192c..0000000000 --- a/tests/integration/mock/PlannerAgent/test_edits/response_007.log +++ /dev/null @@ -1,8 +0,0 @@ -{ - "action": "modify_task", - "args": { - "id": "0", - "state": "verified", - "thought": "The typos in bad.txt have been corrected and verified by reading the file again. The content is now accurate." - } -} diff --git a/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_001.log index 924d907ee0..9b569ed09c 100644 --- a/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_001.log @@ -16,8 +16,8 @@ As you complete this task, you're building a plan and keeping track of your progress. Here's a JSON representation of your plan: { - "id": "0", - "goal": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.", + "id": "", + "goal": "", "state": "open", "subtasks": [] } @@ -51,7 +51,15 @@ Here is a recent history of actions you've taken in service of this plan, as well as observations you've made. This only includes the MOST RECENT ten actions--more happened before that. -[] +[ + { + "action": "message", + "args": { + "content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.", + "wait_for_response": false + } + } +] Your most recent action is at the bottom of that history. @@ -78,7 +86,7 @@ It must be an object, and it must contain two fields: * `message` - make a plan, set a goal, or record your thoughts. Arguments: * `content` - the message to record * `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task + * `parent` - the ID of the parent task (leave empty if it should go at the top level) * `goal` - the goal of the task * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. * `modify_task` - close a task. Arguments: @@ -91,3 +99,6 @@ You should never act twice in a row without thinking. But if your last several actions are all `message` actions, you should consider taking a different action. What is your next thought or action? Again, you must reply with JSON, and only with JSON. + +Look at your last thought in the history above. What does it suggest? Don't think anymore--take action. + diff --git a/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_002.log index 2c83fea179..d1500940fd 100644 --- a/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_002.log @@ -16,10 +16,17 @@ As you complete this task, you're building a plan and keeping track of your progress. Here's a JSON representation of your plan: { - "id": "0", - "goal": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.", + "id": "", + "goal": "", "state": "open", - "subtasks": [] + "subtasks": [ + { + "id": "0", + "goal": "Create a shell script 'hello.sh' that prints 'hello'", + "state": "open", + "subtasks": [] + } + ] } @@ -53,20 +60,19 @@ ten actions--more happened before that. [ { - "action": "write", + "action": "message", "args": { - "path": "hello.sh", - "content": "#!/bin/bash\n\necho 'hello'", - "start": 0, - "end": -1, - "thought": "" + "content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.", + "wait_for_response": false } }, { - "observation": "write", - "content": "", - "extras": { - "path": "hello.sh" + "action": "add_task", + "args": { + "parent": "", + "goal": "Create a shell script 'hello.sh' that prints 'hello'", + "subtasks": [], + "thought": "" } } ] @@ -96,7 +102,7 @@ It must be an object, and it must contain two fields: * `message` - make a plan, set a goal, or record your thoughts. Arguments: * `content` - the message to record * `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task + * `parent` - the ID of the parent task (leave empty if it should go at the top level) * `goal` - the goal of the task * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. * `modify_task` - close a task. Arguments: @@ -110,4 +116,4 @@ actions are all `message` actions, you should consider taking a different action What is your next thought or action? Again, you must reply with JSON, and only with JSON. -You just changed a file. You should think about how it affects your plan. +You should think about the next action to take. diff --git a/tests/integration/mock/PlannerAgent/test_edits/prompt_004.log b/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_003.log similarity index 80% rename from tests/integration/mock/PlannerAgent/test_edits/prompt_004.log rename to tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_003.log index 9c16646717..d5721bde89 100644 --- a/tests/integration/mock/PlannerAgent/test_edits/prompt_004.log +++ b/tests/integration/mock/PlannerAgent/test_write_simple_script/prompt_003.log @@ -9,23 +9,28 @@ browser, but you can read and write files, and you can run commands, and you can You've been given the following task: -Fix typos in bad.txt. Do not ask me for confirmation at any point. +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. ## Plan As you complete this task, you're building a plan and keeping track of your progress. Here's a JSON representation of your plan: { - "id": "0", - "goal": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", - "state": "in_progress", - "subtasks": [] + "id": "", + "goal": "", + "state": "open", + "subtasks": [ + { + "id": "0", + "goal": "Create a shell script 'hello.sh' that prints 'hello'", + "state": "open", + "subtasks": [] + } + ] } -You're currently working on this task: -Fix typos in bad.txt. Do not ask me for confirmation at any point.. -If it's not achievable AND verifiable with a SINGLE action, you MUST break it down into subtasks NOW. +You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress. You're responsible for managing this plan and the status of tasks in it, by using the `add_task` and `modify_task` actions described below. @@ -54,35 +59,37 @@ as well as observations you've made. This only includes the MOST RECENT ten actions--more happened before that. [ - { - "action": "modify_task", - "args": { - "id": "0", - "state": "in_progress", - "thought": "" - } - }, { "action": "message", "args": { - "content": "To fix the typos in bad.txt, I need to first read the content of the file to understand the extent of the errors and determine the necessary corrections.", + "content": "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point.", "wait_for_response": false } }, { - "action": "read", + "action": "add_task", "args": { - "path": "bad.txt", + "parent": "", + "goal": "Create a shell script 'hello.sh' that prints 'hello'", + "subtasks": [], + "thought": "" + } + }, + { + "action": "write", + "args": { + "path": "hello.sh", + "content": "echo 'hello'", "start": 0, "end": -1, "thought": "" } }, { - "observation": "read", - "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", + "observation": "write", + "content": "", "extras": { - "path": "bad.txt" + "path": "hello.sh" } } ] @@ -112,7 +119,7 @@ It must be an object, and it must contain two fields: * `message` - make a plan, set a goal, or record your thoughts. Arguments: * `content` - the message to record * `add_task` - add a task to your plan. Arguments: - * `parent` - the ID of the parent task + * `parent` - the ID of the parent task (leave empty if it should go at the top level) * `goal` - the goal of the task * `subtasks` - a list of subtasks, each of which is a map with a `goal` key. * `modify_task` - close a task. Arguments: @@ -126,4 +133,4 @@ actions are all `message` actions, you should consider taking a different action What is your next thought or action? Again, you must reply with JSON, and only with JSON. -You should think about the file you just read, what you learned from it, and how that affects your plan. +You just changed a file. You should think about how it affects your plan. diff --git a/tests/integration/mock/PlannerAgent/test_write_simple_script/response_001.log b/tests/integration/mock/PlannerAgent/test_write_simple_script/response_001.log index c146bbc016..7d8ab30c50 100644 --- a/tests/integration/mock/PlannerAgent/test_write_simple_script/response_001.log +++ b/tests/integration/mock/PlannerAgent/test_write_simple_script/response_001.log @@ -1,9 +1,8 @@ -```json { - "action": "write", + "action": "add_task", "args": { - "path": "hello.sh", - "content": "#!/bin/bash\n\necho 'hello'" + "goal": "Create a shell script 'hello.sh' that prints 'hello'", + "parent": "", + "subtasks": [] } } -``` diff --git a/tests/integration/mock/PlannerAgent/test_write_simple_script/response_002.log b/tests/integration/mock/PlannerAgent/test_write_simple_script/response_002.log index 1f770b1f55..9fce7a12dc 100644 --- a/tests/integration/mock/PlannerAgent/test_write_simple_script/response_002.log +++ b/tests/integration/mock/PlannerAgent/test_write_simple_script/response_002.log @@ -1,7 +1,7 @@ { - "action": "modify_task", + "action": "write", "args": { - "id": "0", - "state": "completed" + "path": "hello.sh", + "content": "echo 'hello'" } } diff --git a/tests/integration/mock/PlannerAgent/test_write_simple_script/response_003.log b/tests/integration/mock/PlannerAgent/test_write_simple_script/response_003.log new file mode 100644 index 0000000000..1f770b1f55 --- /dev/null +++ b/tests/integration/mock/PlannerAgent/test_write_simple_script/response_003.log @@ -0,0 +1,7 @@ +{ + "action": "modify_task", + "args": { + "id": "0", + "state": "completed" + } +} diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_001.log index dd4462031e..3e700d6259 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_001.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -56,6 +56,25 @@ when using write and edit do not surround the code with any "" just write the co ---------- +These are your past 4 actions: +===== Previous Actions ===== + +Memory 0: +Previous Action: +action: message +args: + wait_for_response: False + +Output from Action: +observation: null +content: +extras: +======= End Actions ======= +Use these memories to provide additional context to the problem you are solving. +Remember that you have already completed these steps so you do not need to perform them again. + +---------- + RESPONSE FORMAT: This is the format of the response you will make in order to solve the current issue. You will be given multiple iterations to complete this task so break it into steps and solve them one by one. diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_002.log index 4ab9c83550..b489b21764 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_002.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -61,13 +61,24 @@ These are your past 4 actions: Memory 0: Previous Action: +action: message +args: + wait_for_response: False + +Output from Action: +observation: null +content: +extras: + +Memory 1: +Previous Action: action: write args: path: hello.sh start: 0 end: -1 thought: Thoughts: - I need to create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. I will use the echo command to print 'hello' to the console. + I need to create a shell script named 'hello.sh' that will print 'hello' when executed. I will need to ensure that the script does not prompt for any confirmation during execution. Action: diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_003.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_003.log index e1ec916182..e1017a631e 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_003.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_003.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -61,21 +61,14 @@ These are your past 4 actions: Memory 0: Previous Action: -action: write +action: message args: - path: hello.sh - start: 0 - end: -1 - thought: Thoughts: - I need to create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. I will use the echo command to print 'hello' to the console. - - Action: + wait_for_response: False Output from Action: -observation: write +observation: null content: extras: - path: hello.sh Memory 1: Previous Action: @@ -85,7 +78,25 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to create a shell script named 'hello.sh' that will print 'hello' when executed. I will need to ensure that the script does not prompt for any confirmation during execution. + + Action: + +Output from Action: +observation: write +content: +extras: + path: hello.sh + +Memory 2: +Previous Action: +action: write +args: + path: hello.sh + start: 0 + end: -1 + thought: Thoughts: + I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. Action: diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_004.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_004.log index 6eeb3b1b86..035da21266 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_004.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_004.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -61,21 +61,14 @@ These are your past 4 actions: Memory 0: Previous Action: -action: write +action: message args: - path: hello.sh - start: 0 - end: -1 - thought: Thoughts: - I need to create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. I will use the echo command to print 'hello' to the console. - - Action: + wait_for_response: False Output from Action: -observation: write +observation: null content: extras: - path: hello.sh Memory 1: Previous Action: @@ -85,7 +78,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to create a shell script named 'hello.sh' that will print 'hello' when executed. I will need to ensure that the script does not prompt for any confirmation during execution. Action: @@ -103,7 +96,25 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. + + Action: + +Output from Action: +observation: write +content: +extras: + path: hello.sh + +Memory 3: +Previous Action: +action: write +args: + path: hello.sh + start: 0 + end: -1 + thought: Thoughts: + I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. I will simply add the command to print 'hello' to the script. Action: diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_005.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_005.log index 6a160dd8f8..d8fab91b5a 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_005.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_005.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -67,7 +67,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. I will use the echo command to print 'hello' to the console. + I need to create a shell script named 'hello.sh' that will print 'hello' when executed. I will need to ensure that the script does not prompt for any confirmation during execution. Action: @@ -85,7 +85,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. Action: @@ -103,7 +103,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. I will simply add the command to print 'hello' to the script. Action: @@ -121,7 +121,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_006.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_006.log index c24a1c5356..fd84712294 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_006.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_006.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -67,7 +67,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. Action: @@ -85,7 +85,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. I will simply add the command to print 'hello' to the script. Action: @@ -103,7 +103,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: @@ -121,7 +121,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_007.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_007.log index c24a1c5356..466fe2d70d 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_007.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_007.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -67,7 +67,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. I will simply add the command to print 'hello' to the script. Action: @@ -85,7 +85,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: @@ -103,7 +103,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: @@ -121,7 +121,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will simply write the command to print 'hello' in the script. Action: diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_008.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_008.log index 58458f332d..347c5cfcbd 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_008.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_008.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -67,7 +67,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: @@ -85,7 +85,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: @@ -103,7 +103,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will simply write the command to print 'hello' in the script. Action: @@ -115,20 +115,19 @@ extras: Memory 3: Previous Action: -action: read +action: write args: path: hello.sh start: 0 end: -1 thought: Thoughts: - I have already written the shell script 'hello.sh' to print 'hello' without asking for confirmation. I need to verify the content of the file to ensure that the script is correctly implemented. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will write the command to print 'hello' directly into the script. Action: Output from Action: -observation: read -content: echo 'hello' - +observation: write +content: extras: path: hello.sh ======= End Actions ======= diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_009.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_009.log index bd2529d885..a83aeaa4b8 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_009.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_009.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -67,7 +67,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: @@ -85,7 +85,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will simply write the command to print 'hello' in the script. Action: @@ -97,38 +97,39 @@ extras: Memory 2: Previous Action: -action: read +action: write args: path: hello.sh start: 0 end: -1 thought: Thoughts: - I have already written the shell script 'hello.sh' to print 'hello' without asking for confirmation. I need to verify the content of the file to ensure that the script is correctly implemented. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will write the command to print 'hello' directly into the script. Action: Output from Action: -observation: read -content: echo 'hello' - +observation: write +content: extras: path: hello.sh Memory 3: Previous Action: -action: run +action: write args: - command: bash chmod +x hello.sh - background: False - thought: + path: hello.sh + start: 0 + end: -1 + thought: Thoughts: + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will directly write the command to print 'hello' in the script. + + Action: Output from Action: -observation: run -content: /usr/bin/chmod: /usr/bin/chmod: cannot execute binary file +observation: write +content: extras: - command_id: -1 - command: bash chmod +x hello.sh - exit_code: 126 + path: hello.sh ======= End Actions ======= Use these memories to provide additional context to the problem you are solving. Remember that you have already completed these steps so you do not need to perform them again. diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_010.log b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_010.log index 580bcc5eb1..a52567c69a 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_010.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/prompt_010.log @@ -40,7 +40,7 @@ write [] [] - Modifies a by replac browse - Returns the text version of any url, this can be useful to look up documentation or finding issues on github scroll_up - Takes no arguments. This will scroll up and show you the 100 lines above your current lines scroll_down - Takes no arguments. This will scroll down and show you the 100 lines below your current lines -edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes +edit - This will modify lines in the currently open file. use start_line and end_line to designate which lines to change and then write the multiline changes. Set end_line to -1 to denote the end of the file goto - This will take you directly to a line and show you the 100 lines below it. - You can use any bash command you need (cd, ls, rm, grep, dir, mv, wget, git, zip, etc.) with their arguments included pip install - You can use this to import python packages. Make sure you include the correct package name when using this command. @@ -67,7 +67,7 @@ args: start: 0 end: -1 thought: Thoughts: - I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will simply write the command to print 'hello' in the script. Action: @@ -79,54 +79,57 @@ extras: Memory 1: Previous Action: -action: read +action: write args: path: hello.sh start: 0 end: -1 thought: Thoughts: - I have already written the shell script 'hello.sh' to print 'hello' without asking for confirmation. I need to verify the content of the file to ensure that the script is correctly implemented. + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will write the command to print 'hello' directly into the script. Action: Output from Action: -observation: read -content: echo 'hello' - +observation: write +content: extras: path: hello.sh Memory 2: Previous Action: -action: run +action: write args: - command: bash chmod +x hello.sh - background: False - thought: + path: hello.sh + start: 0 + end: -1 + thought: Thoughts: + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will directly write the command to print 'hello' in the script. + + Action: Output from Action: -observation: run -content: /usr/bin/chmod: /usr/bin/chmod: cannot execute binary file +observation: write +content: extras: - command_id: -1 - command: bash chmod +x hello.sh - exit_code: 126 + path: hello.sh Memory 3: Previous Action: -action: run +action: write args: - command: chmod +x hello.sh - background: False - thought: + path: hello.sh + start: 0 + end: -1 + thought: Thoughts: + I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will directly write the command to print 'hello' in the script. + + Action: Output from Action: -observation: run +observation: write content: extras: - command_id: -1 - command: chmod +x hello.sh - exit_code: 0 + path: hello.sh ======= End Actions ======= Use these memories to provide additional context to the problem you are solving. Remember that you have already completed these steps so you do not need to perform them again. diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_001.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_001.log index 33c064e3fc..03220e8b99 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_001.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_001.log @@ -1,5 +1,5 @@ Thoughts: -I need to create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. I will use the echo command to print 'hello' to the console. +I need to create a shell script named 'hello.sh' that will print 'hello' when executed. I will need to ensure that the script does not prompt for any confirmation during execution. Action: ``` diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_002.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_002.log index ddd480a15e..13f18be918 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_002.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_002.log @@ -1,5 +1,5 @@ Thoughts: -I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. +I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. Action: ``` diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_003.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_003.log index ddd480a15e..9f0044d6de 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_003.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_003.log @@ -1,5 +1,5 @@ Thoughts: -I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. +I need to add the code to the 'hello.sh' script that will print 'hello' when executed without any prompts for confirmation. I will simply add the command to print 'hello' to the script. Action: ``` diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_004.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_004.log index ddd480a15e..6bb1d6faa4 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_004.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_004.log @@ -1,5 +1,5 @@ Thoughts: -I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. +I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: ``` diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_005.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_005.log index ddd480a15e..6bb1d6faa4 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_005.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_005.log @@ -1,5 +1,5 @@ Thoughts: -I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. +I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already created, I will simply edit the existing script to include the command to print 'hello'. Action: ``` diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_006.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_006.log index ddd480a15e..e172f51c28 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_006.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_006.log @@ -1,5 +1,5 @@ Thoughts: -I need to add the code to the 'hello.sh' script to print 'hello' without asking for confirmation. I will use the echo command to achieve this. +I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will simply write the command to print 'hello' in the script. Action: ``` diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_007.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_007.log index 9b051dea70..7a57a456b6 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_007.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_007.log @@ -1,7 +1,7 @@ Thoughts: -I have already written the shell script 'hello.sh' to print 'hello' without asking for confirmation. I need to verify the content of the file to ensure that the script is correctly implemented. +I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will write the command to print 'hello' directly into the script. Action: ``` -read hello.sh +edit 0 -1 "echo 'hello'" ``` diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_008.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_008.log index 90ceb6e92c..5d1b1485f7 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_008.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_008.log @@ -1,7 +1,7 @@ Thoughts: -I need to ensure that the 'hello.sh' script is executable. This can be done by using the chmod command to add the execute permission to the file. +I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will directly write the command to print 'hello' in the script. Action: ``` -bash chmod +x hello.sh +edit 0 -1 "echo 'hello'" ``` diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_009.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_009.log index d7dd519ffe..5d1b1485f7 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_009.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_009.log @@ -1,7 +1,7 @@ Thoughts: -I need to make the 'hello.sh' script executable so that it can be run. The previous attempt to make it executable using the 'bash chmod +x hello.sh' command resulted in an error. I will use the 'chmod' command directly to make the script executable. +I need to add the command to print 'hello' in the 'hello.sh' script without any prompts for confirmation. Since the script is already open, I will directly write the command to print 'hello' in the script. Action: ``` -chmod +x hello.sh +edit 0 -1 "echo 'hello'" ``` diff --git a/tests/integration/mock/SWEAgent/test_write_simple_script/response_010.log b/tests/integration/mock/SWEAgent/test_write_simple_script/response_010.log index 79d9ecb941..d5eaf7733d 100644 --- a/tests/integration/mock/SWEAgent/test_write_simple_script/response_010.log +++ b/tests/integration/mock/SWEAgent/test_write_simple_script/response_010.log @@ -1,7 +1,7 @@ Thoughts: -The shell script 'hello.sh' has been created to print 'hello' without asking for confirmation. The script content is currently set to "echo 'hello'". The next step is to make sure that the script is executable and then test it to ensure that it prints 'hello' without asking for confirmation. +I have already opened the 'hello.sh' script and need to add the command to print 'hello' without any prompts for confirmation. I will use the `edit` command to insert the necessary code to achieve this. Action: ``` -chmod +x hello.sh +edit 0 -1 "echo 'hello'" ``` diff --git a/tests/unit/test_micro_agents.py b/tests/unit/test_micro_agents.py index 8533bc036c..6010fc4e52 100644 --- a/tests/unit/test_micro_agents.py +++ b/tests/unit/test_micro_agents.py @@ -6,8 +6,9 @@ import yaml from agenthub.micro.registry import all_microagents from opendevin.controller.agent import Agent -from opendevin.controller.state.plan import Plan from opendevin.controller.state.state import State +from opendevin.events.action import MessageAction +from opendevin.events.observation import NullObservation def test_all_agents_are_loaded(): @@ -33,10 +34,12 @@ def test_coder_agent_with_summary(): coder_agent = Agent.get_cls('CoderAgent')(llm=mock_llm) assert coder_agent is not None + task = 'This is a dummy task' - plan = Plan(task) + history = [(MessageAction(content=task), NullObservation(''))] + history[0][0]._source = 'user' summary = 'This is a dummy summary about this repo' - state = State(plan, inputs={'summary': summary}) + state = State(history=history, inputs={'summary': summary}) coder_agent.step(state) mock_llm.completion.assert_called_once() @@ -58,9 +61,11 @@ def test_coder_agent_without_summary(): coder_agent = Agent.get_cls('CoderAgent')(llm=mock_llm) assert coder_agent is not None + task = 'This is a dummy task' - plan = Plan(task) - state = State(plan) + history = [(MessageAction(content=task), NullObservation(''))] + history[0][0]._source = 'user' + state = State(history=history) coder_agent.step(state) mock_llm.completion.assert_called_once()