diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 6f23998bd3..83bfaa7286 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -59,7 +59,7 @@ jobs: run: make build - name: Run Tests - run: poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox" + run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox" - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -93,7 +93,7 @@ jobs: run: make build - name: Run Tests - run: poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox" + run: poetry run pytest --forked --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit -k "not test_sandbox" - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index e7f2c12690..8bbc9fb72e 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ b/agenthub/codeact_agent/codeact_agent.py @@ -24,9 +24,9 @@ from opendevin.events.observation import ( ) from opendevin.llm.llm import LLM from opendevin.runtime.plugins import ( + AgentSkillsRequirement, JupyterRequirement, PluginRequirement, - SWEAgentCommandsRequirement, ) ENABLE_GITHUB = True @@ -106,7 +106,7 @@ def truncate_observation(observation: str, max_chars: int = 10_000) -> str: class CodeActAgent(Agent): - VERSION = '1.4' + VERSION = '1.5' """ The Code Act Agent is a minimalist agent. The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step. 
@@ -144,9 +144,13 @@ class CodeActAgent(Agent): """ sandbox_plugins: list[PluginRequirement] = [ + # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since + # AgentSkillsRequirement provides a lot of Python functions + # and it need to be initialized before Jupyter for Jupyter to use those functions. + AgentSkillsRequirement(), JupyterRequirement(), - SWEAgentCommandsRequirement(), ] + jupyter_kernel_init_code: str = 'from agentskills import *' system_message: str = ( f'{SYSTEM_PREFIX}\n{GITHUB_MESSAGE}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}' @@ -248,7 +252,11 @@ class CodeActAgent(Agent): # a code block was found code_group = python_code.group(1).strip() thought = action_str.replace(python_code.group(0), '').strip() - return IPythonRunCellAction(code=code_group, thought=thought) + return IPythonRunCellAction( + code=code_group, + thought=thought, + kernel_init_code=self.jupyter_kernel_init_code, + ) elif browse_command := re.search( r'(.*)', action_str, re.DOTALL ): diff --git a/agenthub/codeact_agent/prompt.py b/agenthub/codeact_agent/prompt.py index d9c3556d4c..e55028195f 100644 --- a/agenthub/codeact_agent/prompt.py +++ b/agenthub/codeact_agent/prompt.py @@ -1,30 +1,11 @@ -from opendevin.runtime.plugins import SWEAgentCommandsRequirement +from opendevin.runtime.plugins import AgentSkillsRequirement -_SWEAGENT_BASH_DOCS = '\n'.join( - filter( - lambda x: not x.startswith('submit'), - SWEAgentCommandsRequirement.documentation.split('\n'), - ) -) -# _SWEAGENT_BASH_DOCS content below: -""" -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. 
If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -""" +_AGENT_SKILLS_DOCS = AgentSkillsRequirement.documentation COMMAND_DOCS = ( - '\nApart from the standard bash commands, you can also use the following special commands in environment:\n' - f'{_SWEAGENT_BASH_DOCS}' - "Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run." + '\nApart from the standard Python library, the assistant can also use the following functions (already imported) in environment:\n' + f'{_AGENT_SKILLS_DOCS}' + "Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run." ) SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the user's questions. @@ -39,13 +20,13 @@ For example, you can browse a given URL by goto("") %pip install [package needed] and should always import packages and define variables before starting to use them.""" -GITHUB_MESSAGE = """To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +GITHUB_MESSAGE = """To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you.""" +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it.""" SYSTEM_SUFFIX = """The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. """ @@ -55,10 +36,21 @@ EXAMPLES = """ USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! 
Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] + +ASSISTANT: +Now I will write the Python code for starting a web server and save it to the file `app.py`: + +EDITED_CODE=\"\"\"from flask import Flask app = Flask(__name__) @app.route('/') @@ -67,15 +59,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)\"\"\" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -84,7 +84,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. 
Let me check the server log: @@ -93,7 +93,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -106,7 +106,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -130,7 +130,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -140,7 +140,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. @@ -159,7 +159,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -174,52 +174,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. - -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: -Observation: +OBSERVATION:
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index eade089ffc..2eb68f8d02 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -14,6 +14,7 @@ import whatthepatch from datasets import load_dataset from tqdm import tqdm +import agenthub from evaluation.swe_bench.swe_env_box import SWEBenchSSHBox from opendevin.controller.state.state import State from opendevin.core.config import args, config, get_llm_config_arg @@ -185,11 +186,11 @@ def get_test_result(instance, sandbox, workspace_dir_name): def process_instance( - instance, - agent_class, - metadata, - skip_workspace_mount, - eval_output_dir, + instance: dict, + agent_class: str, + metadata: dict, + skip_workspace_mount: bool, + eval_output_dir: str, reset_logger: bool = True, ): workspace_mount_path = os.path.join(config.workspace_mount_path, '_eval_workspace') @@ -236,6 +237,7 @@ def process_instance( workspace_dir_name, skip_workspace_mount=skip_workspace_mount, workspace_mount_path=workspace_mount_path, + sandbox_plugins=agenthub.Agent.get_cls(agent_class).sandbox_plugins, ) # Prepare instruction @@ -280,6 +282,8 @@ def process_instance( if state is None: raise ValueError('State should not be None.') + metrics = state.metrics.get() if state.metrics else None + # Save the output output = { 'instance_id': instance.instance_id, @@ -290,6 +294,7 @@ def process_instance( 'history': [ (event_to_dict(action), event_to_dict(obs)) for action, obs in state.history ], + 'metrics': metrics, 'error': state.error if state and state.error else None, 'test_result': test_result, } diff --git a/evaluation/swe_bench/scripts/run_infer.sh 
b/evaluation/swe_bench/scripts/run_infer.sh index 73ac7ea988..7f0f4af244 100755 --- a/evaluation/swe_bench/scripts/run_infer.sh +++ b/evaluation/swe_bench/scripts/run_infer.sh @@ -19,7 +19,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG" COMMAND="poetry run python evaluation/swe_bench/run_infer.py \ --agent-cls $AGENT \ --llm-config $MODEL_CONFIG \ - --max-iterations 50 \ + --max-iterations 30 \ --max-chars 10000000 \ --eval-num-workers 8 \ --eval-note $AGENT_VERSION" diff --git a/evaluation/swe_bench/swe_env_box.py b/evaluation/swe_bench/swe_env_box.py index 67b205e554..c57e2b612d 100644 --- a/evaluation/swe_bench/swe_env_box.py +++ b/evaluation/swe_bench/swe_env_box.py @@ -6,7 +6,11 @@ from datasets import load_dataset from opendevin.core.config import config from opendevin.core.logger import opendevin_logger as logger from opendevin.runtime.docker.ssh_box import DockerSSHBox -from opendevin.runtime.plugins import JupyterRequirement, SWEAgentCommandsRequirement +from opendevin.runtime.plugins import ( + AgentSkillsRequirement, + JupyterRequirement, + PluginRequirement, +) SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.1' @@ -20,6 +24,7 @@ class SWEBenchSSHBox(DockerSSHBox): swe_instance_id: str | None = None, swe_instance: dict | None = None, skip_workspace_mount: bool = True, + sandbox_plugins: list[PluginRequirement] = [], # noqa: B006 ): if swe_instance_id is None: raise ValueError('swe_instance_id must be provided!') @@ -33,6 +38,7 @@ class SWEBenchSSHBox(DockerSSHBox): # Need to run as root to use SWEBench container sid = f'swe_bench_{swe_instance_id}' + str(uuid.uuid4()) super().__init__(container_image, timeout, sid) + self.init_plugins(sandbox_plugins) exit_code, output = self.execute('mv ~/.bashrc ~/.bashrc.bak') assert exit_code == 0, f'Failed to backup ~/.bashrc: {output}' @@ -65,9 +71,9 @@ class SWEBenchSSHBox(DockerSSHBox): cls, instance, workspace_dir_name=None, - n_tries=5, skip_workspace_mount: bool = True, workspace_mount_path: str 
| None = None, + sandbox_plugins: list[PluginRequirement] = [], # noqa: B006 ) -> 'SWEBenchSSHBox': if workspace_dir_name is None: workspace_dir_name = f"{instance['repo']}__{instance['version']}".replace( @@ -84,6 +90,7 @@ class SWEBenchSSHBox(DockerSSHBox): swe_instance_id=instance['instance_id'], swe_instance=instance, skip_workspace_mount=skip_workspace_mount, + sandbox_plugins=sandbox_plugins, ) logger.info(f"SSH box started for instance {instance['instance_id']}.") @@ -135,10 +142,10 @@ if __name__ == '__main__': swe_bench_tests = swe_bench_tests[swe_bench_tests['instance_id'] == INSTANCE_ID] EXAMPLE_INSTANCE = swe_bench_tests.iloc[0].to_dict() - sandbox = SWEBenchSSHBox.get_box_for_instance(instance=EXAMPLE_INSTANCE) - - # in actual eval, this will be initialized by the controller - sandbox.init_plugins([JupyterRequirement(), SWEAgentCommandsRequirement()]) + sandbox = SWEBenchSSHBox.get_box_for_instance( + instance=EXAMPLE_INSTANCE, + sandbox_plugins=[AgentSkillsRequirement(), JupyterRequirement()], + ) # PRE TEST exit_code, output = sandbox.execute('cd $REPO_PATH') diff --git a/opendevin/events/action/commands.py b/opendevin/events/action/commands.py index 4148e35301..4d09ee883f 100644 --- a/opendevin/events/action/commands.py +++ b/opendevin/events/action/commands.py @@ -47,6 +47,7 @@ class IPythonRunCellAction(Action): thought: str = '' action: str = ActionType.RUN_IPYTHON runnable: ClassVar[bool] = True + kernel_init_code: str = '' # code to run in the kernel (if the kernel is restarted) def __str__(self) -> str: ret = '**IPythonRunCellAction**\n' diff --git a/opendevin/runtime/docker/ssh_box.py b/opendevin/runtime/docker/ssh_box.py index 94fa04c5c3..75ac3fd860 100644 --- a/opendevin/runtime/docker/ssh_box.py +++ b/opendevin/runtime/docker/ssh_box.py @@ -18,10 +18,7 @@ from opendevin.core.exceptions import SandboxInvalidBackgroundCommandError from opendevin.core.logger import opendevin_logger as logger from opendevin.core.schema import CancellableStream 
from opendevin.runtime.docker.process import DockerProcess, Process -from opendevin.runtime.plugins import ( - JupyterRequirement, - SWEAgentCommandsRequirement, -) +from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement from opendevin.runtime.sandbox import Sandbox from opendevin.runtime.utils import find_available_tcp_port @@ -721,10 +718,10 @@ if __name__ == '__main__': ) # Initialize required plugins - ssh_box.init_plugins([JupyterRequirement(), SWEAgentCommandsRequirement()]) + ssh_box.init_plugins([AgentSkillsRequirement(), JupyterRequirement()]) logger.info( - '--- SWE-AGENT COMMAND DOCUMENTATION ---\n' - f'{SWEAgentCommandsRequirement().documentation}\n' + '--- AgentSkills COMMAND DOCUMENTATION ---\n' + f'{AgentSkillsRequirement().documentation}\n' '---' ) diff --git a/opendevin/runtime/plugins/__init__.py b/opendevin/runtime/plugins/__init__.py index 10b479fd2b..a32f9972aa 100644 --- a/opendevin/runtime/plugins/__init__.py +++ b/opendevin/runtime/plugins/__init__.py @@ -1,4 +1,5 @@ # Requirements +from .agent_skills import AgentSkillsRequirement from .jupyter import JupyterRequirement from .mixin import PluginMixin from .requirement import PluginRequirement @@ -7,6 +8,7 @@ from .swe_agent_commands import SWEAgentCommandsRequirement __all__ = [ 'PluginMixin', 'PluginRequirement', + 'AgentSkillsRequirement', 'JupyterRequirement', 'SWEAgentCommandsRequirement', ] diff --git a/opendevin/runtime/plugins/agent_skills/README.md b/opendevin/runtime/plugins/agent_skills/README.md new file mode 100644 index 0000000000..0ed57d6730 --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/README.md @@ -0,0 +1,57 @@ +# OpenDevin Skill Sets + +This folder implements a skill/tool set `agentskills` for OpenDevin. + +It is intended to be used by the agent **inside sandbox**. +The skill set will be exposed as a `pip` package that can be installed as a plugin inside the sandbox. 
+ +The skill set can contain a bunch of wrapped tools for the agent ([many examples here](https://github.com/OpenDevin/OpenDevin/pull/1914)), for example: +- Audio/Video to text (these are a temporary solution, and we should switch to multimodal models when they are sufficiently cheap) +- PDF to text +- etc. + +# Inclusion Criteria + +We are walking a fine line here. +We DON'T want to *wrap* every possible Python package and re-teach the agent its usage (e.g., the LLM already knows `pandas` pretty well, so we don't really need to create a skill that reads `csv` - it can just use `pandas`). + +We ONLY want to add a new skill when: +- The skill is not easy for the LLM to achieve by writing code directly (e.g., editing code and replacing certain lines) +- It involves calling an external model (e.g., you need to call a speech-to-text model, or an editor model for speculative editing) + +# Intended functionality + +- Tool/skill usage (through `IPythonRunCellAction`) + +```python +# In[1] +from agentskills import open_file, edit_file +open_file("/workspace/a.txt") +# Out[1] +[SWE-agent open output] + +# In[2] +edit_file( + "/workspace/a.txt", + start=1, end=3, + content=( + ("REPLACE TEXT") +)) +# Out[2] +[SWE-agent edit output] +``` + +- Tool/skill retrieval (through `IPythonRunCellAction`) + +```python +# In[1] +from agentskills import help_me + +help_me("I want to solve a task that involves reading a bunch of PDFs and reasoning about them") + +# Out[1] +"Here are the top skills that may be helpful to you: +- `pdf_to_text`: [documentation about the tools] +...
+" +``` diff --git a/opendevin/runtime/plugins/agent_skills/__init__.py b/opendevin/runtime/plugins/agent_skills/__init__.py new file mode 100644 index 0000000000..4456512062 --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/__init__.py @@ -0,0 +1,16 @@ +import os +from dataclasses import dataclass + +from opendevin.runtime.plugins.agent_skills.agentskills import DOCUMENTATION +from opendevin.runtime.plugins.requirement import PluginRequirement + + +@dataclass +class AgentSkillsRequirement(PluginRequirement): + name: str = 'agent_skills' + host_src: str = os.path.dirname( + os.path.abspath(__file__) + ) # The directory of this file (opendevin/runtime/plugins/agent_skills) + sandbox_dest: str = '/opendevin/plugins/agent_skills' + bash_script_path: str = 'setup.sh' + documentation: str = DOCUMENTATION diff --git a/opendevin/runtime/plugins/agent_skills/agentskills.py b/opendevin/runtime/plugins/agent_skills/agentskills.py new file mode 100644 index 0000000000..9ae7a59ef4 --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/agentskills.py @@ -0,0 +1,407 @@ +""" +agentskills.py + +This module provides various file manipulation skills for the OpenDevin agent. + +Functions: +- open_file(path, line_number=None): Opens a file and optionally moves to a specific line. +- goto_line(line_number): Moves the window to show the specified line number. +- scroll_down(): Moves the window down by the number of lines specified in WINDOW. +- scroll_up(): Moves the window up by the number of lines specified in WINDOW. +- create_file(filename): Creates and opens a new file with the given name. +- search_dir(search_term, dir_path='./'): Searches for a term in all files in the specified directory. +- search_file(search_term, file_path=None): Searches for a term in the specified file or the currently open file. +- find_file(file_name, dir_path='./'): Finds all files with the given name in the specified directory.
"""
agentskills.py

This module provides various file manipulation skills for the OpenDevin agent.

Functions:
- open_file(path, line_number=None): Opens a file and optionally moves to a specific line.
- goto_line(line_number): Moves the window to show the specified line number.
- scroll_down(): Moves the window down by the number of lines specified in WINDOW.
- scroll_up(): Moves the window up by the number of lines specified in WINDOW.
- create_file(filename): Creates and opens a new file with the given name.
- search_dir(search_term, dir_path='./'): Searches for a term in all files in the specified directory.
- search_file(search_term, file_path=None): Searches for a term in the specified file or the currently open file.
- find_file(file_name, dir_path='./'): Finds all files with the given name in the specified directory.
- edit_file(start, end, content): Replaces lines in the currently open file with the given content.
"""

import os
import subprocess
from inspect import signature
from typing import Optional

# Editor state shared by every skill below. The skills emulate a simple
# "one open file + scrolling window" editor, so the state is module-global.
CURRENT_FILE = None  # path of the file opened last, or None when nothing is open
CURRENT_LINE = 1  # 1-based line the window is centred on
WINDOW = 100  # number of lines shown around CURRENT_LINE

ENABLE_AUTO_LINT = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true'

# NOTE: the docstrings of the public functions (everything in __all__) are
# rendered verbatim into DOCUMENTATION at the bottom of this module.
# Implementation notes therefore live in `#` comments, not in those docstrings.


def _lint_file(file_path: str) -> Optional[str]:
    """
    Lint the file at the given path.

    Returns:
        Optional[str]: A string containing the linting report if the file failed to lint, None otherwise.
    """
    # Only Python files are linted. Whether linting is enabled at all is the
    # caller's decision (see ENABLE_AUTO_LINT in edit_file).
    if not file_path.endswith('.py'):
        return None

    # flake8 restricted to undefined names and syntax/indentation errors.
    command = [
        'flake8',
        '--isolated',
        '--select=F821,F822,F831,E112,E113,E999,E902',
        file_path,
    ]
    # stderr is folded into stdout so the report contains everything flake8 said.
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if result.returncode == 0:
        # Linting successful. No issues found.
        return None

    report = 'ERRORS:\n' + result.stdout.decode().strip()
    return report.rstrip('\n')


def _count_lines(file_path: str) -> int:
    """Return the number of lines in `file_path`, closing the file afterwards."""
    with open(file_path) as file:
        return sum(1 for _ in file)


def _print_window(file_path, current_line, window, return_str=False):
    """Render `window` lines of `file_path` around `current_line` as 'N|text' rows.

    Returns the rendered text when `return_str` is true, otherwise prints it.
    Raises FileNotFoundError when `file_path` is None (no file open).
    """
    if file_path is None:
        raise FileNotFoundError('No file open. Use the open_file function first.')
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # 0-based, half-open slice of the lines to show.
    first = max(0, current_line - window // 2)
    last = min(len(lines), current_line + window // 2)

    rows = []
    for index in range(first, last):
        row = f'{index + 1}|{lines[index]}'
        # the last line of a file may lack a trailing newline
        if not row.endswith('\n'):
            row += '\n'
        rows.append(row)
    output = ''.join(rows).rstrip()

    if return_str:
        return output
    print(output)


def _cur_file_header(file_path, total_lines):
    """Return the '[File: <abs path> (<n> lines total)]' header line."""
    return f'[File: {os.path.abspath(file_path)} ({total_lines} lines total)]\n'


def _require_open_file() -> str:
    """Return the currently open file path or raise if no file is open."""
    if CURRENT_FILE is None:
        raise FileNotFoundError('No file open. Use the open_file function first.')
    return CURRENT_FILE


def _check_line_number(line_number, total_lines: int) -> None:
    """Raise ValueError unless `line_number` is an int within 1..total_lines."""
    if (
        not isinstance(line_number, int)
        or line_number < 1
        or line_number > total_lines
    ):
        raise ValueError(f'Line number must be between 1 and {total_lines}')


def _show_current_window(total_lines: int) -> None:
    """Print the file header followed by the window around CURRENT_LINE."""
    output = _cur_file_header(CURRENT_FILE, total_lines)
    output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True)
    print(output)


def open_file(path: str, line_number: Optional[int] = None) -> None:
    """
    Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line.

    Args:
        path: str: The path to the file to open.
        line_number: Optional[int]: The line number to move to.
    """
    # Raises FileNotFoundError for a missing path and ValueError for an
    # out-of-range line number.
    global CURRENT_FILE, CURRENT_LINE
    if not os.path.isfile(path):
        raise FileNotFoundError(f'File {path} not found')

    CURRENT_FILE = path
    total_lines = _count_lines(CURRENT_FILE)

    if line_number is not None:
        _check_line_number(line_number, total_lines)
        CURRENT_LINE = line_number
    else:
        CURRENT_LINE = 1

    _show_current_window(total_lines)


def goto_line(line_number: int) -> None:
    """
    Moves the window to show the specified line number.

    Args:
        line_number: int: The line number to move to.
    """
    global CURRENT_LINE
    total_lines = _count_lines(_require_open_file())
    _check_line_number(line_number, total_lines)

    CURRENT_LINE = line_number
    _show_current_window(total_lines)


def scroll_down() -> None:
    """Moves the window down by 100 lines.

    Args:
        None
    """
    global CURRENT_LINE
    total_lines = _count_lines(_require_open_file())
    # never scroll past the last line
    CURRENT_LINE = min(CURRENT_LINE + WINDOW, total_lines)
    _show_current_window(total_lines)


def scroll_up() -> None:
    """Moves the window up by 100 lines.

    Args:
        None
    """
    global CURRENT_LINE
    total_lines = _count_lines(_require_open_file())
    # never scroll above the first line
    CURRENT_LINE = max(CURRENT_LINE - WINDOW, 1)
    _show_current_window(total_lines)


def create_file(filename: str) -> None:
    """Creates and opens a new file with the given name.

    Args:
        filename: str: The name of the file to create.
    """
    # Raises FileExistsError rather than overwriting an existing path.
    if os.path.exists(filename):
        raise FileExistsError(f"File '{filename}' already exists.")

    # A single blank line so that edit_file(start=1, end=1, ...) works right away.
    with open(filename, 'w') as file:
        file.write('\n')

    open_file(filename)
    print(f'[File {filename} created.]')


def edit_file(start: int, end: int, content: str) -> None:
    """Edit a file.

    It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing.

    Args:
        start: int: The start line number. Must be greater or equal to 1.
        end: int: The end line number. Must be greater or equal to 1 AND greater than or equal to start AND less than or equal to the number of lines in the file.
        content: str: The content to replace the lines with.
    """
    # Raises FileNotFoundError when no file is open and ValueError for an
    # invalid line range.
    global CURRENT_LINE
    if not CURRENT_FILE or not os.path.isfile(CURRENT_FILE):
        raise FileNotFoundError('No file open. Use the open_file function first.')

    # Load the file
    with open(CURRENT_FILE, 'r') as file:
        lines = file.readlines()

    # Check arguments
    if not (1 <= start <= len(lines)):
        raise ValueError(
            f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).'
        )

    if not (1 <= end <= len(lines)):
        raise ValueError(
            f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).'
        )

    if start > end:
        raise ValueError(
            f'Invalid line range: {start}-{end}. Start must be less than or equal to end.'
        )

    edited_content = content + '\n'
    n_edited_lines = len(edited_content.split('\n'))
    new_lines = lines[: start - 1] + [edited_content] + lines[end:]

    # directly write edited lines to the file
    with open(CURRENT_FILE, 'w') as file:
        file.writelines(new_lines)

    # Handle linting
    if ENABLE_AUTO_LINT:
        # BACKUP the original file so it can be shown and restored on failure
        original_file_backup_path = os.path.join(
            os.path.dirname(CURRENT_FILE), f'.backup.{os.path.basename(CURRENT_FILE)}'
        )
        with open(original_file_backup_path, 'w') as f:
            f.writelines(lines)

        try:
            lint_error = _lint_file(CURRENT_FILE)
            if lint_error:
                print(
                    '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]'
                )
                print(lint_error)

                print('[This is how your edit would have looked if applied]')
                print('-------------------------------------------------')
                cur_line = (n_edited_lines // 2) + start
                _print_window(CURRENT_FILE, cur_line, WINDOW)
                print('-------------------------------------------------\n')

                print('[This is the original code before your edit]')
                print('-------------------------------------------------')
                _print_window(original_file_backup_path, CURRENT_LINE, WINDOW)
                print('-------------------------------------------------')

                # recover the original file
                with open(original_file_backup_path, 'r') as fin, open(
                    CURRENT_FILE, 'w'
                ) as fout:
                    fout.write(fin.read())
                return
        finally:
            # The backup is temporary: remove it on success, on lint failure,
            # and when linting itself raises (e.g. flake8 is not installed).
            os.remove(original_file_backup_path)

    n_total_lines = _count_lines(CURRENT_FILE)
    # Centre the window on the edited range, clamped to the new file length.
    # A replacement shorter than the replaced range would otherwise leave the
    # cursor past the end of the file and print an empty window.
    CURRENT_LINE = max(1, min((start + end) // 2, n_total_lines))
    print(
        f'[File: {os.path.abspath(CURRENT_FILE)} ({n_total_lines} lines total after edit)]'
    )
    _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW)
    print(
        '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]'
    )


def search_dir(search_term: str, dir_path: str = './') -> None:
    """Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.

    Args:
        search_term: str: The term to search for.
        dir_path: Optional[str]: The path to the directory to search.
    """
    # Raises FileNotFoundError when dir_path is not a directory.
    if not os.path.isdir(dir_path):
        raise FileNotFoundError(f'Directory {dir_path} not found')

    matches = []
    for root, _, files in os.walk(dir_path):
        for file in files:
            # hidden files are skipped
            if file.startswith('.'):
                continue
            file_path = os.path.join(root, file)
            # errors='ignore' keeps undecodable (binary) files from aborting the search
            with open(file_path, 'r', errors='ignore') as f:
                for line_num, line in enumerate(f, 1):
                    if search_term in line:
                        matches.append((file_path, line_num, line.strip()))

    if not matches:
        print(f'No matches found for "{search_term}" in {dir_path}')
        return

    num_matches = len(matches)
    num_files = len({match[0] for match in matches})

    # too many matching files: ask for a narrower search instead of printing them all
    if num_files > 100:
        print(
            f'More than {num_files} files matched for "{search_term}" in {dir_path}. Please narrow your search.'
        )
        return

    print(f'[Found {num_matches} matches for "{search_term}" in {dir_path}]')
    for file_path, line_num, line in matches:
        print(f'{file_path} (Line {line_num}): {line}')
    print(f'[End of matches for "{search_term}" in {dir_path}]')


def search_file(search_term: str, file_path: Optional[str] = None) -> None:
    """Searches for search_term in file. If file is not provided, searches in the current open file.

    Args:
        search_term: str: The term to search for.
        file_path: Optional[str]: The path to the file to search.
    """
    # Raises FileNotFoundError when no file is given or open, or when the
    # path is not a file.
    if file_path is None:
        file_path = CURRENT_FILE
    if file_path is None:
        raise FileNotFoundError(
            'No file specified or open. Use the open_file function first.'
        )
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f'File {file_path} not found')

    matches = []
    # errors='ignore' mirrors search_dir, so undecodable bytes do not abort the search
    with open(file_path, 'r', errors='ignore') as file:
        for i, line in enumerate(file, 1):
            if search_term in line:
                matches.append((i, line.strip()))

    if matches:
        print(f'[Found {len(matches)} matches for "{search_term}" in {file_path}]')
        for match in matches:
            print(f'Line {match[0]}: {match[1]}')
        print(f'[End of matches for "{search_term}" in {file_path}]')
    else:
        print(f'[No matches found for "{search_term}" in {file_path}]')


def find_file(file_name: str, dir_path: str = './') -> None:
    """Finds all files with the given name in the specified directory.

    Args:
        file_name: str: The name of the file to find.
        dir_path: Optional[str]: The path to the directory to search.
    """
    # Raises FileNotFoundError when dir_path is not a directory.
    if not os.path.isdir(dir_path):
        raise FileNotFoundError(f'Directory {dir_path} not found')

    # NOTE: matching is by substring, so 'app' also finds 'app.py' and 'my_app.txt'.
    matches = []
    for root, _, files in os.walk(dir_path):
        for file in files:
            if file_name in file:
                matches.append(os.path.join(root, file))

    if matches:
        print(f'[Found {len(matches)} matches for "{file_name}" in {dir_path}]')
        for match in matches:
            print(f'{match}')
        print(f'[End of matches for "{file_name}" in {dir_path}]')
    else:
        print(f'[No matches found for "{file_name}" in {dir_path}]')


__all__ = [
    'open_file',
    'goto_line',
    'scroll_down',
    'scroll_up',
    'create_file',
    'edit_file',
    'search_dir',
    'search_file',
    'find_file',
]


def _render_documentation() -> str:
    """Build the skill documentation from the public functions.

    Each entry in __all__ becomes '<name><signature>:' followed by its
    docstring with blank lines dropped and a uniform 4-space indentation.
    """
    entries = []
    for func_name in __all__:
        func = globals()[func_name]

        # remove indentation from docstring and extra empty lines
        # (`or ''` guards against docstrings stripped by `python -OO`)
        cur_doc = '\n'.join(
            filter(None, (line.strip() for line in (func.__doc__ or '').split('\n')))
        )
        # now add a consistent 4 indentation
        cur_doc = '\n'.join(' ' * 4 + line for line in cur_doc.split('\n'))

        fn_signature = f'{func.__name__}' + str(signature(func))
        entries.append(f'{fn_signature}:\n{cur_doc}\n\n')
    return ''.join(entries)


DOCUMENTATION = _render_documentation()
a/opendevin/runtime/plugins/agent_skills/setup.sh b/opendevin/runtime/plugins/agent_skills/setup.sh new file mode 100755 index 0000000000..3fc3d789ae --- /dev/null +++ b/opendevin/runtime/plugins/agent_skills/setup.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -e + +# add agent_skills to PATH +echo 'export PATH=/opendevin/plugins/agent_skills:$PATH' >> ~/.bashrc +export PATH=/opendevin/plugins/agent_skills:$PATH + +# add agent_skills to PYTHONPATH +echo 'export PYTHONPATH=/opendevin/plugins/agent_skills:$PYTHONPATH' >> ~/.bashrc +export PYTHONPATH=/opendevin/plugins/agent_skills:$PYTHONPATH + +pip install flake8 diff --git a/opendevin/runtime/plugins/jupyter/execute_server b/opendevin/runtime/plugins/jupyter/execute_server index 5b6f589e0c..cce59aaaa6 100755 --- a/opendevin/runtime/plugins/jupyter/execute_server +++ b/opendevin/runtime/plugins/jupyter/execute_server @@ -71,6 +71,8 @@ class JupyterKernel: self.tools_to_run = [ # TODO: You can add code for your pre-defined tools here ] + if os.path.exists('/opendevin/plugins/agent_skills/agentskills.py'): + self.tools_to_run.append('from agentskills import *') for tool in self.tools_to_run: # logging.info(f'Tool initialized:\n{tool}') await self.execute(tool) diff --git a/opendevin/runtime/plugins/jupyter/setup.sh b/opendevin/runtime/plugins/jupyter/setup.sh index b8f8c02eaa..16901496da 100755 --- a/opendevin/runtime/plugins/jupyter/setup.sh +++ b/opendevin/runtime/plugins/jupyter/setup.sh @@ -2,7 +2,7 @@ set -e - +source ~/.bashrc # ADD /opendevin/plugins to PATH to make `jupyter_cli` available echo 'export PATH=$PATH:/opendevin/plugins/jupyter' >> ~/.bashrc export PATH=/opendevin/plugins/jupyter:$PATH diff --git a/opendevin/runtime/server/runtime.py b/opendevin/runtime/server/runtime.py index 8f04984d59..ef73bc608f 100644 --- a/opendevin/runtime/server/runtime.py +++ b/opendevin/runtime/server/runtime.py @@ -77,10 +77,28 @@ class ServerRuntime(Runtime): ('cat /tmp/opendevin_jupyter_temp.py | execute_cli'), 
background=False, ) - output = 'Package installed successfully' + output = '[Package installed successfully]' if "{'status': 'ok', 'restart': True}" != obs.content.strip(): print(obs.content) - output += '\n But failed to restart the kernel' + output += '\n[But failed to restart the kernel to load the package]' + else: + output += '\n[Kernel restarted successfully to load the package]' + + # re-init the kernel after restart + if action.kernel_init_code: + obs = self._run_command( + ( + f'cat > /tmp/opendevin_jupyter_init.py < Observation: diff --git a/poetry.lock b/poetry.lock index 922e5b71a1..9eb578c8ba 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1431,6 +1431,22 @@ docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1 testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] typing = ["typing-extensions (>=4.8)"] +[[package]] +name = "flake8" +version = "7.0.0" +description = "the modular source code checker: pep8 pyflakes and co" +optional = false +python-versions = ">=3.8.1" +files = [ + {file = "flake8-7.0.0-py2.py3-none-any.whl", hash = "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3"}, + {file = "flake8-7.0.0.tar.gz", hash = "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132"}, +] + +[package.dependencies] +mccabe = ">=0.7.0,<0.8.0" +pycodestyle = ">=2.11.0,<2.12.0" +pyflakes = ">=3.2.0,<3.3.0" + [[package]] name = "flask" version = "3.0.3" @@ -3066,13 +3082,9 @@ files = [ {file = "lxml-5.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:edcfa83e03370032a489430215c1e7783128808fd3e2e0a3225deee278585196"}, {file = "lxml-5.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:28bf95177400066596cdbcfc933312493799382879da504633d16cf60bba735b"}, {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3a745cc98d504d5bd2c19b10c79c61c7c3df9222629f1b6210c0368177589fb8"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b590b39ef90c6b22ec0be925b211298e810b4856909c8ca60d27ffbca6c12e6"}, {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b336b0416828022bfd5a2e3083e7f5ba54b96242159f83c7e3eebaec752f1716"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:c2faf60c583af0d135e853c86ac2735ce178f0e338a3c7f9ae8f622fd2eb788c"}, {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:4bc6cb140a7a0ad1f7bc37e018d0ed690b7b6520ade518285dc3171f7a117905"}, - {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7ff762670cada8e05b32bf1e4dc50b140790909caa8303cfddc4d702b71ea184"}, {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:57f0a0bbc9868e10ebe874e9f129d2917750adf008fe7b9c1598c0fbbfdde6a6"}, - {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:a6d2092797b388342c1bc932077ad232f914351932353e2e8706851c870bca1f"}, {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:60499fe961b21264e17a471ec296dcbf4365fbea611bf9e303ab69db7159ce61"}, {file = "lxml-5.2.2-cp37-cp37m-win32.whl", hash = "sha256:d9b342c76003c6b9336a80efcc766748a333573abf9350f4094ee46b006ec18f"}, {file = "lxml-5.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b16db2770517b8799c79aa80f4053cd6f8b716f21f8aca962725a9565ce3ee40"}, @@ -3307,6 +3319,17 @@ python-dateutil = ">=2.7" [package.extras] dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setuptools (>=64)", "setuptools_scm (>=7)"] +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = 
"mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -4596,6 +4619,17 @@ files = [ {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, ] +[[package]] +name = "py" +version = "1.11.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] + [[package]] name = "pyarrow" version = "16.1.0" @@ -4680,6 +4714,17 @@ files = [ [package.dependencies] pyasn1 = ">=0.4.6,<0.7.0" +[[package]] +name = "pycodestyle" +version = "2.11.1" +description = "Python style guide checker" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, + {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, +] + [[package]] name = "pycparser" version = "2.22" @@ -4878,6 +4923,17 @@ typing-extensions = "*" [package.extras] dev = ["black", "flake8", "flake8-black", "isort", "jupyter-console", "mkdocs", "mkdocs-include-markdown-plugin", "mkdocstrings[python]", "pytest", "pytest-asyncio", "pytest-trio", "toml", "tox", "trio", "trio", "trio-typing", "twine", "twisted", "validate-pyproject[all]"] +[[package]] +name = "pyflakes" +version = "3.2.0" +description = "passive checker of Python programs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = 
"sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"}, + {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, +] + [[package]] name = "pygments" version = "2.18.0" @@ -5032,6 +5088,21 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] +[[package]] +name = "pytest-forked" +version = "1.6.0" +description = "run tests in isolated forked subprocesses" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-forked-1.6.0.tar.gz", hash = "sha256:4dafd46a9a600f65d822b8f605133ecf5b3e1941ebb3588e943b4e3eb71a5a3f"}, + {file = "pytest_forked-1.6.0-py3-none-any.whl", hash = "sha256:810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0"}, +] + +[package.dependencies] +py = "*" +pytest = ">=3.10" + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -7328,4 +7399,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "797df3be428ae7c43c8b858b222b0c32e386345df9bac9711c261ec8ad481b94" +content-hash = "2edc090faaececb3de4b4b797fa7c20367cd39afd2d793e8d943eba6040e2697" diff --git a/pyproject.toml b/pyproject.toml index c62ff040bd..19de60c0d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,10 +51,13 @@ pre-commit = "3.7.1" pytest = "*" pytest-cov = "*" pytest-asyncio = "*" +pytest-forked = "*" +flake8 = "*" [tool.coverage.run] concurrency = ["gevent"] + [tool.poetry.group.evaluation.dependencies] streamlit = "*" whatthepatch = "*" diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log index 98e003c197..1393d3d112 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands 
on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. -Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. 
If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. + Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. 
+ It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. 
IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] + +ASSISTANT: +Now I will write the Python code for starting a web server and save it to the file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. 
Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. @@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. - -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log index a26581a0d0..ee03ee3e56 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. 
-Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. 
If line_number is provided, the window will be move to include that line. + Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. 
+ +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] 
+ +ASSISTANT: +Now I will write the Python code for starting a web server and save it to the file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. 
@@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. - -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <<EOF - return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>' -EOF - + +edit_file(start=7, end=7, content=" return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: @@ -237,18 +293,19 @@ Fix typos in bad.txt. Do not ask me for confirmation at any point. ---------- -Let's first view the content of the file `bad.txt` to identify the typos that need to be fixed. - -cat bad.txt - +Sure! Let's start by opening the `bad.txt` file to see its content. + +open_file('bad.txt') + ---------- OBSERVATION: -This is a stupid typoo. -Really? -No mor typos! -Enjoy! -[Command -1 finished with exit code 0]] +[File: /workspace/bad.txt (4 lines total)] +1|This is a stupid typoo. +2|Really? +3|No mor typos! +4|Enjoy! + ENVIRONMENT REMINDER: You have 8 turns left to complete the task. diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log index 110e0b3e10..173edca584 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. 
-For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. -Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. 
Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. + Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. 
+ dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] 
+ +ASSISTANT: +Now I will write the Python code for starting a web server and save it to the file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. 
@@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. - -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <<EOF - return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>' -EOF - + +edit_file(start=7, end=7, content=" return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: @@ -237,35 +293,40 @@ Fix typos in bad.txt. Do not ask me for confirmation at any point. ---------- -Let's first view the content of the file `bad.txt` to identify the typos that need to be fixed. - -cat bad.txt - - ----------- - -OBSERVATION: -This is a stupid typoo. -Really? -No mor typos! -Enjoy! -[Command -1 finished with exit code 0]] - ----------- - -Let's correct the typos in the file `bad.txt`. I will open the file for editing and fix the typos without asking for confirmation. - -open bad.txt - +Sure! Let's start by opening the `bad.txt` file to see its content. + +open_file('bad.txt') + ---------- OBSERVATION: [File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typoo. -2:Really? -3:No mor typos! -4:Enjoy! -[Command -1 finished with exit code 0]] +1|This is a stupid typoo. +2|Really? +3|No mor typos! +4|Enjoy! + + +---------- + +Let's correct the typos in the `bad.txt` file. + +edit_file(start=1, end=4, content="""This is a stupid typo. +Really? +No more typos! +Enjoy!""") + + +---------- + +OBSERVATION: +[File: /workspace/bad.txt (4 lines total after edit)] +1|This is a stupid typo. +2|Really? +3|No more typos! +4|Enjoy! +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + ENVIRONMENT REMINDER: You have 7 turns left to complete the task. 
diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log deleted file mode 100644 index 11c3a20391..0000000000 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_004.log +++ /dev/null @@ -1,293 +0,0 @@ - - ----------- - -A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. -The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "" tag, for example: - -print("Hello World!") - -The assistant can execute bash commands on behalf of the user by wrapping them with and . -For example, you can list the files in the current directory by ls . -The assistant can browse the Internet with commands on behalf of the user by wrapping them with and . -For example, you can browse a given URL by goto("") . -The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. -The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: - git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. - - -Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. 
If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. - -The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. -IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. 
- - ----------- - -Here is an example of how you can interact with the environment for task solving: - ---- START OF EXAMPLE --- - -USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? - -ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: - -CODE=''' -from flask import Flask -app = Flask(__name__) - -@app.route('/') -def index(): - numbers = list(range(1, 11)) - return str(numbers) - -if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) - - -USER: -Observation: -[Code executed successfully with no output] - -ASSISTANT: -I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: - -python3 app.py > server.log 2>&1 & - - -USER: -Observation: -[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 - -ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: - -cat server.log - - -USER: -Observation: -Traceback (most recent call last): - File "/workspace/app.py", line 2, in - from flask import Flask -ModuleNotFoundError: No module named 'flask' - -ASSISTANT: -It seems that Flask is not installed. 
Let me install Flask for you: - -pip install flask - - -USER: -Observation: -Defaulting to user installation because normal site-packages is not writeable -Collecting flask - Using cached flask-3.0.3-py3-none-any.whl (101 kB) -Collecting blinker>=1.6.2 - Using cached blinker-1.7.0-py3-none-any.whl (13 kB) -Collecting Werkzeug>=3.0.0 - Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB) -Collecting click>=8.1.3 - Using cached click-8.1.7-py3-none-any.whl (97 kB) -Collecting itsdangerous>=2.1.2 - Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB) -Requirement already satisfied: Jinja2>=3.1.2 in /home/opendevin/.local/lib/python3.10/site-packages (from flask) (3.1.3) -Requirement already satisfied: MarkupSafe>=2.0 in /home/opendevin/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5) -Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask -Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0 - -ASSISTANT: -Now that Flask is installed, let me run the Python file again: - -python3 app.py > server.log 2>&1 & - - -USER: -Observation: -[1] 124 - -ASSISTANT: -Let me check the server log again: - -cat server.log - - -USER: -Observation: -* Serving Flask app 'app' - * Debug mode: off -WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. - * Running on http://127.0.0.1:5000 -Press CTRL+C to quit - -ASSISTANT: -The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! - -USER: Now browse the newly started server's homepage and show me the content. - -ASSISTANT: -Sure! Let me browse the server's homepage at http://127.0.0.1:5000: - -goto("http://127.0.0.1:5000") - - -USER: -Observation: -[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - -ASSISTANT: -The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". 
If you have any further questions, feel free to ask! - -USER: Now kill the server, make it display the numbers in a table format. - -ASSISTANT: -Sure! Let me stop the server first: - -kill 124 - - -USER: -Observation: -[1]+ Terminated python3 app.py > server.log 2>&1 - -ASSISTANT: -The server has been stopped. Let me open the Python file and modify it. - -open app.py - - -USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) - -ASSISTANT: -I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - - -USER: -Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. - -ASSISTANT: -The file has been updated. Let me run the Python file again with the new changes: - -python3 app.py > server.log 2>&1 & - - -USER: -Observation: -[1] 126 - -ASSISTANT: -The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Free free to let me know if you have any further requests! - ---- END OF EXAMPLE --- - - -NOW, LET'S START! - ----------- - -Fix typos in bad.txt. Do not ask me for confirmation at any point. - ----------- - -Let's first view the content of the file `bad.txt` to identify the typos that need to be fixed. - -cat bad.txt - - ----------- - -OBSERVATION: -This is a stupid typoo. -Really? -No mor typos! -Enjoy! -[Command -1 finished with exit code 0]] - ----------- - -Let's correct the typos in the file `bad.txt`. I will open the file for editing and fix the typos without asking for confirmation. - -open bad.txt - - ----------- - -OBSERVATION: -[File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typoo. -2:Really? -3:No mor typos! -4:Enjoy! -[Command -1 finished with exit code 0]] - ----------- - -I will now fix the typos in the file `bad.txt` without asking for confirmation. Let me edit the file and correct the typos. - -edit 1:3 < - ----------- - -OBSERVATION: -> > > > [File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typo. -2:Really? -3:No more typos! -4:Enjoy! -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. -[Command -1 finished with exit code 0]] - -ENVIRONMENT REMINDER: You have 6 turns left to complete the task. 
diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_001.log b/tests/integration/mock/CodeActAgent/test_edits/response_001.log index d17564eb43..ad0e602b8f 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_001.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_001.log @@ -1,3 +1,4 @@ -Let's first view the content of the file `bad.txt` to identify the typos that need to be fixed. - -cat bad.txt +Sure! Let's start by opening the `bad.txt` file to see its content. + + +open_file('bad.txt') diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_002.log b/tests/integration/mock/CodeActAgent/test_edits/response_002.log index 70378a8be5..adeafcc6d6 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_002.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_002.log @@ -1,3 +1,7 @@ -Let's correct the typos in the file `bad.txt`. I will open the file for editing and fix the typos without asking for confirmation. - -open bad.txt +Let's correct the typos in the `bad.txt` file. + + +edit_file(start=1, end=4, content="""This is a stupid typo. +Really? +No more typos! +Enjoy!""") diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_003.log b/tests/integration/mock/CodeActAgent/test_edits/response_003.log index 8bb7cd59a8..613b77e658 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/response_003.log +++ b/tests/integration/mock/CodeActAgent/test_edits/response_003.log @@ -1,7 +1 @@ -I will now fix the typos in the file `bad.txt` without asking for confirmation. Let me edit the file and correct the typos. - -edit 1:3 < -save bad.txt diff --git a/tests/integration/mock/CodeActAgent/test_edits/response_005.log b/tests/integration/mock/CodeActAgent/test_edits/response_005.log deleted file mode 100644 index c663d376a9..0000000000 --- a/tests/integration/mock/CodeActAgent/test_edits/response_005.log +++ /dev/null @@ -1,3 +0,0 @@ -Apologies for the confusion. 
Let me save and close the file `bad.txt` for you without using the `save` command. - -exit diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log index 7dc44dac6f..9756c823bb 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. -Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. 
If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, you can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. + Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. 
+ +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! 
Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] + +ASSISTANT: +Now I will write the Python code for starting a web server and save it to a file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] 
ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. @@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. 
- -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log index 03383f4a3e..1c399f3445 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. 
-Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, you can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. 
+ Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. 
If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] + +ASSISTANT: +Now I will write the Python code for starting a web server and save it to a file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. 
Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. @@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. 
- -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: diff --git a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log index 412503e631..4e6627e81d 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. 
-Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, you can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. 
+ Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. 
If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] + +ASSISTANT: +Now I will write the Python code for starting a web server and save it to a file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. 
Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. @@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. 
- -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: @@ -235,4 +291,4 @@ NOW, LET'S START! Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point. -ENVIRONMENT REMINDER: You have 9 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 9 turns left to complete the task. diff --git a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log index 256aea25b2..8dba682146 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. 
+For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. -Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! 
Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, you can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. + Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. 
+ file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] 
+ +ASSISTANT: +Now I will write the Python code for starting a web server and save it to a file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. 
@@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. - -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: @@ -252,6 +308,7 @@ Once the installation is complete, we'll import `pymsgbox` and print its version ---------- OBSERVATION: -Package installed successfully +[Package installed successfully] +[Kernel restarted successfully to load the package] -ENVIRONMENT REMINDER: You have 8 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 8 turns left to complete the task. diff --git a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log index 3a6ca045a0..ef92a6085e 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. 
+For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. -Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! 
Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, you can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. + Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. 
+ file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] 
+ +ASSISTANT: +Now I will write the Python code for starting a web server and save it to a file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. 
@@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. - -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: @@ -252,7 +308,8 @@ Once the installation is complete, we'll import `pymsgbox` and print its version ---------- OBSERVATION: -Package installed successfully +[Package installed successfully] +[Kernel restarted successfully to load the package] ---------- @@ -279,4 +336,4 @@ with open("/workspace/test.txt", "w") as f: OBSERVATION: [Code executed successfully with no output] -ENVIRONMENT REMINDER: You have 7 turns left to complete the task. \ No newline at end of file +ENVIRONMENT REMINDER: You have 7 turns left to complete the task. diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log index ed65c82dc2..208e8d4968 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. 
-For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. -Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. 
Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, you can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. + Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. 
+ +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] 
+ +ASSISTANT: +Now I will write the Python code for starting a web server and save it to a file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. 
@@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. - -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log index 5f0cb3c14b..e0c2cc69f6 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. -For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. 
-Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, you can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. 
+ Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. + +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. 
If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] + +ASSISTANT: +Now I will write the Python code for starting a web server and save it to a file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. 
Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. @@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. 
- -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: @@ -237,18 +293,16 @@ Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmat ---------- -I will create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. Let me write the script for you: +Sure! I'll create the shell script `hello.sh` for you. -cat < hello.sh -#!/bin/bash -echo "hello" -EOF +echo 'echo "hello"' > hello.sh +chmod +x hello.sh ---------- OBSERVATION: -> > > + [Command -1 finished with exit code 0]] ENVIRONMENT REMINDER: You have 8 turns left to complete the task. diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_005.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log similarity index 53% rename from tests/integration/mock/CodeActAgent/test_edits/prompt_005.log rename to tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log index f7272cc41d..3d7e3840a6 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_005.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log @@ -13,28 +13,69 @@ The assistant can browse the Internet with commands on behalf of the user by wra For example, you can browse a given URL by goto("") . The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block. The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. -To do any activities on GitHub, you should use the token in the $GITHUB_TOKEN environment variable. 
-For instance, to push a local branch `my_branch` to the github repo `owner/repo`, you can use the following four commands: +To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable. +For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands: git push https://$GITHUB_TOKEN@github.com/owner/repo.git my_branch -If you require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it for you. +If the assistant require access to GitHub but $GITHUB_TOKEN is not set, ask the user to set it. -Apart from the standard bash commands, you can also use the following special commands in environment: -open [] - opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line -goto - moves the window to show -scroll_down - moves the window down {WINDOW} lines -scroll_up - moves the window down {WINDOW} lines -create - creates and opens a new file with the given name -search_dir [] - searches for search_term in all files in dir. If dir is not provided, searches in the current directory -search_file [] - searches for search_term in file. If file is not provided, searches in the current open file -find_file [] - finds all files with the given name in dir. If dir is not provided, searches in the current directory -edit : < -EOF - replaces lines through (inclusive) with the given text in the open file. The replacement text is delineated using heredoc syntax. All of the will be entered, so make sure your indentation is formatted properly. Python files will be checked for syntax errors after the edit. If the system detects a syntax error, the edit will not be executed. Simply try to edit the file again, but make sure to read the error message and modify the edit command you issue accordingly. Issuing the same command a second time will just lead to the same error message again. 
Remember, the file must be open before editing. -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Apart from the standard Python library, you can also use the following functions (already imported) in environment: +open_file(path: str, line_number: Optional[int] = None) -> None: + Opens the file at the given path in the editor. If line_number is provided, the window will be move to include that line. + Args: + path: str: The path to the file to open. + line_number: Optional[int]: The line number to move to. + +goto_line(line_number: int) -> None: + Moves the window to show the specified line number. + Args: + line_number: int: The line number to move to. + +scroll_down() -> None: + Moves the window down by 100 lines. + Args: + None + +scroll_up() -> None: + Moves the window up by 100 lines. + Args: + None + +create_file(filename: str) -> None: + Creates and opens a new file with the given name. + Args: + filename: str: The name of the file to create. + +edit_file(start: int, end: int, content: str) -> None: + Edit a file. + It replaces lines `start` through `end` (inclusive) with the given text `content` in the open file. Remember, the file must be open before editing. + Args: + start: int: The start line number. Must be greater or equal to 1. + end: int: The end line number. Must be greater or equal to 1 AND greater than start AND less than or equal to the number of lines in the file. + content: str: The content to replace the lines with. + +search_dir(search_term: str, dir_path: str = './') -> None: + Searches for search_term in all files in dir. If dir is not provided, searches in the current directory. + Args: + search_term: str: The term to search for. + dir_path: Optional[str]: The path to the directory to search. 
+ +search_file(search_term: str, file_path: Optional[str] = None) -> None: + Searches for search_term in file. If file is not provided, searches in the current open file. + Args: + search_term: str: The term to search for. + file_path: Optional[str]: The path to the file to search. + +find_file(file_name: str, dir_path: str = './') -> None: + Finds all files with the given name in the specified directory. + Args: + file_name: str: The name of the file to find. + dir_path: Optional[str]: The path to the directory to search. + +Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. -You should include ONLY ONE or or in every one of your responses, unless you are finished with the task or need more input or action from the user in order to proceed. +The assistant should include ONLY ONE or or in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed. IMPORTANT: Whenever possible, execute the code for the user using or or instead of providing it. @@ -47,10 +88,21 @@ Here is an example of how you can interact with the environment for task solving USER: Can you create a list of numbers from 1 to 10, and create a web page to display them at port 5000? ASSISTANT: -Sure! Let me write the Python code for starting a web server and save it to a file `app.py`: +Sure! Let me create a file first: -CODE=''' -from flask import Flask +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +1| +[File app.py created.] 
+ +ASSISTANT: +Now I will write the Python code for starting a web server and save it to a file `app.py`: + +EDITED_CODE="""from flask import Flask app = Flask(__name__) @app.route('/') @@ -59,15 +111,23 @@ def index(): return str(numbers) if __name__ == '__main__': - app.run(port=5000) -''' -with open('app.py', 'w') as f: - f.write(CODE) + app.run(port=5000)""" +edit_file(start=1, end=1, content=EDITED_CODE) USER: -Observation: -[Code executed successfully with no output] +OBSERVATION: +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: @@ -76,7 +136,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: @@ -85,7 +145,7 @@ cat server.log USER: -Observation: +OBSERVATION: Traceback (most recent call last): File "/workspace/app.py", line 2, in from flask import Flask @@ -98,7 +158,7 @@ pip install flask USER: -Observation: +OBSERVATION: Defaulting to user installation because normal site-packages is not writeable Collecting flask Using cached flask-3.0.3-py3-none-any.whl (101 kB) @@ -122,7 +182,7 @@ python3 app.py > server.log 2>&1 & USER: -Observation: +OBSERVATION: [1] 124 ASSISTANT: @@ -132,7 +192,7 @@ cat server.log USER: -Observation: +OBSERVATION: * Serving Flask app 'app' * Debug mode: off WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. 
@@ -151,7 +211,7 @@ goto("http://127.0.0.1:5000") USER: -Observation: +OBSERVATION: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ASSISTANT: @@ -166,52 +226,48 @@ kill 124 USER: -Observation: +OBSERVATION: [1]+ Terminated python3 app.py > server.log 2>&1 ASSISTANT: The server has been stopped. Let me open the Python file and modify it. - -open app.py - + +open_file('app.py') + USER: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return str(numbers) -9: -10:if __name__ == '__main__': -11: app.run(port=5000) +[File: /workspace/app.py (10 lines total)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) ASSISTANT: I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: - -edit 8:8 <' + ''.join([f'{i}' for i in numbers]) + '' -EOF - + +edit_file(start=7, end=7, content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'") +
USER: Observation: -[File: /workspace/app.py (11 lines total)] -1: -2:from flask import Flask -3:app = Flask(__name__) -4: -5:@app.route('/') -6:def index(): -7: numbers = list(range(1, 11)) -8: return '' + ''.join([f'' for i in numbers]) + '
{i}
' -9: -10:if __name__ == '__main__': -11: app.run(port=5000) -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. +[File: /workspace/app.py (10 lines total after edit)] +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] ASSISTANT: The file has been updated. Let me run the Python file again with the new changes: @@ -233,74 +289,33 @@ NOW, LET'S START! ---------- -Fix typos in bad.txt. Do not ask me for confirmation at any point. +Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point. ---------- -Let's first view the content of the file `bad.txt` to identify the typos that need to be fixed. +Sure! I'll create the shell script `hello.sh` for you. -cat bad.txt +echo 'echo "hello"' > hello.sh +chmod +x hello.sh ---------- OBSERVATION: -This is a stupid typoo. -Really? -No mor typos! -Enjoy! + [Command -1 finished with exit code 0]] ---------- -Let's correct the typos in the file `bad.txt`. I will open the file for editing and fix the typos without asking for confirmation. +The shell script `hello.sh` has been created and made executable. Let's run it to ensure it works correctly. -open bad.txt +./hello.sh ---------- OBSERVATION: -[File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typoo. -2:Really? -3:No mor typos! -4:Enjoy! +hello [Command -1 finished with exit code 0]] ----------- - -I will now fix the typos in the file `bad.txt` without asking for confirmation. Let me edit the file and correct the typos. - -edit 1:3 < - ----------- - -OBSERVATION: -> > > > [File: /workspace/bad.txt (4 lines total)] -1:This is a stupid typo. -2:Really? -3:No more typos! -4:Enjoy! -File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary. -[Command -1 finished with exit code 0]] - ----------- - -The typos in the file `bad.txt` have been corrected. Let me save the changes and close the file. 
- -save bad.txt - - ----------- - -OBSERVATION: --bash: save: command not found -[Command -1 finished with exit code 127]] - -ENVIRONMENT REMINDER: You have 5 turns left to complete the task. +ENVIRONMENT REMINDER: You have 7 turns left to complete the task. diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log index 6bfb95cd45..32afd1f026 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_001.log @@ -1,7 +1,5 @@ -I will create a shell script named 'hello.sh' that prints 'hello' without asking for confirmation. Let me write the script for you: +Sure! I'll create the shell script `hello.sh` for you. -cat < hello.sh -#!/bin/bash -echo "hello" -EOF +echo 'echo "hello"' > hello.sh +chmod +x hello.sh diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log index cf8d2d1418..ffcfd2a17a 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_002.log @@ -1 +1,4 @@ -The shell script 'hello.sh' that prints 'hello' has been successfully created. If you have any more requests or need further assistance, feel free to let me know! +The shell script `hello.sh` has been created and made executable. Let's run it to ensure it works correctly. 
+ + +./hello.sh diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/response_003.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_003.log new file mode 100644 index 0000000000..5a4c481ab4 --- /dev/null +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/response_003.log @@ -0,0 +1 @@ +The shell script `hello.sh` successfully prints "hello". If you have any further tasks or questions, feel free to let me know! diff --git a/tests/unit/test_agent_skill.py b/tests/unit/test_agent_skill.py new file mode 100644 index 0000000000..f1c30cda2f --- /dev/null +++ b/tests/unit/test_agent_skill.py @@ -0,0 +1,590 @@ +import contextlib +import io + +import pytest + +from opendevin.runtime.plugins.agent_skills.agentskills import ( + create_file, + edit_file, + find_file, + goto_line, + open_file, + scroll_down, + scroll_up, + search_dir, + search_file, +) + + +def test_open_file_unexist_path(): + with pytest.raises(FileNotFoundError): + open_file('/unexist/path/a.txt') + + +def test_open_file(tmp_path): + temp_file_path = tmp_path / 'a.txt' + temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path)) + result = buf.getvalue() + assert result is not None + expected = ( + f'[File: {temp_file_path} (5 lines total)]\n' + '1|Line 1\n' + '2|Line 2\n' + '3|Line 3\n' + '4|Line 4\n' + '5|Line 5\n' + ) + assert result.split('\n') == expected.split('\n') + + +def test_open_file_with_indentation(tmp_path): + temp_file_path = tmp_path / 'a.txt' + temp_file_path.write_text('Line 1\n Line 2\nLine 3\nLine 4\nLine 5') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path)) + result = buf.getvalue() + assert result is not None + expected = ( + f'[File: {temp_file_path} (5 lines total)]\n' + '1|Line 1\n' + '2| Line 2\n' + '3|Line 3\n' + '4|Line 4\n' + '5|Line 5\n' + ) + assert 
result.split('\n') == expected.split('\n') + + +def test_open_file_long(tmp_path): + temp_file_path = tmp_path / 'a.txt' + content = '\n'.join([f'Line {i}' for i in range(1, 1001)]) + temp_file_path.write_text(content) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path)) + result = buf.getvalue() + assert result is not None + expected = f'[File: {temp_file_path} (1000 lines total)]\n' + for i in range(1, 52): + expected += f'{i}|Line {i}\n' + assert result.split('\n') == expected.split('\n') + + +def test_open_file_long_with_lineno(tmp_path): + temp_file_path = tmp_path / 'a.txt' + content = '\n'.join([f'Line {i}' for i in range(1, 1001)]) + temp_file_path.write_text(content) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path), 100) + result = buf.getvalue() + assert result is not None + expected = f'[File: {temp_file_path} (1000 lines total)]\n' + for i in range(51, 151): + expected += f'{i}|Line {i}\n' + assert result.split('\n') == expected.split('\n') + + +def test_create_file_unexist_path(): + with pytest.raises(FileNotFoundError): + create_file('/unexist/path/a.txt') + + +def test_create_file(tmp_path): + temp_file_path = tmp_path / 'a.txt' + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + create_file(str(temp_file_path)) + result = buf.getvalue() + + expected = ( + f'[File: {temp_file_path} (1 lines total)]\n' + '1|\n' + f'[File {temp_file_path} created.]\n' + ) + assert result.split('\n') == expected.split('\n') + + +def test_goto_line(tmp_path): + temp_file_path = tmp_path / 'a.txt' + content = '\n'.join([f'Line {i}' for i in range(1, 1001)]) + temp_file_path.write_text(content) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path)) + result = buf.getvalue() + assert result is not None + + expected = f'[File: {temp_file_path} (1000 lines total)]\n' + for i in range(1, 52): + expected += 
f'{i}|Line {i}\n' + assert result.split('\n') == expected.split('\n') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + goto_line(100) + result = buf.getvalue() + assert result is not None + + expected = f'[File: {temp_file_path} (1000 lines total)]\n' + for i in range(51, 151): + expected += f'{i}|Line {i}\n' + assert result.split('\n') == expected.split('\n') + + +def test_goto_line_negative(tmp_path): + temp_file_path = tmp_path / 'a.txt' + content = '\n'.join([f'Line {i}' for i in range(1, 5)]) + temp_file_path.write_text(content) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path)) + with pytest.raises(ValueError): + goto_line(-1) + + +def test_goto_line_out_of_bound(tmp_path): + temp_file_path = tmp_path / 'a.txt' + content = '\n'.join([f'Line {i}' for i in range(1, 5)]) + temp_file_path.write_text(content) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path)) + with pytest.raises(ValueError): + goto_line(100) + + +def test_scroll_down(tmp_path): + temp_file_path = tmp_path / 'a.txt' + content = '\n'.join([f'Line {i}' for i in range(1, 1001)]) + temp_file_path.write_text(content) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path)) + result = buf.getvalue() + assert result is not None + + expected = f'[File: {temp_file_path} (1000 lines total)]\n' + for i in range(1, 52): + expected += f'{i}|Line {i}\n' + assert result.split('\n') == expected.split('\n') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + scroll_down() + result = buf.getvalue() + assert result is not None + + expected = f'[File: {temp_file_path} (1000 lines total)]\n' + for i in range(52, 152): + expected += f'{i}|Line {i}\n' + assert result.split('\n') == expected.split('\n') + + +def test_scroll_up(tmp_path): + temp_file_path = tmp_path / 'a.txt' + content = '\n'.join([f'Line {i}' for i in range(1, 
1001)]) + temp_file_path.write_text(content) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path), 300) + result = buf.getvalue() + assert result is not None + + expected = f'[File: {temp_file_path} (1000 lines total)]\n' + for i in range(251, 351): + expected += f'{i}|Line {i}\n' + assert result.split('\n') == expected.split('\n') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + scroll_up() + result = buf.getvalue() + assert result is not None + + expected = f'[File: {temp_file_path} (1000 lines total)]\n' + for i in range(151, 251): + expected += f'{i}|Line {i}\n' + assert result.split('\n') == expected.split('\n') + + +def test_scroll_down_edge(tmp_path): + temp_file_path = tmp_path / 'a.txt' + content = '\n'.join([f'Line {i}' for i in range(1, 10)]) + temp_file_path.write_text(content) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(temp_file_path)) + result = buf.getvalue() + assert result is not None + + expected = f'[File: {temp_file_path} (9 lines total)]\n' + for i in range(1, 10): + expected += f'{i}|Line {i}\n' + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + scroll_down() + result = buf.getvalue() + assert result is not None + + # expected should be unchanged + assert result.split('\n') == expected.split('\n') + + +def test_edit_file(tmp_path): + temp_file_path = tmp_path / 'a.txt' + content = 'Line 1\nLine 2\nLine 3\nLine 4\nLine 5' + temp_file_path.write_text(content) + + open_file(str(temp_file_path)) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + edit_file(start=1, end=3, content='REPLACE TEXT') + result = buf.getvalue() + expected = ( + f'[File: {temp_file_path} (3 lines total after edit)]\n' + '1|REPLACE TEXT\n' + '2|Line 4\n' + '3|Line 5\n' + '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). 
Edit the file again if necessary.]\n' + ) + assert result.split('\n') == expected.split('\n') + + with open(temp_file_path, 'r') as file: + lines = file.readlines() + assert len(lines) == 3 + assert lines[0].rstrip() == 'REPLACE TEXT' + assert lines[1].rstrip() == 'Line 4' + assert lines[2].rstrip() == 'Line 5' + + +def test_edit_file_from_scratch(tmp_path): + temp_file_path = tmp_path / 'a.txt' + create_file(str(temp_file_path)) + open_file(str(temp_file_path)) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + edit_file(start=1, end=1, content='REPLACE TEXT') + result = buf.getvalue() + expected = ( + f'[File: {temp_file_path} (1 lines total after edit)]\n' + '1|REPLACE TEXT\n' + '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n' + ) + assert result.split('\n') == expected.split('\n') + + with open(temp_file_path, 'r') as file: + lines = file.readlines() + assert len(lines) == 1 + assert lines[0].rstrip() == 'REPLACE TEXT' + + +def test_edit_file_from_scratch_multiline(tmp_path): + temp_file_path = tmp_path / 'a.txt' + create_file(str(temp_file_path)) + open_file(temp_file_path) + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + edit_file( + start=1, + end=1, + content='REPLACE TEXT1\nREPLACE TEXT2\nREPLACE TEXT3', + ) + result = buf.getvalue() + expected = ( + f'[File: {temp_file_path} (3 lines total after edit)]\n' + '1|REPLACE TEXT1\n' + '2|REPLACE TEXT2\n' + '3|REPLACE TEXT3\n' + '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). 
Edit the file again if necessary.]\n' + ) + assert result.split('\n') == expected.split('\n') + + with open(temp_file_path, 'r') as file: + lines = file.readlines() + assert len(lines) == 3 + assert lines[0].rstrip() == 'REPLACE TEXT1' + assert lines[1].rstrip() == 'REPLACE TEXT2' + assert lines[2].rstrip() == 'REPLACE TEXT3' + + +def test_edit_file_not_opened(): + with pytest.raises(FileNotFoundError): + edit_file(start=1, end=3, content='REPLACE TEXT') + + +def test_search_dir(tmp_path): + # create files with the search term "bingo" + for i in range(1, 101): + temp_file_path = tmp_path / f'a{i}.txt' + with open(temp_file_path, 'w') as file: + file.write('Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n') + if i == 50: + file.write('bingo') + + # test + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + search_dir('bingo', str(tmp_path)) + result = buf.getvalue() + assert result is not None + + expected = ( + f'[Found 1 matches for "bingo" in {tmp_path}]\n' + f'{tmp_path}/a50.txt (Line 6): bingo\n' + f'[End of matches for "bingo" in {tmp_path}]\n' + ) + assert result.split('\n') == expected.split('\n') + + +def test_search_dir_not_exist_term(tmp_path): + # create files with the search term "bingo" + for i in range(1, 101): + temp_file_path = tmp_path / f'a{i}.txt' + with open(temp_file_path, 'w') as file: + file.write('Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n') + + # test + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + search_dir('non-exist', str(tmp_path)) + result = buf.getvalue() + assert result is not None + + expected = f'No matches found for "non-exist" in {tmp_path}\n' + assert result.split('\n') == expected.split('\n') + + +def test_search_dir_too_much_match(tmp_path): + # create files with the search term "Line 5" + for i in range(1, 1000): + temp_file_path = tmp_path / f'a{i}.txt' + with open(temp_file_path, 'w') as file: + file.write('Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n') + + with io.StringIO() as buf: + with 
contextlib.redirect_stdout(buf): + search_dir('Line 5', str(tmp_path)) + result = buf.getvalue() + assert result is not None + + expected = f'More than 999 files matched for "Line 5" in {tmp_path}. Please narrow your search.\n' + assert result.split('\n') == expected.split('\n') + + +def test_search_dir_cwd(tmp_path, monkeypatch): + # Using pytest's monkeypatch to change directory without affecting other tests + monkeypatch.chdir(tmp_path) + # create files with the search term "bingo" + for i in range(1, 101): + temp_file_path = tmp_path / f'a{i}.txt' + with open(temp_file_path, 'w') as file: + file.write('Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n') + if i == 50: + file.write('bingo') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + search_dir('bingo') + result = buf.getvalue() + assert result is not None + + expected = ( + '[Found 1 matches for "bingo" in ./]\n' + './a50.txt (Line 6): bingo\n' + '[End of matches for "bingo" in ./]\n' + ) + assert result.split('\n') == expected.split('\n') + + +def test_search_file(tmp_path): + temp_file_path = tmp_path / 'a.txt' + temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + search_file('Line 5', str(temp_file_path)) + result = buf.getvalue() + assert result is not None + expected = f'[Found 1 matches for "Line 5" in {temp_file_path}]\n' + expected += 'Line 5: Line 5\n' + expected += f'[End of matches for "Line 5" in {temp_file_path}]\n' + assert result.split('\n') == expected.split('\n') + + +def test_search_file_not_exist_term(tmp_path): + temp_file_path = tmp_path / 'a.txt' + temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + search_file('Line 6', str(temp_file_path)) + result = buf.getvalue() + assert result is not None + + expected = f'[No matches found for "Line 6" in {temp_file_path}]\n' + assert result.split('\n') == 
expected.split('\n') + + +def test_search_file_not_exist_file(): + with pytest.raises(FileNotFoundError): + search_file('Line 6', '/unexist/path/a.txt') + + +def test_find_file(tmp_path): + temp_file_path = tmp_path / 'a.txt' + temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + find_file('a.txt', str(tmp_path)) + result = buf.getvalue() + assert result is not None + + expected = f'[Found 1 matches for "a.txt" in {tmp_path}]\n' + expected += f'{tmp_path}/a.txt\n' + expected += f'[End of matches for "a.txt" in {tmp_path}]\n' + assert result.split('\n') == expected.split('\n') + + +def test_find_file_cwd(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + temp_file_path = tmp_path / 'a.txt' + temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5') + + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + find_file('a.txt') + result = buf.getvalue() + assert result is not None + + +def test_find_file_not_exist_file(): + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + find_file('unexist.txt') + result = buf.getvalue() + assert result is not None + + expected = '[No matches found for "unexist.txt" in ./]\n' + assert result.split('\n') == expected.split('\n') + + +def test_find_file_not_exist_file_specific_path(tmp_path): + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + find_file('unexist.txt', str(tmp_path)) + result = buf.getvalue() + assert result is not None + + expected = f'[No matches found for "unexist.txt" in {tmp_path}]\n' + assert result.split('\n') == expected.split('\n') + + +def test_edit_lint_file_pass(tmp_path, monkeypatch): + # Create a Python file with correct syntax + file_path = tmp_path / 'test_file.py' + file_path.write_text('\n') + + # patch ENABLE_AUTO_LINT + monkeypatch.setattr( + 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True + ) + + # Test linting 
functionality + with io.StringIO() as buf: + with contextlib.redirect_stdout(buf): + open_file(str(file_path)) + edit_file(1, 1, "print('hello')\n") + result = buf.getvalue() + + assert result is not None + expected = ( + f'[File: {file_path} (1 lines total)]\n' + '1|\n' + f'[File: {file_path} (2 lines total after edit)]\n' + "1|print('hello')\n" + '2|\n' + '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n' + ) + assert result.split('\n') == expected.split('\n') + + +def test_lint_file_fail_undefined_name(tmp_path, monkeypatch, capsys): + # Create a Python file with a syntax error + file_path = tmp_path / 'test_file.py' + file_path.write_text('\n') + + # Set environment variable to enable linting + monkeypatch.setattr( + 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True + ) + + open_file(str(file_path)) + edit_file(1, 1, 'undefined_name()\n') + + result = capsys.readouterr().out + print(result) + + assert result is not None + expected = ( + f'[File: {file_path} (1 lines total)]\n' + '1|\n' + '[Your proposed edit has introduced new syntax error(s). 
Please understand the errors and retry your edit command.]\n' + 'ERRORS:\n' + f"{file_path}:1:1: F821 undefined name 'undefined_name'\n" + '[This is how your edit would have looked if applied]\n' + '-------------------------------------------------\n' + '1|undefined_name()\n' + '2|\n' + '-------------------------------------------------\n\n' + '[This is the original code before your edit]\n' + '-------------------------------------------------\n' + '1|\n' + '-------------------------------------------------\n' + ) + assert result.split('\n') == expected.split('\n') + + +def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys): + # Create a Python file with a syntax error + file_path = tmp_path / 'test_file.py' + file_path.write_text('\n') + + # Set environment variable to enable linting + monkeypatch.setattr( + 'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', False + ) + + open_file(str(file_path)) + edit_file(1, 1, 'undefined_name()\n') + + result = capsys.readouterr().out + assert result is not None + expected = ( + f'[File: {file_path} (1 lines total)]\n' + '1|\n' + f'[File: {file_path} (2 lines total after edit)]\n' + '1|undefined_name()\n' + '2|\n' + '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n' + ) + assert result.split('\n') == expected.split('\n')