stop printing

allow specifying max iter in cmdline script
fix issue for CodeActSWEAgent
2026-04-29 03:00:45 -04:00 · 2024-05-28 23:01:35 +08:00 · 2024-05-28 22:42:30 +08:00 · 2024-05-28 22:26:49 +08:00 · 2024-05-28 22:25:56 +08:00 · 2024-05-28 22:20:12 +08:00
14 changed files with 1017 additions and 74 deletions
--- a/agenthub/init.py
+++ b/agenthub/init.py
@@ -12,6 +12,7 @@ from . import (  # noqa: E402
    SWE_agent,
    browsing_agent,
    codeact_agent,
+    codeact_swe_agent,
    delegator_agent,
    dummy_agent,
    monologue_agent,
@@ -21,6 +22,7 @@ from . import (  # noqa: E402
 __all__ = [
    'monologue_agent',
    'codeact_agent',
+    'codeact_swe_agent',
    'planner_agent',
    'SWE_agent',
    'delegator_agent',
--- a/agenthub/codeact_agent/codeact_agent.py
+++ b/agenthub/codeact_agent/codeact_agent.py
@@ -105,6 +105,18 @@ def truncate_observation(observation: str, max_chars: int = 10_000) -> str:
    )


+# FIXME: We can tweak these two settings (the system message and the in-context example) to create MicroAgents specialized for different areas
+def get_system_message() -> str:
+    if ENABLE_GITHUB:
+        return f'{SYSTEM_PREFIX}\n{GITHUB_MESSAGE}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
+    else:
+        return f'{SYSTEM_PREFIX}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
+
+
+def get_in_context_example() -> str:
+    return EXAMPLES
+
+
 class CodeActAgent(Agent):
    VERSION = '1.5'
    """
@@ -152,11 +164,8 @@ class CodeActAgent(Agent):
    ]
    jupyter_kernel_init_code: str = 'from agentskills import *'

-    system_message: str = (
-        f'{SYSTEM_PREFIX}\n{GITHUB_MESSAGE}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
-        if ENABLE_GITHUB
-        else f'{SYSTEM_PREFIX}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
-    )
+    system_message: str = get_system_message()
+    in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"

    def __init__(
        self,
@@ -194,10 +203,7 @@ class CodeActAgent(Agent):
        """
        messages: list[dict[str, str]] = [
            {'role': 'system', 'content': self.system_message},
-            {
-                'role': 'user',
-                'content': f"Here is an example of how you can interact with the environment for task solving:\n{EXAMPLES}\n\nNOW, LET'S START!",
-            },
+            {'role': 'user', 'content': self.in_context_example},
        ]

        for prev_action, obs in state.history:
--- a/agenthub/codeact_agent/prompt.py
+++ b/agenthub/codeact_agent/prompt.py
@@ -8,17 +8,23 @@ COMMAND_DOCS = (
    "Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line '        print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run."
 )

-SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
+# ======= SYSTEM MESSAGE =======
+MINIMAL_SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
 The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
 <execute_ipython>
 print("Hello World!")
 </execute_ipython>
 The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
 For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
-The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
+"""
+
+BROWSING_PREFIX = """The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
 For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
 The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
-The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them."""
+"""
+PIP_INSTALL_PREFIX = """The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them."""
+
+SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX

 GITHUB_MESSAGE = """To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
 For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands:
@@ -30,6 +36,8 @@ The assistant should include ONLY ONE <execute_ipython> or <execute_bash> or <ex
 IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
 """

+
+# ======= EXAMPLE MESSAGE =======
 EXAMPLES = """
 --- START OF EXAMPLE ---

--- a/agenthub/codeact_swe_agent/README.md
+++ b/agenthub/codeact_swe_agent/README.md
@@ -0,0 +1,7 @@
+# CodeAct (SWE Edit Specialized)
+
+This agent is an adaptation of the original [SWE Agent](https://swe-agent.com/) based on CodeAct using the `agentskills` library of OpenDevin.
+
+Its intended use is **solving GitHub issues**.
+
+It removes the web-browsing and GitHub capabilities from the original CodeAct agent to avoid confusing the agent.
--- a/agenthub/codeact_swe_agent/init.py
+++ b/agenthub/codeact_swe_agent/init.py
@@ -0,0 +1,5 @@
+from opendevin.controller.agent import Agent
+
+from .codeact_swe_agent import CodeActSWEAgent
+
+Agent.register('CodeActSWEAgent', CodeActSWEAgent)
--- a/agenthub/codeact_swe_agent/codeact_swe_agent.py
+++ b/agenthub/codeact_swe_agent/codeact_swe_agent.py
@@ -0,0 +1,246 @@
+import re
+
+from agenthub.codeact_swe_agent.prompt import (
+    COMMAND_DOCS,
+    MINIMAL_SYSTEM_PREFIX,
+    SWE_EXAMPLE,
+    SYSTEM_SUFFIX,
+)
+from opendevin.controller.agent import Agent
+from opendevin.controller.state.state import State
+from opendevin.events.action import (
+    Action,
+    AgentFinishAction,
+    BrowseInteractiveAction,
+    CmdRunAction,
+    IPythonRunCellAction,
+    MessageAction,
+)
+from opendevin.events.observation import (
+    BrowserOutputObservation,
+    CmdOutputObservation,
+    IPythonRunCellObservation,
+)
+from opendevin.llm.llm import LLM
+from opendevin.runtime.plugins import (
+    AgentSkillsRequirement,
+    JupyterRequirement,
+    PluginRequirement,
+)
+
+
+def parse_response(response) -> str:
+    action = response.choices[0].message.content
+    for lang in ['bash', 'ipython', 'browse']:
+        if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
+            action += f'</execute_{lang}>'
+    return action
+
+
+def action_to_str(action: Action) -> str:
+    if isinstance(action, CmdRunAction):
+        return f'{action.thought}\n<execute_bash>\n{action.command}\n</execute_bash>'
+    elif isinstance(action, IPythonRunCellAction):
+        return f'{action.thought}\n<execute_ipython>\n{action.code}\n</execute_ipython>'
+    elif isinstance(action, BrowseInteractiveAction):
+        return f'{action.thought}\n<execute_browse>\n{action.browser_actions}\n</execute_browse>'
+    elif isinstance(action, MessageAction):
+        return action.content
+    return ''
+
+
+def get_action_message(action: Action) -> dict[str, str] | None:
+    if (
+        isinstance(action, BrowseInteractiveAction)
+        or isinstance(action, CmdRunAction)
+        or isinstance(action, IPythonRunCellAction)
+        or isinstance(action, MessageAction)
+    ):
+        return {
+            'role': 'user' if action.source == 'user' else 'assistant',
+            'content': action_to_str(action),
+        }
+    return None
+
+
+def get_observation_message(obs) -> dict[str, str] | None:
+    if isinstance(obs, CmdOutputObservation):
+        content = 'OBSERVATION:\n' + truncate_observation(obs.content)
+        content += (
+            f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]]'
+        )
+        return {'role': 'user', 'content': content}
+    elif isinstance(obs, IPythonRunCellObservation):
+        content = 'OBSERVATION:\n' + obs.content
+        # replace base64 images with a placeholder
+        splitted = content.split('\n')
+        for i, line in enumerate(splitted):
+            if '![image](data:image/png;base64,' in line:
+                splitted[i] = (
+                    '![image](data:image/png;base64, ...) already displayed to user'
+                )
+        content = '\n'.join(splitted)
+        content = truncate_observation(content)
+        return {'role': 'user', 'content': content}
+    elif isinstance(obs, BrowserOutputObservation):
+        content = 'OBSERVATION:\n' + truncate_observation(obs.content)
+        return {'role': 'user', 'content': content}
+    return None
+
+
+def truncate_observation(observation: str, max_chars: int = 10_000) -> str:
+    """
+    Truncate the middle of the observation if it is too long.
+    """
+    if len(observation) <= max_chars:
+        return observation
+    half = max_chars // 2
+    return (
+        observation[:half]
+        + '\n[... Observation truncated due to length ...]\n'
+        + observation[-half:]
+    )
+
+
+def get_system_message() -> str:
+    return f'{MINIMAL_SYSTEM_PREFIX}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
+
+
+def get_in_context_example() -> str:
+    return SWE_EXAMPLE
+
+
+class CodeActSWEAgent(Agent):
+    VERSION = '1.5'
+    """
+    This agent is an adaptation of the original [SWE Agent](https://swe-agent.com/) based on CodeAct 1.5 using the `agentskills` library of OpenDevin.
+
+    It is intended use is **solving Github issues**.
+
+    It removes web-browsing and Github capability from the original CodeAct agent to avoid confusion to the agent.
+    """
+
+    sandbox_plugins: list[PluginRequirement] = [
+        # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
+        # AgentSkillsRequirement provides a lot of Python functions
+        # and it need to be initialized before Jupyter for Jupyter to use those functions.
+        AgentSkillsRequirement(),
+        JupyterRequirement(),
+    ]
+    jupyter_kernel_init_code: str = 'from agentskills import *'
+
+    system_message: str = get_system_message()
+    in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"
+
+    def __init__(
+        self,
+        llm: LLM,
+    ) -> None:
+        """
+        Initializes a new instance of the CodeActAgent class.
+
+        Parameters:
+        - llm (LLM): The llm to be used by this agent
+        """
+        super().__init__(llm)
+        self.reset()
+
+    def reset(self) -> None:
+        """
+        Resets the CodeAct Agent.
+        """
+        super().reset()
+
+    def step(self, state: State) -> Action:
+        """
+        Performs one step using the CodeAct Agent.
+        This includes gathering info on previous steps and prompting the model to make a command to execute.
+
+        Parameters:
+        - state (State): used to get updated info and background commands
+
+        Returns:
+        - CmdRunAction(command) - bash command to run
+        - IPythonRunCellAction(code) - IPython code to run
+        - BrowseInteractiveAction(browsergym_command) - BrowserGym commands to run
+        - MessageAction(content) - Message action to run (e.g. ask for clarification)
+        - AgentFinishAction() - end the interaction
+        """
+        messages: list[dict[str, str]] = [
+            {'role': 'system', 'content': self.system_message},
+            {'role': 'user', 'content': self.in_context_example},
+        ]
+
+        for prev_action, obs in state.history:
+            action_message = get_action_message(prev_action)
+            if action_message:
+                messages.append(action_message)
+
+            obs_message = get_observation_message(obs)
+            if obs_message:
+                messages.append(obs_message)
+
+        latest_user_message = [m for m in messages if m['role'] == 'user'][-1]
+        if latest_user_message:
+            if latest_user_message['content'].strip() == '/exit':
+                return AgentFinishAction()
+            latest_user_message['content'] += (
+                f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.'
+            )
+
+        response = self.llm.do_completion(
+            messages=messages,
+            stop=[
+                '</execute_ipython>',
+                '</execute_bash>',
+                '</execute_browse>',
+            ],
+            temperature=0.0,
+        )
+
+        action_str: str = parse_response(response)
+        state.num_of_chars += sum(
+            len(message['content']) for message in messages
+        ) + len(action_str)
+
+        if finish_command := re.search(r'<finish>.*</finish>', action_str, re.DOTALL):
+            thought = action_str.replace(finish_command.group(0), '').strip()
+            return AgentFinishAction(thought=thought)
+        if bash_command := re.search(
+            r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
+        ):
+            # remove the command from the action string to get thought
+            thought = action_str.replace(bash_command.group(0), '').strip()
+            # a command was found
+            command_group = bash_command.group(1).strip()
+
+            if command_group.strip() == 'exit':
+                return AgentFinishAction()
+            return CmdRunAction(command=command_group, thought=thought)
+        elif python_code := re.search(
+            r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
+        ):
+            # a code block was found
+            code_group = python_code.group(1).strip()
+            thought = action_str.replace(python_code.group(0), '').strip()
+            return IPythonRunCellAction(
+                code=code_group,
+                thought=thought,
+                kernel_init_code=self.jupyter_kernel_init_code,
+            )
+        elif browse_command := re.search(
+            r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
+        ):
+            # BrowserGym actions was found
+            browse_actions = browse_command.group(1).strip()
+            thought = action_str.replace(browse_command.group(0), '').strip()
+            return BrowseInteractiveAction(
+                browser_actions=browse_actions, thought=thought
+            )
+        else:
+            # We assume the LLM is GOOD enough that when it returns pure natural language
+            # it want to talk to the user
+            return MessageAction(content=action_str, wait_for_response=True)
+
+    def search_memory(self, query: str) -> list[str]:
+        raise NotImplementedError('Implement this abstract method')
--- a/agenthub/codeact_swe_agent/prompt.py
+++ b/agenthub/codeact_swe_agent/prompt.py
@@ -0,0 +1,451 @@
+from opendevin.runtime.plugins import AgentSkillsRequirement
+
+_AGENT_SKILLS_DOCS = AgentSkillsRequirement.documentation
+
+COMMAND_DOCS = (
+    '\nApart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:\n'
+    f'{_AGENT_SKILLS_DOCS}'
+    "Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line '        print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run."
+)
+
+# ======= SYSTEM MESSAGE =======
+MINIMAL_SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
+The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
+<execute_ipython>
+print("Hello World!")
+</execute_ipython>
+The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
+For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
+"""
+
+SYSTEM_SUFFIX = """The assistant's response should be concise.
+The assistant should include ONLY ONE <execute_ipython> or <execute_bash> or <execute_browse> in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed.
+IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
+"""
+
+SWE_EXAMPLE = """
+--- START OF EXAMPLE ---
+
+USER: We are currently solving the following issue within our repository. Here is the issue text:
+--- BEGIN ISSUE ---
+TimeDelta serialization precision
+Hi there!
+
+I just found quite strange behaviour of `TimeDelta` field serialization
+
+```python3
+from marshmallow.fields import TimeDelta
+from datetime import timedelta
+
+td_field = TimeDelta(precision="milliseconds")
+
+obj = dict()
+obj["td_field"] = timedelta(milliseconds=345)
+
+print(td_field.serialize("td_field", obj))
+```
+
+Output of this snippet is `344`, but it seems that `345` is correct.
+
+Looks like a rounding issue here: https://github.com/marshmallow-code/marshmallow/blob/dev/src/marshmallow/fields.py#L1474
+--- END ISSUE ---
+
+[Current directory: /workspace/marshmallow-code__marshmallow]
+
+ASSISTANT:
+Let's first start by reproducing the results of the issue. The issue includes some example code for reproduction, which we can use. We'll create a new file called `reproduce.py` and paste the example code into it.
+<execute_ipython>
+create_file('/workspace/marshmallow-code__marshmallow/reproduce.py')
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /workspace/marshmallow-code__marshmallow/reproduce.py (1 lines total)]
+1|
+[File /workspace/marshmallow-code__marshmallow/reproduce.py created.]
+
+ASSISTANT:
+Now let's paste in the example code from the issue.
+<execute_ipython>
+EDITED_CODE = \"\"\"from marshmallow.fields import TimeDelta
+from datetime import timedelta
+
+td_field = TimeDelta(precision="milliseconds")
+
+obj = dict()
+obj["td_field"] = timedelta(milliseconds=345)
+
+print(td_field.serialize("td_field", obj))
+\"\"\"
+edit_file(start=1, end=1, content=EDITED_CODE)
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /workspace/marshmallow-code__marshmallow/reproduce.py (9 lines total)]
+1|from marshmallow.fields import TimeDelta
+2|from datetime import timedelta
+3|
+4|td_field = TimeDelta(precision="milliseconds")
+5|
+6|obj = dict()
+7|obj["td_field"] = timedelta(milliseconds=345)
+8|
+9|print(td_field.serialize("td_field", obj))
+[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+Now let's run the code to see if we see the same output as the issue.
+<execute_bash>
+python reproduce.py
+</execute_bash>
+
+USER:
+OBSERVATION:
+344
+
+ASSISTANT:
+We are indeed seeing the same output as the issue. The issue suggests that we should look at line 1474 of the `fields.py` file to see if there is a rounding issue. Let's open the `fields.py` file and navigate to line 1474 to see the relevant code. First we should find the `fields.py` file in the repository. It is likely to be in the `src/marshmallow` directory, but we should check to be sure.
+<execute_bash>
+ls -F
+</execute_bash>
+
+USER:
+OBSERVATION:
+AUTHORS.rst
+CHANGELOG.rst
+CODE_OF_CONDUCT.md
+CONTRIBUTING.rst
+LICENSE
+MANIFEST.in
+NOTICE
+README.rst
+RELEASING.md
+azure-pipelines.yml
+docs/
+examples/
+performance/
+pyproject.toml
+reproduce.py
+setup.cfg
+setup.py
+src/
+tests/
+tox.ini
+
+ASSISTANT:
+It looks like the `src` directory is present, which suggests that the `fields.py` file is likely to be in the `src` directory. Let's use find_file to see where it is.
+<execute_ipython>
+find_file("fields.py", "/workspace/marshmallow-code__marshmallow/src")
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[Found 1 matches for "fields.py" in /workspace/marshmallow-code__marshmallow/src]
+/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py
+
+ASSISTANT:
+It looks like the `fields.py` file is present in the `./src/marshmallow/` directory. The issue also points to a specific URL with line number 1474. We should navigate to that line in fields.py to see the relevant code for the `TimeDelta` serialization.
+<execute_ipython>
+open_file("/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py", 1474)
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
+(1458 more lines above)
+1459|            self.WEEKS,
+1460|        )
+1461|
+1462|        if precision not in units:
+1463|            msg = 'The precision must be {} or "{}".'.format(
+1464|                ", ".join([f'"{each}"' for each in units[:-1]]), units[-1]
+1465|            )
+1466|            raise ValueError(msg)
+1467|
+1468|        self.precision = precision
+1469|        super().__init__(**kwargs)
+1470|
+1471|    def _serialize(self, value, attr, obj, **kwargs):
+1472|        if value is None:
+1473|            return None
+1474|        base_unit = dt.timedelta(**{self.precision: 1})
+1475|        return int(value.total_seconds() / base_unit.total_seconds())
+1476|
+1477|    def _deserialize(self, value, attr, data, **kwargs):
+1478|        try:
+1479|            value = int(value)
+1480|        except (TypeError, ValueError) as error:
+1481|            raise self.make_error("invalid") from error
+1482|
+1483|        kwargs = {self.precision: value}
+1484|
+1485|        try:
+1486|            return dt.timedelta(**kwargs)
+1487|        except OverflowError as error:
+1488|            raise self.make_error("invalid") from error
+1489|
+1490|
+1491|class Mapping(Field):
+1492|    \"\"\"An abstract class for objects with key-value pairs.
+1493|
+1494|    :param keys: A field class or instance for dict keys.
+1495|    :param values: A field class or instance for dict values.
+1496|    :param kwargs: The same keyword arguments that :class:`Field` receives.
+1497|
+1498|    .. note::
+1499|        When the structure of nested data is not known, you may omit the
+1500|        `keys` and `values` arguments to prevent content validation.
+1501|
+1502|    .. versionadded:: 3.0.0rc4
+1503|    \"\"\"
+1504|
+1505|    mapping_type = dict
+1506|
+1507|    #: Default error messages.
+1508|    default_error_messages = {"invalid": "Not a valid mapping type."}
+1509|
+1510|    def __init__(
+1511|        self,
+1512|        keys: typing.Optional[typing.Union[Field, type]] = None,
+1513|        values: typing.Optional[typing.Union[Field, type]] = None,
+1514|        **kwargs,
+1515|    ):
+1516|        super().__init__(**kwargs)
+1517|        if keys is None:
+1518|            self.key_field = None
+1519|        else:
+1520|            try:
+1521|                self.key_field = resolve_field_instance(keys)
+1522|            except FieldInstanceResolutionError as error:
+1523|                raise ValueError(
+1524|                    '"keys" must be a subclass or instance of '
+1525|                    "marshmallow.base.FieldABC."
+1526|                ) from error
+1527|
+1528|        if values is None:
+1529|            self.value_field = None
+1530|        else:
+1531|            try:
+1532|                self.value_field = resolve_field_instance(values)
+1533|            except FieldInstanceResolutionError as error:
+1534|                raise ValueError(
+1535|                    '"values" must be a subclass or instance of '
+1536|                    "marshmallow.base.FieldABC."
+1537|                ) from error
+1538|            if isinstance(self.value_field, Nested):
+1539|                self.only = self.value_field.only
+1540|                self.exclude = self.value_field.exclude
+1541|
+1542|    def _bind_to_schema(self, field_name, schema):
+1543|        super()._bind_to_schema(field_name, schema)
+1544|        if self.value_field:
+1545|            self.value_field = copy.deepcopy(self.value_field)
+1546|            self.value_field._bind_to_schema(field_name, self)
+1547|        if isinstance(self.value_field, Nested):
+1548|            self.value_field.only = self.only
+1549|            self.value_field.exclude = self.exclude
+1550|        if self.key_field:
+1551|            self.key_field = copy.deepcopy(self.key_field)
+1552|            self.key_field._bind_to_schema(field_name, self)
+1553|
+1554|    def _serialize(self, value, attr, obj, **kwargs):
+1555|        if value is None:
+1556|            return None
+1557|        if not self.value_field and not self.key_field:
+1558|            return self.mapping_type(value)
+(439 more lines below)
+
+ASSISTANT:
+We are now looking at the relevant section of the `fields.py` file where the `TimeDelta` serialization occurs. The issue suggests that there is a rounding problem near line 1474. The current implementation uses integer division, which truncates the decimal part and can lead to incorrect rounding on line 1475. To fix this issue, we should ensure that the division results in a float, and then round the result to the nearest integer before converting it to an int. This should preserve the milliseconds precision as expected. Let's make the necessary edit to the code.
+<execute_ipython>
+edit_file(start=1475, end=1475, content="return int(round(value.total_seconds() / base_unit.total_seconds()))  # round to nearest int")
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
+
+ERRORS:
+- E999 IndentationError: unexpected indent
+
+[This is how your edit would have looked if applied]
+-------------------------------------------------
+[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
+(1468 more lines above)
+1469|        super().__init__(**kwargs)
+1470|
+1471|    def _serialize(self, value, attr, obj, **kwargs):
+1472|        if value is None:
+1473|            return None
+1474|        base_unit = dt.timedelta(**{self.precision: 1})
+1475|return int(round(value.total_seconds() / base_unit.total_seconds()))  # round to nearest int
+1476|
+1477|    def _deserialize(self, value, attr, data, **kwargs):
+1478|        try:
+1479|            value = int(value)
+(518 more lines below)
+-------------------------------------------------
+
+[This is the original code before your edit]
+-------------------------------------------------
+[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
+(1469 more lines above)
+1470|
+1471|    def _serialize(self, value, attr, obj, **kwargs):
+1472|        if value is None:
+1473|            return None
+1474|        base_unit = dt.timedelta(**{self.precision: 1})
+1475|        return int(value.total_seconds() / base_unit.total_seconds())
+1476|
+1477|    def _deserialize(self, value, attr, data, **kwargs):
+1478|        try:
+1479|            value = int(value)
+1480|        except (TypeError, ValueError) as error:
+(517 more lines below)
+-------------------------------------------------
+Your changes have NOT been applied. Please fix your edit command and try again.
+You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
+DO NOT re-run the same failed edit command. Running it again will lead to the same error.
+
+ASSISTANT:
+My edit command did not use the proper indentation, I will fix my syntax in this follow up edit command.
+<execute_ipython>
+EDITED_CODE="        return int(round(value.total_seconds() / base_unit.total_seconds()))  # round to nearest int"
+edit_file(start=1475, end=1475, content=EDITED_CODE)
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
+(1424 more lines above)
+1425|
+1426|    :param precision: Influences how the integer is interpreted during
+1427|        (de)serialization. Must be 'days', 'seconds', 'microseconds',
+1428|        'milliseconds', 'minutes', 'hours' or 'weeks'.
+1429|    :param kwargs: The same keyword arguments that :class:`Field` receives.
+1430|
+1431|    .. versionchanged:: 2.0.0
+1432|        Always serializes to an integer value to avoid rounding errors.
+1433|        Add `precision` parameter.
+1434|    \"\"\"
+1435|
+1436|    DAYS = "days"
+1437|    SECONDS = "seconds"
+1438|    MICROSECONDS = "microseconds"
+1439|    MILLISECONDS = "milliseconds"
+1440|    MINUTES = "minutes"
+1441|    HOURS = "hours"
+1442|    WEEKS = "weeks"
+1443|
+1444|    #: Default error messages.
+1445|    default_error_messages = {
+1446|        "invalid": "Not a valid period of time.",
+1447|        "format": "{input!r} cannot be formatted as a timedelta.",
+1448|    }
+1449|
+1450|    def __init__(self, precision: str = SECONDS, **kwargs):
+1451|        precision = precision.lower()
+1452|        units = (
+1453|            self.DAYS,
+1454|            self.SECONDS,
+1455|            self.MICROSECONDS,
+1456|            self.MILLISECONDS,
+1457|            self.MINUTES,
+1458|            self.HOURS,
+1459|            self.WEEKS,
+1460|        )
+1461|
+1462|        if precision not in units:
+1463|            msg = 'The precision must be {} or "{}".'.format(
+1464|                ", ".join([f'"{each}"' for each in units[:-1]]), units[-1]
+1465|            )
+1466|            raise ValueError(msg)
+1467|
+1468|        self.precision = precision
+1469|        super().__init__(**kwargs)
+1470|
+1471|    def _serialize(self, value, attr, obj, **kwargs):
+1472|        if value is None:
+1473|            return None
+1474|        base_unit = dt.timedelta(**{self.precision: 1})
+1475|        return int(round(value.total_seconds() / base_unit.total_seconds()))  # round to nearest int
+1476|
+1477|    def _deserialize(self, value, attr, data, **kwargs):
+1478|        try:
+1479|            value = int(value)
+1480|        except (TypeError, ValueError) as error:
+1481|            raise self.make_error("invalid") from error
+1482|
+1483|        kwargs = {self.precision: value}
+1484|
+1485|        try:
+1486|            return dt.timedelta(**kwargs)
+1487|        except OverflowError as error:
+1488|            raise self.make_error("invalid") from error
+1489|
+1490|
+1491|class Mapping(Field):
+1492|    \"\"\"An abstract class for objects with key-value pairs.
+1493|
+1494|    :param keys: A field class or instance for dict keys.
+1495|    :param values: A field class or instance for dict values.
+1496|    :param kwargs: The same keyword arguments that :class:`Field` receives.
+1497|
+1498|    .. note::
+1499|        When the structure of nested data is not known, you may omit the
+1500|        `keys` and `values` arguments to prevent content validation.
+1501|
+1502|    .. versionadded:: 3.0.0rc4
+1503|    \"\"\"
+1504|
+1505|    mapping_type = dict
+1506|
+1507|    #: Default error messages.
+1508|    default_error_messages = {"invalid": "Not a valid mapping type."}
+1509|
+1510|    def __init__(
+1511|        self,
+1512|        keys: typing.Optional[typing.Union[Field, type]] = None,
+1513|        values: typing.Optional[typing.Union[Field, type]] = None,
+1514|        **kwargs,
+1515|    ):
+1516|        super().__init__(**kwargs)
+1517|        if keys is None:
+1518|            self.key_field = None
+1519|        else:
+1520|            try:
+1521|                self.key_field = resolve_field_instance(keys)
+1522|            except FieldInstanceResolutionError as error:
+1523|                raise ValueError(
+1524|                    '"keys" must be a subclass or instance of '
+(473 more lines below)
+[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+The code has been updated to use the `round` function, which should fix the rounding issue. Before submitting the changes, it would be prudent to run the reproduce.py code again to ensure that this change has the desired effect. I see that "Current directory" is still `/marshmallow-code__marshmallow`, so we can just run `reproduce.py` directly.
+<execute_bash>
+python reproduce.py
+</execute_bash>
+
+USER:
+OBSERVATION:
+345
+
+ASSISTANT:
+The output has changed from 344 to 345, which suggests that the rounding issue has been fixed. Let's remove the reproduce.py file since it is no longer needed.
+<execute_bash>
+rm reproduce.py
+</execute_bash>
+
+USER:
+OBSERVATION:
+
+ASSISTANT:
+rm doesn't have any output when it deletes successfully, so that must have worked. Now that we have fixed the issue, we can exit the current task.
+<execute_bash>
+exit
+</execute_bash>
+
+--- END OF EXAMPLE ---
+"""
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -62,11 +62,13 @@ def monologue_user_response(state: State) -> str:

 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
+    'CodeActSWEAgent': codeact_user_response,
    'MonologueAgent': monologue_user_response,
 }

 AGENT_CLS_TO_INST_SUFFIX = {
-    'CodeActAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n'
+    'CodeActAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n',
+    'CodeActSWEAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n',
 }


@@ -243,19 +245,62 @@ def process_instance(
    )

    # Prepare instruction
-    instruction = (
-        f'Please fix the following issue for the repository in /workspace/{workspace_dir_name}.\n'
-        'Environment has been set up for you to start working. You may assume all necessary tools are installed.\n\n'
-        '# Problem Statement\n'
-        f'{instance.problem_statement}\n\n'
-    )
-    if USE_HINT_TEXT and instance.hints_text:
-        instruction += f'# Hints\n{instance.hints_text}\n\n'
-    instruction += (
-        'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n'
-        'You should NOT modify any existing test case files. If needed, you can add new test cases in a NEW file to reproduce the issue.\n'
-        'You SHOULD INCLUDE PROPER INDENTATION in your edit commands.\n'
-    )
+    if agent_class == 'CodeActSWEAgent':
+        instruction = (
+            'We are currently solving the following issue within our repository. Here is the issue text:\n'
+            '--- BEGIN ISSUE ---\n'
+            f'{instance.problem_statement}\n'
+            '--- END ISSUE ---\n\n'
+        )
+
+        if USE_HINT_TEXT and instance.hints_text:
+            instruction += (
+                f'--- BEGIN HINTS ---\n{instance.hints_text}\n--- END HINTS ---\n'
+            )
+        instruction += f"""Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want.
+Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command.
+When you're satisfied with all of the changes you've made, you can run the following command: <execute_bash> exit </execute_bash>.
+Note however that you cannot use any interactive session commands (e.g. vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python <script_name>.py`.
+
+NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line!
+
+IMPORTANT TIPS:
+1. Always start by trying to replicate the bug that the issues discusses.
+    If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug.
+    Then start trying to fix it.
+    When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed.
+
+    If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file,
+    so that you can be sure that the script indeed ran fine all the way through.
+
+2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it!
+
+3. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker.
+
+4. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file("buggy-input.png") If that doesn't work, use the linux 'find' command.
+
+5. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current  open file.
+
+6. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it.
+
+[Current directory: /workspace/{workspace_dir_name}]
+"""
+    else:
+        # Testing general agents
+        instruction = (
+            f'Please fix the following issue for the repository in /workspace/{workspace_dir_name}.\n'
+            'Environment has been set up for you to start working. You may assume all necessary tools are installed.\n\n'
+            '# Problem Statement\n'
+            f'{instance.problem_statement}\n\n'
+        )
+        if USE_HINT_TEXT and instance.hints_text:
+            instruction += f'# Hints\n{instance.hints_text}\n\n'
+        instruction += (
+            'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n'
+            'You should NOT modify any existing test case files. If needed, you can add new test cases in a NEW file to reproduce the issue.\n'
+            'You SHOULD INCLUDE PROPER INDENTATION in your edit commands.\n'
+        )
+
    # NOTE: You can actually set slightly different instruction for different agents
    instruction += AGENT_CLS_TO_INST_SUFFIX.get(agent_class, '')

@@ -370,6 +415,11 @@ if __name__ == '__main__':
        .decode('utf-8')
        .strip(),
    }
+    _agent_cls = agenthub.Agent.get_cls(agent_class)
+    if hasattr(_agent_cls, 'system_message'):
+        metadata['system_message'] = _agent_cls.system_message
+    if hasattr(_agent_cls, 'in_context_example'):
+        metadata['in_context_example'] = _agent_cls.in_context_example
    logger.info(f'Metadata: {metadata}')
    with open(os.path.join(eval_output_dir, 'metadata.json'), 'w') as f:
        json.dump(metadata, f)
--- a/evaluation/swe_bench/scripts/run_infer.sh
+++ b/evaluation/swe_bench/scripts/run_infer.sh
@@ -2,12 +2,18 @@
 MODEL_CONFIG=$1
 AGENT=$2
 EVAL_LIMIT=$3
+MAX_ITER=$4

 if [ -z "$AGENT" ]; then
  echo "Agent not specified, use default CodeActAgent"
  AGENT="CodeActAgent"
 fi

+if [ -z "$MAX_ITER" ]; then
+  echo "MAX_ITER not specified, use default 30"
+  MAX_ITER=30
+fi
+
 # IMPORTANT: Because Agent's prompt changes fairly often in the rapidly evolving codebase of OpenDevin
 # We need to track the version of Agent in the evaluation to make sure results are comparable
 AGENT_VERSION=v$(poetry run python -c "import agenthub; from opendevin.controller.agent import Agent; print(Agent.get_cls('$AGENT').VERSION)")
@@ -32,7 +38,7 @@ unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token t
 COMMAND="poetry run python evaluation/swe_bench/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
-  --max-iterations 30 \
+  --max-iterations $MAX_ITER \
  --max-chars 10000000 \
  --eval-num-workers 8 \
  --eval-note $EVAL_NOTE"
--- a/evaluation/swe_bench/swe_env_box.py
+++ b/evaluation/swe_bench/swe_env_box.py
@@ -25,12 +25,14 @@ class SWEBenchSSHBox(DockerSSHBox):
        swe_instance: dict | None = None,
        skip_workspace_mount: bool = True,
        sandbox_plugins: list[PluginRequirement] = [],  # noqa: B006
+        workspace_dir_name: str | None = None,
    ):
        if swe_instance_id is None:
            raise ValueError('swe_instance_id must be provided!')
        self.swe_instance_id = swe_instance_id
        self.swe_instance = swe_instance
        self.skip_workspace_mount = skip_workspace_mount
+        self.workspace_dir_name = workspace_dir_name

        assert (
            container_image is not None
@@ -94,6 +96,7 @@ class SWEBenchSSHBox(DockerSSHBox):
            swe_instance=instance,
            skip_workspace_mount=skip_workspace_mount,
            sandbox_plugins=sandbox_plugins,
+            workspace_dir_name=workspace_dir_name,
        )
        logger.info(f"SSH box started for instance {instance['instance_id']}.")

@@ -123,7 +126,13 @@ class SWEBenchSSHBox(DockerSSHBox):

    def get_diff_patch(self):
        # add everything to the index
-        exit_code, output = self.execute('git add --all')
+        exit_code, output = self.execute(f'cd /workspace/{self.workspace_dir_name}')
+        if exit_code != 0:
+            logger.error('Failed to cd to the repo')
+            return ''
+
+        # add everything to the index
+        exit_code, output = self.execute('git add -A')
        if exit_code != 0:
            logger.error('Failed to add everything to the index')
            return ''
--- a/opendevin/runtime/plugins/agent_skills/agentskills.py
+++ b/opendevin/runtime/plugins/agent_skills/agentskills.py
@@ -16,6 +16,7 @@ Functions:
 """

 import base64
+import functools
 import os
 import subprocess
 from inspect import signature
@@ -46,6 +47,22 @@ OPENAI_PROXY = f'{OPENAI_BASE_URL}/chat/completions'
 client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)


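+# Define the decorator using the functionality of UpdatePwd: run the wrapped function from $JUPYTER_PWD (when set), then restore the previous working directory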
+def update_pwd_decorator(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        old_pwd = os.getcwd()
+        jupyter_pwd = os.environ.get('JUPYTER_PWD', None)
+        if jupyter_pwd:
+            os.chdir(jupyter_pwd)
+        try:
+            return func(*args, **kwargs)
+        finally:
+            os.chdir(old_pwd)
+
+    return wrapper
+
+
 def _lint_file(file_path: str) -> Optional[str]:
    """
    Lint the file at the given path.
@@ -88,12 +105,21 @@ def _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=False):
        start = max(0, CURRENT_LINE - WINDOW // 2)
        end = min(len(lines), CURRENT_LINE + WINDOW // 2)
        output = ''
+
+        # only display this when there are lines above the window
+        if start > 0:
+            n_above_lines = start
+            output += f'({n_above_lines} more lines above)\n'
        for i in range(start, end):
            _new_line = f'{i + 1}|{lines[i]}'
            if not _new_line.endswith('\n'):
                _new_line += '\n'
            output += _new_line
+        if end < len(lines):
+            n_below_lines = len(lines) - end
+            output += f'({n_below_lines} more lines below)\n'
        output = output.rstrip()
+
        if return_str:
            return output
        else:
@@ -104,6 +130,7 @@ def _cur_file_header(CURRENT_FILE, total_lines):
    return f'[File: {os.path.abspath(CURRENT_FILE)} ({total_lines} lines total)]\n'


+@update_pwd_decorator
 def open_file(path: str, line_number: Optional[int] = None) -> None:
    """
    Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
@@ -116,7 +143,7 @@ def open_file(path: str, line_number: Optional[int] = None) -> None:
    if not os.path.isfile(path):
        raise FileNotFoundError(f'File {path} not found')

-    CURRENT_FILE = path
+    CURRENT_FILE = os.path.abspath(path)
    with open(CURRENT_FILE) as file:
        total_lines = sum(1 for _ in file)

@@ -136,6 +163,7 @@ def open_file(path: str, line_number: Optional[int] = None) -> None:
    print(output)


+@update_pwd_decorator
 def goto_line(line_number: int) -> None:
    """
    Moves the window to show the specified line number.
@@ -158,6 +186,7 @@ def goto_line(line_number: int) -> None:
    print(output)


+@update_pwd_decorator
 def scroll_down() -> None:
    """Moves the window down by 100 lines.

@@ -175,6 +204,7 @@ def scroll_down() -> None:
    print(output)


+@update_pwd_decorator
 def scroll_up() -> None:
    """Moves the window up by 100 lines.

@@ -192,6 +222,7 @@ def scroll_up() -> None:
    print(output)


+@update_pwd_decorator
 def create_file(filename: str) -> None:
    """Creates and opens a new file with the given name.

@@ -209,6 +240,7 @@ def create_file(filename: str) -> None:
    print(f'[File {filename} created.]')


+@update_pwd_decorator
 def edit_file(start: int, end: int, content: str) -> None:
    """Edit a file.

@@ -227,21 +259,35 @@ def edit_file(start: int, end: int, content: str) -> None:
    with open(CURRENT_FILE, 'r') as file:
        lines = file.readlines()

+    ERROR_MSG = f'[Error editing opened file {CURRENT_FILE}. Please confirm the opened file is correct.]'
+    ERROR_MSG_SUFFIX = (
+        'Your changes have NOT been applied. Please fix your edit command and try again.\n'
+        'You either need to 1) Open the correct file and try again or 2) Specify the correct start/end line arguments.\n'
+        'DO NOT re-run the same failed edit command. Running it again will lead to the same error.'
+    )
    # Check arguments
    if not (1 <= start <= len(lines)):
-        raise ValueError(
-            f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).'
+        print(
+            f'{ERROR_MSG}\n'
+            f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).\n'
+            f'{ERROR_MSG_SUFFIX}'
        )
+        return

    if not (1 <= end <= len(lines)):
-        raise ValueError(
-            f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).'
+        print(
+            f'{ERROR_MSG}\n'
+            f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).\n'
+            f'{ERROR_MSG_SUFFIX}'
        )
-
+        return
    if start > end:
-        raise ValueError(
-            f'Invalid line range: {start}-{end}. Start must be less than or equal to end.'
+        print(
+            f'{ERROR_MSG}\n'
+            f'Invalid line range: {start}-{end}. Start must be less than or equal to end.\n'
+            f'{ERROR_MSG_SUFFIX}'
        )
+        return

    edited_content = content + '\n'
    n_edited_lines = len(edited_content.split('\n'))
@@ -270,14 +316,20 @@ def edit_file(start: int, end: int, content: str) -> None:
            print('[This is how your edit would have looked if applied]')
            print('-------------------------------------------------')
            cur_line = (n_edited_lines // 2) + start
-            _print_window(CURRENT_FILE, cur_line, WINDOW)
+            _print_window(CURRENT_FILE, cur_line, 10)
            print('-------------------------------------------------\n')

            print('[This is the original code before your edit]')
            print('-------------------------------------------------')
-            _print_window(original_file_backup_path, CURRENT_LINE, WINDOW)
+            _print_window(original_file_backup_path, cur_line, 10)
            print('-------------------------------------------------')

+            print(
+                'Your changes have NOT been applied. Please fix your edit command and try again.\n'
+                'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
+                'DO NOT re-run the same failed edit command. Running it again will lead to the same error.'
+            )
+
            # recover the original file
            with open(original_file_backup_path, 'r') as fin, open(
                CURRENT_FILE, 'w'
@@ -301,6 +353,7 @@ def edit_file(start: int, end: int, content: str) -> None:
    )


+@update_pwd_decorator
 def search_dir(search_term: str, dir_path: str = './') -> None:
    """Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.

@@ -310,7 +363,6 @@ def search_dir(search_term: str, dir_path: str = './') -> None:
    """
    if not os.path.isdir(dir_path):
        raise FileNotFoundError(f'Directory {dir_path} not found')
-
    matches = []
    for root, _, files in os.walk(dir_path):
        for file in files:
@@ -341,6 +393,7 @@ def search_dir(search_term: str, dir_path: str = './') -> None:
    print(f'[End of matches for "{search_term}" in {dir_path}]')


+@update_pwd_decorator
 def search_file(search_term: str, file_path: Optional[str] = None) -> None:
    """Searches for search_term in file. If file is not provided, searches in the current open file.

@@ -373,6 +426,7 @@ def search_file(search_term: str, file_path: Optional[str] = None) -> None:
        print(f'[No matches found for "{search_term}" in {file_path}]')


+@update_pwd_decorator
 def find_file(file_name: str, dir_path: str = './') -> None:
    """Finds all files with the given name in the specified directory.

@@ -398,6 +452,7 @@ def find_file(file_name: str, dir_path: str = './') -> None:
        print(f'[No matches found for "{file_name}" in {dir_path}]')


+@update_pwd_decorator
 def parse_pdf(file_path: str) -> None:
    """Parses the content of a PDF file and prints it.

@@ -416,6 +471,7 @@ def parse_pdf(file_path: str) -> None:
    print(text.strip())


+@update_pwd_decorator
 def parse_docx(file_path: str) -> None:
    """
    Parses the content of a DOCX file and prints it.
@@ -431,6 +487,7 @@ def parse_docx(file_path: str) -> None:
    print(text)


+@update_pwd_decorator
 def parse_latex(file_path: str) -> None:
    """
    Parses the content of a LaTex file and prints it.
@@ -484,6 +541,7 @@ def _prepare_image_messages(task: str, base64_image: str):
    ]


+@update_pwd_decorator
 def parse_audio(file_path: str, model: str = 'whisper-1') -> None:
    """
    Parses the content of an audio file and prints it.
@@ -503,6 +561,7 @@ def parse_audio(file_path: str, model: str = 'whisper-1') -> None:
        print(f'Error transcribing audio file: {e}')


+@update_pwd_decorator
 def parse_image(
    file_path: str, task: str = 'Describe this image as detail as possible.'
 ) -> None:
@@ -529,6 +588,7 @@ def parse_image(
        print(f'Error with the request: {error}')


+@update_pwd_decorator
 def parse_video(
    file_path: str,
    task: str = 'Describe this image as detail as possible.',
@@ -577,6 +637,7 @@ def parse_video(
            print(f'Error with the request: {error}')


+@update_pwd_decorator
 def parse_pptx(file_path: str) -> None:
    """
    Parses the content of a pptx file and prints it.
--- a/opendevin/runtime/plugins/jupyter/execute_cli.py
+++ b/opendevin/runtime/plugins/jupyter/execute_cli.py
@@ -7,20 +7,30 @@ import requests
 # Read the Python code from STDIN
 code = sys.stdin.read()

-# Set the default kernel ID
-kernel_id = 'default'

-PORT = os.environ.get('JUPYTER_EXEC_SERVER_PORT')
-POST_URL = f'http://localhost:{PORT}/execute'
+def execute_code(code):
+    PORT = os.environ.get('JUPYTER_EXEC_SERVER_PORT')
+    POST_URL = f'http://localhost:{PORT}/execute'

-for i in range(10):
-    try:
-        response = requests.post(POST_URL, json={'kernel_id': kernel_id, 'code': code})
-        if '500: Internal Server Error' not in response.text:
-            print(response.text)
-            break
-    except requests.exceptions.ConnectionError:
-        pass
-    time.sleep(2)
-else:
-    print('Failed to connect to the Jupyter server')
+    # Set the default kernel ID
+    kernel_id = 'default'
+
+    for i in range(10):
+        try:
+            response = requests.post(
+                POST_URL, json={'kernel_id': kernel_id, 'code': code}
+            )
+            if '500: Internal Server Error' not in response.text:
+                print(response.text)
+                break
+        except requests.exceptions.ConnectionError:
+            pass
+        time.sleep(2)
+    else:
+        print('Failed to connect to the Jupyter server')
+
+
+if jupyter_pwd := os.environ.get('JUPYTER_PWD'):
+    execute_code(f'import os\nos.environ["JUPYTER_PWD"] = "{jupyter_pwd}"\n')
+
+execute_code(code)
--- a/opendevin/runtime/server/runtime.py
+++ b/opendevin/runtime/server/runtime.py
@@ -55,7 +55,10 @@ class ServerRuntime(Runtime):

        # run the code
        obs = self._run_command(
-            ('cat /tmp/opendevin_jupyter_temp.py | execute_cli'), background=False
+            (
+                'export JUPYTER_PWD=$(pwd) && cat /tmp/opendevin_jupyter_temp.py | execute_cli'
+            ),
+            background=False,
        )
        output = obs.content
        if 'pip install' in action.code and 'Successfully installed' in output:
--- a/tests/unit/test_agent_skill.py
+++ b/tests/unit/test_agent_skill.py
@@ -1,9 +1,9 @@
 import contextlib
 import io
+import sys

 import docx
 import pytest
-import sys

 from opendevin.runtime.plugins.agent_skills.agentskills import (
    create_file,
@@ -11,15 +11,14 @@ from opendevin.runtime.plugins.agent_skills.agentskills import (
    find_file,
    goto_line,
    open_file,
-    scroll_down,
-    scroll_up,
-    search_dir,
-    search_file,
    parse_docx,
    parse_latex,
    parse_pdf,
    parse_pptx,
-    parse_image
+    scroll_down,
+    scroll_up,
+    search_dir,
+    search_file,
 )


@@ -81,6 +80,7 @@ def test_open_file_long(tmp_path):
    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
    for i in range(1, 52):
        expected += f'{i}|Line {i}\n'
+    expected += '(949 more lines below)\n'
    assert result.split('\n') == expected.split('\n')


@@ -95,8 +95,10 @@ def test_open_file_long_with_lineno(tmp_path):
        result = buf.getvalue()
    assert result is not None
    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
+    expected += '(50 more lines above)\n'
    for i in range(51, 151):
        expected += f'{i}|Line {i}\n'
+    expected += '(850 more lines below)\n'
    assert result.split('\n') == expected.split('\n')


@@ -134,6 +136,7 @@ def test_goto_line(tmp_path):
    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
    for i in range(1, 52):
        expected += f'{i}|Line {i}\n'
+    expected += '(949 more lines below)\n'
    assert result.split('\n') == expected.split('\n')

    with io.StringIO() as buf:
@@ -143,8 +146,10 @@ def test_goto_line(tmp_path):
    assert result is not None

    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
+    expected += '(50 more lines above)\n'
    for i in range(51, 151):
        expected += f'{i}|Line {i}\n'
+    expected += '(850 more lines below)\n'
    assert result.split('\n') == expected.split('\n')


@@ -186,6 +191,7 @@ def test_scroll_down(tmp_path):
    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
    for i in range(1, 52):
        expected += f'{i}|Line {i}\n'
+    expected += '(949 more lines below)\n'
    assert result.split('\n') == expected.split('\n')

    with io.StringIO() as buf:
@@ -195,8 +201,10 @@ def test_scroll_down(tmp_path):
    assert result is not None

    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
+    expected += '(51 more lines above)\n'
    for i in range(52, 152):
        expected += f'{i}|Line {i}\n'
+    expected += '(849 more lines below)\n'
    assert result.split('\n') == expected.split('\n')


@@ -212,8 +220,10 @@ def test_scroll_up(tmp_path):
    assert result is not None

    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
+    expected += '(250 more lines above)\n'
    for i in range(251, 351):
        expected += f'{i}|Line {i}\n'
+    expected += '(650 more lines below)\n'
    assert result.split('\n') == expected.split('\n')

    with io.StringIO() as buf:
@@ -223,8 +233,10 @@ def test_scroll_up(tmp_path):
    assert result is not None

    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
+    expected += '(150 more lines above)\n'
    for i in range(151, 251):
        expected += f'{i}|Line {i}\n'
+    expected += '(750 more lines below)\n'
    assert result.split('\n') == expected.split('\n')


@@ -567,6 +579,72 @@ def test_lint_file_fail_undefined_name(tmp_path, monkeypatch, capsys):
        '-------------------------------------------------\n'
        '1|\n'
        '-------------------------------------------------\n'
+        'Your changes have NOT been applied. Please fix your edit command and try again.\n'
+        'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
+        'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n'
+    )
+    assert result.split('\n') == expected.split('\n')
+
+
+def test_lint_file_fail_undefined_name_long(tmp_path, monkeypatch, capsys):
+    # Create a Python file with 1000 blank lines (the edit below introduces an undefined name)
+    file_path = tmp_path / 'test_file.py'
+    file_path.write_text('\n' * 1000)
+
+    # Enable auto-linting by patching the module-level ENABLE_AUTO_LINT flag
+    monkeypatch.setattr(
+        'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True
+    )
+
+    open_file(str(file_path))
+    edit_file(500, 500, 'undefined_name()\n')
+
+    result = capsys.readouterr().out
+    print(result)
+
+    assert result is not None
+
+    open_lines = '\n'.join([f'{i+1}|' for i in range(51)])
+    expected = (
+        f'[File: {file_path} (1000 lines total)]\n'
+        f'{open_lines}\n'
+        '(949 more lines below)\n'
+        '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n'
+        'ERRORS:\n'
+        f"{file_path}:500:1: F821 undefined name 'undefined_name'\n"
+        '[This is how your edit would have looked if applied]\n'
+        '-------------------------------------------------\n'
+        '(496 more lines above)\n'
+        '497|\n'
+        '498|\n'
+        '499|\n'
+        '500|undefined_name()\n'
+        '501|\n'
+        '502|\n'
+        '503|\n'
+        '504|\n'
+        '505|\n'
+        '506|\n'
+        '(495 more lines below)\n'
+        '-------------------------------------------------\n\n'
+        '[This is the original code before your edit]\n'
+        '-------------------------------------------------\n'
+        '(496 more lines above)\n'
+        '497|\n'
+        '498|\n'
+        '499|\n'
+        '500|\n'
+        '501|\n'
+        '502|\n'
+        '503|\n'
+        '504|\n'
+        '505|\n'
+        '506|\n'
+        '(494 more lines below)\n'
+        '-------------------------------------------------\n'
+        'Your changes have NOT been applied. Please fix your edit command and try again.\n'
+        'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
+        'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n'
    )
    assert result.split('\n') == expected.split('\n')

@@ -599,7 +677,7 @@ def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys):

 def test_parse_docx(tmp_path):
    # Create a DOCX file with some content
-    test_docx_path = tmp_path / "test.docx"
+    test_docx_path = tmp_path / 'test.docx'
    doc = docx.Document()
    doc.add_paragraph('Hello, this is a test document.')
    doc.add_paragraph('This is the second paragraph.')
@@ -621,19 +699,19 @@ def test_parse_docx(tmp_path):
        '@@ Page 1 @@\nHello, this is a test document.\n\n'
        '@@ Page 2 @@\nThis is the second paragraph.\n\n\n'
    )
-    assert output == expected_output, f"Expected output does not match. Got: {output}"
+    assert output == expected_output, f'Expected output does not match. Got: {output}'


 def test_parse_latex(tmp_path):
    # Create a LaTeX file with some content
-    test_latex_path = tmp_path / "test.tex"
+    test_latex_path = tmp_path / 'test.tex'
    with open(test_latex_path, 'w') as f:
-        f.write(r'''
+        f.write(r"""
        \documentclass{article}
        \begin{document}
        Hello, this is a test LaTeX document.
        \end{document}
-        ''')
+        """)

    old_stdout = sys.stdout
    sys.stdout = io.StringIO()
@@ -650,17 +728,17 @@ def test_parse_latex(tmp_path):
        f'[Reading LaTex file from {test_latex_path}]\n'
        'Hello, this is a test LaTeX document.\n'
    )
-    assert output == expected_output, f"Expected output does not match. Got: {output}"
+    assert output == expected_output, f'Expected output does not match. Got: {output}'


 def test_parse_pdf(tmp_path):
    # Create a PDF file with some content
-    test_pdf_path = tmp_path / "test.pdf"
+    test_pdf_path = tmp_path / 'test.pdf'
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfgen import canvas

    c = canvas.Canvas(str(test_pdf_path), pagesize=letter)
-    c.drawString(100, 750, "Hello, this is a test PDF document.")
+    c.drawString(100, 750, 'Hello, this is a test PDF document.')
    c.save()

    old_stdout = sys.stdout
@@ -679,21 +757,22 @@ def test_parse_pdf(tmp_path):
        '@@ Page 1 @@\n'
        'Hello, this is a test PDF document.\n'
    )
-    assert output == expected_output, f"Expected output does not match. Got: {output}"
+    assert output == expected_output, f'Expected output does not match. Got: {output}'


 def test_parse_pptx(tmp_path):
-    test_pptx_path = tmp_path / "test.pptx"
+    test_pptx_path = tmp_path / 'test.pptx'
    from pptx import Presentation
+
    pres = Presentation()

    slide1 = pres.slides.add_slide(pres.slide_layouts[0])
    title1 = slide1.shapes.title
-    title1.text = "Hello, this is the first test PPTX slide."
+    title1.text = 'Hello, this is the first test PPTX slide.'

    slide2 = pres.slides.add_slide(pres.slide_layouts[0])
    title2 = slide2.shapes.title
-    title2.text = "Hello, this is the second test PPTX slide."
+    title2.text = 'Hello, this is the second test PPTX slide.'

    pres.save(str(test_pptx_path))

@@ -712,4 +791,4 @@ def test_parse_pptx(tmp_path):
        '@@ Slide 2 @@\n'
        'Hello, this is the second test PPTX slide.\n\n'
    )
-    assert output == expected_output, f"Expected output does not match. Got: {output}"
+    assert output == expected_output, f'Expected output does not match. Got: {output}'
Author	SHA1	Message	Date
Xingyao Wang	a4af937dc4	stop printing	2024-05-28 23:01:35 +08:00
Xingyao Wang	95eb048672	allow specifying max iter in cmdline script	2024-05-28 22:42:30 +08:00
Xingyao Wang	832a82867f	fix issue for CodeActSWEAgent	2024-05-28 22:26:49 +08:00
Xingyao Wang	3eaa6fbcbb	add codeact swe agent	2024-05-28 22:25:56 +08:00
Xingyao Wang	e699f21f19	update max iter	2024-05-28 22:20:12 +08:00
Xingyao Wang	368f0b9434	update README	2024-05-28 22:00:41 +08:00
Xingyao Wang	a27b0bb748	revert instructions for run infer	2024-05-28 21:57:47 +08:00
Xingyao Wang	a9dc3ce6f3	revert instructions for run infer	2024-05-28 21:56:06 +08:00
Xingyao Wang	a98f15ae95	revert changes to codeact	2024-05-28 20:36:32 +08:00
Xingyao Wang	fa97e57360	revert changes from codeact agent and create new CodeActSWEAgent	2024-05-28 20:33:22 +08:00
Xingyao Wang	cb23bdbf62	default to 50 turns	2024-05-28 12:51:29 +08:00
Xingyao Wang	a36f6f5d33	update hint string	2024-05-28 12:45:05 +08:00
Xingyao Wang	6e2736f46b	improve git get patch	2024-05-28 11:42:28 +08:00
Xingyao Wang	851df736b9	update prompt	2024-05-28 10:51:18 +08:00
Xingyao Wang	604c8d9888	update edit error message	2024-05-28 01:43:32 +08:00
Xingyao Wang	c2a284fde2	change cwd for jupyter if needed	2024-05-28 01:36:21 +08:00
Xingyao Wang	7783c10f82	update error message to include current file info	2024-05-28 01:13:40 +08:00
Xingyao Wang	deef10b43e	change prompt to abs path	2024-05-28 01:09:25 +08:00
Xingyao Wang	2a1cc9a089	remove extra print	2024-05-28 01:07:21 +08:00
Xingyao Wang	4f853e79cf	also log in_context_example to run infer	2024-05-28 00:53:33 +08:00
Xingyao Wang	4aeb002901	add icl for swebench	2024-05-28 00:48:24 +08:00
Xingyao Wang	80c0a33c6b	fix cwd	2024-05-28 00:48:17 +08:00
Xingyao Wang	1e58a12dbf	update infer prompt	2024-05-28 00:45:27 +08:00
Xingyao Wang	8ec58d2618	upgrade agentskills and update testcases	2024-05-28 00:43:17 +08:00
Xingyao Wang	e9d788959d	update swe_bench prompt; use minimal prompt for codeact;	2024-05-27 23:44:25 +08:00