Compare commits

...

25 Commits

Author SHA1 Message Date
Xingyao Wang
a4af937dc4 stop printing 2024-05-28 23:01:35 +08:00
Xingyao Wang
95eb048672 allow specifying max iter in cmdline script 2024-05-28 22:42:30 +08:00
Xingyao Wang
832a82867f fix issue for CodeActSWEAgent 2024-05-28 22:26:49 +08:00
Xingyao Wang
3eaa6fbcbb add codeact swe agent 2024-05-28 22:25:56 +08:00
Xingyao Wang
e699f21f19 update max iter 2024-05-28 22:20:12 +08:00
Xingyao Wang
368f0b9434 update README 2024-05-28 22:00:41 +08:00
Xingyao Wang
a27b0bb748 revert instructions for run infer 2024-05-28 21:57:47 +08:00
Xingyao Wang
a9dc3ce6f3 revert instructions for run infer 2024-05-28 21:56:06 +08:00
Xingyao Wang
a98f15ae95 revert changes to codeact 2024-05-28 20:36:32 +08:00
Xingyao Wang
fa97e57360 revert changes from codeact agent and create new CodeActSWEAgent 2024-05-28 20:33:22 +08:00
Xingyao Wang
cb23bdbf62 default to 50 turns 2024-05-28 12:51:29 +08:00
Xingyao Wang
a36f6f5d33 update hint string 2024-05-28 12:45:05 +08:00
Xingyao Wang
6e2736f46b improve git get patch 2024-05-28 11:42:28 +08:00
Xingyao Wang
851df736b9 update prompt 2024-05-28 10:51:18 +08:00
Xingyao Wang
604c8d9888 update edit error message 2024-05-28 01:43:32 +08:00
Xingyao Wang
c2a284fde2 change cwd for jupyter if needed 2024-05-28 01:36:21 +08:00
Xingyao Wang
7783c10f82 update error message to include current file info 2024-05-28 01:13:40 +08:00
Xingyao Wang
deef10b43e change prompt to abs path 2024-05-28 01:09:25 +08:00
Xingyao Wang
2a1cc9a089 remove extra print 2024-05-28 01:07:21 +08:00
Xingyao Wang
4f853e79cf also log in_context_example to run infer 2024-05-28 00:53:33 +08:00
Xingyao Wang
4aeb002901 add icl for swebench 2024-05-28 00:48:24 +08:00
Xingyao Wang
80c0a33c6b fix cwd 2024-05-28 00:48:17 +08:00
Xingyao Wang
1e58a12dbf update infer prompt 2024-05-28 00:45:27 +08:00
Xingyao Wang
8ec58d2618 upgrade agentskills and update testcases 2024-05-28 00:43:17 +08:00
Xingyao Wang
e9d788959d update swe_bench prompt;
use minimal prompt for codeact;
2024-05-27 23:44:25 +08:00
14 changed files with 1017 additions and 74 deletions

View File

@@ -12,6 +12,7 @@ from . import ( # noqa: E402
SWE_agent,
browsing_agent,
codeact_agent,
codeact_swe_agent,
delegator_agent,
dummy_agent,
monologue_agent,
@@ -21,6 +22,7 @@ from . import ( # noqa: E402
__all__ = [
'monologue_agent',
'codeact_agent',
'codeact_swe_agent',
'planner_agent',
'SWE_agent',
'delegator_agent',

View File

@@ -105,6 +105,18 @@ def truncate_observation(observation: str, max_chars: int = 10_000) -> str:
)
# FIXME: We can tweak these two settings to create MicroAgents specialized toward different area
def get_system_message() -> str:
    """Assemble the system prompt for the CodeAct agent.

    The prompt is the system prefix, the command documentation and the system
    suffix separated by blank lines. When ``ENABLE_GITHUB`` is truthy the
    GitHub instructions are inserted on the line right after the prefix.
    """
    # Optional section: empty unless GitHub support is switched on.
    github_section = f'\n{GITHUB_MESSAGE}' if ENABLE_GITHUB else ''
    return f'{SYSTEM_PREFIX}{github_section}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
def get_in_context_example() -> str:
    """Return the in-context example text (the module-level ``EXAMPLES`` constant)."""
    example_text = EXAMPLES
    return example_text
class CodeActAgent(Agent):
VERSION = '1.5'
"""
@@ -152,11 +164,8 @@ class CodeActAgent(Agent):
]
jupyter_kernel_init_code: str = 'from agentskills import *'
system_message: str = (
f'{SYSTEM_PREFIX}\n{GITHUB_MESSAGE}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
if ENABLE_GITHUB
else f'{SYSTEM_PREFIX}\n\n{COMMAND_DOCS}\n\n{SYSTEM_SUFFIX}'
)
system_message: str = get_system_message()
in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"
def __init__(
self,
@@ -194,10 +203,7 @@ class CodeActAgent(Agent):
"""
messages: list[dict[str, str]] = [
{'role': 'system', 'content': self.system_message},
{
'role': 'user',
'content': f"Here is an example of how you can interact with the environment for task solving:\n{EXAMPLES}\n\nNOW, LET'S START!",
},
{'role': 'user', 'content': self.in_context_example},
]
for prev_action, obs in state.history:

View File

@@ -8,17 +8,23 @@ COMMAND_DOCS = (
"Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run."
)
SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
# ======= SYSTEM MESSAGE =======
MINIMAL_SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
"""
BROWSING_PREFIX = """The assistant can browse the Internet with commands on behalf of the user by wrapping them with <execute_browse> and </execute_browse>.
For example, you can browse a given URL by <execute_browse> goto("<URL>") </execute_browse>.
The assistant should attempt fewer things at a time instead of putting too much commands OR code in one "execute" block.
The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them."""
"""
PIP_INSTALL_PREFIX = """The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them."""
SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX
GITHUB_MESSAGE = """To do any activities on GitHub, the assistant should use the token in the $GITHUB_TOKEN environment variable.
For instance, to push a local branch `my_branch` to the github repo `owner/repo`, the assistant can use the following four commands:
@@ -30,6 +36,8 @@ The assistant should include ONLY ONE <execute_ipython> or <execute_bash> or <ex
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
"""
# ======= EXAMPLE MESSAGE =======
EXAMPLES = """
--- START OF EXAMPLE ---

View File

@@ -0,0 +1,7 @@
# CodeAct (SWE Edit Specialized)
This agent is an adaptation of the original [SWE Agent](https://swe-agent.com/) based on CodeAct using the `agentskills` library of OpenDevin.
Its intended use is **solving GitHub issues**.
It removes the web-browsing and GitHub capabilities from the original CodeAct agent to avoid confusing the agent.

View File

@@ -0,0 +1,5 @@
from opendevin.controller.agent import Agent
from .codeact_swe_agent import CodeActSWEAgent
# Register the CodeActSWEAgent class with the Agent base class under the
# name 'CodeActSWEAgent'.
Agent.register('CodeActSWEAgent', CodeActSWEAgent)

View File

@@ -0,0 +1,246 @@
import re
from agenthub.codeact_swe_agent.prompt import (
COMMAND_DOCS,
MINIMAL_SYSTEM_PREFIX,
SWE_EXAMPLE,
SYSTEM_SUFFIX,
)
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.events.action import (
Action,
AgentFinishAction,
BrowseInteractiveAction,
CmdRunAction,
IPythonRunCellAction,
MessageAction,
)
from opendevin.events.observation import (
BrowserOutputObservation,
CmdOutputObservation,
IPythonRunCellObservation,
)
from opendevin.llm.llm import LLM
from opendevin.runtime.plugins import (
AgentSkillsRequirement,
JupyterRequirement,
PluginRequirement,
)
def parse_response(response) -> str:
    """Extract the text of the first completion choice and close open tags.

    For each of the ``bash``, ``ipython`` and ``browse`` execute tags, if the
    opening ``<execute_*>`` tag is present but its closing tag is not, the
    closing tag is appended to the end of the text.

    Parameters:
    - response: completion object exposing ``choices[0].message.content``

    Returns:
    - str: the completion text with any missing closing tag appended
    """
    text = response.choices[0].message.content
    for lang in ('bash', 'ipython', 'browse'):
        open_tag = f'<execute_{lang}>'
        close_tag = f'</execute_{lang}>'
        needs_closing = open_tag in text and close_tag not in text
        if needs_closing:
            text = text + close_tag
    return text
def action_to_str(action: Action) -> str:
    """Render an action as the text used for it in the chat history.

    Command, IPython and browse actions are rendered as the action's thought
    followed by the payload wrapped in the matching ``<execute_*>`` tag.
    Message actions are rendered as their content. Any other action type
    yields an empty string.
    """
    # Pick the tag name and payload for the executable action kinds; the
    # isinstance checks are kept in the original order.
    if isinstance(action, CmdRunAction):
        tag, payload = 'bash', action.command
    elif isinstance(action, IPythonRunCellAction):
        tag, payload = 'ipython', action.code
    elif isinstance(action, BrowseInteractiveAction):
        tag, payload = 'browse', action.browser_actions
    elif isinstance(action, MessageAction):
        return action.content
    else:
        return ''
    return f'{action.thought}\n<execute_{tag}>\n{payload}\n</execute_{tag}>'
def get_action_message(action: Action) -> dict[str, str] | None:
    """Convert an action into a chat message dict, if it belongs in the history.

    Returns:
    - dict with ``role`` ('user' when ``action.source == 'user'``, otherwise
      'assistant') and ``content`` (from ``action_to_str``) for browse,
      command, IPython and message actions
    - None for every other action type
    """
    chat_action_types = (
        BrowseInteractiveAction,
        CmdRunAction,
        IPythonRunCellAction,
        MessageAction,
    )
    if not isinstance(action, chat_action_types):
        return None

    role = 'user' if action.source == 'user' else 'assistant'
    return {'role': role, 'content': action_to_str(action)}
def get_observation_message(obs) -> dict[str, str] | None:
    """Convert an observation into a 'user' chat message, if it is a known kind.

    - Command output: truncated content followed by a status line carrying
      the command id and exit code.
    - IPython output: lines containing an inline base64 PNG image are replaced
      by a short placeholder, then the whole message is truncated.
    - Browser output: truncated content.

    Parameters:
    - obs: the observation to convert

    Returns:
    - dict with ``role`` 'user' and ``content`` prefixed by 'OBSERVATION:\\n'
    - None when the observation is of any other type
    """
    if isinstance(obs, CmdOutputObservation):
        content = 'OBSERVATION:\n' + truncate_observation(obs.content)
        # The status line used to end with a stray extra ']' ("...]]");
        # the brackets are now balanced.
        content += (
            f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
        )
        return {'role': 'user', 'content': content}
    elif isinstance(obs, IPythonRunCellObservation):
        content = 'OBSERVATION:\n' + obs.content
        # replace base64 images with a placeholder
        splitted = content.split('\n')
        for i, line in enumerate(splitted):
            if '![image](data:image/png;base64,' in line:
                splitted[i] = (
                    '![image](data:image/png;base64, ...) already displayed to user'
                )
        content = '\n'.join(splitted)
        # Truncate only after the image lines have been replaced.
        content = truncate_observation(content)
        return {'role': 'user', 'content': content}
    elif isinstance(obs, BrowserOutputObservation):
        content = 'OBSERVATION:\n' + truncate_observation(obs.content)
        return {'role': 'user', 'content': content}
    return None
def truncate_observation(observation: str, max_chars: int = 10_000) -> str:
    """
    Truncate the middle of the observation if it is too long.

    Parameters:
    - observation (str): the text to shorten
    - max_chars (int): length above which the text is truncated; values
      below 2 (including negatives) keep no head/tail characters at all

    Returns:
    - str: the observation unchanged when it has at most `max_chars`
      characters, otherwise its first and last `max_chars // 2` characters
      joined by a truncation marker
    """
    if len(observation) <= max_chars:
        return observation
    # Clamp so a negative budget cannot produce a negative slice bound.
    half = max(max_chars, 0) // 2
    # Use an explicit start index for the tail: `observation[-half:]` would
    # return the WHOLE string when half == 0 because -0 == 0.
    tail_start = len(observation) - half
    return (
        observation[:half]
        + '\n[... Observation truncated due to length ...]\n'
        + observation[tail_start:]
    )
def get_system_message() -> str:
    """Assemble the system prompt for the SWE-specialized CodeAct agent.

    The prompt is the minimal system prefix, the command documentation and the
    system suffix, in that order, separated by blank lines.
    """
    sections = (MINIMAL_SYSTEM_PREFIX, COMMAND_DOCS, SYSTEM_SUFFIX)
    return '\n\n'.join(sections)
def get_in_context_example() -> str:
    """Return the in-context example text (the imported ``SWE_EXAMPLE`` constant)."""
    example_text = SWE_EXAMPLE
    return example_text
class CodeActSWEAgent(Agent):
    """
    This agent is an adaptation of the original [SWE Agent](https://swe-agent.com/) based on CodeAct 1.5 using the `agentskills` library of OpenDevin.

    Its intended use is **solving Github issues**.

    It removes web-browsing and Github capability from the original CodeAct agent to avoid confusion to the agent.
    """

    # NOTE: the docstring has to be the first statement of the class body to
    # become `CodeActSWEAgent.__doc__`; it previously came after VERSION and
    # was therefore just a discarded string expression.
    VERSION = '1.5'

    sandbox_plugins: list[PluginRequirement] = [
        # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
        # AgentSkillsRequirement provides a lot of Python functions
        # and it need to be initialized before Jupyter for Jupyter to use those functions.
        AgentSkillsRequirement(),
        JupyterRequirement(),
    ]
    # Code passed along with every IPythonRunCellAction as `kernel_init_code`.
    jupyter_kernel_init_code: str = 'from agentskills import *'

    # Both prompts are computed once, when the class body is executed.
    system_message: str = get_system_message()
    in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"

    def __init__(
        self,
        llm: LLM,
    ) -> None:
        """
        Initializes a new instance of the CodeActSWEAgent class.

        Parameters:
        - llm (LLM): The llm to be used by this agent
        """
        super().__init__(llm)
        self.reset()

    def reset(self) -> None:
        """
        Resets the CodeActSWE Agent.
        """
        super().reset()

    def step(self, state: State) -> Action:
        """
        Performs one step using the CodeActSWE Agent.
        This includes gathering info on previous steps and prompting the model to make a command to execute.

        Parameters:
        - state (State): used to get updated info and background commands

        Returns:
        - CmdRunAction(command) - bash command to run
        - IPythonRunCellAction(code) - IPython code to run
        - BrowseInteractiveAction(browsergym_command) - BrowserGym commands to run
        - MessageAction(content) - Message action to run (e.g. ask for clarification)
        - AgentFinishAction() - end the interaction
        """
        # The conversation always starts with the system prompt and the
        # in-context example, followed by the replayed history.
        messages: list[dict[str, str]] = [
            {'role': 'system', 'content': self.system_message},
            {'role': 'user', 'content': self.in_context_example},
        ]

        for prev_action, obs in state.history:
            action_message = get_action_message(prev_action)
            if action_message:
                messages.append(action_message)

            obs_message = get_observation_message(obs)
            if obs_message:
                messages.append(obs_message)

        # There is always at least one user message (the in-context example),
        # so indexing [-1] cannot fail here.
        latest_user_message = [m for m in messages if m['role'] == 'user'][-1]
        if latest_user_message:
            if latest_user_message['content'].strip() == '/exit':
                return AgentFinishAction()
            # Tell the model how many turns remain by appending a reminder
            # to the most recent user message.
            latest_user_message['content'] += (
                f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.'
            )

        # Generation stops at a closing execute tag; parse_response() appends
        # the closing tag again when it is missing from the returned text.
        response = self.llm.do_completion(
            messages=messages,
            stop=[
                '</execute_ipython>',
                '</execute_bash>',
                '</execute_browse>',
            ],
            temperature=0.0,
        )

        action_str: str = parse_response(response)
        state.num_of_chars += sum(
            len(message['content']) for message in messages
        ) + len(action_str)

        if finish_command := re.search(r'<finish>.*</finish>', action_str, re.DOTALL):
            thought = action_str.replace(finish_command.group(0), '').strip()
            return AgentFinishAction(thought=thought)

        if bash_command := re.search(
            r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
        ):
            # remove the command from the action string to get thought
            thought = action_str.replace(bash_command.group(0), '').strip()
            # a command was found
            command_group = bash_command.group(1).strip()
            # A bare `exit` command is treated as the signal that the agent is done.
            if command_group.strip() == 'exit':
                return AgentFinishAction()
            return CmdRunAction(command=command_group, thought=thought)
        elif python_code := re.search(
            r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
        ):
            # a code block was found
            code_group = python_code.group(1).strip()
            thought = action_str.replace(python_code.group(0), '').strip()
            return IPythonRunCellAction(
                code=code_group,
                thought=thought,
                kernel_init_code=self.jupyter_kernel_init_code,
            )
        elif browse_command := re.search(
            r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
        ):
            # BrowserGym actions was found
            # NOTE(review): the system prompt built by get_system_message() does
            # not describe <execute_browse>, yet this branch is still handled.
            browse_actions = browse_command.group(1).strip()
            thought = action_str.replace(browse_command.group(0), '').strip()
            return BrowseInteractiveAction(
                browser_actions=browse_actions, thought=thought
            )
        else:
            # We assume the LLM is GOOD enough that when it returns pure natural language
            # it want to talk to the user
            return MessageAction(content=action_str, wait_for_response=True)

    def search_memory(self, query: str) -> list[str]:
        """Not supported by this agent; always raises NotImplementedError."""
        raise NotImplementedError('Implement this abstract method')

View File

@@ -0,0 +1,451 @@
from opendevin.runtime.plugins import AgentSkillsRequirement
_AGENT_SKILLS_DOCS = AgentSkillsRequirement.documentation
# Prompt section describing the agentskills functions: an introductory
# sentence, the documentation string taken from AgentSkillsRequirement, and a
# reminder that `edit_file` content must carry its own indentation.
COMMAND_DOCS = (
'\nApart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:\n'
f'{_AGENT_SKILLS_DOCS}'
"Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run."
)
# ======= SYSTEM MESSAGE =======
MINIMAL_SYSTEM_PREFIX = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute_ipython>" tag, for example:
<execute_ipython>
print("Hello World!")
</execute_ipython>
The assistant can execute bash commands on behalf of the user by wrapping them with <execute_bash> and </execute_bash>.
For example, you can list the files in the current directory by <execute_bash> ls </execute_bash>.
"""
# Closing instructions of the system prompt.
# NOTE(review): this text still mentions <execute_browse>, although
# MINIMAL_SYSTEM_PREFIX in this module never introduces that tag — confirm
# whether the browse references should remain for this agent.
SYSTEM_SUFFIX = """The assistant's response should be concise.
The assistant should include ONLY ONE <execute_ipython> or <execute_bash> or <execute_browse> in every one of the responses, unless the assistant is finished with the task or need more input or action from the user in order to proceed.
IMPORTANT: Whenever possible, execute the code for the user using <execute_ipython> or <execute_bash> or <execute_browse> instead of providing it.
"""
SWE_EXAMPLE = """
--- START OF EXAMPLE ---
USER: We are currently solving the following issue within our repository. Here is the issue text:
--- BEGIN ISSUE ---
TimeDelta serialization precision
Hi there!
I just found quite strange behaviour of `TimeDelta` field serialization
```python3
from marshmallow.fields import TimeDelta
from datetime import timedelta
td_field = TimeDelta(precision="milliseconds")
obj = dict()
obj["td_field"] = timedelta(milliseconds=345)
print(td_field.serialize("td_field", obj))
```
Output of this snippet is `344`, but it seems that `345` is correct.
Looks like a rounding issue here: https://github.com/marshmallow-code/marshmallow/blob/dev/src/marshmallow/fields.py#L1474
--- END ISSUE ---
[Current directory: /workspace/marshmallow-code__marshmallow]
ASSISTANT:
Let's first start by reproducing the results of the issue. The issue includes some example code for reproduction, which we can use. We'll create a new file called `reproduce.py` and paste the example code into it.
<execute_ipython>
create_file('/workspace/marshmallow-code__marshmallow/reproduce.py')
</execute_ipython>
USER:
OBSERVATION:
[File: /workspace/marshmallow-code__marshmallow/reproduce.py (1 lines total)]
1|
[File /workspace/marshmallow-code__marshmallow/reproduce.py created.]
ASSISTANT:
Now let's paste in the example code from the issue.
<execute_ipython>
EDITED_CODE = \"\"\"from marshmallow.fields import TimeDelta
from datetime import timedelta
td_field = TimeDelta(precision="milliseconds")
obj = dict()
obj["td_field"] = timedelta(milliseconds=345)
print(td_field.serialize("td_field", obj))
\"\"\"
edit_file(start=1, end=1, content=EDITED_CODE)
</execute_ipython>
USER:
OBSERVATION:
[File: /workspace/marshmallow-code__marshmallow/reproduce.py (9 lines total)]
1|from marshmallow.fields import TimeDelta
2|from datetime import timedelta
3|
4|td_field = TimeDelta(precision="milliseconds")
5|
6|obj = dict()
7|obj["td_field"] = timedelta(milliseconds=345)
8|
9|print(td_field.serialize("td_field", obj))
[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
ASSISTANT:
Now let's run the code to see if we see the same output as the issue.
<execute_bash>
python reproduce.py
</execute_bash>
USER:
OBSERVATION:
344
ASSISTANT:
We are indeed seeing the same output as the issue. The issue suggests that we should look at line 1474 of the `fields.py` file to see if there is a rounding issue. Let's open the `fields.py` file and navigate to line 1474 to see the relevant code. First we should find the `fields.py` file in the repository. It is likely to be in the `src/marshmallow` directory, but we should check to be sure.
<execute_bash>
ls -F
</execute_bash>
USER:
OBSERVATION:
AUTHORS.rst
CHANGELOG.rst
CODE_OF_CONDUCT.md
CONTRIBUTING.rst
LICENSE
MANIFEST.in
NOTICE
README.rst
RELEASING.md
azure-pipelines.yml
docs/
examples/
performance/
pyproject.toml
reproduce.py
setup.cfg
setup.py
src/
tests/
tox.ini
ASSISTANT:
It looks like the `src` directory is present, which suggests that the `fields.py` file is likely to be in the `src` directory. Let's use find_file to see where it is.
<execute_ipython>
find_file("fields.py", "/workspace/marshmallow-code__marshmallow/src")
</execute_ipython>
USER:
OBSERVATION:
[Found 1 matches for "fields.py" in /workspace/marshmallow-code__marshmallow/src]
/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py
ASSISTANT:
It looks like the `fields.py` file is present in the `./src/marshmallow/` directory. The issue also points to a specific URL with line number 1474. We should navigate to that line in fields.py to see the relevant code for the `TimeDelta` serialization.
<execute_ipython>
open_file("/workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py", 1474)
</execute_ipython>
USER:
OBSERVATION:
[File: /workspace/marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
(1458 more lines above)
1459| self.WEEKS,
1460| )
1461|
1462| if precision not in units:
1463| msg = 'The precision must be {} or "{}".'.format(
1464| ", ".join([f'"{each}"' for each in units[:-1]]), units[-1]
1465| )
1466| raise ValueError(msg)
1467|
1468| self.precision = precision
1469| super().__init__(**kwargs)
1470|
1471| def _serialize(self, value, attr, obj, **kwargs):
1472| if value is None:
1473| return None
1474| base_unit = dt.timedelta(**{self.precision: 1})
1475| return int(value.total_seconds() / base_unit.total_seconds())
1476|
1477| def _deserialize(self, value, attr, data, **kwargs):
1478| try:
1479| value = int(value)
1480| except (TypeError, ValueError) as error:
1481| raise self.make_error("invalid") from error
1482|
1483| kwargs = {self.precision: value}
1484|
1485| try:
1486| return dt.timedelta(**kwargs)
1487| except OverflowError as error:
1488| raise self.make_error("invalid") from error
1489|
1490|
1491|class Mapping(Field):
1492| \"\"\"An abstract class for objects with key-value pairs.
1493|
1494| :param keys: A field class or instance for dict keys.
1495| :param values: A field class or instance for dict values.
1496| :param kwargs: The same keyword arguments that :class:`Field` receives.
1497|
1498| .. note::
1499| When the structure of nested data is not known, you may omit the
1500| `keys` and `values` arguments to prevent content validation.
1501|
1502| .. versionadded:: 3.0.0rc4
1503| \"\"\"
1504|
1505| mapping_type = dict
1506|
1507| #: Default error messages.
1508| default_error_messages = {"invalid": "Not a valid mapping type."}
1509|
1510| def __init__(
1511| self,
1512| keys: typing.Optional[typing.Union[Field, type]] = None,
1513| values: typing.Optional[typing.Union[Field, type]] = None,
1514| **kwargs,
1515| ):
1516| super().__init__(**kwargs)
1517| if keys is None:
1518| self.key_field = None
1519| else:
1520| try:
1521| self.key_field = resolve_field_instance(keys)
1522| except FieldInstanceResolutionError as error:
1523| raise ValueError(
1524| '"keys" must be a subclass or instance of '
1525| "marshmallow.base.FieldABC."
1526| ) from error
1527|
1528| if values is None:
1529| self.value_field = None
1530| else:
1531| try:
1532| self.value_field = resolve_field_instance(values)
1533| except FieldInstanceResolutionError as error:
1534| raise ValueError(
1535| '"values" must be a subclass or instance of '
1536| "marshmallow.base.FieldABC."
1537| ) from error
1538| if isinstance(self.value_field, Nested):
1539| self.only = self.value_field.only
1540| self.exclude = self.value_field.exclude
1541|
1542| def _bind_to_schema(self, field_name, schema):
1543| super()._bind_to_schema(field_name, schema)
1544| if self.value_field:
1545| self.value_field = copy.deepcopy(self.value_field)
1546| self.value_field._bind_to_schema(field_name, self)
1547| if isinstance(self.value_field, Nested):
1548| self.value_field.only = self.only
1549| self.value_field.exclude = self.exclude
1550| if self.key_field:
1551| self.key_field = copy.deepcopy(self.key_field)
1552| self.key_field._bind_to_schema(field_name, self)
1553|
1554| def _serialize(self, value, attr, obj, **kwargs):
1555| if value is None:
1556| return None
1557| if not self.value_field and not self.key_field:
1558| return self.mapping_type(value)
(439 more lines below)
ASSISTANT:
We are now looking at the relevant section of the `fields.py` file where the `TimeDelta` serialization occurs. The issue suggests that there is a rounding problem near line 1474. The current implementation uses integer division, which truncates the decimal part and can lead to incorrect rounding on line 1475. To fix this issue, we should ensure that the division results in a float, and then round the result to the nearest integer before converting it to an int. This should preserve the milliseconds precision as expected. Let's make the necessary edit to the code.
<execute_ipython>
edit_file(start=1475, end=1475, content="return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int")
</execute_ipython>
USER:
OBSERVATION:
[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
ERRORS:
- E999 IndentationError: unexpected indent
[This is how your edit would have looked if applied]
-------------------------------------------------
[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
(1468 more lines above)
1469| super().__init__(**kwargs)
1470|
1471| def _serialize(self, value, attr, obj, **kwargs):
1472| if value is None:
1473| return None
1474| base_unit = dt.timedelta(**{self.precision: 1})
1475|return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int
1476|
1477| def _deserialize(self, value, attr, data, **kwargs):
1478| try:
1479| value = int(value)
(518 more lines below)
-------------------------------------------------
[This is the original code before your edit]
-------------------------------------------------
[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
(1469 more lines above)
1470|
1471| def _serialize(self, value, attr, obj, **kwargs):
1472| if value is None:
1473| return None
1474| base_unit = dt.timedelta(**{self.precision: 1})
1475| return int(value.total_seconds() / base_unit.total_seconds())
1476|
1477| def _deserialize(self, value, attr, data, **kwargs):
1478| try:
1479| value = int(value)
1480| except (TypeError, ValueError) as error:
(517 more lines below)
-------------------------------------------------
Your changes have NOT been applied. Please fix your edit command and try again.
You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
DO NOT re-run the same failed edit command. Running it again will lead to the same error.
ASSISTANT:
My edit command did not use the proper indentation, I will fix my syntax in this follow up edit command.
<execute_ipython>
EDITED_CODE=" return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int"
edit_file(start=1475, end=1475, content=EDITED_CODE)
</execute_ipython>
USER:
OBSERVATION:
[File: /marshmallow-code__marshmallow/src/marshmallow/fields.py (1997 lines total)]
(1424 more lines above)
1425|
1426| :param precision: Influences how the integer is interpreted during
1427| (de)serialization. Must be 'days', 'seconds', 'microseconds',
1428| 'milliseconds', 'minutes', 'hours' or 'weeks'.
1429| :param kwargs: The same keyword arguments that :class:`Field` receives.
1430|
1431| .. versionchanged:: 2.0.0
1432| Always serializes to an integer value to avoid rounding errors.
1433| Add `precision` parameter.
1434| \"\"\"
1435|
1436| DAYS = "days"
1437| SECONDS = "seconds"
1438| MICROSECONDS = "microseconds"
1439| MILLISECONDS = "milliseconds"
1440| MINUTES = "minutes"
1441| HOURS = "hours"
1442| WEEKS = "weeks"
1443|
1444| #: Default error messages.
1445| default_error_messages = {
1446| "invalid": "Not a valid period of time.",
1447| "format": "{input!r} cannot be formatted as a timedelta.",
1448| }
1449|
1450| def __init__(self, precision: str = SECONDS, **kwargs):
1451| precision = precision.lower()
1452| units = (
1453| self.DAYS,
1454| self.SECONDS,
1455| self.MICROSECONDS,
1456| self.MILLISECONDS,
1457| self.MINUTES,
1458| self.HOURS,
1459| self.WEEKS,
1460| )
1461|
1462| if precision not in units:
1463| msg = 'The precision must be {} or "{}".'.format(
1464| ", ".join([f'"{each}"' for each in units[:-1]]), units[-1]
1465| )
1466| raise ValueError(msg)
1467|
1468| self.precision = precision
1469| super().__init__(**kwargs)
1470|
1471| def _serialize(self, value, attr, obj, **kwargs):
1472| if value is None:
1473| return None
1474| base_unit = dt.timedelta(**{self.precision: 1})
1475| return int(round(value.total_seconds() / base_unit.total_seconds())) # round to nearest int
1476|
1477| def _deserialize(self, value, attr, data, **kwargs):
1478| try:
1479| value = int(value)
1480| except (TypeError, ValueError) as error:
1481| raise self.make_error("invalid") from error
1482|
1483| kwargs = {self.precision: value}
1484|
1485| try:
1486| return dt.timedelta(**kwargs)
1487| except OverflowError as error:
1488| raise self.make_error("invalid") from error
1489|
1490|
1491|class Mapping(Field):
1492| \"\"\"An abstract class for objects with key-value pairs.
1493|
1494| :param keys: A field class or instance for dict keys.
1495| :param values: A field class or instance for dict values.
1496| :param kwargs: The same keyword arguments that :class:`Field` receives.
1497|
1498| .. note::
1499| When the structure of nested data is not known, you may omit the
1500| `keys` and `values` arguments to prevent content validation.
1501|
1502| .. versionadded:: 3.0.0rc4
1503| \"\"\"
1504|
1505| mapping_type = dict
1506|
1507| #: Default error messages.
1508| default_error_messages = {"invalid": "Not a valid mapping type."}
1509|
1510| def __init__(
1511| self,
1512| keys: typing.Optional[typing.Union[Field, type]] = None,
1513| values: typing.Optional[typing.Union[Field, type]] = None,
1514| **kwargs,
1515| ):
1516| super().__init__(**kwargs)
1517| if keys is None:
1518| self.key_field = None
1519| else:
1520| try:
1521| self.key_field = resolve_field_instance(keys)
1522| except FieldInstanceResolutionError as error:
1523| raise ValueError(
1524| '"keys" must be a subclass or instance of '
(473 more lines below)
[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
ASSISTANT:
The code has been updated to use the `round` function, which should fix the rounding issue. Before submitting the changes, it would be prudent to run the reproduce.py code again to ensure that this change has the desired effect. I see that "Current directory" is still `/marshmallow-code__marshmallow`, so we can just run `reproduce.py` directly.
<execute_bash>
python reproduce.py
</execute_bash>
USER:
OBSERVATION:
345
ASSISTANT:
The output has changed from 344 to 345, which suggests that the rounding issue has been fixed. Let's remove the reproduce.py file since it is no longer needed.
<execute_bash>
rm reproduce.py
</execute_bash>
USER:
OBSERVATION:
ASSISTANT:
rm doesn't have any output when it deletes successfully, so that must have worked. Now that we have fixed the issue, we can exit the current task.
<execute_bash>
exit
</execute_bash>
--- END OF EXAMPLE ---
"""

View File

@@ -62,11 +62,13 @@ def monologue_user_response(state: State) -> str:
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
'CodeActSWEAgent': codeact_user_response,
'MonologueAgent': monologue_user_response,
}
AGENT_CLS_TO_INST_SUFFIX = {
'CodeActAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n'
'CodeActAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n',
'CodeActSWEAgent': 'When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.\n',
}
@@ -243,19 +245,62 @@ def process_instance(
)
# Prepare instruction
instruction = (
f'Please fix the following issue for the repository in /workspace/{workspace_dir_name}.\n'
'Environment has been set up for you to start working. You may assume all necessary tools are installed.\n\n'
'# Problem Statement\n'
f'{instance.problem_statement}\n\n'
)
if USE_HINT_TEXT and instance.hints_text:
instruction += f'# Hints\n{instance.hints_text}\n\n'
instruction += (
'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n'
'You should NOT modify any existing test case files. If needed, you can add new test cases in a NEW file to reproduce the issue.\n'
'You SHOULD INCLUDE PROPER INDENTATION in your edit commands.\n'
)
if agent_class == 'CodeActSWEAgent':
instruction = (
'We are currently solving the following issue within our repository. Here is the issue text:\n'
'--- BEGIN ISSUE ---\n'
f'{instance.problem_statement}\n'
'--- END ISSUE ---\n\n'
)
if USE_HINT_TEXT and instance.hints_text:
instruction += (
f'--- BEGIN HINTS ---\n{instance.hints_text}\n--- END HINTS ---\n'
)
instruction += f"""Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want.
Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command.
When you're satisfied with all of the changes you've made, you can run the following command: <execute_bash> exit </execute_bash>.
Note however that you cannot use any interactive session commands (e.g. vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python <script_name>.py`.
NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line!
IMPORTANT TIPS:
1. Always start by trying to replicate the bug that the issues discusses.
If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug.
Then start trying to fix it.
When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed.
If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file,
so that you can be sure that the script indeed ran fine all the way through.
2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it!
3. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker.
4. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file("buggy-input.png") If that doesn't work, use the linux 'find' command.
5. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current open file.
6. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it.
[Current directory: /workspace/{workspace_dir_name}]
"""
else:
# Testing general agents
instruction = (
f'Please fix the following issue for the repository in /workspace/{workspace_dir_name}.\n'
'Environment has been set up for you to start working. You may assume all necessary tools are installed.\n\n'
'# Problem Statement\n'
f'{instance.problem_statement}\n\n'
)
if USE_HINT_TEXT and instance.hints_text:
instruction += f'# Hints\n{instance.hints_text}\n\n'
instruction += (
'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n'
'You should NOT modify any existing test case files. If needed, you can add new test cases in a NEW file to reproduce the issue.\n'
'You SHOULD INCLUDE PROPER INDENTATION in your edit commands.\n'
)
# NOTE: You can actually set slightly different instruction for different agents
instruction += AGENT_CLS_TO_INST_SUFFIX.get(agent_class, '')
@@ -370,6 +415,11 @@ if __name__ == '__main__':
.decode('utf-8')
.strip(),
}
_agent_cls = agenthub.Agent.get_cls(agent_class)
if hasattr(_agent_cls, 'system_message'):
metadata['system_message'] = _agent_cls.system_message
if hasattr(_agent_cls, 'in_context_example'):
metadata['in_context_example'] = _agent_cls.in_context_example
logger.info(f'Metadata: {metadata}')
with open(os.path.join(eval_output_dir, 'metadata.json'), 'w') as f:
json.dump(metadata, f)

View File

@@ -2,12 +2,18 @@
# Positional command-line arguments.
#   $1 MODEL_CONFIG - forwarded to run_infer.py as --llm-config
#   $2 AGENT        - agent class name, forwarded as --agent-cls
#   $3 EVAL_LIMIT   - its use is outside this excerpt; presumably caps the number
#                     of evaluated instances (TODO confirm)
#   $4 MAX_ITER     - forwarded as --max-iterations
MODEL_CONFIG=$1
AGENT=$2
EVAL_LIMIT=$3
MAX_ITER=$4
# Fall back to CodeActAgent when no agent was given (or it was passed as an empty string).
if [ -z "$AGENT" ]; then
echo "Agent not specified, use default CodeActAgent"
AGENT="CodeActAgent"
fi
# Fall back to 30 iterations when no iteration cap was given.
if [ -z "$MAX_ITER" ]; then
echo "MAX_ITER not specified, use default 30"
MAX_ITER=30
fi
# IMPORTANT: The Agent's prompt changes fairly often in OpenDevin's rapidly evolving codebase,
# so we track the Agent's version in the evaluation to make sure results are comparable.
AGENT_VERSION=v$(poetry run python -c "import agenthub; from opendevin.controller.agent import Agent; print(Agent.get_cls('$AGENT').VERSION)")
@@ -32,7 +38,7 @@ unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token t
COMMAND="poetry run python evaluation/swe_bench/run_infer.py \
--agent-cls $AGENT \
--llm-config $MODEL_CONFIG \
--max-iterations 30 \
--max-iterations $MAX_ITER \
--max-chars 10000000 \
--eval-num-workers 8 \
--eval-note $EVAL_NOTE"

View File

@@ -25,12 +25,14 @@ class SWEBenchSSHBox(DockerSSHBox):
swe_instance: dict | None = None,
skip_workspace_mount: bool = True,
sandbox_plugins: list[PluginRequirement] = [], # noqa: B006
workspace_dir_name: str | None = None,
):
if swe_instance_id is None:
raise ValueError('swe_instance_id must be provided!')
self.swe_instance_id = swe_instance_id
self.swe_instance = swe_instance
self.skip_workspace_mount = skip_workspace_mount
self.workspace_dir_name = workspace_dir_name
assert (
container_image is not None
@@ -94,6 +96,7 @@ class SWEBenchSSHBox(DockerSSHBox):
swe_instance=instance,
skip_workspace_mount=skip_workspace_mount,
sandbox_plugins=sandbox_plugins,
workspace_dir_name=workspace_dir_name,
)
logger.info(f"SSH box started for instance {instance['instance_id']}.")
@@ -123,7 +126,13 @@ class SWEBenchSSHBox(DockerSSHBox):
def get_diff_patch(self):
# add everything to the index
exit_code, output = self.execute('git add --all')
exit_code, output = self.execute(f'cd /workspace/{self.workspace_dir_name}')
if exit_code != 0:
logger.error('Failed to cd to the repo')
return ''
# add everything to the index
exit_code, output = self.execute('git add -A')
if exit_code != 0:
logger.error('Failed to add everything to the index')
return ''

View File

@@ -16,6 +16,7 @@ Functions:
"""
import base64
import functools
import os
import subprocess
from inspect import signature
@@ -46,6 +47,22 @@ OPENAI_PROXY = f'{OPENAI_BASE_URL}/chat/completions'
client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
def update_pwd_decorator(func):
    """Wrap ``func`` so that it runs from the directory named by ``JUPYTER_PWD``.

    On every call the wrapper records the current working directory, changes
    into ``$JUPYTER_PWD`` when that environment variable holds a non-empty
    value, invokes ``func``, and then restores the recorded directory whether
    ``func`` returned normally or raised.

    Args:
        func: The callable to wrap.

    Returns:
        A callable with ``func``'s metadata (via ``functools.wraps``) that
        forwards all positional and keyword arguments to ``func`` and returns
        its result.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        previous_dir = os.getcwd()
        # The environment is consulted on each call, not once at decoration time.
        target_dir = os.environ.get('JUPYTER_PWD')
        if target_dir:
            os.chdir(target_dir)
        try:
            result = func(*args, **kwargs)
        finally:
            # Always return to the directory that was current before the call.
            os.chdir(previous_dir)
        return result

    return wrapper
def _lint_file(file_path: str) -> Optional[str]:
"""
Lint the file at the given path.
@@ -88,12 +105,21 @@ def _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=False):
start = max(0, CURRENT_LINE - WINDOW // 2)
end = min(len(lines), CURRENT_LINE + WINDOW // 2)
output = ''
# only display this when there's line above
if start > 0:
n_above_lines = start
output += f'({n_above_lines} more lines above)\n'
for i in range(start, end):
_new_line = f'{i + 1}|{lines[i]}'
if not _new_line.endswith('\n'):
_new_line += '\n'
output += _new_line
if end < len(lines):
n_below_lines = len(lines) - end
output += f'({n_below_lines} more lines below)\n'
output = output.rstrip()
if return_str:
return output
else:
@@ -104,6 +130,7 @@ def _cur_file_header(CURRENT_FILE, total_lines):
return f'[File: {os.path.abspath(CURRENT_FILE)} ({total_lines} lines total)]\n'
@update_pwd_decorator
def open_file(path: str, line_number: Optional[int] = None) -> None:
"""
Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
@@ -116,7 +143,7 @@ def open_file(path: str, line_number: Optional[int] = None) -> None:
if not os.path.isfile(path):
raise FileNotFoundError(f'File {path} not found')
CURRENT_FILE = path
CURRENT_FILE = os.path.abspath(path)
with open(CURRENT_FILE) as file:
total_lines = sum(1 for _ in file)
@@ -136,6 +163,7 @@ def open_file(path: str, line_number: Optional[int] = None) -> None:
print(output)
@update_pwd_decorator
def goto_line(line_number: int) -> None:
"""
Moves the window to show the specified line number.
@@ -158,6 +186,7 @@ def goto_line(line_number: int) -> None:
print(output)
@update_pwd_decorator
def scroll_down() -> None:
"""Moves the window down by 100 lines.
@@ -175,6 +204,7 @@ def scroll_down() -> None:
print(output)
@update_pwd_decorator
def scroll_up() -> None:
"""Moves the window up by 100 lines.
@@ -192,6 +222,7 @@ def scroll_up() -> None:
print(output)
@update_pwd_decorator
def create_file(filename: str) -> None:
"""Creates and opens a new file with the given name.
@@ -209,6 +240,7 @@ def create_file(filename: str) -> None:
print(f'[File {filename} created.]')
@update_pwd_decorator
def edit_file(start: int, end: int, content: str) -> None:
"""Edit a file.
@@ -227,21 +259,35 @@ def edit_file(start: int, end: int, content: str) -> None:
with open(CURRENT_FILE, 'r') as file:
lines = file.readlines()
ERROR_MSG = f'[Error editing opened file {CURRENT_FILE}. Please confirm the opened file is correct.]'
ERROR_MSG_SUFFIX = (
'Your changes have NOT been applied. Please fix your edit command and try again.\n'
'You either need to 1) Open the correct file and try again or 2) Specify the correct start/end line arguments.\n'
'DO NOT re-run the same failed edit command. Running it again will lead to the same error.'
)
# Check arguments
if not (1 <= start <= len(lines)):
raise ValueError(
f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).'
print(
f'{ERROR_MSG}\n'
f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).\n'
f'{ERROR_MSG_SUFFIX}'
)
return
if not (1 <= end <= len(lines)):
raise ValueError(
f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).'
print(
f'{ERROR_MSG}\n'
f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).\n'
f'{ERROR_MSG_SUFFIX}'
)
return
if start > end:
raise ValueError(
f'Invalid line range: {start}-{end}. Start must be less than or equal to end.'
print(
f'{ERROR_MSG}\n'
f'Invalid line range: {start}-{end}. Start must be less than or equal to end.\n'
f'{ERROR_MSG_SUFFIX}'
)
return
edited_content = content + '\n'
n_edited_lines = len(edited_content.split('\n'))
@@ -270,14 +316,20 @@ def edit_file(start: int, end: int, content: str) -> None:
print('[This is how your edit would have looked if applied]')
print('-------------------------------------------------')
cur_line = (n_edited_lines // 2) + start
_print_window(CURRENT_FILE, cur_line, WINDOW)
_print_window(CURRENT_FILE, cur_line, 10)
print('-------------------------------------------------\n')
print('[This is the original code before your edit]')
print('-------------------------------------------------')
_print_window(original_file_backup_path, CURRENT_LINE, WINDOW)
_print_window(original_file_backup_path, cur_line, 10)
print('-------------------------------------------------')
print(
'Your changes have NOT been applied. Please fix your edit command and try again.\n'
'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
'DO NOT re-run the same failed edit command. Running it again will lead to the same error.'
)
# recover the original file
with open(original_file_backup_path, 'r') as fin, open(
CURRENT_FILE, 'w'
@@ -301,6 +353,7 @@ def edit_file(start: int, end: int, content: str) -> None:
)
@update_pwd_decorator
def search_dir(search_term: str, dir_path: str = './') -> None:
"""Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
@@ -310,7 +363,6 @@ def search_dir(search_term: str, dir_path: str = './') -> None:
"""
if not os.path.isdir(dir_path):
raise FileNotFoundError(f'Directory {dir_path} not found')
matches = []
for root, _, files in os.walk(dir_path):
for file in files:
@@ -341,6 +393,7 @@ def search_dir(search_term: str, dir_path: str = './') -> None:
print(f'[End of matches for "{search_term}" in {dir_path}]')
@update_pwd_decorator
def search_file(search_term: str, file_path: Optional[str] = None) -> None:
"""Searches for search_term in file. If file is not provided, searches in the current open file.
@@ -373,6 +426,7 @@ def search_file(search_term: str, file_path: Optional[str] = None) -> None:
print(f'[No matches found for "{search_term}" in {file_path}]')
@update_pwd_decorator
def find_file(file_name: str, dir_path: str = './') -> None:
"""Finds all files with the given name in the specified directory.
@@ -398,6 +452,7 @@ def find_file(file_name: str, dir_path: str = './') -> None:
print(f'[No matches found for "{file_name}" in {dir_path}]')
@update_pwd_decorator
def parse_pdf(file_path: str) -> None:
"""Parses the content of a PDF file and prints it.
@@ -416,6 +471,7 @@ def parse_pdf(file_path: str) -> None:
print(text.strip())
@update_pwd_decorator
def parse_docx(file_path: str) -> None:
"""
Parses the content of a DOCX file and prints it.
@@ -431,6 +487,7 @@ def parse_docx(file_path: str) -> None:
print(text)
@update_pwd_decorator
def parse_latex(file_path: str) -> None:
"""
Parses the content of a LaTex file and prints it.
@@ -484,6 +541,7 @@ def _prepare_image_messages(task: str, base64_image: str):
]
@update_pwd_decorator
def parse_audio(file_path: str, model: str = 'whisper-1') -> None:
"""
Parses the content of an audio file and prints it.
@@ -503,6 +561,7 @@ def parse_audio(file_path: str, model: str = 'whisper-1') -> None:
print(f'Error transcribing audio file: {e}')
@update_pwd_decorator
def parse_image(
file_path: str, task: str = 'Describe this image as detail as possible.'
) -> None:
@@ -529,6 +588,7 @@ def parse_image(
print(f'Error with the request: {error}')
@update_pwd_decorator
def parse_video(
file_path: str,
task: str = 'Describe this image as detail as possible.',
@@ -577,6 +637,7 @@ def parse_video(
print(f'Error with the request: {error}')
@update_pwd_decorator
def parse_pptx(file_path: str) -> None:
"""
Parses the content of a pptx file and prints it.

View File

@@ -7,20 +7,30 @@ import requests
# Read the Python code from STDIN
code = sys.stdin.read()
# Set the default kernel ID
kernel_id = 'default'
PORT = os.environ.get('JUPYTER_EXEC_SERVER_PORT')
POST_URL = f'http://localhost:{PORT}/execute'
def execute_code(code):
PORT = os.environ.get('JUPYTER_EXEC_SERVER_PORT')
POST_URL = f'http://localhost:{PORT}/execute'
for i in range(10):
try:
response = requests.post(POST_URL, json={'kernel_id': kernel_id, 'code': code})
if '500: Internal Server Error' not in response.text:
print(response.text)
break
except requests.exceptions.ConnectionError:
pass
time.sleep(2)
else:
print('Failed to connect to the Jupyter server')
# Set the default kernel ID
kernel_id = 'default'
for i in range(10):
try:
response = requests.post(
POST_URL, json={'kernel_id': kernel_id, 'code': code}
)
if '500: Internal Server Error' not in response.text:
print(response.text)
break
except requests.exceptions.ConnectionError:
pass
time.sleep(2)
else:
print('Failed to connect to the Jupyter server')
# When the caller exported JUPYTER_PWD, first run a small snippet through the
# execution server that sets the same variable inside the kernel's os.environ,
# then run the code that was read from STDIN.
if jupyter_pwd := os.environ.get('JUPYTER_PWD'):
    # ``!r`` emits a correctly quoted and escaped Python string literal, so a
    # path containing quotes or backslashes can neither break the generated
    # snippet nor inject extra statements into it.
    execute_code(f'import os\nos.environ["JUPYTER_PWD"] = {jupyter_pwd!r}\n')
execute_code(code)

View File

@@ -55,7 +55,10 @@ class ServerRuntime(Runtime):
# run the code
obs = self._run_command(
('cat /tmp/opendevin_jupyter_temp.py | execute_cli'), background=False
(
'export JUPYTER_PWD=$(pwd) && cat /tmp/opendevin_jupyter_temp.py | execute_cli'
),
background=False,
)
output = obs.content
if 'pip install' in action.code and 'Successfully installed' in output:

View File

@@ -1,9 +1,9 @@
import contextlib
import io
import sys
import docx
import pytest
import sys
from opendevin.runtime.plugins.agent_skills.agentskills import (
create_file,
@@ -11,15 +11,14 @@ from opendevin.runtime.plugins.agent_skills.agentskills import (
find_file,
goto_line,
open_file,
scroll_down,
scroll_up,
search_dir,
search_file,
parse_docx,
parse_latex,
parse_pdf,
parse_pptx,
parse_image
scroll_down,
scroll_up,
search_dir,
search_file,
)
@@ -81,6 +80,7 @@ def test_open_file_long(tmp_path):
expected = f'[File: {temp_file_path} (1000 lines total)]\n'
for i in range(1, 52):
expected += f'{i}|Line {i}\n'
expected += '(949 more lines below)\n'
assert result.split('\n') == expected.split('\n')
@@ -95,8 +95,10 @@ def test_open_file_long_with_lineno(tmp_path):
result = buf.getvalue()
assert result is not None
expected = f'[File: {temp_file_path} (1000 lines total)]\n'
expected += '(50 more lines above)\n'
for i in range(51, 151):
expected += f'{i}|Line {i}\n'
expected += '(850 more lines below)\n'
assert result.split('\n') == expected.split('\n')
@@ -134,6 +136,7 @@ def test_goto_line(tmp_path):
expected = f'[File: {temp_file_path} (1000 lines total)]\n'
for i in range(1, 52):
expected += f'{i}|Line {i}\n'
expected += '(949 more lines below)\n'
assert result.split('\n') == expected.split('\n')
with io.StringIO() as buf:
@@ -143,8 +146,10 @@ def test_goto_line(tmp_path):
assert result is not None
expected = f'[File: {temp_file_path} (1000 lines total)]\n'
expected += '(50 more lines above)\n'
for i in range(51, 151):
expected += f'{i}|Line {i}\n'
expected += '(850 more lines below)\n'
assert result.split('\n') == expected.split('\n')
@@ -186,6 +191,7 @@ def test_scroll_down(tmp_path):
expected = f'[File: {temp_file_path} (1000 lines total)]\n'
for i in range(1, 52):
expected += f'{i}|Line {i}\n'
expected += '(949 more lines below)\n'
assert result.split('\n') == expected.split('\n')
with io.StringIO() as buf:
@@ -195,8 +201,10 @@ def test_scroll_down(tmp_path):
assert result is not None
expected = f'[File: {temp_file_path} (1000 lines total)]\n'
expected += '(51 more lines above)\n'
for i in range(52, 152):
expected += f'{i}|Line {i}\n'
expected += '(849 more lines below)\n'
assert result.split('\n') == expected.split('\n')
@@ -212,8 +220,10 @@ def test_scroll_up(tmp_path):
assert result is not None
expected = f'[File: {temp_file_path} (1000 lines total)]\n'
expected += '(250 more lines above)\n'
for i in range(251, 351):
expected += f'{i}|Line {i}\n'
expected += '(650 more lines below)\n'
assert result.split('\n') == expected.split('\n')
with io.StringIO() as buf:
@@ -223,8 +233,10 @@ def test_scroll_up(tmp_path):
assert result is not None
expected = f'[File: {temp_file_path} (1000 lines total)]\n'
expected += '(150 more lines above)\n'
for i in range(151, 251):
expected += f'{i}|Line {i}\n'
expected += '(750 more lines below)\n'
assert result.split('\n') == expected.split('\n')
@@ -567,6 +579,72 @@ def test_lint_file_fail_undefined_name(tmp_path, monkeypatch, capsys):
'-------------------------------------------------\n'
'1|\n'
'-------------------------------------------------\n'
'Your changes have NOT been applied. Please fix your edit command and try again.\n'
'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n'
)
assert result.split('\n') == expected.split('\n')
def test_lint_file_fail_undefined_name_long(tmp_path, monkeypatch, capsys):
    """Check the lint-failure report of ``edit_file`` on a 1000-line file.

    The edit replaces line 500 with a call to an undefined name while
    auto-linting is enabled. The captured output must contain the ``open_file``
    listing, the F821 lint error, a short preview window of the proposed edit,
    the matching window of the original file, and the "changes have NOT been
    applied" guidance.
    """
    # Create a file made of 1000 newline characters; it is lint-clean until the
    # edit below introduces an undefined name.
    file_path = tmp_path / 'test_file.py'
    file_path.write_text('\n' * 1000)

    # Enable linting by patching the module-level ENABLE_AUTO_LINT flag
    # (monkeypatch undoes this when the test finishes).
    monkeypatch.setattr(
        'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True
    )

    open_file(str(file_path))
    edit_file(500, 500, 'undefined_name()\n')

    result = capsys.readouterr().out
    # Printed after the capture was read, so it only aids debugging on failure.
    print(result)

    assert result is not None

    # open_file is expected to list lines 1-51 of the (empty-line) file.
    open_lines = '\n'.join([f'{i+1}|' for i in range(51)])
    expected = (
        f'[File: {file_path} (1000 lines total)]\n'
        f'{open_lines}\n'
        '(949 more lines below)\n'
        '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n'
        'ERRORS:\n'
        f"{file_path}:500:1: F821 undefined name 'undefined_name'\n"
        '[This is how your edit would have looked if applied]\n'
        '-------------------------------------------------\n'
        '(496 more lines above)\n'
        '497|\n'
        '498|\n'
        '499|\n'
        '500|undefined_name()\n'
        '501|\n'
        '502|\n'
        '503|\n'
        '504|\n'
        '505|\n'
        '506|\n'
        # One more line below than in the original window further down --
        # presumably because the replacement text ends in a newline and the
        # edit therefore adds a line (TODO confirm against edit_file).
        '(495 more lines below)\n'
        '-------------------------------------------------\n\n'
        '[This is the original code before your edit]\n'
        '-------------------------------------------------\n'
        '(496 more lines above)\n'
        '497|\n'
        '498|\n'
        '499|\n'
        '500|\n'
        '501|\n'
        '502|\n'
        '503|\n'
        '504|\n'
        '505|\n'
        '506|\n'
        '(494 more lines below)\n'
        '-------------------------------------------------\n'
        'Your changes have NOT been applied. Please fix your edit command and try again.\n'
        'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
        'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n'
    )
    # Compare line by line so a mismatch points at the differing line.
    assert result.split('\n') == expected.split('\n')
@@ -599,7 +677,7 @@ def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys):
def test_parse_docx(tmp_path):
# Create a DOCX file with some content
test_docx_path = tmp_path / "test.docx"
test_docx_path = tmp_path / 'test.docx'
doc = docx.Document()
doc.add_paragraph('Hello, this is a test document.')
doc.add_paragraph('This is the second paragraph.')
@@ -621,19 +699,19 @@ def test_parse_docx(tmp_path):
'@@ Page 1 @@\nHello, this is a test document.\n\n'
'@@ Page 2 @@\nThis is the second paragraph.\n\n\n'
)
assert output == expected_output, f"Expected output does not match. Got: {output}"
assert output == expected_output, f'Expected output does not match. Got: {output}'
def test_parse_latex(tmp_path):
# Create a LaTeX file with some content
test_latex_path = tmp_path / "test.tex"
test_latex_path = tmp_path / 'test.tex'
with open(test_latex_path, 'w') as f:
f.write(r'''
f.write(r"""
\documentclass{article}
\begin{document}
Hello, this is a test LaTeX document.
\end{document}
''')
""")
old_stdout = sys.stdout
sys.stdout = io.StringIO()
@@ -650,17 +728,17 @@ def test_parse_latex(tmp_path):
f'[Reading LaTex file from {test_latex_path}]\n'
'Hello, this is a test LaTeX document.\n'
)
assert output == expected_output, f"Expected output does not match. Got: {output}"
assert output == expected_output, f'Expected output does not match. Got: {output}'
def test_parse_pdf(tmp_path):
# Create a PDF file with some content
test_pdf_path = tmp_path / "test.pdf"
test_pdf_path = tmp_path / 'test.pdf'
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
c = canvas.Canvas(str(test_pdf_path), pagesize=letter)
c.drawString(100, 750, "Hello, this is a test PDF document.")
c.drawString(100, 750, 'Hello, this is a test PDF document.')
c.save()
old_stdout = sys.stdout
@@ -679,21 +757,22 @@ def test_parse_pdf(tmp_path):
'@@ Page 1 @@\n'
'Hello, this is a test PDF document.\n'
)
assert output == expected_output, f"Expected output does not match. Got: {output}"
assert output == expected_output, f'Expected output does not match. Got: {output}'
def test_parse_pptx(tmp_path):
test_pptx_path = tmp_path / "test.pptx"
test_pptx_path = tmp_path / 'test.pptx'
from pptx import Presentation
pres = Presentation()
slide1 = pres.slides.add_slide(pres.slide_layouts[0])
title1 = slide1.shapes.title
title1.text = "Hello, this is the first test PPTX slide."
title1.text = 'Hello, this is the first test PPTX slide.'
slide2 = pres.slides.add_slide(pres.slide_layouts[0])
title2 = slide2.shapes.title
title2.text = "Hello, this is the second test PPTX slide."
title2.text = 'Hello, this is the second test PPTX slide.'
pres.save(str(test_pptx_path))
@@ -712,4 +791,4 @@ def test_parse_pptx(tmp_path):
'@@ Slide 2 @@\n'
'Hello, this is the second test PPTX slide.\n\n'
)
assert output == expected_output, f"Expected output does not match. Got: {output}"
assert output == expected_output, f'Expected output does not match. Got: {output}'