mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-10 07:18:10 -05:00
* initialize plugin definition * initialize plugin definition * simplify mixin * further improve plugin mixin * add cache dir for pip * support clean up cache * add script for setup jupyter and execution server * integrate JupyterRequirement to ssh_box * source bashrc at the end of plugin load * add execute_cli that accept code via stdin * make JUPYTER_EXEC_SERVER_PORT configurable via env var * increase background cmd sleep time * Update opendevin/sandbox/plugins/mixin.py Co-authored-by: Robert Brennan <accounts@rbren.io> * add mixin to base class * make jupyter requirement a dataclass * source plugins only when >0 requirements * add `sandbox_plugins` for each agent & have controller take care of it * update build.sh to make logs available in /opendevin/logs * switch to use config for lib and cache dir * Add SANDBOX_WORKSPACE_DIR into config * Add SANDBOX_WORKSPACE_DIR into config * fix occurence of /workspace * fix permission issue with /workspace * use python to implement execute_cli to avoid stdin escape issue * add IPythonRunCellAction and get it working * wait until jupyter is avaialble * support plugin via copying instead of mounting * add agent talk action * support follow-up user language feedback * add __str__ for action to be printed better * only print PLAN at the beginning * wip: update codeact agent * get rid the initial messate * update codeact agent to handle null action; add thought to bash * dispatch thought for RUN action as well * fix weird behavior of pxssh where the output would not flush correctly * make ssh box can handle exit_code properly as well * add initial version of swe-agent plugin; * rename swe cursors * split setup script into two and create two requirements * print SWE-agent command documentation * update swe-agent to default to no custom docs * add initial version of swe-agent plugin; * rename swe cursors * split setup script into two and create two requirements * print SWE-agent command documentation * update swe-agent to 
default to no custom docs * update dockerfile with dependency from swe-agent * make env setup a separate script for .bashrc source * add wip prompt * fix mount_dir for ssh_box * update prompt * fix mount_dir for ssh_box * default to use host network * default to use host network * move prompt to a separate file * fix swe-tool plugins; add missing _split_string * remove hostname from sshbox * update the prompt with edit functionality * fix swe-tool plugins; add missing _split_string * add awaiting into status bar * fix the bug of additional send event * remove some print action * move logic to config.py * remove debugging comments * make host network as default * make WORKSPACE_MOUNT_PATH as abspath * implement execute_cli via file cp * Revert "implement execute_cli via file cp" This reverts commit06f0155bc1. * add codeact dependencies to default container * add IPythonRunCellObservation * add back cache dir and default to /tmp * make USE_HOST_NETWORK a bool * revert use host network to false * add temporarily fix for IPython RUN action * update prompt * revert USE_HOST_NETWORK to true since it is not affecting anything * attempt to fix lint * remove newline * fix jupyter execution server * add `thought` to most action class * fix unit tests for current action abstraction * support user exit * update test cases with the latest action format (added 'thought') * fix integration test for CodeActAGent by mocking stdin * only mock stdin for tests with user_responses.log * remove -exec integration test for CodeActAgent since it is not supported * remove specific stop word * fix comments * improve clarity of prompt * fix py lint * fix integration tests * sandbox might failed in chown due to mounting, but it won't be fatal * update debug instruction for sshbox * fix typo * get RUN_AS_DEVIN and network=host working with app sandbox * get RUN_AS_DEVIN and network=host working with app sandbox * attempt to fix the workspace base permission * sandbox might failed in chown due 
to mounting, but it won't be fatal * update sshbox instruction * remove default user id since it will be passed in the instruction * revert permission fix since it should be resolved by correct SANDBOX_USER_ID * the permission issue can be fixed by simply provide correct env var * remove log * set sandbox user id to getuid by default * move logging to initializer * make the uid consistent across host, app container, and sandbox * remove hostname as it causes sudo issue * fix permission of entrypoint script * make the uvicron app run as host user uid for jupyter plugin * add warning message * update dev md for instruction of running unit tests * add back unit tests * revert back to the original sandbox implementation to fix testcases * revert use host network * get docker socket gid and usermod instead of chmod 777 * allow unit test workflow to find docker.sock * make sandbox test working via patch * fix arg parser that's broken for some reason * try to fix app build disk space issue * fix integration test * Revert "fix arg parser that's broken for some reason" This reverts commit6cc8961133. * update Development.md * cleanup intergration tests & add exception for CodeAct+execbox * fix config * implement user_message action * fix doc * fix event dict error * fix frontend lint * revert accidentally changes to integration tests * revert accidentally changes to integration tests --------- Co-authored-by: Robert Brennan <accounts@rbren.io> Co-authored-by: Robert Brennan <contact@rbren.io>
101 lines
3.8 KiB
Python
101 lines
3.8 KiB
Python
import io
|
|
import os
|
|
import re
|
|
from functools import partial
|
|
|
|
import pytest
|
|
from litellm import completion
|
|
|
|
# Directory that contains this file, with symlinks resolved. The mock
# prompt/response fixtures are located relative to this directory.
script_dir = os.path.split(os.path.realpath(__file__))[0]
|
|
|
|
|
|
def filter_out_symbols(input):
    """Reduce text to its alphabetic characters, joined by single spaces.

    Every character for which ``str.isalpha()`` is false (digits, whitespace,
    punctuation, ...) is discarded, so texts that differ only in such
    characters normalize to the same string.
    """
    letters = []
    for char in input:
        if char.isalpha():
            letters.append(char)
    return ' '.join(letters)
|
|
|
|
|
|
def get_log_id(prompt_log_name):
    """Extract the numeric id from a ``prompt_{id}.log`` file name or path.

    Args:
        prompt_log_name: File name or path containing ``prompt_<digits>.log``.

    Returns:
        The digits as a string (leading zeros are preserved), or ``None``
        when the name contains no such pattern.
    """
    # The dot is escaped so that only a literal ".log" matches; an unescaped
    # "." would accept any character there (e.g. "prompt_12xlog").
    match = re.search(r'prompt_(\d+)\.log', prompt_log_name)
    if match is None:
        return None
    return match.group(1)
|
|
|
|
|
|
def get_mock_response(test_name, messages):
    """Look up the recorded response that belongs to a prompt.

    Recorded exchanges are stored in (possibly nested) folders below
    ``mock/<AGENT>/<test_name>`` next to this file, where ``AGENT`` is read
    from the environment. When the content of a ``prompt_{id}.log`` file
    matches the given prompt, the content of ``response_{id}.log`` from the
    same folder is returned.

    Both sides are passed through ``filter_out_symbols`` before they are
    compared, which avoids surprising mismatches caused by linters and minor
    discrepancies between platforms. A plain full-string comparison is used
    on purpose: smarter matching (early-out, comparing from the end of the
    file) is unnecessary for tests and would only make this harder to read.

    Args:
        test_name: Name of the test; selects the folder to search.
        messages: The prompt text to look up.

    Returns:
        The text of the matching response file, or ``None`` when no recorded
        prompt matches.
    """
    agent_name = os.environ.get('AGENT')
    mock_dir = os.path.join(script_dir, 'mock', agent_name, test_name)
    wanted = filter_out_symbols(messages)

    for folder, _, names in os.walk(mock_dir):
        for name in names:
            # Only prompt logs are candidates
            if not (name.startswith('prompt_') and name.endswith('.log')):
                continue

            prompt_path = os.path.join(folder, name)
            with open(prompt_path, 'r') as prompt_file:
                recorded = filter_out_symbols(prompt_file.read())
            if recorded != wanted:
                continue

            # The response file carries the same id as the matching prompt
            log_id = get_log_id(prompt_path)
            response_path = os.path.join(folder, f'response_{log_id}.log')
            with open(response_path, 'r') as response_file:
                return response_file.read()

    return None
|
|
|
|
|
|
def mock_user_response(*args, test_name, **kwargs):
    """Return the text that should be fed to the agent as fake user input.

    The agent asks for user input using `input()` when calling
    `asyncio.run(main(task))`. The answers for a test are read from
    ``mock/<AGENT>/<test_name>/user_responses.log`` next to this file
    (``AGENT`` comes from the environment) so they can be installed as STDIN.

    Args:
        *args: Ignored.
        test_name: Name of the test whose responses should be loaded.
        **kwargs: Ignored.

    Returns:
        The file content with trailing whitespace stripped and a single
        newline appended, or an empty string when the file does not exist.
    """
    agent_name = os.environ.get('AGENT')
    response_path = os.path.join(
        script_dir, 'mock', agent_name, test_name, 'user_responses.log'
    )
    if os.path.exists(response_path):
        with open(response_path, 'r') as response_file:
            content = response_file.read()
        # Terminate with exactly one newline so the last answer is a full line
        return content.rstrip() + '\n'
    return ''
|
|
|
|
|
|
def mock_completion(*args, test_name, **kwargs):
    """Stand-in for the LLM completion call that answers from recorded logs.

    The ``content`` of every entry in ``kwargs['messages']`` is concatenated
    into one prompt string, the recorded response for that prompt is looked
    up with ``get_mock_response``, and the result is handed to ``completion``
    as ``mock_response`` together with the original keyword arguments.

    Args:
        *args: Ignored.
        test_name: Name of the running test; selects the mock folder.
        **kwargs: Keyword arguments of the completion call; must contain
            ``messages``.

    Returns:
        Whatever ``completion`` returns for the mocked call.

    Raises:
        AssertionError: If no recorded response matches the prompt.
    """
    prompt_text = ''.join(message['content'] for message in kwargs['messages'])
    mock_response = get_mock_response(test_name, prompt_text)
    assert mock_response is not None, 'Mock response for prompt is not found'
    return completion(**kwargs, mock_response=mock_response)
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def patch_completion(monkeypatch, request):
    """Install the LLM and user-input mocks for every test.

    ``opendevin.llm.llm.litellm_completion`` is replaced by ``mock_completion``
    bound to the current test's name. If ``mock_user_response`` yields any
    text for the test, ``sys.stdin`` is replaced by a stream of that text.
    """
    test_name = request.node.name

    # Mock LLM completion
    fake_completion = partial(mock_completion, test_name=test_name)
    monkeypatch.setattr('opendevin.llm.llm.litellm_completion', fake_completion)

    # Mock user input (only for tests that have user_responses.log)
    stdin_text = mock_user_response(test_name=test_name)
    if not stdin_text:
        return
    monkeypatch.setattr('sys.stdin', io.StringIO(stdin_text))
|