mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-09 14:57:59 -05:00
INT: prevent error on repeat integration tests after failed test(s) (#2935)
* Integration tests: prevent File not found error * forgot to remove debug calls in regenerate.sh
This commit is contained in:
@@ -47,6 +47,16 @@ class BrowserEnv:
|
||||
self.process = multiprocessing.Process(
|
||||
target=self.browser_process,
|
||||
)
|
||||
|
||||
try:
|
||||
self.original_cwd = os.getcwd()
|
||||
except FileNotFoundError:
|
||||
logger.warning(
|
||||
'Current working directory does not exist. Using /tmp as fallback.'
|
||||
)
|
||||
self.original_cwd = '/tmp'
|
||||
os.chdir('/tmp')
|
||||
|
||||
if is_async:
|
||||
threading.Thread(target=self.init_browser).start()
|
||||
else:
|
||||
@@ -66,7 +76,23 @@ class BrowserEnv:
|
||||
|
||||
def init_browser(self):
|
||||
logger.info('Starting browser env...')
|
||||
self.process.start()
|
||||
|
||||
# Ensure we're in a valid directory before starting the process
|
||||
try:
|
||||
os.chdir(self.original_cwd)
|
||||
logger.debug(f'Changed back to original directory: {self.original_cwd}')
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to change to original directory: {e}')
|
||||
# If we can't change to the original directory, try to use a known valid directory
|
||||
os.chdir('/tmp')
|
||||
logger.debug('Changed to /tmp directory as fallback')
|
||||
|
||||
try:
|
||||
self.process.start()
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to start browser process: {e}')
|
||||
raise
|
||||
|
||||
if not self.check_alive():
|
||||
self.close()
|
||||
raise BrowserInitException('Failed to start browser environment.')
|
||||
|
||||
@@ -79,9 +79,9 @@ class ServerRuntime(Runtime):
|
||||
self.browser: BrowserEnv | None = None
|
||||
|
||||
async def close(self):
|
||||
if not self._is_external_sandbox:
|
||||
if hasattr(self, '_is_external_sandbox') and not self._is_external_sandbox:
|
||||
self.sandbox.close()
|
||||
if self.browser is not None:
|
||||
if hasattr(self, 'browser') and self.browser is not None:
|
||||
self.browser.close()
|
||||
|
||||
def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None:
|
||||
@@ -109,7 +109,7 @@ class ServerRuntime(Runtime):
|
||||
return self._run_command(action.command)
|
||||
|
||||
async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
|
||||
obs = self._run_command(
|
||||
self._run_command(
|
||||
("cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{action.code}\n' 'EOL'),
|
||||
)
|
||||
|
||||
@@ -150,7 +150,7 @@ class ServerRuntime(Runtime):
|
||||
|
||||
# re-init the kernel after restart
|
||||
if action.kernel_init_code:
|
||||
obs = self._run_command(
|
||||
self._run_command(
|
||||
(
|
||||
f"cat > /tmp/opendevin_jupyter_init.py <<'EOL'\n"
|
||||
f'{action.kernel_init_code}\n'
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from functools import partial
|
||||
@@ -12,8 +13,13 @@ from litellm import completion
|
||||
|
||||
from opendevin.llm.llm import message_separator
|
||||
|
||||
script_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
workspace_path = os.getenv('WORKSPACE_BASE')
|
||||
script_dir = os.environ.get('SCRIPT_DIR')
|
||||
project_root = os.environ.get('PROJECT_ROOT')
|
||||
workspace_path = os.environ.get('WORKSPACE_BASE')
|
||||
|
||||
assert script_dir is not None, 'SCRIPT_DIR environment variable is not set'
|
||||
assert project_root is not None, 'PROJECT_ROOT environment variable is not set'
|
||||
assert workspace_path is not None, 'WORKSPACE_BASE environment variable is not set'
|
||||
|
||||
|
||||
class SecretExit(Exception):
|
||||
@@ -204,16 +210,41 @@ def http_server():
|
||||
def set_up():
|
||||
global cur_id
|
||||
cur_id = 0
|
||||
assert workspace_path is not None
|
||||
assert workspace_path is not None, 'workspace_path is not set'
|
||||
|
||||
# Remove and recreate the workspace_path
|
||||
if os.path.exists(workspace_path):
|
||||
for file in os.listdir(workspace_path):
|
||||
os.remove(os.path.join(workspace_path, file))
|
||||
shutil.rmtree(workspace_path)
|
||||
os.makedirs(workspace_path)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def resource_setup():
|
||||
set_up()
|
||||
if not os.path.exists(workspace_path):
|
||||
os.makedirs(workspace_path)
|
||||
# Yield to test execution
|
||||
yield
|
||||
try:
|
||||
original_cwd = os.getcwd()
|
||||
except FileNotFoundError:
|
||||
print(
|
||||
'[DEBUG] Original working directory does not exist. Using /tmp as fallback.'
|
||||
)
|
||||
original_cwd = '/tmp'
|
||||
os.chdir('/tmp')
|
||||
|
||||
try:
|
||||
set_up()
|
||||
yield
|
||||
finally:
|
||||
try:
|
||||
print(f'[DEBUG] Final working directory: {os.getcwd()}')
|
||||
except FileNotFoundError:
|
||||
print('[DEBUG] Final working directory does not exist')
|
||||
|
||||
if os.path.exists(workspace_path):
|
||||
shutil.rmtree(workspace_path)
|
||||
os.makedirs(workspace_path, exist_ok=True)
|
||||
|
||||
# Try to change back to the original directory
|
||||
try:
|
||||
os.chdir(original_cwd)
|
||||
print(f'[DEBUG] Changed back to original directory: {original_cwd}')
|
||||
except Exception:
|
||||
os.chdir('/tmp')
|
||||
|
||||
@@ -11,6 +11,17 @@ unset SANDBOX_ENV_OPENAI_API_KEY
|
||||
unset OPENAI_BASE_URL
|
||||
unset OPENAI_MODEL
|
||||
|
||||
# Get the absolute path of the script directory
|
||||
get_script_dir() {
|
||||
local source="${BASH_SOURCE[0]}"
|
||||
while [ -h "$source" ]; do
|
||||
local dir="$( cd -P "$( dirname "$source" )" && pwd )"
|
||||
source="$(readlink "$source")"
|
||||
[[ $source != /* ]] && source="$dir/$source"
|
||||
done
|
||||
echo "$( cd -P "$( dirname "$source" )" && pwd )"
|
||||
}
|
||||
|
||||
TMP_FILE="${TMP_FILE:-tmp.log}"
|
||||
|
||||
if [ -z $WORKSPACE_MOUNT_PATH ]; then
|
||||
@@ -20,14 +31,23 @@ if [ -z $WORKSPACE_BASE ]; then
|
||||
WORKSPACE_BASE=$(pwd)
|
||||
fi
|
||||
|
||||
WORKSPACE_MOUNT_PATH+="/_test_workspace"
|
||||
WORKSPACE_BASE+="/_test_workspace"
|
||||
export SCRIPT_DIR=$(get_script_dir)
|
||||
export PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
|
||||
|
||||
WORKSPACE_MOUNT_PATH=$(realpath "${WORKSPACE_MOUNT_PATH}/_test_workspace")
|
||||
WORKSPACE_BASE=$(realpath "${WORKSPACE_BASE}/_test_workspace")
|
||||
WORKSPACE_MOUNT_PATH_IN_SANDBOX="/workspace"
|
||||
|
||||
echo "Current working directory: $(pwd)"
|
||||
echo "SCRIPT_DIR: $SCRIPT_DIR"
|
||||
echo "PROJECT_ROOT: $PROJECT_ROOT"
|
||||
echo "WORKSPACE_BASE: $WORKSPACE_BASE"
|
||||
echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH"
|
||||
echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX"
|
||||
|
||||
# Ensure we're in the correct directory
|
||||
cd "$PROJECT_ROOT" || exit 1
|
||||
|
||||
mkdir -p $WORKSPACE_BASE
|
||||
|
||||
# use environmental variable if exists, otherwise use "ssh"
|
||||
@@ -71,14 +91,18 @@ num_of_agents=${#agents[@]}
|
||||
|
||||
# run integration test against a specific agent & test
|
||||
run_test() {
|
||||
local pytest_cmd="poetry run pytest -s ./tests/integration/test_agent.py::$test_name"
|
||||
# Ensure we're in the correct directory
|
||||
cd "$PROJECT_ROOT" || exit 1
|
||||
|
||||
local pytest_cmd="poetry run pytest --cache-clear -s $SCRIPT_DIR/test_agent.py::$test_name"
|
||||
# Check if TEST_IN_CI is defined
|
||||
if [ -n "$TEST_IN_CI" ]; then
|
||||
pytest_cmd+=" --cov=agenthub --cov=opendevin --cov-report=xml --cov-append"
|
||||
fi
|
||||
|
||||
SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
|
||||
env SCRIPT_DIR="$SCRIPT_DIR" \
|
||||
PROJECT_ROOT="$PROJECT_ROOT" \
|
||||
SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
|
||||
PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
||||
WORKSPACE_BASE=$WORKSPACE_BASE \
|
||||
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
|
||||
@@ -120,7 +144,7 @@ run_test() {
|
||||
|
||||
# browsing capability needs a local http server
|
||||
launch_http_server() {
|
||||
poetry run python tests/integration/start_http_server.py &
|
||||
poetry run python $SCRIPT_DIR/start_http_server.py &
|
||||
HTTP_SERVER_PID=$!
|
||||
echo "Test http server launched, PID = $HTTP_SERVER_PID"
|
||||
sleep 10
|
||||
@@ -147,15 +171,17 @@ trap cleanup EXIT
|
||||
regenerate_without_llm() {
|
||||
# set -x to print the command being executed
|
||||
set -x
|
||||
SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
|
||||
PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
||||
WORKSPACE_BASE=$WORKSPACE_BASE \
|
||||
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
|
||||
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
||||
MAX_ITERATIONS=$MAX_ITERATIONS \
|
||||
FORCE_APPLY_PROMPTS=true \
|
||||
DEFAULT_AGENT=$agent \
|
||||
poetry run pytest -s ./tests/integration/test_agent.py::$test_name
|
||||
env SCRIPT_DIR="$SCRIPT_DIR" \
|
||||
PROJECT_ROOT="$PROJECT_ROOT" \
|
||||
SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
|
||||
PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
||||
WORKSPACE_BASE=$WORKSPACE_BASE \
|
||||
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
|
||||
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
||||
MAX_ITERATIONS=$MAX_ITERATIONS \
|
||||
FORCE_APPLY_PROMPTS=true \
|
||||
DEFAULT_AGENT=$agent \
|
||||
poetry run pytest -s $SCRIPT_DIR/test_agent.py::$test_name
|
||||
set +x
|
||||
}
|
||||
|
||||
@@ -165,29 +191,32 @@ regenerate_with_llm() {
|
||||
fi
|
||||
|
||||
rm -rf $WORKSPACE_BASE/*
|
||||
if [ -d "tests/integration/workspace/$test_name" ]; then
|
||||
cp -r tests/integration/workspace/$test_name/* $WORKSPACE_BASE
|
||||
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
|
||||
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
|
||||
fi
|
||||
|
||||
rm -rf logs
|
||||
rm -rf tests/integration/mock/$agent/$test_name/*
|
||||
rm -rf "$SCRIPT_DIR/mock/$agent/$test_name/*"
|
||||
# set -x to print the command being executed
|
||||
set -x
|
||||
echo -e "/exit\n" | \
|
||||
DEBUG=true \
|
||||
SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
|
||||
PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
||||
WORKSPACE_BASE=$WORKSPACE_BASE \
|
||||
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
|
||||
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
||||
poetry run python ./opendevin/core/main.py \
|
||||
-i $MAX_ITERATIONS \
|
||||
-t "$task Do not ask me for confirmation at any point." \
|
||||
-c $agent
|
||||
env SCRIPT_DIR="$SCRIPT_DIR" \
|
||||
PROJECT_ROOT="$PROJECT_ROOT" \
|
||||
DEBUG=true \
|
||||
SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
|
||||
PERSIST_SANDBOX=$PERSIST_SANDBOX \
|
||||
WORKSPACE_BASE=$WORKSPACE_BASE \
|
||||
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
|
||||
AGENT=$agent \
|
||||
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
|
||||
poetry run python "$PROJECT_ROOT/opendevin/core/main.py" \
|
||||
-i $MAX_ITERATIONS \
|
||||
-t "$task Do not ask me for confirmation at any point." \
|
||||
-c $agent
|
||||
set +x
|
||||
|
||||
mkdir -p tests/integration/mock/$agent/$test_name/
|
||||
mv logs/llm/**/* tests/integration/mock/$agent/$test_name/
|
||||
mkdir -p "$SCRIPT_DIR/mock/$agent/$test_name/"
|
||||
mv logs/llm/**/* "$SCRIPT_DIR/mock/$agent/$test_name/"
|
||||
|
||||
}
|
||||
|
||||
@@ -195,7 +224,6 @@ regenerate_with_llm() {
|
||||
## MAIN PROGRAM ##
|
||||
##############################################################
|
||||
|
||||
|
||||
if [ "$num_of_tests" -ne "${#test_names[@]}" ]; then
|
||||
echo "Every task must correspond to one test case"
|
||||
exit 1
|
||||
@@ -222,8 +250,8 @@ for ((i = 0; i < num_of_tests; i++)); do
|
||||
|
||||
echo -e "\n\n\n\n========STEP 1: Running $test_name for $agent========\n\n\n\n"
|
||||
rm -rf $WORKSPACE_BASE/*
|
||||
if [ -d "tests/integration/workspace/$test_name" ]; then
|
||||
cp -r "tests/integration/workspace/$test_name"/* $WORKSPACE_BASE
|
||||
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
|
||||
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
|
||||
fi
|
||||
|
||||
if [ "$TEST_ONLY" = true ]; then
|
||||
@@ -245,7 +273,7 @@ for ((i = 0; i < num_of_tests; i++)); do
|
||||
|
||||
if [ "$FORCE_USE_LLM" = true ]; then
|
||||
echo -e "\n\n\n\n========FORCE_USE_LLM, skipping step 2 & 3========\n\n\n\n"
|
||||
elif [ ! -d "tests/integration/mock/$agent/$test_name" ]; then
|
||||
elif [ ! -d "$SCRIPT_DIR/mock/$agent/$test_name" ]; then
|
||||
echo -e "\n\n\n\n========No existing mock responses for $agent/$test_name, skipping step 2 & 3========\n\n\n\n"
|
||||
else
|
||||
echo -e "\n\n\n\n========STEP 2: $test_name failed, regenerating prompts for $agent WITHOUT money cost========\n\n\n\n"
|
||||
|
||||
Reference in New Issue
Block a user