INT: prevent error on repeat integration tests after failed test(s) (#2935)

* Integration tests: prevent File not found error

* forgot to remove debug calls in regenerate.sh
This commit is contained in:
tobitege
2024-07-18 06:29:15 +02:00
committed by GitHub
parent 728131ff1d
commit 5a5713009f
4 changed files with 133 additions and 48 deletions

View File

@@ -47,6 +47,16 @@ class BrowserEnv:
self.process = multiprocessing.Process(
target=self.browser_process,
)
try:
self.original_cwd = os.getcwd()
except FileNotFoundError:
logger.warning(
'Current working directory does not exist. Using /tmp as fallback.'
)
self.original_cwd = '/tmp'
os.chdir('/tmp')
if is_async:
threading.Thread(target=self.init_browser).start()
else:
@@ -66,7 +76,23 @@ class BrowserEnv:
def init_browser(self):
logger.info('Starting browser env...')
self.process.start()
# Ensure we're in a valid directory before starting the process
try:
os.chdir(self.original_cwd)
logger.debug(f'Changed back to original directory: {self.original_cwd}')
except Exception as e:
logger.error(f'Failed to change to original directory: {e}')
# If we can't change to the original directory, try to use a known valid directory
os.chdir('/tmp')
logger.debug('Changed to /tmp directory as fallback')
try:
self.process.start()
except Exception as e:
logger.error(f'Failed to start browser process: {e}')
raise
if not self.check_alive():
self.close()
raise BrowserInitException('Failed to start browser environment.')

View File

@@ -79,9 +79,9 @@ class ServerRuntime(Runtime):
self.browser: BrowserEnv | None = None
async def close(self):
if not self._is_external_sandbox:
if hasattr(self, '_is_external_sandbox') and not self._is_external_sandbox:
self.sandbox.close()
if self.browser is not None:
if hasattr(self, 'browser') and self.browser is not None:
self.browser.close()
def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None:
@@ -109,7 +109,7 @@ class ServerRuntime(Runtime):
return self._run_command(action.command)
async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
obs = self._run_command(
self._run_command(
("cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{action.code}\n' 'EOL'),
)
@@ -150,7 +150,7 @@ class ServerRuntime(Runtime):
# re-init the kernel after restart
if action.kernel_init_code:
obs = self._run_command(
self._run_command(
(
f"cat > /tmp/opendevin_jupyter_init.py <<'EOL'\n"
f'{action.kernel_init_code}\n'

View File

@@ -1,6 +1,7 @@
import io
import os
import re
import shutil
import subprocess
import tempfile
from functools import partial
@@ -12,8 +13,13 @@ from litellm import completion
from opendevin.llm.llm import message_separator
script_dir = os.path.dirname(os.path.realpath(__file__))
workspace_path = os.getenv('WORKSPACE_BASE')
script_dir = os.environ.get('SCRIPT_DIR')
project_root = os.environ.get('PROJECT_ROOT')
workspace_path = os.environ.get('WORKSPACE_BASE')
assert script_dir is not None, 'SCRIPT_DIR environment variable is not set'
assert project_root is not None, 'PROJECT_ROOT environment variable is not set'
assert workspace_path is not None, 'WORKSPACE_BASE environment variable is not set'
class SecretExit(Exception):
@@ -204,16 +210,41 @@ def http_server():
def set_up():
global cur_id
cur_id = 0
assert workspace_path is not None
assert workspace_path is not None, 'workspace_path is not set'
# Remove and recreate the workspace_path
if os.path.exists(workspace_path):
for file in os.listdir(workspace_path):
os.remove(os.path.join(workspace_path, file))
shutil.rmtree(workspace_path)
os.makedirs(workspace_path)
@pytest.fixture(autouse=True)
def resource_setup():
set_up()
if not os.path.exists(workspace_path):
os.makedirs(workspace_path)
# Yield to test execution
yield
try:
original_cwd = os.getcwd()
except FileNotFoundError:
print(
'[DEBUG] Original working directory does not exist. Using /tmp as fallback.'
)
original_cwd = '/tmp'
os.chdir('/tmp')
try:
set_up()
yield
finally:
try:
print(f'[DEBUG] Final working directory: {os.getcwd()}')
except FileNotFoundError:
print('[DEBUG] Final working directory does not exist')
if os.path.exists(workspace_path):
shutil.rmtree(workspace_path)
os.makedirs(workspace_path, exist_ok=True)
# Try to change back to the original directory
try:
os.chdir(original_cwd)
print(f'[DEBUG] Changed back to original directory: {original_cwd}')
except Exception:
os.chdir('/tmp')

View File

@@ -11,6 +11,17 @@ unset SANDBOX_ENV_OPENAI_API_KEY
unset OPENAI_BASE_URL
unset OPENAI_MODEL
# Get the absolute path of the script directory
get_script_dir() {
local source="${BASH_SOURCE[0]}"
while [ -h "$source" ]; do
local dir="$( cd -P "$( dirname "$source" )" && pwd )"
source="$(readlink "$source")"
[[ $source != /* ]] && source="$dir/$source"
done
echo "$( cd -P "$( dirname "$source" )" && pwd )"
}
TMP_FILE="${TMP_FILE:-tmp.log}"
if [ -z $WORKSPACE_MOUNT_PATH ]; then
@@ -20,14 +31,23 @@ if [ -z $WORKSPACE_BASE ]; then
WORKSPACE_BASE=$(pwd)
fi
WORKSPACE_MOUNT_PATH+="/_test_workspace"
WORKSPACE_BASE+="/_test_workspace"
export SCRIPT_DIR=$(get_script_dir)
export PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
WORKSPACE_MOUNT_PATH=$(realpath "${WORKSPACE_MOUNT_PATH}/_test_workspace")
WORKSPACE_BASE=$(realpath "${WORKSPACE_BASE}/_test_workspace")
WORKSPACE_MOUNT_PATH_IN_SANDBOX="/workspace"
echo "Current working directory: $(pwd)"
echo "SCRIPT_DIR: $SCRIPT_DIR"
echo "PROJECT_ROOT: $PROJECT_ROOT"
echo "WORKSPACE_BASE: $WORKSPACE_BASE"
echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH"
echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX"
# Ensure we're in the correct directory
cd "$PROJECT_ROOT" || exit 1
mkdir -p $WORKSPACE_BASE
# use environmental variable if exists, otherwise use "ssh"
@@ -71,14 +91,18 @@ num_of_agents=${#agents[@]}
# run integration test against a specific agent & test
run_test() {
local pytest_cmd="poetry run pytest -s ./tests/integration/test_agent.py::$test_name"
# Ensure we're in the correct directory
cd "$PROJECT_ROOT" || exit 1
local pytest_cmd="poetry run pytest --cache-clear -s $SCRIPT_DIR/test_agent.py::$test_name"
# Check if TEST_IN_CI is defined
if [ -n "$TEST_IN_CI" ]; then
pytest_cmd+=" --cov=agenthub --cov=opendevin --cov-report=xml --cov-append"
fi
SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
env SCRIPT_DIR="$SCRIPT_DIR" \
PROJECT_ROOT="$PROJECT_ROOT" \
SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
@@ -120,7 +144,7 @@ run_test() {
# browsing capability needs a local http server
launch_http_server() {
poetry run python tests/integration/start_http_server.py &
poetry run python $SCRIPT_DIR/start_http_server.py &
HTTP_SERVER_PID=$!
echo "Test http server launched, PID = $HTTP_SERVER_PID"
sleep 10
@@ -147,15 +171,17 @@ trap cleanup EXIT
regenerate_without_llm() {
# set -x to print the command being executed
set -x
SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
MAX_ITERATIONS=$MAX_ITERATIONS \
FORCE_APPLY_PROMPTS=true \
DEFAULT_AGENT=$agent \
poetry run pytest -s ./tests/integration/test_agent.py::$test_name
env SCRIPT_DIR="$SCRIPT_DIR" \
PROJECT_ROOT="$PROJECT_ROOT" \
SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
MAX_ITERATIONS=$MAX_ITERATIONS \
FORCE_APPLY_PROMPTS=true \
DEFAULT_AGENT=$agent \
poetry run pytest -s $SCRIPT_DIR/test_agent.py::$test_name
set +x
}
@@ -165,29 +191,32 @@ regenerate_with_llm() {
fi
rm -rf $WORKSPACE_BASE/*
if [ -d "tests/integration/workspace/$test_name" ]; then
cp -r tests/integration/workspace/$test_name/* $WORKSPACE_BASE
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
fi
rm -rf logs
rm -rf tests/integration/mock/$agent/$test_name/*
rm -rf "$SCRIPT_DIR/mock/$agent/$test_name/*"
# set -x to print the command being executed
set -x
echo -e "/exit\n" | \
DEBUG=true \
SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
poetry run python ./opendevin/core/main.py \
-i $MAX_ITERATIONS \
-t "$task Do not ask me for confirmation at any point." \
-c $agent
env SCRIPT_DIR="$SCRIPT_DIR" \
PROJECT_ROOT="$PROJECT_ROOT" \
DEBUG=true \
SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
PERSIST_SANDBOX=$PERSIST_SANDBOX \
WORKSPACE_BASE=$WORKSPACE_BASE \
WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
AGENT=$agent \
WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
poetry run python "$PROJECT_ROOT/opendevin/core/main.py" \
-i $MAX_ITERATIONS \
-t "$task Do not ask me for confirmation at any point." \
-c $agent
set +x
mkdir -p tests/integration/mock/$agent/$test_name/
mv logs/llm/**/* tests/integration/mock/$agent/$test_name/
mkdir -p "$SCRIPT_DIR/mock/$agent/$test_name/"
mv logs/llm/**/* "$SCRIPT_DIR/mock/$agent/$test_name/"
}
@@ -195,7 +224,6 @@ regenerate_with_llm() {
## MAIN PROGRAM ##
##############################################################
if [ "$num_of_tests" -ne "${#test_names[@]}" ]; then
echo "Every task must correspond to one test case"
exit 1
@@ -222,8 +250,8 @@ for ((i = 0; i < num_of_tests; i++)); do
echo -e "\n\n\n\n========STEP 1: Running $test_name for $agent========\n\n\n\n"
rm -rf $WORKSPACE_BASE/*
if [ -d "tests/integration/workspace/$test_name" ]; then
cp -r "tests/integration/workspace/$test_name"/* $WORKSPACE_BASE
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
fi
if [ "$TEST_ONLY" = true ]; then
@@ -245,7 +273,7 @@ for ((i = 0; i < num_of_tests; i++)); do
if [ "$FORCE_USE_LLM" = true ]; then
echo -e "\n\n\n\n========FORCE_USE_LLM, skipping step 2 & 3========\n\n\n\n"
elif [ ! -d "tests/integration/mock/$agent/$test_name" ]; then
elif [ ! -d "$SCRIPT_DIR/mock/$agent/$test_name" ]; then
echo -e "\n\n\n\n========No existing mock responses for $agent/$test_name, skipping step 2 & 3========\n\n\n\n"
else
echo -e "\n\n\n\n========STEP 2: $test_name failed, regenerating prompts for $agent WITHOUT money cost========\n\n\n\n"