INT: prevent error on repeat integration tests after failed test(s) (#2935)

* Integration tests: prevent File not found error * forgot to remove debug calls in regenerate.sh
2026-01-09 14:57:59 -05:00 · 2024-07-18 06:29:15 +02:00
parent 728131ff1d
commit 5a5713009f
4 changed files with 133 additions and 48 deletions
--- a/opendevin/runtime/browser/browser_env.py
+++ b/opendevin/runtime/browser/browser_env.py
@@ -47,6 +47,16 @@ class BrowserEnv:
        self.process = multiprocessing.Process(
            target=self.browser_process,
        )
+
+        try:
+            self.original_cwd = os.getcwd()
+        except FileNotFoundError:
+            logger.warning(
+                'Current working directory does not exist. Using /tmp as fallback.'
+            )
+            self.original_cwd = '/tmp'
+            os.chdir('/tmp')
+
        if is_async:
            threading.Thread(target=self.init_browser).start()
        else:
@@ -66,7 +76,23 @@ class BrowserEnv:

    def init_browser(self):
        logger.info('Starting browser env...')
-        self.process.start()
+
+        # Ensure we're in a valid directory before starting the process
+        try:
+            os.chdir(self.original_cwd)
+            logger.debug(f'Changed back to original directory: {self.original_cwd}')
+        except Exception as e:
+            logger.error(f'Failed to change to original directory: {e}')
+            # If we can't change to the original directory, try to use a known valid directory
+            os.chdir('/tmp')
+            logger.debug('Changed to /tmp directory as fallback')
+
+        try:
+            self.process.start()
+        except Exception as e:
+            logger.error(f'Failed to start browser process: {e}')
+            raise
+
        if not self.check_alive():
            self.close()
            raise BrowserInitException('Failed to start browser environment.')
--- a/opendevin/runtime/server/runtime.py
+++ b/opendevin/runtime/server/runtime.py
@@ -79,9 +79,9 @@ class ServerRuntime(Runtime):
        self.browser: BrowserEnv | None = None

    async def close(self):
-        if not self._is_external_sandbox:
+        if hasattr(self, '_is_external_sandbox') and not self._is_external_sandbox:
            self.sandbox.close()
-        if self.browser is not None:
+        if hasattr(self, 'browser') and self.browser is not None:
            self.browser.close()

    def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None:
@@ -109,7 +109,7 @@ class ServerRuntime(Runtime):
        return self._run_command(action.command)

    async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
-        obs = self._run_command(
+        self._run_command(
            ("cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{action.code}\n' 'EOL'),
        )

@@ -150,7 +150,7 @@ class ServerRuntime(Runtime):

                    # re-init the kernel after restart
                    if action.kernel_init_code:
-                        obs = self._run_command(
+                        self._run_command(
                            (
                                f"cat > /tmp/opendevin_jupyter_init.py <<'EOL'\n"
                                f'{action.kernel_init_code}\n'
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -1,6 +1,7 @@
 import io
 import os
 import re
+import shutil
 import subprocess
 import tempfile
 from functools import partial
@@ -12,8 +13,13 @@ from litellm import completion

 from opendevin.llm.llm import message_separator

-script_dir = os.path.dirname(os.path.realpath(__file__))
-workspace_path = os.getenv('WORKSPACE_BASE')
+script_dir = os.environ.get('SCRIPT_DIR')
+project_root = os.environ.get('PROJECT_ROOT')
+workspace_path = os.environ.get('WORKSPACE_BASE')
+
+assert script_dir is not None, 'SCRIPT_DIR environment variable is not set'
+assert project_root is not None, 'PROJECT_ROOT environment variable is not set'
+assert workspace_path is not None, 'WORKSPACE_BASE environment variable is not set'


 class SecretExit(Exception):
@@ -204,16 +210,41 @@ def http_server():
 def set_up():
    global cur_id
    cur_id = 0
-    assert workspace_path is not None
+    assert workspace_path is not None, 'workspace_path is not set'
+
+    # Remove and recreate the workspace_path
    if os.path.exists(workspace_path):
-        for file in os.listdir(workspace_path):
-            os.remove(os.path.join(workspace_path, file))
+        shutil.rmtree(workspace_path)
+    os.makedirs(workspace_path)


@pytest.fixture(autouse=True)
 def resource_setup():
-    set_up()
-    if not os.path.exists(workspace_path):
-        os.makedirs(workspace_path)
-    # Yield to test execution
-    yield
+    try:
+        original_cwd = os.getcwd()
+    except FileNotFoundError:
+        print(
+            '[DEBUG] Original working directory does not exist. Using /tmp as fallback.'
+        )
+        original_cwd = '/tmp'
+        os.chdir('/tmp')
+
+    try:
+        set_up()
+        yield
+    finally:
+        try:
+            print(f'[DEBUG] Final working directory: {os.getcwd()}')
+        except FileNotFoundError:
+            print('[DEBUG] Final working directory does not exist')
+
+        if os.path.exists(workspace_path):
+            shutil.rmtree(workspace_path)
+        os.makedirs(workspace_path, exist_ok=True)
+
+        # Try to change back to the original directory
+        try:
+            os.chdir(original_cwd)
+            print(f'[DEBUG] Changed back to original directory: {original_cwd}')
+        except Exception:
+            os.chdir('/tmp')
--- a/tests/integration/regenerate.sh
+++ b/tests/integration/regenerate.sh
@@ -11,6 +11,17 @@ unset SANDBOX_ENV_OPENAI_API_KEY
 unset OPENAI_BASE_URL
 unset OPENAI_MODEL

+# Get the absolute path of the script directory
+get_script_dir() {
+    local source="${BASH_SOURCE[0]}"
+    while [ -h "$source" ]; do
+        local dir="$( cd -P "$( dirname "$source" )" && pwd )"
+        source="$(readlink "$source")"
+        [[ $source != /* ]] && source="$dir/$source"
+    done
+    echo "$( cd -P "$( dirname "$source" )" && pwd )"
+}
+
 TMP_FILE="${TMP_FILE:-tmp.log}"

 if [ -z $WORKSPACE_MOUNT_PATH ]; then
@@ -20,14 +31,23 @@ if [ -z $WORKSPACE_BASE ]; then
  WORKSPACE_BASE=$(pwd)
 fi

-WORKSPACE_MOUNT_PATH+="/_test_workspace"
-WORKSPACE_BASE+="/_test_workspace"
+export SCRIPT_DIR=$(get_script_dir)
+export PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
+
+WORKSPACE_MOUNT_PATH=$(realpath "${WORKSPACE_MOUNT_PATH}/_test_workspace")
+WORKSPACE_BASE=$(realpath "${WORKSPACE_BASE}/_test_workspace")
 WORKSPACE_MOUNT_PATH_IN_SANDBOX="/workspace"

+echo "Current working directory: $(pwd)"
+echo "SCRIPT_DIR: $SCRIPT_DIR"
+echo "PROJECT_ROOT: $PROJECT_ROOT"
 echo "WORKSPACE_BASE: $WORKSPACE_BASE"
 echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH"
 echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX"

+# Ensure we're in the correct directory
+cd "$PROJECT_ROOT" || exit 1
+
 mkdir -p $WORKSPACE_BASE

 # use environmental variable if exists, otherwise use "ssh"
@@ -71,14 +91,18 @@ num_of_agents=${#agents[@]}

 # run integration test against a specific agent & test
 run_test() {
-  local pytest_cmd="poetry run pytest -s ./tests/integration/test_agent.py::$test_name"
+  # Ensure we're in the correct directory
+  cd "$PROJECT_ROOT" || exit 1

+  local pytest_cmd="poetry run pytest --cache-clear -s $SCRIPT_DIR/test_agent.py::$test_name"
  # Check if TEST_IN_CI is defined
  if [ -n "$TEST_IN_CI" ]; then
    pytest_cmd+=" --cov=agenthub --cov=opendevin --cov-report=xml --cov-append"
  fi

-  SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
+  env SCRIPT_DIR="$SCRIPT_DIR" \
+    PROJECT_ROOT="$PROJECT_ROOT" \
+    SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
    PERSIST_SANDBOX=$PERSIST_SANDBOX \
    WORKSPACE_BASE=$WORKSPACE_BASE \
    WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
@@ -120,7 +144,7 @@ run_test() {

 # browsing capability needs a local http server
 launch_http_server() {
-  poetry run python tests/integration/start_http_server.py &
+  poetry run python $SCRIPT_DIR/start_http_server.py &
  HTTP_SERVER_PID=$!
  echo "Test http server launched, PID = $HTTP_SERVER_PID"
  sleep 10
@@ -147,15 +171,17 @@ trap cleanup EXIT
 regenerate_without_llm() {
  # set -x to print the command being executed
  set -x
-  SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
-    PERSIST_SANDBOX=$PERSIST_SANDBOX \
-    WORKSPACE_BASE=$WORKSPACE_BASE \
-    WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
-    WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
-    MAX_ITERATIONS=$MAX_ITERATIONS \
-    FORCE_APPLY_PROMPTS=true \
-    DEFAULT_AGENT=$agent \
-    poetry run pytest -s ./tests/integration/test_agent.py::$test_name
+  env SCRIPT_DIR="$SCRIPT_DIR" \
+      PROJECT_ROOT="$PROJECT_ROOT" \
+      SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
+      PERSIST_SANDBOX=$PERSIST_SANDBOX \
+      WORKSPACE_BASE=$WORKSPACE_BASE \
+      WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
+      WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
+      MAX_ITERATIONS=$MAX_ITERATIONS \
+      FORCE_APPLY_PROMPTS=true \
+      DEFAULT_AGENT=$agent \
+      poetry run pytest -s $SCRIPT_DIR/test_agent.py::$test_name
  set +x
 }

@@ -165,29 +191,32 @@ regenerate_with_llm() {
  fi

  rm -rf $WORKSPACE_BASE/*
-  if [ -d "tests/integration/workspace/$test_name" ]; then
-    cp -r tests/integration/workspace/$test_name/* $WORKSPACE_BASE
+  if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
+    cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
  fi

  rm -rf logs
-  rm -rf tests/integration/mock/$agent/$test_name/*
+  rm -rf "$SCRIPT_DIR/mock/$agent/$test_name/*"
  # set -x to print the command being executed
  set -x
  echo -e "/exit\n" | \
-    DEBUG=true \
-    SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
-    PERSIST_SANDBOX=$PERSIST_SANDBOX \
-    WORKSPACE_BASE=$WORKSPACE_BASE \
-    WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
-    WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
-    poetry run python ./opendevin/core/main.py \
-    -i $MAX_ITERATIONS \
-    -t "$task Do not ask me for confirmation at any point." \
-    -c $agent
+    env SCRIPT_DIR="$SCRIPT_DIR" \
+      PROJECT_ROOT="$PROJECT_ROOT" \
+      DEBUG=true \
+      SANDBOX_BOX_TYPE="$SANDBOX_BOX_TYPE" \
+      PERSIST_SANDBOX=$PERSIST_SANDBOX \
+      WORKSPACE_BASE=$WORKSPACE_BASE \
+      WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
+      AGENT=$agent \
+      WORKSPACE_MOUNT_PATH_IN_SANDBOX=$WORKSPACE_MOUNT_PATH_IN_SANDBOX \
+      poetry run python "$PROJECT_ROOT/opendevin/core/main.py" \
+      -i $MAX_ITERATIONS \
+      -t "$task Do not ask me for confirmation at any point." \
+      -c $agent
  set +x

-  mkdir -p tests/integration/mock/$agent/$test_name/
-  mv logs/llm/**/* tests/integration/mock/$agent/$test_name/
+  mkdir -p "$SCRIPT_DIR/mock/$agent/$test_name/"
+  mv logs/llm/**/* "$SCRIPT_DIR/mock/$agent/$test_name/"

 }

@@ -195,7 +224,6 @@ regenerate_with_llm() {
 ##                      MAIN PROGRAM                        ##
 ##############################################################

-
 if [ "$num_of_tests" -ne "${#test_names[@]}" ]; then
  echo "Every task must correspond to one test case"
  exit 1
@@ -222,8 +250,8 @@ for ((i = 0; i < num_of_tests; i++)); do

    echo -e "\n\n\n\n========STEP 1: Running $test_name for $agent========\n\n\n\n"
    rm -rf $WORKSPACE_BASE/*
-    if [ -d "tests/integration/workspace/$test_name" ]; then
-      cp -r "tests/integration/workspace/$test_name"/* $WORKSPACE_BASE
+    if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
+      cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
    fi

    if [ "$TEST_ONLY" = true ]; then
@@ -245,7 +273,7 @@ for ((i = 0; i < num_of_tests; i++)); do

      if [ "$FORCE_USE_LLM" = true ]; then
        echo -e "\n\n\n\n========FORCE_USE_LLM, skipping step 2 & 3========\n\n\n\n"
-      elif [ ! -d "tests/integration/mock/$agent/$test_name" ]; then
+      elif [ ! -d "$SCRIPT_DIR/mock/$agent/$test_name" ]; then
        echo -e "\n\n\n\n========No existing mock responses for $agent/$test_name, skipping step 2 & 3========\n\n\n\n"
      else
        echo -e "\n\n\n\n========STEP 2: $test_name failed, regenerating prompts for $agent WITHOUT money cost========\n\n\n\n"