(fix) Fix runtime (RT) tests and split tests in 2 actions (openhands/root) (#3791)

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2026-01-08 22:38:05 -05:00 · 2024-09-14 21:51:30 +02:00
parent 57390eb26b
commit 554636cf2a
21 changed files with 867 additions and 702 deletions
--- a/.github/workflows/ghcr_runtime.yml
+++ b/.github/workflows/ghcr_runtime.yml
@@ -1,5 +1,5 @@
 # Workflow that builds, tests and then pushes the runtime docker images to the ghcr.io repository
-name: Build, Test and Publish Runtime Image
+name: Build, Test and Publish RT Image

 # Only run one workflow of the same group at a time.
 # There can be at most one running and one pending job in a concurrency group at any time.
@@ -104,9 +104,9 @@ jobs:
          name: runtime-${{ matrix.base_image.tag }}
          path: /tmp/runtime-${{ matrix.base_image.tag }}.tar

-  # Run unit tests with the EventStream runtime Docker images
-  test_runtime:
-    name: Test Runtime
+  # Run unit tests with the EventStream runtime Docker images as root
+  test_runtime_root:
+    name: RT Unit Tests (Root)
    needs: [ghcr_build_runtime]
    runs-on: ubuntu-latest
    strategy:
@@ -164,11 +164,84 @@ jobs:
          image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
          image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')

+          SKIP_CONTAINER_LOGS=true \
          TEST_RUNTIME=eventstream \
          SANDBOX_USER_ID=$(id -u) \
          SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
          TEST_IN_CI=true \
-          poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
+          RUN_AS_OPENHANDS=false \
+          poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v4
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+
+  # Run unit tests with the EventStream runtime Docker images as openhands user
+  test_runtime_oh:
+    name: RT Unit Tests (openhands)
+    runs-on: ubuntu-latest
+    needs: [ghcr_build_runtime]
+    strategy:
+      matrix:
+        base_image: ['nikolaik']
+    steps:
+      - uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+      # Forked repos can't push to GHCR, so we need to download the image as an artifact
+      - name: Download runtime image for fork
+        if: github.event.pull_request.head.repo.fork
+        uses: actions/download-artifact@v4
+        with:
+          name: runtime-${{ matrix.base_image }}
+          path: /tmp
+      - name: Load runtime image for fork
+        if: github.event.pull_request.head.repo.fork
+        run: |
+          docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Install poetry via pipx
+        run: pipx install poetry
+      - name: Install Python dependencies using Poetry
+        run: make install-python-dependencies
+      - name: Run runtime tests
+        run: |
+          # We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
+          # them across more than 2 CPUs for some reason
+          poetry run pip install pytest-xdist
+
+          # Install pytest-rerunfailures so that flaky tests can be retried on failure
+          poetry run pip install pytest-rerunfailures
+
+          image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
+          image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
+
+          SKIP_CONTAINER_LOGS=true \
+          TEST_RUNTIME=eventstream \
+          SANDBOX_USER_ID=$(id -u) \
+          SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
+          TEST_IN_CI=true \
+          RUN_AS_OPENHANDS=true \
+          poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        env:
@@ -176,7 +249,7 @@ jobs:

  # Run integration tests with the eventstream runtime Docker image
  runtime_integration_tests_on_linux:
-    name: Runtime Integration Tests on Linux
+    name: RT Integration Tests (Linux)
    runs-on: ubuntu-latest
    needs: [ghcr_build_runtime]
    strategy:
@@ -237,7 +310,7 @@ jobs:
    name: All Runtime Tests Passed
    if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
    runs-on: ubuntu-latest
-    needs: [test_runtime, runtime_integration_tests_on_linux]
+    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
    steps:
      - name: All tests passed
        run: echo "All runtime tests have passed successfully!"
@@ -246,7 +319,7 @@ jobs:
    name: All Runtime Tests Passed
    if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
    runs-on: ubuntu-latest
-    needs: [test_runtime, runtime_integration_tests_on_linux]
+    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
    steps:
      - name: Some tests failed
        run: |