(fix) Fix runtime (RT) tests and split tests in 2 actions (openhands/root) (#3791)

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
tobitege
2024-09-14 21:51:30 +02:00
committed by GitHub
parent 57390eb26b
commit 554636cf2a
21 changed files with 867 additions and 702 deletions

View File

@@ -1,5 +1,5 @@
# Workflow that builds, tests and then pushes the runtime docker images to the ghcr.io repository
name: Build, Test and Publish Runtime Image
name: Build, Test and Publish RT Image
# Only run one workflow of the same group at a time.
# There can be at most one running and one pending job in a concurrency group at any time.
@@ -104,9 +104,9 @@ jobs:
name: runtime-${{ matrix.base_image.tag }}
path: /tmp/runtime-${{ matrix.base_image.tag }}.tar
# Run unit tests with the EventStream runtime Docker images
test_runtime:
name: Test Runtime
# Run unit tests with the EventStream runtime Docker images as root
test_runtime_root:
name: RT Unit Tests (Root)
needs: [ghcr_build_runtime]
runs-on: ubuntu-latest
strategy:
@@ -164,11 +164,84 @@ jobs:
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
SKIP_CONTAINER_LOGS=true \
TEST_RUNTIME=eventstream \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
RUN_AS_OPENHANDS=false \
poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
# Run unit tests with the EventStream runtime Docker images as openhands user
test_runtime_oh:
name: RT Unit Tests (openhands)
runs-on: ubuntu-latest
needs: [ghcr_build_runtime]
strategy:
matrix:
base_image: ['nikolaik']
steps:
- uses: actions/checkout@v4
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
# Forked repos can't push to GHCR, so we need to download the image as an artifact
- name: Download runtime image for fork
if: github.event.pull_request.head.repo.fork
uses: actions/download-artifact@v4
with:
name: runtime-${{ matrix.base_image }}
path: /tmp
- name: Load runtime image for fork
if: github.event.pull_request.head.repo.fork
run: |
docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
- name: Cache Poetry dependencies
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.virtualenvs
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install poetry via pipx
run: pipx install poetry
- name: Install Python dependencies using Poetry
run: make install-python-dependencies
- name: Run runtime tests
run: |
# We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
# then across more than 2 CPUs for some reason
poetry run pip install pytest-xdist
# Install to be able to retry on failures for flaky tests
poetry run pip install pytest-rerunfailures
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
SKIP_CONTAINER_LOGS=true \
TEST_RUNTIME=eventstream \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
RUN_AS_OPENHANDS=true \
poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
@@ -176,7 +249,7 @@ jobs:
# Run integration tests with the eventstream runtime Docker image
runtime_integration_tests_on_linux:
name: Runtime Integration Tests on Linux
name: RT Integration Tests (Linux)
runs-on: ubuntu-latest
needs: [ghcr_build_runtime]
strategy:
@@ -237,7 +310,7 @@ jobs:
name: All Runtime Tests Passed
if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime, runtime_integration_tests_on_linux]
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
steps:
- name: All tests passed
run: echo "All runtime tests have passed successfully!"
@@ -246,7 +319,7 @@ jobs:
name: All Runtime Tests Passed
if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime, runtime_integration_tests_on_linux]
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
steps:
- name: Some tests failed
run: |