From 83f36c1d66cebcaf3d5d06ea7f777c0cda8109ba Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Mon, 19 Aug 2024 21:12:00 +0800 Subject: [PATCH] test: build and run runtime tests on different custom docker images (#3324) * try to fix pip unavailable * update test case for pip * force rebuild in CI * remove extra symlink * fix newline * added semi-colon to line 31 * Dockerfile.j2: activate env at the end * Revert "Dockerfile.j2: activate env at the end" This reverts commit cf2f5651021fe80d4ab69a35a85f0a35b29dc3d7. * cleanup Dockerfile * switch default python image * remove image agnostic (no longer used) * fix tests * simplify integration tests default image * add nodejs specific runtime tests * update tests and workflows * switch to nikolaik/python-nodejs:python3.11-nodejs22 * update build sh to output image name correctly * increase custom images to test * fix test * fix test * fix double quote * try fixing ci * update ghcr workflow * fix artifact name * try to fix ghcr again * fix workflow * save built image to correct dir * remove extra -docker-image * make last tag to be human readable image tag * fix hyphen to underscore * run test runtime on all tags * revert app build * separate ghcr workflow * update dockerfile for eval * fix tag for test run * try fix tag * try fix tag via matrix output * try workflow again * update comments * try fixing test matrix * fix artifact name * try fix tag again * Revert "try fix tag again" This reverts commit b369badd8cccf4a526e36d27eafb77ea2d32f6be. * tweak filename * try different path * fix filepath * try fix tag artifact path again * save json instead of line * update matrix * print all tags in workflow * fix DOCKER_IMAGE to avoid ghcr.io/opendevin/ghcr.io/opendevin/od_runtime * fix test matrix to only load unique test image tags * try fix matrix again!!!!! * add all runtime tests passed --------- Co-authored-by: tobitege Co-authored-by: Graham Neubig Co-authored-by: tobitege <10787084+tobitege@users.noreply.github.com> --- .github/workflows/gchr_app.yml | 148 +++++++++++++ .github/workflows/ghcr.yml | 270 +++++++++-------------- containers/build.sh | 12 +- containers/runtime/config.sh | 2 +- evaluation/logic_reasoning/Dockerfile | 4 +- evaluation/miniwob/Dockerfile | 2 +- evaluation/mint/Dockerfile | 2 +- evaluation/toolqa/Dockerfile | 2 +- opendevin/runtime/utils/runtime_build.py | 1 - tests/integration/regenerate.sh | 7 +- tests/unit/test_runtime.py | 142 +++++++----- 11 files changed, 343 insertions(+), 249 deletions(-) create mode 100644 .github/workflows/gchr_app.yml diff --git a/.github/workflows/gchr_app.yml b/.github/workflows/gchr_app.yml new file mode 100644 index 0000000000..8f13aafc2a --- /dev/null +++ b/.github/workflows/gchr_app.yml @@ -0,0 +1,148 @@ +# Workflow that builds, tests and then pushes the app docker images to the ghcr.io repository +name: Build and Publish App Image + + +# Always run on "main" +# Always run on tags +# Always run on PRs +# Can also be triggered manually +on: + push: + branches: + - main + tags: + - '*' + pull_request: + workflow_dispatch: + inputs: + reason: + description: 'Reason for manual trigger' + required: true + default: '' + +jobs: + # Builds the OpenDevin Docker images + ghcr_build: + name: Build App Image + runs-on: ubuntu-latest + outputs: + tags: ${{ steps.capture-tags.outputs.tags }} + permissions: + contents: read + packages: write + strategy: + matrix: + image: ['opendevin'] + platform: ['amd64', 'arm64'] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: true + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: false + swap-storage: true + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v3 + - name: Build and export image + id: build + run: ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} ${{ matrix.platform }} + - name: Capture tags + id: capture-tags + run: | + tags=$(cat tags.txt) + echo "tags=$tags" + echo "tags=$tags" >> $GITHUB_OUTPUT + - name: Upload Docker image as artifact + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.image }}_image_${{ matrix.platform }} + path: /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar + retention-days: 14 + + # Push the OpenDevin and sandbox Docker images to the ghcr.io repository + ghcr_push: + runs-on: ubuntu-latest + needs: [ghcr_build] + if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') || (github.event_name == 'pull_request' && github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main') + env: + tags: ${{ needs.ghcr_build.outputs.tags }} + permissions: + contents: read + packages: write + strategy: + matrix: + image: ['opendevin'] + platform: ['amd64', 'arm64'] + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Download Docker images + uses: actions/download-artifact@v4 + with: + name: ${{ matrix.image }}_image_${{ matrix.platform }} + path: /tmp/${{ matrix.platform }} + - name: Load images and push to registry + run: | + mv /tmp/${{ matrix.platform }}/${{ matrix.image }}_image_${{ matrix.platform }}.tar . + loaded_image=$(docker load -i ${{ matrix.image }}_image_${{ matrix.platform }}.tar | grep "Loaded image:" | head -n 1 | awk '{print $3}') + echo "loaded image = $loaded_image" + tags=$(echo ${tags} | tr ' ' '\n') + image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') + echo "image name = $image_name" + for tag in $tags; do + echo "tag = $tag" + docker tag $loaded_image $image_name:${tag}_${{ matrix.platform }} + docker push $image_name:${tag}_${{ matrix.platform }} + done + # Creates and pushes the OpenDevin and sandbox Docker image manifests + create_manifest: + runs-on: ubuntu-latest + needs: [ghcr_build, ghcr_push] + if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') || (github.event_name == 'pull_request' && github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main') + env: + tags: ${{ needs.ghcr_build.outputs.tags }} + strategy: + matrix: + image: ['opendevin'] + permissions: + contents: read + packages: write + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Create and push multi-platform manifest + run: | + image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') + echo "image name = $image_name" + tags=$(echo ${tags} | tr ' ' '\n') + for tag in $tags; do + echo 'tag = $tag' + docker buildx imagetools create --tag $image_name:$tag \ + $image_name:${tag}_amd64 \ + $image_name:${tag}_arm64 + done diff --git a/.github/workflows/ghcr.yml b/.github/workflows/ghcr.yml index 60fc4988e3..b62ed288c5 100644 --- a/.github/workflows/ghcr.yml +++ b/.github/workflows/ghcr.yml @@ -1,5 +1,5 @@ -# Workflow that builds, tests and then pushes the docker images to the ghcr.io repository -name: Build Publish and Test Runtime Image +# Workflow that builds, tests and then pushes the runtime docker images to the ghcr.io repository +name: Build, Test and Publish Runtime Image # Only run one workflow of the same group at a time. # There can be at most one running and one pending job in a concurrency group at any time. @@ -22,69 +22,20 @@ on: default: '' jobs: - # Builds the OpenDevin Docker images - ghcr_build: - runs-on: ubuntu-latest - outputs: - tags: ${{ steps.capture-tags.outputs.tags }} - permissions: - contents: read - packages: write - strategy: - matrix: - image: ['opendevin'] - platform: ['amd64', 'arm64'] - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: true - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: false - swap-storage: true - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx - id: buildx - uses: docker/setup-buildx-action@v3 - - name: Build and export image - id: build - run: ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} ${{ matrix.platform }} - - name: Capture tags - id: capture-tags - run: | - tags=$(cat tags.txt) - echo "tags=$tags" - echo "tags=$tags" >> $GITHUB_OUTPUT - - name: Upload Docker image as artifact - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} - path: /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar - retention-days: 14 - # Builds the runtime Docker images ghcr_build_runtime: + name: Build Image runs-on: ubuntu-latest - outputs: - tags: ${{ steps.capture-tags.outputs.tags }} permissions: contents: read packages: write strategy: matrix: image: ['od_runtime'] - base_image: ['nikolaik/python-nodejs:python3.11-nodejs22'] + base_image: ['nikolaik/python-nodejs:python3.11-nodejs22', 'python:3.11-bookworm', 'node:22-bookworm'] platform: ['amd64', 'arm64'] + outputs: + tags: ${{ steps.capture-tags.outputs.tags }} steps: - name: Checkout uses: actions/checkout@v4 @@ -124,14 +75,18 @@ jobs: if [ -f 'containers/runtime/Dockerfile' ]; then echo 'Dockerfile detected, building runtime image...' ./containers/build.sh ${{ matrix.image }} ${{ github.repository_owner }} ${{ matrix.platform }} + # Capture the last tag to use in the artifact name + last_tag=$(cat tags.txt | awk '{print $NF}') else echo 'No Dockerfile detected which means an exact image is already built. Pulling the image and saving it to a tar file...' source containers/runtime/config.sh - echo "$DOCKER_IMAGE_TAG $DOCKER_IMAGE_HASH_TAG" >> tags.txt - echo "Pulling image $DOCKER_IMAGE/$DOCKER_IMAGE_HASH_TAG to /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar" + echo "$DOCKER_IMAGE_HASH_TAG $DOCKER_IMAGE_TAG" >> tags.txt + export last_tag=$DOCKER_IMAGE_TAG + echo "Pulling image $DOCKER_IMAGE:$DOCKER_IMAGE_HASH_TAG to /tmp/${{ matrix.image }}_${last_tag}_${{ matrix.platform }}.tar" docker pull $DOCKER_IMAGE:$DOCKER_IMAGE_HASH_TAG - docker save $DOCKER_IMAGE:$DOCKER_IMAGE_HASH_TAG -o /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar + docker save $DOCKER_IMAGE:$DOCKER_IMAGE_HASH_TAG -o /tmp/${{ matrix.image }}_${last_tag}_${{ matrix.platform }}.tar fi + echo "last_tag=${last_tag}" >> $GITHUB_OUTPUT - name: Capture tags id: capture-tags run: | @@ -141,28 +96,59 @@ jobs: - name: Upload Docker image as artifact uses: actions/upload-artifact@v4 with: - name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} - path: /tmp/${{ matrix.image }}_image_${{ matrix.platform }}.tar + name: ${{ matrix.image }}_${{ steps.build.outputs.last_tag }}_${{ matrix.platform }} + path: /tmp/${{ matrix.image }}_${{ steps.build.outputs.last_tag }}_${{ matrix.platform }}.tar retention-days: 14 + - name: Capture last tag + id: capture-last-tag + run: | + last_tag=$(cat tags.txt | awk '{print $NF}') + echo "$last_tag" > /tmp/last-tag-${{ matrix.image }}-${{ matrix.platform }}-${{ steps.build.outputs.last_tag }}.txt + echo "Saved last tag to /tmp/last-tag-${{ matrix.image }}-${{ matrix.platform }}-${{ steps.build.outputs.last_tag }}.txt" + - name: Upload last tag as artifact + uses: actions/upload-artifact@v4 + with: + name: last-tag-${{ matrix.image }}-${{ matrix.platform }}-${{ steps.build.outputs.last_tag }} + path: /tmp/last-tag-${{ matrix.image }}-${{ matrix.platform }}-${{ steps.build.outputs.last_tag }}.txt + retention-days: 1 + + prepare_test_image_tags: + name: Prepare Test Images Tags + needs: ghcr_build_runtime + runs-on: ubuntu-latest + outputs: + test_image_tags: ${{ steps.set-matrix.outputs.test_image_tags }} + steps: + - name: Download last tags + uses: actions/download-artifact@v4 + with: + pattern: last-tag-* + path: /tmp/ + merge-multiple: true + - name: Set up test matrix + id: set-matrix + run: | + matrix=$(cat /tmp/last-tag-*.txt | sort -u | jq -R -s -c 'split("\n") | map(select(length > 0))') + echo "test_image_tags=$matrix" >> $GITHUB_OUTPUT + echo "Generated test_image_tags: $matrix" # Run unit tests with the EventStream and Server runtime Docker images test_runtime: name: Test Runtime runs-on: ubuntu-latest - needs: [ghcr_build_runtime, ghcr_build] + needs: prepare_test_image_tags strategy: matrix: + image: ['od_runtime'] runtime_type: ['eventstream'] + platform: ['amd64'] + last_tag: ${{ fromJson(needs.prepare_test_image_tags.outputs.test_image_tags) }} steps: - uses: actions/checkout@v4 - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@main with: - # this might remove tools that are actually needed, - # when set to "true" but frees about 6 GB tool-cache: true - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow android: true dotnet: true haskell: true @@ -178,28 +164,29 @@ jobs: - name: Install Python dependencies using Poetry run: make install-python-dependencies - name: Download Runtime Docker image - if: matrix.runtime_type == 'eventstream' uses: actions/download-artifact@v4 with: - name: od_runtime-docker-image-amd64 - path: /tmp/ - - name: Download Sandbox Docker image - if: matrix.runtime_type == 'server' - uses: actions/download-artifact@v4 - with: - name: sandbox-docker-image-amd64 + name: ${{ matrix.image }}_${{ matrix.last_tag }}_${{ matrix.platform }} path: /tmp/ - name: Load Runtime image and run runtime tests run: | - # Load the Docker image and capture the output - if [ "${{ matrix.runtime_type }}" == "eventstream" ]; then - output=$(docker load -i /tmp/od_runtime_image_amd64.tar) - else - output=$(docker load -i /tmp/sandbox_image_amd64.tar) + image_file=$(find /tmp -name "${{ matrix.image }}_${{ matrix.last_tag }}_${{ matrix.platform }}.tar" | head -n 1) + + if [ -z "$image_file" ]; then + echo "No matching image file found for tag: ${{ matrix.last_tag }}" + exit 1 fi - # Extract the first image name from the output - image_name=$(echo "$output" | grep -oP 'Loaded image: \K.*' | head -n 1) + echo "Loading image from file: $image_file" + output=$(docker load -i "$image_file") + + # Extract the image name from the output + # Print all tags + echo "All tags:" + all_tags=$(echo "$output" | grep -oP 'Loaded image: \K.*') + echo "$all_tags" + # Choose the last tag + image_name=$(echo "$all_tags" | tail -n 1) # Print the full name of the image echo "Loaded Docker image: $image_name" @@ -214,13 +201,14 @@ jobs: runtime_integration_tests_on_linux: name: Runtime Integration Tests on Linux runs-on: ubuntu-latest - needs: [ghcr_build_runtime] + needs: prepare_test_image_tags strategy: fail-fast: false matrix: - python-version: ['3.11'] - # server is tested in a separate workflow + image: ['od_runtime'] runtime_type: ['eventstream'] + platform: ['amd64'] + last_tag: ${{ fromJson(needs.prepare_test_image_tags.outputs.test_image_tags) }} steps: - uses: actions/checkout@v4 - name: Install poetry via pipx @@ -228,26 +216,28 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: '3.11' cache: 'poetry' - name: Install Python dependencies using Poetry run: make install-python-dependencies - name: Download Runtime Docker image uses: actions/download-artifact@v4 with: - name: od_runtime-docker-image-amd64 + name: ${{ matrix.image }}_${{ matrix.last_tag }}_${{ matrix.platform }} path: /tmp/ - name: Load runtime image and run integration tests run: | - # Load the Docker image and capture the output - if [ "${{ matrix.runtime_type }}" == "eventstream" ]; then - output=$(docker load -i /tmp/od_runtime_image_amd64.tar) - else - echo "No Runtime Docker image to load" + image_file=$(find /tmp -name "${{ matrix.image }}_${{ matrix.last_tag }}_${{ matrix.platform }}.tar" | head -n 1) + + if [ -z "$image_file" ]; then + echo "No matching image file found for tag: ${{ matrix.last_tag }}" exit 1 fi - # Extract the first image name from the output + echo "Loading image from file: $image_file" + output=$(docker load -i "$image_file") + + # Extract the image name from the output image_name=$(echo "$output" | grep -oP 'Loaded image: \K.*' | head -n 1) # Print the full name of the image @@ -259,52 +249,19 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - # Push the OpenDevin and sandbox Docker images to the ghcr.io repository - ghcr_push: + # New job to indicate all runtime tests have passed + all_runtime_tests_passed: + name: All Runtime Tests Passed runs-on: ubuntu-latest - needs: [ghcr_build] - if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') || (github.event_name == 'pull_request' && github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main') - env: - tags: ${{ needs.ghcr_build.outputs.tags }} - permissions: - contents: read - packages: write - strategy: - matrix: - image: ['opendevin'] - platform: ['amd64', 'arm64'] + needs: [test_runtime, runtime_integration_tests_on_linux] steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Login to GHCR - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Download Docker images - uses: actions/download-artifact@v4 - with: - name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} - path: /tmp/${{ matrix.platform }} - - name: Load images and push to registry - run: | - mv /tmp/${{ matrix.platform }}/${{ matrix.image }}_image_${{ matrix.platform }}.tar . - loaded_image=$(docker load -i ${{ matrix.image }}_image_${{ matrix.platform }}.tar | grep "Loaded image:" | head -n 1 | awk '{print $3}') - echo "loaded image = $loaded_image" - tags=$(echo ${tags} | tr ' ' '\n') - image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') - echo "image name = $image_name" - for tag in $tags; do - echo "tag = $tag" - docker tag $loaded_image $image_name:${tag}_${{ matrix.platform }} - docker push $image_name:${tag}_${{ matrix.platform }} - done + - name: All tests passed + run: echo "All runtime tests have passed successfully!" # Push the runtime Docker images to the ghcr.io repository ghcr_push_runtime: runs-on: ubuntu-latest - needs: [ghcr_build_runtime, test_runtime, runtime_integration_tests_on_linux] + needs: [ghcr_build_runtime, prepare_test_image_tags, all_runtime_tests_passed] if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') || (github.event_name == 'pull_request' && github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main') env: RUNTIME_TAGS: ${{ needs.ghcr_build_runtime.outputs.tags }} @@ -314,7 +271,9 @@ jobs: strategy: matrix: image: ['od_runtime'] + runtime_type: ['eventstream'] platform: ['amd64', 'arm64'] + last_tag: ${{ fromJson(needs.prepare_test_image_tags.outputs.test_image_tags) }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -337,16 +296,21 @@ jobs: - name: Download Docker images uses: actions/download-artifact@v4 with: - name: ${{ matrix.image }}-docker-image-${{ matrix.platform }} - path: /tmp/${{ matrix.platform }} + name: ${{ matrix.image }}_${{ matrix.last_tag }}_${{ matrix.platform }} + path: /tmp/${{ matrix.image }}_${{ matrix.last_tag }}_${{ matrix.platform }}.tar - name: List downloaded files run: | ls -la /tmp/${{ matrix.platform }} file /tmp/${{ matrix.platform }}/* - name: Load images and push to registry run: | - mv /tmp/${{ matrix.platform }}/${{ matrix.image }}_image_${{ matrix.platform }}.tar ./${{ matrix.image }}_image_${{ matrix.platform }}.tar - if ! loaded_image=$(docker load -i ${{ matrix.image }}_image_${{ matrix.platform }}.tar | grep "Loaded image:" | head -n 1 | awk '{print $3}'); then + image_file=$(find /tmp/${{ matrix.platform }} -name "${{ matrix.image }}_${{ matrix.last_tag }}_${{ matrix.platform }}.tar" | head -n 1) + if [ -z "$image_file" ]; then + echo "No matching image file found" + exit 1 + fi + echo "Loading image from file: $image_file" + if ! loaded_image=$(docker load -i "$image_file" | grep "Loaded image:" | head -n 1 | awk '{print $3}'); then echo "Failed to load Docker image" exit 1 fi @@ -363,44 +327,10 @@ jobs: fi done - # Creates and pushes the OpenDevin and sandbox Docker image manifests - create_manifest: - runs-on: ubuntu-latest - needs: [ghcr_build, ghcr_push] - if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') || (github.event_name == 'pull_request' && github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main') - env: - tags: ${{ needs.ghcr_build.outputs.tags }} - strategy: - matrix: - image: ['opendevin'] - permissions: - contents: read - packages: write - steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Login to GHCR - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Create and push multi-platform manifest - run: | - image_name=$(echo "ghcr.io/${{ github.repository_owner }}/${{ matrix.image }}" | tr '[:upper:]' '[:lower:]') - echo "image name = $image_name" - tags=$(echo ${tags} | tr ' ' '\n') - for tag in $tags; do - echo 'tag = $tag' - docker buildx imagetools create --tag $image_name:$tag \ - $image_name:${tag}_amd64 \ - $image_name:${tag}_arm64 - done - # Creates and pushes the runtime Docker image manifest create_manifest_runtime: runs-on: ubuntu-latest - needs: [ghcr_build_runtime, ghcr_push_runtime] + needs: [ghcr_build_runtime, prepare_test_image_tags, ghcr_push_runtime] if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') || (github.event_name == 'pull_request' && github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main') env: tags: ${{ needs.ghcr_build_runtime.outputs.tags }} diff --git a/containers/build.sh b/containers/build.sh index 1518482344..2d874d80f1 100755 --- a/containers/build.sh +++ b/containers/build.sh @@ -49,15 +49,14 @@ if [[ -n "$org_name" ]]; then DOCKER_ORG="$org_name" fi -# If $DOCKER_IMAGE_TAG is set, add it to the tags -if [[ -n "$DOCKER_IMAGE_TAG" ]]; then - tags+=("$DOCKER_IMAGE_TAG") -fi # If $DOCKER_IMAGE_HASH_TAG is set, add it to the tags if [[ -n "$DOCKER_IMAGE_HASH_TAG" ]]; then tags+=("$DOCKER_IMAGE_HASH_TAG") fi - +# If $DOCKER_IMAGE_TAG is set, add it to the tags +if [[ -n "$DOCKER_IMAGE_TAG" ]]; then + tags+=("$DOCKER_IMAGE_TAG") +fi DOCKER_REPOSITORY="$DOCKER_REGISTRY/$DOCKER_ORG/$DOCKER_IMAGE" DOCKER_REPOSITORY=${DOCKER_REPOSITORY,,} # lowercase @@ -69,7 +68,8 @@ for tag in "${tags[@]}"; do args+=" -t $DOCKER_REPOSITORY:$tag" done -output_image="/tmp/${image_name}_image_${platform}.tar" +output_image="/tmp/${image_name}_${tags[-1]}_${platform}.tar" +echo "Output image will be saved to: $output_image" docker buildx build \ $args \ diff --git a/containers/runtime/config.sh b/containers/runtime/config.sh index e0de71f4fe..1b55efe7b2 100644 --- a/containers/runtime/config.sh +++ b/containers/runtime/config.sh @@ -1,7 +1,7 @@ DOCKER_REGISTRY=ghcr.io DOCKER_ORG=opendevin DOCKER_BASE_DIR="./containers/runtime" +DOCKER_IMAGE=od_runtime # These variables will be appended by the runtime_build.py script -# DOCKER_IMAGE= # DOCKER_IMAGE_TAG= # DOCKER_IMAGE_HASH_TAG= diff --git a/evaluation/logic_reasoning/Dockerfile b/evaluation/logic_reasoning/Dockerfile index 0730c2e36d..8b93c20414 100644 --- a/evaluation/logic_reasoning/Dockerfile +++ b/evaluation/logic_reasoning/Dockerfile @@ -1,6 +1,4 @@ -FROM ubuntu:22.04 - -RUN apt-get update && apt-get install -y python3 python3-pip +FROM python:3.11-bookworm RUN pip install scitools-pyke diff --git a/evaluation/miniwob/Dockerfile b/evaluation/miniwob/Dockerfile index b7d191ac67..08e3308d98 100644 --- a/evaluation/miniwob/Dockerfile +++ b/evaluation/miniwob/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:22.04 +FROM python:3.11-bookworm RUN apt-get update && apt-get install -y python3 python3-pip git diff --git a/evaluation/mint/Dockerfile b/evaluation/mint/Dockerfile index af7366763d..f795409457 100644 --- a/evaluation/mint/Dockerfile +++ b/evaluation/mint/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:22.04 +FROM python:3.11-bookworm RUN apt-get update && apt-get install -y python3 python3-pip git gcc diff --git a/evaluation/toolqa/Dockerfile b/evaluation/toolqa/Dockerfile index a15b774fcf..73d8086b04 100644 --- a/evaluation/toolqa/Dockerfile +++ b/evaluation/toolqa/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:22.04 +FROM python:3.11-bookworm RUN apt-get update && apt-get install -y python3 python3-pip diff --git a/opendevin/runtime/utils/runtime_build.py b/opendevin/runtime/utils/runtime_build.py index bed8803a78..d8e0296d78 100644 --- a/opendevin/runtime/utils/runtime_build.py +++ b/opendevin/runtime/utils/runtime_build.py @@ -423,7 +423,6 @@ if __name__ == '__main__': file.write( ( f'\n' - f'DOCKER_IMAGE={runtime_image_repo}\n' f'DOCKER_IMAGE_TAG={runtime_image_tag}\n' f'DOCKER_IMAGE_HASH_TAG={runtime_image_hash_tag}\n' ) diff --git a/tests/integration/regenerate.sh b/tests/integration/regenerate.sh index 56253c5b96..61343af739 100755 --- a/tests/integration/regenerate.sh +++ b/tests/integration/regenerate.sh @@ -55,10 +55,9 @@ cd "$PROJECT_ROOT" || exit 1 mkdir -p $WORKSPACE_BASE -# use environmental variable if exists, otherwise use "ssh" -TEST_RUNTIME="${TEST_RUNTIME:-eventstream}" # can be server or eventstream -# TODO: set this as default after ServerRuntime is deprecated -if [ "$TEST_RUNTIME" == "eventstream" ] && [ -z "$SANDBOX_CONTAINER_IMAGE" ]; then +# use environmental variable if exists +TEST_RUNTIME="${TEST_RUNTIME:-eventstream}" +if [ -z "$SANDBOX_CONTAINER_IMAGE" ]; then SANDBOX_CONTAINER_IMAGE="nikolaik/python-nodejs:python3.11-nodejs22" fi diff --git a/tests/unit/test_runtime.py b/tests/unit/test_runtime.py index 50d9b67231..9f14fd0578 100644 --- a/tests/unit/test_runtime.py +++ b/tests/unit/test_runtime.py @@ -48,7 +48,7 @@ def temp_dir(tmp_path_factory: TempPathFactory) -> str: return str(tmp_path_factory.mktemp('test_runtime')) -TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'both') +TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream') PY3_FOR_TESTING = '/opendevin/miniforge3/bin/mamba run -n base python3' @@ -58,7 +58,7 @@ def get_box_classes(): if runtime.lower() == 'eventstream': return [EventStreamRuntime] else: - return [EventStreamRuntime] + raise ValueError(f'Invalid runtime: {runtime}') # This assures that all tests run together per runtime, not alternating between them, @@ -83,12 +83,17 @@ def enable_auto_lint(request): return request.param -@pytest.fixture( - scope='module', params=['nikolaik/python-nodejs:python3.11-nodejs22', 'debian:11'] -) +@pytest.fixture(scope='module') def container_image(request): time.sleep(1) - return request.param + env_image = os.environ.get('SANDBOX_CONTAINER_IMAGE') + if env_image: + return [env_image] + return [ + 'nikolaik/python-nodejs:python3.11-nodejs22', + 'python:3.11-bookworm', + 'node:22-bookworm', + ] async def _load_runtime( @@ -122,31 +127,14 @@ async def _load_runtime( if container_image is not None: config.sandbox.container_image = container_image - if box_class == EventStreamRuntime: - # NOTE: we will use the default container image specified in the config.sandbox - # if it is an official od_runtime image. - cur_container_image = config.sandbox.container_image - if 'od_runtime' not in cur_container_image and cur_container_image not in { - 'xingyaoww/od-eval-miniwob:v1.0' - }: # a special exception list - cur_container_image = 'nikolaik/python-nodejs:python3.11-nodejs22' - logger.warning( - f'`{config.sandbox.container_image}` is not an od_runtime image. Will use `{cur_container_image}` as the container image for testing.' - ) - - runtime = EventStreamRuntime( - config=config, - event_stream=event_stream, - sid=sid, - plugins=plugins, - # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime - # Instead, we will pre-build a suite of container images with OD-runtime-cli installed. - container_image=cur_container_image, - ) - await runtime.ainit() - - else: - raise ValueError(f'Invalid box class: {box_class}') + runtime = box_class( + config=config, + event_stream=event_stream, + sid=sid, + plugins=plugins, + container_image=container_image, + ) + await runtime.ainit() await asyncio.sleep(1) return runtime @@ -1031,36 +1019,6 @@ async def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class): await asyncio.sleep(1) -@pytest.mark.asyncio -async def test_bash_python_version(temp_dir, box_class): - """Make sure Python is available in bash.""" - - runtime = await _load_runtime(temp_dir, box_class) - - action = CmdRunAction(command='which python') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = await runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert obs.exit_code == 0 - - action = CmdRunAction(command='python --version') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = await runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert obs.exit_code == 0 - # Should not error out - - action = CmdRunAction(command='pip --version') - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = await runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert obs.exit_code == 0 - # Should not error out - - await runtime.close() - await asyncio.sleep(1) - - @pytest.mark.asyncio async def test_ipython_package_install(temp_dir, box_class, run_as_devin): """Make sure that cd in bash also update the current working directory in ipython.""" @@ -1370,3 +1328,65 @@ async def test_git_operation(box_class): await runtime.close() await asyncio.sleep(1) + + +# ============================================================================================================================ +# Image-specific tests +# ============================================================================================================================ + + +@pytest.mark.asyncio +async def test_bash_python_version(temp_dir, box_class, container_image): + """Make sure Python is available in bash.""" + if container_image not in [ + 'python:3.11-bookworm', + 'nikolaik/python-nodejs:python3.11-nodejs22', + ]: + pytest.skip('This test is only for python-related images') + + runtime = await _load_runtime(temp_dir, box_class, container_image=container_image) + + action = CmdRunAction(command='which python') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + + action = CmdRunAction(command='python --version') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + assert 'Python 3.11' in obs.content # Check for specific version + + action = CmdRunAction(command='pip --version') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + assert 'pip' in obs.content # Check that pip is available + + await runtime.close() + await asyncio.sleep(1) + + +@pytest.mark.asyncio +async def test_nodejs_22_version(temp_dir, box_class, container_image): + """Make sure Node.js is available in bash.""" + if container_image not in [ + 'node:22-bookworm', + 'nikolaik/python-nodejs:python3.11-nodejs22', + ]: + pytest.skip('This test is only for nodejs-related images') + + runtime = await _load_runtime(temp_dir, box_class, container_image=container_image) + + action = CmdRunAction(command='node --version') + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = await runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + assert 'v22' in obs.content # Check for specific version + + await runtime.close() + await asyncio.sleep(1)