From 1b9c61c2d1f24c12e57103c91c81c4bdb9d2e529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Test=C3=A9?= Date: Mon, 13 May 2024 17:04:33 +0200 Subject: [PATCH] chore(ci): run gpu benchmarks on hyperstack platform --- .github/workflows/aws_tfhe_fast_tests.yml | 20 +-- .github/workflows/aws_tfhe_gpu_tests.yml | 24 ++-- .github/workflows/aws_tfhe_integer_tests.yml | 20 +-- .../aws_tfhe_signed_integer_tests.yml | 20 +-- .github/workflows/aws_tfhe_tests.yml | 20 +-- .github/workflows/aws_tfhe_wasm_tests.yml | 20 +-- .../workflows/core_crypto_gpu_benchmark.yml | 133 ++++++++++------- .github/workflows/csprng_randomness_tests.yml | 20 +-- .github/workflows/integer_gpu_benchmark.yml | 134 ++++++++++++------ .../workflows/integer_gpu_full_benchmark.yml | 124 ++++++++++------ .../integer_multi_bit_gpu_benchmark.yml | 127 +++++++++++------ .github/workflows/make_release_cuda.yml | 20 +-- .github/workflows/start_benchmarks.yml | 7 +- .github/workflows/start_full_benchmarks.yml | 4 +- ci/ec2_products_cost.json | 3 +- ci/slab.toml | 5 + 16 files changed, 432 insertions(+), 269 deletions(-) diff --git a/.github/workflows/aws_tfhe_fast_tests.yml b/.github/workflows/aws_tfhe_fast_tests.yml index 4a3e37ce1..c1c660235 100644 --- a/.github/workflows/aws_tfhe_fast_tests.yml +++ b/.github/workflows/aws_tfhe_fast_tests.yml @@ -18,8 +18,8 @@ on: pull_request: jobs: - setup-ec2: - name: Setup EC2 instance (fast-tests) + setup-instance: + name: Setup instance (fast-tests) runs-on: ubuntu-latest outputs: runner-name: ${{ steps.start-instance.outputs.label }} @@ -37,11 +37,11 @@ jobs: fast-tests: name: Fast CPU tests - needs: setup-ec2 + needs: setup-instance concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: true - runs-on: ${{ needs.setup-ec2.outputs.runner-name }} + runs-on: ${{ needs.setup-instance.outputs.runner-name }} steps: - name: Checkout tfhe-rs uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b @@ -115,10 +115,10 @@ jobs: SLACK_COLOR: 
${{ job.status }} SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" - teardown-ec2: - name: Teardown EC2 instance (fast-tests) - if: ${{ always() && needs.setup-ec2.result != 'skipped' }} - needs: [ setup-ec2, fast-tests ] + teardown-instance: + name: Teardown instance (fast-tests) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, fast-tests ] runs-on: ubuntu-latest steps: - name: Stop instance @@ -129,7 +129,7 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-ec2.outputs.runner-name }} + label: ${{ needs.setup-instance.outputs.runner-name }} - name: Slack Notification if: ${{ failure() }} @@ -137,4 +137,4 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "EC2 teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/aws_tfhe_gpu_tests.yml b/.github/workflows/aws_tfhe_gpu_tests.yml index 7bfe7c51d..260ff1118 100644 --- a/.github/workflows/aws_tfhe_gpu_tests.yml +++ b/.github/workflows/aws_tfhe_gpu_tests.yml @@ -18,8 +18,8 @@ on: pull_request: jobs: - setup-ec2: - name: Setup EC2 instance (cuda-tests) + setup-instance: + name: Setup instance (cuda-tests) runs-on: ubuntu-latest outputs: runner-name: ${{ steps.start-instance.outputs.label }} @@ -37,11 +37,11 @@ jobs: cuda-pcc: name: CUDA post-commit checks - needs: setup-ec2 + needs: setup-instance concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - runs-on: ${{ needs.setup-ec2.outputs.runner-name }} + runs-on: ${{ needs.setup-instance.outputs.runner-name }} strategy: fail-fast: false # explicit include-based build matrix, of known valid options @@ -103,11 +103,11 @@ jobs: cuda-tests-linux: name: CUDA tests - needs: [ setup-ec2, cuda-pcc ] + needs: [ setup-instance, cuda-pcc ] concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - runs-on: ${{ needs.setup-ec2.outputs.runner-name }} + runs-on: ${{ needs.setup-instance.outputs.runner-name }} strategy: fail-fast: false # explicit include-based build matrix, of known valid options @@ -175,10 +175,10 @@ jobs: SLACK_COLOR: ${{ job.status }} SLACK_MESSAGE: "CUDA AWS tests finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" - teardown-ec2: - name: Teardown EC2 instance (cuda-tests) - if: ${{ always() && needs.setup-ec2.result != 'skipped' }} - needs: [ setup-ec2, cuda-pcc, cuda-tests-linux ] + teardown-instance: + name: Teardown instance (cuda-tests) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, cuda-pcc, cuda-tests-linux ] runs-on: ubuntu-latest steps: - name: Stop instance @@ -189,7 +189,7 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-ec2.outputs.runner-name }} + label: ${{ needs.setup-instance.outputs.runner-name }} - name: Slack Notification if: ${{ failure() }} @@ -197,4 +197,4 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "EC2 teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/aws_tfhe_integer_tests.yml b/.github/workflows/aws_tfhe_integer_tests.yml index 2e4af16d6..98f758e8c 100644 --- a/.github/workflows/aws_tfhe_integer_tests.yml +++ b/.github/workflows/aws_tfhe_integer_tests.yml @@ -18,8 +18,8 @@ on: types: [ labeled ] jobs: - setup-ec2: - name: Setup EC2 instance (unsigned-integer-tests) + setup-instance: + name: Setup instance (unsigned-integer-tests) if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }} runs-on: ubuntu-latest outputs: @@ -38,11 +38,11 @@ jobs: unsigned-integer-tests: name: Unsigned integer tests - needs: setup-ec2 + needs: setup-instance concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: true - runs-on: ${{ needs.setup-ec2.outputs.runner-name }} + runs-on: ${{ needs.setup-instance.outputs.runner-name }} steps: - name: Checkout tfhe-rs uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b @@ -80,10 +80,10 @@ jobs: SLACK_COLOR: ${{ job.status }} SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" - teardown-ec2: - name: Teardown EC2 instance (unsigned-integer-tests) - if: ${{ always() && needs.setup-ec2.result != 'skipped' }} - needs: [ setup-ec2, unsigned-integer-tests ] + teardown-instance: + name: Teardown instance (unsigned-integer-tests) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, unsigned-integer-tests ] runs-on: ubuntu-latest steps: - name: Stop instance @@ -94,7 +94,7 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-ec2.outputs.runner-name }} + label: ${{ needs.setup-instance.outputs.runner-name }} - name: Slack Notification if: ${{ failure() }} @@ -102,4 +102,4 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "EC2 teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/aws_tfhe_signed_integer_tests.yml b/.github/workflows/aws_tfhe_signed_integer_tests.yml index 811fe2f83..f30279046 100644 --- a/.github/workflows/aws_tfhe_signed_integer_tests.yml +++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml @@ -18,8 +18,8 @@ on: types: [ labeled ] jobs: - setup-ec2: - name: Setup EC2 instance (signed-integer-tests) + setup-instance: + name: Setup instance (signed-integer-tests) if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }} runs-on: ubuntu-latest outputs: @@ -38,11 +38,11 @@ jobs: signed-integer-tests: name: Signed integer tests - needs: setup-ec2 + needs: setup-instance concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: true - runs-on: ${{ needs.setup-ec2.outputs.runner-name }} + runs-on: ${{ needs.setup-instance.outputs.runner-name }} steps: - name: Checkout tfhe-rs uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b @@ -84,10 +84,10 @@ jobs: SLACK_COLOR: ${{ job.status }} SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" - teardown-ec2: - name: Teardown EC2 instance (signed-integer-tests) - if: ${{ always() && needs.setup-ec2.result != 'skipped' }} - needs: [ setup-ec2, signed-integer-tests ] + teardown-instance: + name: Teardown instance (signed-integer-tests) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, signed-integer-tests ] runs-on: ubuntu-latest steps: - name: Stop instance @@ -98,7 +98,7 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-ec2.outputs.runner-name }} + label: ${{ needs.setup-instance.outputs.runner-name }} - name: Slack Notification if: ${{ failure() }} @@ -106,4 +106,4 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "EC2 teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/aws_tfhe_tests.yml b/.github/workflows/aws_tfhe_tests.yml index 5d5cfcc71..dfb66e95c 100644 --- a/.github/workflows/aws_tfhe_tests.yml +++ b/.github/workflows/aws_tfhe_tests.yml @@ -18,8 +18,8 @@ on: types: [ labeled ] jobs: - setup-ec2: - name: Setup EC2 instance (cpu-tests) + setup-instance: + name: Setup instance (cpu-tests) if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }} runs-on: ubuntu-latest outputs: @@ -38,11 +38,11 @@ jobs: cpu-tests: name: CPU tests - needs: setup-ec2 + needs: setup-instance concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: true - runs-on: ${{ needs.setup-ec2.outputs.runner-name }} + runs-on: ${{ needs.setup-instance.outputs.runner-name }} steps: - name: Checkout tfhe-rs uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b @@ -110,10 +110,10 @@ jobs: SLACK_COLOR: ${{ job.status }} SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" - teardown-ec2: - name: Teardown EC2 instance (cpu-tests) - if: ${{ always() && needs.setup-ec2.result != 'skipped' }} - needs: [ setup-ec2, cpu-tests ] + teardown-instance: + name: Teardown instance (cpu-tests) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, cpu-tests ] runs-on: ubuntu-latest steps: - name: Stop instance @@ -124,7 +124,7 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-ec2.outputs.runner-name }} + label: ${{ needs.setup-instance.outputs.runner-name }} - name: Slack Notification if: ${{ failure() }} @@ -132,4 +132,4 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "EC2 teardown (cpu-tests) finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/aws_tfhe_wasm_tests.yml b/.github/workflows/aws_tfhe_wasm_tests.yml index 466b4b37e..ef0a9dca6 100644 --- a/.github/workflows/aws_tfhe_wasm_tests.yml +++ b/.github/workflows/aws_tfhe_wasm_tests.yml @@ -18,8 +18,8 @@ on: types: [ labeled ] jobs: - setup-ec2: - name: Setup EC2 instance (wasm-tests) + setup-instance: + name: Setup instance (wasm-tests) if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }} runs-on: ubuntu-latest outputs: @@ -38,11 +38,11 @@ jobs: wasm-tests: name: WASM tests - needs: setup-ec2 + needs: setup-instance concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: true - runs-on: ${{ needs.setup-ec2.outputs.runner-name }} + runs-on: ${{ needs.setup-instance.outputs.runner-name }} steps: - name: Checkout tfhe-rs uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b @@ -80,10 +80,10 @@ jobs: SLACK_COLOR: ${{ job.status }} SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" - teardown-ec2: - name: Teardown EC2 instance (wasm-tests) - if: ${{ always() && needs.setup-ec2.result != 'skipped' }} - needs: [ setup-ec2, wasm-tests ] + teardown-instance: + name: Teardown instance (wasm-tests) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, wasm-tests ] runs-on: ubuntu-latest steps: - name: Stop instance @@ -94,7 +94,7 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-ec2.outputs.runner-name }} + label: ${{ needs.setup-instance.outputs.runner-name }} - name: Slack Notification if: ${{ failure() }} @@ -102,4 +102,4 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "EC2 teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/core_crypto_gpu_benchmark.yml b/.github/workflows/core_crypto_gpu_benchmark.yml index c4be34f73..6da1fffe6 100644 --- a/.github/workflows/core_crypto_gpu_benchmark.yml +++ b/.github/workflows/core_crypto_gpu_benchmark.yml @@ -1,43 +1,43 @@ -# Run core crypto benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot. +# Run core crypto benchmarks on an instance with CUDA and return parsed results to Slab CI bot. 
name: Core crypto GPU benchmarks on: workflow_dispatch: - inputs: - instance_id: - description: "Instance ID" - type: string - instance_image_id: - description: "Instance AMI ID" - type: string - instance_type: - description: "Instance product type" - type: string - runner_name: - description: "Action runner name" - type: string - request_id: - description: "Slab request ID" - type: string - # This input is not used in this workflow but still mandatory since a calling workflow could - # use it. If a triggering command include a user_inputs field, then the triggered workflow - # must include this very input, otherwise the workflow won't be called. - # See start_full_benchmarks.yml as example. - user_inputs: - description: "Type of benchmarks to run" - type: string - default: "weekly_benchmarks" + schedule: + # Weekly benchmarks will be triggered each Saturday at 1a.m. + - cron: '0 1 * * 6' env: CARGO_TERM_COLOR: always RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} jobs: - run-core-crypto-benchmarks: - name: Execute GPU core crypto benchmarks in EC2 - runs-on: ${{ github.event.inputs.runner_name }} - if: ${{ !cancelled() }} + setup-instance: + name: Setup instance (cuda-core-crypto-benchmarks) + runs-on: ubuntu-latest + outputs: + runner-name: ${{ steps.start-instance.outputs.label }} + steps: + - name: Start instance + id: start-instance + uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d + with: + mode: start + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + backend: hyperstack + profile: single-h100 + + 
cuda-core-crypto-benchmarks: + name: Execute GPU core crypto benchmarks + needs: setup-instance + runs-on: ${{ needs.setup-instance.outputs.runner-name }} strategy: fail-fast: false # explicit include-based build matrix, of known valid options @@ -45,26 +45,45 @@ jobs: include: - os: ubuntu-22.04 cuda: "12.2" - gcc: 9 + gcc: 11 env: CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} + CMAKE_VERSION: 3.29.1 steps: - - name: Instance configuration used + # Mandatory on hyperstack since a bootable volume is not re-usable yet. + - name: Install dependencies run: | - echo "IDs: ${{ inputs.instance_id }}" - echo "AMI: ${{ inputs.instance_image_id }}" - echo "Type: ${{ inputs.instance_type }}" - echo "Request ID: ${{ inputs.request_id }}" - - - name: Get benchmark date - run: | - echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}" + sudo apt update + sudo apt install -y ca-certificates curl + sudo install -m 0755 -d /etc/apt/keyrings + sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + sudo chmod a+r /etc/apt/keyrings/docker.asc + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt update + sudo apt install -y checkinstall zlib1g-dev libssl-dev docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz + tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz + cd cmake-${{ env.CMAKE_VERSION }} + ./bootstrap + make -j"$(nproc)" + sudo make install - name: Checkout tfhe-rs repo with tags uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b with: fetch-depth: 0 + - name: Get benchmark details + run: | + { + echo "BENCH_DATE=$(date --iso-8601=seconds)"; + echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; + echo "COMMIT_HASH=$(git describe --tags --dirty)"; + } >> "${GITHUB_ENV}" + - name: Set up home # "Install rust" step require root user to have a HOME directory which is not set. 
run: | @@ -103,15 +122,13 @@ jobs: - name: Parse results run: | - COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" - COMMIT_HASH="$(git describe --tags --dirty)" python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ --database tfhe_rs \ - --hardware ${{ inputs.instance_type }} \ + --hardware "n3-H100x1" \ --backend gpu \ - --project-version "${COMMIT_HASH}" \ + --project-version "${{ env.COMMIT_HASH }}" \ --branch ${{ github.ref_name }} \ - --commit-date "${COMMIT_DATE}" \ + --commit-date "${{ env.COMMIT_DATE }}" \ --bench-date "${{ env.BENCH_DATE }}" \ --name-suffix avx512 \ --walk-subdirs \ @@ -150,8 +167,28 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png SLACK_MESSAGE: "PBS GPU benchmarks finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + + teardown-instance: + name: Teardown instance (cuda-core-crypto-benchmarks) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, cuda-core-crypto-benchmarks ] + runs-on: ubuntu-latest + steps: + - name: Stop instance + id: stop-instance + uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d + with: + mode: stop + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + label: ${{ needs.setup-instance.outputs.runner-name }} + + - name: Slack Notification + if: ${{ failure() }} + continue-on-error: true + uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 + env: + SLACK_COLOR: ${{ job.status }} + SLACK_MESSAGE: "Instance teardown (cuda-core-crypto-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/csprng_randomness_tests.yml b/.github/workflows/csprng_randomness_tests.yml index edb78dccd..0da8f0383 100644 --- a/.github/workflows/csprng_randomness_tests.yml +++ b/.github/workflows/csprng_randomness_tests.yml @@ -19,8 +19,8 @@ on: jobs: - setup-ec2: - name: Setup EC2 instance (csprng-randomness-tests) + setup-instance: + name: Setup instance (csprng-randomness-tests) if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }} runs-on: ubuntu-latest outputs: @@ -39,11 +39,11 @@ jobs: csprng-randomness-tests: name: CSPRNG randomness tests - needs: setup-ec2 + needs: setup-instance concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: true - runs-on: ${{ needs.setup-ec2.outputs.runner-name }} + runs-on: ${{ needs.setup-instance.outputs.runner-name }} steps: - name: Checkout tfhe-rs uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b @@ -69,10 +69,10 @@ 
jobs: SLACK_COLOR: ${{ job.status }} SLACK_MESSAGE: "concrete-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" - teardown-ec2: - name: Teardown EC2 instance (csprng-randomness-tests) - if: ${{ always() && needs.setup-ec2.result != 'skipped' }} - needs: [ setup-ec2, csprng-randomness-tests ] + teardown-instance: + name: Teardown instance (csprng-randomness-tests) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, csprng-randomness-tests ] runs-on: ubuntu-latest steps: - name: Stop instance @@ -83,7 +83,7 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-ec2.outputs.runner-name }} + label: ${{ needs.setup-instance.outputs.runner-name }} - name: Slack Notification if: ${{ failure() }} @@ -91,4 +91,4 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "EC2 teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/integer_gpu_benchmark.yml b/.github/workflows/integer_gpu_benchmark.yml index 93bc29482..0df0be283 100644 --- a/.github/workflows/integer_gpu_benchmark.yml +++ b/.github/workflows/integer_gpu_benchmark.yml @@ -1,24 +1,11 @@ -# Run integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot. +# Run integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot. 
name: Integer GPU benchmarks on: workflow_dispatch: - inputs: - instance_id: - description: "Instance ID" - type: string - instance_image_id: - description: "Instance AMI ID" - type: string - instance_type: - description: "Instance product type" - type: string - runner_name: - description: "Action runner name" - type: string - request_id: - description: "Slab request ID" - type: string + push: + branches: + - main env: CARGO_TERM_COLOR: always @@ -27,12 +14,33 @@ env: ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} RUST_BACKTRACE: "full" RUST_MIN_STACK: "8388608" + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} jobs: - run-integer-benchmarks: - name: Execute integer benchmarks in EC2 - runs-on: ${{ github.event.inputs.runner_name }} - if: ${{ !cancelled() }} + setup-instance: + name: Setup instance (cuda-integer-benchmarks) + runs-on: ubuntu-latest + outputs: + runner-name: ${{ steps.start-instance.outputs.label }} + steps: + - name: Start instance + id: start-instance + uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d + with: + mode: start + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + backend: hyperstack + profile: single-h100 + + cuda-integer-benchmarks: + name: Execute GPU integer benchmarks + needs: setup-instance + runs-on: ${{ needs.setup-instance.outputs.runner-name }} strategy: fail-fast: false # explicit include-based build matrix, of known valid options @@ -40,26 +48,45 @@ jobs: include: - os: ubuntu-22.04 cuda: "12.2" - gcc: 9 + gcc: 11 env: CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} + CMAKE_VERSION: 3.29.1 steps: - - name: Instance configuration used + # Mandatory on hyperstack since a bootable volume is not 
re-usable yet. + - name: Install dependencies run: | - echo "IDs: ${{ inputs.instance_id }}" - echo "AMI: ${{ inputs.instance_image_id }}" - echo "Type: ${{ inputs.instance_type }}" - echo "Request ID: ${{ inputs.request_id }}" - - - name: Get benchmark date - run: | - echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}" + sudo apt update + sudo apt install -y ca-certificates curl + sudo install -m 0755 -d /etc/apt/keyrings + sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + sudo chmod a+r /etc/apt/keyrings/docker.asc + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt update + sudo apt install -y checkinstall zlib1g-dev libssl-dev docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz + tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz + cd cmake-${{ env.CMAKE_VERSION }} + ./bootstrap + make -j"$(nproc)" + sudo make install - name: Checkout tfhe-rs repo with tags uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b with: fetch-depth: 0 + - name: Get benchmark details + run: | + { + echo "BENCH_DATE=$(date --iso-8601=seconds)"; + echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; + echo "COMMIT_HASH=$(git describe --tags --dirty)"; + } >> "${GITHUB_ENV}" + - name: Set up home # "Install rust" step require root user to have a HOME directory which is not set. 
run: | @@ -107,15 +134,13 @@ jobs: - name: Parse results run: | - COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" - COMMIT_HASH="$(git describe --tags --dirty)" python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ --database tfhe_rs \ - --hardware ${{ inputs.instance_type }} \ + --hardware "n3-H100x1" \ --backend gpu \ - --project-version "${COMMIT_HASH}" \ + --project-version "${{ env.COMMIT_HASH }}" \ --branch ${{ github.ref_name }} \ - --commit-date "${COMMIT_DATE}" \ + --commit-date "${{ env.COMMIT_DATE }}" \ --bench-date "${{ env.BENCH_DATE }}" \ --walk-subdirs \ --name-suffix avx512 \ @@ -148,14 +173,39 @@ jobs: -d @${{ env.RESULTS_FILENAME }} \ ${{ secrets.SLAB_URL }} + slack-notify: + name: Slack Notification + needs: [ setup-instance, cuda-integer-benchmarks ] + runs-on: ubuntu-latest + if: ${{ !success() && !cancelled() }} + continue-on-error: true + steps: + - name: Send message + uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 + env: + SLACK_COLOR: ${{ needs.cuda-integer-benchmarks.result }} + SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-integer-benchmarks.result }}. 
(${{ env.ACTION_RUN_URL }})" + + teardown-instance: + name: Teardown instance (cuda-integer-benchmarks) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, cuda-integer-benchmarks ] + runs-on: ubuntu-latest + steps: + - name: Stop instance + id: stop-instance + uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d + with: + mode: stop + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + label: ${{ needs.setup-instance.outputs.runner-name }} + - name: Slack Notification - if: ${{ !success() && !cancelled() }} + if: ${{ failure() }} continue-on-error: true uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + SLACK_MESSAGE: "Instance teardown (cuda-integer-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/integer_gpu_full_benchmark.yml b/.github/workflows/integer_gpu_full_benchmark.yml index 00c872173..ffc9bd9d2 100644 --- a/.github/workflows/integer_gpu_full_benchmark.yml +++ b/.github/workflows/integer_gpu_full_benchmark.yml @@ -1,32 +1,11 @@ -# Run all integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot. +# Run all integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot. 
name: Integer GPU full benchmarks on: workflow_dispatch: - inputs: - instance_id: - description: "Instance ID" - type: string - instance_image_id: - description: "Instance AMI ID" - type: string - instance_type: - description: "Instance product type" - type: string - runner_name: - description: "Action runner name" - type: string - request_id: - description: "Slab request ID" - type: string - # This input is not used in this workflow but still mandatory since a calling workflow could - # use it. If a triggering command include a user_inputs field, then the triggered workflow - # must include this very input, otherwise the workflow won't be called. - # See start_full_benchmarks.yml as example. - user_inputs: - description: "Type of benchmarks to run" - type: string - default: "weekly_benchmarks" + schedule: + # Weekly benchmarks will be triggered each Saturday at 1a.m. + - cron: '0 1 * * 6' env: CARGO_TERM_COLOR: always @@ -34,13 +13,34 @@ env: ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} RUST_BACKTRACE: "full" RUST_MIN_STACK: "8388608" + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} jobs: - integer-benchmarks: - name: Execute integer benchmarks for all operations flavor - runs-on: ${{ github.event.inputs.runner_name }} + setup-instance: + name: Setup instance (cuda-integer-full-benchmarks) + runs-on: ubuntu-latest + outputs: + runner-name: ${{ steps.start-instance.outputs.label }} + steps: + - name: Start instance + id: start-instance + uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d + with: + mode: start + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + backend: hyperstack + profile: single-h100 + + 
cuda-integer-full-benchmarks: + name: Execute GPU integer benchmarks for all operations flavor + needs: setup-instance + runs-on: ${{ needs.setup-instance.outputs.runner-name }} timeout-minutes: 1440 # 24 hours - if: ${{ !cancelled() }} continue-on-error: true strategy: fail-fast: false @@ -52,16 +52,31 @@ jobs: include: - os: ubuntu-22.04 cuda: "12.2" - gcc: 9 + gcc: 11 env: CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }} + CMAKE_VERSION: 3.29.1 steps: - - name: Instance configuration used + # Mandatory on hyperstack since a bootable volume is not re-usable yet. + - name: Install dependencies run: | - echo "IDs: ${{ inputs.instance_id }}" - echo "AMI: ${{ inputs.instance_image_id }}" - echo "Type: ${{ inputs.instance_type }}" - echo "Request ID: ${{ inputs.request_id }}" + sudo apt update + sudo apt install -y ca-certificates curl + sudo install -m 0755 -d /etc/apt/keyrings + sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + sudo chmod a+r /etc/apt/keyrings/docker.asc + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt update + sudo apt install -y checkinstall zlib1g-dev libssl-dev docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz + tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz + cd cmake-${{ env.CMAKE_VERSION }} + ./bootstrap + make -j"$(nproc)" + sudo make install - name: Checkout tfhe-rs repo with tags uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b @@ -121,7 +136,7 @@ jobs: run: | python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ --database tfhe_rs \ - --hardware ${{ inputs.instance_type }} \ + --hardware "n3-H100x1" \ --backend gpu \ --project-version "${{ env.COMMIT_HASH }}" \ --branch ${{ github.ref_name }} \ @@ -151,19 +166,34 @@ jobs: -d @${{ env.RESULTS_FILENAME }} \ ${{ secrets.SLAB_URL }} - slack-notification: - name: Slack Notification - runs-on: ${{ github.event.inputs.runner_name }} - if: ${{ !success() && !cancelled() }} - needs: integer-benchmarks - steps: - - name: Notify + - name: Slack Notification + if: ${{ !success() && !cancelled() }} continue-on-error: true uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + + teardown-instance: + name: Teardown instance (cuda-integer-full-benchmarks) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, cuda-integer-full-benchmarks ] + runs-on: ubuntu-latest + steps: + - name: Stop instance + id: stop-instance + uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d + with: + mode: stop + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + label: ${{ needs.setup-instance.outputs.runner-name }} + + - name: Slack Notification + if: ${{ failure() }} + continue-on-error: true + uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 + env: + SLACK_COLOR: ${{ job.status }} + SLACK_MESSAGE: "Instance teardown (cuda-integer-full-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/integer_multi_bit_gpu_benchmark.yml b/.github/workflows/integer_multi_bit_gpu_benchmark.yml index 48e532cd8..a271bf7b5 100644 --- a/.github/workflows/integer_multi_bit_gpu_benchmark.yml +++ b/.github/workflows/integer_multi_bit_gpu_benchmark.yml @@ -1,24 +1,11 @@ -# Run integer benchmarks with multi-bit cryptographic parameters on an AWS instance and return parsed results to Slab CI bot. +# Run integer benchmarks with multi-bit cryptographic parameters on an instance and return parsed results to Slab CI bot. 
name: Integer GPU Multi-bit benchmarks on: workflow_dispatch: - inputs: - instance_id: - description: "Instance ID" - type: string - instance_image_id: - description: "Instance AMI ID" - type: string - instance_type: - description: "Instance product type" - type: string - runner_name: - description: "Action runner name" - type: string - request_id: - description: "Slab request ID" - type: string + schedule: + # Weekly benchmarks will be triggered each Saturday at 1a.m. + - cron: '0 1 * * 6' env: CARGO_TERM_COLOR: always @@ -27,13 +14,34 @@ env: ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} RUST_BACKTRACE: "full" RUST_MIN_STACK: "8388608" + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} jobs: - cuda-integer-benchmarks: - name: Execute integer multi-bit benchmarks in EC2 - runs-on: ${{ github.event.inputs.runner_name }} + setup-instance: + name: Setup instance (cuda-integer-multi-bit-benchmarks) + runs-on: ubuntu-latest + outputs: + runner-name: ${{ steps.start-instance.outputs.label }} + steps: + - name: Start instance + id: start-instance + uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d + with: + mode: start + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + backend: hyperstack + profile: single-h100 + + cuda-integer-multi-bit-benchmarks: + name: Execute GPU integer multi-bit benchmarks + needs: setup-instance + runs-on: ${{ needs.setup-instance.outputs.runner-name }} timeout-minutes: 1440 # 24 hours - if: ${{ !cancelled() }} strategy: fail-fast: false # explicit include-based build matrix, of known valid options @@ -41,26 +49,45 @@ jobs: include: - os: ubuntu-22.04 cuda: "12.2" - gcc: 9 + gcc: 11 env: CUDA_PATH: 
/usr/local/cuda-${{ matrix.cuda }} + CMAKE_VERSION: 3.29.1 steps: - - name: Instance configuration used + # Mandatory on hyperstack since a bootable volume is not re-usable yet. + - name: Install dependencies run: | - echo "IDs: ${{ inputs.instance_id }}" - echo "AMI: ${{ inputs.instance_image_id }}" - echo "Type: ${{ inputs.instance_type }}" - echo "Request ID: ${{ inputs.request_id }}" - - - name: Get benchmark date - run: | - echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}" + sudo apt update + sudo apt install -y ca-certificates curl + sudo install -m 0755 -d /etc/apt/keyrings + sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + sudo chmod a+r /etc/apt/keyrings/docker.asc + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt update + sudo apt install -y checkinstall zlib1g-dev libssl-dev docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz + tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz + cd cmake-${{ env.CMAKE_VERSION }} + ./bootstrap + make -j"$(nproc)" + sudo make install - name: Checkout tfhe-rs repo with tags uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b with: fetch-depth: 0 + - name: Get benchmark details + run: | + { + echo "BENCH_DATE=$(date --iso-8601=seconds)"; + echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; + echo "COMMIT_HASH=$(git describe --tags --dirty)"; + } >> "${GITHUB_ENV}" + - name: Set up home # "Install rust" step require root user to have a HOME directory which is not set. 
run: | @@ -108,15 +135,13 @@ jobs: - name: Parse results run: | - COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" - COMMIT_HASH="$(git describe --tags --dirty)" python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ --database tfhe_rs \ - --hardware ${{ inputs.instance_type }} \ + --hardware "n3-H100x1" \ --backend gpu \ - --project-version "${COMMIT_HASH}" \ + --project-version "${{ env.COMMIT_HASH }}" \ --branch ${{ github.ref_name }} \ - --commit-date "${COMMIT_DATE}" \ + --commit-date "${{ env.COMMIT_DATE }}" \ --bench-date "${{ env.BENCH_DATE }}" \ --walk-subdirs \ --name-suffix avx512 \ @@ -155,8 +180,28 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + SLACK_MESSAGE: "Integer GPU multi-bit benchmarks finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" + + teardown-instance: + name: Teardown instance (cuda-integer-multi-bit-benchmarks) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, cuda-integer-multi-bit-benchmarks ] + runs-on: ubuntu-latest + steps: + - name: Stop instance + id: stop-instance + uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d + with: + mode: stop + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + label: ${{ needs.setup-instance.outputs.runner-name }} + + - name: Slack Notification + if: ${{ failure() }} + continue-on-error: true + uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 + env: + SLACK_COLOR: ${{ job.status }} + SLACK_MESSAGE: "Instance teardown (cuda-integer-multi-bit-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/make_release_cuda.yml b/.github/workflows/make_release_cuda.yml index 173abe6b0..90ee28539 100644 --- a/.github/workflows/make_release_cuda.yml +++ b/.github/workflows/make_release_cuda.yml @@ -21,8 +21,8 @@ env: SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} jobs: - setup-ec2: - name: Setup EC2 instance (publish-cuda-release) + setup-instance: + name: Setup instance (publish-cuda-release) runs-on: ubuntu-latest outputs: runner-name: ${{ steps.start-instance.outputs.label }} @@ -40,8 +40,8 @@ jobs: publish-cuda-release: name: Publish CUDA Release - needs: setup-ec2 - runs-on: ${{ needs.setup-ec2.outputs.runner-name }} + needs: setup-instance + runs-on: ${{ needs.setup-instance.outputs.runner-name }} strategy: fail-fast: false # explicit include-based build matrix, of known valid options @@ -104,10 +104,10 @@ jobs: SLACK_COLOR: ${{ job.status }} SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" - teardown-ec2: - name: Teardown EC2 instance (publish-release) - if: ${{ always() && needs.setup-ec2.result != 'skipped' }} - needs: [ setup-ec2, publish-cuda-release ] + teardown-instance: + name: Teardown instance (publish-cuda-release) + if: ${{ always() && needs.setup-instance.result != 'skipped' }} + needs: [ setup-instance, publish-cuda-release ] runs-on: ubuntu-latest steps: - name: Stop instance @@ -118,7 +118,7 @@ jobs: github-token: ${{ secrets.SLAB_ACTION_TOKEN }} slab-url: ${{ secrets.SLAB_BASE_URL }} job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-ec2.outputs.runner-name }} + label: ${{ needs.setup-instance.outputs.runner-name }} - name: Slack Notification if: ${{ failure() }} @@ -126,4 +126,4 @@ jobs: uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907 env: SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "EC2 teardown (publish-cuda-release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + SLACK_MESSAGE: "Instance teardown (publish-cuda-release) finished with status: ${{ job.status }}. 
(${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/start_benchmarks.yml b/.github/workflows/start_benchmarks.yml index 228f1956b..1a7bd8015 100644 --- a/.github/workflows/start_benchmarks.yml +++ b/.github/workflows/start_benchmarks.yml @@ -36,10 +36,6 @@ on: description: "Run core crypto benches" type: boolean default: true - core_crypto_gpu_bench: - description: "Run core crypto benches on GPU" - type: boolean - default: true wasm_client_bench: description: "Run WASM client benches" type: boolean @@ -53,8 +49,7 @@ jobs: command: [ boolean_bench, shortint_bench, integer_bench, integer_multi_bit_bench, signed_integer_bench, signed_integer_multi_bit_bench, - integer_gpu_bench, integer_multi_bit_gpu_bench, - core_crypto_bench, core_crypto_gpu_bench, wasm_client_bench ] + core_crypto_bench, wasm_client_bench ] runs-on: ubuntu-latest steps: - name: Checkout tfhe-rs diff --git a/.github/workflows/start_full_benchmarks.yml b/.github/workflows/start_full_benchmarks.yml index 1cdd1f1f5..49334a871 100644 --- a/.github/workflows/start_full_benchmarks.yml +++ b/.github/workflows/start_full_benchmarks.yml @@ -25,8 +25,8 @@ jobs: strategy: matrix: command: [ boolean_bench, shortint_full_bench, - integer_full_bench, signed_integer_full_bench, integer_gpu_full_bench, - core_crypto_bench, core_crypto_gpu_bench, wasm_client_bench ] + integer_full_bench, signed_integer_full_bench, + core_crypto_bench, wasm_client_bench ] runs-on: ubuntu-latest steps: - name: Checkout tfhe-rs diff --git a/ci/ec2_products_cost.json b/ci/ec2_products_cost.json index 09ff45af7..0745d2997 100644 --- a/ci/ec2_products_cost.json +++ b/ci/ec2_products_cost.json @@ -4,5 +4,6 @@ "p3.2xlarge": 3.06, "p4d.24xlarge": 32.7726, "p5.48xlarge": 98.32, - "rtx4090": 0.04 + "rtx4090": 0.04, + "n3-H100x1": 4.30 } diff --git a/ci/slab.toml b/ci/slab.toml index f0f927668..a83c01d6c 100644 --- a/ci/slab.toml +++ b/ci/slab.toml @@ -26,6 +26,11 @@ instance_type = "p3.2xlarge" spawn_retry_attempts = 120 
spawn_retry_duration = 60 +[backend.hyperstack.single-h100] +environment_name = "canada" +image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2" +flavor_name = "n3-H100x1" + [command.cpu_test] workflow = "aws_tfhe_tests.yml" profile = "cpu-big"