From bbf484c7f64fc9e863efa36768e888451cab5037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Test=C3=A9?= Date: Tue, 25 Nov 2025 17:35:00 +0100 Subject: [PATCH] chore(ci): move erc20 and dex gpu benchmarks to common files --- .github/workflows/benchmark_gpu.yml | 2 + .github/workflows/benchmark_gpu_dex.yml | 67 ------ .../workflows/benchmark_gpu_dex_common.yml | 215 ----------------- .../workflows/benchmark_gpu_dex_weekly.yml | 63 ----- .github/workflows/benchmark_gpu_erc20.yml | 68 ------ .../workflows/benchmark_gpu_erc20_common.yml | 216 ------------------ .../workflows/benchmark_gpu_erc20_weekly.yml | 64 ------ .github/workflows/benchmark_gpu_weekly.yml | 189 ++++++++++++++- 8 files changed, 186 insertions(+), 698 deletions(-) delete mode 100644 .github/workflows/benchmark_gpu_dex.yml delete mode 100644 .github/workflows/benchmark_gpu_dex_common.yml delete mode 100644 .github/workflows/benchmark_gpu_dex_weekly.yml delete mode 100644 .github/workflows/benchmark_gpu_erc20.yml delete mode 100644 .github/workflows/benchmark_gpu_erc20_common.yml delete mode 100644 .github/workflows/benchmark_gpu_erc20_weekly.yml diff --git a/.github/workflows/benchmark_gpu.yml b/.github/workflows/benchmark_gpu.yml index e20fa9e37..478edb3ff 100644 --- a/.github/workflows/benchmark_gpu.yml +++ b/.github/workflows/benchmark_gpu.yml @@ -33,6 +33,8 @@ on: - integer_zk - integer_aes - integer_aes256 + - hlapi_erc20 + - hlapi_dex - hlapi_noise_squash op_flavor: description: "Operations set to run" diff --git a/.github/workflows/benchmark_gpu_dex.yml b/.github/workflows/benchmark_gpu_dex.yml deleted file mode 100644 index 7b62e3e91..000000000 --- a/.github/workflows/benchmark_gpu_dex.yml +++ /dev/null @@ -1,67 +0,0 @@ -# Run CUDA DEX benchmarks on a Hyperstack VM and return parsed results to Slab CI bot. 
-name: benchmark_gpu_dex/ - -on: - workflow_dispatch: - inputs: - profile: - description: "Instance type" - required: true - type: choice - options: - - "l40 (n3-L40x1)" - - "4-l40 (n3-L40x4)" - - "multi-a100-nvlink (n3-A100x8-NVLink)" - - "single-h100 (n3-H100x1)" - - "2-h100 (n3-H100x2)" - - "4-h100 (n3-H100x4)" - - "multi-h100 (n3-H100x8)" - - "multi-h100-nvlink (n3-H100x8-NVLink)" - - "multi-h100-sxm5 (n3-H100-SXM5x8)" - -permissions: {} - -# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow - -jobs: - parse-inputs: - name: benchmark_gpu_dex/parse-inputs - runs-on: ubuntu-latest - outputs: - profile: ${{ steps.parse_profile.outputs.profile }} - hardware_name: ${{ steps.parse_hardware_name.outputs.name }} - env: - INPUTS_PROFILE: ${{ inputs.profile }} - steps: - - name: Parse profile - id: parse_profile - run: | - # Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern. - # shellcheck disable=SC2001 - PROFILE=$(echo "${INPUTS_PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|') - echo "profile=${PROFILE}" >> "${GITHUB_OUTPUT}" - - - name: Parse hardware name - id: parse_hardware_name - run: | - # Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern. 
- # shellcheck disable=SC2001 - NAME=$(echo "${INPUTS_PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|') - echo "name=${NAME}" >> "${GITHUB_OUTPUT}" - - run-benchmarks: - name: benchmark_gpu_dex/run-benchmarks - needs: parse-inputs - uses: ./.github/workflows/benchmark_gpu_dex_common.yml - with: - profile: ${{ needs.parse-inputs.outputs.profile }} - hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }} - secrets: - BOT_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} - SLAB_URL: ${{ secrets.SLAB_URL }} - SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} diff --git a/.github/workflows/benchmark_gpu_dex_common.yml b/.github/workflows/benchmark_gpu_dex_common.yml deleted file mode 100644 index 5fee2697e..000000000 --- a/.github/workflows/benchmark_gpu_dex_common.yml +++ /dev/null @@ -1,215 +0,0 @@ -# Run DEX benchmarks on an instance with CUDA and return parsed results to Slab CI bot. 
-name: benchmark_gpu_dex_common - -on: - workflow_call: - inputs: - backend: - type: string - default: hyperstack - profile: - type: string - required: true - hardware_name: - type: string - required: true - secrets: - REPO_CHECKOUT_TOKEN: - required: true - SLAB_ACTION_TOKEN: - required: true - SLAB_BASE_URL: - required: true - SLAB_URL: - required: true - JOB_SECRET: - required: true - SLACK_CHANNEL: - required: true - BOT_USERNAME: - required: true - SLACK_WEBHOOK: - required: true - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - -permissions: {} - -# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency - -jobs: - setup-instance: - name: benchmark_gpu_dex_common/setup-instance - runs-on: ubuntu-latest - if: github.event_name == 'workflow_dispatch' || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') - outputs: - # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step. - # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance. - # Since the on-demand remote label is set before failure, we have to do the logical OR in this order, - # otherwise we'll try to run the next job on a non-existing on-demand instance. 
- runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }} - remote-instance-outcome: ${{ steps.start-remote-instance.outcome }} - steps: - - name: Start remote instance - id: start-remote-instance - continue-on-error: true - uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: ${{ inputs.backend }} - profile: ${{ inputs.profile }} - - - name: Acknowledge remote instance failure - if: steps.start-remote-instance.outcome == 'failure' && - inputs.profile != 'single-h100' - run: | - echo "Remote instance instance has failed to start (profile provided: '${INPUTS_PROFILE}')" - echo "Permanent instance instance cannot be used as a substitute (profile needed: 'single-h100')" - exit 1 - env: - INPUTS_PROFILE: ${{ inputs.profile }} - - # This will allow to fallback on permanent instances running on Hyperstack. 
- - name: Use permanent remote instance - id: use-permanent-instance - if: env.SECRETS_AVAILABLE == 'true' && - steps.start-remote-instance.outcome == 'failure' && - inputs.profile == 'single-h100' - run: | - echo "runner_group=h100x1" >> "$GITHUB_OUTPUT" - - cuda-dex-benchmarks: - name: benchmark_gpu_dex_common/cuda-dex-benchmarks - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - strategy: - fail-fast: false - # explicit include-based build matrix, of known valid options - matrix: - include: - - os: ubuntu-22.04 - cuda: "12.8" - gcc: 11 - steps: - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 - with: - fetch-depth: 0 - persist-credentials: 'false' - token: ${{ secrets.REPO_CHECKOUT_TOKEN }} - - - name: Setup Hyperstack dependencies - if: needs.setup-instance.outputs.remote-instance-outcome == 'success' - uses: ./.github/actions/gpu_setup - with: - cuda-version: ${{ matrix.cuda }} - gcc-version: ${{ matrix.gcc }} - - - name: Get benchmark details - run: | - COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}"); - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=${COMMIT_DATE}"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - env: - SHA: ${{ github.sha }} - - - name: Install rust - uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases - with: - toolchain: nightly - - - name: Run benchmarks - run: | - make bench_hlapi_dex_gpu - - - name: Parse results - run: | - python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \ - --database tfhe_rs \ - --hardware "${INPUTS_HARDWARE_NAME}" \ - --backend gpu \ - --project-version "${COMMIT_HASH}" \ - --branch "${REF_NAME}" \ - --commit-date "${COMMIT_DATE}" \ - --bench-date "${BENCH_DATE}" \ - --walk-subdirs \ - --name-suffix avx512 - env: - 
INPUTS_HARDWARE_NAME: ${{ inputs.hardware_name }} - REF_NAME: ${{ github.ref_name }} - - - name: Upload parsed results artifact - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 - with: - name: ${{ github.sha }}_dex_${{ inputs.profile }} - path: ${{ env.RESULTS_FILENAME }} - - - name: Checkout Slab repo - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 - with: - repository: zama-ai/slab - path: slab - persist-credentials: 'false' - token: ${{ secrets.REPO_CHECKOUT_TOKEN }} - - - name: Send data to Slab - shell: bash - run: | - python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \ - --slab-url "${SLAB_URL}" - env: - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_URL: ${{ secrets.SLAB_URL }} - - slack-notify: - name: benchmark_gpu_dex_common/slack-notify - needs: [ setup-instance, cuda-dex-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-dex-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 - env: - SLACK_COLOR: ${{ needs.cuda-dex-benchmarks.result }} - SLACK_MESSAGE: "Cuda DEX benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-dex-benchmarks.result }}. 
(${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: benchmark_gpu_dex_common/teardown-instance - if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }} - needs: [ setup-instance, cuda-dex-benchmarks, slack-notify ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} - - - name: Slack Notification - if: ${{ failure() }} - uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-dex-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_dex_weekly.yml b/.github/workflows/benchmark_gpu_dex_weekly.yml deleted file mode 100644 index 6bb8ea157..000000000 --- a/.github/workflows/benchmark_gpu_dex_weekly.yml +++ /dev/null @@ -1,63 +0,0 @@ -# Run CUDA DEX benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot. -name: benchmark_gpu_dex_weekly - -on: - schedule: - # Weekly benchmarks will be triggered each Saturday at 9a.m. 
- - cron: '0 9 * * 6' - -permissions: {} - -# zizmor: ignore[concurrency-limits] only GitHub can trigger this workflow - -jobs: - run-benchmarks-1-h100: - name: benchmark_gpu_dex_weekly/run-benchmarks-1-h100 - if: github.repository == 'zama-ai/tfhe-rs' - uses: ./.github/workflows/benchmark_gpu_dex_common.yml - with: - profile: single-h100 - hardware_name: n3-H100x1 - secrets: - BOT_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} - SLAB_URL: ${{ secrets.SLAB_URL }} - SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} - - run-benchmarks-2-h100: - name: benchmark_gpu_dex_weekly/run-benchmarks-2-h100 - if: github.repository == 'zama-ai/tfhe-rs' - uses: ./.github/workflows/benchmark_gpu_dex_common.yml - with: - profile: 2-h100 - hardware_name: n3-H100x2 - secrets: - BOT_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} - SLAB_URL: ${{ secrets.SLAB_URL }} - SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} - - run-benchmarks-8-h100: - name: benchmark_gpu_dex_weekly/run-benchmarks-8-h100 - if: github.repository == 'zama-ai/tfhe-rs' - uses: ./.github/workflows/benchmark_gpu_dex_common.yml - with: - profile: multi-h100 - hardware_name: n3-H100x8 - secrets: - BOT_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} - SLAB_URL: ${{ secrets.SLAB_URL }} - SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} diff --git 
a/.github/workflows/benchmark_gpu_erc20.yml b/.github/workflows/benchmark_gpu_erc20.yml deleted file mode 100644 index 70f402b65..000000000 --- a/.github/workflows/benchmark_gpu_erc20.yml +++ /dev/null @@ -1,68 +0,0 @@ -# Run CUDA ERC20 benchmarks on a Hyperstack VM and return parsed results to Slab CI bot. -name: benchmark_gpu_erc20 - -on: - workflow_dispatch: - inputs: - profile: - description: "Instance type" - required: true - type: choice - options: - - "l40 (n3-L40x1)" - - "4-l40 (n3-L40x4)" - - "multi-a100-nvlink (n3-A100x8-NVLink)" - - "single-h100 (n3-H100x1)" - - "2-h100 (n3-H100x2)" - - "4-h100 (n3-H100x4)" - - "multi-h100 (n3-H100x8)" - - "multi-h100-nvlink (n3-H100x8-NVLink)" - - "multi-h100-sxm5 (n3-H100-SXM5x8)" - - -permissions: {} - -# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow - -jobs: - parse-inputs: - name: benchmark_gpu_erc20/parse-inputs - runs-on: ubuntu-latest - outputs: - profile: ${{ steps.parse_profile.outputs.profile }} - hardware_name: ${{ steps.parse_hardware_name.outputs.name }} - env: - INPUTS_PROFILE: ${{ inputs.profile }} - steps: - - name: Parse profile - id: parse_profile - run: | - # Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern. - # shellcheck disable=SC2001 - PROFILE=$(echo "${INPUTS_PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|') - echo "profile=${PROFILE}" >> "${GITHUB_OUTPUT}" - - - name: Parse hardware name - id: parse_hardware_name - run: | - # Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern. 
- # shellcheck disable=SC2001 - NAME=$(echo "${INPUTS_PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|') - echo "name=${NAME}" >> "${GITHUB_OUTPUT}" - - run-benchmarks: - name: benchmark_gpu_erc20/run-benchmarks - needs: parse-inputs - uses: ./.github/workflows/benchmark_gpu_erc20_common.yml - with: - profile: ${{ needs.parse-inputs.outputs.profile }} - hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }} - secrets: - BOT_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} - SLAB_URL: ${{ secrets.SLAB_URL }} - SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} diff --git a/.github/workflows/benchmark_gpu_erc20_common.yml b/.github/workflows/benchmark_gpu_erc20_common.yml deleted file mode 100644 index fe382e033..000000000 --- a/.github/workflows/benchmark_gpu_erc20_common.yml +++ /dev/null @@ -1,216 +0,0 @@ -# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot. 
-name: benchmark_gpu_erc20_common - -on: - workflow_call: - inputs: - backend: - type: string - default: hyperstack - profile: - type: string - required: true - hardware_name: - type: string - required: true - secrets: - REPO_CHECKOUT_TOKEN: - required: true - SLAB_ACTION_TOKEN: - required: true - SLAB_BASE_URL: - required: true - SLAB_URL: - required: true - JOB_SECRET: - required: true - SLACK_CHANNEL: - required: true - BOT_USERNAME: - required: true - SLACK_WEBHOOK: - required: true - -env: - CARGO_TERM_COLOR: always - RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json - PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv - ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} - RUST_BACKTRACE: "full" - RUST_MIN_STACK: "8388608" - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png - SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - - -permissions: {} - -# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency - -jobs: - setup-instance: - name: benchmark_gpu_erc20_common/setup-instance - runs-on: ubuntu-latest - if: github.event_name == 'workflow_dispatch' || - (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') - outputs: - # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step. - # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance. - # Since the on-demand remote label is set before failure, we have to do the logical OR in this order, - # otherwise we'll try to run the next job on a non-existing on-demand instance. 
- runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }} - remote-instance-outcome: ${{ steps.start-remote-instance.outcome }} - steps: - - name: Start remote instance - id: start-remote-instance - continue-on-error: true - uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac - with: - mode: start - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - backend: ${{ inputs.backend }} - profile: ${{ inputs.profile }} - - - name: Acknowledge remote instance failure - if: steps.start-remote-instance.outcome == 'failure' && - inputs.profile != 'single-h100' - run: | - echo "Remote instance instance has failed to start (profile provided: '${INPUTS_PROFILE}')" - echo "Permanent instance instance cannot be used as a substitute (profile needed: 'single-h100')" - exit 1 - env: - INPUTS_PROFILE: ${{ inputs.profile }} - - # This will allow to fallback on permanent instances running on Hyperstack. 
- - name: Use permanent remote instance - id: use-permanent-instance - if: env.SECRETS_AVAILABLE == 'true' && - steps.start-remote-instance.outcome == 'failure' && - inputs.profile == 'single-h100' - run: | - echo "runner_group=h100x1" >> "$GITHUB_OUTPUT" - - cuda-erc20-benchmarks: - name: benchmark_gpu_erc20_common/cuda-erc20-benchmarks - needs: setup-instance - runs-on: ${{ needs.setup-instance.outputs.runner-name }} - strategy: - fail-fast: false - # explicit include-based build matrix, of known valid options - matrix: - include: - - os: ubuntu-22.04 - cuda: "12.8" - gcc: 11 - steps: - - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 - with: - fetch-depth: 0 - persist-credentials: 'false' - token: ${{ secrets.REPO_CHECKOUT_TOKEN }} - - - name: Setup Hyperstack dependencies - if: needs.setup-instance.outputs.remote-instance-outcome == 'success' - uses: ./.github/actions/gpu_setup - with: - cuda-version: ${{ matrix.cuda }} - gcc-version: ${{ matrix.gcc }} - - - name: Get benchmark details - run: | - COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}"); - { - echo "BENCH_DATE=$(date --iso-8601=seconds)"; - echo "COMMIT_DATE=${COMMIT_DATE}"; - echo "COMMIT_HASH=$(git describe --tags --dirty)"; - } >> "${GITHUB_ENV}" - env: - SHA: ${{ github.sha }} - - - name: Install rust - uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases - with: - toolchain: nightly - - - name: Run benchmarks - run: | - make bench_hlapi_erc20_gpu - - - name: Parse results - run: | - python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \ - --database tfhe_rs \ - --hardware "${INPUTS_HARDWARE_NAME}" \ - --backend gpu \ - --project-version "${COMMIT_HASH}" \ - --branch "${REF_NAME}" \ - --commit-date "${COMMIT_DATE}" \ - --bench-date "${BENCH_DATE}" \ - --walk-subdirs \ - --name-suffix avx512 - env: - 
INPUTS_HARDWARE_NAME: ${{ inputs.hardware_name }} - REF_NAME: ${{ github.ref_name }} - - - name: Upload parsed results artifact - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 - with: - name: ${{ github.sha }}_erc20_${{ inputs.profile }} - path: ${{ env.RESULTS_FILENAME }} - - - name: Checkout Slab repo - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 - with: - repository: zama-ai/slab - path: slab - persist-credentials: 'false' - token: ${{ secrets.REPO_CHECKOUT_TOKEN }} - - - name: Send data to Slab - shell: bash - run: | - python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \ - --slab-url "${SLAB_URL}" - env: - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_URL: ${{ secrets.SLAB_URL }} - - slack-notify: - name: benchmark_gpu_erc20_common/slack-notify - needs: [ setup-instance, cuda-erc20-benchmarks ] - runs-on: ubuntu-latest - if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }} - continue-on-error: true - steps: - - name: Send message - uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 - env: - SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }} - SLACK_MESSAGE: "Cuda ERC20 benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. 
(${{ env.ACTION_RUN_URL }})" - - teardown-instance: - name: benchmark_gpu_erc20_common/teardown-instance - if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }} - needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ] - runs-on: ubuntu-latest - steps: - - name: Stop instance - id: stop-instance - uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac - with: - mode: stop - github-token: ${{ secrets.SLAB_ACTION_TOKEN }} - slab-url: ${{ secrets.SLAB_BASE_URL }} - job-secret: ${{ secrets.JOB_SECRET }} - label: ${{ needs.setup-instance.outputs.runner-name }} - - - name: Slack Notification - if: ${{ failure() }} - uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 - env: - SLACK_COLOR: ${{ job.status }} - SLACK_MESSAGE: "Instance teardown (cuda-erc20-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_erc20_weekly.yml b/.github/workflows/benchmark_gpu_erc20_weekly.yml deleted file mode 100644 index be4f6bad1..000000000 --- a/.github/workflows/benchmark_gpu_erc20_weekly.yml +++ /dev/null @@ -1,64 +0,0 @@ -# Run CUDA ERC20 benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot. -name: benchmark_gpu_erc20_weekly - -on: - schedule: - # Weekly benchmarks will be triggered each Saturday at 5a.m. 
- - cron: '0 5 * * 6' - - -permissions: {} - -# zizmor: ignore[concurrency-limits] only GitHub can trigger this workflow - -jobs: - run-benchmarks-1-h100: - name: benchmark_gpu_erc20_weekly/run-benchmarks-1-h100 - if: github.repository == 'zama-ai/tfhe-rs' - uses: ./.github/workflows/benchmark_gpu_erc20_common.yml - with: - profile: single-h100 - hardware_name: n3-H100x1 - secrets: - BOT_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} - SLAB_URL: ${{ secrets.SLAB_URL }} - SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} - - run-benchmarks-2-h100: - name: benchmark_gpu_erc20_weekly/run-benchmarks-2-h100 - if: github.repository == 'zama-ai/tfhe-rs' - uses: ./.github/workflows/benchmark_gpu_erc20_common.yml - with: - profile: 2-h100 - hardware_name: n3-H100x2 - secrets: - BOT_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} - SLAB_URL: ${{ secrets.SLAB_URL }} - SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} - - run-benchmarks-8-h100: - name: benchmark_gpu_erc20_weekly/run-benchmarks-8-h100 - if: github.repository == 'zama-ai/tfhe-rs' - uses: ./.github/workflows/benchmark_gpu_erc20_common.yml - with: - profile: multi-h100 - hardware_name: n3-H100x8 - secrets: - BOT_USERNAME: ${{ secrets.BOT_USERNAME }} - SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} - JOB_SECRET: ${{ secrets.JOB_SECRET }} - SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} - SLAB_URL: ${{ secrets.SLAB_URL }} - SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL 
}} diff --git a/.github/workflows/benchmark_gpu_weekly.yml b/.github/workflows/benchmark_gpu_weekly.yml index 9b4f75755..a67879707 100644 --- a/.github/workflows/benchmark_gpu_weekly.yml +++ b/.github/workflows/benchmark_gpu_weekly.yml @@ -3,8 +3,19 @@ name: benchmark_gpu_weekly on: schedule: + # Weekly schedules are separated in several groups to avoid spawning too many machines at once and risking resource shortages. + # Group 1 + # ------- # Weekly benchmarks will be triggered each Saturday at 1a.m. - cron: '0 1 * * 6' + # Group 2 + # ------- + # Weekly benchmarks will be triggered each Sunday at 1a.m. + - cron: '0 1 * * 0' + # Group 3 + # ------- + # Weekly benchmarks will be triggered each Sunday at 9a.m. + - cron: '0 9 * * 0' permissions: {} @@ -12,9 +23,35 @@ permissions: {} # zizmor: ignore[concurrency-limits] only GitHub can trigger this workflow jobs: + prepare-inputs: + name: benchmark_gpu_weekly/prepare-inputs + runs-on: ubuntu-latest + outputs: + is_weekly_bench_group_1: ${{ steps.check_bench_group_1.outputs.is_weekly_bench_group_1 }} + is_weekly_bench_group_2: ${{ steps.check_bench_group_2.outputs.is_weekly_bench_group_2 }} + is_weekly_bench_group_3: ${{ steps.check_bench_group_3.outputs.is_weekly_bench_group_3 }} + steps: + - name: Check is weekly bench group 1 + id: check_bench_group_1 + run: | # zizmor: ignore[template-injection] this env variable is safe + echo "is_weekly_bench_group_1=${{ github.event.schedule == '0 1 * * 6' }}" >> "${GITHUB_OUTPUT}" + + - name: Check is weekly bench group 2 + id: check_bench_group_2 + run: | # zizmor: ignore[template-injection] this env variable is safe + echo "is_weekly_bench_group_2=${{ github.event.schedule == '0 1 * * 0' }}" >> "${GITHUB_OUTPUT}" + + - name: Check is weekly bench group 3 + id: check_bench_group_3 + run: | # zizmor: ignore[template-injection] this env variable is safe + echo "is_weekly_bench_group_3=${{ github.event.schedule == '0 9 * * 0' }}" >> "${GITHUB_OUTPUT}" + +
run-benchmarks-8-h100-sxm5-integer: name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer - if: github.repository == 'zama-ai/tfhe-rs' + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true' + needs: prepare-inputs uses: ./.github/workflows/benchmark_gpu_common.yml with: profile: multi-h100-sxm5 @@ -35,7 +72,9 @@ jobs: run-benchmarks-8-h100-sxm5-integer-compression: name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-compression - if: github.repository == 'zama-ai/tfhe-rs' + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true' + needs: prepare-inputs uses: ./.github/workflows/benchmark_gpu_common.yml with: profile: multi-h100-sxm5 @@ -56,7 +95,9 @@ jobs: run-benchmarks-8-h100-sxm5-integer-zk-aes: name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-zk-aes - if: github.repository == 'zama-ai/tfhe-rs' + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true' + needs: prepare-inputs uses: ./.github/workflows/benchmark_gpu_common.yml with: profile: multi-h100-sxm5 @@ -77,7 +118,9 @@ jobs: run-benchmarks-8-h100-sxm5-noise-squash: name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-noise-squash - if: github.repository == 'zama-ai/tfhe-rs' + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true' + needs: prepare-inputs uses: ./.github/workflows/benchmark_gpu_common.yml with: profile: multi-h100-sxm5 @@ -98,7 +141,9 @@ jobs: run-benchmarks-1-h100-core-crypto: name: benchmark_gpu_weekly/run-benchmarks-1-h100-core-crypto (1xH100) - if: github.repository == 'zama-ai/tfhe-rs' + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true' + needs: prepare-inputs uses: ./.github/workflows/benchmark_gpu_common.yml with: profile: single-h100 @@ -114,3 +159,137 @@ jobs: 
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} SLAB_URL: ${{ secrets.SLAB_URL }} SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} + + # ----------------------------------------------------- + # ERC20 benchmarks + # ----------------------------------------------------- + + run-benchmarks-1-h100-erc20: + name: benchmark_gpu_weekly/run-benchmarks-1-h100-erc20 + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true' + needs: prepare-inputs + uses: ./.github/workflows/benchmark_gpu_common.yml + with: + profile: single-h100 + hardware_name: n3-H100x1 + command: hlapi_erc20 + bench_type: both + secrets: + BOT_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} + JOB_SECRET: ${{ secrets.JOB_SECRET }} + SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} + SLAB_URL: ${{ secrets.SLAB_URL }} + SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} + + run-benchmarks-2-h100-erc20: + name: benchmark_gpu_weekly/run-benchmarks-2-h100-erc20 + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true' + needs: prepare-inputs + uses: ./.github/workflows/benchmark_gpu_common.yml + with: + profile: 2-h100 + hardware_name: n3-H100x2 + command: hlapi_erc20 + bench_type: both + secrets: + BOT_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} + JOB_SECRET: ${{ secrets.JOB_SECRET }} + SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} + SLAB_URL: ${{ secrets.SLAB_URL }} + SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} + + run-benchmarks-8-h100-erc20: + name: benchmark_gpu_weekly/run-benchmarks-8-h100-erc20 + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true' + needs: 
prepare-inputs + uses: ./.github/workflows/benchmark_gpu_common.yml + with: + profile: multi-h100 + hardware_name: n3-H100x8 + command: hlapi_erc20 + bench_type: both + secrets: + BOT_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} + JOB_SECRET: ${{ secrets.JOB_SECRET }} + SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} + SLAB_URL: ${{ secrets.SLAB_URL }} + SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} + + # ----------------------------------------------------- + # DEX benchmarks + # ----------------------------------------------------- + + run-benchmarks-1-h100-dex: + name: benchmark_gpu_weekly/run-benchmarks-1-h100-dex + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true' + needs: prepare-inputs + uses: ./.github/workflows/benchmark_gpu_common.yml + with: + profile: single-h100 + hardware_name: n3-H100x1 + command: hlapi_dex + bench_type: both + secrets: + BOT_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} + JOB_SECRET: ${{ secrets.JOB_SECRET }} + SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} + SLAB_URL: ${{ secrets.SLAB_URL }} + SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} + + run-benchmarks-2-h100-dex: + name: benchmark_gpu_weekly/run-benchmarks-2-h100-dex + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true' + needs: prepare-inputs + uses: ./.github/workflows/benchmark_gpu_common.yml + with: + profile: 2-h100 + hardware_name: n3-H100x2 + command: hlapi_dex + bench_type: both + secrets: + BOT_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + REPO_CHECKOUT_TOKEN: ${{
secrets.REPO_CHECKOUT_TOKEN }} + JOB_SECRET: ${{ secrets.JOB_SECRET }} + SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} + SLAB_URL: ${{ secrets.SLAB_URL }} + SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }} + + run-benchmarks-8-h100-dex: + name: benchmark_gpu_weekly/run-benchmarks-8-h100-dex + if: github.repository == 'zama-ai/tfhe-rs' && + needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true' + needs: prepare-inputs + uses: ./.github/workflows/benchmark_gpu_common.yml + with: + profile: multi-h100 + hardware_name: n3-H100x8 + command: hlapi_dex + bench_type: both + secrets: + BOT_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }} + JOB_SECRET: ${{ secrets.JOB_SECRET }} + SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }} + SLAB_URL: ${{ secrets.SLAB_URL }} + SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}