# tfhe-rs/.github/workflows/benchmark_gpu_common.yml
# Run benchmarks on a CUDA instance and return parsed results to the Slab CI bot.
name: Cuda benchmarks - common
on:
workflow_call:
inputs:
backend:
type: string
default: hyperstack
profile:
type: string
required: true
hardware_name:
type: string
required: true
      command: # Use comma-separated values to generate an array
type: string
required: true
      op_flavor: # Use comma-separated values to generate an array
type: string
default: default
bench_type:
type: string
default: latency
params_type:
type: string
default: multi_bit
all_precisions:
type: boolean
default: false
secrets:
REPO_CHECKOUT_TOKEN:
required: true
SLAB_ACTION_TOKEN:
required: true
SLAB_BASE_URL:
required: true
SLAB_URL:
required: true
JOB_SECRET:
required: true
SLACK_CHANNEL:
required: true
BOT_USERNAME:
required: true
SLACK_WEBHOOK:
required: true
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
FAST_BENCH: TRUE
permissions: {}
jobs:
prepare-matrix:
name: Prepare operations matrix
runs-on: ubuntu-latest
outputs:
command: ${{ steps.set_command.outputs.command }}
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
params_type: ${{ steps.set_params_type.outputs.params_type }}
env:
INPUTS_COMMAND: ${{ inputs.command }}
INPUTS_OP_FLAVOR: ${{ inputs.op_flavor }}
steps:
- name: Set single command
if: ${{ !contains(inputs.command, ',')}}
run: |
echo "COMMAND=[\"${INPUTS_COMMAND}\"]" >> "${GITHUB_ENV}"
- name: Set multiple commands
if: ${{ contains(inputs.command, ',')}}
run: |
          # Use sed to quote each comma-separated value; this substitution cannot easily be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
PARSED_COMMAND=$(echo "${INPUTS_COMMAND}" | sed 's/[[:space:]]*,[[:space:]]*/\", \"/g')
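          # Example (hypothetical input): INPUTS_COMMAND='integer, pbs' gives PARSED_COMMAND='integer", "pbs',
          # so the echo below writes COMMAND=["integer", "pbs"] to the job environment.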
echo "COMMAND=[\"${PARSED_COMMAND}\"]" >> "${GITHUB_ENV}"
- name: Set single operations flavor
if: ${{ !contains(inputs.op_flavor, ',')}}
run: |
echo "OP_FLAVOR=[\"${INPUTS_OP_FLAVOR}\"]" >> "${GITHUB_ENV}"
- name: Set multiple operations flavors
if: ${{ contains(inputs.op_flavor, ',')}}
run: |
          # Use sed to quote each comma-separated value; this substitution cannot easily be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
PARSED_OP_FLAVOR=$(echo "${INPUTS_OP_FLAVOR}" | sed 's/[[:space:]]*,[[:space:]]*/", "/g')
echo "OP_FLAVOR=[\"${PARSED_OP_FLAVOR}\"]" >> "${GITHUB_ENV}"
- name: Set benchmark types
run: |
if [[ "${INPUTS_BENCH_TYPE}" == "both" ]]; then
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
else
echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
- name: Set parameters types
run: |
if [[ "${INPUTS_PARAMS_TYPE}" == "both" ]]; then
echo "PARAMS_TYPE=[\"classical\", \"multi_bit\"]" >> "${GITHUB_ENV}"
else
echo "PARAMS_TYPE=[\"${INPUTS_PARAMS_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_PARAMS_TYPE: ${{ inputs.params_type }}
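      # Expose the arrays built above as step outputs. Values are passed through env and toJSON()
      # rather than being interpolated directly into the run scripts, which avoids the
      # template-injection patterns flagged by Zizmor.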
- name: Set command output
id: set_command
run: |
echo "command=${COMMAND_OUTPUT}" >> "${GITHUB_OUTPUT}"
env:
COMMAND_OUTPUT: ${{ toJSON(env.COMMAND) }}
- name: Set operation flavor output
id: set_op_flavor
run: |
echo "op_flavor=${OP_FLAVOR_OUTPUT}" >> "${GITHUB_OUTPUT}"
env:
OP_FLAVOR_OUTPUT: ${{ toJSON(env.OP_FLAVOR) }}
- name: Set benchmark types output
id: set_bench_type
run: |
echo "bench_type=${BENCH_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
env:
BENCH_TYPE_OUTPUT: ${{ toJSON(env.BENCH_TYPE) }}
- name: Set parameters types output
id: set_params_type
run: |
echo "params_type=${PARAMS_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
env:
PARAMS_TYPE_OUTPUT: ${{ toJSON(env.PARAMS_TYPE) }}
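  # Start the GPU instance that the following jobs run on; if an on-demand instance cannot be
  # started, fall back to a permanent Hyperstack runner (only possible for the 'single-h100' profile).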
setup-instance:
name: Setup instance (cuda-${{ inputs.profile }}-benchmarks)
needs: prepare-matrix
runs-on: ubuntu-latest
outputs:
      # Use the permanent remote instance label first, because the on-demand remote instance label output is set before the end of the start-remote-instance step.
      # If that step fails due to a failed GitHub Actions runner setup, we have to fall back on the permanent instance.
      # Since the on-demand remote label is set before the failure, the logical OR must be evaluated in this order;
      # otherwise we would try to run the next job on a non-existent on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
steps:
- name: Start remote instance
id: start-remote-instance
continue-on-error: true
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: ${{ inputs.backend }}
profile: ${{ inputs.profile }}
- name: Acknowledge remote instance failure
if: steps.start-remote-instance.outcome == 'failure' &&
inputs.profile != 'single-h100'
run: |
echo "Remote instance instance has failed to start (profile provided: '${INPUTS_PROFILE}')"
echo "Permanent instance instance cannot be used as a substitute (profile needed: 'single-h100')"
exit 1
env:
INPUTS_PROFILE: ${{ inputs.profile }}
      # This allows falling back on a permanent instance running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' &&
steps.start-remote-instance.outcome == 'failure' &&
inputs.profile == 'single-h100'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
  # Install dependencies only once, since the cuda-benchmarks job uses a matrix strategy and therefore runs multiple times.
install-dependencies:
name: Install dependencies
needs: [ setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
matrix:
        # explicit include-based build matrix of known valid options
include:
- cuda: "12.2"
gcc: 11
steps:
- name: Checkout tfhe-rs repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
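  # Run one benchmark per (command, op_flavor, bench_type, params_type) combination produced by
  # prepare-matrix, sequentially (max-parallel: 1) on the instance started above.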
cuda-benchmarks:
name: Cuda benchmarks (${{ inputs.profile }})
needs: [ prepare-matrix, setup-instance, install-dependencies ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
timeout-minutes: 1440 # 24 hours
strategy:
fail-fast: false
max-parallel: 1
matrix:
command: ${{ fromJSON(needs.prepare-matrix.outputs.command) }}
op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }}
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
        # explicit include-based build matrix of known valid options
include:
- cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
      # Re-export environment variables: the dependencies setup already did this in the previous job,
      # but local env variables are cleaned up at the end of each job.
- name: Export CUDA variables
shell: bash
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"
- name: Export gcc and g++ variables
shell: bash
run: |
{
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
} >> "${GITHUB_ENV}"
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
with:
toolchain: nightly
- name: Should run benchmarks with all precisions
if: inputs.all_precisions
run: |
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
- name: Run benchmarks
run: |
make BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${BENCH_PARAMS_TYPE}" bench_"${BENCH_COMMAND}"_gpu
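          # Hypothetical expansion for one matrix entry:
          #   make BENCH_OP_FLAVOR="default" BENCH_TYPE="latency" BENCH_PARAM_TYPE="multi_bit" bench_integer_gpu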
env:
OP_FLAVOR: ${{ matrix.op_flavor }}
BENCH_TYPE: ${{ matrix.bench_type }}
BENCH_PARAMS_TYPE: ${{ matrix.params_type }}
BENCH_COMMAND: ${{ matrix.command }}
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "${INPUTS_HARDWARE_NAME}" \
--backend gpu \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--name-suffix avx512 \
--bench-type "${BENCH_TYPE}"
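          # The parser collects Criterion results from target/criterion into ${RESULTS_FILENAME},
          # which is then uploaded as an artifact and sent to Slab below.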
env:
INPUTS_HARDWARE_NAME: ${{ inputs.hardware_name }}
REF_NAME: ${{ github.ref_name }}
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
slack-notify:
name: Slack Notification
needs: [ setup-instance, cuda-benchmarks ]
runs-on: ubuntu-latest
if: ${{ always() && needs.cuda-benchmarks.result != 'skipped' && failure() }}
continue-on-error: true
steps:
- name: Send message
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ needs.cuda-benchmarks.result }}
SLACK_MESSAGE: "Cuda benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
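  # Only tear down instances that were started on demand; the permanent fallback runner keeps running.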
teardown-instance:
name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
needs: [ setup-instance, cuda-benchmarks, slack-notify ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"