# Mirror of https://github.com/zama-ai/tfhe-rs.git (synced 2026-01-09 14:47:56 -05:00).
# The term "bpr" means Branch Protection Rule. It identifies every job that must pass before a merge to the base branch is allowed.
# File size: 367 lines, 14 KiB, YAML.
# Reusable workflow: runs the benchmarks on a CUDA instance and returns the parsed results to the Slab CI bot.
name: benchmark_gpu_common

on:
  workflow_call:
    inputs:
      # Forwarded as `backend` to the slab-github-runner "start" step.
      backend:
        type: string
        default: 'hyperstack'
      # Forwarded as `profile` to the slab-github-runner "start" step.
      # The permanent-instance fallback only applies to 'single-h100'.
      profile:
        required: true
        type: string
      # Passed to the benchmark parser as `--hardware`.
      hardware_name:
        required: true
        type: string
      # Comma-separated values are expanded into a matrix array.
      # Each value selects the `bench_<value>_gpu` make target.
      command:
        required: true
        type: string
      # Comma-separated values are expanded into a matrix array.
      # Each value is handed to make as BENCH_OP_FLAVOR.
      op_flavor:
        type: string
        default: 'default'
      # 'both' expands to latency + throughput; any other value is used as-is.
      bench_type:
        type: string
        default: 'latency'
      # 'both' expands to classical + multi_bit; any other value is used as-is.
      params_type:
        type: string
        default: 'multi_bit'
      # When true, the benchmark job switches FAST_BENCH to FALSE.
      all_precisions:
        type: boolean
        default: false
    secrets:
      # Token used by every actions/checkout step (tfhe-rs and slab repos).
      REPO_CHECKOUT_TOKEN:
        required: true
      # Credentials for the slab-github-runner start/stop steps.
      SLAB_ACTION_TOKEN:
        required: true
      SLAB_BASE_URL:
        required: true
      # Endpoint handed to slab/scripts/data_sender.py via --slab-url.
      SLAB_URL:
        required: true
      # Shared by the runner start/stop steps and the data sender.
      JOB_SECRET:
        required: true
      # Slack notification settings, re-exported through the workflow env.
      SLACK_CHANNEL:
        required: true
      BOT_USERNAME:
        required: true
      SLACK_WEBHOOK:
        required: true

# Workflow-wide environment, visible to every job below.
env:
  # Rust / Cargo runtime settings (8388608 = 8 * 1024 * 1024).
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: 'full'
  RUST_MIN_STACK: '8388608'
  # Name of the parsed-results file and a link back to this workflow run.
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  # Settings read by the Slack notification steps.
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  # Default; the cuda-benchmarks job overrides it to FALSE when inputs.all_precisions is set.
  # NOTE(review): the value is unquoted, so a YAML loader may read it as a boolean
  # rather than the literal string "TRUE" - confirm which spelling the consumer expects.
  FAST_BENCH: TRUE

# Start from an empty permission set for the workflow token.
permissions: {}

jobs:
  # Converts the comma-separated / "both" workflow inputs into JSON arrays.
  # The cuda-benchmarks job decodes them with fromJSON to build its matrix.
  #
  # Each array is written straight to GITHUB_OUTPUT from environment variables.
  # Nothing is round-tripped through GITHUB_ENV or expanded with `${{ }}` inside
  # a script, so the result does not depend on shell quote removal and no
  # template-injection suppression is needed.
  prepare-matrix:
    name: benchmark_gpu_common/prepare-matrix
    runs-on: ubuntu-latest
    outputs:
      command: ${{ steps.set_command.outputs.command }}
      op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
      bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
      params_type: ${{ steps.set_params_type.outputs.params_type }}
    env:
      INPUTS_COMMAND: ${{ inputs.command }}
      INPUTS_OP_FLAVOR: ${{ inputs.op_flavor }}
      INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
      INPUTS_PARAMS_TYPE: ${{ inputs.params_type }}
    steps:
      - name: Set command output
        id: set_command
        run: |
          # "a, b" becomes ["a", "b"]; a value without a comma passes through sed unchanged and becomes ["a"].
          # Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
          # shellcheck disable=SC2001
          PARSED_COMMAND=$(echo "${INPUTS_COMMAND}" | sed 's/[[:space:]]*,[[:space:]]*/", "/g')
          echo "command=[\"${PARSED_COMMAND}\"]" >> "${GITHUB_OUTPUT}"

      - name: Set operation flavor output
        id: set_op_flavor
        run: |
          # Same comma-splitting rule as for the command input.
          # shellcheck disable=SC2001
          PARSED_OP_FLAVOR=$(echo "${INPUTS_OP_FLAVOR}" | sed 's/[[:space:]]*,[[:space:]]*/", "/g')
          echo "op_flavor=[\"${PARSED_OP_FLAVOR}\"]" >> "${GITHUB_OUTPUT}"

      - name: Set benchmark types output
        id: set_bench_type
        run: |
          # "both" is shorthand for the two supported benchmark types.
          if [[ "${INPUTS_BENCH_TYPE}" == "both" ]]; then
            echo "bench_type=[\"latency\", \"throughput\"]" >> "${GITHUB_OUTPUT}"
          else
            echo "bench_type=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_OUTPUT}"
          fi

      - name: Set parameters types output
        id: set_params_type
        run: |
          # "both" is shorthand for the two supported parameter sets.
          if [[ "${INPUTS_PARAMS_TYPE}" == "both" ]]; then
            echo "params_type=[\"classical\", \"multi_bit\"]" >> "${GITHUB_OUTPUT}"
          else
            echo "params_type=[\"${INPUTS_PARAMS_TYPE}\"]" >> "${GITHUB_OUTPUT}"
          fi

# ---- Job: setup-instance (entry of the `jobs:` mapping) ----
  # Starts an on-demand GPU instance through Slab and publishes the runner label to use.
  # If the start fails and the profile is 'single-h100', the job falls back on the
  # permanent 'h100x1' runner group; for any other profile the job fails.
  setup-instance:
    name: benchmark_gpu_common/setup-instance
    needs: prepare-matrix
    runs-on: ubuntu-latest
    outputs:
      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
      # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
      # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
      # otherwise we'll try to run the next job on a non-existing on-demand instance.
      runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
      remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
    steps:
      - name: Start remote instance
        id: start-remote-instance
        # A failure must not end the job here: the two steps below decide what to do with it.
        continue-on-error: true
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: ${{ inputs.backend }}
          profile: ${{ inputs.profile }}

      - name: Acknowledge remote instance failure
        if: steps.start-remote-instance.outcome == 'failure' && inputs.profile != 'single-h100'
        run: |
          echo "Remote instance has failed to start (profile provided: '${INPUTS_PROFILE}')"
          echo "Permanent instance cannot be used as a substitute (profile needed: 'single-h100')"
          exit 1
        env:
          INPUTS_PROFILE: ${{ inputs.profile }}

      # This will allow to fallback on permanent instances running on Hyperstack.
      # The condition deliberately does not test env.SECRETS_AVAILABLE: this workflow
      # never defines that variable, so testing it made the fallback unreachable and
      # left runner-name pointing at the failed on-demand label. Every secret of this
      # workflow is declared `required: true`, so there is no "secrets missing" case.
      - name: Use permanent remote instance
        id: use-permanent-instance
        if: steps.start-remote-instance.outcome == 'failure' && inputs.profile == 'single-h100'
        run: |
          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"

# Install dependencies only once since cuda-benchmarks uses a matrix strategy, thus running multiple times.
  install-dependencies:
    name: benchmark_gpu_common/install-dependencies
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
      matrix:
        # explicit include-based build matrix, of known valid options
        include:
          - gcc: 11
            cuda: '12.8'
    steps:
      - name: Checkout tfhe-rs repo
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
          persist-credentials: 'false'
          fetch-depth: 0

      # Only runs when the on-demand instance was started successfully.
      # NOTE(review): presumably the permanent fallback runner is already provisioned - confirm.
      - name: Setup Hyperstack dependencies
        if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
        uses: ./.github/actions/gpu_setup
        with:
          gcc-version: ${{ matrix.gcc }}
          cuda-version: ${{ matrix.cuda }}

# ---- Job: cuda-benchmarks (entry of the `jobs:` mapping) ----
  # Runs one benchmark per (command, op_flavor, bench_type, params_type) combination,
  # one at a time, on the runner selected by setup-instance. Each run is parsed,
  # uploaded as an artifact and sent to Slab.
  cuda-benchmarks:
    name: benchmark_gpu_common/cuda-benchmarks
    needs: [ prepare-matrix, setup-instance, install-dependencies ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    timeout-minutes: 1440  # 24 hours
    strategy:
      # A failing combination does not cancel the remaining ones.
      fail-fast: false
      max-parallel: 1
      matrix:
        command: ${{ fromJSON(needs.prepare-matrix.outputs.command) }}
        op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }}
        bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
        params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
        # explicit include-based build matrix, of known valid options
        include:
          - cuda: "12.8"
            gcc: 11
    env:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          # Full history, so that `git describe --tags` below can find a tag.
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      # Exports BENCH_DATE, COMMIT_DATE and COMMIT_HASH for the "Parse results" step.
      - name: Get benchmark details
        run: |
          COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
          {
            echo "BENCH_DATE=$(date --iso-8601=seconds)";
            echo "COMMIT_DATE=${COMMIT_DATE}";
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"
        env:
          SHA: ${{ github.sha }}

      # Re-export environment variables as dependencies setup perform this task in the previous job.
      # Local env variables are cleaned at the end of each job.
      - name: Export CUDA variables
        shell: bash
        run: |
          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
          # GITHUB_PATH takes one directory per line, not a PATH=... assignment.
          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
          echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"

      - name: Export gcc and g++ variables
        shell: bash
        run: |
          {
            echo "CC=/usr/bin/gcc-${GCC_VERSION}";
            echo "CXX=/usr/bin/g++-${GCC_VERSION}";
            echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
          } >> "${GITHUB_ENV}"
        env:
          GCC_VERSION: ${{ matrix.gcc }}

      - name: Install rust
        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      # Overrides the workflow-level FAST_BENCH default for the rest of this job.
      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
        run: |
          echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"

      # Matrix values reach the script through env vars, never through inline expansion.
      - name: Run benchmarks
        run: |
          make BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${BENCH_PARAMS_TYPE}" bench_"${BENCH_COMMAND}"_gpu
        env:
          OP_FLAVOR: ${{ matrix.op_flavor }}
          BENCH_TYPE: ${{ matrix.bench_type }}
          BENCH_PARAMS_TYPE: ${{ matrix.params_type }}
          BENCH_COMMAND: ${{ matrix.command }}

      # NOTE(review): the name suffix is 'avx512' although the backend is 'gpu' - confirm this is intended.
      - name: Parse results
        run: |
          python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
          --database tfhe_rs \
          --hardware "${INPUTS_HARDWARE_NAME}" \
          --backend gpu \
          --project-version "${COMMIT_HASH}" \
          --branch "${REF_NAME}" \
          --commit-date "${COMMIT_DATE}" \
          --bench-date "${BENCH_DATE}" \
          --walk-subdirs \
          --name-suffix avx512 \
          --bench-type "${BENCH_TYPE}"
        env:
          INPUTS_HARDWARE_NAME: ${{ inputs.hardware_name }}
          REF_NAME: ${{ github.ref_name }}
          BENCH_TYPE: ${{ matrix.bench_type }}

      # The artifact name carries every matrix value, so each combination gets its own artifact.
      - name: Upload parsed results artifact
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
          path: ${{ env.RESULTS_FILENAME }}

      # Checked out into ./slab to provide scripts/data_sender.py for the next step.
      - name: Checkout Slab repo
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
        run: |
          python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
          --slab-url "${SLAB_URL}"
        env:
          JOB_SECRET: ${{ secrets.JOB_SECRET }}
          SLAB_URL: ${{ secrets.SLAB_URL }}

# ---- Job: slack-notify (entry of the `jobs:` mapping) ----
  # Posts a Slack message when something it depends on failed, unless
  # cuda-benchmarks was skipped. The job itself never fails the workflow.
  slack-notify:
    name: benchmark_gpu_common/slack-notify
    needs:
      - setup-instance
      - cuda-benchmarks
    runs-on: ubuntu-latest
    if: always() && needs.cuda-benchmarks.result != 'skipped' && failure()
    continue-on-error: true
    steps:
      - name: Send message
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          # The job result is used both as the message color and in the message text.
          SLACK_COLOR: ${{ needs.cuda-benchmarks.result }}
          SLACK_MESSAGE: "Cuda benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

# ---- Job: teardown-instance (entry of the `jobs:` mapping) ----
  # Stops the on-demand instance whatever the outcome of the other jobs.
  # It is skipped when the on-demand instance never started successfully.
  teardown-instance:
    name: benchmark_gpu_common/teardown-instance
    if: always() && needs.setup-instance.outputs.remote-instance-outcome == 'success'
    needs:
      - setup-instance
      - cuda-benchmarks
      - slack-notify
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          # The label published by setup-instance identifies the instance to stop.
          label: ${{ needs.setup-instance.outputs.runner-name }}

      # Only reached when the stop step above failed; a Slack error does not fail the job.
      - name: Slack Notification
        if: failure()
        continue-on-error: true
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "Instance teardown (cuda-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"