mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-07 22:04:10 -05:00
This new workflow can trigger all the benchmarks needed to populate the benchmark tables in the documentation. It can also generate SVG tables and store them as artifacts. Optionally, it can open a pull request to update the current tables in the documentation.
328 lines
12 KiB
YAML
328 lines
12 KiB
YAML
# Run benchmarks on CUDA instance and return parsed results to Slab CI bot.
|
|
name: benchmark_gpu_common

on:
  workflow_call:
    inputs:
      # Forwarded as `backend` to the slab-github-runner start step.
      backend:
        type: string
        default: hyperstack

      # Forwarded as `profile` to the slab-github-runner start step; the
      # permanent-instance fallback is only considered for 'single-h100'.
      profile:
        required: true
        type: string

      # Passed as `--hardware` to ci/benchmark_parser.py.
      hardware_name:
        required: true
        type: string

      # Comma-separated list; each value becomes one matrix entry and is run
      # as the make target `bench_<command>_gpu`.
      command:
        required: true
        type: string

      # Comma-separated list; each value becomes one matrix entry and is
      # passed to make as BENCH_OP_FLAVOR.
      op_flavor:
        type: string
        default: default

      # A single benchmark type, or `both` to run latency and throughput.
      bench_type:
        type: string
        default: latency

      # A single parameters type, or several joined with `+`.
      params_type:
        type: string
        default: multi_bit

      # Passed to make as BIT_SIZES_SET.
      precisions_set:
        type: string
        default: fast

    secrets:
      # Token used by every actions/checkout step.
      REPO_CHECKOUT_TOKEN:
        required: true
      # `github-token` for slab-github-runner (start and stop).
      SLAB_ACTION_TOKEN:
        required: true
      # `slab-url` for slab-github-runner (start and stop).
      SLAB_BASE_URL:
        required: true
      # `--slab-url` for slab/scripts/data_sender.py.
      SLAB_URL:
        required: true
      # Shared with slab-github-runner and data_sender.py.
      JOB_SECRET:
        required: true
      # Slack notification settings, exported through the workflow `env`.
      SLACK_CHANNEL:
        required: true
      BOT_USERNAME:
        required: true
      SLACK_WEBHOOK:
        required: true
|
|
|
|
env:
  # Rust / Cargo runtime settings.
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: 'full'
  # 8 * 1024 * 1024; kept quoted so it stays a string.
  RUST_MIN_STACK: '8388608'

  # Per-run values reused by several jobs.
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

  # Consumed by the rtCamp/action-slack-notify steps.
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
|
|
|
# Empty mapping: no GITHUB_TOKEN permission is granted at workflow level, and
# no job in this file declares its own `permissions`.
permissions: {}
|
|
|
|
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
|
|
|
|
jobs:
|
|
# Turns the free-form workflow inputs into JSON arrays that the
# cuda-benchmarks job expands into its matrix with fromJSON().
prepare-matrix:
  name: benchmark_gpu_common/prepare-matrix
  runs-on: ubuntu-latest
  outputs:
    command: ${{ steps.set_matrix_args.outputs.command }}
    op_flavor: ${{ steps.set_matrix_args.outputs.op_flavor }}
    bench_type: ${{ steps.set_matrix_args.outputs.bench_type }}
    params_type: ${{ steps.set_matrix_args.outputs.params_type }}
  steps:
    - name: Parse user inputs
      shell: python
      # Inputs reach the script through the environment instead of being
      # templated into Python string literals, so a quote or backslash in an
      # input can neither break nor inject into the script.
      env:
        INPUTS_COMMAND: ${{ inputs.command }}
        INPUTS_OP_FLAVOR: ${{ inputs.op_flavor }}
        INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
        INPUTS_PARAMS_TYPE: ${{ inputs.params_type }}
      run: |
        import json
        import os

        # Comma-separated inputs: drop spaces, then split.
        split_command = os.environ["INPUTS_COMMAND"].replace(" ", "").split(",")
        split_op_flavor = os.environ["INPUTS_OP_FLAVOR"].replace(" ", "").split(",")

        # "both" is a shorthand for running the two benchmark types.
        raw_bench_type = os.environ["INPUTS_BENCH_TYPE"]
        if raw_bench_type == "both":
            bench_type = ["latency", "throughput"]
        else:
            bench_type = [raw_bench_type]

        # Several parameters types may be requested at once, joined by "+".
        raw_params_type = os.environ["INPUTS_PARAMS_TYPE"]
        if "+" in raw_params_type:
            split_params_type = raw_params_type.replace(" ", "").split("+")
        else:
            split_params_type = [raw_params_type]

        # Export each list as a single-line JSON array for the next step.
        with open(os.environ["GITHUB_ENV"], "a") as f:
            for env_name, values in [
                ("COMMAND", split_command),
                ("OP_FLAVOR", split_op_flavor),
                ("BENCH_TYPE", bench_type),
                ("PARAMS_TYPE", split_params_type),
            ]:
                f.write(f"{env_name}={json.dumps(values)}\n")

    - name: Set matrix arguments outputs
      id: set_matrix_args
      # The variables below were exported through GITHUB_ENV by the previous
      # step and are read from the shell environment, fully quoted.
      run: |
        {
          echo "command=${COMMAND}";
          echo "op_flavor=${OP_FLAVOR}";
          echo "bench_type=${BENCH_TYPE}";
          echo "params_type=${PARAMS_TYPE}";
        } >> "${GITHUB_OUTPUT}"
|
|
|
|
# Starts an on-demand Slab instance and, for the 'single-h100' profile only,
# falls back on a permanent runner group when the start fails.
setup-instance:
  name: benchmark_gpu_common/setup-instance
  needs: prepare-matrix
  runs-on: ubuntu-latest
  env:
    # Read by the permanent-instance fallback condition below. It must be
    # defined in this workflow: a called workflow does not inherit the `env`
    # of its caller, so without this the fallback step could never run.
    SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
  outputs:
    # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
    # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
    # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
    # otherwise we'll try to run the next job on a non-existing on-demand instance.
    runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
    remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
  steps:
    - name: Start remote instance
      id: start-remote-instance
      # A failed start is handled by the two steps below instead of failing
      # the job right away.
      continue-on-error: true
      uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
      with:
        mode: start
        github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
        slab-url: ${{ secrets.SLAB_BASE_URL }}
        job-secret: ${{ secrets.JOB_SECRET }}
        backend: ${{ inputs.backend }}
        profile: ${{ inputs.profile }}

    # No permanent instance can stand in for other profiles: fail explicitly.
    - name: Acknowledge remote instance failure
      if: steps.start-remote-instance.outcome == 'failure' &&
        inputs.profile != 'single-h100'
      run: |
        echo "Remote instance has failed to start (profile provided: '${INPUTS_PROFILE}')"
        echo "Permanent instance cannot be used as a substitute (profile needed: 'single-h100')"
        exit 1
      env:
        INPUTS_PROFILE: ${{ inputs.profile }}

    # This will allow to fallback on permanent instances running on Hyperstack.
    - name: Use permanent remote instance
      id: use-permanent-instance
      if: env.SECRETS_AVAILABLE == 'true' &&
        steps.start-remote-instance.outcome == 'failure' &&
        inputs.profile == 'single-h100'
      run: |
        echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
|
|
|
# Dependencies are installed once, in a dedicated job, because cuda-benchmarks
# uses a matrix strategy and therefore runs several times on the same runner.
install-dependencies:
  name: benchmark_gpu_common/install-dependencies
  needs:
    - setup-instance
  runs-on: ${{ needs.setup-instance.outputs.runner-name }}
  strategy:
    matrix:
      # Explicit include-based build matrix, limited to known valid options.
      include:
        - gcc: 11
          cuda: '12.8'
  steps:
    - name: Checkout tfhe-rs repo
      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
      with:
        token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
        persist-credentials: 'false'
        fetch-depth: 0

    # Skipped when the on-demand instance did not start successfully.
    - name: Setup Hyperstack dependencies
      if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
      uses: ./.github/actions/gpu_setup
      with:
        gcc-version: ${{ matrix.gcc }}
        cuda-version: ${{ matrix.cuda }}
|
|
|
|
# Runs one benchmark per matrix combination on the GPU runner, parses the
# criterion output, uploads it as an artifact and sends it to Slab.
cuda-benchmarks:
  name: benchmark_gpu_common/cuda-benchmarks
  needs: [ prepare-matrix, setup-instance, install-dependencies ]
  runs-on: ${{ needs.setup-instance.outputs.runner-name }}
  timeout-minutes: 1440  # 24 hours
  strategy:
    # One failing combination must not cancel the others, and combinations
    # run one at a time.
    fail-fast: false
    max-parallel: 1
    matrix:
      command: ${{ fromJSON(needs.prepare-matrix.outputs.command) }}
      op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }}
      bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
      params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
      # explicit include-based build matrix, of known valid options
      include:
        - cuda: "12.8"
          gcc: 11
  env:
    CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
  steps:
    - name: Checkout tfhe-rs repo with tags
      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
      with:
        fetch-depth: 0
        persist-credentials: 'false'
        token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

    # Exports BENCH_DATE, COMMIT_DATE and COMMIT_HASH for the parsing step.
    - name: Get benchmark details
      run: |
        COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
        {
          echo "BENCH_DATE=$(date --iso-8601=seconds)";
          echo "COMMIT_DATE=${COMMIT_DATE}";
          echo "COMMIT_HASH=$(git describe --tags --dirty)";
        } >> "${GITHUB_ENV}"
      env:
        SHA: ${{ github.sha }}

    # Re-export the CUDA environment variables: the dependencies setup did it
    # in the previous job, and job-local variables do not survive the job.
    - name: Export CUDA variables
      shell: bash
      run: |
        echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
        # GITHUB_PATH takes one bare directory per line, which the runner
        # prepends to PATH for the following steps; it is not a KEY=value file.
        echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
        echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
        echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"

    - name: Export gcc and g++ variables
      shell: bash
      run: |
        {
          echo "CC=/usr/bin/gcc-${GCC_VERSION}";
          echo "CXX=/usr/bin/g++-${GCC_VERSION}";
          echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
        } >> "${GITHUB_ENV}"
      env:
        GCC_VERSION: ${{ matrix.gcc }}

    - name: Install rust
      uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9  # zizmor: ignore[stale-action-refs] this action doesn't create releases
      with:
        toolchain: nightly

    # Matrix values are passed through the environment, not templated into
    # the command line.
    - name: Run benchmarks
      run: |
        make BIT_SIZES_SET="${PRECISIONS_SET}" BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${BENCH_PARAMS_TYPE}" bench_"${BENCH_COMMAND}"_gpu
      env:
        OP_FLAVOR: ${{ matrix.op_flavor }}
        BENCH_TYPE: ${{ matrix.bench_type }}
        BENCH_PARAMS_TYPE: ${{ matrix.params_type }}
        BENCH_COMMAND: ${{ matrix.command }}
        PRECISIONS_SET: ${{ inputs.precisions_set }}

    # NOTE(review): `--name-suffix avx512` looks inherited from the CPU
    # benchmarks; confirm it is intended for GPU results.
    - name: Parse results
      run: |
        python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
        --database tfhe_rs \
        --hardware "${INPUTS_HARDWARE_NAME}" \
        --backend gpu \
        --project-version "${COMMIT_HASH}" \
        --branch "${REF_NAME}" \
        --commit-date "${COMMIT_DATE}" \
        --bench-date "${BENCH_DATE}" \
        --walk-subdirs \
        --name-suffix avx512 \
        --bench-type "${BENCH_TYPE}"
      env:
        INPUTS_HARDWARE_NAME: ${{ inputs.hardware_name }}
        REF_NAME: ${{ github.ref_name }}
        BENCH_TYPE: ${{ matrix.bench_type }}

    # The artifact name includes every matrix dimension, so each combination
    # gets its own artifact.
    - name: Upload parsed results artifact
      uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
      with:
        name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
        path: ${{ env.RESULTS_FILENAME }}

    - name: Checkout Slab repo
      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
      with:
        repository: zama-ai/slab
        path: slab
        persist-credentials: 'false'
        token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

    - name: Send data to Slab
      shell: bash
      run: |
        python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
        --slab-url "${SLAB_URL}"
      env:
        JOB_SECRET: ${{ secrets.JOB_SECRET }}
        SLAB_URL: ${{ secrets.SLAB_URL }}
|
|
|
|
# Posts a Slack message when an upstream job failed and the benchmark job was
# not skipped. A failure of this job does not fail the workflow.
slack-notify:
  name: benchmark_gpu_common/slack-notify
  runs-on: ubuntu-latest
  needs:
    - setup-instance
    - cuda-benchmarks
  if: ${{ always() && needs.cuda-benchmarks.result != 'skipped' && failure() }}
  continue-on-error: true
  steps:
    - name: Send message
      uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
      env:
        # The benchmark job result is used both as colour and in the message.
        SLACK_MESSAGE: "Cuda benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
        SLACK_COLOR: ${{ needs.cuda-benchmarks.result }}
|
|
|
|
# Stops the on-demand instance. Runs whatever the outcome of the other jobs,
# but only when the instance was actually started by setup-instance.
teardown-instance:
  name: benchmark_gpu_common/teardown-instance
  runs-on: ubuntu-latest
  needs:
    - setup-instance
    - cuda-benchmarks
    - slack-notify
  if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
  steps:
    - name: Stop instance
      id: stop-instance
      uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
      with:
        mode: stop
        label: ${{ needs.setup-instance.outputs.runner-name }}
        job-secret: ${{ secrets.JOB_SECRET }}
        slab-url: ${{ secrets.SLAB_BASE_URL }}
        github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

    # Only sent when the stop step above failed.
    - name: Slack Notification
      if: ${{ failure() }}
      uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
      env:
        SLACK_MESSAGE: "Instance teardown (cuda-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
        SLACK_COLOR: ${{ job.status }}
|