mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-09 14:47:56 -05:00
The term "bpr" means Branch Protection Rule. It helps one to identify any job that must pass before being able to merge to the base branch.
215 lines
8.0 KiB
YAML
215 lines
8.0 KiB
YAML
# Run DEX benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
|
name: benchmark_gpu_dex_common
|
|
|
|
on:
|
|
workflow_call:
|
|
inputs:
|
|
backend:
|
|
type: string
|
|
default: hyperstack
|
|
profile:
|
|
type: string
|
|
required: true
|
|
hardware_name:
|
|
type: string
|
|
required: true
|
|
secrets:
|
|
REPO_CHECKOUT_TOKEN:
|
|
required: true
|
|
SLAB_ACTION_TOKEN:
|
|
required: true
|
|
SLAB_BASE_URL:
|
|
required: true
|
|
SLAB_URL:
|
|
required: true
|
|
JOB_SECRET:
|
|
required: true
|
|
SLACK_CHANNEL:
|
|
required: true
|
|
BOT_USERNAME:
|
|
required: true
|
|
SLACK_WEBHOOK:
|
|
required: true
|
|
|
|
env:
|
|
CARGO_TERM_COLOR: always
|
|
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
|
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
|
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
|
RUST_BACKTRACE: "full"
|
|
RUST_MIN_STACK: "8388608"
|
|
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
|
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
|
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
|
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
|
|
|
permissions: {}
|
|
|
|
jobs:
|
|
setup-instance:
|
|
name: benchmark_gpu_dex_common/setup-instance
|
|
runs-on: ubuntu-latest
|
|
if: github.event_name == 'workflow_dispatch' ||
|
|
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
|
outputs:
|
|
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
|
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
|
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
|
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
|
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
|
|
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
|
steps:
|
|
- name: Start remote instance
|
|
id: start-remote-instance
|
|
continue-on-error: true
|
|
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
|
with:
|
|
mode: start
|
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
|
job-secret: ${{ secrets.JOB_SECRET }}
|
|
backend: ${{ inputs.backend }}
|
|
profile: ${{ inputs.profile }}
|
|
|
|
- name: Acknowledge remote instance failure
|
|
if: steps.start-remote-instance.outcome == 'failure' &&
|
|
inputs.profile != 'single-h100'
|
|
run: |
|
|
echo "Remote instance instance has failed to start (profile provided: '${INPUTS_PROFILE}')"
|
|
echo "Permanent instance instance cannot be used as a substitute (profile needed: 'single-h100')"
|
|
exit 1
|
|
env:
|
|
INPUTS_PROFILE: ${{ inputs.profile }}
|
|
|
|
# This will allow to fallback on permanent instances running on Hyperstack.
|
|
- name: Use permanent remote instance
|
|
id: use-permanent-instance
|
|
if: env.SECRETS_AVAILABLE == 'true' &&
|
|
steps.start-remote-instance.outcome == 'failure' &&
|
|
inputs.profile == 'single-h100'
|
|
run: |
|
|
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
|
|
|
cuda-dex-benchmarks:
|
|
name: benchmark_gpu_dex_common/cuda-dex-benchmarks
|
|
needs: setup-instance
|
|
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
|
strategy:
|
|
fail-fast: false
|
|
# explicit include-based build matrix, of known valid options
|
|
matrix:
|
|
include:
|
|
- os: ubuntu-22.04
|
|
cuda: "12.8"
|
|
gcc: 11
|
|
steps:
|
|
- name: Checkout tfhe-rs repo with tags
|
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
|
with:
|
|
fetch-depth: 0
|
|
persist-credentials: 'false'
|
|
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
|
|
|
- name: Setup Hyperstack dependencies
|
|
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
|
uses: ./.github/actions/gpu_setup
|
|
with:
|
|
cuda-version: ${{ matrix.cuda }}
|
|
gcc-version: ${{ matrix.gcc }}
|
|
|
|
- name: Get benchmark details
|
|
run: |
|
|
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
|
{
|
|
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
|
echo "COMMIT_DATE=${COMMIT_DATE}";
|
|
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
|
} >> "${GITHUB_ENV}"
|
|
env:
|
|
SHA: ${{ github.sha }}
|
|
|
|
- name: Install rust
|
|
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
|
with:
|
|
toolchain: nightly
|
|
|
|
- name: Run benchmarks
|
|
run: |
|
|
make bench_hlapi_dex_gpu
|
|
|
|
- name: Parse results
|
|
run: |
|
|
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
|
|
--database tfhe_rs \
|
|
--hardware "${INPUTS_HARDWARE_NAME}" \
|
|
--backend gpu \
|
|
--project-version "${COMMIT_HASH}" \
|
|
--branch "${REF_NAME}" \
|
|
--commit-date "${COMMIT_DATE}" \
|
|
--bench-date "${BENCH_DATE}" \
|
|
--walk-subdirs \
|
|
--name-suffix avx512
|
|
env:
|
|
INPUTS_HARDWARE_NAME: ${{ inputs.hardware_name }}
|
|
REF_NAME: ${{ github.ref_name }}
|
|
|
|
- name: Upload parsed results artifact
|
|
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
|
with:
|
|
name: ${{ github.sha }}_dex_${{ inputs.profile }}
|
|
path: ${{ env.RESULTS_FILENAME }}
|
|
|
|
- name: Checkout Slab repo
|
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
|
with:
|
|
repository: zama-ai/slab
|
|
path: slab
|
|
persist-credentials: 'false'
|
|
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
|
|
|
- name: Send data to Slab
|
|
shell: bash
|
|
run: |
|
|
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
|
--slab-url "${SLAB_URL}"
|
|
env:
|
|
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
|
SLAB_URL: ${{ secrets.SLAB_URL }}
|
|
|
|
slack-notify:
|
|
name: benchmark_gpu_dex_common/slack-notify
|
|
needs: [ setup-instance, cuda-dex-benchmarks ]
|
|
runs-on: ubuntu-latest
|
|
if: ${{ always() && needs.cuda-dex-benchmarks.result != 'skipped' && failure() }}
|
|
continue-on-error: true
|
|
steps:
|
|
- name: Send message
|
|
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
|
env:
|
|
SLACK_COLOR: ${{ needs.cuda-dex-benchmarks.result }}
|
|
SLACK_MESSAGE: "Cuda DEX benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-dex-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
|
|
|
teardown-instance:
|
|
name: benchmark_gpu_dex_common/teardown-instance
|
|
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
|
needs: [ setup-instance, cuda-dex-benchmarks, slack-notify ]
|
|
runs-on: ubuntu-latest
|
|
steps:
|
|
- name: Stop instance
|
|
id: stop-instance
|
|
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
|
with:
|
|
mode: stop
|
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
|
job-secret: ${{ secrets.JOB_SECRET }}
|
|
label: ${{ needs.setup-instance.outputs.runner-name }}
|
|
|
|
- name: Slack Notification
|
|
if: ${{ failure() }}
|
|
continue-on-error: true
|
|
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
|
env:
|
|
SLACK_COLOR: ${{ job.status }}
|
|
SLACK_MESSAGE: "Instance teardown (cuda-dex-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|