mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-08 22:28:01 -05:00
chore(gpu): add coprocessor benchmarks in tfhe-rs gpu ci
This commit is contained in:
286
.github/workflows/coprocessor-benchmark-gpu.yml
vendored
Normal file
286
.github/workflows/coprocessor-benchmark-gpu.yml
vendored
Normal file
@@ -0,0 +1,286 @@
|
||||
# Run all fhevm coprocessor benchmarks on a GPU instance on Hyperstack and return parsed results to Slab CI bot.
|
||||
name: coprocessor-benchmark-gpu
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
PROFILE: "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
BENCHMARK_TYPE: "ALL"
|
||||
OPTIMIZATION_TARGET: "throughput"
|
||||
BATCH_SIZE: "5000"
|
||||
SCHEDULING_POLICY: "MAX_PARALLELISM"
|
||||
BENCHMARKS: "erc20"
|
||||
BRANCH_NAME: ${{ github.ref_name }}
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
SLAB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
name: coprocessor-benchmark-gpu/parse-inputs
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: 'read'
|
||||
outputs:
|
||||
profile: ${{ steps.parse_profile.outputs.profile }}
|
||||
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
|
||||
steps:
|
||||
- name: Parse profile
|
||||
id: parse_profile
|
||||
run: |
|
||||
# shellcheck disable=SC2001
|
||||
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
|
||||
echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Parse hardware name
|
||||
id: parse_hardware_name
|
||||
run: |
|
||||
# shellcheck disable=SC2001
|
||||
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
|
||||
echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: coprocessor-benchmark-gpu/setup-instance
|
||||
needs: parse-inputs
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: 'read'
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: ${{ needs.parse-inputs.outputs.profile }}
|
||||
|
||||
benchmark:
|
||||
name: coprocessor-benchmark-gpu/benchmark-gpu (bpr)
|
||||
needs: [ parse-inputs, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
continue-on-error: true
|
||||
timeout-minutes: 720 # 12 hours
|
||||
permissions:
|
||||
contents: 'read'
|
||||
packages: 'read'
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.8"
|
||||
gcc: 11
|
||||
env:
|
||||
HW_NAME: "${{ needs.parse-inputs.outputs.hardware_name }}"
|
||||
|
||||
steps:
|
||||
- name: Install git LFS
|
||||
run: |
|
||||
sudo apt-get remove -y unattended-upgrades
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y git-lfs protobuf-compiler
|
||||
git lfs install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
path: tfhe-rs
|
||||
persist-credentials: false
|
||||
|
||||
- name: Check fhEVM and TFHE-rs repos
|
||||
run: |
|
||||
pwd
|
||||
ls
|
||||
|
||||
- name: Checkout fhevm
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
||||
with:
|
||||
repository: zama-ai/fhevm
|
||||
persist-credentials: 'false'
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
ref: antoniu/use-tfhe-main-benches
|
||||
path: fhevm
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE_ENV=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${COMMIT_SHA}")
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$COMMIT_DATE_ENV";
|
||||
echo "COMMIT_HASH=$(git rev-parse HEAD)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
working-directory: tfhe-rs/
|
||||
|
||||
- name: Check fhEVM and TFHE-rs repos
|
||||
run: |
|
||||
pwd
|
||||
ls
|
||||
mv tfhe-rs fhevm/coprocessor/
|
||||
|
||||
- name: Checkout LFS objects
|
||||
run: git lfs checkout
|
||||
working-directory: fhevm/
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./fhevm/.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Install cargo dependencies
|
||||
run: |
|
||||
sudo apt-get install -y protobuf-compiler cmake pkg-config libssl-dev \
|
||||
libclang-dev docker-compose-v2 docker.io acl
|
||||
sudo usermod -aG docker "$USER"
|
||||
newgrp docker
|
||||
sudo setfacl --modify user:"$USER":rw /var/run/docker.sock
|
||||
cargo install sqlx-cli
|
||||
|
||||
- name: Install foundry
|
||||
uses: foundry-rs/foundry-toolchain@de808b1eea699e761c404bda44ba8f21aba30b2c
|
||||
|
||||
- name: Cache cargo
|
||||
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
target
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
|
||||
restore-keys: ${{ runner.os }}-cargo-
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Init database
|
||||
run: make init_db
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
|
||||
- name: Use Node.js
|
||||
uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2
|
||||
with:
|
||||
node-version: 20.x
|
||||
|
||||
- name: Build contracts
|
||||
env:
|
||||
HARDHAT_NETWORK: hardhat
|
||||
run: |
|
||||
ls
|
||||
pwd
|
||||
cp ./host-contracts/.env.example ./host-contracts/.env
|
||||
npm --prefix ./host-contracts ci --include=optional
|
||||
cd host-contracts && npm install && npm run deploy:emptyProxies && npx hardhat compile
|
||||
working-directory: fhevm/
|
||||
|
||||
- name: Profile erc20 no-cmux benchmark on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "profile_erc20_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
|
||||
- name: Get nsys profile name
|
||||
id: nsys_profile_name
|
||||
run: echo "::set-output name=profile::coprocessor_profile_$(date +"%Y-%m-%d-%Hh").nsys-rep"
|
||||
|
||||
- name: Timestamp nsys profile
|
||||
run: |
|
||||
mv report1.nsys-rep ${{ steps.nsys_profile_name.outputs.profile }}
|
||||
|
||||
- name: Upload profile artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ steps.nsys_profile_name.outputs.profile }}
|
||||
path: fhevm/coprocessor/fhevm-engine/coprocessor/${{ steps.nsys_profile_name.outputs.profile }}
|
||||
|
||||
- name: Run latency benchmark on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
|
||||
- name: Run throughput benchmarks on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py coprocessor/fhevm-engine/target/criterion "${RESULTS_FILENAME}" \
|
||||
--database coprocessor \
|
||||
--hardware "${HW_NAME}" \
|
||||
--backend gpu \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch "${BRANCH_NAME}" \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${BENCH_DATE}" \
|
||||
--walk-subdirs \
|
||||
--crate "coprocessor/fhevm-engine/coprocessor" \
|
||||
--name-suffix "operation_batch_size_${BATCH_SIZE}-schedule_${SCHEDULING_POLICY}-optimization_target_${OPTIMIZATION_TARGET}"
|
||||
working-directory: fhevm/
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
|
||||
path: fhevm/${RESULTS_FILENAME}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
env:
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py fhevm/"${RESULTS_FILENAME}" "${SLAB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
|
||||
teardown-instance:
|
||||
name: coprocessor-benchmark-gpu/teardown
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, benchmark ]
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: 'read'
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
Reference in New Issue
Block a user