Files
tfhe-rs/.github/workflows/coprocessor-benchmark-gpu.yml
Andrei Stoian 6adc6a2393 fix: cron
2025-08-25 10:16:43 +02:00

287 lines
10 KiB
YAML

# Run all fhevm coprocessor benchmarks on a GPU instance on Hyperstack and return parsed results to Slab CI bot.
name: coprocessor-benchmark-gpu
on:
workflow_dispatch:
schedule:
# Nightly tests @ 1AM after each work day
- cron: "0 2 * * MON-FRI"
permissions:
contents: read
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
PROFILE: "multi-h100-sxm5 (n3-H100x8-SXM5)"
BENCHMARK_TYPE: "ALL"
OPTIMIZATION_TARGET: "throughput"
BATCH_SIZE: "5000"
SCHEDULING_POLICY: "MAX_PARALLELISM"
BENCHMARKS: "erc20"
BRANCH_NAME: ${{ github.ref_name }}
COMMIT_SHA: ${{ github.sha }}
SLAB_SECRET: ${{ secrets.JOB_SECRET }}
jobs:
parse-inputs:
name: coprocessor-benchmark-gpu/parse-inputs
runs-on: ubuntu-latest
permissions:
contents: 'read'
outputs:
profile: ${{ steps.parse_profile.outputs.profile }}
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
steps:
- name: Parse profile
id: parse_profile
run: |
# shellcheck disable=SC2001
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
- name: Parse hardware name
id: parse_hardware_name
run: |
# shellcheck disable=SC2001
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
setup-instance:
name: coprocessor-benchmark-gpu/setup-instance
needs: parse-inputs
runs-on: ubuntu-latest
permissions:
contents: 'read'
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label }}
steps:
- name: Start remote instance
id: start-remote-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: ${{ needs.parse-inputs.outputs.profile }}
benchmark:
name: coprocessor-benchmark-gpu/benchmark-gpu (bpr)
needs: [ parse-inputs, setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
continue-on-error: true
timeout-minutes: 720 # 12 hours
permissions:
contents: 'read'
packages: 'read'
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.8"
gcc: 11
env:
HW_NAME: "${{ needs.parse-inputs.outputs.hardware_name }}"
steps:
- name: Install git LFS
run: |
sudo apt-get remove -y unattended-upgrades
sudo apt-get update
sudo apt-get install -y git-lfs protobuf-compiler
git lfs install
- name: Checkout tfhe-rs
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
path: tfhe-rs
persist-credentials: false
- name: Check fhEVM and TFHE-rs repos
run: |
pwd
ls
- name: Checkout fhevm
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
repository: zama-ai/fhevm
persist-credentials: 'false'
fetch-depth: 0
lfs: true
ref: antoniu/use-tfhe-main-benches
path: fhevm
- name: Get benchmark details
run: |
COMMIT_DATE_ENV=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${COMMIT_SHA}")
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$COMMIT_DATE_ENV";
echo "COMMIT_HASH=$(git rev-parse HEAD)";
} >> "${GITHUB_ENV}"
working-directory: tfhe-rs/
- name: Check fhEVM and TFHE-rs repos
run: |
pwd
ls
mv tfhe-rs fhevm/coprocessor/
- name: Checkout LFS objects
run: git lfs checkout
working-directory: fhevm/
- name: Setup Hyperstack dependencies
uses: ./fhevm/.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install rust
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Install cargo dependencies
run: |
sudo apt-get install -y protobuf-compiler cmake pkg-config libssl-dev \
libclang-dev docker-compose-v2 docker.io acl
sudo usermod -aG docker "$USER"
newgrp docker
sudo setfacl --modify user:"$USER":rw /var/run/docker.sock
cargo install sqlx-cli
- name: Install foundry
uses: foundry-rs/foundry-toolchain@de808b1eea699e761c404bda44ba8f21aba30b2c
- name: Cache cargo
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-
- name: Login to GitHub Container Registry
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Init database
run: make init_db
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
- name: Use Node.js
uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2
with:
node-version: 20.x
- name: Build contracts
env:
HARDHAT_NETWORK: hardhat
run: |
ls
pwd
cp ./host-contracts/.env.example ./host-contracts/.env
npm --prefix ./host-contracts ci --include=optional
cd host-contracts && npm install && npm run deploy:emptyProxies && npx hardhat compile
working-directory: fhevm/
- name: Profile erc20 no-cmux benchmark on GPU
run: |
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "profile_erc20_gpu"
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
- name: Get nsys profile name
id: nsys_profile_name
run: echo "::set-output name=profile::coprocessor_profile_$(date +"%Y-%m-%d-%Hh").nsys-rep"
- name: Timestamp nsys profile
run: |
mv report1.nsys-rep ${{ steps.nsys_profile_name.outputs.profile }}
- name: Upload profile artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: ${{ steps.nsys_profile_name.outputs.profile }}
path: fhevm/coprocessor/fhevm-engine/coprocessor/${{ steps.nsys_profile_name.outputs.profile }}
- name: Run latency benchmark on GPU
run: |
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
- name: Run throughput benchmarks on GPU
run: |
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py coprocessor/fhevm-engine/target/criterion "${RESULTS_FILENAME}" \
--database coprocessor \
--hardware "${HW_NAME}" \
--backend gpu \
--project-version "${COMMIT_HASH}" \
--branch "${BRANCH_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--crate "coprocessor/fhevm-engine/coprocessor" \
--name-suffix "operation_batch_size_${BATCH_SIZE}-schedule_${SCHEDULING_POLICY}-optimization_target_${OPTIMIZATION_TARGET}"
working-directory: fhevm/
- name: Upload parsed results artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
path: fhevm/${RESULTS_FILENAME}
- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
env:
SLAB_URL: ${{ secrets.SLAB_URL }}
run: |
python3 slab/scripts/data_sender.py fhevm/"${RESULTS_FILENAME}" "${SLAB_SECRET}" \
--slab-url "${SLAB_URL}"
teardown-instance:
name: coprocessor-benchmark-gpu/teardown
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, benchmark ]
runs-on: ubuntu-latest
permissions:
contents: 'read'
steps:
- name: Stop remote instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}