From abe7d43bbc107ed64de8090b6d560bd3054cd06d Mon Sep 17 00:00:00 2001
From: Andrei Stoian
Date: Tue, 1 Jul 2025 17:45:22 +0200
Subject: [PATCH] chore(gpu): add coprocessor benchmarks in tfhe-rs gpu ci

---
 .../workflows/coprocessor-benchmark-gpu.yml   | 303 ++++++++++++++++++
 1 file changed, 303 insertions(+)
 create mode 100644 .github/workflows/coprocessor-benchmark-gpu.yml

diff --git a/.github/workflows/coprocessor-benchmark-gpu.yml b/.github/workflows/coprocessor-benchmark-gpu.yml
new file mode 100644
index 000000000..15da3e9db
--- /dev/null
+++ b/.github/workflows/coprocessor-benchmark-gpu.yml
@@ -0,0 +1,303 @@
+# Run all fhevm coprocessor benchmarks on a GPU instance on Hyperstack and return parsed results to Slab CI bot.
+name: coprocessor-benchmark-gpu
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Nightly tests @ 1AM after each work day
+    - cron: "0 1 * * MON-FRI"
+
+permissions:
+  contents: read
+
+# Workflow-level variables, visible to the steps of every job below.
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+  RUST_MIN_STACK: "8388608"
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # "<profile> (<hardware name>)"; the parse-inputs job splits the two parts.
+  PROFILE: "multi-h100-sxm5 (n3-H100x8-SXM5)"
+  BENCHMARK_TYPE: "ALL"
+  OPTIMIZATION_TARGET: "throughput"
+  BATCH_SIZE: "5000"
+  SCHEDULING_POLICY: "MAX_PARALLELISM"
+  BENCHMARKS: "erc20"
+  BRANCH_NAME: ${{ github.ref_name }}
+  COMMIT_SHA: ${{ github.sha }}
+  SLAB_SECRET: ${{ secrets.JOB_SECRET }}
+
+jobs:
+  # Split PROFILE into the profile name and the hardware name and expose both as job outputs.
+  parse-inputs:
+    name: coprocessor-benchmark-gpu/parse-inputs
+    runs-on: ubuntu-latest
+    permissions:
+      contents: 'read'
+    outputs:
+      profile: ${{ steps.parse_profile.outputs.profile }}
+      hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
+    steps:
+      - name: Parse profile
+        id: parse_profile
+        run: |
+          # Keep the text that precedes the whitespace and the parenthesised part.
+          # shellcheck disable=SC2001
+          PROFILE_VAL=$(echo "${PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
+          echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
+
+      - name: Parse hardware name
+        id: parse_hardware_name
+        run: |
+          # Keep the text found inside the parentheses.
+          # shellcheck disable=SC2001
+          PROFILE_VAL=$(echo "${PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
+          echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
+
+  # Start the remote runner (hyperstack backend) through the slab-github-runner action.
+  setup-instance:
+    name: coprocessor-benchmark-gpu/setup-instance
+    needs: parse-inputs
+    runs-on: ubuntu-latest
+    permissions:
+      contents: 'read'
+    outputs:
+      runner-name: ${{ steps.start-remote-instance.outputs.label }}
+    steps:
+      - name: Start remote instance
+        id: start-remote-instance
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          backend: hyperstack
+          profile: ${{ needs.parse-inputs.outputs.profile }}
+
+  # Build, profile and benchmark the coprocessor on the remote runner, then send the parsed results to Slab.
+  benchmark:
+    name: coprocessor-benchmark-gpu/benchmark-gpu (bpr)
+    needs: [ parse-inputs, setup-instance ]
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    continue-on-error: true
+    timeout-minutes: 720  # 12 hours
+    permissions:
+      contents: 'read'
+      packages: 'read'
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.8"
+            gcc: 11
+    env:
+      HW_NAME: "${{ needs.parse-inputs.outputs.hardware_name }}"
+
+    steps:
+      - name: Install git LFS
+        run: |
+          sudo apt-get remove -y unattended-upgrades
+          sudo apt-get update
+          sudo apt-get install -y git-lfs protobuf-compiler
+          git lfs install
+
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          path: tfhe-rs
+          persist-credentials: false
+
+      - name: Check fhEVM and TFHE-rs repos
+        run: |
+          pwd
+          ls
+
+      - name: Checkout fhevm
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          repository: zama-ai/fhevm
+          persist-credentials: 'false'
+          fetch-depth: 0
+          lfs: true
+          ref: antoniu/use-tfhe-main-benches
+          path: fhevm
+
+      - name: Get benchmark details
+        run: |
+          COMMIT_DATE_ENV=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${COMMIT_SHA}")
+          {
+            echo "BENCH_DATE=$(date --iso-8601=seconds)";
+            echo "COMMIT_DATE=$COMMIT_DATE_ENV";
+            echo "COMMIT_HASH=$(git rev-parse HEAD)";
+          } >> "${GITHUB_ENV}"
+        working-directory: tfhe-rs/
+
+      # Besides listing the workspace, this step moves the tfhe-rs checkout under fhevm/coprocessor/.
+      - name: Check fhEVM and TFHE-rs repos
+        run: |
+          pwd
+          ls
+          mv tfhe-rs fhevm/coprocessor/
+
+      - name: Checkout LFS objects
+        run: git lfs checkout
+        working-directory: fhevm/
+
+      - name: Setup Hyperstack dependencies
+        uses: ./fhevm/.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          # NOTE(review): SECRETS_AVAILABLE is not defined anywhere in this file, so this looks like it is always false - confirm.
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        with:
+          toolchain: nightly
+
+      - name: Install cargo dependencies
+        run: |
+          sudo apt-get install -y protobuf-compiler cmake pkg-config libssl-dev \
+            libclang-dev docker-compose-v2 docker.io acl
+          sudo usermod -aG docker "$USER"
+          newgrp docker
+          sudo setfacl --modify user:"$USER":rw /var/run/docker.sock
+          cargo install sqlx-cli
+
+      - name: Install foundry
+        uses: foundry-rs/foundry-toolchain@de808b1eea699e761c404bda44ba8f21aba30b2c
+
+      - name: Cache cargo
+        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            target
+          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: ${{ runner.os }}-cargo-
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Init database
+        run: make init_db
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Use Node.js
+        uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2
+        with:
+          node-version: 20.x
+
+      - name: Build contracts
+        env:
+          HARDHAT_NETWORK: hardhat
+        run: |
+          ls
+          pwd
+          cp ./host-contracts/.env.example ./host-contracts/.env
+          npm --prefix ./host-contracts ci --include=optional
+          cd host-contracts && npm install && npm run deploy:emptyProxies && npx hardhat compile
+        working-directory: fhevm/
+
+      - name: Profile erc20 no-cmux benchmark on GPU
+        run: |
+          BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "profile_erc20_gpu"
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Get nsys profile name
+        id: nsys_profile_name
+        # Written to the GITHUB_OUTPUT file; the "::set-output" workflow command is deprecated.
+        run: |
+          echo "profile=coprocessor_profile_$(date +"%Y-%m-%d-%Hh").nsys-rep" >> "${GITHUB_OUTPUT}"
+
+      - name: Timestamp nsys profile
+        env:
+          NSYS_PROFILE: ${{ steps.nsys_profile_name.outputs.profile }}
+        run: |
+          mv report1.nsys-rep "${NSYS_PROFILE}"
+        # Same directory as the profiling step above and as the upload path below.
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Upload profile artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
+        with:
+          name: ${{ steps.nsys_profile_name.outputs.profile }}
+          path: fhevm/coprocessor/fhevm-engine/coprocessor/${{ steps.nsys_profile_name.outputs.profile }}
+
+      - name: Run latency benchmark on GPU
+        run: |
+          BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Run throughput benchmarks on GPU
+        run: |
+          BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py coprocessor/fhevm-engine/target/criterion "${RESULTS_FILENAME}" \
+            --database coprocessor \
+            --hardware "${HW_NAME}" \
+            --backend gpu \
+            --project-version "${COMMIT_HASH}" \
+            --branch "${BRANCH_NAME}" \
+            --commit-date "${COMMIT_DATE}" \
+            --bench-date "${BENCH_DATE}" \
+            --walk-subdirs \
+            --crate "coprocessor/fhevm-engine/coprocessor" \
+            --name-suffix "operation_batch_size_${BATCH_SIZE}-schedule_${SCHEDULING_POLICY}-optimization_target_${OPTIMIZATION_TARGET}"
+        working-directory: fhevm/
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
+        with:
+          # Action inputs are not passed through a shell, so expressions are used instead of shell-style variables.
+          name: ${{ env.COMMIT_SHA }}_${{ env.BENCHMARKS }}_${{ needs.parse-inputs.outputs.profile }}
+          path: fhevm/${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          repository: zama-ai/slab
+          path: slab
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        env:
+          SLAB_URL: ${{ secrets.SLAB_URL }}
+        run: |
+          python3 slab/scripts/data_sender.py fhevm/"${RESULTS_FILENAME}" "${SLAB_SECRET}" \
+            --slab-url "${SLAB_URL}"
+
+  # Stop the remote runner whenever setup-instance succeeded, even if the benchmark job failed.
+  teardown-instance:
+    name: coprocessor-benchmark-gpu/teardown
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    needs: [ setup-instance, benchmark ]
+    runs-on: ubuntu-latest
+    permissions:
+      contents: 'read'
+    steps:
+      - name: Stop remote instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}