From bd7df4a03bd68ea3c4d9c245c677076d54c842fd Mon Sep 17 00:00:00 2001 From: pgardratzama Date: Tue, 2 Sep 2025 19:44:21 +0200 Subject: [PATCH] chore(hpu): enable hpu hlapi workflow and throughput bench in integer workflow --- .github/workflows/benchmark_hpu_hlapi.yml | 14 +++-- .github/workflows/benchmark_hpu_integer.yml | 55 +++++++++++++++++-- Makefile | 24 +++----- setup_hpu.sh | 7 ++- .../benches/high_level_api/bench.rs | 7 ++- 5 files changed, 79 insertions(+), 28 deletions(-) diff --git a/.github/workflows/benchmark_hpu_hlapi.yml b/.github/workflows/benchmark_hpu_hlapi.yml index 60fb53990..9f8a5584d 100644 --- a/.github/workflows/benchmark_hpu_hlapi.yml +++ b/.github/workflows/benchmark_hpu_hlapi.yml @@ -14,7 +14,7 @@ env: permissions: {} jobs: - integer-benchmarks-hpu: + hlapi-benchmarks-hpu: name: Execute HLAPI benchmarks for HPU backend runs-on: v80-desktop concurrency: @@ -29,7 +29,7 @@ jobs: ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }} - name: Checkout tfhe-rs repo with tags - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 with: fetch-depth: 0 persist-credentials: 'false' @@ -48,12 +48,12 @@ jobs: SHA: ${{ github.sha }} - name: Install rust - uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases + uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases with: toolchain: nightly - name: Checkout Slab repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 with: repository: zama-ai/slab path: slab @@ -62,9 +62,11 @@ jobs: - name: Run benchmarks run: | - git lfs pull --include="*" --exclude="" - make bench_hlapi_hpu + make pull_hpu_files + export V80_SERIAL_NUMBER=XFL12E4XJXWK + source /opt/xilinx/Vivado/2024.2/settings64.sh make bench_hlapi_erc20_hpu + make bench_hlapi_hpu - name: Parse results run: | diff --git a/.github/workflows/benchmark_hpu_integer.yml b/.github/workflows/benchmark_hpu_integer.yml index e09bbb51e..612b3c7f5 100644 --- a/.github/workflows/benchmark_hpu_integer.yml +++ b/.github/workflows/benchmark_hpu_integer.yml @@ -3,6 +3,15 @@ name: Hpu Integer Benchmarks on: workflow_dispatch: + inputs: + bench_type: + description: "Benchmarks type" + type: choice + default: both + options: + - latency + - throughput + - both env: CARGO_TERM_COLOR: always @@ -14,13 +23,46 @@ env: permissions: {} jobs: + prepare-matrix: + name: Prepare operations matrix + runs-on: v80-desktop + outputs: + bench_type: ${{ steps.set_bench_type.outputs.bench_type }} + steps: + - name: Set benchmark types + if: github.event_name == 'workflow_dispatch' + run: | + if [[ -z $INPUTS_BENCH_TYPE || "${INPUTS_BENCH_TYPE}" == "both" ]]; then + echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}" + else + echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}" + fi + env: + INPUTS_BENCH_TYPE: ${{ inputs.bench_type }} + + - name: Default benchmark type + if: github.event_name != 'workflow_dispatch' + run: | + echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}" + + + - name: Set benchmark types output + id: set_bench_type + run: | # zizmor: ignore[template-injection] this env variable is safe + echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}" + integer-benchmarks-hpu: name: Execute integer & erc20 benchmarks for HPU backend + needs: prepare-matrix runs-on: v80-desktop concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} timeout-minutes: 1440 # 24 hours + strategy: + max-parallel: 1 + matrix: + bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }} steps: # Needed as long as hw_regmap repository is private - name: Configure SSH @@ -63,8 +105,11 @@ jobs: - name: Run benchmarks run: | make pull_hpu_files - make bench_integer_hpu - make bench_hlapi_erc20_hpu + export V80_SERIAL_NUMBER=XFL12E4XJXWK + source /opt/xilinx/Vivado/2024.2/settings64.sh + make BENCH_TYPE="${BENCH_TYPE}" bench_integer_hpu + env: + BENCH_TYPE: ${{ matrix.bench_type }} - name: Parse results run: | @@ -76,14 +121,16 @@ jobs: --branch "${REF_NAME}" \ --commit-date "${COMMIT_DATE}" \ --bench-date "${BENCH_DATE}" \ - --walk-subdirs + --walk-subdirs \ + --bench-type "${BENCH_TYPE}" env: REF_NAME: ${{ github.ref_name }} + BENCH_TYPE: ${{ matrix.bench_type }} - name: Upload parsed results artifact uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 with: - name: ${{ github.sha }}_integer_benchmarks + name: ${{ github.sha }}_${{ matrix.bench_type }}_integer_benchmarks path: ${{ env.RESULTS_FILENAME }} - name: Send data to Slab diff --git a/Makefile b/Makefile index aa16402db..b2144c8aa 100644 --- a/Makefile +++ b/Makefile @@ -1312,11 +1312,11 @@ bench_signed_integer_gpu: install_rs_check_toolchain .PHONY: bench_integer_hpu # Run benchmarks for integer on HPU backend bench_integer_hpu: install_rs_check_toolchain - source ./setup_hpu.sh --config $(HPU_CONFIG) ; \ + source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \ RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \ cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ --bench integer-bench \ - --features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- --quick + --features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- .PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression bench_integer_compression: install_rs_check_toolchain @@ -1497,11 +1497,13 @@ bench_hlapi_gpu: install_rs_check_toolchain --bench hlapi \ --features=integer,gpu,internal-keycache,nightly-avx512 -p tfhe-benchmark -- -.PHONY: bench_hlapi_hpu # Run benchmarks for integer operations on HPU +.PHONY: bench_hlapi_hpu # Run benchmarks for HLAPI operations on HPU bench_hlapi_hpu: install_rs_check_toolchain - RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ + source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \ + RUSTFLAGS="$(RUSTFLAGS)" \ + cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ --bench hlapi \ - --features=integer,hpu,hpu-v80,internal-keycache,nightly-avx512 -p tfhe-benchmark -- + --features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- .PHONY: bench_hlapi_erc20 # Run benchmarks for ERC20 operations bench_hlapi_erc20: install_rs_check_toolchain @@ -1527,21 +1529,13 @@ bench_hlapi_dex_gpu: install_rs_check_toolchain --bench hlapi-dex \ --features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark -- -.PHONY: bench_hlapi_hpu # Run benchmarks for HLAPI operations on HPU -bench_hlapi_hpu: install_rs_check_toolchain - source ./setup_hpu.sh --config $(HPU_CONFIG) ; \ - RUSTFLAGS="$(RUSTFLAGS)" \ - cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ - --bench hlapi \ - --features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- --quick - .PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ECR20 operations on HPU bench_hlapi_erc20_hpu: install_rs_check_toolchain - source ./setup_hpu.sh --config $(HPU_CONFIG) ; \ + source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \ RUSTFLAGS="$(RUSTFLAGS)" \ cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ --bench hlapi-erc20 \ - --features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- --quick + --features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- .PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate bench_tfhe_zk_pok: install_rs_check_toolchain diff --git a/setup_hpu.sh b/setup_hpu.sh index f032b1548..5eac1f64d 100644 --- a/setup_hpu.sh +++ b/setup_hpu.sh @@ -23,7 +23,7 @@ V80_PCIE_DEV="unselected" XILINX_VIVADO=${XILINX_VIVADO:-"/opt/amd/Vivado/2024.2"} # V80 bitstream refresh require insmod of ami.ko module -AMI_PATH=${AMI_PATH:-"/opt/v80/ami/1e6a8da"} +AMI_PATH=${AMI_PATH:-"/opt/v80/ami/ef9249f"} # Parse user CLI ############################################################## opt_short="hc:l:p:" @@ -63,6 +63,9 @@ do if [ -n "${2}" ] && [[ ! ${2} =~ ^- ]]; then V80_PCIE_DEV="${2}" ((i++)) + shift 1 + elif [[ ${#DEVICE[@]} -eq 1 ]]; then + V80_PCIE_DEV=${DEVICE[0]%%:*} else echo "Please select a device in following list (1st two digits):" for item in "${DEVICE[@]}"; do @@ -70,7 +73,7 @@ do done return 1 fi - shift 2 + shift 1 ;; "") # End of input reading break ;; diff --git a/tfhe-benchmark/benches/high_level_api/bench.rs b/tfhe-benchmark/benches/high_level_api/bench.rs index 058a053a0..384d6fd56 100644 --- a/tfhe-benchmark/benches/high_level_api/bench.rs +++ b/tfhe-benchmark/benches/high_level_api/bench.rs @@ -32,7 +32,12 @@ fn bench_fhe_type( for<'a> FheType: FheMin<&'a FheType, Output = FheType> + FheMax<&'a FheType, Output = FheType>, { let mut bench_group = c.benchmark_group(type_name); - let bench_prefix = "hlapi::ops"; + let mut bench_prefix = "hlapi::ops".to_string(); + if cfg!(feature = "gpu") { + bench_prefix = format!("{}::cuda", bench_prefix); + } else if cfg!(feature = "hpu") { + bench_prefix = format!("{}::hpu", bench_prefix); + } let mut rng = thread_rng();