chore(hpu): enable hpu hlapi workflow and throughput bench in integer workflow

2026-01-10 07:08:03 -05:00 · 2025-09-02 19:44:21 +02:00
parent 2279d0deb8
commit bd7df4a03b
5 changed files with 79 additions and 28 deletions
--- a/.github/workflows/benchmark_hpu_hlapi.yml
+++ b/.github/workflows/benchmark_hpu_hlapi.yml
@@ -14,7 +14,7 @@ env:
 permissions: {}

 jobs:
-  integer-benchmarks-hpu:
+  hlapi-benchmarks-hpu:
    name: Execute HLAPI benchmarks for HPU backend
    runs-on: v80-desktop
    concurrency:
@@ -29,7 +29,7 @@ jobs:
          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,12 +48,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -62,9 +62,11 @@ jobs:

      - name: Run benchmarks
        run: |
-          git lfs pull --include="*" --exclude=""
-          make bench_hlapi_hpu
+          make pull_hpu_files
+          export V80_SERIAL_NUMBER=XFL12E4XJXWK
+          source /opt/xilinx/Vivado/2024.2/settings64.sh
          make bench_hlapi_erc20_hpu
+          make bench_hlapi_hpu

      - name: Parse results
        run: |
--- a/.github/workflows/benchmark_hpu_integer.yml
+++ b/.github/workflows/benchmark_hpu_integer.yml
@@ -3,6 +3,15 @@ name: Hpu Integer Benchmarks

 on:
  workflow_dispatch:
+    inputs:
+      bench_type:
+        description: "Benchmarks type"
+        type: choice
+        default: both
+        options:
+          - latency
+          - throughput
+          - both

 env:
  CARGO_TERM_COLOR: always
@@ -14,13 +23,46 @@ env:
 permissions: {}

 jobs:
+  prepare-matrix:
+    name: Prepare operations matrix
+    runs-on: v80-desktop
+    outputs:
+      bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
+    steps:
+      - name: Set benchmark types
+        if: github.event_name == 'workflow_dispatch'
+        run: |
+          if [[ -z $INPUTS_BENCH_TYPE || "${INPUTS_BENCH_TYPE}" == "both" ]]; then
+            echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
+          else
+            echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
+          fi
+        env:
+          INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
+
+      - name: Default benchmark type
+        if: github.event_name != 'workflow_dispatch'
+        run: |
+          echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
+
+
+      - name: Set benchmark types output
+        id: set_bench_type
+        run: | # zizmor: ignore[template-injection] this env variable is safe
+          echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
+
  integer-benchmarks-hpu:
    name: Execute integer & erc20 benchmarks for HPU backend
+    needs: prepare-matrix
    runs-on: v80-desktop
    concurrency:
      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    timeout-minutes: 1440  # 24 hours
+    strategy:
+      max-parallel: 1
+      matrix:
+        bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
    steps:
      # Needed as long as hw_regmap repository is private
      - name: Configure SSH
@@ -63,8 +105,11 @@ jobs:
      - name: Run benchmarks
        run: |
          make pull_hpu_files
-          make bench_integer_hpu
-          make bench_hlapi_erc20_hpu
+          export V80_SERIAL_NUMBER=XFL12E4XJXWK
+          source /opt/xilinx/Vivado/2024.2/settings64.sh
+          make BENCH_TYPE="${BENCH_TYPE}" bench_integer_hpu
+        env:
+          BENCH_TYPE: ${{ matrix.bench_type }}

      - name: Parse results
        run: |
@@ -76,14 +121,16 @@ jobs:
          --branch "${REF_NAME}" \
          --commit-date "${COMMIT_DATE}" \
          --bench-date "${BENCH_DATE}" \
-          --walk-subdirs
+          --walk-subdirs \
+          --bench-type "${BENCH_TYPE}"
        env:
          REF_NAME: ${{ github.ref_name }}
+          BENCH_TYPE: ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
        with:
-          name: ${{ github.sha }}_integer_benchmarks
+          name: ${{ github.sha }}_${{ matrix.bench_type }}_integer_benchmarks
          path: ${{ env.RESULTS_FILENAME }}

      - name: Send data to Slab
--- a/Makefile
+++ b/Makefile
@@ -1312,11 +1312,11 @@ bench_signed_integer_gpu: install_rs_check_toolchain

 .PHONY: bench_integer_hpu # Run benchmarks for integer on HPU backend
 bench_integer_hpu: install_rs_check_toolchain
-	source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
+	source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- --quick
+	--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark --

 .PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
 bench_integer_compression: install_rs_check_toolchain
@@ -1497,11 +1497,13 @@ bench_hlapi_gpu: install_rs_check_toolchain
 	--bench hlapi \
 	--features=integer,gpu,internal-keycache,nightly-avx512 -p tfhe-benchmark --

-.PHONY: bench_hlapi_hpu # Run benchmarks for integer operations on HPU
+.PHONY: bench_hlapi_hpu # Run benchmarks for HLAPI operations on HPU
 bench_hlapi_hpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \
+	RUSTFLAGS="$(RUSTFLAGS)" \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi \
-	--features=integer,hpu,hpu-v80,internal-keycache,nightly-avx512 -p tfhe-benchmark --
+	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark --

 .PHONY: bench_hlapi_erc20 # Run benchmarks for ERC20 operations
 bench_hlapi_erc20: install_rs_check_toolchain
@@ -1527,21 +1529,13 @@ bench_hlapi_dex_gpu: install_rs_check_toolchain
 	--bench hlapi-dex \
 	--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --

-.PHONY: bench_hlapi_hpu # Run benchmarks for HLAPI operations on HPU
-bench_hlapi_hpu: install_rs_check_toolchain
-	source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
-	RUSTFLAGS="$(RUSTFLAGS)" \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench hlapi \
-	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- --quick
-
 .PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ERC20 operations on HPU
 bench_hlapi_erc20_hpu: install_rs_check_toolchain
-	source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
+	source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \
 	RUSTFLAGS="$(RUSTFLAGS)" \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-erc20 \
-	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- --quick
+	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark --

 .PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
 bench_tfhe_zk_pok: install_rs_check_toolchain
--- a/setup_hpu.sh
+++ b/setup_hpu.sh
@@ -23,7 +23,7 @@ V80_PCIE_DEV="unselected"
 XILINX_VIVADO=${XILINX_VIVADO:-"/opt/amd/Vivado/2024.2"}

 # V80 bitstream refresh requires insmod of ami.ko module
-AMI_PATH=${AMI_PATH:-"/opt/v80/ami/1e6a8da"}
+AMI_PATH=${AMI_PATH:-"/opt/v80/ami/ef9249f"}

 # Parse user CLI ##############################################################
 opt_short="hc:l:p:"
@@ -63,6 +63,9 @@ do
            if [ -n "${2}" ] && [[ ! ${2} =~ ^- ]]; then
                V80_PCIE_DEV="${2}"
                ((i++))
+                shift 1
+            elif [[ ${#DEVICE[@]} -eq 1 ]]; then
+                V80_PCIE_DEV=${DEVICE[0]%%:*}
            else
                echo "Please select a device in following list (1st two digits):"
                for item in "${DEVICE[@]}"; do
@@ -70,7 +73,7 @@ do
                done
                return 1
            fi
-            shift 2
+            shift 1
            ;;
        "") # End of input reading
            break ;;
--- a/tfhe-benchmark/benches/high_level_api/bench.rs
+++ b/tfhe-benchmark/benches/high_level_api/bench.rs
@@ -32,7 +32,12 @@ fn bench_fhe_type<FheType>(
    for<'a> FheType: FheMin<&'a FheType, Output = FheType> + FheMax<&'a FheType, Output = FheType>,
 {
    let mut bench_group = c.benchmark_group(type_name);
-    let bench_prefix = "hlapi::ops";
+    let mut bench_prefix = "hlapi::ops".to_string();
+    if cfg!(feature = "gpu") {
+        bench_prefix = format!("{}::cuda", bench_prefix);
+    } else if cfg!(feature = "hpu") {
+        bench_prefix = format!("{}::hpu", bench_prefix);
+    }

    let mut rng = thread_rng();