From bd7df4a03bd68ea3c4d9c245c677076d54c842fd Mon Sep 17 00:00:00 2001
From: pgardratzama <pierre.gardrat@zama.ai>
Date: Tue, 2 Sep 2025 19:44:21 +0200
Subject: [PATCH] chore(hpu): enable hpu hlapi workflow and throughput bench in
 integer workflow

---
 .github/workflows/benchmark_hpu_hlapi.yml     | 14 +++--
 .github/workflows/benchmark_hpu_integer.yml   | 55 +++++++++++++++++--
 Makefile                                      | 24 +++-----
 setup_hpu.sh                                  |  7 ++-
 .../benches/high_level_api/bench.rs           |  7 ++-
 5 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/benchmark_hpu_hlapi.yml b/.github/workflows/benchmark_hpu_hlapi.yml
index 60fb53990..9f8a5584d 100644
--- a/.github/workflows/benchmark_hpu_hlapi.yml
+++ b/.github/workflows/benchmark_hpu_hlapi.yml
@@ -14,7 +14,7 @@ env:
 permissions: {}
 
 jobs:
-  integer-benchmarks-hpu:
+  hlapi-benchmarks-hpu:
     name: Execute HLAPI benchmarks for HPU backend
     runs-on: v80-desktop
     concurrency:
@@ -29,7 +29,7 @@ jobs:
           ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
 
       - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
         with:
           fetch-depth: 0
           persist-credentials: 'false'
@@ -48,12 +48,12 @@ jobs:
           SHA: ${{ github.sha }}
 
       - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
         with:
           toolchain: nightly
 
       - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
         with:
           repository: zama-ai/slab
           path: slab
@@ -62,9 +62,11 @@ jobs:
 
       - name: Run benchmarks
         run: |
-          git lfs pull --include="*" --exclude=""
-          make bench_hlapi_hpu
+          make pull_hpu_files
+          export V80_SERIAL_NUMBER=XFL12E4XJXWK
+          source /opt/xilinx/Vivado/2024.2/settings64.sh
           make bench_hlapi_erc20_hpu
+          make bench_hlapi_hpu
 
       - name: Parse results
         run: |
diff --git a/.github/workflows/benchmark_hpu_integer.yml b/.github/workflows/benchmark_hpu_integer.yml
index e09bbb51e..612b3c7f5 100644
--- a/.github/workflows/benchmark_hpu_integer.yml
+++ b/.github/workflows/benchmark_hpu_integer.yml
@@ -3,6 +3,15 @@ name: Hpu Integer Benchmarks
 
 on:
   workflow_dispatch:
+    inputs:
+      bench_type:
+        description: "Benchmarks type"
+        type: choice
+        default: both
+        options:
+          - latency
+          - throughput
+          - both
 
 env:
   CARGO_TERM_COLOR: always
@@ -14,13 +23,46 @@ env:
 permissions: {}
 
 jobs:
+  prepare-matrix:  # Emits the JSON list of bench types used as job matrix
+    name: Prepare operations matrix
+    runs-on: v80-desktop
+    outputs:
+      bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
+    steps:
+      - name: Set benchmark types
+        if: github.event_name == 'workflow_dispatch'
+        run: |
+          if [[ -z "${INPUTS_BENCH_TYPE}" || "${INPUTS_BENCH_TYPE}" == "both" ]]; then
+            echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
+          else
+            echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
+          fi
+        env:
+          INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
+
+      - name: Default benchmark type
+        if: github.event_name != 'workflow_dispatch'
+        run: |
+          echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
+
+      # BENCH_TYPE was exported via GITHUB_ENV above: read it as a shell variable, no template expansion
+      - name: Set benchmark types output
+        id: set_bench_type
+        run: |
+          echo "bench_type=${BENCH_TYPE}" >> "${GITHUB_OUTPUT}"
+
   integer-benchmarks-hpu:
     name: Execute integer & erc20 benchmarks for HPU backend
+    needs: prepare-matrix
     runs-on: v80-desktop
     concurrency:
       group: ${{ github.workflow }}_${{ github.ref }}
       cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
     timeout-minutes: 1440  # 24 hours
+    strategy:
+      max-parallel: 1
+      matrix:
+        bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
     steps:
       # Needed as long as hw_regmap repository is private
       - name: Configure SSH
@@ -63,8 +105,11 @@ jobs:
       - name: Run benchmarks
         run: |
           make pull_hpu_files
-          make bench_integer_hpu
-          make bench_hlapi_erc20_hpu
+          export V80_SERIAL_NUMBER=XFL12E4XJXWK
+          source /opt/xilinx/Vivado/2024.2/settings64.sh
+          make BENCH_TYPE="${BENCH_TYPE}" bench_integer_hpu
+        env:
+          BENCH_TYPE: ${{ matrix.bench_type }}
 
       - name: Parse results
         run: |
@@ -76,14 +121,16 @@ jobs:
           --branch "${REF_NAME}" \
           --commit-date "${COMMIT_DATE}" \
           --bench-date "${BENCH_DATE}" \
-          --walk-subdirs
+          --walk-subdirs \
+          --bench-type "${BENCH_TYPE}"
         env:
           REF_NAME: ${{ github.ref_name }}
+          BENCH_TYPE: ${{ matrix.bench_type }}
 
       - name: Upload parsed results artifact
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
         with:
-          name: ${{ github.sha }}_integer_benchmarks
+          name: ${{ github.sha }}_${{ matrix.bench_type }}_integer_benchmarks
           path: ${{ env.RESULTS_FILENAME }}
 
       - name: Send data to Slab
diff --git a/Makefile b/Makefile
index aa16402db..b2144c8aa 100644
--- a/Makefile
+++ b/Makefile
@@ -1312,11 +1312,11 @@ bench_signed_integer_gpu: install_rs_check_toolchain
 
 .PHONY: bench_integer_hpu # Run benchmarks for integer on HPU backend
 bench_integer_hpu: install_rs_check_toolchain
-	source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
+	source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- --quick
+	--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark --
 
 .PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
 bench_integer_compression: install_rs_check_toolchain
@@ -1497,11 +1497,13 @@ bench_hlapi_gpu: install_rs_check_toolchain
 	--bench hlapi \
 	--features=integer,gpu,internal-keycache,nightly-avx512 -p tfhe-benchmark --
 
-.PHONY: bench_hlapi_hpu # Run benchmarks for integer operations on HPU
+.PHONY: bench_hlapi_hpu # Run benchmarks for HLAPI operations on HPU
 bench_hlapi_hpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \
+	RUSTFLAGS="$(RUSTFLAGS)" \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi \
-	--features=integer,hpu,hpu-v80,internal-keycache,nightly-avx512 -p tfhe-benchmark --
+	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark --
 
 .PHONY: bench_hlapi_erc20 # Run benchmarks for ERC20 operations
 bench_hlapi_erc20: install_rs_check_toolchain
@@ -1527,21 +1529,13 @@ bench_hlapi_dex_gpu: install_rs_check_toolchain
 	--bench hlapi-dex \
 	--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
 
-.PHONY: bench_hlapi_hpu # Run benchmarks for HLAPI operations on HPU
-bench_hlapi_hpu: install_rs_check_toolchain
-	source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
-	RUSTFLAGS="$(RUSTFLAGS)" \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench hlapi \
-	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- --quick
-
 .PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ECR20 operations on HPU
 bench_hlapi_erc20_hpu: install_rs_check_toolchain
-	source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
+	source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \
 	RUSTFLAGS="$(RUSTFLAGS)" \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-erc20 \
-	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- --quick
+	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark --
 
 .PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
 bench_tfhe_zk_pok: install_rs_check_toolchain
diff --git a/setup_hpu.sh b/setup_hpu.sh
index f032b1548..5eac1f64d 100644
--- a/setup_hpu.sh
+++ b/setup_hpu.sh
@@ -23,7 +23,7 @@ V80_PCIE_DEV="unselected"
 XILINX_VIVADO=${XILINX_VIVADO:-"/opt/amd/Vivado/2024.2"}
 
 # V80 bitstream refresh require insmod of ami.ko module
-AMI_PATH=${AMI_PATH:-"/opt/v80/ami/1e6a8da"}
+AMI_PATH=${AMI_PATH:-"/opt/v80/ami/ef9249f"}
 
 # Parse user CLI ##############################################################
 opt_short="hc:l:p:"
@@ -63,6 +63,9 @@ do
             if [ -n "${2}" ] && [[ ! ${2} =~ ^- ]]; then
                 V80_PCIE_DEV="${2}"
                 ((i++))
+                shift 1
+            elif [[ ${#DEVICE[@]} -eq 1 ]]; then
+                V80_PCIE_DEV=${DEVICE[0]%%:*}
             else
                 echo "Please select a device in following list (1st two digits):"
                 for item in "${DEVICE[@]}"; do
@@ -70,7 +73,7 @@ do
                 done
                 return 1
             fi
-            shift 2
+            shift 1
             ;;
         "") # End of input reading
             break ;;
diff --git a/tfhe-benchmark/benches/high_level_api/bench.rs b/tfhe-benchmark/benches/high_level_api/bench.rs
index 058a053a0..384d6fd56 100644
--- a/tfhe-benchmark/benches/high_level_api/bench.rs
+++ b/tfhe-benchmark/benches/high_level_api/bench.rs
@@ -32,7 +32,12 @@ fn bench_fhe_type<FheType>(
     for<'a> FheType: FheMin<&'a FheType, Output = FheType> + FheMax<&'a FheType, Output = FheType>,
 {
     let mut bench_group = c.benchmark_group(type_name);
-    let bench_prefix = "hlapi::ops";
+    let mut bench_prefix = "hlapi::ops".to_string();
+    if cfg!(feature = "gpu") {
+        bench_prefix = format!("{}::cuda", bench_prefix);
+    } else if cfg!(feature = "hpu") {
+        bench_prefix = format!("{}::hpu", bench_prefix);
+    }
 
     let mut rng = thread_rng();