chore(hpu): make hpu integer bench fast by default

2026-01-10 07:08:03 -05:00 · 2025-09-10 13:28:27 +02:00
parent 4ff0d6cac2
commit 757c2fc828
2 changed files with 28 additions and 4 deletions
--- a/.github/workflows/benchmark_hpu_integer.yml
+++ b/.github/workflows/benchmark_hpu_integer.yml
@@ -4,6 +4,9 @@ name: benchmark_hpu_integer
 on:
  workflow_dispatch:
    inputs:
+      all_precisions:
+        description: "Run all precisions"
+        type: boolean
      bench_type:
        description: "Benchmarks type"
        type: choice
@@ -19,6 +22,7 @@ env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
+  FAST_BENCH: TRUE

 permissions: {}

@@ -102,6 +106,11 @@ jobs:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

+      - name: Should run benchmarks with all precisions
+        if: inputs.all_precisions
+        run: |
+          echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
+
      - name: Run benchmarks
        run: |
          make pull_hpu_files
--- a/tfhe-benchmark/benches/high_level_api/erc20.rs
+++ b/tfhe-benchmark/benches/high_level_api/erc20.rs
@@ -255,7 +255,10 @@ where
 {
    use tfhe::tfhe_hpu_backend::prelude::hpu_asm;
    let src = HpuHandle {
-        native: vec![from_amount, to_amount, amount].into_iter().flatten().collect(),
+        native: vec![from_amount, to_amount, amount]
+            .into_iter()
+            .flatten()
+            .collect(),
        boolean: vec![],
        imm: vec![],
    };
@@ -392,7 +395,13 @@ fn bench_transfer_latency_simd<FheType, F>(
    F: for<'a> Fn(&'a Vec<FheType>, &'a Vec<FheType>, &'a Vec<FheType>) -> Vec<FheType>,
 {
    use tfhe::tfhe_hpu_backend::prelude::hpu_asm;
-    let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD.format().unwrap().proto.src.len()/3;
+    let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD
+        .format()
+        .unwrap()
+        .proto
+        .src
+        .len()
+        / 3;
    let bench_id = format!("{bench_name}::{fn_name}::{type_name}");
    c.bench_function(&bench_id, |b| {
        let mut rng = thread_rng();
@@ -646,10 +655,16 @@ fn hpu_bench_transfer_throughput_simd<FheType, F>(
    F: for<'a> Fn(&'a Vec<FheType>, &'a Vec<FheType>, &'a Vec<FheType>) -> Vec<FheType> + Sync,
 {
    use tfhe::tfhe_hpu_backend::prelude::hpu_asm;
-    let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD.format().unwrap().proto.src.len()/3;
+    let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD
+        .format()
+        .unwrap()
+        .proto
+        .src
+        .len()
+        / 3;
    let mut rng = thread_rng();
    for num_elems in [2, 10] {
-        let real_num_elems = num_elems*(hpu_simd_n as u64);
+        let real_num_elems = num_elems * (hpu_simd_n as u64);
        group.throughput(Throughput::Elements(real_num_elems));
        let bench_id =
            format!("{bench_name}::throughput::{fn_name}::{type_name}::{real_num_elems}_elems");