From 5eb4cc5a229343d33cd077895f2d8e1509ac40a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Test=C3=A9?= Date: Mon, 8 Dec 2025 18:00:21 +0100 Subject: [PATCH] chore(bench): add fast benchmark capability for hlapi Run only a small subset of the current benchmarks to speed up developer feedback --- Makefile | 11 ++- .../benches/high_level_api/bench.rs | 81 +++++++++++-------- .../benches/high_level_api/noise_squash.rs | 34 +++++--- 3 files changed, 78 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index b5c0d2980..32f2effb6 100644 --- a/Makefile +++ b/Makefile @@ -1588,13 +1588,15 @@ bench_web_js_api_parallel_firefox_ci: setup_venv .PHONY: bench_hlapi # Run benchmarks for integer operations bench_hlapi: install_rs_check_toolchain - RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ + RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \ + cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ --bench hlapi \ --features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- .PHONY: bench_hlapi_gpu # Run benchmarks for integer operations on GPU bench_hlapi_gpu: install_rs_check_toolchain - RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ + RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \ + cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ --bench hlapi \ --features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- @@ -1603,6 +1605,7 @@ bench_hlapi_hpu: install_rs_check_toolchain source ./setup_hpu.sh --config $(HPU_CONFIG); \ export V80_PCIE_DEV=${V80_PCIE_DEV}; \ RUSTFLAGS="$(RUSTFLAGS)" \ + __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \ cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ --bench hlapi \ --features=integer,internal-keycache,hpu,hpu-v80,pbs-stats -p tfhe-benchmark -- @@ -1665,14 +1668,14 @@ bench_tfhe_zk_pok: install_rs_check_toolchain .PHONY: bench_hlapi_noise_squash # Run benchmarks for noise squash operation bench_hlapi_noise_squash: 
install_rs_check_toolchain - RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \ + RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \ cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ --bench hlapi-noise-squash \ --features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- .PHONY: bench_hlapi_noise_squash_gpu # Run benchmarks for noise squash operation on GPU bench_hlapi_noise_squash_gpu: install_rs_check_toolchain - RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \ + RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \ cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ --bench hlapi-noise-squash \ --features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- diff --git a/tfhe-benchmark/benches/high_level_api/bench.rs b/tfhe-benchmark/benches/high_level_api/bench.rs index 3d0cc653f..17e90764f 100644 --- a/tfhe-benchmark/benches/high_level_api/bench.rs +++ b/tfhe-benchmark/benches/high_level_api/bench.rs @@ -1,4 +1,6 @@ -use benchmark::utilities::{hlapi_throughput_num_ops, write_to_json, BenchmarkType, OperatorType}; +use benchmark::utilities::{ + hlapi_throughput_num_ops, write_to_json, BenchmarkType, BitSizesSet, EnvConfig, OperatorType, +}; use criterion::{black_box, Criterion, Throughput}; use rand::prelude::*; use std::marker::PhantomData; @@ -293,12 +295,15 @@ where FheKey: FheEncrypt + FheIntegerType + Send + Sync, FheKey::Id: FheUintId, { + let mut bench_group = c.benchmark_group("kv_store"); + bench_group.sample_size(10); + let mut kv_store = KVStore::new(); let mut rng = rand::thread_rng(); let format_id_bench = |op_name: &str| -> String { format!( - "KVStore::<{}, {}>::{op_name}/{num_elements}", + "hlapi::kv_store::<{}, {}>::{op_name}/{num_elements}", TypeDisplayer::::default(), TypeDisplayer::::default(), ) @@ -320,19 +325,19 @@ where let value = rng.gen::(); let value_to_add = 
Value::encrypt(value, cks); - c.bench_function(&format_id_bench("Get"), |b| { + bench_group.bench_function(format_id_bench("get"), |b| { b.iter(|| { let _ = kv_store.get(&encrypted_key); }) }); - c.bench_function(&format_id_bench("Update"), |b| { + bench_group.bench_function(format_id_bench("update"), |b| { b.iter(|| { let _ = kv_store.update(&encrypted_key, &value_to_add); }) }); - c.bench_function(&format_id_bench("Map"), |b| { + bench_group.bench_function(format_id_bench("map"), |b| { b.iter(|| { kv_store.map(&encrypted_key, |v| v); }) @@ -366,10 +371,9 @@ where } kv_stores.push(kv_store); - let mut group = c.benchmark_group("KVStore Throughput"); - group.throughput(Throughput::Elements(kv_stores.len() as u64)); + bench_group.throughput(Throughput::Elements(kv_stores.len() as u64)); - group.bench_function(format_id_bench("Map"), |b| { + bench_group.bench_function(format_id_bench("map::throughput"), |b| { b.iter(|| { kv_stores.par_iter_mut().for_each(|kv_store| { kv_store.map(&encrypted_key, |v| v); @@ -377,7 +381,7 @@ where }) }); - group.bench_function(format_id_bench("Update"), |b| { + bench_group.bench_function(format_id_bench("update::throughput"), |b| { b.iter(|| { kv_stores.par_iter_mut().for_each(|kv_store| { kv_store.update(&encrypted_key, &value_to_add); @@ -385,20 +389,21 @@ where }) }); - group.bench_function(format_id_bench("Get"), |b| { + bench_group.bench_function(format_id_bench("get::throughput"), |b| { b.iter(|| { kv_stores.par_iter_mut().for_each(|kv_store| { kv_store.get(&encrypted_key); }) }) }); - - group.finish(); } } + bench_group.finish(); } fn main() { + let env_config = EnvConfig::new(); + #[cfg(feature = "hpu")] let (cks, benched_device) = { // Hpu is enabled, start benchmark on Hpu hw accelerator @@ -437,30 +442,42 @@ fn main() { let mut c = Criterion::default().configure_from_args(); - bench_fhe_uint2(&mut c, &cks); - bench_fhe_uint4(&mut c, &cks); - bench_fhe_uint6(&mut c, &cks); - bench_fhe_uint8(&mut c, &cks); - 
bench_fhe_uint10(&mut c, &cks); - bench_fhe_uint12(&mut c, &cks); - bench_fhe_uint14(&mut c, &cks); - bench_fhe_uint16(&mut c, &cks); - bench_fhe_uint32(&mut c, &cks); - bench_fhe_uint64(&mut c, &cks); - bench_fhe_uint128(&mut c, &cks); + match env_config.bit_sizes_set { + BitSizesSet::Fast => { + bench_fhe_uint64(&mut c, &cks); - // KVStore Benches - if benched_device == tfhe::Device::Cpu { - for pow in 1..=10 { - bench_kv_store::(&mut c, &cks, 1 << pow); + // KVStore Benches + if benched_device == tfhe::Device::Cpu { + bench_kv_store::(&mut c, &cks, 1 << 10); + } } + _ => { + bench_fhe_uint2(&mut c, &cks); + bench_fhe_uint4(&mut c, &cks); + bench_fhe_uint6(&mut c, &cks); + bench_fhe_uint8(&mut c, &cks); + bench_fhe_uint10(&mut c, &cks); + bench_fhe_uint12(&mut c, &cks); + bench_fhe_uint14(&mut c, &cks); + bench_fhe_uint16(&mut c, &cks); + bench_fhe_uint32(&mut c, &cks); + bench_fhe_uint64(&mut c, &cks); + bench_fhe_uint128(&mut c, &cks); - for pow in 1..=10 { - bench_kv_store::(&mut c, &cks, 1 << pow); - } + // KVStore Benches + if benched_device == tfhe::Device::Cpu { + for pow in 1..=10 { + bench_kv_store::(&mut c, &cks, 1 << pow); + } - for pow in 1..=10 { - bench_kv_store::(&mut c, &cks, 1 << pow); + for pow in 1..=10 { + bench_kv_store::(&mut c, &cks, 1 << pow); + } + + for pow in 1..=10 { + bench_kv_store::(&mut c, &cks, 1 << pow); + } + } } } diff --git a/tfhe-benchmark/benches/high_level_api/noise_squash.rs b/tfhe-benchmark/benches/high_level_api/noise_squash.rs index 3491d85ad..fffc9c754 100644 --- a/tfhe-benchmark/benches/high_level_api/noise_squash.rs +++ b/tfhe-benchmark/benches/high_level_api/noise_squash.rs @@ -18,7 +18,8 @@ use benchmark::params_aliases::{ #[cfg(feature = "gpu")] use benchmark::utilities::configure_gpu; use benchmark::utilities::{ - get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType, + get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, BitSizesSet, EnvConfig, + OperatorType, }; use 
criterion::{Criterion, Throughput}; use rand::prelude::*; @@ -392,6 +393,8 @@ bench_sns_only_type!(FheUint128); bench_decomp_sns_comp_type!(FheUint64); fn main() { + let env_config = EnvConfig::new(); + #[cfg(feature = "hpu")] panic!("Noise squashing is not supported on HPU"); @@ -430,17 +433,24 @@ fn main() { let mut c = Criterion::default().configure_from_args(); - bench_sns_only_fhe_uint2(&mut c, params.as_slice()); - bench_sns_only_fhe_uint4(&mut c, params.as_slice()); - bench_sns_only_fhe_uint6(&mut c, params.as_slice()); - bench_sns_only_fhe_uint8(&mut c, params.as_slice()); - bench_sns_only_fhe_uint10(&mut c, params.as_slice()); - bench_sns_only_fhe_uint12(&mut c, params.as_slice()); - bench_sns_only_fhe_uint14(&mut c, params.as_slice()); - bench_sns_only_fhe_uint16(&mut c, params.as_slice()); - bench_sns_only_fhe_uint32(&mut c, params.as_slice()); - bench_sns_only_fhe_uint64(&mut c, params.as_slice()); - bench_sns_only_fhe_uint128(&mut c, params.as_slice()); + match env_config.bit_sizes_set { + BitSizesSet::Fast => { + bench_sns_only_fhe_uint64(&mut c, params.as_slice()); + } + _ => { + bench_sns_only_fhe_uint2(&mut c, params.as_slice()); + bench_sns_only_fhe_uint4(&mut c, params.as_slice()); + bench_sns_only_fhe_uint6(&mut c, params.as_slice()); + bench_sns_only_fhe_uint8(&mut c, params.as_slice()); + bench_sns_only_fhe_uint10(&mut c, params.as_slice()); + bench_sns_only_fhe_uint12(&mut c, params.as_slice()); + bench_sns_only_fhe_uint14(&mut c, params.as_slice()); + bench_sns_only_fhe_uint16(&mut c, params.as_slice()); + bench_sns_only_fhe_uint32(&mut c, params.as_slice()); + bench_sns_only_fhe_uint64(&mut c, params.as_slice()); + bench_sns_only_fhe_uint128(&mut c, params.as_slice()); + } + } bench_decomp_sns_comp_fhe_uint64(&mut c, params.as_slice());