From ef07963767c5e1a1a01241c67e977ad10743f20d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Test=C3=A9?= Date: Mon, 10 Nov 2025 17:46:46 +0100 Subject: [PATCH] chore(bench): print bench id before running the benchmark Done to work around a Criterion limitation: long benchmark IDs are automatically truncated. Printing the ID with a println!() call ensures the complete name is displayed before the benchmark is executed, which eases manual parsing and debugging. --- tfhe-benchmark/benches/boolean/bench.rs | 7 ++++ .../benches/core_crypto/ks_bench.rs | 8 +++++ .../benches/core_crypto/ks_pbs_bench.rs | 8 +++++ .../modulus_switch_noise_reduction.rs | 1 + .../benches/core_crypto/pbs128_bench.rs | 4 +++ .../benches/core_crypto/pbs_bench.rs | 12 +++++++ .../benches/high_level_api/bench.rs | 34 ++++++++++++++----- tfhe-benchmark/benches/high_level_api/dex.rs | 6 ++++ .../benches/high_level_api/erc20.rs | 6 ++++ .../benches/high_level_api/noise_squash.rs | 4 +++ tfhe-benchmark/benches/integer/aes.rs | 4 ++- tfhe-benchmark/benches/integer/aes256.rs | 4 ++- tfhe-benchmark/benches/integer/bench.rs | 27 +++++++++++++++ .../integer/glwe_packing_compression.rs | 8 +++++ .../integer/glwe_packing_compression_128b.rs | 4 +++ tfhe-benchmark/benches/integer/oprf.rs | 8 +++++ tfhe-benchmark/benches/integer/rerand.rs | 4 +++ .../benches/integer/signed_bench.rs | 23 +++++++++++++ tfhe-benchmark/benches/integer/zk_pke.rs | 12 +++++++ tfhe-benchmark/benches/shortint/bench.rs | 7 ++++ tfhe-benchmark/benches/shortint/casting.rs | 3 ++ .../shortint/glwe_packing_compression.rs | 18 +++++++--- tfhe-benchmark/benches/shortint/oprf.rs | 4 ++- 23 files changed, 200 insertions(+), 16 deletions(-) diff --git a/tfhe-benchmark/benches/boolean/bench.rs b/tfhe-benchmark/benches/boolean/bench.rs index d68fd473a..a42799821 100644 --- a/tfhe-benchmark/benches/boolean/bench.rs +++ b/tfhe-benchmark/benches/boolean/bench.rs @@ -51,30 +51,37 @@ fn benches(c: &mut Criterion, params: BooleanParameters, parameter_name: &str) { let ct3 
= cks.encrypt(true); let id = format!("AND::{parameter_name}"); + println!("{id}"); bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.and(&ct1, &ct2)))); write_to_json_boolean(&id, params, parameter_name, "and"); let id = format!("NAND::{parameter_name}"); + println!("{id}"); bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.nand(&ct1, &ct2)))); write_to_json_boolean(&id, params, parameter_name, "nand"); let id = format!("OR::{parameter_name}"); + println!("{id}"); bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.or(&ct1, &ct2)))); write_to_json_boolean(&id, params, parameter_name, "or"); let id = format!("XOR::{parameter_name}"); + println!("{id}"); bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.xor(&ct1, &ct2)))); write_to_json_boolean(&id, params, parameter_name, "xor"); let id = format!("XNOR::{parameter_name}"); + println!("{id}"); bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.xnor(&ct1, &ct2)))); write_to_json_boolean(&id, params, parameter_name, "xnor"); let id = format!("NOT::{parameter_name}"); + println!("{id}"); bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.not(&ct1)))); write_to_json_boolean(&id, params, parameter_name, "not"); let id = format!("MUX::{parameter_name}"); + println!("{id}"); bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.mux(&ct1, &ct2, &ct3)))); write_to_json_boolean(&id, params, parameter_name, "mux"); } diff --git a/tfhe-benchmark/benches/core_crypto/ks_bench.rs b/tfhe-benchmark/benches/core_crypto/ks_bench.rs index 24270e9f1..75bc33f70 100644 --- a/tfhe-benchmark/benches/core_crypto/ks_bench.rs +++ b/tfhe-benchmark/benches/core_crypto/ks_bench.rs @@ -73,6 +73,7 @@ fn keyswitch + Serialize>( ); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); { bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -84,6 +85,7 @@ fn keyswitch + Serialize>( } BenchmarkType::Throughput => { bench_id = format!("{bench_name}::throughput::{name}"); 
+ println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); // FIXME This number of element do not staturate the target machine bench_group.throughput(Throughput::Elements(elements)); @@ -231,6 +233,7 @@ fn packing_keyswitch( ); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); { bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -242,6 +245,7 @@ fn packing_keyswitch( } BenchmarkType::Throughput => { bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); bench_group.throughput(Throughput::Elements(elements)); @@ -414,6 +418,7 @@ mod cuda { let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); { bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -435,6 +440,7 @@ mod cuda { let gpu_count = get_number_of_gpus() as usize; bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); let elements_per_stream = elements as usize / gpu_count; @@ -644,6 +650,7 @@ mod cuda { streams.synchronize(); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); { bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -663,6 +670,7 @@ mod cuda { let gpu_count = get_number_of_gpus() as usize; bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let mem_size = get_packing_keyswitch_list_64_size_on_gpu( &CudaStreams::new_single_gpu(GpuIndex::new(0)), diff --git a/tfhe-benchmark/benches/core_crypto/ks_pbs_bench.rs b/tfhe-benchmark/benches/core_crypto/ks_pbs_bench.rs index a57f8184f..2b428b4c3 100644 --- a/tfhe-benchmark/benches/core_crypto/ks_pbs_bench.rs +++ b/tfhe-benchmark/benches/core_crypto/ks_pbs_bench.rs @@ -111,6 +111,7 @@ fn ks_pbs + Serialize>( ); bench_id = format!("{bench_name}::{name}"); + 
println!("{bench_id}"); { bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -134,6 +135,7 @@ fn ks_pbs + Serialize>( } BenchmarkType::Throughput => { bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); println!("Number of elements: {elements}"); // DEBUG @@ -370,6 +372,7 @@ fn multi_bit_ks_pbs< ); bench_id = format!("{bench_name}::{name}::parallelized"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { keyswitch_lwe_ciphertext( @@ -391,6 +394,7 @@ fn multi_bit_ks_pbs< } BenchmarkType::Throughput => { bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); println!("Number of elements: {elements}"); // DEBUG @@ -621,6 +625,7 @@ mod cuda { let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); { bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -652,6 +657,7 @@ mod cuda { let gpu_count = get_number_of_gpus() as usize; bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); let elements_per_stream = elements as usize / gpu_count; @@ -929,6 +935,7 @@ mod cuda { let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { cuda_keyswitch_lwe_ciphertext( @@ -958,6 +965,7 @@ mod cuda { let gpu_count = get_number_of_gpus() as usize; bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); let elements_per_stream = elements as usize / gpu_count; diff --git a/tfhe-benchmark/benches/core_crypto/modulus_switch_noise_reduction.rs 
b/tfhe-benchmark/benches/core_crypto/modulus_switch_noise_reduction.rs index 4a70440b4..a35d8f68b 100644 --- a/tfhe-benchmark/benches/core_crypto/modulus_switch_noise_reduction.rs +++ b/tfhe-benchmark/benches/core_crypto/modulus_switch_noise_reduction.rs @@ -62,6 +62,7 @@ fn modulus_switch_noise_reduction(c: &mut Criterion) { .measurement_time(std::time::Duration::from_secs(5)); let bench_name = format!("modulus_switch_noise_reduction_{count}"); + println!("{bench_name}"); bench_group.bench_function(&bench_name, |b| { b.iter(|| { diff --git a/tfhe-benchmark/benches/core_crypto/pbs128_bench.rs b/tfhe-benchmark/benches/core_crypto/pbs128_bench.rs index 3c7503252..528fb7137 100644 --- a/tfhe-benchmark/benches/core_crypto/pbs128_bench.rs +++ b/tfhe-benchmark/benches/core_crypto/pbs128_bench.rs @@ -296,6 +296,7 @@ mod cuda { CudaLweCiphertextList::from_lwe_ciphertext(&out_pbs_ct, &streams); bench_id = format!("{bench_name}::{params_name}"); + println!("{bench_id}"); { bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -317,6 +318,7 @@ mod cuda { let gpu_count = get_number_of_gpus() as usize; bench_id = format!("{bench_name}::throughput::{params_name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); let elements_per_stream = elements as usize / gpu_count; @@ -541,6 +543,7 @@ mod cuda { let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0); bench_id = format!("{bench_name}::{params_name}"); + println!("{bench_id}"); { bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -564,6 +567,7 @@ mod cuda { let gpu_count = get_number_of_gpus() as usize; bench_id = format!("{bench_name}::throughput::{params_name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); let elements_per_stream = elements as usize / gpu_count; diff --git a/tfhe-benchmark/benches/core_crypto/pbs_bench.rs b/tfhe-benchmark/benches/core_crypto/pbs_bench.rs index 4e8044bd3..b328ffd1a 
100644 --- a/tfhe-benchmark/benches/core_crypto/pbs_bench.rs +++ b/tfhe-benchmark/benches/core_crypto/pbs_bench.rs @@ -97,6 +97,7 @@ fn mem_optimized_pbs + Serialize>( ); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -114,6 +115,7 @@ fn mem_optimized_pbs + Serialize>( } BenchmarkType::Throughput => { bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); bench_group.throughput(Throughput::Elements(elements)); @@ -326,6 +328,7 @@ fn mem_optimized_batched_pbs + Serialize ); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { batch_programmable_bootstrap_lwe_ciphertext_mem_optimized( @@ -342,6 +345,7 @@ fn mem_optimized_batched_pbs + Serialize } BenchmarkType::Throughput => { bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); bench_group.throughput(Throughput::Elements(elements)); @@ -552,6 +556,7 @@ fn multi_bit_pbs< ); bench_id = format!("{bench_name}::{name}::parallelized"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { multi_bit_programmable_bootstrap_lwe_ciphertext( @@ -568,6 +573,7 @@ fn multi_bit_pbs< } BenchmarkType::Throughput => { bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); bench_group.throughput(Throughput::Elements(elements)); @@ -779,6 +785,7 @@ fn mem_optimized_pbs_ntt(c: &mut Criterion) { buffers.resize(stack_size); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { programmable_bootstrap_ntt64_lwe_ciphertext_mem_optimized( @@ -795,6 +802,7 @@ fn mem_optimized_pbs_ntt(c: &mut 
Criterion) { } BenchmarkType::Throughput => { bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); bench_group.throughput(Throughput::Elements(elements)); @@ -1020,6 +1028,7 @@ mod cuda { let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); { bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -1043,6 +1052,7 @@ mod cuda { let gpu_count = get_number_of_gpus() as usize; bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); let elements_per_stream = elements as usize / gpu_count; @@ -1280,6 +1290,7 @@ mod cuda { let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0); bench_id = format!("{bench_name}::{name}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { cuda_multi_bit_programmable_bootstrap_lwe_ciphertext( @@ -1301,6 +1312,7 @@ mod cuda { let gpu_count = get_number_of_gpus() as usize; bench_id = format!("{bench_name}::throughput::{name}"); + println!("{bench_id}"); let blocks: usize = 1; let elements = throughput_num_threads(blocks, 1); let elements_per_stream = elements as usize / gpu_count; diff --git a/tfhe-benchmark/benches/high_level_api/bench.rs b/tfhe-benchmark/benches/high_level_api/bench.rs index da02a07b1..7291132d7 100644 --- a/tfhe-benchmark/benches/high_level_api/bench.rs +++ b/tfhe-benchmark/benches/high_level_api/bench.rs @@ -70,6 +70,7 @@ fn bench_fhe_type( let mut bench_id; bench_id = format!("{bench_prefix}::add::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = &lhs + &rhs; @@ -80,6 +81,7 @@ fn bench_fhe_type( write_record(bench_id, "add"); bench_id = format!("{bench_prefix}::overflowing_add::{param_name}::{bit_size}_bits"); + 
println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let (res, flag) = lhs.overflowing_add(&rhs); @@ -90,6 +92,7 @@ fn bench_fhe_type( write_record(bench_id, "overflowing_add"); bench_id = format!("{bench_prefix}::overflowing_sub::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let (res, flag) = lhs.overflowing_sub(&rhs); @@ -100,6 +103,7 @@ fn bench_fhe_type( write_record(bench_id, "overflowing_sub"); bench_id = format!("{bench_prefix}::sub::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = &lhs - &rhs; @@ -110,6 +114,7 @@ fn bench_fhe_type( write_record(bench_id, "sub"); bench_id = format!("{bench_prefix}::mul::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = &lhs * &rhs; @@ -120,6 +125,7 @@ fn bench_fhe_type( write_record(bench_id, "mul"); bench_id = format!("{bench_prefix}::bitand::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = &lhs & &rhs; @@ -130,6 +136,7 @@ fn bench_fhe_type( write_record(bench_id, "bitand"); bench_id = format!("{bench_prefix}::bitor::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = &lhs | &rhs; @@ -140,6 +147,7 @@ fn bench_fhe_type( write_record(bench_id, "bitor"); bench_id = format!("{bench_prefix}::bitxor::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = &lhs ^ &rhs; @@ -150,6 +158,7 @@ fn bench_fhe_type( write_record(bench_id, "bitxor"); bench_id = format!("{bench_prefix}::left_shift::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = &lhs << &rhs; @@ -160,6 +169,7 @@ fn bench_fhe_type( 
write_record(bench_id, "left_shift"); bench_id = format!("{bench_prefix}::right_shift::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = &lhs >> &rhs; @@ -170,6 +180,7 @@ fn bench_fhe_type( write_record(bench_id, "right_shift"); bench_id = format!("{bench_prefix}::left_rotate::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = (&lhs).rotate_left(&rhs); @@ -180,6 +191,7 @@ fn bench_fhe_type( write_record(bench_id, "left_rotate"); bench_id = format!("{bench_prefix}::right_rotate::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = (&lhs).rotate_right(&rhs); @@ -190,6 +202,7 @@ fn bench_fhe_type( write_record(bench_id, "right_rotate"); bench_id = format!("{bench_prefix}::min::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = lhs.min(&rhs); @@ -200,6 +213,7 @@ fn bench_fhe_type( write_record(bench_id, "min"); bench_id = format!("{bench_prefix}::max::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let res = lhs.max(&rhs); @@ -294,12 +308,14 @@ where let mut kv_store = KVStore::new(); let mut rng = rand::thread_rng(); - let format_id_bench = |op_name: &str| -> String { - format!( + let format_and_print_bench_id = |op_name: &str| -> String { + let bench_id = format!( "KVStore::<{}, {}>::{op_name}/{num_elements}", TypeDisplayer::::default(), TypeDisplayer::::default(), - ) + ); + println!("{bench_id}"); + bench_id }; match BenchmarkType::from_env().unwrap() { @@ -318,19 +334,19 @@ where let value = rng.gen::(); let value_to_add = Value::encrypt(value, cks); - c.bench_function(&format_id_bench("Get"), |b| { + c.bench_function(&format_and_print_bench_id("Get"), |b| { b.iter(|| { let _ = kv_store.get(&encrypted_key); 
}) }); - c.bench_function(&format_id_bench("Update"), |b| { + c.bench_function(&format_and_print_bench_id("Update"), |b| { b.iter(|| { let _ = kv_store.update(&encrypted_key, &value_to_add); }) }); - c.bench_function(&format_id_bench("Map"), |b| { + c.bench_function(&format_and_print_bench_id("Map"), |b| { b.iter(|| { kv_store.map(&encrypted_key, |v| v); }) @@ -367,7 +383,7 @@ where let mut group = c.benchmark_group("KVStore Throughput"); group.throughput(Throughput::Elements(kv_stores.len() as u64)); - group.bench_function(format_id_bench("Map"), |b| { + group.bench_function(format_and_print_bench_id("Map"), |b| { b.iter(|| { kv_stores.par_iter_mut().for_each(|kv_store| { kv_store.map(&encrypted_key, |v| v); @@ -375,7 +391,7 @@ where }) }); - group.bench_function(format_id_bench("Update"), |b| { + group.bench_function(format_and_print_bench_id("Update"), |b| { b.iter(|| { kv_stores.par_iter_mut().for_each(|kv_store| { kv_store.update(&encrypted_key, &value_to_add); @@ -383,7 +399,7 @@ where }) }); - group.bench_function(format_id_bench("Get"), |b| { + group.bench_function(format_and_print_bench_id("Get"), |b| { b.iter(|| { kv_stores.par_iter_mut().for_each(|kv_store| { kv_store.get(&encrypted_key); diff --git a/tfhe-benchmark/benches/high_level_api/dex.rs b/tfhe-benchmark/benches/high_level_api/dex.rs index f802a38d7..7f981cc99 100644 --- a/tfhe-benchmark/benches/high_level_api/dex.rs +++ b/tfhe-benchmark/benches/high_level_api/dex.rs @@ -483,6 +483,7 @@ fn bench_swap_request_latency( let params_name = params.name(); let bench_id = format!("{bench_name}::{fn_name}::{type_name}"); + println!("{bench_id}"); c.bench_function(&bench_id, |b| { let mut rng = thread_rng(); @@ -556,6 +557,7 @@ fn bench_swap_request_throughput( let bench_id = format!( "{bench_name}::throughput::{fn_name}::{params_name}::{type_name}::{num_elems}_elems" ); + println!("{bench_id}"); group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| { let from_balances_0 = (0..num_elems) .map(|_| 
FheType::encrypt(rng.gen::(), client_key)) @@ -679,6 +681,7 @@ fn cuda_bench_swap_request_throughput( let bench_id = format!( "{bench_name}::throughput::{fn_name}::{params_name}::{type_name}::{num_elems}_elems" ); + println!("{bench_id}"); group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| { let from_balances_0 = (0..num_elems) .map(|_| FheType::encrypt(rng.gen::(), client_key)) @@ -881,6 +884,7 @@ fn bench_swap_claim_latency( let params_name = params.name(); let bench_id = format!("{bench_name}::{fn_name}::{params_name}::{type_name}"); + println!("{bench_id}"); c.bench_function(&bench_id, |b| { let mut rng = thread_rng(); @@ -960,6 +964,7 @@ fn bench_swap_claim_throughput( let bench_id = format!( "{bench_name}::throughput::{fn_name}::{params_name}::{type_name}::{num_elems}_elems" ); + println!("{bench_id}"); group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| { let pending_0_in = (0..num_elems) .map(|_| FheType::encrypt(rng.gen::(), client_key)) @@ -1101,6 +1106,7 @@ fn cuda_bench_swap_claim_throughput( let bench_id = format!( "{bench_name}::throughput::{fn_name}::{params_name}::{type_name}::{num_elems}_elems" ); + println!("{bench_id}"); group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| { let pending_0_in = (0..num_elems) .map(|_| FheType::encrypt(rng.gen::(), client_key)) diff --git a/tfhe-benchmark/benches/high_level_api/erc20.rs b/tfhe-benchmark/benches/high_level_api/erc20.rs index 04bd76016..fff08c929 100644 --- a/tfhe-benchmark/benches/high_level_api/erc20.rs +++ b/tfhe-benchmark/benches/high_level_api/erc20.rs @@ -356,6 +356,7 @@ fn bench_transfer_latency( let params_name = params.name(); let bench_id = format!("{bench_name}::{fn_name}::{params_name}::{type_name}"); + println!("{bench_id}"); c.bench_function(&bench_id, |b| { let mut rng = thread_rng(); @@ -409,6 +410,7 @@ fn bench_transfer_latency_simd( let params_name = params.name(); let bench_id = format!("{bench_name}::{fn_name}::{params_name}::{type_name}"); + 
println!("{bench_id}"); c.bench_function(&bench_id, |b| { let mut rng = thread_rng(); @@ -466,6 +468,7 @@ fn bench_transfer_throughput( let bench_id = format!( "{bench_name}::throughput::{fn_name}::{params_name}::{type_name}::{num_elems}_elems" ); + println!("{bench_id}"); group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| { let from_amounts = (0..num_elems) .map(|_| FheType::encrypt(rng.gen::(), client_key)) @@ -529,6 +532,7 @@ fn cuda_bench_transfer_throughput( let bench_id = format!( "{bench_name}::throughput::{fn_name}::{params_name}::{type_name}::{num_elems}_elems" ); + println!("{bench_id}"); group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| { let from_amounts = (0..num_elems) .map(|_| FheType::encrypt(rng.gen::(), client_key)) @@ -610,6 +614,7 @@ fn hpu_bench_transfer_throughput( let bench_id = format!( "{bench_name}::throughput::{fn_name}::{params_name}::{type_name}::{num_elems}_elems" ); + println!("{bench_id}"); group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| { let from_amounts = (0..num_elems) .map(|_| FheType::encrypt(rng.gen::(), client_key)) @@ -683,6 +688,7 @@ fn hpu_bench_transfer_throughput_simd( group.throughput(Throughput::Elements(real_num_elems)); let bench_id = format!("{bench_name}::throughput::{fn_name}::{params_name}::{type_name}::{real_num_elems}_elems"); + println!("{bench_id}"); group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| { let from_amounts = (0..num_elems) .map(|_| { diff --git a/tfhe-benchmark/benches/high_level_api/noise_squash.rs b/tfhe-benchmark/benches/high_level_api/noise_squash.rs index 3491d85ad..0569f860f 100644 --- a/tfhe-benchmark/benches/high_level_api/noise_squash.rs +++ b/tfhe-benchmark/benches/high_level_api/noise_squash.rs @@ -91,6 +91,7 @@ fn bench_sns_only_fhe_type( match get_bench_type() { BenchmarkType::Latency => { bench_id = format!("{bench_id_prefix}::{bench_id_suffix}"); + println!("{bench_id}"); #[cfg(feature = "gpu")] configure_gpu(&client_key); @@ -105,6 
+106,7 @@ fn bench_sns_only_fhe_type( } BenchmarkType::Throughput => { bench_id = format!("{bench_id_prefix}::throughput::{bench_id_suffix}"); + println!("{bench_id}"); let params = client_key.computation_parameters(); let num_blocks = num_bits .div_ceil((params.message_modulus().0 * params.carry_modulus().0).ilog2() as usize); @@ -233,6 +235,7 @@ fn bench_decomp_sns_comp_fhe_type( match get_bench_type() { BenchmarkType::Latency => { bench_id = format!("{bench_id_prefix}::{bench_id_suffix}"); + println!("{bench_id}"); #[cfg(feature = "gpu")] configure_gpu(&client_key); @@ -255,6 +258,7 @@ fn bench_decomp_sns_comp_fhe_type( } BenchmarkType::Throughput => { bench_id = format!("{bench_id_prefix}::throughput::{bench_id_suffix}"); + println!("{bench_id}"); let params = client_key.computation_parameters(); let num_blocks = num_bits .div_ceil((params.message_modulus().0 * params.carry_modulus().0).ilog2() as usize); diff --git a/tfhe-benchmark/benches/integer/aes.rs b/tfhe-benchmark/benches/integer/aes.rs index a842f1a12..14c3f79a1 100644 --- a/tfhe-benchmark/benches/integer/aes.rs +++ b/tfhe-benchmark/benches/integer/aes.rs @@ -44,6 +44,7 @@ pub mod cuda { const NUM_AES_INPUTS: usize = 1; const SBOX_PARALLELISM: usize = 16; let bench_id = format!("{param_name}::{NUM_AES_INPUTS}_input_encryption"); + println!("{bench_id}"); let round_keys = sks.key_expansion(&d_key, &streams); @@ -73,7 +74,7 @@ pub mod cuda { { let bench_id = format!("{param_name}::key_expansion"); - + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { black_box(sks.key_expansion(&d_key, &streams)); @@ -109,6 +110,7 @@ pub mod cuda { let round_keys = sks.key_expansion(&d_key, &streams); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { black_box(sks.aes_encrypt( diff --git a/tfhe-benchmark/benches/integer/aes256.rs b/tfhe-benchmark/benches/integer/aes256.rs index 60f88f0b6..4fff52a4d 100644 --- a/tfhe-benchmark/benches/integer/aes256.rs +++ 
b/tfhe-benchmark/benches/integer/aes256.rs @@ -48,6 +48,7 @@ pub mod cuda { const NUM_AES_INPUTS: usize = 1; const SBOX_PARALLELISM: usize = 16; let bench_id = format!("{param_name}::{NUM_AES_INPUTS}_input_encryption"); + println!("{bench_id}"); let round_keys = sks.key_expansion_256(&d_key, &streams); @@ -77,7 +78,7 @@ pub mod cuda { { let bench_id = format!("{param_name}::key_expansion"); - + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { black_box(sks.key_expansion_256(&d_key, &streams)); @@ -114,6 +115,7 @@ pub mod cuda { let round_keys = sks.key_expansion_256(&d_key, &streams); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { black_box(sks.aes_256_encrypt( diff --git a/tfhe-benchmark/benches/integer/bench.rs b/tfhe-benchmark/benches/integer/bench.rs index ae565828c..259473130 100644 --- a/tfhe-benchmark/benches/integer/bench.rs +++ b/tfhe-benchmark/benches/integer/bench.rs @@ -55,6 +55,7 @@ fn bench_server_key_binary_function_dirty_inputs( let keys = LazyCell::new(move || KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix)); let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, sks) = (&keys.0, &keys.1); @@ -138,6 +139,7 @@ fn bench_server_key_binary_function_clean_inputs( }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, ct_0, ct_1) = (&bench_data.0, &bench_data.1, &bench_data.2); b.iter(|| { @@ -159,6 +161,7 @@ fn bench_server_key_binary_function_clean_inputs( let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -229,6 +232,7 @@ fn 
bench_server_key_unary_function_dirty_inputs( let keys = LazyCell::new(move || KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix)); let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, sks) = (&keys.0, &keys.1); @@ -307,6 +311,7 @@ fn bench_server_key_unary_function_clean_inputs( }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, ct_0) = (&bench_data.0, &bench_data.1); @@ -327,6 +332,7 @@ fn bench_server_key_unary_function_clean_inputs( let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -389,6 +395,7 @@ fn bench_server_key_binary_scalar_function_dirty_inputs( let keys = LazyCell::new(move || KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix)); let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, sks) = (&keys.0, &keys.1); @@ -476,6 +483,7 @@ fn bench_server_key_binary_scalar_function_clean_inputs( }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, ct_0, clear_1) = (&bench_data.0, &bench_data.1, bench_data.2); @@ -497,6 +505,7 @@ fn bench_server_key_binary_scalar_function_clean_inputs( let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -605,6 +614,7 @@ fn 
if_then_else_parallelized(c: &mut Criterion) { }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, condition, true_ct, false_ct) = (&bench_data.0, &bench_data.1, &bench_data.2, &bench_data.3); @@ -629,6 +639,7 @@ fn if_then_else_parallelized(c: &mut Criterion) { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -713,6 +724,7 @@ fn flip_parallelized(c: &mut Criterion) { }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, condition, true_ct, false_ct) = (&bench_data.0, &bench_data.1, &bench_data.2, &bench_data.3); @@ -737,6 +749,7 @@ fn flip_parallelized(c: &mut Criterion) { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -826,6 +839,7 @@ fn ciphertexts_sum_parallelized(c: &mut Criterion) { }); bench_id = format!("{bench_name}_{len}_ctxts::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, ctxts) = (&bench_data.0, &bench_data.1); @@ -855,6 +869,7 @@ fn ciphertexts_sum_parallelized(c: &mut Criterion) { bench_id = format!( "{bench_name}_{len}_ctxts::throughput::{param_name}::{bit_size}_bits" ); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -1531,6 +1546,7 @@ mod cuda { BenchmarkType::Latency => { let streams = CudaStreams::new_multi_gpu(); bench_id = 
format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _cpu_sks) = @@ -1565,6 +1581,7 @@ mod cuda { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -1649,6 +1666,7 @@ mod cuda { BenchmarkType::Latency => { let streams = CudaStreams::new_multi_gpu(); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _cpu_sks) = @@ -1692,6 +1710,7 @@ mod cuda { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -1796,6 +1815,7 @@ mod cuda { .measurement_time(std::time::Duration::from_secs(30)); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}"); // FIXME it makes no sense to duplicate `bit_size` + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); @@ -1841,6 +1861,7 @@ mod cuda { bench_id = format!( "{bench_name}::throughput::{param_name}::{bit_size}_bits_scalar_{bit_size}" ); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -1922,6 +1943,7 @@ mod cuda { let stream = CudaStreams::new_multi_gpu(); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _cpu_sks) = @@ 
-1972,6 +1994,7 @@ mod cuda { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -2890,6 +2913,7 @@ mod cuda { target_num_blocks * param.message_modulus().0.ilog2() as usize; let bench_id = format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); let gpu_sks = CudaServerKey::new(&cks, &stream); @@ -2994,6 +3018,7 @@ mod hpu { match get_bench_type() { BenchmarkType::Latency => { bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); let hpu_device_mutex = KEY_CACHE.get_hpu_device(param); @@ -3046,6 +3071,7 @@ mod hpu { } BenchmarkType::Throughput => { bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(120)); @@ -3675,6 +3701,7 @@ fn bench_server_key_cast_function( for target_num_blocks in all_num_blocks.iter().copied() { let target_bit_size = target_num_blocks * param.message_modulus().0.ilog2() as usize; let bench_id = format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); diff --git a/tfhe-benchmark/benches/integer/glwe_packing_compression.rs b/tfhe-benchmark/benches/integer/glwe_packing_compression.rs index 199ef2886..1ec614f75 100644 --- a/tfhe-benchmark/benches/integer/glwe_packing_compression.rs +++ 
b/tfhe-benchmark/benches/integer/glwe_packing_compression.rs @@ -54,6 +54,7 @@ fn cpu_glwe_packing(c: &mut Criterion) { builder.push(ct); bench_id_pack = format!("{bench_name}::pack_u{bit_size}"); + println!("{bench_id_pack}"); bench_group.bench_function(&bench_id_pack, |b| { b.iter(|| { let compressed = builder.build(&compression_key); @@ -65,6 +66,7 @@ fn cpu_glwe_packing(c: &mut Criterion) { let compressed = builder.build(&compression_key); bench_id_unpack = format!("{bench_name}::unpack_u{bit_size}"); + println!("{bench_id_unpack}"); bench_group.bench_function(&bench_id_unpack, |b| { b.iter(|| { let unpacked: RadixCiphertext = @@ -105,6 +107,7 @@ fn cpu_glwe_packing(c: &mut Criterion) { .collect::>(); bench_id_pack = format!("{bench_name}::throughput::pack_u{bit_size}"); + println!("{bench_id_pack}"); bench_group.bench_function(&bench_id_pack, |b| { b.iter(|| { builders.par_iter().for_each(|builder| { @@ -119,6 +122,7 @@ fn cpu_glwe_packing(c: &mut Criterion) { .collect::>(); bench_id_unpack = format!("{bench_name}::throughput::unpack_u{bit_size}"); + println!("{bench_id_unpack}"); bench_group.bench_function(&bench_id_unpack, |b| { b.iter(|| { compressed.par_iter().for_each(|comp| { @@ -225,6 +229,7 @@ mod cuda { builder.push(d_ct, &stream); bench_id_pack = format!("{bench_name}::pack_u{bit_size}"); + println!("{bench_id_pack}"); bench_group.bench_function(&bench_id_pack, |b| { b.iter(|| { let compressed = builder.build(&cuda_compression_key, &stream); @@ -250,6 +255,7 @@ mod cuda { let local_streams = cuda_local_streams(num_block, elements as usize); bench_id_pack = format!("{bench_name}::throughput::pack_u{bit_size}"); + println!("{bench_id_pack}"); let cuda_compression_key_vec = (0..get_number_of_gpus()) .into_par_iter() .map(|i| { @@ -353,6 +359,7 @@ mod cuda { let compressed = builder.build(&cuda_compression_key, &stream); bench_id_unpack = format!("{bench_name}::unpack_u{bit_size}"); + println!("{bench_id_unpack}"); 
bench_group.bench_function(&bench_id_unpack, |b| { b.iter(|| { let unpacked: CudaUnsignedRadixCiphertext = compressed @@ -381,6 +388,7 @@ mod cuda { let local_streams = cuda_local_streams(num_block, elements as usize); bench_id_unpack = format!("{bench_name}::throughput::unpack_u{bit_size}"); + println!("{bench_id_unpack}"); let builders = (0..elements) .into_par_iter() .map(|i| { diff --git a/tfhe-benchmark/benches/integer/glwe_packing_compression_128b.rs b/tfhe-benchmark/benches/integer/glwe_packing_compression_128b.rs index 8fd92a595..b9c1f5c6a 100644 --- a/tfhe-benchmark/benches/integer/glwe_packing_compression_128b.rs +++ b/tfhe-benchmark/benches/integer/glwe_packing_compression_128b.rs @@ -89,6 +89,7 @@ mod cuda { builder.push(d_ns_ct, &stream); bench_id_pack = format!("{bench_name}::pack_u{bit_size}"); + println!("{bench_id_pack}"); bench_group.bench_function(&bench_id_pack, |b| { b.iter(|| { let compressed = @@ -157,6 +158,7 @@ mod cuda { .collect::>(); bench_id_pack = format!("{bench_name}::throughput::pack_u{bit_size}"); + println!("{bench_id_pack}"); bench_group.bench_function(&bench_id_pack, |b| { b.iter(|| { builders.par_iter().for_each( @@ -233,6 +235,7 @@ mod cuda { let compressed = builder.build(&cuda_noise_squashing_compression_key, &stream); bench_id_unpack = format!("{bench_name}::unpack_u{bit_size}"); + println!("{bench_id_unpack}"); bench_group.bench_function(&bench_id_unpack, |b| { b.iter(|| { let unpacked: CudaSquashedNoiseRadixCiphertext = @@ -310,6 +313,7 @@ mod cuda { .collect::>(); bench_id_unpack = format!("{bench_name}::throughput::unpack_u{bit_size}"); + println!("{bench_id_unpack}"); bench_group.bench_function(&bench_id_unpack, |b| { b.iter(|| { compressed.par_iter().enumerate().for_each(|(i, comp)| { diff --git a/tfhe-benchmark/benches/integer/oprf.rs b/tfhe-benchmark/benches/integer/oprf.rs index 244ddd118..df90aefe5 100644 --- a/tfhe-benchmark/benches/integer/oprf.rs +++ b/tfhe-benchmark/benches/integer/oprf.rs @@ -31,6 +31,7 @@ 
pub fn unsigned_oprf(c: &mut Criterion) { bench_id_oprf_bounded = format!("{bench_name}_bounded::{param_name}::{bit_size}_bits"); + println!("{bench_id_oprf}"); bench_group.bench_function(&bench_id_oprf, |b| { let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); @@ -42,6 +43,7 @@ pub fn unsigned_oprf(c: &mut Criterion) { }) }); + println!("{bench_id_oprf_bounded}"); bench_group.bench_function(&bench_id_oprf_bounded, |b| { let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); @@ -75,6 +77,7 @@ pub fn unsigned_oprf(c: &mut Criterion) { let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); + println!("{bench_id_oprf}"); bench_group.bench_function(&bench_id_oprf, |b| { b.iter(|| { (0..elements).into_par_iter().for_each(|_| { @@ -86,6 +89,7 @@ pub fn unsigned_oprf(c: &mut Criterion) { }) }); + println!("{bench_id_oprf_bounded}"); bench_group.bench_function(&bench_id_oprf_bounded, |b| { b.iter(|| { (0..elements).into_par_iter().for_each(|_| { @@ -152,6 +156,7 @@ pub mod cuda { bench_id_oprf_bounded = format!("{bench_name}_bounded::{param_name}::{bit_size}_bits"); + println!("{bench_id_oprf}"); bench_group.bench_function(&bench_id_oprf, |b| { let (cks, _cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); @@ -168,6 +173,7 @@ pub mod cuda { }) }); + println!("{bench_id_oprf_bounded}"); bench_group.bench_function(&bench_id_oprf_bounded, |b| { let (cks, _cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); @@ -206,6 +212,7 @@ pub mod cuda { let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); + println!("{bench_id_oprf}"); bench_group.bench_function(&bench_id_oprf, |b| { b.iter(|| { (0..elements).into_par_iter().for_each(|i| { @@ -221,6 +228,7 @@ pub mod cuda { }) }); + println!("{bench_id_oprf_bounded}"); bench_group.bench_function(&bench_id_oprf_bounded, |b| { b.iter(|| { 
(0..elements).into_par_iter().for_each(|i| { diff --git a/tfhe-benchmark/benches/integer/rerand.rs b/tfhe-benchmark/benches/integer/rerand.rs index b297a3148..e7dcd5596 100644 --- a/tfhe-benchmark/benches/integer/rerand.rs +++ b/tfhe-benchmark/benches/integer/rerand.rs @@ -69,6 +69,7 @@ fn execute_cpu_re_randomize(c: &mut Criterion, bit_size: usize) { let mut d_re_randomized = decompressed.clone(); bench_id = format!("{bench_name}::latency_u{bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter_batched( || { @@ -116,6 +117,7 @@ fn execute_cpu_re_randomize(c: &mut Criterion, bit_size: usize) { .collect(); bench_id = format!("{bench_name}::throughput_u{bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter_batched( || { @@ -257,6 +259,7 @@ mod cuda { let mut d_re_randomized = d_decompressed.duplicate(&streams); bench_id = format!("{bench_name}::latency_u{bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter_batched( || { @@ -331,6 +334,7 @@ mod cuda { .collect(); bench_id = format!("{bench_name}::throughput_u{bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter_batched( || { diff --git a/tfhe-benchmark/benches/integer/signed_bench.rs b/tfhe-benchmark/benches/integer/signed_bench.rs index 8b773eb24..92e47e593 100644 --- a/tfhe-benchmark/benches/integer/signed_bench.rs +++ b/tfhe-benchmark/benches/integer/signed_bench.rs @@ -55,6 +55,7 @@ fn bench_server_key_signed_binary_function_clean_inputs( }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, ct_0, ct_1) = (&bench_data.0, &bench_data.1, &bench_data.2); @@ -153,6 +154,7 @@ fn bench_server_key_signed_shift_function_clean_inputs( }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, 
ct_0, ct_1) = (&bench_data.0, &bench_data.1, &bench_data.2); @@ -174,6 +176,7 @@ fn bench_server_key_signed_shift_function_clean_inputs( let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -251,6 +254,7 @@ fn bench_server_key_unary_function_clean_inputs( }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, ct_0) = (&bench_data.0, &bench_data.1); b.iter(|| { @@ -269,6 +273,7 @@ fn bench_server_key_unary_function_clean_inputs( let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -333,6 +338,7 @@ fn signed_if_then_else_parallelized(c: &mut Criterion) { }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, condition, true_ct, false_ct) = (&bench_data.0, &bench_data.1, &bench_data.2, &bench_data.3); @@ -353,6 +359,7 @@ fn signed_if_then_else_parallelized(c: &mut Criterion) { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -858,6 
+865,7 @@ fn bench_server_key_binary_scalar_function_clean_inputs( match get_bench_type() { BenchmarkType::Latency => { bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); @@ -894,6 +902,7 @@ fn bench_server_key_binary_scalar_function_clean_inputs( let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -1046,6 +1055,7 @@ fn signed_flip_parallelized(c: &mut Criterion) { }); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (sks, condition, true_ct, false_ct) = (&bench_data.0, &bench_data.1, &bench_data.2, &bench_data.3); @@ -1070,6 +1080,7 @@ fn signed_flip_parallelized(c: &mut Criterion) { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group .sample_size(10) .measurement_time(std::time::Duration::from_secs(30)); @@ -1448,6 +1459,7 @@ fn bench_server_key_signed_cast_function( for target_num_blocks in all_num_blocks.iter().copied() { let target_bit_size = target_num_blocks * param.message_modulus().0.ilog2() as usize; let bench_id = format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); @@ -1543,6 +1555,7 @@ mod cuda { let stream = CudaStreams::new_multi_gpu(); 
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _cpu_sks) = @@ -1594,6 +1607,7 @@ mod cuda { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -1714,6 +1728,7 @@ mod cuda { let stream = CudaStreams::new_multi_gpu(); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _cpu_sks) = @@ -1752,6 +1767,7 @@ mod cuda { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -1858,6 +1874,7 @@ mod cuda { bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); @@ -1904,6 +1921,7 @@ mod cuda { bench_id = format!( "{bench_name}::throughput::{param_name}::{bit_size}_bits_scalar_{bit_size}" ); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -2020,6 +2038,7 @@ mod cuda { let streams = CudaStreams::new_multi_gpu(); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| 
{ let (cks, _cpu_sks) = @@ -2071,6 +2090,7 @@ mod cuda { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -2185,6 +2205,7 @@ mod cuda { let stream = CudaStreams::new_multi_gpu(); bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _cpu_sks) = @@ -2235,6 +2256,7 @@ mod cuda { let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + println!("{bench_id}"); let elements = throughput_num_threads(num_block, pbs_count); bench_group.throughput(Throughput::Elements(elements)); bench_group.bench_function(&bench_id, |b| { @@ -3054,6 +3076,7 @@ mod cuda { target_num_blocks * param.message_modulus().0.ilog2() as usize; let bench_id = format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let (cks, _sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); let gpu_sks = CudaServerKey::new(&cks, &stream); diff --git a/tfhe-benchmark/benches/integer/zk_pke.rs b/tfhe-benchmark/benches/integer/zk_pke.rs index 889afb982..440f4426f 100644 --- a/tfhe-benchmark/benches/integer/zk_pke.rs +++ b/tfhe-benchmark/benches/integer/zk_pke.rs @@ -128,6 +128,7 @@ fn cpu_pke_zk_proof(c: &mut Criterion) { bench_id = format!( "{bench_name}::{param_name}_{bits}_bits_packed_{crs_size}_bits_crs_{zk_load}_ZK{zk_vers:?}" ); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let input_msg = rng.gen::(); let messages = vec![input_msg; fhe_uint_count]; @@ 
-149,6 +150,7 @@ fn cpu_pke_zk_proof(c: &mut Criterion) { bench_id = format!( "{bench_name}::throughput::{param_name}_{bits}_bits_packed_{crs_size}_bits_crs_{zk_load}_ZK{zk_vers:?}" ); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let messages = (0..elements) .map(|_| { @@ -340,12 +342,14 @@ fn cpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) { vec![], ); + println!("{bench_id_verify}"); bench_group.bench_function(&bench_id_verify, |b| { b.iter(|| { let _ret = ct1.verify(&crs, &pk, &metadata); }); }); + println!("{bench_id_verify_and_expand}"); bench_group.bench_function(&bench_id_verify_and_expand, |b| { b.iter(|| { let _ret = ct1 @@ -386,6 +390,7 @@ fn cpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) { }) .collect::>(); + println!("{bench_id_verify}"); bench_group.bench_function(&bench_id_verify, |b| { b.iter(|| { cts.par_iter().for_each(|ct1| { @@ -394,6 +399,7 @@ fn cpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) { }); }); + println!("{bench_id_verify_and_expand}"); bench_group.bench_function(&bench_id_verify_and_expand, |b| { b.iter(|| { cts.par_iter().for_each(|ct1| { @@ -622,12 +628,14 @@ mod cuda { vec![], ); + println!("{bench_id_verify}"); bench_group.bench_function(&bench_id_verify, |b| { b.iter(|| { let _ret = ct1.verify(&crs, &pk, &metadata); }); }); + println!("{bench_id_expand_without_verify}"); bench_group.bench_function(&bench_id_expand_without_verify, |b| { b.iter(|| { let _ret = gpu_ct1 @@ -636,6 +644,7 @@ mod cuda { }); }); + println!("{bench_id_verify_and_expand}"); bench_group.bench_function(&bench_id_verify_and_expand, |b| { b.iter(|| { let _ret = gpu_ct1 @@ -688,6 +697,7 @@ mod cuda { }) .collect::>(); + println!("{bench_id_verify}"); bench_group.bench_function(&bench_id_verify, |b| { b.iter(|| { cts.par_iter().for_each(|ct1| { @@ -696,6 +706,7 @@ mod cuda { }); }); + println!("{bench_id_expand_without_verify}"); bench_group.bench_function(&bench_id_expand_without_verify, |b| { let 
setup_encrypted_values = || { let gpu_cts = cts.iter().enumerate().map(|(i, ct)| { @@ -725,6 +736,7 @@ mod cuda { }, BatchSize::SmallInput); }); + println!("{bench_id_verify_and_expand}"); bench_group.bench_function(&bench_id_verify_and_expand, |b| { let setup_encrypted_values = || { let gpu_cts = cts.iter().enumerate().map(|(i, ct)| { diff --git a/tfhe-benchmark/benches/shortint/bench.rs b/tfhe-benchmark/benches/shortint/bench.rs index 68a38fa43..8a0b976ea 100644 --- a/tfhe-benchmark/benches/shortint/bench.rs +++ b/tfhe-benchmark/benches/shortint/bench.rs @@ -34,6 +34,7 @@ fn bench_server_key_unary_function( let mut ct = cks.encrypt(clear_text); let bench_id = format!("{bench_name}::{}", param.name()); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { unary_op(sks, &mut ct); @@ -79,6 +80,7 @@ fn bench_server_key_binary_function( let mut ct_1 = cks.encrypt(clear_1); let bench_id = format!("{bench_name}::{}", param.name()); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { binary_op(sks, &mut ct_0, &mut ct_1); @@ -123,6 +125,7 @@ fn bench_server_key_binary_scalar_function( let mut ct_0 = cks.encrypt(clear_0); let bench_id = format!("{bench_name}::{}", param.name()); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { binary_op(sks, &mut ct_0, clear_1 as u8); @@ -171,6 +174,7 @@ fn bench_server_key_binary_scalar_division_function( let mut ct_0 = cks.encrypt(clear_0); let bench_id = format!("{bench_name}::{}", param.name()); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { binary_op(sks, &mut ct_0, clear_1 as u8); @@ -207,6 +211,7 @@ fn carry_extract_bench(c: &mut Criterion) { let ct_0 = cks.encrypt(clear_0); let bench_id = format!("shortint::carry_extract::{}", param.name()); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let _ = sks.carry_extract(&ct_0); @@ -245,6 +250,7 @@ fn 
programmable_bootstrapping_bench(c: &mut Criterion) { let ctxt = cks.encrypt(clear_0); let bench_id = format!("shortint::programmable_bootstrap::{}", param.name()); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { @@ -288,6 +294,7 @@ fn server_key_from_compressed_key(c: &mut Criterion) { let sks_compressed = CompressedServerKey::new(keys.client_key()); let bench_id = format!("shortint::uncompress_key::{}", param.name()); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { let clone_compressed_key = || sks_compressed.clone(); diff --git a/tfhe-benchmark/benches/shortint/casting.rs b/tfhe-benchmark/benches/shortint/casting.rs index f0162ac3e..499568cb5 100644 --- a/tfhe-benchmark/benches/shortint/casting.rs +++ b/tfhe-benchmark/benches/shortint/casting.rs @@ -26,6 +26,7 @@ pub fn pack_cast_64(c: &mut Criterion) { let vec_ct = vec![client_key_1.encrypt(1); 64]; let bench_id = format!("{bench_name}_{ks_param_name}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let _ = (0..32) @@ -78,6 +79,7 @@ pub fn pack_cast(c: &mut Criterion) { let ct_2 = client_key_1.encrypt(1); let bench_id = format!("{bench_name}_{ks_param_name}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let _ = ksk.cast( @@ -118,6 +120,7 @@ pub fn cast(c: &mut Criterion) { let ct = client_key_1.encrypt(1); let bench_id = format!("{bench_name}_{ks_param_name}"); + println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter(|| { let _ = ksk.cast(&ct); diff --git a/tfhe-benchmark/benches/shortint/glwe_packing_compression.rs b/tfhe-benchmark/benches/shortint/glwe_packing_compression.rs index 78a633e46..f2f1b58d4 100644 --- a/tfhe-benchmark/benches/shortint/glwe_packing_compression.rs +++ b/tfhe-benchmark/benches/shortint/glwe_packing_compression.rs @@ -24,6 +24,8 @@ fn glwe_packing(c: &mut Criterion) { let ct: Vec<_> = (0..number_to_pack).map(|_| cks.encrypt(0)).collect(); 
+ let bench_id = format!("{bench_name}::pack"); + println!("{bench_id}"); bench_group.bench_function("pack".to_owned(), |b| { b.iter(|| { let packed = compression_key.compress_ciphertexts_into_list(&ct); @@ -32,8 +34,10 @@ fn glwe_packing(c: &mut Criterion) { }) }); + let bench_id = format!("{bench_name}::unpack_all"); + println!("{bench_id}"); let packed = compression_key.compress_ciphertexts_into_list(&ct); - bench_group.bench_function("unpack_all".to_owned(), |b| { + bench_group.bench_function(bench_id, |b| { b.iter(|| { (0..number_to_pack).into_par_iter().for_each(|i| { let unpacked = decompression_key.unpack(&packed, i); @@ -43,7 +47,9 @@ fn glwe_packing(c: &mut Criterion) { }) }); - bench_group.bench_function("unpack_one_lwe".to_owned(), |b| { + let bench_id = format!("{bench_name}::unpack_one_lwe"); + println!("{bench_id}"); + bench_group.bench_function(bench_id, |b| { b.iter(|| { let unpacked = decompression_key.unpack(&packed, 0); @@ -51,7 +57,9 @@ fn glwe_packing(c: &mut Criterion) { }) }); - bench_group.bench_function("unpack_64b".to_owned(), |b| { + let bench_id = format!("{bench_name}::unpack_64b"); + println!("{bench_id}"); + bench_group.bench_function(bench_id, |b| { b.iter(|| { (0..32).into_par_iter().for_each(|i| { let unpacked = decompression_key.unpack(&packed, i); @@ -61,7 +69,9 @@ fn glwe_packing(c: &mut Criterion) { }) }); - bench_group.bench_function("pack_unpack".to_owned(), |b| { + let bench_id = format!("{bench_name}::pack_unpack"); + println!("{bench_id}"); + bench_group.bench_function(bench_id, |b| { b.iter(|| { let packed = compression_key.compress_ciphertexts_into_list(&ct); diff --git a/tfhe-benchmark/benches/shortint/oprf.rs b/tfhe-benchmark/benches/shortint/oprf.rs index 6e6579aa2..b6ce5d4d5 100644 --- a/tfhe-benchmark/benches/shortint/oprf.rs +++ b/tfhe-benchmark/benches/shortint/oprf.rs @@ -14,7 +14,9 @@ fn oprf(c: &mut Criterion) { let keys = KEY_CACHE.get_from_param(param); let sks = keys.server_key(); - 
bench_group.bench_function(format!("2-bits-oprf::{}", param.name()), |b| { + let bench_id = format!("2-bits-oprf::{}", param.name()); + println!("{bench_id}"); + bench_group.bench_function(bench_id, |b| { b.iter(|| { _ = black_box(sks.generate_oblivious_pseudo_random(Seed(0), 2)); })