mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-09 22:57:59 -05:00
@@ -44,7 +44,6 @@ pub mod cuda {
|
||||
const NUM_AES_INPUTS: usize = 1;
|
||||
const SBOX_PARALLELISM: usize = 16;
|
||||
let bench_id = format!("{param_name}::{NUM_AES_INPUTS}_input_encryption");
|
||||
println!("{bench_id}");
|
||||
|
||||
let round_keys = sks.key_expansion(&d_key, &streams);
|
||||
|
||||
@@ -74,7 +73,6 @@ pub mod cuda {
|
||||
|
||||
{
|
||||
let bench_id = format!("{param_name}::key_expansion");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
black_box(sks.key_expansion(&d_key, &streams));
|
||||
@@ -122,7 +120,6 @@ pub mod cuda {
|
||||
|
||||
let round_keys = sks.key_expansion(&d_key, &streams);
|
||||
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
black_box(sks.aes_encrypt(
|
||||
|
||||
@@ -48,7 +48,6 @@ pub mod cuda {
|
||||
const NUM_AES_INPUTS: usize = 1;
|
||||
const SBOX_PARALLELISM: usize = 16;
|
||||
let bench_id = format!("{param_name}::{NUM_AES_INPUTS}_input_encryption");
|
||||
println!("{bench_id}");
|
||||
|
||||
let round_keys = sks.key_expansion_256(&d_key, &streams);
|
||||
|
||||
@@ -78,7 +77,6 @@ pub mod cuda {
|
||||
|
||||
{
|
||||
let bench_id = format!("{param_name}::key_expansion");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
black_box(sks.key_expansion_256(&d_key, &streams));
|
||||
@@ -126,7 +124,6 @@ pub mod cuda {
|
||||
|
||||
let round_keys = sks.key_expansion_256(&d_key, &streams);
|
||||
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
black_box(sks.aes_256_encrypt(
|
||||
|
||||
@@ -55,7 +55,6 @@ fn bench_server_key_binary_function_dirty_inputs<F>(
|
||||
let keys = LazyCell::new(move || KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix));
|
||||
|
||||
let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, sks) = (&keys.0, &keys.1);
|
||||
|
||||
@@ -139,7 +138,6 @@ fn bench_server_key_binary_function_clean_inputs<F>(
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, ct_0, ct_1) = (&bench_data.0, &bench_data.1, &bench_data.2);
|
||||
b.iter(|| {
|
||||
@@ -161,7 +159,6 @@ fn bench_server_key_binary_function_clean_inputs<F>(
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -232,7 +229,6 @@ fn bench_server_key_unary_function_dirty_inputs<F>(
|
||||
let keys = LazyCell::new(move || KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix));
|
||||
|
||||
let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, sks) = (&keys.0, &keys.1);
|
||||
|
||||
@@ -311,7 +307,6 @@ fn bench_server_key_unary_function_clean_inputs<F>(
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, ct_0) = (&bench_data.0, &bench_data.1);
|
||||
|
||||
@@ -332,7 +327,6 @@ fn bench_server_key_unary_function_clean_inputs<F>(
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -395,7 +389,6 @@ fn bench_server_key_binary_scalar_function_dirty_inputs<F, G>(
|
||||
let keys = LazyCell::new(move || KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix));
|
||||
|
||||
let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, sks) = (&keys.0, &keys.1);
|
||||
|
||||
@@ -483,7 +476,6 @@ fn bench_server_key_binary_scalar_function_clean_inputs<F, G>(
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, ct_0, clear_1) = (&bench_data.0, &bench_data.1, bench_data.2);
|
||||
|
||||
@@ -505,7 +497,6 @@ fn bench_server_key_binary_scalar_function_clean_inputs<F, G>(
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -614,7 +605,6 @@ fn if_then_else_parallelized(c: &mut Criterion) {
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, condition, true_ct, false_ct) =
|
||||
(&bench_data.0, &bench_data.1, &bench_data.2, &bench_data.3);
|
||||
@@ -639,7 +629,6 @@ fn if_then_else_parallelized(c: &mut Criterion) {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -724,7 +713,6 @@ fn flip_parallelized(c: &mut Criterion) {
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, condition, true_ct, false_ct) =
|
||||
(&bench_data.0, &bench_data.1, &bench_data.2, &bench_data.3);
|
||||
@@ -749,7 +737,6 @@ fn flip_parallelized(c: &mut Criterion) {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -839,7 +826,6 @@ fn ciphertexts_sum_parallelized(c: &mut Criterion) {
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}_{len}_ctxts::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, ctxts) = (&bench_data.0, &bench_data.1);
|
||||
|
||||
@@ -869,7 +855,6 @@ fn ciphertexts_sum_parallelized(c: &mut Criterion) {
|
||||
bench_id = format!(
|
||||
"{bench_name}_{len}_ctxts::throughput::{param_name}::{bit_size}_bits"
|
||||
);
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -1546,7 +1531,6 @@ mod cuda {
|
||||
BenchmarkType::Latency => {
|
||||
let streams = CudaStreams::new_multi_gpu();
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
@@ -1581,7 +1565,6 @@ mod cuda {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -1666,7 +1649,6 @@ mod cuda {
|
||||
BenchmarkType::Latency => {
|
||||
let streams = CudaStreams::new_multi_gpu();
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
@@ -1710,7 +1692,6 @@ mod cuda {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -1815,7 +1796,6 @@ mod cuda {
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
bench_id =
|
||||
format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}"); // FIXME it makes no sense to duplicate `bit_size`
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
@@ -1861,7 +1841,6 @@ mod cuda {
|
||||
bench_id = format!(
|
||||
"{bench_name}::throughput::{param_name}::{bit_size}_bits_scalar_{bit_size}"
|
||||
);
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -1943,7 +1922,6 @@ mod cuda {
|
||||
let stream = CudaStreams::new_multi_gpu();
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
@@ -1994,7 +1972,6 @@ mod cuda {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -2913,7 +2890,6 @@ mod cuda {
|
||||
target_num_blocks * param.message_modulus().0.ilog2() as usize;
|
||||
let bench_id =
|
||||
format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &stream);
|
||||
@@ -3018,7 +2994,6 @@ mod hpu {
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let hpu_device_mutex = KEY_CACHE.get_hpu_device(param);
|
||||
@@ -3071,7 +3046,6 @@ mod hpu {
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(120));
|
||||
@@ -3701,7 +3675,6 @@ fn bench_server_key_cast_function<F>(
|
||||
for target_num_blocks in all_num_blocks.iter().copied() {
|
||||
let target_bit_size = target_num_blocks * param.message_modulus().0.ilog2() as usize;
|
||||
let bench_id = format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
|
||||
@@ -54,7 +54,6 @@ fn cpu_glwe_packing(c: &mut Criterion) {
|
||||
builder.push(ct);
|
||||
|
||||
bench_id_pack = format!("{bench_name}::pack_u{bit_size}");
|
||||
println!("{bench_id_pack}");
|
||||
bench_group.bench_function(&bench_id_pack, |b| {
|
||||
b.iter(|| {
|
||||
let compressed = builder.build(&compression_key);
|
||||
@@ -66,7 +65,6 @@ fn cpu_glwe_packing(c: &mut Criterion) {
|
||||
let compressed = builder.build(&compression_key);
|
||||
|
||||
bench_id_unpack = format!("{bench_name}::unpack_u{bit_size}");
|
||||
println!("{bench_id_unpack}");
|
||||
bench_group.bench_function(&bench_id_unpack, |b| {
|
||||
b.iter(|| {
|
||||
let unpacked: RadixCiphertext =
|
||||
@@ -107,7 +105,6 @@ fn cpu_glwe_packing(c: &mut Criterion) {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
bench_id_pack = format!("{bench_name}::throughput::pack_u{bit_size}");
|
||||
println!("{bench_id_pack}");
|
||||
bench_group.bench_function(&bench_id_pack, |b| {
|
||||
b.iter(|| {
|
||||
builders.par_iter().for_each(|builder| {
|
||||
@@ -122,7 +119,6 @@ fn cpu_glwe_packing(c: &mut Criterion) {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
bench_id_unpack = format!("{bench_name}::throughput::unpack_u{bit_size}");
|
||||
println!("{bench_id_unpack}");
|
||||
bench_group.bench_function(&bench_id_unpack, |b| {
|
||||
b.iter(|| {
|
||||
compressed.par_iter().for_each(|comp| {
|
||||
@@ -229,7 +225,6 @@ mod cuda {
|
||||
builder.push(d_ct, &stream);
|
||||
|
||||
bench_id_pack = format!("{bench_name}::pack_u{bit_size}");
|
||||
println!("{bench_id_pack}");
|
||||
bench_group.bench_function(&bench_id_pack, |b| {
|
||||
b.iter(|| {
|
||||
let compressed = builder.build(&cuda_compression_key, &stream);
|
||||
@@ -255,7 +250,6 @@ mod cuda {
|
||||
let local_streams = cuda_local_streams(num_block, elements as usize);
|
||||
|
||||
bench_id_pack = format!("{bench_name}::throughput::pack_u{bit_size}");
|
||||
println!("{bench_id_pack}");
|
||||
let cuda_compression_key_vec = (0..get_number_of_gpus())
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
@@ -359,7 +353,6 @@ mod cuda {
|
||||
let compressed = builder.build(&cuda_compression_key, &stream);
|
||||
|
||||
bench_id_unpack = format!("{bench_name}::unpack_u{bit_size}");
|
||||
println!("{bench_id_unpack}");
|
||||
bench_group.bench_function(&bench_id_unpack, |b| {
|
||||
b.iter(|| {
|
||||
let unpacked: CudaUnsignedRadixCiphertext = compressed
|
||||
@@ -388,7 +381,6 @@ mod cuda {
|
||||
let local_streams = cuda_local_streams(num_block, elements as usize);
|
||||
|
||||
bench_id_unpack = format!("{bench_name}::throughput::unpack_u{bit_size}");
|
||||
println!("{bench_id_unpack}");
|
||||
let builders = (0..elements)
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
|
||||
@@ -89,7 +89,6 @@ mod cuda {
|
||||
builder.push(d_ns_ct, &stream);
|
||||
|
||||
bench_id_pack = format!("{bench_name}::pack_u{bit_size}");
|
||||
println!("{bench_id_pack}");
|
||||
bench_group.bench_function(&bench_id_pack, |b| {
|
||||
b.iter(|| {
|
||||
let compressed =
|
||||
@@ -158,7 +157,6 @@ mod cuda {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
bench_id_pack = format!("{bench_name}::throughput::pack_u{bit_size}");
|
||||
println!("{bench_id_pack}");
|
||||
bench_group.bench_function(&bench_id_pack, |b| {
|
||||
b.iter(|| {
|
||||
builders.par_iter().for_each(
|
||||
@@ -235,7 +233,6 @@ mod cuda {
|
||||
let compressed = builder.build(&cuda_noise_squashing_compression_key, &stream);
|
||||
|
||||
bench_id_unpack = format!("{bench_name}::unpack_u{bit_size}");
|
||||
println!("{bench_id_unpack}");
|
||||
bench_group.bench_function(&bench_id_unpack, |b| {
|
||||
b.iter(|| {
|
||||
let unpacked: CudaSquashedNoiseRadixCiphertext =
|
||||
@@ -313,7 +310,6 @@ mod cuda {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
bench_id_unpack = format!("{bench_name}::throughput::unpack_u{bit_size}");
|
||||
println!("{bench_id_unpack}");
|
||||
bench_group.bench_function(&bench_id_unpack, |b| {
|
||||
b.iter(|| {
|
||||
compressed.par_iter().enumerate().for_each(|(i, comp)| {
|
||||
|
||||
@@ -31,7 +31,6 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
bench_id_oprf_bounded =
|
||||
format!("{bench_name}_bounded::{param_name}::{bit_size}_bits");
|
||||
|
||||
println!("{bench_id_oprf}");
|
||||
bench_group.bench_function(&bench_id_oprf, |b| {
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
@@ -43,7 +42,6 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
})
|
||||
});
|
||||
|
||||
println!("{bench_id_oprf_bounded}");
|
||||
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
@@ -77,7 +75,6 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
|
||||
println!("{bench_id_oprf}");
|
||||
bench_group.bench_function(&bench_id_oprf, |b| {
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|_| {
|
||||
@@ -89,7 +86,6 @@ pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
})
|
||||
});
|
||||
|
||||
println!("{bench_id_oprf_bounded}");
|
||||
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|_| {
|
||||
@@ -156,7 +152,6 @@ pub mod cuda {
|
||||
bench_id_oprf_bounded =
|
||||
format!("{bench_name}_bounded::{param_name}::{bit_size}_bits");
|
||||
|
||||
println!("{bench_id_oprf}");
|
||||
bench_group.bench_function(&bench_id_oprf, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
@@ -173,7 +168,6 @@ pub mod cuda {
|
||||
})
|
||||
});
|
||||
|
||||
println!("{bench_id_oprf_bounded}");
|
||||
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
@@ -212,7 +206,6 @@ pub mod cuda {
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
|
||||
println!("{bench_id_oprf}");
|
||||
bench_group.bench_function(&bench_id_oprf, |b| {
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|i| {
|
||||
@@ -228,7 +221,6 @@ pub mod cuda {
|
||||
})
|
||||
});
|
||||
|
||||
println!("{bench_id_oprf_bounded}");
|
||||
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|i| {
|
||||
|
||||
@@ -69,7 +69,6 @@ fn execute_cpu_re_randomize(c: &mut Criterion, bit_size: usize) {
|
||||
let mut d_re_randomized = decompressed.clone();
|
||||
|
||||
bench_id = format!("{bench_name}::latency_u{bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter_batched(
|
||||
|| {
|
||||
@@ -117,7 +116,6 @@ fn execute_cpu_re_randomize(c: &mut Criterion, bit_size: usize) {
|
||||
.collect();
|
||||
|
||||
bench_id = format!("{bench_name}::throughput_u{bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter_batched(
|
||||
|| {
|
||||
@@ -259,7 +257,6 @@ mod cuda {
|
||||
let mut d_re_randomized = d_decompressed.duplicate(&streams);
|
||||
|
||||
bench_id = format!("{bench_name}::latency_u{bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter_batched(
|
||||
|| {
|
||||
@@ -334,7 +331,6 @@ mod cuda {
|
||||
.collect();
|
||||
|
||||
bench_id = format!("{bench_name}::throughput_u{bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter_batched(
|
||||
|| {
|
||||
|
||||
@@ -55,7 +55,6 @@ fn bench_server_key_signed_binary_function_clean_inputs<F>(
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, ct_0, ct_1) = (&bench_data.0, &bench_data.1, &bench_data.2);
|
||||
|
||||
@@ -154,7 +153,6 @@ fn bench_server_key_signed_shift_function_clean_inputs<F>(
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, ct_0, ct_1) = (&bench_data.0, &bench_data.1, &bench_data.2);
|
||||
|
||||
@@ -176,7 +174,6 @@ fn bench_server_key_signed_shift_function_clean_inputs<F>(
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -254,7 +251,6 @@ fn bench_server_key_unary_function_clean_inputs<F>(
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, ct_0) = (&bench_data.0, &bench_data.1);
|
||||
b.iter(|| {
|
||||
@@ -273,7 +269,6 @@ fn bench_server_key_unary_function_clean_inputs<F>(
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -338,7 +333,6 @@ fn signed_if_then_else_parallelized(c: &mut Criterion) {
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, condition, true_ct, false_ct) =
|
||||
(&bench_data.0, &bench_data.1, &bench_data.2, &bench_data.3);
|
||||
@@ -359,7 +353,6 @@ fn signed_if_then_else_parallelized(c: &mut Criterion) {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -865,7 +858,6 @@ fn bench_server_key_binary_scalar_function_clean_inputs<F, G>(
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
@@ -902,7 +894,6 @@ fn bench_server_key_binary_scalar_function_clean_inputs<F, G>(
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -1055,7 +1046,6 @@ fn signed_flip_parallelized(c: &mut Criterion) {
|
||||
});
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (sks, condition, true_ct, false_ct) =
|
||||
(&bench_data.0, &bench_data.1, &bench_data.2, &bench_data.3);
|
||||
@@ -1080,7 +1070,6 @@ fn signed_flip_parallelized(c: &mut Criterion) {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
@@ -1459,7 +1448,6 @@ fn bench_server_key_signed_cast_function<F>(
|
||||
for target_num_blocks in all_num_blocks.iter().copied() {
|
||||
let target_bit_size = target_num_blocks * param.message_modulus().0.ilog2() as usize;
|
||||
let bench_id = format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
@@ -1555,7 +1543,6 @@ mod cuda {
|
||||
let stream = CudaStreams::new_multi_gpu();
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
@@ -1607,7 +1594,6 @@ mod cuda {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -1728,7 +1714,6 @@ mod cuda {
|
||||
let stream = CudaStreams::new_multi_gpu();
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
@@ -1767,7 +1752,6 @@ mod cuda {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -1874,7 +1858,6 @@ mod cuda {
|
||||
|
||||
bench_id =
|
||||
format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
@@ -1921,7 +1904,6 @@ mod cuda {
|
||||
bench_id = format!(
|
||||
"{bench_name}::throughput::{param_name}::{bit_size}_bits_scalar_{bit_size}"
|
||||
);
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -2038,7 +2020,6 @@ mod cuda {
|
||||
let streams = CudaStreams::new_multi_gpu();
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
@@ -2090,7 +2071,6 @@ mod cuda {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -2205,7 +2185,6 @@ mod cuda {
|
||||
let stream = CudaStreams::new_multi_gpu();
|
||||
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
@@ -2256,7 +2235,6 @@ mod cuda {
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
println!("{bench_id}");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
@@ -3076,7 +3054,6 @@ mod cuda {
|
||||
target_num_blocks * param.message_modulus().0.ilog2() as usize;
|
||||
let bench_id =
|
||||
format!("{bench_name}::{param_name}::{bit_size}_to_{target_bit_size}");
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &stream);
|
||||
|
||||
@@ -128,7 +128,6 @@ fn cpu_pke_zk_proof(c: &mut Criterion) {
|
||||
bench_id = format!(
|
||||
"{bench_name}::{param_name}_{bits}_bits_packed_{crs_size}_bits_crs_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let input_msg = rng.gen::<u64>();
|
||||
let messages = vec![input_msg; fhe_uint_count];
|
||||
@@ -150,7 +149,6 @@ fn cpu_pke_zk_proof(c: &mut Criterion) {
|
||||
bench_id = format!(
|
||||
"{bench_name}::throughput::{param_name}_{bits}_bits_packed_{crs_size}_bits_crs_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
println!("{bench_id}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let messages = (0..elements)
|
||||
.map(|_| {
|
||||
@@ -342,14 +340,12 @@ fn cpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) {
|
||||
vec![],
|
||||
);
|
||||
|
||||
println!("{bench_id_verify}");
|
||||
bench_group.bench_function(&bench_id_verify, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = ct1.verify(&crs, &pk, &metadata);
|
||||
});
|
||||
});
|
||||
|
||||
println!("{bench_id_verify_and_expand}");
|
||||
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = ct1
|
||||
@@ -390,7 +386,6 @@ fn cpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
println!("{bench_id_verify}");
|
||||
bench_group.bench_function(&bench_id_verify, |b| {
|
||||
b.iter(|| {
|
||||
cts.par_iter().for_each(|ct1| {
|
||||
@@ -399,7 +394,6 @@ fn cpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) {
|
||||
});
|
||||
});
|
||||
|
||||
println!("{bench_id_verify_and_expand}");
|
||||
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
|
||||
b.iter(|| {
|
||||
cts.par_iter().for_each(|ct1| {
|
||||
@@ -628,14 +622,12 @@ mod cuda {
|
||||
vec![],
|
||||
);
|
||||
|
||||
println!("{bench_id_verify}");
|
||||
bench_group.bench_function(&bench_id_verify, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = ct1.verify(&crs, &pk, &metadata);
|
||||
});
|
||||
});
|
||||
|
||||
println!("{bench_id_expand_without_verify}");
|
||||
bench_group.bench_function(&bench_id_expand_without_verify, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = gpu_ct1
|
||||
@@ -644,7 +636,6 @@ mod cuda {
|
||||
});
|
||||
});
|
||||
|
||||
println!("{bench_id_verify_and_expand}");
|
||||
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = gpu_ct1
|
||||
@@ -697,7 +688,6 @@ mod cuda {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
println!("{bench_id_verify}");
|
||||
bench_group.bench_function(&bench_id_verify, |b| {
|
||||
b.iter(|| {
|
||||
cts.par_iter().for_each(|ct1| {
|
||||
@@ -706,7 +696,6 @@ mod cuda {
|
||||
});
|
||||
});
|
||||
|
||||
println!("{bench_id_expand_without_verify}");
|
||||
bench_group.bench_function(&bench_id_expand_without_verify, |b| {
|
||||
let setup_encrypted_values = || {
|
||||
let gpu_cts = cts.iter().enumerate().map(|(i, ct)| {
|
||||
@@ -736,7 +725,6 @@ mod cuda {
|
||||
}, BatchSize::SmallInput);
|
||||
});
|
||||
|
||||
println!("{bench_id_verify_and_expand}");
|
||||
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
|
||||
let setup_encrypted_values = || {
|
||||
let gpu_cts = cts.iter().enumerate().map(|(i, ct)| {
|
||||
|
||||
Reference in New Issue
Block a user