Files
tfhe-rs/tfhe-benchmark/benches/core_crypto/pbs128_bench.rs
2026-01-05 09:48:00 +01:00

747 lines
30 KiB
Rust

use benchmark::params_aliases::{
BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
};
use benchmark::utilities::{write_to_json, CryptoParametersRecord, OperatorType};
use criterion::{black_box, Criterion};
use dyn_stack::PodStack;
use tfhe::core_crypto::fft_impl::fft128::crypto::bootstrap::bootstrap_scratch;
use tfhe::core_crypto::prelude::*;
use tfhe::keycache::NamedParam;
/// Measures the latency of a single CPU 128-bit programmable bootstrap.
///
/// The input is a `u64` LWE ciphertext encrypted with the compute parameters, while the
/// bootstrap key, the accumulator and the output ciphertext use `u128` and follow the noise
/// squashing parameters. After the measurement, the benchmark identifier and the
/// cryptographic parameters are handed to `write_to_json`.
fn pbs_128(c: &mut Criterion) {
    type InputScalar = u64;
    type OutputScalar = u128;

    let bench_name = "core_crypto::pbs128";
    let mut bench_group = c.benchmark_group(bench_name);
    bench_group
        .sample_size(10)
        .measurement_time(std::time::Duration::from_secs(30));

    let squashing_params = BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
    let compute_params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;

    // Input side: read from PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.
    let lwe_dimension = compute_params.lwe_dimension;
    let lwe_noise_distribution = compute_params.lwe_noise_distribution;
    let input_ciphertext_modulus = compute_params.ciphertext_modulus;

    // Bootstrap side: read from the noise squashing parameters.
    let glwe_dimension = squashing_params.glwe_dimension();
    let glwe_size = glwe_dimension.to_glwe_size();
    let polynomial_size = squashing_params.polynomial_size();
    let glwe_noise_distribution = squashing_params.glwe_noise_distribution();
    let pbs_base_log = squashing_params.decomp_base_log();
    let pbs_level = squashing_params.decomp_level_count();
    let output_ciphertext_modulus = squashing_params.ciphertext_modulus();

    // Random generators, both seeded from the same seeder.
    let mut seeder_box = new_seeder();
    let seeder = seeder_box.as_mut();
    let mut secret_generator = SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
    let mut encryption_generator =
        EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);

    // Secret keys: a binary LWE key for the input and a binary 128-bit GLWE key for the output.
    let input_lwe_secret_key =
        LweSecretKey::generate_new_binary(lwe_dimension, &mut secret_generator);
    let output_glwe_secret_key = GlweSecretKey::<Vec<OutputScalar>>::generate_new_binary(
        glwe_dimension,
        polynomial_size,
        &mut secret_generator,
    );
    // Only the size of the flattened output key is needed to allocate the result.
    let output_lwe_size = output_glwe_secret_key
        .clone()
        .into_lwe_secret_key()
        .lwe_dimension()
        .to_lwe_size();

    // Standard-domain bootstrap key, then its Fourier-128 counterpart used by the bootstrap.
    let mut standard_bsk = LweBootstrapKey::new(
        OutputScalar::ZERO,
        glwe_size,
        polynomial_size,
        pbs_base_log,
        pbs_level,
        lwe_dimension,
        output_ciphertext_modulus,
    );
    par_generate_lwe_bootstrap_key(
        &input_lwe_secret_key,
        &output_glwe_secret_key,
        &mut standard_bsk,
        glwe_noise_distribution,
        &mut encryption_generator,
    );
    let mut fourier_bsk = Fourier128LweBootstrapKey::new(
        lwe_dimension,
        glwe_size,
        polynomial_size,
        pbs_base_log,
        pbs_level,
    );
    convert_standard_lwe_bootstrap_key_to_fourier_128(&standard_bsk, &mut fourier_bsk);

    // Encode the message 3 for a message modulus of 16 and encrypt it.
    let message_modulus: InputScalar = 1 << 4;
    let input_message: InputScalar = 3;
    let delta: InputScalar = (1 << (InputScalar::BITS - 1)) / message_modulus;
    let plaintext = Plaintext(input_message * delta);
    let input_ct: LweCiphertextOwned<InputScalar> = allocate_and_encrypt_new_lwe_ciphertext(
        &input_lwe_secret_key,
        plaintext,
        lwe_noise_distribution,
        input_ciphertext_modulus,
        &mut encryption_generator,
    );

    // Accumulator built with `OutputScalar::ONE` and a zero-initialised output ciphertext.
    let accumulator: GlweCiphertextOwned<OutputScalar> = GlweCiphertextOwned::new(
        OutputScalar::ONE,
        glwe_size,
        polynomial_size,
        output_ciphertext_modulus,
    );
    let mut output_ct: LweCiphertext<Vec<OutputScalar>> = LweCiphertext::new(
        OutputScalar::ZERO,
        output_lwe_size,
        output_ciphertext_modulus,
    );

    // FFT plan plus a scratch buffer sized once, outside of the measured closure.
    let fft = Fft128::new(polynomial_size);
    let fft = fft.as_view();
    let scratch_size = bootstrap_scratch::<OutputScalar>(
        fourier_bsk.glwe_size(),
        fourier_bsk.polynomial_size(),
        fft,
    )
    .unwrap()
    .unaligned_bytes_required();
    let mut scratch = vec![0u8; scratch_size];

    let params_name = squashing_params.name();
    let bench_id = format!("{bench_name}::{params_name}");
    bench_group.bench_function(&bench_id, |b| {
        b.iter(|| {
            fourier_bsk.bootstrap(
                &mut output_ct,
                &input_ct,
                &accumulator,
                fft,
                PodStack::new(&mut scratch),
            );
            black_box(&mut output_ct);
        });
    });

    // TODO Add throughput benchmark case

    // NOTE(review): the GLWE noise distribution recorded here is the one of the compute
    // parameters, not the one used to generate the 128-bit bootstrap key above.
    let params_record = CryptoParametersRecord {
        lwe_dimension: Some(lwe_dimension),
        glwe_dimension: Some(glwe_dimension),
        polynomial_size: Some(polynomial_size),
        lwe_noise_distribution: Some(lwe_noise_distribution),
        glwe_noise_distribution: Some(compute_params.glwe_noise_distribution),
        pbs_base_log: Some(pbs_base_log),
        pbs_level: Some(pbs_level),
        ciphertext_modulus: Some(input_ciphertext_modulus),
        ..Default::default()
    };

    let bit_size = message_modulus.ilog2();
    write_to_json(
        &bench_id,
        params_record,
        params_name,
        "pbs",
        &OperatorType::Atomic,
        bit_size,
        vec![bit_size],
    );
}
#[cfg(feature = "gpu")]
mod cuda {
use benchmark::utilities::{
cuda_local_keys_core, cuda_local_streams_core, get_bench_type, throughput_num_threads,
write_to_json, BenchmarkType, CpuKeys, CpuKeysBuilder, CryptoParametersRecord, CudaIndexes,
CudaLocalKeys, OperatorType,
};
use criterion::{black_box, Criterion, Throughput};
use rayon::prelude::*;
use tfhe::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use tfhe::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use tfhe::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use tfhe::core_crypto::gpu::{
cuda_multi_bit_programmable_bootstrap_128_lwe_ciphertext,
cuda_programmable_bootstrap_128_lwe_ciphertext, get_number_of_gpus, CudaStreams,
};
use tfhe::core_crypto::prelude::*;
use tfhe::shortint::parameters::{
ModulusSwitchType, NoiseSquashingParameters,
NOISE_SQUASHING_PARAM_GPU_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
NOISE_SQUASHING_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
};
/// Benchmarks the classic 128-bit programmable bootstrap on GPU.
///
/// Depending on `get_bench_type()`, this measures either the latency of one bootstrap or the
/// throughput of batches of bootstraps dispatched in parallel, one batch per GPU. The
/// benchmark identifier and the parameters are then recorded with `write_to_json`.
///
/// # Panics
///
/// Panics if the noise squashing parameters are not the `Classic` variant, or if they select
/// the drift technique for the modulus switch noise reduction.
fn cuda_pbs_128(c: &mut Criterion) {
    let bench_name = "core_crypto::cuda::pbs128";
    let mut bench_group = c.benchmark_group(bench_name);
    bench_group
        .sample_size(10)
        .measurement_time(std::time::Duration::from_secs(30));

    type Scalar = u128;

    let input_params = PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
    let squash_params = NOISE_SQUASHING_PARAM_GPU_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
    let NoiseSquashingParameters::Classic(squash_params) = squash_params else {
        panic!("Multi bit noise squashing PBS currently not supported on GPU");
    };

    // Noise distribution and modulus used for the 64-bit input ciphertexts.
    let lwe_noise_distribution_u64 = DynamicDistribution::new_t_uniform(46);
    let ct_modulus_u64: CiphertextModulus<u64> = CiphertextModulus::new_native();

    let params_name = "PARAMS_SWITCH_SQUASH";

    let mut boxed_seeder = new_seeder();
    let seeder = boxed_seeder.as_mut();
    let mut secret_generator =
        SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
    let mut encryption_generator =
        EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);

    let input_lwe_secret_key =
        LweSecretKey::generate_new_binary(input_params.lwe_dimension, &mut secret_generator);
    let output_glwe_secret_key = GlweSecretKey::<Vec<Scalar>>::generate_new_binary(
        squash_params.glwe_dimension,
        squash_params.polynomial_size,
        &mut secret_generator,
    );
    let output_lwe_secret_key = output_glwe_secret_key.clone().into_lwe_secret_key();

    // The bootstrap key is only allocated: no key generation routine is called on it in this
    // function (presumably because its content is irrelevant for timing -- confirm).
    let bsk = LweBootstrapKey::new(
        Scalar::ZERO,
        squash_params.glwe_dimension.to_glwe_size(),
        squash_params.polynomial_size,
        squash_params.decomp_base_log,
        squash_params.decomp_level_count,
        input_params.lwe_dimension,
        squash_params.ciphertext_modulus,
    );

    let streams = CudaStreams::new_multi_gpu();

    let modulus_switch_noise_reduction_configuration =
        match squash_params.modulus_switch_noise_reduction_params {
            ModulusSwitchType::Standard => None,
            ModulusSwitchType::DriftTechniqueNoiseReduction(
                _modulus_switch_noise_reduction_params,
            ) => {
                panic!("Drift noise reduction is not supported on GPU")
            }
            ModulusSwitchType::CenteredMeanNoiseReduction => {
                Some(CudaModulusSwitchNoiseReductionConfiguration::Centered)
            }
        };

    let cpu_keys: CpuKeys<_, u64> = CpuKeysBuilder::new().bootstrap_key(bsk).build();

    // Encode the message 3 for a message modulus of 16.
    let message_modulus: u64 = 1 << 4;
    let input_message: u64 = 3;
    let delta: u64 = (1 << (u64::BITS - 1)) / message_modulus;
    let plaintext = Plaintext(input_message * delta);

    let bench_id;

    match get_bench_type() {
        BenchmarkType::Latency => {
            let gpu_keys = CudaLocalKeys::from_cpu_keys(
                &cpu_keys,
                modulus_switch_noise_reduction_configuration,
                &streams,
            );

            let lwe_ciphertext_in: LweCiphertextOwned<u64> =
                allocate_and_encrypt_new_lwe_ciphertext(
                    &input_lwe_secret_key,
                    plaintext,
                    lwe_noise_distribution_u64,
                    ct_modulus_u64,
                    &mut encryption_generator,
                );
            let lwe_ciphertext_in_gpu =
                CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &streams);

            let accumulator: GlweCiphertextOwned<Scalar> = GlweCiphertextOwned::new(
                Scalar::ONE,
                squash_params.glwe_dimension.to_glwe_size(),
                squash_params.polynomial_size,
                squash_params.ciphertext_modulus,
            );
            let accumulator_gpu =
                CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &streams);

            // Allocate the LweCiphertext to store the result of the PBS
            let out_pbs_ct = LweCiphertext::new(
                Scalar::ZERO,
                output_lwe_secret_key.lwe_dimension().to_lwe_size(),
                squash_params.ciphertext_modulus,
            );
            let mut out_pbs_ct_gpu =
                CudaLweCiphertextList::from_lwe_ciphertext(&out_pbs_ct, &streams);

            bench_id = format!("{bench_name}::{params_name}");
            bench_group.bench_function(&bench_id, |b| {
                b.iter(|| {
                    cuda_programmable_bootstrap_128_lwe_ciphertext(
                        &lwe_ciphertext_in_gpu,
                        &mut out_pbs_ct_gpu,
                        &accumulator_gpu,
                        gpu_keys.bsk.as_ref().unwrap(),
                        &streams,
                    );
                    black_box(&mut out_pbs_ct_gpu);
                })
            });
        }
        BenchmarkType::Throughput => {
            let gpu_keys_vec =
                cuda_local_keys_core(&cpu_keys, modulus_switch_noise_reduction_configuration);
            let gpu_count = get_number_of_gpus() as usize;

            bench_id = format!("{bench_name}::throughput::{params_name}");
            let blocks: usize = 1;
            let elements = throughput_num_threads(blocks, 1);
            let elements_per_stream = elements as usize / gpu_count;
            // Every GPU bootstraps `elements_per_stream` ciphertexts, so report what is
            // really processed: the integer division above drops the remainder whenever
            // `elements` is not a multiple of `gpu_count`.
            let processed_elements = (elements_per_stream * gpu_count) as u64;
            bench_group.throughput(Throughput::Elements(processed_elements));
            bench_group.bench_function(&bench_id, |b| {
                // Builds, for each GPU, a batch of inputs, an accumulator and an output list.
                let setup_encrypted_values = || {
                    let local_streams = cuda_local_streams_core();

                    let plaintext_list =
                        PlaintextList::new(u64::ZERO, PlaintextCount(elements_per_stream));

                    let input_cts = (0..gpu_count)
                        .map(|i| {
                            let mut input_ct_list = LweCiphertextList::new(
                                u64::ZERO,
                                input_lwe_secret_key.lwe_dimension().to_lwe_size(),
                                LweCiphertextCount(elements_per_stream),
                                ct_modulus_u64,
                            );
                            encrypt_lwe_ciphertext_list(
                                &input_lwe_secret_key,
                                &mut input_ct_list,
                                &plaintext_list,
                                lwe_noise_distribution_u64,
                                &mut encryption_generator,
                            );
                            CudaLweCiphertextList::from_lwe_ciphertext_list(
                                &input_ct_list,
                                &local_streams[i],
                            )
                        })
                        .collect::<Vec<_>>();

                    let accumulators = (0..gpu_count)
                        .map(|i| {
                            let accumulator = GlweCiphertextOwned::new(
                                Scalar::ONE,
                                squash_params.glwe_dimension.to_glwe_size(),
                                squash_params.polynomial_size,
                                squash_params.ciphertext_modulus,
                            );
                            CudaGlweCiphertextList::from_glwe_ciphertext(
                                &accumulator,
                                &local_streams[i],
                            )
                        })
                        .collect::<Vec<_>>();

                    // Allocate the LweCiphertext to store the result of the PBS
                    let output_cts = (0..gpu_count)
                        .map(|i| {
                            let output_ct_list = LweCiphertextList::new(
                                Scalar::ZERO,
                                output_lwe_secret_key.lwe_dimension().to_lwe_size(),
                                LweCiphertextCount(elements_per_stream),
                                squash_params.ciphertext_modulus,
                            );
                            CudaLweCiphertextList::from_lwe_ciphertext_list(
                                &output_ct_list,
                                &local_streams[i],
                            )
                        })
                        .collect::<Vec<_>>();

                    // Synchronize every stream before the batch is handed to the timed routine.
                    local_streams.iter().for_each(|stream| stream.synchronize());

                    (input_cts, output_cts, accumulators, local_streams)
                };

                b.iter_batched(
                    setup_encrypted_values,
                    |(input_cts, mut output_cts, accumulators, local_streams)| {
                        // One bootstrap call per GPU, issued in parallel.
                        (0..gpu_count)
                            .into_par_iter()
                            .zip(input_cts.par_iter())
                            .zip(output_cts.par_iter_mut())
                            .zip(accumulators.par_iter())
                            .zip(local_streams.par_iter())
                            .for_each(
                                |(
                                    (((i, input_batch), output_batch), accumulator),
                                    local_stream,
                                )| {
                                    cuda_programmable_bootstrap_128_lwe_ciphertext(
                                        input_batch,
                                        output_batch,
                                        accumulator,
                                        gpu_keys_vec[i].bsk.as_ref().unwrap(),
                                        local_stream,
                                    );
                                },
                            )
                    },
                    criterion::BatchSize::SmallInput,
                );
            });
        }
    };

    let params_record = CryptoParametersRecord {
        lwe_dimension: Some(input_params.lwe_dimension),
        glwe_dimension: Some(squash_params.glwe_dimension),
        polynomial_size: Some(squash_params.polynomial_size),
        lwe_noise_distribution: Some(lwe_noise_distribution_u64),
        glwe_noise_distribution: Some(input_params.glwe_noise_distribution),
        pbs_base_log: Some(squash_params.decomp_base_log),
        pbs_level: Some(squash_params.decomp_level_count),
        ciphertext_modulus: Some(input_params.ciphertext_modulus),
        ..Default::default()
    };

    let bit_size = message_modulus.ilog2();
    write_to_json(
        &bench_id,
        params_record,
        params_name,
        "pbs",
        &OperatorType::Atomic,
        bit_size,
        vec![bit_size],
    );
}
/// Benchmarks the multi-bit 128-bit programmable bootstrap on GPU.
///
/// Depending on `get_bench_type()`, this measures either the latency of one bootstrap or the
/// throughput of batches of bootstraps dispatched in parallel, one batch per GPU. The
/// benchmark identifier and the parameters are then recorded with `write_to_json`.
///
/// # Panics
///
/// Panics if the noise squashing parameters are not the `MultiBit` variant.
fn cuda_multi_bit_pbs_128(c: &mut Criterion) {
    let bench_name = "core_crypto::cuda::multi_bit_pbs128";
    let mut bench_group = c.benchmark_group(bench_name);
    bench_group
        .sample_size(10)
        .measurement_time(std::time::Duration::from_secs(30));

    type Scalar = u128;

    let input_params = PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
    let NoiseSquashingParameters::MultiBit(squash_params) =
        NOISE_SQUASHING_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128
    else {
        panic!("Expected Multi bit params")
    };

    // Noise distribution and modulus used for the 64-bit input ciphertexts.
    let lwe_noise_distribution_u64 = DynamicDistribution::new_t_uniform(46);
    let ct_modulus_u64: CiphertextModulus<u64> = CiphertextModulus::new_native();

    let params_name = "PARAMS_SWITCH_SQUASH";

    let mut boxed_seeder = new_seeder();
    let seeder = boxed_seeder.as_mut();
    let mut secret_generator =
        SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
    let mut encryption_generator =
        EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);

    let input_lwe_secret_key =
        LweSecretKey::generate_new_binary(input_params.lwe_dimension, &mut secret_generator);
    let output_glwe_secret_key = GlweSecretKey::<Vec<Scalar>>::generate_new_binary(
        squash_params.glwe_dimension,
        squash_params.polynomial_size,
        &mut secret_generator,
    );
    let output_lwe_secret_key = output_glwe_secret_key.clone().into_lwe_secret_key();

    // The bootstrap key is only allocated: no key generation routine is called on it in this
    // function (presumably because its content is irrelevant for timing -- confirm).
    let multi_bit_bsk = LweMultiBitBootstrapKey::new(
        Scalar::ZERO,
        squash_params.glwe_dimension.to_glwe_size(),
        squash_params.polynomial_size,
        squash_params.decomp_base_log,
        squash_params.decomp_level_count,
        input_params.lwe_dimension,
        squash_params.grouping_factor,
        squash_params.ciphertext_modulus,
    );

    let cpu_keys: CpuKeys<_, u64> = CpuKeysBuilder::new()
        .multi_bit_bootstrap_key(multi_bit_bsk)
        .build();

    // Encode the message 3 for a message modulus of 16.
    let message_modulus: u64 = 1 << 4;
    let input_message: u64 = 3;
    let delta: u64 = (1 << (u64::BITS - 1)) / message_modulus;
    let plaintext = Plaintext(input_message * delta);

    let bench_id;

    match get_bench_type() {
        BenchmarkType::Latency => {
            let streams = CudaStreams::new_multi_gpu();
            let gpu_keys = CudaLocalKeys::from_cpu_keys(&cpu_keys, None, &streams);

            let lwe_ciphertext_in: LweCiphertextOwned<u64> =
                allocate_and_encrypt_new_lwe_ciphertext(
                    &input_lwe_secret_key,
                    plaintext,
                    lwe_noise_distribution_u64,
                    ct_modulus_u64,
                    &mut encryption_generator,
                );
            let lwe_ciphertext_in_gpu =
                CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &streams);

            let accumulator: GlweCiphertextOwned<Scalar> = GlweCiphertextOwned::new(
                Scalar::ONE,
                squash_params.glwe_dimension.to_glwe_size(),
                squash_params.polynomial_size,
                squash_params.ciphertext_modulus,
            );
            let accumulator_gpu =
                CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &streams);

            // Allocate the LweCiphertext to store the result of the PBS
            let out_pbs_ct = LweCiphertext::new(
                Scalar::ZERO,
                output_lwe_secret_key.lwe_dimension().to_lwe_size(),
                squash_params.ciphertext_modulus,
            );
            let mut out_pbs_ct_gpu =
                CudaLweCiphertextList::from_lwe_ciphertext(&out_pbs_ct, &streams);

            // Index vectors (LUT / output / input) for the single ciphertext at index 0.
            let h_indexes = [0];
            let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0);

            bench_id = format!("{bench_name}::{params_name}");
            bench_group.bench_function(&bench_id, |b| {
                b.iter(|| {
                    cuda_multi_bit_programmable_bootstrap_128_lwe_ciphertext(
                        &lwe_ciphertext_in_gpu,
                        &mut out_pbs_ct_gpu,
                        &accumulator_gpu,
                        &cuda_indexes.d_lut,
                        &cuda_indexes.d_output,
                        &cuda_indexes.d_input,
                        gpu_keys.multi_bit_bsk.as_ref().unwrap(),
                        &streams,
                    );
                    black_box(&mut out_pbs_ct_gpu);
                })
            });
        }
        BenchmarkType::Throughput => {
            let gpu_keys_vec = cuda_local_keys_core(&cpu_keys, None);
            let gpu_count = get_number_of_gpus() as usize;

            bench_id = format!("{bench_name}::throughput::{params_name}");
            let blocks: usize = 1;
            let elements = throughput_num_threads(blocks, 1);
            let elements_per_stream = elements as usize / gpu_count;
            // Every GPU bootstraps `elements_per_stream` ciphertexts, so report what is
            // really processed: the integer division above drops the remainder whenever
            // `elements` is not a multiple of `gpu_count`.
            let processed_elements = (elements_per_stream * gpu_count) as u64;
            bench_group.throughput(Throughput::Elements(processed_elements));
            bench_group.bench_function(&bench_id, |b| {
                // Builds, for each GPU, a batch of inputs, an accumulator, an output list and
                // the index vectors.
                let setup_encrypted_values = || {
                    let local_streams = cuda_local_streams_core();

                    let plaintext_list =
                        PlaintextList::new(u64::ZERO, PlaintextCount(elements_per_stream));

                    let input_cts = (0..gpu_count)
                        .map(|i| {
                            let mut input_ct_list = LweCiphertextList::new(
                                u64::ZERO,
                                input_lwe_secret_key.lwe_dimension().to_lwe_size(),
                                LweCiphertextCount(elements_per_stream),
                                ct_modulus_u64,
                            );
                            encrypt_lwe_ciphertext_list(
                                &input_lwe_secret_key,
                                &mut input_ct_list,
                                &plaintext_list,
                                lwe_noise_distribution_u64,
                                &mut encryption_generator,
                            );
                            CudaLweCiphertextList::from_lwe_ciphertext_list(
                                &input_ct_list,
                                &local_streams[i],
                            )
                        })
                        .collect::<Vec<_>>();

                    let accumulators = (0..gpu_count)
                        .map(|i| {
                            let accumulator = GlweCiphertextOwned::new(
                                Scalar::ONE,
                                squash_params.glwe_dimension.to_glwe_size(),
                                squash_params.polynomial_size,
                                squash_params.ciphertext_modulus,
                            );
                            CudaGlweCiphertextList::from_glwe_ciphertext(
                                &accumulator,
                                &local_streams[i],
                            )
                        })
                        .collect::<Vec<_>>();

                    // Allocate the LweCiphertext to store the result of the PBS
                    let output_cts = (0..gpu_count)
                        .map(|i| {
                            let output_ct_list = LweCiphertextList::new(
                                Scalar::ZERO,
                                output_lwe_secret_key.lwe_dimension().to_lwe_size(),
                                LweCiphertextCount(elements_per_stream),
                                squash_params.ciphertext_modulus,
                            );
                            CudaLweCiphertextList::from_lwe_ciphertext_list(
                                &output_ct_list,
                                &local_streams[i],
                            )
                        })
                        .collect::<Vec<_>>();

                    // One index per ciphertext of a per-GPU batch, uploaded for every GPU.
                    let h_indexes = (0..(elements / gpu_count as u64))
                        .map(CastFrom::cast_from)
                        .collect::<Vec<_>>();
                    let cuda_indexes_vec = (0..gpu_count)
                        .map(|i| CudaIndexes::new(&h_indexes, &local_streams[i], 0))
                        .collect::<Vec<_>>();

                    // Synchronize every stream before the batch is handed to the timed routine.
                    local_streams.iter().for_each(|stream| stream.synchronize());

                    (
                        input_cts,
                        output_cts,
                        accumulators,
                        cuda_indexes_vec,
                        local_streams,
                    )
                };

                b.iter_batched(
                    setup_encrypted_values,
                    |(
                        input_cts,
                        mut output_cts,
                        accumulators,
                        cuda_indexes_vec,
                        local_streams,
                    )| {
                        // One bootstrap call per GPU, issued in parallel.
                        (0..gpu_count)
                            .into_par_iter()
                            .zip(input_cts.par_iter())
                            .zip(output_cts.par_iter_mut())
                            .zip(accumulators.par_iter())
                            .zip(local_streams.par_iter())
                            .for_each(
                                |((((i, input_ct), output_ct), accumulator), local_stream)| {
                                    cuda_multi_bit_programmable_bootstrap_128_lwe_ciphertext(
                                        input_ct,
                                        output_ct,
                                        accumulator,
                                        &cuda_indexes_vec[i].d_lut,
                                        &cuda_indexes_vec[i].d_output,
                                        &cuda_indexes_vec[i].d_input,
                                        gpu_keys_vec[i].multi_bit_bsk.as_ref().unwrap(),
                                        local_stream,
                                    );
                                },
                            )
                    },
                    criterion::BatchSize::SmallInput,
                );
            });
        }
    };

    let params_record = CryptoParametersRecord {
        lwe_dimension: Some(input_params.lwe_dimension),
        glwe_dimension: Some(squash_params.glwe_dimension),
        polynomial_size: Some(squash_params.polynomial_size),
        lwe_noise_distribution: Some(lwe_noise_distribution_u64),
        glwe_noise_distribution: Some(input_params.glwe_noise_distribution),
        pbs_base_log: Some(squash_params.decomp_base_log),
        pbs_level: Some(squash_params.decomp_level_count),
        ciphertext_modulus: Some(input_params.ciphertext_modulus),
        ..Default::default()
    };

    let bit_size = message_modulus.ilog2();
    write_to_json(
        &bench_id,
        params_record,
        params_name,
        "pbs",
        &OperatorType::Atomic,
        bit_size,
        vec![bit_size],
    );
}
pub fn cuda_pbs128_group() {
let mut criterion: Criterion<_> = Criterion::default().configure_from_args();
cuda_pbs_128(&mut criterion);
}
pub fn cuda_multi_bit_pbs128_group() {
let mut criterion: Criterion<_> = Criterion::default().configure_from_args();
cuda_multi_bit_pbs_128(&mut criterion);
}
}
#[cfg(feature = "gpu")]
use cuda::{cuda_multi_bit_pbs128_group, cuda_pbs128_group};
pub fn pbs128_group() {
let mut criterion: Criterion<_> = Criterion::default().configure_from_args();
pbs_128(&mut criterion);
}
/// Runs every GPU benchmark group of this file: the classic PBS-128 first, then the
/// multi-bit one. Only compiled when the `gpu` feature is enabled.
#[cfg(feature = "gpu")]
fn go_through_gpu_bench_groups() {
    cuda_pbs128_group();
    cuda_multi_bit_pbs128_group();
}
/// Runs every CPU benchmark group of this file. Only compiled when the `gpu` feature is
/// disabled.
#[cfg(not(feature = "gpu"))]
fn go_through_cpu_bench_groups() {
    pbs128_group();
}
/// Benchmark entry point: runs the GPU groups when the `gpu` feature is enabled and the CPU
/// groups otherwise, then asks Criterion to print its final summary.
fn main() {
    #[cfg(feature = "gpu")]
    go_through_gpu_bench_groups();

    #[cfg(not(feature = "gpu"))]
    go_through_cpu_bench_groups();

    let reporter = Criterion::default().configure_from_args();
    reporter.final_summary();
}