mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-08 06:13:58 -05:00
747 lines
30 KiB
Rust
747 lines
30 KiB
Rust
use benchmark::params_aliases::{
|
|
BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
|
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
|
};
|
|
use benchmark::utilities::{write_to_json, CryptoParametersRecord, OperatorType};
|
|
use criterion::{black_box, Criterion};
|
|
use dyn_stack::PodStack;
|
|
use tfhe::core_crypto::fft_impl::fft128::crypto::bootstrap::bootstrap_scratch;
|
|
use tfhe::core_crypto::prelude::*;
|
|
use tfhe::keycache::NamedParam;
|
|
|
|
fn pbs_128(c: &mut Criterion) {
|
|
let bench_name = "core_crypto::pbs128";
|
|
let mut bench_group = c.benchmark_group(bench_name);
|
|
bench_group
|
|
.sample_size(10)
|
|
.measurement_time(std::time::Duration::from_secs(30));
|
|
|
|
type InputScalar = u64;
|
|
type OutputScalar = u128;
|
|
|
|
let noise_params = BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
|
let base_params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
|
|
|
let lwe_dimension = base_params.lwe_dimension; // From PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128
|
|
let glwe_dimension = noise_params.glwe_dimension();
|
|
let polynomial_size = noise_params.polynomial_size();
|
|
let lwe_noise_distribution = base_params.lwe_noise_distribution;
|
|
let glwe_noise_distribution = noise_params.glwe_noise_distribution();
|
|
let pbs_base_log = noise_params.decomp_base_log();
|
|
let pbs_level = noise_params.decomp_level_count();
|
|
let input_ciphertext_modulus = base_params.ciphertext_modulus;
|
|
let output_ciphertext_modulus = noise_params.ciphertext_modulus();
|
|
|
|
let mut boxed_seeder = new_seeder();
|
|
let seeder = boxed_seeder.as_mut();
|
|
|
|
let mut secret_generator = SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
|
|
|
|
let mut encryption_generator =
|
|
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
|
|
|
|
let input_lwe_secret_key =
|
|
LweSecretKey::generate_new_binary(lwe_dimension, &mut secret_generator);
|
|
|
|
let output_glwe_secret_key = GlweSecretKey::<Vec<OutputScalar>>::generate_new_binary(
|
|
glwe_dimension,
|
|
polynomial_size,
|
|
&mut secret_generator,
|
|
);
|
|
|
|
let output_lwe_secret_key = output_glwe_secret_key.clone().into_lwe_secret_key();
|
|
|
|
let mut bsk = LweBootstrapKey::new(
|
|
OutputScalar::ZERO,
|
|
glwe_dimension.to_glwe_size(),
|
|
polynomial_size,
|
|
pbs_base_log,
|
|
pbs_level,
|
|
lwe_dimension,
|
|
output_ciphertext_modulus,
|
|
);
|
|
par_generate_lwe_bootstrap_key(
|
|
&input_lwe_secret_key,
|
|
&output_glwe_secret_key,
|
|
&mut bsk,
|
|
glwe_noise_distribution,
|
|
&mut encryption_generator,
|
|
);
|
|
|
|
let mut fourier_bsk = Fourier128LweBootstrapKey::new(
|
|
lwe_dimension,
|
|
glwe_dimension.to_glwe_size(),
|
|
polynomial_size,
|
|
pbs_base_log,
|
|
pbs_level,
|
|
);
|
|
convert_standard_lwe_bootstrap_key_to_fourier_128(&bsk, &mut fourier_bsk);
|
|
|
|
let message_modulus: InputScalar = 1 << 4;
|
|
|
|
let input_message: InputScalar = 3;
|
|
|
|
let delta: InputScalar = (1 << (InputScalar::BITS - 1)) / message_modulus;
|
|
|
|
let plaintext = Plaintext(input_message * delta);
|
|
|
|
let lwe_ciphertext_in: LweCiphertextOwned<InputScalar> =
|
|
allocate_and_encrypt_new_lwe_ciphertext(
|
|
&input_lwe_secret_key,
|
|
plaintext,
|
|
lwe_noise_distribution,
|
|
input_ciphertext_modulus,
|
|
&mut encryption_generator,
|
|
);
|
|
|
|
let accumulator: GlweCiphertextOwned<OutputScalar> = GlweCiphertextOwned::new(
|
|
OutputScalar::ONE,
|
|
glwe_dimension.to_glwe_size(),
|
|
polynomial_size,
|
|
output_ciphertext_modulus,
|
|
);
|
|
|
|
let mut out_pbs_ct: LweCiphertext<Vec<OutputScalar>> = LweCiphertext::new(
|
|
OutputScalar::ZERO,
|
|
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
|
|
output_ciphertext_modulus,
|
|
);
|
|
|
|
let fft = Fft128::new(polynomial_size);
|
|
let fft = fft.as_view();
|
|
|
|
let mut buffers = vec![
|
|
0u8;
|
|
bootstrap_scratch::<OutputScalar>(
|
|
fourier_bsk.glwe_size(),
|
|
fourier_bsk.polynomial_size(),
|
|
fft
|
|
)
|
|
.unwrap()
|
|
.unaligned_bytes_required()
|
|
];
|
|
|
|
let id = format!("{bench_name}::{}", noise_params.name());
|
|
bench_group.bench_function(&id, |b| {
|
|
b.iter(|| {
|
|
fourier_bsk.bootstrap(
|
|
&mut out_pbs_ct,
|
|
&lwe_ciphertext_in,
|
|
&accumulator,
|
|
fft,
|
|
PodStack::new(&mut buffers),
|
|
);
|
|
black_box(&mut out_pbs_ct);
|
|
});
|
|
});
|
|
|
|
// TODO Add throughput benchmark case
|
|
|
|
let params_record = CryptoParametersRecord {
|
|
lwe_dimension: Some(lwe_dimension),
|
|
glwe_dimension: Some(glwe_dimension),
|
|
polynomial_size: Some(polynomial_size),
|
|
lwe_noise_distribution: Some(lwe_noise_distribution),
|
|
glwe_noise_distribution: Some(base_params.glwe_noise_distribution),
|
|
pbs_base_log: Some(pbs_base_log),
|
|
pbs_level: Some(pbs_level),
|
|
ciphertext_modulus: Some(input_ciphertext_modulus),
|
|
..Default::default()
|
|
};
|
|
|
|
let bit_size = (message_modulus as u32).ilog2();
|
|
write_to_json(
|
|
&id,
|
|
params_record,
|
|
noise_params.name(),
|
|
"pbs",
|
|
&OperatorType::Atomic,
|
|
bit_size,
|
|
vec![bit_size],
|
|
);
|
|
}
|
|
|
|
#[cfg(feature = "gpu")]
mod cuda {
    //! GPU (CUDA) variants of the 128-bit PBS benchmarks: classic and multi-bit, each with a
    //! latency mode and a throughput mode selected by `get_bench_type()`.

    use benchmark::utilities::{
        cuda_local_keys_core, cuda_local_streams_core, get_bench_type, throughput_num_threads,
        write_to_json, BenchmarkType, CpuKeys, CpuKeysBuilder, CryptoParametersRecord, CudaIndexes,
        CudaLocalKeys, OperatorType,
    };
    use criterion::{black_box, Criterion, Throughput};
    use rayon::prelude::*;
    use tfhe::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
    use tfhe::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
    use tfhe::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
    use tfhe::core_crypto::gpu::{
        cuda_multi_bit_programmable_bootstrap_128_lwe_ciphertext,
        cuda_programmable_bootstrap_128_lwe_ciphertext, get_number_of_gpus, CudaStreams,
    };
    use tfhe::core_crypto::prelude::*;
    use tfhe::shortint::parameters::{
        ModulusSwitchType, NoiseSquashingParameters,
        NOISE_SQUASHING_PARAM_GPU_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
        NOISE_SQUASHING_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
        PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
    };

    /// Benchmarks the classic 128-bit PBS on GPU.
    ///
    /// In latency mode a single `u64` LWE ciphertext is bootstrapped on multi-GPU streams. In
    /// throughput mode one batch of ciphertexts per GPU is prepared by the setup closure and
    /// the batches are bootstrapped in parallel, one per local stream. The bootstrap key is
    /// allocated zero-filled (it is not generated here). The parameters are exported through
    /// `write_to_json` at the end.
    fn cuda_pbs_128(c: &mut Criterion) {
        type Scalar = u128;

        let bench_name = "core_crypto::cuda::pbs128";
        let mut bench_group = c.benchmark_group(bench_name);
        bench_group
            .sample_size(10)
            .measurement_time(std::time::Duration::from_secs(30));

        // Only the LWE dimension (and the recorded metadata) is read from `input_params`.
        let input_params = PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
        let squash_params = NOISE_SQUASHING_PARAM_GPU_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;

        let NoiseSquashingParameters::Classic(squash_params) = squash_params else {
            panic!("Multi bit noise squashing PBS currently not supported on GPU");
        };

        let lwe_noise_distribution_u64 = DynamicDistribution::new_t_uniform(46);
        let ct_modulus_u64: CiphertextModulus<u64> = CiphertextModulus::new_native();

        let params_name = "PARAMS_SWITCH_SQUASH";

        let mut boxed_seeder = new_seeder();
        let seeder = boxed_seeder.as_mut();

        let mut secret_generator =
            SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
        let mut encryption_generator =
            EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);

        let input_lwe_secret_key =
            LweSecretKey::generate_new_binary(input_params.lwe_dimension, &mut secret_generator);
        let output_glwe_secret_key = GlweSecretKey::<Vec<Scalar>>::generate_new_binary(
            squash_params.glwe_dimension,
            squash_params.polynomial_size,
            &mut secret_generator,
        );

        // The GLWE key is only needed for the size of the bootstrap output, so it is consumed
        // directly instead of being cloned.
        let output_lwe_size = output_glwe_secret_key
            .into_lwe_secret_key()
            .lwe_dimension()
            .to_lwe_size();

        let bsk = LweBootstrapKey::new(
            Scalar::ZERO,
            squash_params.glwe_dimension.to_glwe_size(),
            squash_params.polynomial_size,
            squash_params.decomp_base_log,
            squash_params.decomp_level_count,
            input_params.lwe_dimension,
            squash_params.ciphertext_modulus,
        );

        let modulus_switch_noise_reduction_configuration =
            match squash_params.modulus_switch_noise_reduction_params {
                ModulusSwitchType::Standard => None,
                ModulusSwitchType::DriftTechniqueNoiseReduction(_) => {
                    panic!("Drift noise reduction is not supported on GPU")
                }
                ModulusSwitchType::CenteredMeanNoiseReduction => {
                    Some(CudaModulusSwitchNoiseReductionConfiguration::Centered)
                }
            };

        let cpu_keys: CpuKeys<_, u64> = CpuKeysBuilder::new().bootstrap_key(bsk).build();

        // Encode the message 3 in the upper bits of a u64.
        let message_modulus: u64 = 1 << 4;
        let input_message: u64 = 3;
        let delta: u64 = (1 << (u64::BITS - 1)) / message_modulus;
        let plaintext = Plaintext(input_message * delta);

        let bench_id = match get_bench_type() {
            BenchmarkType::Latency => {
                // Streams are only needed in latency mode; throughput mode uses local streams.
                let streams = CudaStreams::new_multi_gpu();
                let gpu_keys = CudaLocalKeys::from_cpu_keys(
                    &cpu_keys,
                    modulus_switch_noise_reduction_configuration,
                    &streams,
                );

                let lwe_ciphertext_in: LweCiphertextOwned<u64> =
                    allocate_and_encrypt_new_lwe_ciphertext(
                        &input_lwe_secret_key,
                        plaintext,
                        lwe_noise_distribution_u64,
                        ct_modulus_u64,
                        &mut encryption_generator,
                    );
                let lwe_ciphertext_in_gpu =
                    CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &streams);

                let accumulator: GlweCiphertextOwned<Scalar> = GlweCiphertextOwned::new(
                    Scalar::ONE,
                    squash_params.glwe_dimension.to_glwe_size(),
                    squash_params.polynomial_size,
                    squash_params.ciphertext_modulus,
                );
                let accumulator_gpu =
                    CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &streams);

                let out_pbs_ct = LweCiphertext::new(
                    Scalar::ZERO,
                    output_lwe_size,
                    squash_params.ciphertext_modulus,
                );
                let mut out_pbs_ct_gpu =
                    CudaLweCiphertextList::from_lwe_ciphertext(&out_pbs_ct, &streams);

                let bench_id = format!("{bench_name}::{params_name}");
                bench_group.bench_function(&bench_id, |b| {
                    b.iter(|| {
                        cuda_programmable_bootstrap_128_lwe_ciphertext(
                            &lwe_ciphertext_in_gpu,
                            &mut out_pbs_ct_gpu,
                            &accumulator_gpu,
                            gpu_keys.bsk.as_ref().unwrap(),
                            &streams,
                        );
                        black_box(&mut out_pbs_ct_gpu);
                    })
                });
                bench_id
            }
            BenchmarkType::Throughput => {
                let gpu_keys_vec =
                    cuda_local_keys_core(&cpu_keys, modulus_switch_noise_reduction_configuration);
                let gpu_count = get_number_of_gpus() as usize;

                let bench_id = format!("{bench_name}::throughput::{params_name}");
                let blocks: usize = 1;
                let elements = throughput_num_threads(blocks, 1);
                let elements_per_stream = elements as usize / gpu_count;
                // The integer division above may drop a remainder: report the number of
                // ciphertexts that are actually submitted per iteration.
                bench_group.throughput(Throughput::Elements(
                    (elements_per_stream * gpu_count) as u64,
                ));
                bench_group.bench_function(&bench_id, |b| {
                    let setup_encrypted_values = || {
                        let local_streams = cuda_local_streams_core();

                        let plaintext_list =
                            PlaintextList::new(u64::ZERO, PlaintextCount(elements_per_stream));

                        let input_cts = (0..gpu_count)
                            .map(|i| {
                                let mut input_ct_list = LweCiphertextList::new(
                                    u64::ZERO,
                                    input_lwe_secret_key.lwe_dimension().to_lwe_size(),
                                    LweCiphertextCount(elements_per_stream),
                                    ct_modulus_u64,
                                );

                                encrypt_lwe_ciphertext_list(
                                    &input_lwe_secret_key,
                                    &mut input_ct_list,
                                    &plaintext_list,
                                    lwe_noise_distribution_u64,
                                    &mut encryption_generator,
                                );

                                CudaLweCiphertextList::from_lwe_ciphertext_list(
                                    &input_ct_list,
                                    &local_streams[i],
                                )
                            })
                            .collect::<Vec<_>>();

                        let accumulators = (0..gpu_count)
                            .map(|i| {
                                let accumulator = GlweCiphertextOwned::new(
                                    Scalar::ONE,
                                    squash_params.glwe_dimension.to_glwe_size(),
                                    squash_params.polynomial_size,
                                    squash_params.ciphertext_modulus,
                                );
                                CudaGlweCiphertextList::from_glwe_ciphertext(
                                    &accumulator,
                                    &local_streams[i],
                                )
                            })
                            .collect::<Vec<_>>();

                        // Allocate the LweCiphertext to store the result of the PBS
                        let output_cts = (0..gpu_count)
                            .map(|i| {
                                let output_ct_list = LweCiphertextList::new(
                                    Scalar::ZERO,
                                    output_lwe_size,
                                    LweCiphertextCount(elements_per_stream),
                                    squash_params.ciphertext_modulus,
                                );
                                CudaLweCiphertextList::from_lwe_ciphertext_list(
                                    &output_ct_list,
                                    &local_streams[i],
                                )
                            })
                            .collect::<Vec<_>>();

                        // Wait for the setup work on every stream before timing starts.
                        local_streams.iter().for_each(|stream| stream.synchronize());

                        (input_cts, output_cts, accumulators, local_streams)
                    };

                    b.iter_batched(
                        setup_encrypted_values,
                        |(input_cts, mut output_cts, accumulators, local_streams)| {
                            (0..gpu_count)
                                .into_par_iter()
                                .zip(input_cts.par_iter())
                                .zip(output_cts.par_iter_mut())
                                .zip(accumulators.par_iter())
                                .zip(local_streams.par_iter())
                                .for_each(
                                    |(
                                        (((i, input_batch), output_batch), accumulator),
                                        local_stream,
                                    )| {
                                        cuda_programmable_bootstrap_128_lwe_ciphertext(
                                            input_batch,
                                            output_batch,
                                            accumulator,
                                            gpu_keys_vec[i].bsk.as_ref().unwrap(),
                                            local_stream,
                                        );
                                    },
                                )
                        },
                        criterion::BatchSize::SmallInput,
                    );
                });
                bench_id
            }
        };

        let params_record = CryptoParametersRecord {
            lwe_dimension: Some(input_params.lwe_dimension),
            glwe_dimension: Some(squash_params.glwe_dimension),
            polynomial_size: Some(squash_params.polynomial_size),
            lwe_noise_distribution: Some(lwe_noise_distribution_u64),
            glwe_noise_distribution: Some(input_params.glwe_noise_distribution),
            pbs_base_log: Some(squash_params.decomp_base_log),
            pbs_level: Some(squash_params.decomp_level_count),
            ciphertext_modulus: Some(input_params.ciphertext_modulus),
            ..Default::default()
        };

        let bit_size = (message_modulus as u32).ilog2();
        write_to_json(
            &bench_id,
            params_record,
            params_name,
            "pbs",
            &OperatorType::Atomic,
            bit_size,
            vec![bit_size],
        );
    }

    /// Benchmarks the multi-bit 128-bit PBS on GPU.
    ///
    /// Same structure as `cuda_pbs_128`, but with a multi-bit bootstrap key (allocated
    /// zero-filled, not generated here) and with the LUT/output/input index vectors the
    /// multi-bit entry point takes. No modulus switch noise reduction configuration is passed
    /// when the keys are moved to the GPU.
    fn cuda_multi_bit_pbs_128(c: &mut Criterion) {
        type Scalar = u128;

        let bench_name = "core_crypto::cuda::multi_bit_pbs128";
        let mut bench_group = c.benchmark_group(bench_name);
        bench_group
            .sample_size(10)
            .measurement_time(std::time::Duration::from_secs(30));

        let input_params = PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
        let NoiseSquashingParameters::MultiBit(squash_params) =
            NOISE_SQUASHING_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128
        else {
            panic!("Expected Multi bit params")
        };

        let lwe_noise_distribution_u64 = DynamicDistribution::new_t_uniform(46);
        let ct_modulus_u64: CiphertextModulus<u64> = CiphertextModulus::new_native();

        let params_name = "PARAMS_SWITCH_SQUASH";

        let mut boxed_seeder = new_seeder();
        let seeder = boxed_seeder.as_mut();

        let mut secret_generator =
            SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
        let mut encryption_generator =
            EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);

        let input_lwe_secret_key =
            LweSecretKey::generate_new_binary(input_params.lwe_dimension, &mut secret_generator);
        let output_glwe_secret_key = GlweSecretKey::<Vec<Scalar>>::generate_new_binary(
            squash_params.glwe_dimension,
            squash_params.polynomial_size,
            &mut secret_generator,
        );

        // The GLWE key is only needed for the size of the bootstrap output, so it is consumed
        // directly instead of being cloned.
        let output_lwe_size = output_glwe_secret_key
            .into_lwe_secret_key()
            .lwe_dimension()
            .to_lwe_size();

        let multi_bit_bsk = LweMultiBitBootstrapKey::new(
            Scalar::ZERO,
            squash_params.glwe_dimension.to_glwe_size(),
            squash_params.polynomial_size,
            squash_params.decomp_base_log,
            squash_params.decomp_level_count,
            input_params.lwe_dimension,
            squash_params.grouping_factor,
            squash_params.ciphertext_modulus,
        );

        let cpu_keys: CpuKeys<_, u64> = CpuKeysBuilder::new()
            .multi_bit_bootstrap_key(multi_bit_bsk)
            .build();

        // Encode the message 3 in the upper bits of a u64.
        let message_modulus: u64 = 1 << 4;
        let input_message: u64 = 3;
        let delta: u64 = (1 << (u64::BITS - 1)) / message_modulus;
        let plaintext = Plaintext(input_message * delta);

        let bench_id = match get_bench_type() {
            BenchmarkType::Latency => {
                let streams = CudaStreams::new_multi_gpu();
                let gpu_keys = CudaLocalKeys::from_cpu_keys(&cpu_keys, None, &streams);

                let lwe_ciphertext_in: LweCiphertextOwned<u64> =
                    allocate_and_encrypt_new_lwe_ciphertext(
                        &input_lwe_secret_key,
                        plaintext,
                        lwe_noise_distribution_u64,
                        ct_modulus_u64,
                        &mut encryption_generator,
                    );
                let lwe_ciphertext_in_gpu =
                    CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &streams);

                let accumulator: GlweCiphertextOwned<Scalar> = GlweCiphertextOwned::new(
                    Scalar::ONE,
                    squash_params.glwe_dimension.to_glwe_size(),
                    squash_params.polynomial_size,
                    squash_params.ciphertext_modulus,
                );
                let accumulator_gpu =
                    CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &streams);

                let out_pbs_ct = LweCiphertext::new(
                    Scalar::ZERO,
                    output_lwe_size,
                    squash_params.ciphertext_modulus,
                );
                let mut out_pbs_ct_gpu =
                    CudaLweCiphertextList::from_lwe_ciphertext(&out_pbs_ct, &streams);

                // A single ciphertext is processed, hence the single index 0.
                let h_indexes = [0];
                let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0);

                let bench_id = format!("{bench_name}::{params_name}");
                bench_group.bench_function(&bench_id, |b| {
                    b.iter(|| {
                        cuda_multi_bit_programmable_bootstrap_128_lwe_ciphertext(
                            &lwe_ciphertext_in_gpu,
                            &mut out_pbs_ct_gpu,
                            &accumulator_gpu,
                            &cuda_indexes.d_lut,
                            &cuda_indexes.d_output,
                            &cuda_indexes.d_input,
                            gpu_keys.multi_bit_bsk.as_ref().unwrap(),
                            &streams,
                        );
                        black_box(&mut out_pbs_ct_gpu);
                    })
                });
                bench_id
            }
            BenchmarkType::Throughput => {
                let gpu_keys_vec = cuda_local_keys_core(&cpu_keys, None);
                let gpu_count = get_number_of_gpus() as usize;

                let bench_id = format!("{bench_name}::throughput::{params_name}");
                let blocks: usize = 1;
                let elements = throughput_num_threads(blocks, 1);
                let elements_per_stream = elements as usize / gpu_count;
                // The integer division above may drop a remainder: report the number of
                // ciphertexts that are actually submitted per iteration.
                bench_group.throughput(Throughput::Elements(
                    (elements_per_stream * gpu_count) as u64,
                ));
                bench_group.bench_function(&bench_id, |b| {
                    let setup_encrypted_values = || {
                        let local_streams = cuda_local_streams_core();

                        let plaintext_list =
                            PlaintextList::new(u64::ZERO, PlaintextCount(elements_per_stream));

                        let input_cts = (0..gpu_count)
                            .map(|i| {
                                let mut input_ct_list = LweCiphertextList::new(
                                    u64::ZERO,
                                    input_lwe_secret_key.lwe_dimension().to_lwe_size(),
                                    LweCiphertextCount(elements_per_stream),
                                    ct_modulus_u64,
                                );

                                encrypt_lwe_ciphertext_list(
                                    &input_lwe_secret_key,
                                    &mut input_ct_list,
                                    &plaintext_list,
                                    lwe_noise_distribution_u64,
                                    &mut encryption_generator,
                                );

                                CudaLweCiphertextList::from_lwe_ciphertext_list(
                                    &input_ct_list,
                                    &local_streams[i],
                                )
                            })
                            .collect::<Vec<_>>();

                        let accumulators = (0..gpu_count)
                            .map(|i| {
                                let accumulator = GlweCiphertextOwned::new(
                                    Scalar::ONE,
                                    squash_params.glwe_dimension.to_glwe_size(),
                                    squash_params.polynomial_size,
                                    squash_params.ciphertext_modulus,
                                );
                                CudaGlweCiphertextList::from_glwe_ciphertext(
                                    &accumulator,
                                    &local_streams[i],
                                )
                            })
                            .collect::<Vec<_>>();

                        // Allocate the LweCiphertext to store the result of the PBS
                        let output_cts = (0..gpu_count)
                            .map(|i| {
                                let output_ct_list = LweCiphertextList::new(
                                    Scalar::ZERO,
                                    output_lwe_size,
                                    LweCiphertextCount(elements_per_stream),
                                    squash_params.ciphertext_modulus,
                                );
                                CudaLweCiphertextList::from_lwe_ciphertext_list(
                                    &output_ct_list,
                                    &local_streams[i],
                                )
                            })
                            .collect::<Vec<_>>();

                        // One index per ciphertext of a per-GPU batch; the same host index
                        // vector is uploaded to every GPU.
                        let h_indexes = (0..(elements / gpu_count as u64))
                            .map(CastFrom::cast_from)
                            .collect::<Vec<_>>();
                        let cuda_indexes_vec = (0..gpu_count)
                            .map(|i| CudaIndexes::new(&h_indexes, &local_streams[i], 0))
                            .collect::<Vec<_>>();

                        // Wait for the setup work on every stream before timing starts.
                        local_streams.iter().for_each(|stream| stream.synchronize());

                        (
                            input_cts,
                            output_cts,
                            accumulators,
                            cuda_indexes_vec,
                            local_streams,
                        )
                    };

                    b.iter_batched(
                        setup_encrypted_values,
                        |(
                            input_cts,
                            mut output_cts,
                            accumulators,
                            cuda_indexes_vec,
                            local_streams,
                        )| {
                            (0..gpu_count)
                                .into_par_iter()
                                .zip(input_cts.par_iter())
                                .zip(output_cts.par_iter_mut())
                                .zip(accumulators.par_iter())
                                .zip(local_streams.par_iter())
                                .for_each(
                                    |((((i, input_ct), output_ct), accumulator), local_stream)| {
                                        cuda_multi_bit_programmable_bootstrap_128_lwe_ciphertext(
                                            input_ct,
                                            output_ct,
                                            accumulator,
                                            &cuda_indexes_vec[i].d_lut,
                                            &cuda_indexes_vec[i].d_output,
                                            &cuda_indexes_vec[i].d_input,
                                            gpu_keys_vec[i].multi_bit_bsk.as_ref().unwrap(),
                                            local_stream,
                                        );
                                    },
                                )
                        },
                        criterion::BatchSize::SmallInput,
                    );
                });
                bench_id
            }
        };

        let params_record = CryptoParametersRecord {
            lwe_dimension: Some(input_params.lwe_dimension),
            glwe_dimension: Some(squash_params.glwe_dimension),
            polynomial_size: Some(squash_params.polynomial_size),
            lwe_noise_distribution: Some(lwe_noise_distribution_u64),
            glwe_noise_distribution: Some(input_params.glwe_noise_distribution),
            pbs_base_log: Some(squash_params.decomp_base_log),
            pbs_level: Some(squash_params.decomp_level_count),
            ciphertext_modulus: Some(input_params.ciphertext_modulus),
            ..Default::default()
        };

        let bit_size = (message_modulus as u32).ilog2();
        write_to_json(
            &bench_id,
            params_record,
            params_name,
            "pbs",
            &OperatorType::Atomic,
            bit_size,
            vec![bit_size],
        );
    }

    /// Runs the classic GPU PBS-128 benchmark with a Criterion instance configured from the
    /// command-line arguments.
    pub fn cuda_pbs128_group() {
        let mut criterion: Criterion<_> = Criterion::default().configure_from_args();
        cuda_pbs_128(&mut criterion);
    }

    /// Runs the multi-bit GPU PBS-128 benchmark with a Criterion instance configured from the
    /// command-line arguments.
    pub fn cuda_multi_bit_pbs128_group() {
        let mut criterion: Criterion<_> = Criterion::default().configure_from_args();
        cuda_multi_bit_pbs_128(&mut criterion);
    }
}
|
|
|
|
#[cfg(feature = "gpu")]
|
|
use cuda::{cuda_multi_bit_pbs128_group, cuda_pbs128_group};
|
|
|
|
pub fn pbs128_group() {
|
|
let mut criterion: Criterion<_> = Criterion::default().configure_from_args();
|
|
pbs_128(&mut criterion);
|
|
}
|
|
|
|
/// Runs every GPU benchmark group in order: classic PBS-128 first, then multi-bit PBS-128.
#[cfg(feature = "gpu")]
fn go_through_gpu_bench_groups() {
    for run_group in [cuda_pbs128_group as fn(), cuda_multi_bit_pbs128_group] {
        run_group();
    }
}
|
|
|
|
#[cfg(not(feature = "gpu"))]
|
|
fn go_through_cpu_bench_groups() {
|
|
pbs128_group();
|
|
}
|
|
/// Benchmark entry point: runs the GPU groups when the `gpu` feature is enabled and the CPU
/// groups otherwise, then asks Criterion for its final summary.
fn main() {
    // The two cfg conditions are mutually exclusive, so exactly one of these calls is compiled.
    #[cfg(not(feature = "gpu"))]
    go_through_cpu_bench_groups();
    #[cfg(feature = "gpu")]
    go_through_gpu_bench_groups();

    let summary_criterion = Criterion::default().configure_from_args();
    summary_criterion.final_summary();
}
|