use benchmark::params_aliases::{ BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128, BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128, BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128, BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1, }; use benchmark::utilities::{ get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType, }; use criterion::{black_box, criterion_group, BatchSize, Criterion, Throughput}; #[cfg(feature = "gpu")] use cuda::gpu_re_randomize_group; use rayon::iter::{IndexedParallelIterator, ParallelIterator}; use rayon::prelude::{IntoParallelIterator, IntoParallelRefMutIterator}; use tfhe::integer::ciphertext::{CompressedCiphertextListBuilder, ReRandomizationContext}; use tfhe::integer::key_switching_key::{KeySwitchingKey, KeySwitchingKeyMaterial}; use tfhe::integer::{gen_keys_radix, CompactPrivateKey, CompactPublicKey, RadixCiphertext}; use tfhe::keycache::NamedParam; fn execute_cpu_re_randomize(c: &mut Criterion, bit_size: usize) { let bench_name = "integer::re_randomize"; let mut bench_group = c.benchmark_group(bench_name); bench_group .sample_size(15) .measurement_time(std::time::Duration::from_secs(30)); let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128; let comp_param = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128; let cpk_param = BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1; let ks_param = BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128; let num_blocks = (bit_size as f64 / (param.message_modulus.0 as f64).log(2.0)).ceil() as usize; let (radix_cks, sks) = gen_keys_radix(param, num_blocks); let cks = radix_cks.as_ref(); let private_compression_key = cks.new_compression_private_key(comp_param); let (compressed_compression_key, compressed_decompression_key) = radix_cks.new_compressed_compression_decompression_keys(&private_compression_key); let compression_key = compressed_compression_key.decompress(); let decompression_key = compressed_decompression_key.decompress(); let cpk_private_key = CompactPrivateKey::new(cpk_param); let cpk = CompactPublicKey::new(&cpk_private_key); let ksk = KeySwitchingKey::new((&cpk_private_key, None), ((&cks), (&sks)), ks_param); let ksk = ksk.into_raw_parts(); let (ksk_material, _, _) = ksk.into_raw_parts(); let ksk_material = KeySwitchingKeyMaterial::from_raw_parts(ksk_material); let rerand_domain_separator = *b"TFHE_Rrd"; let compact_public_encryption_domain_separator = *b"TFHE_Enc"; let metadata = b"bench".as_slice(); let bench_id; match get_bench_type() { BenchmarkType::Latency => { // Encrypt and compress a single ciphertext let message = 42u64; let ct = cks.encrypt_radix(message, num_blocks); let mut builder = CompressedCiphertextListBuilder::new(); builder.push(ct); let compressed = builder.build(&compression_key); let decompressed: RadixCiphertext = compressed.get(0, &decompression_key).unwrap().unwrap(); let mut d_re_randomized = decompressed.clone(); bench_id = format!("{bench_name}::latency_u{bit_size}"); println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter_batched( || { let mut re_randomizer_context = ReRandomizationContext::new( rerand_domain_separator, [metadata], compact_public_encryption_domain_separator, ); re_randomizer_context.add_ciphertext(&decompressed); re_randomizer_context.finalize() }, |mut seed_gen| { d_re_randomized .re_randomize( &cpk, &ksk_material.as_view(), seed_gen.next_seed().unwrap(), ) .unwrap(); _ = black_box(&d_re_randomized); }, BatchSize::SmallInput, ) }); } BenchmarkType::Throughput => { let elements = throughput_num_threads(num_blocks, 1); bench_group.throughput(Throughput::Elements(elements)); // Pre-generate and compress ciphertexts for throughput test let decompressed_cts: Vec = (0..elements as usize) .into_par_iter() .map(|_| { let message = 42u64; let ct = cks.encrypt_radix(message, num_blocks); let mut builder = CompressedCiphertextListBuilder::new(); builder.push(ct); let compressed = builder.build(&compression_key); compressed.get(0, &decompression_key).unwrap().unwrap() }) .collect(); bench_id = format!("{bench_name}::throughput_u{bit_size}"); println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter_batched( || { // Create a fresh context for each benchmark iteration let mut ctx = ReRandomizationContext::new( rerand_domain_separator, [metadata], compact_public_encryption_domain_separator, ); // Add all ciphertexts to the context for ct in &decompressed_cts { ctx.add_ciphertext(ct); } // Return a new seed generator for this iteration (ctx.finalize(), decompressed_cts.clone()) }, |(mut seed_gen, mut cts_to_rerand)| { let seeds: Vec<_> = (0..cts_to_rerand.len()) .map(|_| seed_gen.next_seed().unwrap()) .collect(); cts_to_rerand .par_iter_mut() .zip(seeds.into_par_iter()) .for_each(|(d_re_randomized, seed)| { d_re_randomized .re_randomize(&cpk, &ksk_material.as_view(), seed) .unwrap(); _ = black_box(&d_re_randomized); }) }, BatchSize::SmallInput, ) }); } } write_to_json::( &bench_id, (comp_param, param.into()), comp_param.name(), "re_randomize", &OperatorType::Atomic, bit_size as u32, vec![param.message_modulus.0.ilog2(); num_blocks], ); bench_group.finish() } fn cpu_re_randomize(c: &mut Criterion) { let bit_sizes = [2, 4, 8, 16, 32, 64, 128, 256]; for bit_size in bit_sizes.iter() { execute_cpu_re_randomize(c, *bit_size); } } criterion_group!(cpu_re_randomize_group, cpu_re_randomize); #[cfg(feature = "gpu")] mod cuda { use benchmark::params_aliases::{ BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128, BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128, BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128, BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1, }; use benchmark::utilities::cuda_integer_utils::cuda_local_streams; use benchmark::utilities::{ get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType, }; use criterion::{black_box, criterion_group, BatchSize, Criterion, Throughput}; use rayon::prelude::*; use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams}; use tfhe::integer::ciphertext::ReRandomizationContext; use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder; use tfhe::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext}; use tfhe::integer::key_switching_key::KeySwitchingKey; use tfhe::integer::{gen_keys_radix, CompactPrivateKey, CompactPublicKey}; use tfhe::keycache::NamedParam; use tfhe::shortint::key_switching_key::CudaKeySwitchingKeyMaterial; fn execute_gpu_re_randomize(c: &mut Criterion, bit_size: usize) { let bench_name = "integer::cuda::re_randomize"; let mut bench_group = c.benchmark_group(bench_name); bench_group .sample_size(15) .measurement_time(std::time::Duration::from_secs(30)); let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128; let comp_param = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128; let cpk_param = BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1; let ks_param = BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128; let streams = CudaStreams::new_multi_gpu(); let num_blocks = (bit_size as f64 / (param.message_modulus.0 as f64).log(2.0)).ceil() as usize; let (radix_cks, sks) = gen_keys_radix(param, num_blocks); let cks = radix_cks.as_ref(); let private_compression_key = cks.new_compression_private_key(comp_param); let (cuda_compression_key, cuda_decompression_key) = radix_cks.new_cuda_compression_decompression_keys(&private_compression_key, &streams); let cpk_private_key = CompactPrivateKey::new(cpk_param); let cpk = CompactPublicKey::new(&cpk_private_key); let ksk = KeySwitchingKey::new((&cpk_private_key, None), (&cks, &sks), ks_param); let d_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams); let rerand_domain_separator = *b"TFHE_Rrd"; let compact_public_encryption_domain_separator = *b"TFHE_Enc"; let metadata = b"bench".as_slice(); let bench_id; match get_bench_type() { BenchmarkType::Latency => { // Encrypt and compress a single ciphertext let message = 42u64; let ct = cks.encrypt_radix(message, num_blocks); let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &streams); let mut builder = CudaCompressedCiphertextListBuilder::new(); builder.push(d_ct, &streams); let compressed = builder.build(&cuda_compression_key, &streams); let d_decompressed: CudaUnsignedRadixCiphertext = compressed .get(0, &cuda_decompression_key, &streams) .unwrap() .unwrap(); let decompressed = d_decompressed.to_radix_ciphertext(&streams); let mut d_re_randomized = d_decompressed.duplicate(&streams); bench_id = format!("{bench_name}::latency_u{bit_size}"); println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter_batched( || { let mut re_randomizer_context = ReRandomizationContext::new( rerand_domain_separator, [metadata], compact_public_encryption_domain_separator, ); re_randomizer_context.add_ciphertext(&decompressed); re_randomizer_context.finalize() }, |mut seed_gen| { d_re_randomized .re_randomize( &cpk, &d_ksk_material, seed_gen.next_seed().unwrap(), &streams, ) .unwrap(); _ = black_box(&d_re_randomized); }, BatchSize::SmallInput, ) }); } BenchmarkType::Throughput => { let elements = throughput_num_threads(num_blocks, 1); bench_group.throughput(Throughput::Elements(elements)); let local_streams = cuda_local_streams(num_blocks, elements as usize); let num_gpus = get_number_of_gpus() as usize; let d_ksk_material_vec: Vec = (0..num_gpus) .map(|i| { let local_stream = &local_streams[i % local_streams.len()]; CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, local_stream) }) .collect(); // Pre-generate and compress ciphertexts for throughput test let d_compressed_cts: Vec = (0..elements as usize) .into_par_iter() .map(|i| { let message = 42u64; let ct = cks.encrypt_radix(message, num_blocks); let local_stream = &local_streams[i % local_streams.len()]; let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, local_stream); let mut builder = CudaCompressedCiphertextListBuilder::new(); builder.push(d_ct, local_stream); let compressed = builder.build(&cuda_compression_key, local_stream); compressed .get(0, &cuda_decompression_key, local_stream) .unwrap() .unwrap() }) .collect(); // Prepare decompressed ciphertexts once let h_decompressed_cts: Vec<_> = d_compressed_cts .iter() .enumerate() .map(|(i, d_ct)| { let local_stream = &local_streams[i % local_streams.len()]; d_ct.to_radix_ciphertext(local_stream) }) .collect(); bench_id = format!("{bench_name}::throughput_u{bit_size}"); println!("{bench_id}"); bench_group.bench_function(&bench_id, |b| { b.iter_batched( || { // Create a fresh context for each benchmark iteration let mut ctx = ReRandomizationContext::new( rerand_domain_separator, [metadata], compact_public_encryption_domain_separator, ); // Add all ciphertexts to the context for ct in &h_decompressed_cts { ctx.add_ciphertext(ct); } let d_cts_to_rerand = d_compressed_cts .iter() .enumerate() .map(|(i, d_ct)| { let local_stream = &local_streams[i % local_streams.len()]; d_ct.duplicate(local_stream) }) .collect::>(); // Return a new seed generator for this iteration (ctx.finalize(), h_decompressed_cts.clone(), d_cts_to_rerand) }, |(mut seed_gen, h_cts_to_rerand, mut d_cts_to_rerand)| { let seeds: Vec<_> = (0..h_cts_to_rerand.len()) .map(|_| seed_gen.next_seed().unwrap()) .collect(); d_cts_to_rerand .par_iter_mut() .zip(seeds.into_par_iter()) .enumerate() .for_each(|(i, (d_re_randomized, seed))| { let local_stream = &local_streams[i % local_streams.len()]; let d_ksk = &d_ksk_material_vec[i % num_gpus]; d_re_randomized .re_randomize(&cpk, d_ksk, seed, local_stream) .unwrap(); _ = black_box(&d_re_randomized); }) }, BatchSize::SmallInput, ) }); } } write_to_json::( &bench_id, (comp_param, param.into()), comp_param.name(), "re_randomize", &OperatorType::Atomic, bit_size as u32, vec![param.message_modulus.0.ilog2(); num_blocks], ); bench_group.finish() } fn gpu_re_randomize(c: &mut Criterion) { let bit_sizes = [2, 4, 16, 32, 64, 128, 256]; for bit_size in bit_sizes.iter() { execute_gpu_re_randomize(c, *bit_size); } } criterion_group!(gpu_re_randomize_group, gpu_re_randomize); } fn main() { #[cfg(feature = "gpu")] gpu_re_randomize_group(); #[cfg(not(feature = "gpu"))] cpu_re_randomize_group(); Criterion::default().configure_from_args().final_summary(); }