mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-07 22:04:10 -05:00
feat(gpu): implement re-randomization
- exposed to the integer and HL APIs
- tested on the HL API
- benchmarks for the GPU and CPU implementations
This commit is contained in:
@@ -96,6 +96,12 @@ path = "benches/integer/glwe_packing_compression.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "pbs-stats", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "integer-rerand"
|
||||
path = "benches/integer/rerand.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "pbs-stats", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "glwe_packing_compression_128b-integer-bench"
|
||||
path = "benches/integer/glwe_packing_compression_128b.rs"
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
mod aes;
|
||||
mod oprf;
|
||||
|
||||
mod rerand;
|
||||
|
||||
use benchmark::params::ParamsAndNumBlocksIter;
|
||||
use benchmark::utilities::{
|
||||
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, EnvConfig, OperatorType,
|
||||
|
||||
417
tfhe-benchmark/benches/integer/rerand.rs
Normal file
417
tfhe-benchmark/benches/integer/rerand.rs
Normal file
@@ -0,0 +1,417 @@
|
||||
use benchmark::params_aliases::{
|
||||
BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
|
||||
};
|
||||
use benchmark::utilities::{
|
||||
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
|
||||
};
|
||||
use criterion::{black_box, criterion_group, BatchSize, Criterion, Throughput};
|
||||
#[cfg(feature = "gpu")]
|
||||
use cuda::gpu_re_randomize_group;
|
||||
use rayon::iter::{IndexedParallelIterator, ParallelIterator};
|
||||
use rayon::prelude::{IntoParallelIterator, IntoParallelRefMutIterator};
|
||||
use tfhe::integer::ciphertext::{CompressedCiphertextListBuilder, ReRandomizationContext};
|
||||
use tfhe::integer::key_switching_key::{KeySwitchingKey, KeySwitchingKeyMaterial};
|
||||
use tfhe::integer::{gen_keys_radix, CompactPrivateKey, CompactPublicKey, RadixCiphertext};
|
||||
use tfhe::keycache::NamedParam;
|
||||
|
||||
fn execute_cpu_re_randomize(c: &mut Criterion, bit_size: usize) {
|
||||
let bench_name = "integer::re_randomize";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
|
||||
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let comp_param = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let cpk_param = BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
|
||||
let ks_param = BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let num_blocks = (bit_size as f64 / (param.message_modulus.0 as f64).log(2.0)).ceil() as usize;
|
||||
|
||||
let (radix_cks, sks) = gen_keys_radix(param, num_blocks);
|
||||
let cks = radix_cks.as_ref();
|
||||
|
||||
let private_compression_key = cks.new_compression_private_key(comp_param);
|
||||
let (compressed_compression_key, compressed_decompression_key) =
|
||||
radix_cks.new_compressed_compression_decompression_keys(&private_compression_key);
|
||||
|
||||
let compression_key = compressed_compression_key.decompress();
|
||||
let decompression_key = compressed_decompression_key.decompress();
|
||||
|
||||
let cpk_private_key = CompactPrivateKey::new(cpk_param);
|
||||
let cpk = CompactPublicKey::new(&cpk_private_key);
|
||||
let ksk = KeySwitchingKey::new((&cpk_private_key, None), ((&cks), (&sks)), ks_param);
|
||||
let ksk = ksk.into_raw_parts();
|
||||
let (ksk_material, _, _) = ksk.into_raw_parts();
|
||||
let ksk_material = KeySwitchingKeyMaterial::from_raw_parts(ksk_material);
|
||||
|
||||
let rerand_domain_separator = *b"TFHE_Rrd";
|
||||
let compact_public_encryption_domain_separator = *b"TFHE_Enc";
|
||||
let metadata = b"bench".as_slice();
|
||||
|
||||
let bench_id;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
// Encrypt and compress a single ciphertext
|
||||
let message = 42u64;
|
||||
let ct = cks.encrypt_radix(message, num_blocks);
|
||||
|
||||
let mut builder = CompressedCiphertextListBuilder::new();
|
||||
builder.push(ct);
|
||||
let compressed = builder.build(&compression_key);
|
||||
let decompressed: RadixCiphertext =
|
||||
compressed.get(0, &decompression_key).unwrap().unwrap();
|
||||
|
||||
let mut d_re_randomized = decompressed.clone();
|
||||
|
||||
bench_id = format!("{bench_name}::latency_u{bit_size}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter_batched(
|
||||
|| {
|
||||
let mut re_randomizer_context = ReRandomizationContext::new(
|
||||
rerand_domain_separator,
|
||||
[metadata],
|
||||
compact_public_encryption_domain_separator,
|
||||
);
|
||||
|
||||
re_randomizer_context.add_ciphertext(&decompressed);
|
||||
re_randomizer_context.finalize()
|
||||
},
|
||||
|mut seed_gen| {
|
||||
d_re_randomized
|
||||
.re_randomize(
|
||||
&cpk,
|
||||
&ksk_material.as_view(),
|
||||
seed_gen.next_seed().unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
_ = black_box(&d_re_randomized);
|
||||
},
|
||||
BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
let elements = throughput_num_threads(num_blocks, 1);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
|
||||
// Pre-generate and compress ciphertexts for throughput test
|
||||
let decompressed_cts: Vec<RadixCiphertext> = (0..elements as usize)
|
||||
.into_par_iter()
|
||||
.map(|_| {
|
||||
let message = 42u64;
|
||||
let ct = cks.encrypt_radix(message, num_blocks);
|
||||
|
||||
let mut builder = CompressedCiphertextListBuilder::new();
|
||||
builder.push(ct);
|
||||
let compressed = builder.build(&compression_key);
|
||||
|
||||
compressed.get(0, &decompression_key).unwrap().unwrap()
|
||||
})
|
||||
.collect();
|
||||
|
||||
bench_id = format!("{bench_name}::throughput_u{bit_size}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter_batched(
|
||||
|| {
|
||||
// Create a fresh context for each benchmark iteration
|
||||
let mut ctx = ReRandomizationContext::new(
|
||||
rerand_domain_separator,
|
||||
[metadata],
|
||||
compact_public_encryption_domain_separator,
|
||||
);
|
||||
|
||||
// Add all ciphertexts to the context
|
||||
for ct in &decompressed_cts {
|
||||
ctx.add_ciphertext(ct);
|
||||
}
|
||||
|
||||
// Return a new seed generator for this iteration
|
||||
(ctx.finalize(), decompressed_cts.clone())
|
||||
},
|
||||
|(mut seed_gen, mut cts_to_rerand)| {
|
||||
let seeds: Vec<_> = (0..cts_to_rerand.len())
|
||||
.map(|_| seed_gen.next_seed().unwrap())
|
||||
.collect();
|
||||
|
||||
cts_to_rerand
|
||||
.par_iter_mut()
|
||||
.zip(seeds.into_par_iter())
|
||||
.for_each(|(d_re_randomized, seed)| {
|
||||
d_re_randomized
|
||||
.re_randomize(&cpk, &ksk_material.as_view(), seed)
|
||||
.unwrap();
|
||||
|
||||
_ = black_box(&d_re_randomized);
|
||||
})
|
||||
},
|
||||
BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
(comp_param, param.into()),
|
||||
comp_param.name(),
|
||||
"re_randomize",
|
||||
&OperatorType::Atomic,
|
||||
bit_size as u32,
|
||||
vec![param.message_modulus.0.ilog2(); num_blocks],
|
||||
);
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
fn cpu_re_randomize(c: &mut Criterion) {
|
||||
let bit_sizes = [2, 4, 8, 16, 32, 64, 128, 256];
|
||||
|
||||
for bit_size in bit_sizes.iter() {
|
||||
execute_cpu_re_randomize(c, *bit_size);
|
||||
}
|
||||
}
|
||||
|
||||
// Registers `cpu_re_randomize` as the Criterion group `cpu_re_randomize_group`,
// which `main` invokes when the `gpu` feature is disabled.
criterion_group!(cpu_re_randomize_group, cpu_re_randomize);
|
||||
|
||||
/// GPU (CUDA) variant of the re-randomization benchmark, compiled only when the
/// `gpu` feature is enabled.
#[cfg(feature = "gpu")]
mod cuda {
    use benchmark::params_aliases::{
        BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
        BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
        BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
        BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
    };
    use benchmark::utilities::cuda_integer_utils::cuda_local_streams;
    use benchmark::utilities::{
        get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
    };
    use criterion::{black_box, criterion_group, BatchSize, Criterion, Throughput};
    use rayon::prelude::*;
    use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
    use tfhe::integer::ciphertext::ReRandomizationContext;
    use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder;
    use tfhe::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
    use tfhe::integer::key_switching_key::KeySwitchingKey;
    use tfhe::integer::{gen_keys_radix, CompactPrivateKey, CompactPublicKey};
    use tfhe::keycache::NamedParam;
    use tfhe::shortint::key_switching_key::CudaKeySwitchingKeyMaterial;

    /// Runs the GPU re-randomization benchmark for radix ciphertexts of `bit_size` bits.
    ///
    /// Keys are generated on the host; the compression/decompression keys and the
    /// key-switching key material are then created for the CUDA streams. Every
    /// input ciphertext is encrypted on the host, moved to the device, compressed
    /// and decompressed there. A host copy of each decompressed ciphertext is kept
    /// because the `ReRandomizationContext` is fed with host ciphertexts.
    ///
    /// Depending on `get_bench_type()` either a single ciphertext (latency) or a
    /// batch of ciphertexts processed in parallel (throughput) is measured. The
    /// result metadata is recorded with `write_to_json`.
    ///
    /// * `c` - Criterion instance the benchmark group is registered on.
    /// * `bit_size` - width in bits of the encrypted integers.
    fn execute_gpu_re_randomize(c: &mut Criterion, bit_size: usize) {
        let bench_name = "integer::cuda::re_randomize";
        let mut bench_group = c.benchmark_group(bench_name);
        bench_group
            .sample_size(15)
            .measurement_time(std::time::Duration::from_secs(30));

        let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
        let comp_param = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
        let cpk_param = BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
        let ks_param = BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;

        let streams = CudaStreams::new_multi_gpu();

        // Number of radix blocks needed to hold `bit_size` bits of message.
        let num_blocks =
            (bit_size as f64 / (param.message_modulus.0 as f64).log(2.0)).ceil() as usize;

        let (radix_cks, sks) = gen_keys_radix(param, num_blocks);
        let cks = radix_cks.as_ref();

        let private_compression_key = cks.new_compression_private_key(comp_param);
        let (cuda_compression_key, cuda_decompression_key) =
            radix_cks.new_cuda_compression_decompression_keys(&private_compression_key, &streams);

        let cpk_private_key = CompactPrivateKey::new(cpk_param);
        let cpk = CompactPublicKey::new(&cpk_private_key);
        let ksk = KeySwitchingKey::new((&cpk_private_key, None), (&cks, &sks), ks_param);
        let d_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams);

        let rerand_domain_separator = *b"TFHE_Rrd";
        let compact_public_encryption_domain_separator = *b"TFHE_Enc";
        let metadata = b"bench".as_slice();

        let bench_id;

        match get_bench_type() {
            BenchmarkType::Latency => {
                // Encrypt a single ciphertext, then compress and decompress it on the device
                let message = 42u64;
                let ct = cks.encrypt_radix(message, num_blocks);
                let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &streams);

                let mut builder = CudaCompressedCiphertextListBuilder::new();
                builder.push(d_ct, &streams);
                let compressed = builder.build(&cuda_compression_key, &streams);
                let d_decompressed: CudaUnsignedRadixCiphertext = compressed
                    .get(0, &cuda_decompression_key, &streams)
                    .unwrap()
                    .unwrap();

                // Host copy used to feed the re-randomization context
                let decompressed = d_decompressed.to_radix_ciphertext(&streams);

                // Created once and re-randomized in place on every iteration; the
                // context is always built from the untouched host copy.
                let mut d_re_randomized = d_decompressed.duplicate(&streams);

                bench_id = format!("{bench_name}::latency_u{bit_size}");
                bench_group.bench_function(&bench_id, |b| {
                    b.iter_batched(
                        || {
                            let mut re_randomizer_context = ReRandomizationContext::new(
                                rerand_domain_separator,
                                [metadata],
                                compact_public_encryption_domain_separator,
                            );

                            re_randomizer_context.add_ciphertext(&decompressed);
                            re_randomizer_context.finalize()
                        },
                        |mut seed_gen| {
                            d_re_randomized
                                .re_randomize(
                                    &cpk,
                                    &d_ksk_material,
                                    seed_gen.next_seed().unwrap(),
                                    &streams,
                                )
                                .unwrap();

                            _ = black_box(&d_re_randomized);
                        },
                        BatchSize::SmallInput,
                    )
                });
            }
            BenchmarkType::Throughput => {
                let elements = throughput_num_threads(num_blocks, 1);
                bench_group.throughput(Throughput::Elements(elements));

                let local_streams = cuda_local_streams(num_blocks, elements as usize);
                let num_gpus = get_number_of_gpus() as usize;

                // One copy of the key-switching key material per reported GPU
                let d_ksk_material_vec: Vec<CudaKeySwitchingKeyMaterial> = (0..num_gpus)
                    .map(|i| {
                        let local_stream = &local_streams[i % local_streams.len()];
                        CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, local_stream)
                    })
                    .collect();

                // Pre-generate the inputs: encrypt, compress and decompress each
                // ciphertext on its local stream
                let d_decompressed_cts: Vec<CudaUnsignedRadixCiphertext> = (0..elements as usize)
                    .into_par_iter()
                    .map(|i| {
                        let message = 42u64;
                        let ct = cks.encrypt_radix(message, num_blocks);
                        let local_stream = &local_streams[i % local_streams.len()];
                        let d_ct =
                            CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, local_stream);

                        let mut builder = CudaCompressedCiphertextListBuilder::new();
                        builder.push(d_ct, local_stream);
                        let compressed = builder.build(&cuda_compression_key, local_stream);

                        compressed
                            .get(0, &cuda_decompression_key, local_stream)
                            .unwrap()
                            .unwrap()
                    })
                    .collect();

                // Host copies of the decompressed ciphertexts, prepared once; they
                // are only used to feed the re-randomization context
                let h_decompressed_cts: Vec<_> = d_decompressed_cts
                    .iter()
                    .enumerate()
                    .map(|(i, d_ct)| {
                        let local_stream = &local_streams[i % local_streams.len()];
                        d_ct.to_radix_ciphertext(local_stream)
                    })
                    .collect();

                bench_id = format!("{bench_name}::throughput_u{bit_size}");
                bench_group.bench_function(&bench_id, |b| {
                    b.iter_batched(
                        || {
                            // Create a fresh context for each benchmark iteration
                            let mut ctx = ReRandomizationContext::new(
                                rerand_domain_separator,
                                [metadata],
                                compact_public_encryption_domain_separator,
                            );

                            // Add all ciphertexts to the context
                            for ct in &h_decompressed_cts {
                                ctx.add_ciphertext(ct);
                            }

                            // Fresh device copies for the routine to mutate
                            let d_cts_to_rerand = d_decompressed_cts
                                .iter()
                                .enumerate()
                                .map(|(i, d_ct)| {
                                    let local_stream = &local_streams[i % local_streams.len()];
                                    d_ct.duplicate(local_stream)
                                })
                                .collect::<Vec<_>>();

                            // Only the seed generator and the device ciphertexts go to
                            // the routine. The host copies are not cloned: they would
                            // only be used for their length, and dropping that clone
                            // would happen inside the measured routine.
                            (ctx.finalize(), d_cts_to_rerand)
                        },
                        |(mut seed_gen, mut d_cts_to_rerand)| {
                            // One seed per ciphertext, drawn sequentially before the
                            // parallel section
                            let seeds: Vec<_> = (0..d_cts_to_rerand.len())
                                .map(|_| seed_gen.next_seed().unwrap())
                                .collect();

                            d_cts_to_rerand
                                .par_iter_mut()
                                .zip(seeds.into_par_iter())
                                .enumerate()
                                .for_each(|(i, (d_re_randomized, seed))| {
                                    let local_stream = &local_streams[i % local_streams.len()];
                                    let d_ksk = &d_ksk_material_vec[i % num_gpus];

                                    d_re_randomized
                                        .re_randomize(&cpk, d_ksk, seed, local_stream)
                                        .unwrap();

                                    _ = black_box(&d_re_randomized);
                                })
                        },
                        BatchSize::SmallInput,
                    )
                });
            }
        }

        write_to_json::<u64, _>(
            &bench_id,
            (comp_param, param.into()),
            comp_param.name(),
            "re_randomize",
            &OperatorType::Atomic,
            bit_size as u32,
            vec![param.message_modulus.0.ilog2(); num_blocks],
        );

        bench_group.finish()
    }

    /// Runs the GPU re-randomization benchmark once per listed integer width.
    fn gpu_re_randomize(c: &mut Criterion) {
        // NOTE(review): the CPU benchmark in this file also covers 8 bits; confirm
        // whether its absence here is intentional.
        let bit_sizes = [2, 4, 16, 32, 64, 128, 256];

        for bit_size in bit_sizes.iter() {
            execute_gpu_re_randomize(c, *bit_size);
        }
    }

    // Registers `gpu_re_randomize` as the Criterion group `gpu_re_randomize_group`.
    criterion_group!(gpu_re_randomize_group, gpu_re_randomize);
}
|
||||
|
||||
fn main() {
|
||||
#[cfg(feature = "gpu")]
|
||||
gpu_re_randomize_group();
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
cpu_re_randomize_group();
|
||||
Criterion::default().configure_from_args().final_summary();
|
||||
}
|
||||
Reference in New Issue
Block a user