feat(gpu): add noise and pfail tests for pbs128 and packingks

Guillermo Oyarzun
2025-12-02 13:33:39 +01:00
parent 92df46f8f2
commit 918cdcb052
11 changed files with 1971 additions and 8 deletions

View File

@@ -65,6 +65,16 @@ void cleanup_cuda_integer_compress_radix_ciphertext_128(CudaStreamsFFI streams,
void cleanup_cuda_integer_decompress_radix_ciphertext_128(
CudaStreamsFFI streams, int8_t **mem_ptr_void);
void cuda_integer_extract_glwe_128(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);
void cuda_integer_extract_glwe_64(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);
}
#endif

View File

@@ -155,3 +155,24 @@ void cleanup_cuda_integer_decompress_radix_ciphertext_128(
delete mem_ptr;
*mem_ptr_void = nullptr;
}
void cuda_integer_extract_glwe_128(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index) {
CudaStreams _streams = CudaStreams(streams);
host_extract<__uint128_t>(_streams.stream(0), _streams.gpu_index(0),
(__uint128_t *)glwe_array_out, glwe_list,
glwe_index);
}
void cuda_integer_extract_glwe_64(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index) {
CudaStreams _streams = CudaStreams(streams);
host_extract<__uint64_t>(_streams.stream(0), _streams.gpu_index(0),
(__uint64_t *)glwe_array_out, glwe_list, glwe_index);
}

View File

@@ -2349,6 +2349,22 @@ unsafe extern "C" {
mem_ptr_void: *mut *mut i8,
);
}
unsafe extern "C" {
pub fn cuda_integer_extract_glwe_128(
streams: CudaStreamsFFI,
glwe_array_out: *mut ffi::c_void,
glwe_list: *const CudaPackedGlweCiphertextListFFI,
glwe_index: u32,
);
}
unsafe extern "C" {
pub fn cuda_integer_extract_glwe_64(
streams: CudaStreamsFFI,
glwe_array_out: *mut ffi::c_void,
glwe_list: *const CudaPackedGlweCiphertextListFFI,
glwe_index: u32,
);
}
unsafe extern "C" {
pub fn scratch_cuda_rerand_64(
streams: CudaStreamsFFI,

View File

@@ -877,7 +877,7 @@ pub fn cuda_modulus_switch_ciphertext<Scalar>(
Scalar: UnsignedInteger,
{
unsafe {
-        cuda_modulus_switch_ciphertext_async(streams, output_lwe_ciphertext, log_modulus);
+        cuda_modulus_switch_ciphertext_async(streams, &mut *output_lwe_ciphertext, log_modulus);
}
streams.synchronize();
}

View File

@@ -1,4 +1,5 @@
use crate::core_crypto::gpu::entities::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::CudaStreams;
@@ -16,7 +17,8 @@ use crate::integer::gpu::ciphertext::CudaRadixCiphertext;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
use crate::integer::gpu::{
cuda_backend_compress, cuda_backend_decompress, cuda_backend_get_compression_size_on_gpu,
-    cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, PBSType,
+    cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, extract_glwe_async,
+    PBSType,
};
use crate::prelude::CastInto;
use crate::shortint::ciphertext::{
@@ -197,6 +199,30 @@ impl<T: UnsignedInteger> CudaPackedGlweCiphertextList<T> {
meta: self.meta,
}
}
pub fn extract_glwe(
&self,
glwe_index: usize,
streams: &CudaStreams,
) -> CudaGlweCiphertextList<T> {
let meta = self
.meta
.as_ref()
.expect("CudaPackedGlweCiphertextList meta must be set to extract GLWE");
let mut output_cuda_glwe_list = CudaGlweCiphertextList::new(
meta.glwe_dimension,
meta.polynomial_size,
GlweCiphertextCount(1),
meta.ciphertext_modulus,
streams,
);
unsafe {
extract_glwe_async(streams, &mut output_cuda_glwe_list, self, glwe_index as u32);
}
streams.synchronize();
output_cuda_glwe_list
}
}
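A minimal usage sketch for the new method (hedged; `cuda_compression_key`, `compression_inputs` and `streams` are placeholder names for values built as in the tests further down):

// Pack ciphertexts on the GPU, then pull the first GLWE back out.
// `extract_glwe` allocates its output and synchronizes the streams itself.
let packed = cuda_compression_key.compress_ciphertexts_into_list(&compression_inputs, &streams);
let first_glwe = packed.extract_glwe(0, &streams);
let on_cpu = first_glwe.to_glwe_ciphertext_list(&streams);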
impl<T: UnsignedInteger> Clone for CudaPackedGlweCiphertextList<T> {

View File

@@ -7,6 +7,7 @@ pub mod server_key;
#[cfg(feature = "zk-pok")]
pub mod zk;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_compact_ciphertext_list::CudaLweCompactCiphertextList;
@@ -10423,3 +10424,44 @@ pub unsafe fn unchecked_small_scalar_mul_integer_async(
carry_modulus.0 as u32,
);
}
#[allow(clippy::too_many_arguments)]
/// # Safety
///
/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
/// is required
pub unsafe fn extract_glwe_async<T: UnsignedInteger>(
streams: &CudaStreams,
glwe_array_out: &mut CudaGlweCiphertextList<T>,
glwe_list: &CudaPackedGlweCiphertextList<T>,
glwe_index: u32,
) {
assert_eq!(
streams.gpu_indexes[0],
glwe_array_out.0.d_vec.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
assert_eq!(
streams.gpu_indexes[0],
glwe_list.data.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
let packed_glwe_list_ffi = prepare_cuda_packed_glwe_ct_ffi(glwe_list);
if T::BITS == 128 {
cuda_integer_extract_glwe_128(
streams.ffi(),
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
&raw const packed_glwe_list_ffi,
glwe_index,
);
} else if T::BITS == 64 {
cuda_integer_extract_glwe_64(
streams.ffi(),
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
&raw const packed_glwe_list_ffi,
glwe_index,
);
} else {
panic!("Unsupported integer size for CUDA GLWE extraction");
}
}
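A hedged illustration of the `T::BITS` dispatch above (placeholder variables; all data is assumed to already reside on the stream's GPU):

// Monomorphization selects the FFI symbol from T::BITS at compile time.
unsafe { extract_glwe_async::<u64>(&streams, &mut out_glwes_64, &packed_64, 0) };
unsafe { extract_glwe_async::<u128>(&streams, &mut out_glwes_128, &packed_128, 0) };
// Per the safety contract above, synchronize before reading the outputs.
streams.synchronize();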

View File

@@ -0,0 +1,756 @@
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertext};
use crate::integer::compression_keys::CompressionPrivateKeys;
use crate::integer::gpu::list_compression::server_keys::CudaCompressionKey;
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::noise_simulation::cuda_glwe_list_to_glwe_ciphertext;
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext;
use crate::integer::gpu::CudaServerKey;
use crate::integer::{ClientKey, CompressedServerKey, IntegerCiphertext};
use crate::shortint::ciphertext::{Ciphertext, Degree, NoiseLevel};
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::engine::ShortintEngine;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{CompressionParameters, MetaParameters, Variance};
use crate::shortint::server_key::tests::noise_distribution::br_dp_packingks_ms::br_dp_packing_ks_ms;
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
NoiseSimulationLwePackingKeyswitchKey, NoiseSimulationModulus,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
expected_pfail_for_precision, mean_and_variance_check, normality_check, pfail_check,
precision_with_padding, update_ap_params_msg_and_carry_moduli, DecryptionAndNoiseResult,
NoiseSample, PfailAndPrecision, PfailTestMeta, PfailTestResult,
};
use crate::shortint::server_key::tests::noise_distribution::{
should_run_short_pfail_tests_debug, should_use_single_key_debug,
};
use crate::shortint::{
AtomicPatternParameters, CarryModulus, MessageModulus, ShortintEncoding, ShortintParameterSet,
};
use crate::GpuIndex;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
fn sanity_check_encrypt_br_dp_packing_ks_ms(meta_params: MetaParameters) {
let (params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
// The multiplication done in the compression moves the message up to the top of the
// carry space; multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
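// Hedged worked example: with 2_2 parameters (message_modulus = 4,
// carry_modulus = 4), dp_scalar = 4, so an encoded value m * delta becomes
// 4 * m * delta, shifting the 2 message bits up into the carry space.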
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let (d_before_packing, _after_packing, d_after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
&cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
let compression_inputs: Vec<_> = d_before_packing
.into_iter()
.map(|(_input, pbs_result, _dp_result)| {
let pbs_result_list_cpu = pbs_result.as_lwe_64().to_lwe_ciphertext_list(&streams);
let pbs_result_cpu = LweCiphertext::from_container(
pbs_result_list_cpu.clone().into_container(),
pbs_result_list_cpu.ciphertext_modulus(),
);
let cpu_ct = Ciphertext::new(
pbs_result_cpu,
Degree::new(params.message_modulus().0 - 1),
NoiseLevel::NOMINAL,
params.message_modulus(),
params.carry_modulus(),
params.atomic_pattern(),
);
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cpu_ct]);
let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
d_ct.ciphertext
})
.collect();
let gpu_compressed =
cuda_compression_key.compress_ciphertexts_into_list(&compression_inputs, &streams);
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
let extracted_glwe = GlweCiphertext::from_container(
extracted_list.clone().into_container(),
extracted_list.polynomial_size(),
extracted_list.ciphertext_modulus(),
);
let after_ms_list = d_after_ms.to_glwe_ciphertext_list(&streams);
let mut after_ms = GlweCiphertext::from_container(
after_ms_list.clone().into_container(),
after_ms_list.polynomial_size(),
after_ms_list.ciphertext_modulus(),
);
// Bodies that were not filled are zeroed, as they are not part of the comparison
after_ms.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
assert_eq!(after_ms.as_view(), extracted_glwe.as_view());
}
create_gpu_parameterized_test!(sanity_check_encrypt_br_dp_packing_ks_ms {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> (
Vec<(
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
)>,
Vec<DecryptionAndNoiseResult>,
Vec<DecryptionAndNoiseResult>,
) {
let mut engine = ShortintEngine::new();
let thread_cks: crate::integer::ClientKey;
let thread_cuda_sks: CudaServerKey;
let thread_compression_private_key;
let thread_cuda_compression_key;
let (cks, cuda_sks, compression_private_key, cuda_compression_key) =
if should_use_single_key_debug() {
(
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
)
} else {
let block_params: ShortintParameterSet = params.into();
thread_cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key =
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
thread_cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, streams);
thread_compression_private_key = thread_cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) = thread_cks
.new_compressed_compression_decompression_keys(&thread_compression_private_key);
thread_cuda_compression_key = compressed_compression_key.decompress_to_cuda(streams);
(
&thread_cks,
&thread_cuda_sks,
&thread_compression_private_key,
&thread_cuda_compression_key,
)
};
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key.encrypt_noiseless_pbs_input_dyn_lwe_with_engine(
br_input_modulus_log,
msg,
&mut engine,
)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, streams);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(streams, cuda_block_info))
.collect();
let dp_scalar = params.carry_modulus().0;
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let (d_before_packing, d_after_packing, d_after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
let compute_large_lwe_secret_key = cks.key.encryption_key();
let compression_glwe_secret_key = &compression_private_key.key.post_packing_ks_key;
let compute_encoding = cuda_sks.encoding();
let compression_encoding = ShortintEncoding {
carry_modulus: CarryModulus(1),
..compute_encoding
};
let after_packing = cuda_glwe_list_to_glwe_ciphertext(&d_after_packing, streams);
let after_ms = cuda_glwe_list_to_glwe_ciphertext(&d_after_ms, streams);
(
d_before_packing
.into_iter()
.map(|(d_input, d_pbs_result, d_dp_result)| {
let input = d_input.as_ct_64_cpu(streams);
let pbs_result = d_pbs_result.as_ct_64_cpu(streams);
let dp_result = d_dp_result.as_ct_64_cpu(streams);
(
match &cks.key.atomic_pattern {
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
DecryptionAndNoiseResult::new_from_lwe(
&input,
&standard_atomic_pattern_client_key.lwe_secret_key,
msg,
&compute_encoding,
)
}
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
panic!("KS32 Atomic Pattern not supported on GPU tests yet");
}
},
DecryptionAndNoiseResult::new_from_lwe(
&pbs_result,
&compute_large_lwe_secret_key,
msg,
&compute_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&dp_result,
&compute_large_lwe_secret_key,
msg,
&compression_encoding,
),
)
})
.collect(),
DecryptionAndNoiseResult::new_from_glwe(
&after_packing,
compression_glwe_secret_key,
compression_private_key.key.params.lwe_per_glwe(),
msg,
&compression_encoding,
),
DecryptionAndNoiseResult::new_from_glwe(
&after_ms,
compression_glwe_secret_key,
compression_private_key.key.params.lwe_per_glwe(),
msg,
&compression_encoding,
),
)
}
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> (
Vec<(NoiseSample, NoiseSample, NoiseSample)>,
Vec<NoiseSample>,
Vec<NoiseSample>,
) {
let (before_packing, after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params,
comp_params,
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
msg,
streams,
);
(
before_packing
.into_iter()
.map(|(input, after_pbs, after_dp)| {
(
input
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_pbs
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_dp
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
)
})
.collect(),
after_packing
.into_iter()
.map(|x| {
x.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
after_ms
.into_iter()
.map(|x| {
x.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
)
}
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> Vec<DecryptionAndNoiseResult> {
let (_before_packing, _after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params,
comp_params,
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
msg,
streams,
);
after_ms
}
fn noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu(meta_params: MetaParameters) {
let (params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let compression_key = compressed_compression_key.decompress();
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
assert!(noise_simulation_packing_key.matches_actual_shortint_comp_key(&compression_key.key));
// The multiplication done in the compression moves the message up to the top of the
// carry space; multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
noise_simulation_bsk.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk.modulus(),
);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
let noise_simulation = NoiseSimulationLwe::new(
cks.parameters().lwe_dimension(),
Variance(0.0),
NoiseSimulationModulus::from_ciphertext_modulus(cks.parameters().ciphertext_modulus()),
);
br_dp_packing_ks_ms(
vec![noise_simulation; lwe_per_glwe.0],
&noise_simulation_bsk,
&noise_simulation_accumulator,
dp_scalar,
&noise_simulation_packing_key,
storage_modulus_log,
&mut vec![(); lwe_per_glwe.0],
)
};
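// Hedged note on the call above: the noise-simulation path carries no GPU
// state, so `vec![(); lwe_per_glwe.0]` stands in for the CudaSideResources
// used by the real GPU evaluation further down.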
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
// Check that the circuit is consistent with the core implementation, i.e. that
// it does not crash on dimension checks
let (expected_glwe_size_out, expected_polynomial_size_out, expected_modulus_f64_out) = {
let (_before_packing_sim, _after_packing, after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
&cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
(
after_ms.glwe_dimension().to_glwe_size(),
after_ms.polynomial_size(),
after_ms.ciphertext_modulus().raw_modulus_float(),
)
};
assert_eq!(after_ms_sim.glwe_size(), expected_glwe_size_out);
assert_eq!(after_ms_sim.polynomial_size(), expected_polynomial_size_out);
assert_eq!(after_ms_sim.modulus().as_f64(), expected_modulus_f64_out);
let cleartext_modulus = params.message_modulus().0 * params.carry_modulus().0;
let mut noise_samples_before_ms = vec![];
let mut noise_samples_after_ms = vec![];
let sample_count_per_msg = 1000usize;
let chunk_size = 8;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
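// Hedged note: one dedicated CUDA stream per chunk slot; workers index
// vec_local_streams[i % chunk_size] so parallel iterations never share a stream.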
for _ in 0..cleartext_modulus {
let (current_noise_samples_before_ms, current_noise_samples_after_ms): (Vec<_>, Vec<_>) =
(0..sample_count_per_msg)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_stream = &vec_local_streams[*i % chunk_size];
let (_before_packing, after_packing, after_ms) =
encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
params,
comp_params,
&cks,
&cuda_sks,
&private_compression_key,
&cuda_compression_key,
0,
local_stream,
);
(after_packing, after_ms)
})
.collect::<Vec<_>>()
})
.unzip();
noise_samples_before_ms.extend(current_noise_samples_before_ms);
noise_samples_after_ms.extend(current_noise_samples_after_ms);
}
let noise_samples_before_ms_flattened: Vec<_> = noise_samples_before_ms
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let noise_samples_after_ms_flattened: Vec<_> = noise_samples_after_ms
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let before_ms_normality =
normality_check(&noise_samples_before_ms_flattened, "before ms", 0.01);
let after_ms_is_ok = mean_and_variance_check(
&noise_samples_after_ms_flattened,
"after_ms",
0.0,
after_ms_sim.variance_per_occupied_slot(),
comp_params.packing_ks_key_noise_distribution(),
after_ms_sim
.glwe_dimension()
.to_equivalent_lwe_dimension(after_ms_sim.polynomial_size()),
after_ms_sim.modulus().as_f64(),
);
assert!(before_ms_normality.null_hypothesis_is_valid && after_ms_is_ok);
}
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
fn noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu(meta_params: MetaParameters) {
let (pfail_test_meta, params, comp_params) = {
let (mut params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let original_message_modulus = params.message_modulus();
let original_carry_modulus = params.carry_modulus();
// For now, only allow 2_2 parameters; heuristics for other parameter sets can come later
assert_eq!(original_message_modulus.0, 4);
assert_eq!(original_carry_modulus.0, 4);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
// The multiplication done in the compression moves the message up to the top of
// the carry space; multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
noise_simulation_bsk.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk.modulus(),
);
let lwe_per_glwe = comp_params.lwe_per_glwe();
let storage_modulus_log = comp_params.storage_log_modulus();
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
let noise_simulation = NoiseSimulationLwe::new(
params.lwe_dimension(),
Variance(0.0),
NoiseSimulationModulus::from_ciphertext_modulus(params.ciphertext_modulus()),
);
br_dp_packing_ks_ms(
vec![noise_simulation; lwe_per_glwe.0],
&noise_simulation_bsk,
&noise_simulation_accumulator,
dp_scalar,
&noise_simulation_packing_key,
storage_modulus_log,
&mut vec![(); lwe_per_glwe.0],
)
};
let expected_variance_after_storage = after_ms_sim.variance_per_occupied_slot();
let compression_carry_mod = CarryModulus(1);
let compression_message_mod = original_message_modulus;
let compression_precision_with_padding =
precision_with_padding(compression_message_mod, compression_carry_mod);
let expected_pfail_for_storage = expected_pfail_for_precision(
compression_precision_with_padding,
expected_variance_after_storage,
);
let original_pfail_and_precision = PfailAndPrecision::new(
expected_pfail_for_storage,
compression_message_mod,
compression_carry_mod,
);
// Here we update the message modulus only:
// - because the message modulus matches for the compression encoding and compute encoding
// - so that the carry modulus stays the same and we apply the same dot product as normal
// for 2_2
// - so that the effective encoding after the storage is the one we used to evaluate the
// pfail
let updated_message_mod = MessageModulus(1 << 6);
let updated_carry_mod = compression_carry_mod;
update_ap_params_msg_and_carry_moduli(&mut params, updated_message_mod, updated_carry_mod);
assert!(
(params.message_modulus().0 * params.carry_modulus().0).ilog2()
<= comp_params.storage_log_modulus().0 as u32,
"Compression storage modulus cannot store enough bits for pfail estimation"
);
let updated_precision_with_padding =
precision_with_padding(updated_message_mod, updated_carry_mod);
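// Hedged arithmetic (assuming precision_with_padding = log2(msg * carry) + 1):
// updated_message_mod = 2^6 with a carry modulus of 1 gives 6 + 1 = 7 bits,
// which must fit under the storage modulus checked by the assert above.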
let new_expected_pfail_for_storage = expected_pfail_for_precision(
updated_precision_with_padding,
expected_variance_after_storage,
);
let new_expected_pfail_and_precision = PfailAndPrecision::new(
new_expected_pfail_for_storage,
updated_message_mod,
updated_carry_mod,
);
let pfail_test_meta = if should_run_short_pfail_tests_debug() {
// To generate the same number of keys as the case where a single run is a
// single sample
let expected_fails = 200 * lwe_per_glwe.0 as u32;
PfailTestMeta::new_with_desired_expected_fails(
original_pfail_and_precision,
new_expected_pfail_and_precision,
expected_fails,
)
} else {
// To guarantee 1_000_000 keysets are generated
let total_runs = 1_000_000 * lwe_per_glwe.0 as u32;
PfailTestMeta::new_with_total_runs(
original_pfail_and_precision,
new_expected_pfail_and_precision,
total_runs,
)
};
(pfail_test_meta, params, comp_params)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let total_runs_for_expected_fails = pfail_test_meta
.total_runs_for_expected_fails()
.div_ceil(lwe_per_glwe.0.try_into().unwrap());
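// Hedged note: each helper run packs lwe_per_glwe ciphertexts and therefore
// yields lwe_per_glwe pfail samples, hence the division by lwe_per_glwe.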
let chunk_size = 8;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
let measured_fails: f64 = (0..total_runs_for_expected_fails)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_streams = &vec_local_streams[*i as usize % chunk_size];
let after_ms_decryption_result = encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
params,
comp_params,
&cks,
&cuda_sks,
&private_compression_key,
&cuda_compression_key,
0,
local_streams,
);
after_ms_decryption_result
.into_iter()
.map(|result| result.failure_as_f64())
.sum::<f64>()
})
.collect::<Vec<_>>()
})
.sum();
let test_result = PfailTestResult { measured_fails };
pfail_check(&pfail_test_meta, test_result);
}
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

View File

@@ -0,0 +1,872 @@
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
use crate::core_crypto::commons::noise_formulas::noise_simulation::{
NoiseSimulationLweFourier128Bsk, NoiseSimulationLwePackingKeyswitchKey,
};
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertextCount};
use crate::integer::gpu::CudaServerKey;
use crate::integer::noise_squashing::NoiseSquashingPrivateKey;
use crate::integer::CompressedServerKey;
use crate::core_crypto::commons::parameters::CiphertextModulusLog;
use crate::core_crypto::prelude::generate_programmable_bootstrap_glwe_lut;
use crate::integer::ciphertext::NoiseSquashingCompressionPrivateKey;
use crate::integer::gpu::list_compression::server_keys::CudaNoiseSquashingCompressionKey;
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
use crate::integer::gpu::server_key::radix::{CudaNoiseSquashingKey, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::unchecked_small_scalar_mul_integer_async;
use crate::integer::IntegerCiphertext;
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::parameters::noise_squashing::NoiseSquashingParameters;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{
AtomicPatternParameters, MetaParameters, NoiseSquashingCompressionParameters, Variance,
};
use crate::shortint::server_key::tests::noise_distribution::dp_ks_pbs128_packingks::{
dp_ks_any_ms_standard_pbs128, dp_ks_any_ms_standard_pbs128_packing_ks,
};
use crate::shortint::server_key::tests::noise_distribution::should_use_single_key_debug;
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
NoiseSimulationLweKeyswitchKey, NoiseSimulationModulusSwitchConfig,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
mean_and_variance_check, DecryptionAndNoiseResult, NoiseSample,
};
use crate::shortint::{PaddingBit, ShortintEncoding, ShortintParameterSet};
use crate::GpuIndex;
use rayon::prelude::*;
/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
fn sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu(meta_params: MetaParameters) {
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
meta_noise_squashing_params.compression_parameters.unwrap(),
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = atomic_params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let noise_squashing_compression_key = noise_squashing_private_key
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
let cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&noise_squashing_compression_key,
&streams,
);
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let max_scalar_mul = cuda_sks.max_noise_level.get();
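// Hedged note: using the maximum tolerated noise level as the dot-product
// scalar exercises the worst-case noise growth allowed before the PBS128.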
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
noise_squashing_key.key.polynomial_size(),
noise_squashing_key.key.glwe_size(),
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(atomic_params.message_modulus().0 - 1),
message_modulus: atomic_params.message_modulus(),
carry_modulus: atomic_params.carry_modulus(),
atomic_pattern: atomic_params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
let (_before_packing, d_after_packing) = dp_ks_any_ms_standard_pbs128_packing_ks(
input_zero_as_lwe,
max_scalar_mul,
&cuda_sks,
modulus_switch_config,
&cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
&cuda_noise_squashing_compression_key.packing_key_switching_key,
&mut cuda_side_resources,
);
let cuda_noise_squashed_cts: Vec<_> = input_zeros
.into_par_iter()
.map(|ct| {
let cloned_ct = ct;
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cloned_ct]);
let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
unsafe {
unchecked_small_scalar_mul_integer_async(
&streams,
&mut d_ct.ciphertext,
max_scalar_mul,
atomic_params.message_modulus(),
atomic_params.carry_modulus(),
);
}
streams.synchronize();
cuda_noise_squashing_key.unchecked_squash_ciphertext_noise(
&d_ct.ciphertext,
&cuda_sks,
&streams,
)
})
.collect();
let gpu_compressed = cuda_noise_squashing_compression_key
.compress_noise_squashed_ciphertexts_into_list(&cuda_noise_squashed_cts, &streams);
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
let extracted_glwe = GlweCiphertext::from_container(
extracted_list.clone().into_container(),
extracted_list.polynomial_size(),
extracted_list.ciphertext_modulus(),
);
let after_packing_list = d_after_packing.to_glwe_ciphertext_list(&streams);
let mut after_packing = GlweCiphertext::from_container(
after_packing_list.clone().into_container(),
after_packing_list.polynomial_size(),
after_packing_list.ciphertext_modulus(),
);
// Bodies that were not filled are zeroed, as they are not part of the comparison
after_packing.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
assert_eq!(after_packing.as_view(), extracted_glwe.as_view());
}
/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
fn sanity_check_encrypt_dp_ks_standard_pbs128_gpu(meta_params: MetaParameters) {
let (params, noise_squashing_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let max_scalar_mul = cuda_sks.max_noise_level.get();
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
noise_squashing_key.key.polynomial_size(),
noise_squashing_key.key.glwe_size(),
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
let lwe_per_glwe = LweCiphertextCount(128);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
let res: Vec<_> = input_zero_as_lwe
.into_par_iter()
.zip(cuda_side_resources.par_iter_mut())
.map(|(input, side_resources)| {
let (input, after_dp, ks_result, drift_technique_result, ms_result, pbs_result) =
dp_ks_any_ms_standard_pbs128(
input,
max_scalar_mul,
&cuda_sks,
modulus_switch_config,
&cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
side_resources,
);
(
input,
after_dp,
ks_result,
drift_technique_result,
ms_result,
pbs_result,
)
})
.collect();
let input_zeros_non_pattern: Vec<_> = input_zeros
.iter()
.map(|ct| {
CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
)
})
.collect();
let vector_non_pattern: Vec<_> = input_zeros_non_pattern
.into_par_iter()
.map(|mut d_ct_input2| {
unsafe {
unchecked_small_scalar_mul_integer_async(
&streams,
&mut d_ct_input2.ciphertext,
max_scalar_mul,
params.message_modulus(),
params.carry_modulus(),
);
}
streams.synchronize();
cuda_noise_squashing_key
.squash_radix_ciphertext_noise(&cuda_sks, &d_ct_input2.ciphertext, &streams)
.unwrap()
})
.collect();
let vector_pattern_cpu: Vec<_> = res
.into_iter()
.map(
|(_input, _after_dp, _ks_result, _drift_technique_result, _ms_result, pbs_result)| {
pbs_result.as_ct_128_cpu(&streams)
},
)
.collect();
let vector_non_pattern_cpu: Vec<_> = vector_non_pattern
.into_par_iter()
.map(|cuda_squashed_radix_ct| {
let squashed_noise_ct_cpu =
cuda_squashed_radix_ct.to_squashed_noise_radix_ciphertext(&streams);
squashed_noise_ct_cpu.packed_blocks()[0]
.lwe_ciphertext()
.clone()
})
.collect();
// Check that all the results are equivalent
assert_eq!(vector_pattern_cpu.len(), vector_non_pattern_cpu.len());
for (a, b) in vector_pattern_cpu.iter().zip(vector_non_pattern_cpu.iter()) {
assert_eq!(a.as_view(), b.as_view());
}
}
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
params: AtomicPatternParameters,
noise_squashing_params: NoiseSquashingParameters,
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
single_cks: &crate::integer::ClientKey,
single_cuda_sks: &CudaServerKey,
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
msg: u64,
scalar_for_multiplication: u64,
br_input_modulus_log: CiphertextModulusLog,
streams: &CudaStreams,
) -> (
Vec<(
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
)>,
Vec<DecryptionAndNoiseResult>,
) {
let thread_cks: crate::integer::ClientKey;
let thread_cuda_sks: CudaServerKey;
let thread_noise_squashing_private_key: NoiseSquashingPrivateKey;
let thread_noise_squashing_key: crate::integer::noise_squashing::NoiseSquashingKey;
let thread_cuda_noise_squashing_key: CudaNoiseSquashingKey;
let thread_noise_squashing_compression_private_key: NoiseSquashingCompressionPrivateKey;
let thread_cuda_noise_squashing_compression_key: CudaNoiseSquashingCompressionKey;
let (
cks,
cuda_sks,
noise_squashing_private_key,
noise_squashing_key,
cuda_noise_squashing_key,
noise_squashing_compression_private_key,
cuda_noise_squashing_compression_key,
) = if should_use_single_key_debug() {
(
single_cks,
single_cuda_sks,
single_noise_squashing_private_key,
single_noise_squashing_key,
single_cuda_noise_squashing_key,
single_noise_squashing_compression_private_key,
single_cuda_noise_squashing_compression_key,
)
} else {
let block_params: ShortintParameterSet = params.into();
thread_cks = crate::integer::ClientKey::new(block_params);
let thread_compressed_server_key =
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
thread_cuda_sks =
CudaServerKey::decompress_from_cpu(&thread_compressed_server_key, streams);
thread_noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let thread_compressed_noise_squashing_compression_key =
thread_cks.new_compressed_noise_squashing_key(&thread_noise_squashing_private_key);
thread_noise_squashing_key = thread_compressed_noise_squashing_compression_key.decompress();
thread_cuda_noise_squashing_key =
thread_compressed_noise_squashing_compression_key.decompress_to_cuda(streams);
thread_noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let thread_noise_squashing_compression_key = thread_noise_squashing_private_key
.new_noise_squashing_compression_key(&thread_noise_squashing_compression_private_key);
thread_cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&thread_noise_squashing_compression_key,
streams,
);
(
&thread_cks,
&thread_cuda_sks,
&thread_noise_squashing_private_key,
&thread_noise_squashing_key,
&thread_cuda_noise_squashing_key,
&thread_noise_squashing_compression_private_key,
&thread_cuda_noise_squashing_compression_key,
)
};
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let bsk_polynomial_size = noise_squashing_key.key.polynomial_size();
let bsk_glwe_size = noise_squashing_key.key.glwe_size();
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
bsk_polynomial_size,
bsk_glwe_size,
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, streams);
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(msg)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
let (before_packing_gpu, after_packing_gpu) = dp_ks_any_ms_standard_pbs128_packing_ks(
input_zero_as_lwe,
scalar_for_multiplication,
cuda_sks,
modulus_switch_config,
cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
&cuda_noise_squashing_compression_key.packing_key_switching_key,
&mut cuda_side_resources,
);
let before_packing: Vec<_> = before_packing_gpu
.into_iter()
.map(
|(
input_gpu,
after_dp_gpu,
after_ks_gpu,
after_drift_gpu,
after_ms_gpu,
after_pbs128_gpu,
)| {
match &cks.key.atomic_pattern {
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
let params = standard_atomic_pattern_client_key.parameters;
let u64_encoding = ShortintEncoding {
ciphertext_modulus: params.ciphertext_modulus(),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let large_lwe_secret_key =
standard_atomic_pattern_client_key.large_lwe_secret_key();
let small_lwe_secret_key =
standard_atomic_pattern_client_key.small_lwe_secret_key();
let input_ct = input_gpu.as_ct_64_cpu(streams);
let after_dp_ct = after_dp_gpu.as_ct_64_cpu(streams);
let after_ks_ct = after_ks_gpu.as_ct_64_cpu(streams);
let before_ms_gpu: &CudaDynLwe =
after_drift_gpu.as_ref().unwrap_or(&after_ks_gpu);
let before_ms_ct = before_ms_gpu.as_ct_64_cpu(streams);
let after_ms_ct = after_ms_gpu.as_ct_64_cpu(streams);
let after_pbs128_ct = after_pbs128_gpu.as_ct_128_cpu(streams);
(
DecryptionAndNoiseResult::new_from_lwe(
&input_ct,
&large_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_dp_ct,
&large_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_ks_ct,
&small_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&before_ms_ct,
&small_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_ms_ct,
&small_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_pbs128_ct,
&noise_squashing_private_key
.key
.post_noise_squashing_lwe_secret_key(),
msg.into(),
&u128_encoding,
),
)
}
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
panic!("KS32 atomic pattern not supported for GPU yet");
}
}
},
)
.collect();
let after_packing_list = after_packing_gpu.to_glwe_ciphertext_list(streams);
let after_packing = GlweCiphertext::from_container(
after_packing_list.clone().into_container(),
after_packing_list.polynomial_size(),
after_packing_list.ciphertext_modulus(),
);
let after_packing = DecryptionAndNoiseResult::new_from_glwe(
&after_packing,
noise_squashing_compression_private_key
.key
.post_packing_ks_key(),
lwe_per_glwe,
msg.into(),
&u128_encoding,
);
assert_eq!(after_packing.len(), lwe_per_glwe.0);
(before_packing, after_packing)
}
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
params: AtomicPatternParameters,
noise_squashing_params: NoiseSquashingParameters,
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
single_cks: &crate::integer::ClientKey,
single_cuda_sks: &CudaServerKey,
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
msg: u64,
scalar_for_multiplication: u64,
br_input_modulus_log: CiphertextModulusLog,
streams: &CudaStreams,
) -> (
Vec<(
NoiseSample,
NoiseSample,
NoiseSample,
NoiseSample,
NoiseSample,
NoiseSample,
)>,
Vec<NoiseSample>,
) {
let (before_compression, after_compression) =
encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
params,
noise_squashing_params,
noise_squashing_compression_params,
single_cks,
single_cuda_sks,
single_noise_squashing_private_key,
single_noise_squashing_key,
single_cuda_noise_squashing_key,
single_noise_squashing_compression_private_key,
single_cuda_noise_squashing_compression_key,
msg,
scalar_for_multiplication,
br_input_modulus_log,
streams,
);
(
before_compression
.into_iter()
.map(
|(input, after_dp, after_ks, after_drift, after_ms, after_pbs)| {
(
input
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_dp
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_ks
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_drift
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_ms
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_pbs
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
)
},
)
.collect(),
after_compression
.into_iter()
.map(|after_compression| {
after_compression
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
)
}
fn noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu(meta_params: MetaParameters) {
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
meta_noise_squashing_params.compression_parameters.unwrap(),
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = atomic_params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let noise_squashing_compression_key = noise_squashing_private_key
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
let cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&noise_squashing_compression_key,
&streams,
);
let noise_simulation_ksk =
NoiseSimulationLweKeyswitchKey::new_from_atomic_pattern_parameters(atomic_params);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(atomic_params);
let noise_simulation_modulus_switch_config =
NoiseSimulationModulusSwitchConfig::new_from_atomic_pattern_parameters(atomic_params);
let noise_simulation_bsk128 =
NoiseSimulationLweFourier128Bsk::new_from_parameters(atomic_params, noise_squashing_params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_noise_squashing_parameters(
noise_squashing_params,
noise_squashing_compression_params,
);
assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
assert!(noise_simulation_bsk128
.matches_actual_shortint_noise_squashing_key(&noise_squashing_key.key));
assert!(noise_simulation_packing_key.matches_actual_pksk(
noise_squashing_compression_key
.key
.packing_key_switching_key()
));
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
let max_scalar_mul = cuda_sks.max_noise_level.get();
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk128
.output_glwe_size()
.to_glwe_dimension(),
noise_simulation_bsk128.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk128.modulus(),
);
let (_before_packing_sim, after_packing_sim) = {
let noise_simulation = NoiseSimulationLwe::encrypt(&cks.key, 0);
dp_ks_any_ms_standard_pbs128_packing_ks(
vec![noise_simulation; cuda_noise_squashing_compression_key.lwe_per_glwe.0],
max_scalar_mul,
&noise_simulation_ksk,
noise_simulation_modulus_switch_config.as_ref(),
&noise_simulation_bsk128,
br_input_modulus_log,
&noise_simulation_accumulator,
&noise_simulation_packing_key,
&mut vec![(); cuda_noise_squashing_compression_key.lwe_per_glwe.0],
)
};
let after_packing_sim = after_packing_sim.into_lwe();
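// Hedged note: the packed GLWE simulation is flattened to its equivalent LWE
// view (dimension = glwe_dimension * polynomial_size) so it can be compared
// against the packing keyswitch key's output dimension below.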
// Check that the circuit is consistent with the core implementation, i.e. that
// it does not crash on dimension checks
let (expected_lwe_dimension_out, expected_modulus_f64_out) = {
let pksk = noise_squashing_compression_key
.key
.packing_key_switching_key();
let out_glwe_dim = pksk.output_key_glwe_dimension();
let out_poly_size = pksk.output_key_polynomial_size();
(
out_glwe_dim.to_equivalent_lwe_dimension(out_poly_size),
pksk.ciphertext_modulus().raw_modulus_float(),
)
};
assert_eq!(
after_packing_sim.lwe_dimension(),
expected_lwe_dimension_out
);
assert_eq!(
after_packing_sim.modulus().as_f64(),
expected_modulus_f64_out
);
let cleartext_modulus = atomic_params.message_modulus().0 * atomic_params.carry_modulus().0;
let mut noise_samples_after_packing = vec![];
let sample_count_per_msg =
1000usize.div_ceil(cuda_noise_squashing_compression_key.lwe_per_glwe.0);
let chunk_size = 4;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
for _i in 0..cleartext_modulus {
let current_noise_samples_after_packing: Vec<_> = (0..sample_count_per_msg)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_stream = &vec_local_streams[*i % chunk_size];
let (_before_packing, after_packing) =
encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
atomic_params,
noise_squashing_params,
noise_squashing_compression_params,
&cks,
&cuda_sks,
&noise_squashing_private_key,
&noise_squashing_key,
&cuda_noise_squashing_key,
&noise_squashing_compression_private_key,
&cuda_noise_squashing_compression_key,
0,
max_scalar_mul,
br_input_modulus_log,
local_stream,
);
after_packing
})
.collect::<Vec<_>>()
})
.collect();
noise_samples_after_packing.extend(current_noise_samples_after_packing);
}
let noise_samples_after_packing_flattened: Vec<_> = noise_samples_after_packing
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let after_packing_is_ok = mean_and_variance_check(
&noise_samples_after_packing_flattened,
"after_packing",
0.0,
after_packing_sim.variance(),
noise_squashing_compression_params.packing_ks_key_noise_distribution,
after_packing_sim.lwe_dimension(),
after_packing_sim.modulus().as_f64(),
);
assert!(after_packing_is_ok);
}
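// A minimal sketch of the chunked round-robin stream dispatch used in the test
// above, with the real noise helper replaced by a hypothetical `sample_noise_on`
// stand-in; one CudaStreams instance per rayon worker slot, indexed modulo the
// chunk size, exactly as in the sampling loop.
fn dispatch_round_robin(vec_local_streams: &[CudaStreams], sample_count: usize) -> Vec<f64> {
    use rayon::prelude::*;
    let chunk_size = vec_local_streams.len();
    (0..sample_count)
        .collect::<Vec<_>>()
        .chunks(chunk_size)
        .flat_map(|chunk| {
            chunk
                .into_par_iter()
                // Each worker slot in a chunk reuses the same stream across chunks
                .map(|i| sample_noise_on(&vec_local_streams[*i % chunk_size]))
                .collect::<Vec<_>>()
        })
        .collect()
}
// Stand-in for the real GPU sampling helper, only here to make the sketch
// self-contained.
fn sample_noise_on(_stream: &CudaStreams) -> f64 {
    0.0
}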
create_gpu_parameterized_test!(
noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
}
);
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

View File

@@ -1,3 +1,5 @@
pub mod br_dp_ks_ms;
pub mod br_dp_packingks_ms;
pub mod dp_ks_ms;
pub mod dp_ks_pbs_128_packingks;
pub mod utils;

View File

@@ -1,7 +1,7 @@
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::{
AllocateCenteredBinaryShiftedStandardModSwitchResult,
AllocateDriftTechniqueStandardModSwitchResult, AllocateLweBootstrapResult,
AllocateLweKeyswitchResult, AllocateStandardModSwitchResult,
AllocateLweKeyswitchResult, AllocateLwePackingKeyswitchResult, AllocateStandardModSwitchResult,
CenteredBinaryShiftedStandardModSwitch, DriftTechniqueStandardModSwitch,
LweClassicFftBootstrap, LweKeyswitch, ScalarMul, StandardModSwitch,
};
@@ -13,6 +13,7 @@ use crate::core_crypto::gpu::cuda_modulus_switch_ciphertext;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::prelude::*;
use crate::integer::gpu::ciphertext::info::CudaBlockInfo;
@@ -25,7 +26,7 @@ use crate::integer::gpu::{
cuda_centered_modulus_switch_64, unchecked_small_scalar_mul_integer_async, CudaStreams,
};
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::NoiseSimulationModulusSwitchConfig;
use crate::shortint::server_key::tests::noise_distribution::utils::traits::LwePackingKeyswitch;
/// Side resources for CUDA operations in noise simulation
#[derive(Clone)]
pub struct CudaSideResources {
@@ -128,6 +129,19 @@ impl CudaDynLwe {
}
}
pub fn as_ct_128_cpu(&self, streams: &CudaStreams) -> LweCiphertext<Vec<u128>> {
match self {
Self::U32(_) => panic!("Tried getting a u32 CudaLweCiphertextList as u128."),
Self::U64(_) => panic!("Tried getting a u64 CudaLweCiphertextList as u128."),
Self::U128(_cuda_lwe) => {
let cpu_lwe_list = self.as_lwe_128().to_lwe_ciphertext_list(streams);
let ciphertext_modulus = cpu_lwe_list.ciphertext_modulus();
// Reuse the downloaded container directly instead of cloning it
LweCiphertext::from_container(cpu_lwe_list.into_container(), ciphertext_modulus)
}
}
}
pub fn from_lwe_32(cuda_lwe: CudaLweCiphertextList<u32>) -> Self {
Self::U32(cuda_lwe)
}
@@ -141,6 +155,19 @@ impl CudaDynLwe {
}
}
/// Converts a CudaGlweCiphertextList<u64> to a GlweCiphertext<Vec<u64>>
pub fn cuda_glwe_list_to_glwe_ciphertext(
cuda_glwe_list: &CudaGlweCiphertextList<u64>,
streams: &CudaStreams,
) -> GlweCiphertext<Vec<u64>> {
let cpu_glwe_list = cuda_glwe_list.to_glwe_ciphertext_list(streams);
let polynomial_size = cpu_glwe_list.polynomial_size();
let ciphertext_modulus = cpu_glwe_list.ciphertext_modulus();
// Reuse the downloaded container directly instead of cloning it
GlweCiphertext::from_container(
cpu_glwe_list.into_container(),
polynomial_size,
ciphertext_modulus,
)
}
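// Minimal usage sketch for the helper above, assuming a populated
// `cuda_glwe_list: CudaGlweCiphertextList<u64>` and `streams: CudaStreams`
// are in scope; the round trip preserves the polynomial size and modulus.
//
// let cpu_glwe = cuda_glwe_list_to_glwe_ciphertext(&cuda_glwe_list, &streams);
// assert_eq!(cpu_glwe.polynomial_size(), cuda_glwe_list.polynomial_size());
// assert_eq!(cpu_glwe.ciphertext_modulus(), cuda_glwe_list.ciphertext_modulus());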
impl ScalarMul<u64> for CudaDynLwe {
type Output = Self;
type SideResources = CudaSideResources;
@@ -313,13 +340,14 @@ impl StandardModSwitch<Self> for CudaDynLwe {
panic!("U32 modulus switch not implemented for CudaDynLwe - only U64 is supported");
}
(Self::U64(input), Self::U64(output_cuda_lwe)) => {
let internal_output = input.duplicate(&side_resources.streams);
let mut internal_output = input.duplicate(&side_resources.streams);
cuda_modulus_switch_ciphertext(
&mut output_cuda_lwe.0.d_vec,
&mut internal_output.0.d_vec,
output_modulus_log.0 as u32,
&side_resources.streams,
);
let mut cpu_lwe = internal_output.to_lwe_ciphertext_list(&side_resources.streams);
let shift_to_map_to_native = u64::BITS - output_modulus_log.0 as u32;
for val in cpu_lwe.as_mut_view().into_container().iter_mut() {
*val <<= shift_to_map_to_native;
@@ -713,3 +741,193 @@ impl AllocateLweBootstrapResult for CudaGlweCiphertextList<u128> {
CudaDynLwe::U128(cuda_lwe)
}
}
// Implement LweClassicFft128Bootstrap for CudaNoiseSquashingKey using 128-bit PBS CUDA function
impl
crate::core_crypto::commons::noise_formulas::noise_simulation::traits::LweClassicFft128Bootstrap<
CudaDynLwe,
CudaDynLwe,
CudaGlweCiphertextList<u128>,
> for crate::integer::gpu::noise_squashing::keys::CudaNoiseSquashingKey
{
type SideResources = CudaSideResources;
fn lwe_classic_fft_128_pbs(
&self,
input: &CudaDynLwe,
output: &mut CudaDynLwe,
accumulator: &CudaGlweCiphertextList<u128>,
side_resources: &mut Self::SideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_programmable_bootstrapping::cuda_programmable_bootstrap_128_lwe_ciphertext_async;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
match (input, output) {
(CudaDynLwe::U64(input_cuda_lwe), CudaDynLwe::U128(output_cuda_lwe)) => {
// Get the bootstrapping key from self; it is already the u128 variant
let bsk = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => d_bsk,
CudaBootstrappingKey::MultiBit(_) => {
panic!("MultiBit bootstrapping keys are not supported for 128-bit PBS");
}
};
unsafe {
cuda_programmable_bootstrap_128_lwe_ciphertext_async(
input_cuda_lwe,
output_cuda_lwe,
accumulator,
bsk,
&side_resources.streams,
);
side_resources.streams.synchronize();
}
}
_ => panic!("128-bit PBS expects U64 input and U128 output for CudaDynLwe"),
}
}
}
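// Minimal call sketch for the impl above, assuming `nsk: CudaNoiseSquashingKey`,
// a u64 LWE list `input_cuda_lwe`, a u128 `accumulator` and `side_resources` are
// in scope; the output allocation goes through the AllocateLweBootstrapResult
// impl shown earlier (method name assumed from the trait name).
//
// let input = CudaDynLwe::U64(input_cuda_lwe);
// let mut output = accumulator.allocate_lwe_bootstrap_result(&mut side_resources);
// nsk.lwe_classic_fft_128_pbs(&input, &mut output, &accumulator, &mut side_resources);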
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u64> {
type Output = CudaGlweCiphertextList<u64>;
type SideResources = CudaSideResources;
fn allocate_lwe_packing_keyswitch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
let polynomial_size = self.output_polynomial_size();
let ciphertext_modulus = self.ciphertext_modulus();
CudaGlweCiphertextList::new(
glwe_dimension,
polynomial_size,
GlweCiphertextCount(1),
ciphertext_modulus,
&side_resources.streams,
)
}
}
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u64>>
for CudaLwePackingKeyswitchKey<u64>
{
type SideResources = CudaSideResources;
fn keyswitch_lwes_and_pack_in_glwe(
&self,
input: &[&CudaDynLwe],
output: &mut CudaGlweCiphertextList<u64>,
side_resources: &mut CudaSideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64;
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
input.iter().map(|ciphertext| ciphertext.as_lwe_64()),
&side_resources.streams,
);
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
self,
&input_lwe_ciphertext_list,
output,
&side_resources.streams,
);
}
}
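// Minimal sketch chaining the two impls above, assuming `pksk:
// CudaLwePackingKeyswitchKey<u64>`, a `lwes: Vec<&CudaDynLwe>` holding U64
// ciphertexts and `side_resources: CudaSideResources` are in scope:
//
// let mut packed = pksk.allocate_lwe_packing_keyswitch_result(&mut side_resources);
// pksk.keyswitch_lwes_and_pack_in_glwe(&lwes, &mut packed, &mut side_resources);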
// Implement StandardModSwitch traits for CudaGlweCiphertextList<u64>
impl AllocateStandardModSwitchResult for CudaGlweCiphertextList<u64> {
type Output = Self;
type SideResources = CudaSideResources;
fn allocate_standard_mod_switch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
Self::new(
self.glwe_dimension(),
self.polynomial_size(),
self.glwe_ciphertext_count(),
self.ciphertext_modulus(),
&side_resources.streams,
)
}
}
impl StandardModSwitch<Self> for CudaGlweCiphertextList<u64> {
type SideResources = CudaSideResources;
fn standard_mod_switch(
&self,
storage_log_modulus: CiphertextModulusLog,
output: &mut Self,
side_resources: &mut CudaSideResources,
) {
let mut internal_output = self.duplicate(&side_resources.streams);
cuda_modulus_switch_ciphertext(
&mut internal_output.0.d_vec,
storage_log_modulus.0 as u32,
&side_resources.streams,
);
side_resources.streams.synchronize();
let mut cpu_glwe = internal_output.to_glwe_ciphertext_list(&side_resources.streams);
let shift_to_map_to_native = u64::BITS - storage_log_modulus.0 as u32;
for val in cpu_glwe.as_mut_view().into_container().iter_mut() {
*val <<= shift_to_map_to_native;
}
let d_after_ms = Self::from_glwe_ciphertext_list(&cpu_glwe, &side_resources.streams);
*output = d_after_ms;
}
}
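// Worked example of the final shift in standard_mod_switch above: the GPU mod
// switch leaves values in the low `storage_log_modulus` bits, and the left
// shift maps them back onto the MSB-aligned native u64 torus. With an
// illustrative 2^19 storage modulus:
//
// let storage_log_modulus = 19u32;
// let switched: u64 = 0b101; // value produced by the GPU mod switch
// let native = switched << (u64::BITS - storage_log_modulus);
// assert_eq!(native, 0b101u64 << 45);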
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u128> {
type Output = CudaGlweCiphertextList<u128>;
type SideResources = CudaSideResources;
fn allocate_lwe_packing_keyswitch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
let polynomial_size = self.output_polynomial_size();
let ciphertext_modulus = self.ciphertext_modulus();
CudaGlweCiphertextList::new(
glwe_dimension,
polynomial_size,
GlweCiphertextCount(1),
ciphertext_modulus,
&side_resources.streams,
)
}
}
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u128>>
for CudaLwePackingKeyswitchKey<u128>
{
type SideResources = CudaSideResources;
fn keyswitch_lwes_and_pack_in_glwe(
&self,
input: &[&CudaDynLwe],
output: &mut CudaGlweCiphertextList<u128>,
side_resources: &mut CudaSideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128;
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
input.iter().map(|ciphertext| ciphertext.as_lwe_128()),
&side_resources.streams,
);
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128(
self,
&input_lwe_ciphertext_list,
output,
&side_resources.streams,
);
}
}
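// The u128 impls mirror the u64 ones; only the scalar-specific CUDA entry point
// (cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128) differs. A
// minimal usage sketch, assuming `pksk_128: CudaLwePackingKeyswitchKey<u128>`
// and U128 inputs `lwes_128: Vec<&CudaDynLwe>` are in scope:
//
// let mut packed_128 = pksk_128.allocate_lwe_packing_keyswitch_result(&mut side_resources);
// pksk_128.keyswitch_lwes_and_pack_in_glwe(&lwes_128, &mut packed_128, &mut side_resources);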

View File

@@ -27,7 +27,7 @@ use crate::shortint::server_key::ServerKey;
use rayon::prelude::*;
#[allow(clippy::too_many_arguments)]
fn dp_ks_any_ms_standard_pbs128<
pub fn dp_ks_any_ms_standard_pbs128<
InputCt,
ScalarMulResult,
KsResult,
@@ -111,7 +111,7 @@ where
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn dp_ks_any_ms_standard_pbs128_packing_ks<
pub fn dp_ks_any_ms_standard_pbs128_packing_ks<
InputCt,
ScalarMulResult,
KsResult,