mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-06 21:34:05 -05:00
feat(gpu): create noise and pfail tests pbs128 and packingks
This commit is contained in:
@@ -65,6 +65,16 @@ void cleanup_cuda_integer_compress_radix_ciphertext_128(CudaStreamsFFI streams,
|
||||
|
||||
// Releases the scratch state of the 128-bit radix decompression.
// The tail of the definition visible in this diff deletes the state object and
// then stores nullptr into *mem_ptr_void.
void cleanup_cuda_integer_decompress_radix_ciphertext_128(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr_void);
|
||||
|
||||
// C entry point for GLWE extraction with 128-bit coefficients.
// The definition forwards to host_extract<__uint128_t> on stream 0 / GPU 0 of
// `streams`, with `glwe_array_out` reinterpreted as a __uint128_t buffer.
// NOTE(review): presumably `glwe_array_out` must be a device buffer large
// enough for one GLWE of `glwe_list` -- confirm against host_extract.
void cuda_integer_extract_glwe_128(
|
||||
CudaStreamsFFI streams, void *glwe_array_out,
|
||||
CudaPackedGlweCiphertextListFFI const *glwe_list,
|
||||
uint32_t const glwe_index);
|
||||
|
||||
// C entry point for GLWE extraction with 64-bit coefficients.
// The definition forwards to the 64-bit instantiation of host_extract on
// stream 0 / GPU 0 of `streams`, with `glwe_array_out` reinterpreted as a
// 64-bit unsigned buffer.
// NOTE(review): presumably `glwe_array_out` must be a device buffer large
// enough for one GLWE of `glwe_list` -- confirm against host_extract.
void cuda_integer_extract_glwe_64(
|
||||
CudaStreamsFFI streams, void *glwe_array_out,
|
||||
CudaPackedGlweCiphertextListFFI const *glwe_list,
|
||||
uint32_t const glwe_index);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -155,3 +155,24 @@ void cleanup_cuda_integer_decompress_radix_ciphertext_128(
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
// 128-bit flavour of the GLWE extraction entry point.
// Wraps the FFI stream descriptor, views the untyped output pointer as a
// __uint128_t buffer and hands the work to host_extract on the first
// stream / GPU of the set.
void cuda_integer_extract_glwe_128(
    CudaStreamsFFI streams, void *glwe_array_out,
    CudaPackedGlweCiphertextListFFI const *glwe_list,
    uint32_t const glwe_index) {
  CudaStreams cuda_streams(streams);
  auto *typed_glwe_out = static_cast<__uint128_t *>(glwe_array_out);

  host_extract<__uint128_t>(cuda_streams.stream(0), cuda_streams.gpu_index(0),
                            typed_glwe_out, glwe_list, glwe_index);
}
|
||||
|
||||
// 64-bit flavour of the GLWE extraction entry point.
// Wraps the FFI stream descriptor and forwards to host_extract on the first
// stream / GPU of the set, with `glwe_array_out` viewed as a uint64_t buffer.
//
// The standard <cstdint> name `uint64_t` is used rather than the
// implementation-reserved `__uint64_t` typedef, matching the fixed-width types
// used by the rest of this API (`uint32_t`, `int8_t`).
void cuda_integer_extract_glwe_64(
    CudaStreamsFFI streams, void *glwe_array_out,
    CudaPackedGlweCiphertextListFFI const *glwe_list,
    uint32_t const glwe_index) {

  CudaStreams _streams = CudaStreams(streams);
  host_extract<uint64_t>(_streams.stream(0), _streams.gpu_index(0),
                         static_cast<uint64_t *>(glwe_array_out), glwe_list,
                         glwe_index);
}
|
||||
|
||||
@@ -2349,6 +2349,22 @@ unsafe extern "C" {
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
    /// Raw binding to the C entry point `cuda_integer_extract_glwe_128`.
    ///
    /// The safe-side wrapper `extract_glwe_async` calls this when the scalar
    /// type is 128 bits wide.
    /// NOTE(review): pointer validity requirements are those of the C
    /// implementation -- confirm before calling directly.
pub fn cuda_integer_extract_glwe_128(
|
||||
streams: CudaStreamsFFI,
|
||||
glwe_array_out: *mut ffi::c_void,
|
||||
glwe_list: *const CudaPackedGlweCiphertextListFFI,
|
||||
glwe_index: u32,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
    /// Raw binding to the C entry point `cuda_integer_extract_glwe_64`.
    ///
    /// The safe-side wrapper `extract_glwe_async` calls this when the scalar
    /// type is 64 bits wide.
    /// NOTE(review): pointer validity requirements are those of the C
    /// implementation -- confirm before calling directly.
pub fn cuda_integer_extract_glwe_64(
|
||||
streams: CudaStreamsFFI,
|
||||
glwe_array_out: *mut ffi::c_void,
|
||||
glwe_list: *const CudaPackedGlweCiphertextListFFI,
|
||||
glwe_index: u32,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
pub fn scratch_cuda_rerand_64(
|
||||
streams: CudaStreamsFFI,
|
||||
|
||||
@@ -877,7 +877,7 @@ pub fn cuda_modulus_switch_ciphertext<Scalar>(
|
||||
Scalar: UnsignedInteger,
|
||||
{
|
||||
unsafe {
|
||||
cuda_modulus_switch_ciphertext_async(streams, output_lwe_ciphertext, log_modulus);
|
||||
cuda_modulus_switch_ciphertext_async(streams, &mut *output_lwe_ciphertext, log_modulus);
|
||||
}
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::core_crypto::gpu::entities::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::vec::CudaVec;
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
@@ -16,7 +17,8 @@ use crate::integer::gpu::ciphertext::CudaRadixCiphertext;
|
||||
use crate::integer::gpu::server_key::CudaBootstrappingKey;
|
||||
use crate::integer::gpu::{
|
||||
cuda_backend_compress, cuda_backend_decompress, cuda_backend_get_compression_size_on_gpu,
|
||||
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, PBSType,
|
||||
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, extract_glwe_async,
|
||||
PBSType,
|
||||
};
|
||||
use crate::prelude::CastInto;
|
||||
use crate::shortint::ciphertext::{
|
||||
@@ -197,6 +199,30 @@ impl<T: UnsignedInteger> CudaPackedGlweCiphertextList<T> {
|
||||
meta: self.meta,
|
||||
}
|
||||
}
|
||||
pub fn extract_glwe(
|
||||
&self,
|
||||
glwe_index: usize,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaGlweCiphertextList<T> {
|
||||
let meta = self
|
||||
.meta
|
||||
.as_ref()
|
||||
.expect("CudaPackedGlweCiphertextList meta must be set to extract GLWE");
|
||||
|
||||
let mut output_cuda_glwe_list = CudaGlweCiphertextList::new(
|
||||
meta.glwe_dimension,
|
||||
meta.polynomial_size,
|
||||
GlweCiphertextCount(1),
|
||||
meta.ciphertext_modulus,
|
||||
streams,
|
||||
);
|
||||
|
||||
unsafe {
|
||||
extract_glwe_async(streams, &mut output_cuda_glwe_list, self, glwe_index as u32);
|
||||
}
|
||||
streams.synchronize();
|
||||
output_cuda_glwe_list
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: UnsignedInteger> Clone for CudaPackedGlweCiphertextList<T> {
|
||||
|
||||
@@ -7,6 +7,7 @@ pub mod server_key;
|
||||
#[cfg(feature = "zk-pok")]
|
||||
pub mod zk;
|
||||
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_compact_ciphertext_list::CudaLweCompactCiphertextList;
|
||||
@@ -10423,3 +10424,44 @@ pub unsafe fn unchecked_small_scalar_mul_integer_async(
|
||||
carry_modulus.0 as u32,
|
||||
);
|
||||
}
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// # Safety
|
||||
///
|
||||
/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
|
||||
/// is required
|
||||
pub unsafe fn extract_glwe_async<T: UnsignedInteger>(
|
||||
streams: &CudaStreams,
|
||||
glwe_array_out: &mut CudaGlweCiphertextList<T>,
|
||||
glwe_list: &CudaPackedGlweCiphertextList<T>,
|
||||
glwe_index: u32,
|
||||
) {
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
glwe_array_out.0.d_vec.gpu_index(0),
|
||||
"GPU error: all data should reside on the same GPU."
|
||||
);
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
glwe_list.data.gpu_index(0),
|
||||
"GPU error: all data should reside on the same GPU."
|
||||
);
|
||||
let packed_glwe_list_ffi = prepare_cuda_packed_glwe_ct_ffi(glwe_list);
|
||||
|
||||
if T::BITS == 128 {
|
||||
cuda_integer_extract_glwe_128(
|
||||
streams.ffi(),
|
||||
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
|
||||
&raw const packed_glwe_list_ffi,
|
||||
glwe_index,
|
||||
);
|
||||
} else if T::BITS == 64 {
|
||||
cuda_integer_extract_glwe_64(
|
||||
streams.ffi(),
|
||||
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
|
||||
&raw const packed_glwe_list_ffi,
|
||||
glwe_index,
|
||||
);
|
||||
} else {
|
||||
panic!("Unsupported integer size for CUDA GLWE extraction");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,756 @@
|
||||
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertext};
|
||||
use crate::integer::compression_keys::CompressionPrivateKeys;
|
||||
use crate::integer::gpu::list_compression::server_keys::CudaCompressionKey;
|
||||
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::noise_simulation::cuda_glwe_list_to_glwe_ciphertext;
|
||||
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
|
||||
use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext;
|
||||
use crate::integer::gpu::CudaServerKey;
|
||||
use crate::integer::{ClientKey, CompressedServerKey, IntegerCiphertext};
|
||||
use crate::shortint::ciphertext::{Ciphertext, Degree, NoiseLevel};
|
||||
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
|
||||
use crate::shortint::engine::ShortintEngine;
|
||||
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
|
||||
use crate::shortint::parameters::{CompressionParameters, MetaParameters, Variance};
|
||||
use crate::shortint::server_key::tests::noise_distribution::br_dp_packingks_ms::br_dp_packing_ks_ms;
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
|
||||
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
|
||||
NoiseSimulationLwePackingKeyswitchKey, NoiseSimulationModulus,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::{
|
||||
expected_pfail_for_precision, mean_and_variance_check, normality_check, pfail_check,
|
||||
precision_with_padding, update_ap_params_msg_and_carry_moduli, DecryptionAndNoiseResult,
|
||||
NoiseSample, PfailAndPrecision, PfailTestMeta, PfailTestResult,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::{
|
||||
should_run_short_pfail_tests_debug, should_use_single_key_debug,
|
||||
};
|
||||
use crate::shortint::{
|
||||
AtomicPatternParameters, CarryModulus, MessageModulus, ShortintEncoding, ShortintParameterSet,
|
||||
};
|
||||
use crate::GpuIndex;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
|
||||
fn sanity_check_encrypt_br_dp_packing_ks_ms(meta_params: MetaParameters) {
|
||||
let (params, comp_params) = (
|
||||
meta_params.compute_parameters,
|
||||
meta_params.compression_parameters.unwrap(),
|
||||
);
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
|
||||
let private_compression_key = cks.new_compression_private_key(comp_params);
|
||||
let (compressed_compression_key, _compressed_decompression_key) =
|
||||
cks.new_compressed_compression_decompression_keys(&private_compression_key);
|
||||
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
|
||||
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
|
||||
// The multiplication done in the compression is made to move the message up at the top of the
|
||||
// carry space, multiplying by the carry modulus achieves that
|
||||
let dp_scalar = params.carry_modulus().0;
|
||||
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
|
||||
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
|
||||
|
||||
let id_lut = cuda_sks.generate_lookup_table(|x| x);
|
||||
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
|
||||
|
||||
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
|
||||
.map(|_| {
|
||||
cks.key
|
||||
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
|
||||
})
|
||||
.collect();
|
||||
let d_input_zeros: Vec<_> = input_zeros
|
||||
.iter()
|
||||
.map(|ct| {
|
||||
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
|
||||
CudaDynLwe::U64(d_ct_input)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
atomic_pattern: params.atomic_pattern(),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
|
||||
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
|
||||
.collect();
|
||||
|
||||
let (d_before_packing, _after_packing, d_after_ms) = br_dp_packing_ks_ms(
|
||||
d_input_zeros,
|
||||
&cuda_sks,
|
||||
&d_accumulator,
|
||||
dp_scalar,
|
||||
&cuda_compression_key.packing_key_switching_key,
|
||||
storage_modulus_log,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
|
||||
let compression_inputs: Vec<_> = d_before_packing
|
||||
.into_iter()
|
||||
.map(|(_input, pbs_result, _dp_result)| {
|
||||
let pbs_result_list_cpu = pbs_result.as_lwe_64().to_lwe_ciphertext_list(&streams);
|
||||
let pbs_result_cpu = LweCiphertext::from_container(
|
||||
pbs_result_list_cpu.clone().into_container(),
|
||||
pbs_result_list_cpu.ciphertext_modulus(),
|
||||
);
|
||||
let cpu_ct = Ciphertext::new(
|
||||
pbs_result_cpu,
|
||||
Degree::new(params.message_modulus().0 - 1),
|
||||
NoiseLevel::NOMINAL,
|
||||
params.message_modulus(),
|
||||
params.carry_modulus(),
|
||||
params.atomic_pattern(),
|
||||
);
|
||||
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cpu_ct]);
|
||||
let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
|
||||
d_ct.ciphertext
|
||||
})
|
||||
.collect();
|
||||
|
||||
let gpu_compressed =
|
||||
cuda_compression_key.compress_ciphertexts_into_list(&compression_inputs, &streams);
|
||||
|
||||
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
|
||||
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
|
||||
let extracted_glwe = GlweCiphertext::from_container(
|
||||
extracted_list.clone().into_container(),
|
||||
extracted_list.polynomial_size(),
|
||||
extracted_list.ciphertext_modulus(),
|
||||
);
|
||||
let after_ms_list = d_after_ms.to_glwe_ciphertext_list(&streams);
|
||||
let mut after_ms = GlweCiphertext::from_container(
|
||||
after_ms_list.clone().into_container(),
|
||||
after_ms_list.polynomial_size(),
|
||||
after_ms_list.ciphertext_modulus(),
|
||||
);
|
||||
// Bodies that were not filled are discarded
|
||||
after_ms.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
|
||||
|
||||
assert_eq!(after_ms.as_view(), extracted_glwe.as_view());
|
||||
}
|
||||
|
||||
// Instantiates the sanity check above for the listed meta parameter set.
// NOTE(review): presumably the macro expands to one `#[test]` per parameter set -- confirm
// against the definition of `create_gpu_parameterized_test`.
create_gpu_parameterized_test!(sanity_check_encrypt_br_dp_packing_ks_ms {
|
||||
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
|
||||
});
|
||||
|
||||
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
|
||||
fn encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
comp_params: CompressionParameters,
|
||||
single_cks: &ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
single_compression_private_key: &CompressionPrivateKeys,
|
||||
single_cuda_compression_key: &CudaCompressionKey,
|
||||
msg: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> (
|
||||
Vec<(
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
)>,
|
||||
Vec<DecryptionAndNoiseResult>,
|
||||
Vec<DecryptionAndNoiseResult>,
|
||||
) {
|
||||
let mut engine = ShortintEngine::new();
|
||||
let thread_cks: crate::integer::ClientKey;
|
||||
let thread_cuda_sks: CudaServerKey;
|
||||
let thread_compression_private_key;
|
||||
let thread_cuda_compression_key;
|
||||
let (cks, cuda_sks, compression_private_key, cuda_compression_key) =
|
||||
if should_use_single_key_debug() {
|
||||
(
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
single_compression_private_key,
|
||||
single_cuda_compression_key,
|
||||
)
|
||||
} else {
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
thread_cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key =
|
||||
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
|
||||
thread_cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, streams);
|
||||
|
||||
thread_compression_private_key = thread_cks.new_compression_private_key(comp_params);
|
||||
let (compressed_compression_key, _compressed_decompression_key) = thread_cks
|
||||
.new_compressed_compression_decompression_keys(&thread_compression_private_key);
|
||||
thread_cuda_compression_key = compressed_compression_key.decompress_to_cuda(streams);
|
||||
|
||||
(
|
||||
&thread_cks,
|
||||
&thread_cuda_sks,
|
||||
&thread_compression_private_key,
|
||||
&thread_cuda_compression_key,
|
||||
)
|
||||
};
|
||||
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
|
||||
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
|
||||
|
||||
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
|
||||
.map(|_| {
|
||||
cks.key.encrypt_noiseless_pbs_input_dyn_lwe_with_engine(
|
||||
br_input_modulus_log,
|
||||
msg,
|
||||
&mut engine,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let d_input_zeros: Vec<_> = input_zeros
|
||||
.iter()
|
||||
.map(|ct| {
|
||||
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), streams);
|
||||
CudaDynLwe::U64(d_ct_input)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let id_lut = cuda_sks.generate_lookup_table(|x| x);
|
||||
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, streams);
|
||||
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
atomic_pattern: params.atomic_pattern(),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
|
||||
.map(|_| CudaSideResources::new(streams, cuda_block_info))
|
||||
.collect();
|
||||
|
||||
let dp_scalar = params.carry_modulus().0;
|
||||
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
|
||||
|
||||
let (d_before_packing, d_after_packing, d_after_ms) = br_dp_packing_ks_ms(
|
||||
d_input_zeros,
|
||||
cuda_sks,
|
||||
&d_accumulator,
|
||||
dp_scalar,
|
||||
&cuda_compression_key.packing_key_switching_key,
|
||||
storage_modulus_log,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
|
||||
let compute_large_lwe_secret_key = cks.key.encryption_key();
|
||||
let compression_glwe_secret_key = &compression_private_key.key.post_packing_ks_key;
|
||||
|
||||
let compute_encoding = cuda_sks.encoding();
|
||||
let compression_encoding = ShortintEncoding {
|
||||
carry_modulus: CarryModulus(1),
|
||||
..compute_encoding
|
||||
};
|
||||
let after_packing = cuda_glwe_list_to_glwe_ciphertext(&d_after_packing, streams);
|
||||
let after_ms = cuda_glwe_list_to_glwe_ciphertext(&d_after_ms, streams);
|
||||
(
|
||||
d_before_packing
|
||||
.into_iter()
|
||||
.map(|(d_input, d_pbs_result, d_dp_result)| {
|
||||
let input = d_input.as_ct_64_cpu(streams);
|
||||
let pbs_result = d_pbs_result.as_ct_64_cpu(streams);
|
||||
let dp_result = d_dp_result.as_ct_64_cpu(streams);
|
||||
(
|
||||
match &cks.key.atomic_pattern {
|
||||
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&input,
|
||||
&standard_atomic_pattern_client_key.lwe_secret_key,
|
||||
msg,
|
||||
&compute_encoding,
|
||||
)
|
||||
}
|
||||
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
|
||||
panic!("KS32 Atomic Pattern not supported on GPU tests yet");
|
||||
}
|
||||
},
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&pbs_result,
|
||||
&compute_large_lwe_secret_key,
|
||||
msg,
|
||||
&compute_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&dp_result,
|
||||
&compute_large_lwe_secret_key,
|
||||
msg,
|
||||
&compression_encoding,
|
||||
),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
DecryptionAndNoiseResult::new_from_glwe(
|
||||
&after_packing,
|
||||
compression_glwe_secret_key,
|
||||
compression_private_key.key.params.lwe_per_glwe(),
|
||||
msg,
|
||||
&compression_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_glwe(
|
||||
&after_ms,
|
||||
compression_glwe_secret_key,
|
||||
compression_private_key.key.params.lwe_per_glwe(),
|
||||
msg,
|
||||
&compression_encoding,
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
|
||||
fn encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
comp_params: CompressionParameters,
|
||||
single_cks: &ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
single_compression_private_key: &CompressionPrivateKeys,
|
||||
single_cuda_compression_key: &CudaCompressionKey,
|
||||
msg: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> (
|
||||
Vec<(NoiseSample, NoiseSample, NoiseSample)>,
|
||||
Vec<NoiseSample>,
|
||||
Vec<NoiseSample>,
|
||||
) {
|
||||
let (before_packing, after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
|
||||
params,
|
||||
comp_params,
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
single_compression_private_key,
|
||||
single_cuda_compression_key,
|
||||
msg,
|
||||
streams,
|
||||
);
|
||||
|
||||
(
|
||||
before_packing
|
||||
.into_iter()
|
||||
.map(|(input, after_pbs, after_dp)| {
|
||||
(
|
||||
input
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_pbs
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_dp
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
after_packing
|
||||
.into_iter()
|
||||
.map(|x| {
|
||||
x.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed")
|
||||
})
|
||||
.collect(),
|
||||
after_ms
|
||||
.into_iter()
|
||||
.map(|x| {
|
||||
x.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed")
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
|
||||
fn encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
comp_params: CompressionParameters,
|
||||
single_cks: &ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
single_compression_private_key: &CompressionPrivateKeys,
|
||||
single_cuda_compression_key: &CudaCompressionKey,
|
||||
msg: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> Vec<DecryptionAndNoiseResult> {
|
||||
let (_before_packing, _after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
|
||||
params,
|
||||
comp_params,
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
single_compression_private_key,
|
||||
single_cuda_compression_key,
|
||||
msg,
|
||||
streams,
|
||||
);
|
||||
|
||||
after_ms
|
||||
}
|
||||
|
||||
fn noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu(meta_params: MetaParameters) {
|
||||
let (params, comp_params) = (
|
||||
meta_params.compute_parameters,
|
||||
meta_params.compression_parameters.unwrap(),
|
||||
);
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
|
||||
let private_compression_key = cks.new_compression_private_key(comp_params);
|
||||
let (compressed_compression_key, _compressed_decompression_key) =
|
||||
cks.new_compressed_compression_decompression_keys(&private_compression_key);
|
||||
let compression_key = compressed_compression_key.decompress();
|
||||
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
|
||||
|
||||
let noise_simulation_bsk =
|
||||
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
|
||||
let noise_simulation_packing_key =
|
||||
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
|
||||
|
||||
assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
|
||||
assert!(noise_simulation_packing_key.matches_actual_shortint_comp_key(&compression_key.key));
|
||||
|
||||
// The multiplication done in the compression is made to move the message up at the top of the
|
||||
// carry space, multiplying by the carry modulus achieves that
|
||||
let dp_scalar = params.carry_modulus().0;
|
||||
|
||||
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
|
||||
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
|
||||
noise_simulation_bsk.output_polynomial_size(),
|
||||
Variance(0.0),
|
||||
noise_simulation_bsk.modulus(),
|
||||
);
|
||||
|
||||
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
|
||||
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
|
||||
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
|
||||
|
||||
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
|
||||
let noise_simulation = NoiseSimulationLwe::new(
|
||||
cks.parameters().lwe_dimension(),
|
||||
Variance(0.0),
|
||||
NoiseSimulationModulus::from_ciphertext_modulus(cks.parameters().ciphertext_modulus()),
|
||||
);
|
||||
br_dp_packing_ks_ms(
|
||||
vec![noise_simulation; lwe_per_glwe.0],
|
||||
&noise_simulation_bsk,
|
||||
&noise_simulation_accumulator,
|
||||
dp_scalar,
|
||||
&noise_simulation_packing_key,
|
||||
storage_modulus_log,
|
||||
&mut vec![(); lwe_per_glwe.0],
|
||||
)
|
||||
};
|
||||
|
||||
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
|
||||
.map(|_| {
|
||||
cks.key
|
||||
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let d_input_zeros: Vec<_> = input_zeros
|
||||
.iter()
|
||||
.map(|ct| {
|
||||
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
|
||||
CudaDynLwe::U64(d_ct_input)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let id_lut = cuda_sks.generate_lookup_table(|x| x);
|
||||
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
|
||||
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
atomic_pattern: params.atomic_pattern(),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
|
||||
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
|
||||
.collect();
|
||||
|
||||
// Check that the circuit is correct with respect to core implementation, i.e. does not crash on
|
||||
// dimension checks
|
||||
let (expected_glwe_size_out, expected_polynomial_size_out, expected_modulus_f64_out) = {
|
||||
let (_before_packing_sim, _after_packing, after_ms) = br_dp_packing_ks_ms(
|
||||
d_input_zeros,
|
||||
&cuda_sks,
|
||||
&d_accumulator,
|
||||
dp_scalar,
|
||||
&cuda_compression_key.packing_key_switching_key,
|
||||
storage_modulus_log,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
|
||||
(
|
||||
after_ms.glwe_dimension().to_glwe_size(),
|
||||
after_ms.polynomial_size(),
|
||||
after_ms.ciphertext_modulus().raw_modulus_float(),
|
||||
)
|
||||
};
|
||||
|
||||
assert_eq!(after_ms_sim.glwe_size(), expected_glwe_size_out);
|
||||
assert_eq!(after_ms_sim.polynomial_size(), expected_polynomial_size_out);
|
||||
assert_eq!(after_ms_sim.modulus().as_f64(), expected_modulus_f64_out);
|
||||
|
||||
let cleartext_modulus = params.message_modulus().0 * params.carry_modulus().0;
|
||||
let mut noise_samples_before_ms = vec![];
|
||||
let mut noise_samples_after_ms = vec![];
|
||||
|
||||
let sample_count_per_msg = 1000usize;
|
||||
let chunk_size = 8;
|
||||
let vec_local_streams = (0..chunk_size)
|
||||
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
|
||||
.collect::<Vec<_>>();
|
||||
for _ in 0..cleartext_modulus {
|
||||
let (current_noise_samples_before_ms, current_noise_samples_after_ms): (Vec<_>, Vec<_>) =
|
||||
(0..sample_count_per_msg)
|
||||
.collect::<Vec<_>>()
|
||||
.chunks(chunk_size)
|
||||
.flat_map(|chunk| {
|
||||
chunk
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
let local_stream = &vec_local_streams[*i % chunk_size];
|
||||
let (_before_packing, after_packing, after_ms) =
|
||||
encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
|
||||
params,
|
||||
comp_params,
|
||||
&cks,
|
||||
&cuda_sks,
|
||||
&private_compression_key,
|
||||
&cuda_compression_key,
|
||||
0,
|
||||
local_stream,
|
||||
);
|
||||
(after_packing, after_ms)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unzip();
|
||||
|
||||
noise_samples_before_ms.extend(current_noise_samples_before_ms);
|
||||
noise_samples_after_ms.extend(current_noise_samples_after_ms);
|
||||
}
|
||||
|
||||
let noise_samples_before_ms_flattened: Vec<_> = noise_samples_before_ms
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|x| x.value)
|
||||
.collect();
|
||||
|
||||
let noise_samples_after_ms_flattened: Vec<_> = noise_samples_after_ms
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|x| x.value)
|
||||
.collect();
|
||||
|
||||
let before_ms_normality =
|
||||
normality_check(&noise_samples_before_ms_flattened, "before ms", 0.01);
|
||||
|
||||
let after_ms_is_ok = mean_and_variance_check(
|
||||
&noise_samples_after_ms_flattened,
|
||||
"after_ms",
|
||||
0.0,
|
||||
after_ms_sim.variance_per_occupied_slot(),
|
||||
comp_params.packing_ks_key_noise_distribution(),
|
||||
after_ms_sim
|
||||
.glwe_dimension()
|
||||
.to_equivalent_lwe_dimension(after_ms_sim.polynomial_size()),
|
||||
after_ms_sim.modulus().as_f64(),
|
||||
);
|
||||
|
||||
assert!(before_ms_normality.null_hypothesis_is_valid && after_ms_is_ok);
|
||||
}
|
||||
// Instantiates the noise check above for the listed meta parameter set.
// NOTE(review): presumably the macro expands to one `#[test]` per parameter set -- confirm
// against the definition of `create_gpu_parameterized_test`.
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu {
|
||||
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
|
||||
});
|
||||
|
||||
fn noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu(meta_params: MetaParameters) {
|
||||
let (pfail_test_meta, params, comp_params) = {
|
||||
let (mut params, comp_params) = (
|
||||
meta_params.compute_parameters,
|
||||
meta_params.compression_parameters.unwrap(),
|
||||
);
|
||||
|
||||
let original_message_modulus = params.message_modulus();
|
||||
let original_carry_modulus = params.carry_modulus();
|
||||
|
||||
// For now only allow 2_2 parameters, and see later for heuristics to use
|
||||
assert_eq!(original_message_modulus.0, 4);
|
||||
assert_eq!(original_carry_modulus.0, 4);
|
||||
|
||||
let noise_simulation_bsk =
|
||||
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
|
||||
let noise_simulation_packing_key =
|
||||
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
|
||||
|
||||
// The multiplication done in the compression is made to move the message up at the top of
|
||||
// the carry space, multiplying by the carry modulus achieves that
|
||||
let dp_scalar = params.carry_modulus().0;
|
||||
|
||||
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
|
||||
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
|
||||
noise_simulation_bsk.output_polynomial_size(),
|
||||
Variance(0.0),
|
||||
noise_simulation_bsk.modulus(),
|
||||
);
|
||||
|
||||
let lwe_per_glwe = comp_params.lwe_per_glwe();
|
||||
let storage_modulus_log = comp_params.storage_log_modulus();
|
||||
|
||||
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
|
||||
let noise_simulation = NoiseSimulationLwe::new(
|
||||
params.lwe_dimension(),
|
||||
Variance(0.0),
|
||||
NoiseSimulationModulus::from_ciphertext_modulus(params.ciphertext_modulus()),
|
||||
);
|
||||
br_dp_packing_ks_ms(
|
||||
vec![noise_simulation; lwe_per_glwe.0],
|
||||
&noise_simulation_bsk,
|
||||
&noise_simulation_accumulator,
|
||||
dp_scalar,
|
||||
&noise_simulation_packing_key,
|
||||
storage_modulus_log,
|
||||
&mut vec![(); lwe_per_glwe.0],
|
||||
)
|
||||
};
|
||||
|
||||
let expected_variance_after_storage = after_ms_sim.variance_per_occupied_slot();
|
||||
|
||||
let compression_carry_mod = CarryModulus(1);
|
||||
let compression_message_mod = original_message_modulus;
|
||||
let compression_precision_with_padding =
|
||||
precision_with_padding(compression_message_mod, compression_carry_mod);
|
||||
let expected_pfail_for_storage = expected_pfail_for_precision(
|
||||
compression_precision_with_padding,
|
||||
expected_variance_after_storage,
|
||||
);
|
||||
|
||||
let original_pfail_and_precision = PfailAndPrecision::new(
|
||||
expected_pfail_for_storage,
|
||||
compression_message_mod,
|
||||
compression_carry_mod,
|
||||
);
|
||||
|
||||
// Here we update the message modulus only:
|
||||
// - because the message modulus matches for the compression encoding and compute encoding
|
||||
// - so that the carry modulus stays the same and we apply the same dot product as normal
|
||||
// for 2_2
|
||||
// - so that the effective encoding after the storage is the one we used to evaluate the
|
||||
// pfail
|
||||
let updated_message_mod = MessageModulus(1 << 6);
|
||||
let updated_carry_mod = compression_carry_mod;
|
||||
|
||||
update_ap_params_msg_and_carry_moduli(&mut params, updated_message_mod, updated_carry_mod);
|
||||
|
||||
assert!(
|
||||
(params.message_modulus().0 * params.carry_modulus().0).ilog2()
|
||||
<= comp_params.storage_log_modulus().0 as u32,
|
||||
"Compression storage modulus cannot store enough bits for pfail estimation"
|
||||
);
|
||||
|
||||
let updated_precision_with_padding =
|
||||
precision_with_padding(updated_message_mod, updated_carry_mod);
|
||||
|
||||
let new_expected_pfail_for_storage = expected_pfail_for_precision(
|
||||
updated_precision_with_padding,
|
||||
expected_variance_after_storage,
|
||||
);
|
||||
|
||||
let new_expected_pfail_and_precision = PfailAndPrecision::new(
|
||||
new_expected_pfail_for_storage,
|
||||
updated_message_mod,
|
||||
updated_carry_mod,
|
||||
);
|
||||
|
||||
let pfail_test_meta = if should_run_short_pfail_tests_debug() {
|
||||
// To have the same amount of keys generated as the case where a single run is a single
|
||||
// sample
|
||||
let expected_fails = 200 * lwe_per_glwe.0 as u32;
|
||||
PfailTestMeta::new_with_desired_expected_fails(
|
||||
original_pfail_and_precision,
|
||||
new_expected_pfail_and_precision,
|
||||
expected_fails,
|
||||
)
|
||||
} else {
|
||||
// To guarantee 1_000_000 keysets are generated
|
||||
let total_runs = 1_000_000 * lwe_per_glwe.0 as u32;
|
||||
PfailTestMeta::new_with_total_runs(
|
||||
original_pfail_and_precision,
|
||||
new_expected_pfail_and_precision,
|
||||
total_runs,
|
||||
)
|
||||
};
|
||||
|
||||
(pfail_test_meta, params, comp_params)
|
||||
};
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
|
||||
let private_compression_key = cks.new_compression_private_key(comp_params);
|
||||
let (compressed_compression_key, _compressed_decompression_key) =
|
||||
cks.new_compressed_compression_decompression_keys(&private_compression_key);
|
||||
|
||||
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
|
||||
|
||||
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
|
||||
|
||||
let total_runs_for_expected_fails = pfail_test_meta
|
||||
.total_runs_for_expected_fails()
|
||||
.div_ceil(lwe_per_glwe.0.try_into().unwrap());
|
||||
|
||||
let chunk_size = 8;
|
||||
let vec_local_streams = (0..chunk_size)
|
||||
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let measured_fails: f64 = (0..total_runs_for_expected_fails)
|
||||
.collect::<Vec<_>>()
|
||||
.chunks(chunk_size)
|
||||
.flat_map(|chunk| {
|
||||
chunk
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
let local_streams = &vec_local_streams[*i as usize % chunk_size];
|
||||
let after_ms_decryption_result = encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
|
||||
params,
|
||||
comp_params,
|
||||
&cks,
|
||||
&cuda_sks,
|
||||
&private_compression_key,
|
||||
&cuda_compression_key,
|
||||
0,
|
||||
local_streams,
|
||||
);
|
||||
after_ms_decryption_result
|
||||
.into_iter()
|
||||
.map(|result| result.failure_as_f64())
|
||||
.sum::<f64>()
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.sum();
|
||||
|
||||
let test_result = PfailTestResult { measured_fails };
|
||||
|
||||
pfail_check(&pfail_test_meta, test_result);
|
||||
}
|
||||
|
||||
// Hands `noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu` to the GPU parameterized-test macro
// together with the single 2_2 meta parameter set it is meant to run with.
// NOTE(review): presumably the macro expands to one `#[test]` per listed parameter set — confirm
// against the definition of `create_gpu_parameterized_test`.
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu {
|
||||
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
|
||||
});
|
||||
@@ -0,0 +1,872 @@
|
||||
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
|
||||
use crate::core_crypto::commons::noise_formulas::noise_simulation::{
|
||||
NoiseSimulationLweFourier128Bsk, NoiseSimulationLwePackingKeyswitchKey,
|
||||
};
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertextCount};
|
||||
use crate::integer::gpu::CudaServerKey;
|
||||
use crate::integer::noise_squashing::NoiseSquashingPrivateKey;
|
||||
use crate::integer::CompressedServerKey;
|
||||
|
||||
use crate::core_crypto::commons::parameters::CiphertextModulusLog;
|
||||
use crate::core_crypto::prelude::generate_programmable_bootstrap_glwe_lut;
|
||||
use crate::integer::ciphertext::NoiseSquashingCompressionPrivateKey;
|
||||
use crate::integer::gpu::list_compression::server_keys::CudaNoiseSquashingCompressionKey;
|
||||
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
|
||||
use crate::integer::gpu::server_key::radix::{CudaNoiseSquashingKey, CudaUnsignedRadixCiphertext};
|
||||
use crate::integer::gpu::unchecked_small_scalar_mul_integer_async;
|
||||
use crate::integer::IntegerCiphertext;
|
||||
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
|
||||
use crate::shortint::parameters::noise_squashing::NoiseSquashingParameters;
|
||||
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
|
||||
use crate::shortint::parameters::{
|
||||
AtomicPatternParameters, MetaParameters, NoiseSquashingCompressionParameters, Variance,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::dp_ks_pbs128_packingks::{
|
||||
dp_ks_any_ms_standard_pbs128, dp_ks_any_ms_standard_pbs128_packing_ks,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::should_use_single_key_debug;
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
|
||||
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
|
||||
NoiseSimulationLweKeyswitchKey, NoiseSimulationModulusSwitchConfig,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::{
|
||||
mean_and_variance_check, DecryptionAndNoiseResult, NoiseSample,
|
||||
};
|
||||
use crate::shortint::{PaddingBit, ShortintEncoding, ShortintParameterSet};
|
||||
use crate::GpuIndex;
|
||||
use rayon::prelude::*;
|
||||
|
||||
/// Test function to verify that the noise checking tools match the actual atomic patterns
|
||||
/// implemented in shortint for GPU
|
||||
fn sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu(meta_params: MetaParameters) {
|
||||
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
|
||||
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
|
||||
(
|
||||
meta_params.compute_parameters,
|
||||
meta_noise_squashing_params.parameters,
|
||||
meta_noise_squashing_params.compression_parameters.unwrap(),
|
||||
)
|
||||
};
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
|
||||
let block_params: ShortintParameterSet = atomic_params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
|
||||
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
|
||||
let compressed_noise_squashing_compression_key =
|
||||
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
|
||||
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
|
||||
let cuda_noise_squashing_key =
|
||||
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
|
||||
let noise_squashing_compression_private_key =
|
||||
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
|
||||
let noise_squashing_compression_key = noise_squashing_private_key
|
||||
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
|
||||
let cuda_noise_squashing_compression_key =
|
||||
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
|
||||
&noise_squashing_compression_key,
|
||||
&streams,
|
||||
);
|
||||
|
||||
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
|
||||
|
||||
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
|
||||
|
||||
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
|
||||
|
||||
let u128_encoding = ShortintEncoding {
|
||||
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
|
||||
message_modulus: noise_squashing_params.message_modulus(),
|
||||
carry_modulus: noise_squashing_params.carry_modulus(),
|
||||
padding_bit: PaddingBit::Yes,
|
||||
};
|
||||
let max_scalar_mul = cuda_sks.max_noise_level.get();
|
||||
|
||||
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
|
||||
noise_squashing_key.key.polynomial_size(),
|
||||
noise_squashing_key.key.glwe_size(),
|
||||
u128_encoding
|
||||
.cleartext_space_without_padding()
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
u128_encoding.ciphertext_modulus,
|
||||
u128_encoding.delta(),
|
||||
|x| x,
|
||||
);
|
||||
|
||||
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
|
||||
|
||||
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
|
||||
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(atomic_params.message_modulus().0 - 1),
|
||||
message_modulus: atomic_params.message_modulus(),
|
||||
carry_modulus: atomic_params.carry_modulus(),
|
||||
atomic_pattern: atomic_params.atomic_pattern(),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
|
||||
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
|
||||
.collect();
|
||||
|
||||
let input_zero_as_lwe: Vec<_> = input_zeros
|
||||
.iter()
|
||||
.map(|ct| {
|
||||
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
|
||||
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
|
||||
&streams,
|
||||
);
|
||||
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let (_before_packing, d_after_packing) = dp_ks_any_ms_standard_pbs128_packing_ks(
|
||||
input_zero_as_lwe,
|
||||
max_scalar_mul,
|
||||
&cuda_sks,
|
||||
modulus_switch_config,
|
||||
&cuda_noise_squashing_key,
|
||||
br_input_modulus_log,
|
||||
&id_lut_gpu,
|
||||
&cuda_noise_squashing_compression_key.packing_key_switching_key,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
|
||||
let cuda_noise_squashed_cts: Vec<_> = input_zeros
|
||||
.into_par_iter()
|
||||
.map(|ct| {
|
||||
let cloned_ct = ct;
|
||||
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cloned_ct]);
|
||||
let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
|
||||
unsafe {
|
||||
unchecked_small_scalar_mul_integer_async(
|
||||
&streams,
|
||||
&mut d_ct.ciphertext,
|
||||
max_scalar_mul,
|
||||
atomic_params.message_modulus(),
|
||||
atomic_params.carry_modulus(),
|
||||
);
|
||||
}
|
||||
streams.synchronize();
|
||||
cuda_noise_squashing_key.unchecked_squash_ciphertext_noise(
|
||||
&d_ct.ciphertext,
|
||||
&cuda_sks,
|
||||
&streams,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let gpu_compressed = cuda_noise_squashing_compression_key
|
||||
.compress_noise_squashed_ciphertexts_into_list(&cuda_noise_squashed_cts, &streams);
|
||||
|
||||
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
|
||||
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
|
||||
let extracted_glwe = GlweCiphertext::from_container(
|
||||
extracted_list.clone().into_container(),
|
||||
extracted_list.polynomial_size(),
|
||||
extracted_list.ciphertext_modulus(),
|
||||
);
|
||||
|
||||
let after_packing_list = d_after_packing.to_glwe_ciphertext_list(&streams);
|
||||
let mut after_packing = GlweCiphertext::from_container(
|
||||
after_packing_list.clone().into_container(),
|
||||
after_packing_list.polynomial_size(),
|
||||
after_packing_list.ciphertext_modulus(),
|
||||
);
|
||||
// Bodies that were not filled are discarded
|
||||
after_packing.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
|
||||
|
||||
assert_eq!(after_packing.as_view(), extracted_glwe.as_view());
|
||||
}
|
||||
|
||||
/// Test function to verify that the noise checking tools match the actual atomic patterns
|
||||
/// implemented in shortint for GPU
|
||||
fn sanity_check_encrypt_dp_ks_standard_pbs128_gpu(meta_params: MetaParameters) {
|
||||
let (params, noise_squashing_params) = {
|
||||
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
|
||||
(
|
||||
meta_params.compute_parameters,
|
||||
meta_noise_squashing_params.parameters,
|
||||
)
|
||||
};
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
|
||||
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
|
||||
let compressed_noise_squashing_compression_key =
|
||||
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
|
||||
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
|
||||
let cuda_noise_squashing_key =
|
||||
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
|
||||
|
||||
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
|
||||
|
||||
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
|
||||
|
||||
let u128_encoding = ShortintEncoding {
|
||||
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
|
||||
message_modulus: noise_squashing_params.message_modulus(),
|
||||
carry_modulus: noise_squashing_params.carry_modulus(),
|
||||
padding_bit: PaddingBit::Yes,
|
||||
};
|
||||
let max_scalar_mul = cuda_sks.max_noise_level.get();
|
||||
|
||||
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
|
||||
noise_squashing_key.key.polynomial_size(),
|
||||
noise_squashing_key.key.glwe_size(),
|
||||
u128_encoding
|
||||
.cleartext_space_without_padding()
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
u128_encoding.ciphertext_modulus,
|
||||
u128_encoding.delta(),
|
||||
|x| x,
|
||||
);
|
||||
|
||||
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
|
||||
|
||||
let lwe_per_glwe = LweCiphertextCount(128);
|
||||
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
|
||||
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
atomic_pattern: params.atomic_pattern(),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
|
||||
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
|
||||
.collect();
|
||||
|
||||
let input_zero_as_lwe: Vec<_> = input_zeros
|
||||
.iter()
|
||||
.map(|ct| {
|
||||
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
|
||||
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
|
||||
&streams,
|
||||
);
|
||||
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let res: Vec<_> = input_zero_as_lwe
|
||||
.into_par_iter()
|
||||
.zip(cuda_side_resources.par_iter_mut())
|
||||
.map(|(input, side_resources)| {
|
||||
let (input, after_dp, ks_result, drift_technique_result, ms_result, pbs_result) =
|
||||
dp_ks_any_ms_standard_pbs128(
|
||||
input,
|
||||
max_scalar_mul,
|
||||
&cuda_sks,
|
||||
modulus_switch_config,
|
||||
&cuda_noise_squashing_key,
|
||||
br_input_modulus_log,
|
||||
&id_lut_gpu,
|
||||
side_resources,
|
||||
);
|
||||
|
||||
(
|
||||
input,
|
||||
after_dp,
|
||||
ks_result,
|
||||
drift_technique_result,
|
||||
ms_result,
|
||||
pbs_result,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let input_zeros_non_pattern: Vec<_> = input_zeros
|
||||
.iter()
|
||||
.map(|ct| {
|
||||
CudaUnsignedRadixCiphertext::from_radix_ciphertext(
|
||||
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
|
||||
&streams,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let vector_non_pattern: Vec<_> = input_zeros_non_pattern
|
||||
.into_par_iter()
|
||||
.map(|mut d_ct_input2| {
|
||||
unsafe {
|
||||
unchecked_small_scalar_mul_integer_async(
|
||||
&streams,
|
||||
&mut d_ct_input2.ciphertext,
|
||||
max_scalar_mul,
|
||||
params.message_modulus(),
|
||||
params.carry_modulus(),
|
||||
);
|
||||
}
|
||||
|
||||
streams.synchronize();
|
||||
|
||||
cuda_noise_squashing_key
|
||||
.squash_radix_ciphertext_noise(&cuda_sks, &d_ct_input2.ciphertext, &streams)
|
||||
.unwrap()
|
||||
})
|
||||
.collect();
|
||||
|
||||
let vector_pattern_cpu: Vec<_> = res
|
||||
.into_iter()
|
||||
.map(
|
||||
|(_input, _after_dp, _ks_result, _drift_technique_result, _ms_result, pbs_result)| {
|
||||
pbs_result.as_ct_128_cpu(&streams)
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
let vector_non_pattern_cpu: Vec<_> = vector_non_pattern
|
||||
.into_par_iter()
|
||||
.map(|cuda_squashed_radix_ct| {
|
||||
let squashed_noise_ct_cpu =
|
||||
cuda_squashed_radix_ct.to_squashed_noise_radix_ciphertext(&streams);
|
||||
squashed_noise_ct_cpu.packed_blocks()[0]
|
||||
.lwe_ciphertext()
|
||||
.clone()
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Compare that all the results are equivalent
|
||||
assert_eq!(vector_pattern_cpu.len(), vector_non_pattern_cpu.len());
|
||||
for (a, b) in vector_pattern_cpu.iter().zip(vector_non_pattern_cpu.iter()) {
|
||||
assert_eq!(a.as_view(), b.as_view());
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
noise_squashing_params: NoiseSquashingParameters,
|
||||
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
|
||||
single_cks: &crate::integer::ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
|
||||
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
|
||||
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
|
||||
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
|
||||
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
|
||||
msg: u64,
|
||||
scalar_for_multiplication: u64,
|
||||
br_input_modulus_log: CiphertextModulusLog,
|
||||
streams: &CudaStreams,
|
||||
) -> (
|
||||
Vec<(
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
)>,
|
||||
Vec<DecryptionAndNoiseResult>,
|
||||
) {
|
||||
let thread_cks: crate::integer::ClientKey;
|
||||
let thread_cuda_sks: CudaServerKey;
|
||||
let thread_noise_squashing_private_key: NoiseSquashingPrivateKey;
|
||||
let thread_noise_squashing_key: crate::integer::noise_squashing::NoiseSquashingKey;
|
||||
let thread_cuda_noise_squashing_key: CudaNoiseSquashingKey;
|
||||
let thread_noise_squashing_compression_private_key: NoiseSquashingCompressionPrivateKey;
|
||||
let thread_cuda_noise_squashing_compression_key: CudaNoiseSquashingCompressionKey;
|
||||
let (
|
||||
cks,
|
||||
cuda_sks,
|
||||
noise_squashing_private_key,
|
||||
noise_squashing_key,
|
||||
cuda_noise_squashing_key,
|
||||
noise_squashing_compression_private_key,
|
||||
cuda_noise_squashing_compression_key,
|
||||
) = if should_use_single_key_debug() {
|
||||
(
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
single_noise_squashing_private_key,
|
||||
single_noise_squashing_key,
|
||||
single_cuda_noise_squashing_key,
|
||||
single_noise_squashing_compression_private_key,
|
||||
single_cuda_noise_squashing_compression_key,
|
||||
)
|
||||
} else {
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
thread_cks = crate::integer::ClientKey::new(block_params);
|
||||
let thread_compressed_server_key =
|
||||
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
|
||||
thread_cuda_sks =
|
||||
CudaServerKey::decompress_from_cpu(&thread_compressed_server_key, streams);
|
||||
|
||||
thread_noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
|
||||
let thread_compressed_noise_squashing_compression_key =
|
||||
thread_cks.new_compressed_noise_squashing_key(&thread_noise_squashing_private_key);
|
||||
thread_noise_squashing_key = thread_compressed_noise_squashing_compression_key.decompress();
|
||||
thread_cuda_noise_squashing_key =
|
||||
thread_compressed_noise_squashing_compression_key.decompress_to_cuda(streams);
|
||||
thread_noise_squashing_compression_private_key =
|
||||
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
|
||||
let thread_noise_squashing_compression_key = thread_noise_squashing_private_key
|
||||
.new_noise_squashing_compression_key(&thread_noise_squashing_compression_private_key);
|
||||
thread_cuda_noise_squashing_compression_key =
|
||||
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
|
||||
&thread_noise_squashing_compression_key,
|
||||
streams,
|
||||
);
|
||||
(
|
||||
&thread_cks,
|
||||
&thread_cuda_sks,
|
||||
&thread_noise_squashing_private_key,
|
||||
&thread_noise_squashing_key,
|
||||
&thread_cuda_noise_squashing_key,
|
||||
&thread_noise_squashing_compression_private_key,
|
||||
&thread_cuda_noise_squashing_compression_key,
|
||||
)
|
||||
};
|
||||
|
||||
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
|
||||
|
||||
let bsk_polynomial_size = noise_squashing_key.key.polynomial_size();
|
||||
let bsk_glwe_size = noise_squashing_key.key.glwe_size();
|
||||
|
||||
let u128_encoding = ShortintEncoding {
|
||||
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
|
||||
message_modulus: noise_squashing_params.message_modulus(),
|
||||
carry_modulus: noise_squashing_params.carry_modulus(),
|
||||
padding_bit: PaddingBit::Yes,
|
||||
};
|
||||
|
||||
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
|
||||
bsk_polynomial_size,
|
||||
bsk_glwe_size,
|
||||
u128_encoding
|
||||
.cleartext_space_without_padding()
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
u128_encoding.ciphertext_modulus,
|
||||
u128_encoding.delta(),
|
||||
|x| x,
|
||||
);
|
||||
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, streams);
|
||||
|
||||
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
|
||||
|
||||
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(msg)).collect();
|
||||
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
atomic_pattern: params.atomic_pattern(),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
|
||||
.map(|_| CudaSideResources::new(streams, cuda_block_info))
|
||||
.collect();
|
||||
|
||||
let input_zero_as_lwe: Vec<_> = input_zeros
|
||||
.iter()
|
||||
.map(|ct| {
|
||||
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
|
||||
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
|
||||
streams,
|
||||
);
|
||||
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let (before_packing_gpu, after_packing_gpu) = dp_ks_any_ms_standard_pbs128_packing_ks(
|
||||
input_zero_as_lwe,
|
||||
scalar_for_multiplication,
|
||||
cuda_sks,
|
||||
modulus_switch_config,
|
||||
cuda_noise_squashing_key,
|
||||
br_input_modulus_log,
|
||||
&id_lut_gpu,
|
||||
&cuda_noise_squashing_compression_key.packing_key_switching_key,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
|
||||
let before_packing: Vec<_> = before_packing_gpu
|
||||
.into_iter()
|
||||
.map(
|
||||
|(
|
||||
input_gpu,
|
||||
after_dp_gpu,
|
||||
after_ks_gpu,
|
||||
after_drift_gpu,
|
||||
after_ms_gpu,
|
||||
after_pbs128_gpu,
|
||||
)| {
|
||||
match &cks.key.atomic_pattern {
|
||||
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
|
||||
let params = standard_atomic_pattern_client_key.parameters;
|
||||
let u64_encoding = ShortintEncoding {
|
||||
ciphertext_modulus: params.ciphertext_modulus(),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
padding_bit: PaddingBit::Yes,
|
||||
};
|
||||
let large_lwe_secret_key =
|
||||
standard_atomic_pattern_client_key.large_lwe_secret_key();
|
||||
let small_lwe_secret_key =
|
||||
standard_atomic_pattern_client_key.small_lwe_secret_key();
|
||||
|
||||
let input_ct = input_gpu.as_ct_64_cpu(streams);
|
||||
let after_dp_ct = after_dp_gpu.as_ct_64_cpu(streams);
|
||||
let after_ks_ct = after_ks_gpu.as_ct_64_cpu(streams);
|
||||
let before_ms_gpu: &CudaDynLwe =
|
||||
after_drift_gpu.as_ref().unwrap_or(&after_ks_gpu);
|
||||
let before_ms_ct = before_ms_gpu.as_ct_64_cpu(streams);
|
||||
let after_ms_ct = after_ms_gpu.as_ct_64_cpu(streams);
|
||||
let after_pbs128_ct = after_pbs128_gpu.as_ct_128_cpu(streams);
|
||||
(
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&input_ct,
|
||||
&large_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_dp_ct,
|
||||
&large_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_ks_ct,
|
||||
&small_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&before_ms_ct,
|
||||
&small_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_ms_ct,
|
||||
&small_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_pbs128_ct,
|
||||
&noise_squashing_private_key
|
||||
.key
|
||||
.post_noise_squashing_lwe_secret_key(),
|
||||
msg.into(),
|
||||
&u128_encoding,
|
||||
),
|
||||
)
|
||||
}
|
||||
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
|
||||
panic!("KS32 atomic pattern not supported for GPU yet");
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
let after_packing_list = after_packing_gpu.to_glwe_ciphertext_list(streams);
|
||||
let after_packing = GlweCiphertext::from_container(
|
||||
after_packing_list.clone().into_container(),
|
||||
after_packing_list.polynomial_size(),
|
||||
after_packing_list.ciphertext_modulus(),
|
||||
);
|
||||
let after_packing = DecryptionAndNoiseResult::new_from_glwe(
|
||||
&after_packing,
|
||||
noise_squashing_compression_private_key
|
||||
.key
|
||||
.post_packing_ks_key(),
|
||||
lwe_per_glwe,
|
||||
msg.into(),
|
||||
&u128_encoding,
|
||||
);
|
||||
|
||||
assert_eq!(after_packing.len(), lwe_per_glwe.0);
|
||||
|
||||
(before_packing, after_packing)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
noise_squashing_params: NoiseSquashingParameters,
|
||||
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
|
||||
single_cks: &crate::integer::ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
|
||||
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
|
||||
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
|
||||
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
|
||||
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
|
||||
msg: u64,
|
||||
scalar_for_multiplication: u64,
|
||||
br_input_modulus_log: CiphertextModulusLog,
|
||||
streams: &CudaStreams,
|
||||
) -> (
|
||||
Vec<(
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
)>,
|
||||
Vec<NoiseSample>,
|
||||
) {
|
||||
let (before_compression, after_compression) =
|
||||
encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
|
||||
params,
|
||||
noise_squashing_params,
|
||||
noise_squashing_compression_params,
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
single_noise_squashing_private_key,
|
||||
single_noise_squashing_key,
|
||||
single_cuda_noise_squashing_key,
|
||||
single_noise_squashing_compression_private_key,
|
||||
single_cuda_noise_squashing_compression_key,
|
||||
msg,
|
||||
scalar_for_multiplication,
|
||||
br_input_modulus_log,
|
||||
streams,
|
||||
);
|
||||
|
||||
(
|
||||
before_compression
|
||||
.into_iter()
|
||||
.map(
|
||||
|(input, after_dp, after_ks, after_drift, after_ms, after_pbs)| {
|
||||
(
|
||||
input
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_dp
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_ks
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_drift
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_ms
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_pbs
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
)
|
||||
},
|
||||
)
|
||||
.collect(),
|
||||
after_compression
|
||||
.into_iter()
|
||||
.map(|after_compression| {
|
||||
after_compression
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed")
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu(meta_params: MetaParameters) {
|
||||
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
|
||||
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
|
||||
(
|
||||
meta_params.compute_parameters,
|
||||
meta_noise_squashing_params.parameters,
|
||||
meta_noise_squashing_params.compression_parameters.unwrap(),
|
||||
)
|
||||
};
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
|
||||
let block_params: ShortintParameterSet = atomic_params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
|
||||
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
|
||||
let compressed_noise_squashing_compression_key =
|
||||
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
|
||||
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
|
||||
let cuda_noise_squashing_key =
|
||||
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
|
||||
let noise_squashing_compression_private_key =
|
||||
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
|
||||
let noise_squashing_compression_key = noise_squashing_private_key
|
||||
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
|
||||
let cuda_noise_squashing_compression_key =
|
||||
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
|
||||
&noise_squashing_compression_key,
|
||||
&streams,
|
||||
);
|
||||
|
||||
let noise_simulation_ksk =
|
||||
NoiseSimulationLweKeyswitchKey::new_from_atomic_pattern_parameters(atomic_params);
|
||||
let noise_simulation_bsk =
|
||||
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(atomic_params);
|
||||
let noise_simulation_modulus_switch_config =
|
||||
NoiseSimulationModulusSwitchConfig::new_from_atomic_pattern_parameters(atomic_params);
|
||||
let noise_simulation_bsk128 =
|
||||
NoiseSimulationLweFourier128Bsk::new_from_parameters(atomic_params, noise_squashing_params);
|
||||
let noise_simulation_packing_key =
|
||||
NoiseSimulationLwePackingKeyswitchKey::new_from_noise_squashing_parameters(
|
||||
noise_squashing_params,
|
||||
noise_squashing_compression_params,
|
||||
);
|
||||
|
||||
assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
|
||||
|
||||
assert!(noise_simulation_bsk128
|
||||
.matches_actual_shortint_noise_squashing_key(&noise_squashing_key.key));
|
||||
assert!(noise_simulation_packing_key.matches_actual_pksk(
|
||||
noise_squashing_compression_key
|
||||
.key
|
||||
.packing_key_switching_key()
|
||||
));
|
||||
|
||||
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
|
||||
|
||||
let max_scalar_mul = cuda_sks.max_noise_level.get();
|
||||
|
||||
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
|
||||
noise_simulation_bsk128
|
||||
.output_glwe_size()
|
||||
.to_glwe_dimension(),
|
||||
noise_simulation_bsk128.output_polynomial_size(),
|
||||
Variance(0.0),
|
||||
noise_simulation_bsk128.modulus(),
|
||||
);
|
||||
|
||||
let (_before_packing_sim, after_packing_sim) = {
|
||||
let noise_simulation = NoiseSimulationLwe::encrypt(&cks.key, 0);
|
||||
dp_ks_any_ms_standard_pbs128_packing_ks(
|
||||
vec![noise_simulation; cuda_noise_squashing_compression_key.lwe_per_glwe.0],
|
||||
max_scalar_mul,
|
||||
&noise_simulation_ksk,
|
||||
noise_simulation_modulus_switch_config.as_ref(),
|
||||
&noise_simulation_bsk128,
|
||||
br_input_modulus_log,
|
||||
&noise_simulation_accumulator,
|
||||
&noise_simulation_packing_key,
|
||||
&mut vec![(); cuda_noise_squashing_compression_key.lwe_per_glwe.0],
|
||||
)
|
||||
};
|
||||
|
||||
let after_packing_sim = after_packing_sim.into_lwe();
|
||||
|
||||
// Check that the circuit is correct with respect to core implementation, i.e. does not crash on
|
||||
// dimension checks
|
||||
let (expected_lwe_dimension_out, expected_modulus_f64_out) = {
|
||||
let pksk = noise_squashing_compression_key
|
||||
.key
|
||||
.packing_key_switching_key();
|
||||
|
||||
let out_glwe_dim = pksk.output_key_glwe_dimension();
|
||||
let out_poly_size = pksk.output_key_polynomial_size();
|
||||
|
||||
(
|
||||
out_glwe_dim.to_equivalent_lwe_dimension(out_poly_size),
|
||||
pksk.ciphertext_modulus().raw_modulus_float(),
|
||||
)
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
after_packing_sim.lwe_dimension(),
|
||||
expected_lwe_dimension_out
|
||||
);
|
||||
assert_eq!(
|
||||
after_packing_sim.modulus().as_f64(),
|
||||
expected_modulus_f64_out
|
||||
);
|
||||
|
||||
let cleartext_modulus = atomic_params.message_modulus().0 * atomic_params.carry_modulus().0;
|
||||
let mut noise_samples_after_packing = vec![];
|
||||
|
||||
let sample_count_per_msg =
|
||||
1000usize.div_ceil(cuda_noise_squashing_compression_key.lwe_per_glwe.0);
|
||||
let chunk_size = 4;
|
||||
let vec_local_streams = (0..chunk_size)
|
||||
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
|
||||
.collect::<Vec<_>>();
|
||||
for _i in 0..cleartext_modulus {
|
||||
let current_noise_samples_after_packing: Vec<_> = (0..sample_count_per_msg)
|
||||
.collect::<Vec<_>>()
|
||||
.chunks(chunk_size)
|
||||
.flat_map(|chunk| {
|
||||
chunk
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
let local_stream = &vec_local_streams[*i % chunk_size];
|
||||
let (_before_packing, after_packing) =
|
||||
encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
|
||||
atomic_params,
|
||||
noise_squashing_params,
|
||||
noise_squashing_compression_params,
|
||||
&cks,
|
||||
&cuda_sks,
|
||||
&noise_squashing_private_key,
|
||||
&noise_squashing_key,
|
||||
&cuda_noise_squashing_key,
|
||||
&noise_squashing_compression_private_key,
|
||||
&cuda_noise_squashing_compression_key,
|
||||
0,
|
||||
max_scalar_mul,
|
||||
br_input_modulus_log,
|
||||
local_stream,
|
||||
);
|
||||
after_packing
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect();
|
||||
|
||||
noise_samples_after_packing.extend(current_noise_samples_after_packing);
|
||||
}
|
||||
|
||||
let noise_samples_after_packing_flattened: Vec<_> = noise_samples_after_packing
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|x| x.value)
|
||||
.collect();
|
||||
|
||||
let after_packing_is_ok = mean_and_variance_check(
|
||||
&noise_samples_after_packing_flattened,
|
||||
"after_packing",
|
||||
0.0,
|
||||
after_packing_sim.variance(),
|
||||
noise_squashing_compression_params.packing_ks_key_noise_distribution,
|
||||
after_packing_sim.lwe_dimension(),
|
||||
after_packing_sim.modulus().as_f64(),
|
||||
);
|
||||
|
||||
assert!(after_packing_is_ok);
|
||||
}
|
||||
|
||||
create_gpu_parameterized_test!(
|
||||
noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu {
|
||||
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
|
||||
}
|
||||
);
|
||||
|
||||
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu {
|
||||
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
|
||||
});
|
||||
|
||||
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_gpu {
|
||||
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
|
||||
});
|
||||
@@ -1,3 +1,5 @@
|
||||
pub mod br_dp_ks_ms;
|
||||
pub mod br_dp_packingks_ms;
|
||||
pub mod dp_ks_ms;
|
||||
pub mod dp_ks_pbs_128_packingks;
|
||||
pub mod utils;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::{
|
||||
AllocateCenteredBinaryShiftedStandardModSwitchResult,
|
||||
AllocateDriftTechniqueStandardModSwitchResult, AllocateLweBootstrapResult,
|
||||
AllocateLweKeyswitchResult, AllocateStandardModSwitchResult,
|
||||
AllocateLweKeyswitchResult, AllocateLwePackingKeyswitchResult, AllocateStandardModSwitchResult,
|
||||
CenteredBinaryShiftedStandardModSwitch, DriftTechniqueStandardModSwitch,
|
||||
LweClassicFftBootstrap, LweKeyswitch, ScalarMul, StandardModSwitch,
|
||||
};
|
||||
@@ -13,6 +13,7 @@ use crate::core_crypto::gpu::cuda_modulus_switch_ciphertext;
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
|
||||
use crate::core_crypto::gpu::vec::CudaVec;
|
||||
use crate::core_crypto::prelude::*;
|
||||
use crate::integer::gpu::ciphertext::info::CudaBlockInfo;
|
||||
@@ -25,7 +26,7 @@ use crate::integer::gpu::{
|
||||
cuda_centered_modulus_switch_64, unchecked_small_scalar_mul_integer_async, CudaStreams,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::NoiseSimulationModulusSwitchConfig;
|
||||
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::traits::LwePackingKeyswitch;
|
||||
/// Side resources for CUDA operations in noise simulation
|
||||
#[derive(Clone)]
|
||||
pub struct CudaSideResources {
|
||||
@@ -128,6 +129,19 @@ impl CudaDynLwe {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_ct_128_cpu(&self, streams: &CudaStreams) -> LweCiphertext<Vec<u128>> {
|
||||
match self {
|
||||
Self::U32(_) => panic!("Tried getting a u32 CudaLweCiphertextList as u128."),
|
||||
Self::U64(_) => panic!("Tried getting a u64 CudaLweCiphertextList as u128."),
|
||||
Self::U128(_cuda_lwe) => {
|
||||
let cpu_lwe_list = self.as_lwe_128().to_lwe_ciphertext_list(streams);
|
||||
LweCiphertext::from_container(
|
||||
cpu_lwe_list.clone().into_container(),
|
||||
cpu_lwe_list.ciphertext_modulus(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn from_lwe_32(cuda_lwe: CudaLweCiphertextList<u32>) -> Self {
|
||||
Self::U32(cuda_lwe)
|
||||
}
|
||||
@@ -141,6 +155,19 @@ impl CudaDynLwe {
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a CudaGlweCiphertextList<u64> to a GlweCiphertext<Vec<u64>>
|
||||
pub fn cuda_glwe_list_to_glwe_ciphertext(
|
||||
cuda_glwe_list: &CudaGlweCiphertextList<u64>,
|
||||
streams: &CudaStreams,
|
||||
) -> GlweCiphertext<Vec<u64>> {
|
||||
let cpu_glwe_list = cuda_glwe_list.to_glwe_ciphertext_list(streams);
|
||||
GlweCiphertext::from_container(
|
||||
cpu_glwe_list.clone().into_container(),
|
||||
cpu_glwe_list.polynomial_size(),
|
||||
cpu_glwe_list.ciphertext_modulus(),
|
||||
)
|
||||
}
|
||||
|
||||
impl ScalarMul<u64> for CudaDynLwe {
|
||||
type Output = Self;
|
||||
type SideResources = CudaSideResources;
|
||||
@@ -313,13 +340,14 @@ impl StandardModSwitch<Self> for CudaDynLwe {
|
||||
panic!("U32 modulus switch not implemented for CudaDynLwe - only U64 is supported");
|
||||
}
|
||||
(Self::U64(input), Self::U64(output_cuda_lwe)) => {
|
||||
let internal_output = input.duplicate(&side_resources.streams);
|
||||
let mut internal_output = input.duplicate(&side_resources.streams);
|
||||
cuda_modulus_switch_ciphertext(
|
||||
&mut output_cuda_lwe.0.d_vec,
|
||||
&mut internal_output.0.d_vec,
|
||||
output_modulus_log.0 as u32,
|
||||
&side_resources.streams,
|
||||
);
|
||||
let mut cpu_lwe = internal_output.to_lwe_ciphertext_list(&side_resources.streams);
|
||||
|
||||
let shift_to_map_to_native = u64::BITS - output_modulus_log.0 as u32;
|
||||
for val in cpu_lwe.as_mut_view().into_container().iter_mut() {
|
||||
*val <<= shift_to_map_to_native;
|
||||
@@ -713,3 +741,193 @@ impl AllocateLweBootstrapResult for CudaGlweCiphertextList<u128> {
|
||||
CudaDynLwe::U128(cuda_lwe)
|
||||
}
|
||||
}
|
||||
|
||||
// Implement LweClassicFft128Bootstrap for CudaNoiseSquashingKey using 128-bit PBS CUDA function
|
||||
impl
|
||||
crate::core_crypto::commons::noise_formulas::noise_simulation::traits::LweClassicFft128Bootstrap<
|
||||
CudaDynLwe,
|
||||
CudaDynLwe,
|
||||
CudaGlweCiphertextList<u128>,
|
||||
> for crate::integer::gpu::noise_squashing::keys::CudaNoiseSquashingKey
|
||||
{
|
||||
type SideResources = CudaSideResources;
|
||||
|
||||
fn lwe_classic_fft_128_pbs(
|
||||
&self,
|
||||
input: &CudaDynLwe,
|
||||
output: &mut CudaDynLwe,
|
||||
accumulator: &CudaGlweCiphertextList<u128>,
|
||||
side_resources: &mut Self::SideResources,
|
||||
) {
|
||||
use crate::core_crypto::gpu::algorithms::lwe_programmable_bootstrapping::cuda_programmable_bootstrap_128_lwe_ciphertext_async;
|
||||
use crate::integer::gpu::server_key::CudaBootstrappingKey;
|
||||
|
||||
match (input, output) {
|
||||
(CudaDynLwe::U64(input_cuda_lwe), CudaDynLwe::U128(output_cuda_lwe)) => {
|
||||
// Get the bootstrap key from self - it's already u128 type
|
||||
let bsk = match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => d_bsk,
|
||||
CudaBootstrappingKey::MultiBit(_) => {
|
||||
panic!("MultiBit bootstrapping keys are not supported for 128-bit PBS");
|
||||
}
|
||||
};
|
||||
|
||||
unsafe {
|
||||
cuda_programmable_bootstrap_128_lwe_ciphertext_async(
|
||||
input_cuda_lwe,
|
||||
output_cuda_lwe,
|
||||
accumulator,
|
||||
bsk,
|
||||
&side_resources.streams,
|
||||
);
|
||||
side_resources.streams.synchronize();
|
||||
}
|
||||
}
|
||||
_ => panic!("128-bit PBS expects U64 input and U128 output for CudaDynLwe"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u64> {
|
||||
type Output = CudaGlweCiphertextList<u64>;
|
||||
type SideResources = CudaSideResources;
|
||||
|
||||
fn allocate_lwe_packing_keyswitch_result(
|
||||
&self,
|
||||
side_resources: &mut Self::SideResources,
|
||||
) -> Self::Output {
|
||||
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
|
||||
let polynomial_size = self.output_polynomial_size();
|
||||
let ciphertext_modulus = self.ciphertext_modulus();
|
||||
|
||||
CudaGlweCiphertextList::new(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
GlweCiphertextCount(1),
|
||||
ciphertext_modulus,
|
||||
&side_resources.streams,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u64>>
|
||||
for CudaLwePackingKeyswitchKey<u64>
|
||||
{
|
||||
type SideResources = CudaSideResources;
|
||||
|
||||
fn keyswitch_lwes_and_pack_in_glwe(
|
||||
&self,
|
||||
input: &[&CudaDynLwe],
|
||||
output: &mut CudaGlweCiphertextList<u64>,
|
||||
side_resources: &mut CudaSideResources,
|
||||
) {
|
||||
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64;
|
||||
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
|
||||
input.iter().map(|ciphertext| ciphertext.as_lwe_64()),
|
||||
&side_resources.streams,
|
||||
);
|
||||
|
||||
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
|
||||
self,
|
||||
&input_lwe_ciphertext_list,
|
||||
output,
|
||||
&side_resources.streams,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Implement StandardModSwitch traits for CudaGlweCiphertextList<u64>
|
||||
impl AllocateStandardModSwitchResult for CudaGlweCiphertextList<u64> {
|
||||
type Output = Self;
|
||||
type SideResources = CudaSideResources;
|
||||
|
||||
fn allocate_standard_mod_switch_result(
|
||||
&self,
|
||||
side_resources: &mut Self::SideResources,
|
||||
) -> Self::Output {
|
||||
Self::new(
|
||||
self.glwe_dimension(),
|
||||
self.polynomial_size(),
|
||||
self.glwe_ciphertext_count(),
|
||||
self.ciphertext_modulus(),
|
||||
&side_resources.streams,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl StandardModSwitch<Self> for CudaGlweCiphertextList<u64> {
|
||||
type SideResources = CudaSideResources;
|
||||
|
||||
fn standard_mod_switch(
|
||||
&self,
|
||||
storage_log_modulus: CiphertextModulusLog,
|
||||
output: &mut Self,
|
||||
side_resources: &mut CudaSideResources,
|
||||
) {
|
||||
let mut internal_output = self.duplicate(&side_resources.streams);
|
||||
|
||||
cuda_modulus_switch_ciphertext(
|
||||
&mut internal_output.0.d_vec,
|
||||
storage_log_modulus.0 as u32,
|
||||
&side_resources.streams,
|
||||
);
|
||||
side_resources.streams.synchronize();
|
||||
let mut cpu_glwe = internal_output.to_glwe_ciphertext_list(&side_resources.streams);
|
||||
|
||||
let shift_to_map_to_native = u64::BITS - storage_log_modulus.0 as u32;
|
||||
for val in cpu_glwe.as_mut_view().into_container().iter_mut() {
|
||||
*val <<= shift_to_map_to_native;
|
||||
}
|
||||
let d_after_ms = Self::from_glwe_ciphertext_list(&cpu_glwe, &side_resources.streams);
|
||||
|
||||
*output = d_after_ms;
|
||||
}
|
||||
}
|
||||
|
||||
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u128> {
|
||||
type Output = CudaGlweCiphertextList<u128>;
|
||||
type SideResources = CudaSideResources;
|
||||
|
||||
fn allocate_lwe_packing_keyswitch_result(
|
||||
&self,
|
||||
side_resources: &mut Self::SideResources,
|
||||
) -> Self::Output {
|
||||
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
|
||||
let polynomial_size = self.output_polynomial_size();
|
||||
let ciphertext_modulus = self.ciphertext_modulus();
|
||||
|
||||
CudaGlweCiphertextList::new(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
GlweCiphertextCount(1),
|
||||
ciphertext_modulus,
|
||||
&side_resources.streams,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u128>>
|
||||
for CudaLwePackingKeyswitchKey<u128>
|
||||
{
|
||||
type SideResources = CudaSideResources;
|
||||
|
||||
fn keyswitch_lwes_and_pack_in_glwe(
|
||||
&self,
|
||||
input: &[&CudaDynLwe],
|
||||
output: &mut CudaGlweCiphertextList<u128>,
|
||||
side_resources: &mut CudaSideResources,
|
||||
) {
|
||||
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128;
|
||||
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
|
||||
input.iter().map(|ciphertext| ciphertext.as_lwe_128()),
|
||||
&side_resources.streams,
|
||||
);
|
||||
|
||||
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128(
|
||||
self,
|
||||
&input_lwe_ciphertext_list,
|
||||
output,
|
||||
&side_resources.streams,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ use crate::shortint::server_key::ServerKey;
|
||||
use rayon::prelude::*;
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn dp_ks_any_ms_standard_pbs128<
|
||||
pub fn dp_ks_any_ms_standard_pbs128<
|
||||
InputCt,
|
||||
ScalarMulResult,
|
||||
KsResult,
|
||||
@@ -111,7 +111,7 @@ where
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn dp_ks_any_ms_standard_pbs128_packing_ks<
|
||||
pub fn dp_ks_any_ms_standard_pbs128_packing_ks<
|
||||
InputCt,
|
||||
ScalarMulResult,
|
||||
KsResult,
|
||||
|
||||
Reference in New Issue
Block a user