From 1eb1ff9d89fac49d94191ef27c53fac08b9acdef Mon Sep 17 00:00:00 2001 From: Guillermo Oyarzun Date: Tue, 2 Dec 2025 18:40:06 +0100 Subject: [PATCH] feat(gpu): create noise and pfail tests for cpk --- .../tfhe-cuda-backend/cuda/include/zk/zk.h | 3 +- .../cuda/include/zk/zk_enums.h | 7 + .../cuda/include/zk/zk_utilities.h | 34 +- backends/tfhe-cuda-backend/cuda/src/zk/zk.cu | 5 +- backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh | 27 +- backends/tfhe-cuda-backend/src/bindings.rs | 5 + tfhe/src/high_level_api/compact_list.rs | 6 +- .../integer/gpu/ciphertext/compact_list.rs | 4 +- tfhe/src/integer/gpu/mod.rs | 11 +- .../tests_noise_distribution/cpk_ks_ms.rs | 818 ++++++++++++++++++ .../radix/tests_noise_distribution/mod.rs | 1 + .../utils/key_switching_test_utils.rs | 28 + .../tests_noise_distribution/utils/mod.rs | 1 + tfhe/src/integer/gpu/zk/mod.rs | 3 +- 14 files changed, 937 insertions(+), 16 deletions(-) create mode 100644 backends/tfhe-cuda-backend/cuda/include/zk/zk_enums.h create mode 100644 tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/cpk_ks_ms.rs create mode 100644 tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/utils/key_switching_test_utils.rs diff --git a/backends/tfhe-cuda-backend/cuda/include/zk/zk.h b/backends/tfhe-cuda-backend/cuda/include/zk/zk.h index 713938f87..066f91d9d 100644 --- a/backends/tfhe-cuda-backend/cuda/include/zk/zk.h +++ b/backends/tfhe-cuda-backend/cuda/include/zk/zk.h @@ -3,6 +3,7 @@ #include "../keyswitch/ks_enums.h" #include "../pbs/pbs_enums.h" +#include "zk_enums.h" #include extern "C" { @@ -16,7 +17,7 @@ uint64_t scratch_cuda_expand_without_verification_64( uint32_t grouping_factor, const uint32_t *num_lwes_per_compact_list, const bool *is_boolean_array, uint32_t num_compact_lists, uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type, - KS_TYPE casting_key_type, bool allocate_gpu_memory, + KS_TYPE casting_key_type, bool allocate_gpu_memory, EXPAND_KIND expand_kind, PBS_MS_REDUCTION_T noise_reduction_type); void cuda_expand_without_verification_64( diff --git a/backends/tfhe-cuda-backend/cuda/include/zk/zk_enums.h b/backends/tfhe-cuda-backend/cuda/include/zk/zk_enums.h new file mode 100644 index 000000000..7690307e1 --- /dev/null +++ b/backends/tfhe-cuda-backend/cuda/include/zk/zk_enums.h @@ -0,0 +1,7 @@ +#ifndef CUDA_ZK_ENUMS_H +#define CUDA_ZK_ENUMS_H +#include +// In addition to the two kinds of expand (no_casting and casting), there is a +// third kind that is used only in the noise tests +enum EXPAND_KIND { NO_CASTING = 0, CASTING = 1, SANITY_CHECK = 2 }; +#endif // CUDA_ZK_ENUMS_H diff --git a/backends/tfhe-cuda-backend/cuda/include/zk/zk_utilities.h b/backends/tfhe-cuda-backend/cuda/include/zk/zk_utilities.h index fa9bbee23..262866dde 100644 --- a/backends/tfhe-cuda-backend/cuda/include/zk/zk_utilities.h +++ b/backends/tfhe-cuda-backend/cuda/include/zk/zk_utilities.h @@ -1,6 +1,5 @@ #ifndef ZK_UTILITIES_H #define ZK_UTILITIES_H - #include "../integer/integer_utilities.h" #include "integer/integer.cuh" #include @@ -103,6 +102,7 @@ template <typename Torus> struct zk_expand_mem { uint32_t num_compact_lists; int_radix_lut<Torus> *message_and_carry_extract_luts; + int_radix_lut<Torus> *identity_lut; Torus *tmp_expanded_lwes; Torus *tmp_ksed_small_to_big_expanded_lwes; @@ -113,15 +113,17 @@ template <typename Torus> struct zk_expand_mem { expand_job<Torus> *d_expand_jobs; expand_job<Torus> *h_expand_jobs; + EXPAND_KIND expand_kind; + zk_expand_mem(CudaStreams streams, int_radix_params computing_params, int_radix_params casting_params, KS_TYPE
casting_key_type, const uint32_t *num_lwes_per_compact_list, const bool *is_boolean_array, uint32_t num_compact_lists, - bool allocate_gpu_memory, uint64_t &size_tracker) + bool allocate_gpu_memory, uint64_t &size_tracker, + EXPAND_KIND expand_kind_in) : computing_params(computing_params), casting_params(casting_params), num_compact_lists(num_compact_lists), - casting_key_type(casting_key_type) { - + casting_key_type(casting_key_type), expand_kind(expand_kind_in) { gpu_memory_allocated = allocate_gpu_memory; // We copy num_lwes_per_compact_list so we get protection against @@ -136,10 +138,27 @@ template <typename Torus> struct zk_expand_mem { num_lwes += this->num_lwes_per_compact_list[i]; } - if (computing_params.carry_modulus != computing_params.message_modulus) { + if (computing_params.carry_modulus != computing_params.message_modulus && + expand_kind == EXPAND_KIND::CASTING) { PANIC("GPU backend requires carry_modulus equal to message_modulus") } + // We create the identity LUT only if we are doing a SANITY_CHECK + if (expand_kind == EXPAND_KIND::SANITY_CHECK) { + identity_lut = + new int_radix_lut<Torus>(streams, computing_params, 1, 2 * num_lwes, + allocate_gpu_memory, size_tracker); + + auto identity_lut_f = [](Torus x) -> Torus { return x; }; + + generate_device_accumulator<Torus>( + streams.stream(0), streams.gpu_index(0), identity_lut->get_lut(0, 0), + identity_lut->get_degree(0), identity_lut->get_max_degree(0), + casting_params.glwe_dimension, casting_params.polynomial_size, + casting_params.message_modulus, casting_params.carry_modulus, + identity_lut_f, gpu_memory_allocated); + } + auto message_extract_lut_f = [casting_params](Torus x) -> Torus { return x % casting_params.message_modulus; }; @@ -317,6 +336,11 @@ template <typename Torus> struct zk_expand_mem { message_and_carry_extract_luts->release(streams); delete message_and_carry_extract_luts; + if (expand_kind == EXPAND_KIND::SANITY_CHECK) { + identity_lut->release(streams); + delete identity_lut; + } + cuda_drop_with_size_tracking_async(tmp_expanded_lwes, streams.stream(0), streams.gpu_index(0), gpu_memory_allocated); diff --git a/backends/tfhe-cuda-backend/cuda/src/zk/zk.cu b/backends/tfhe-cuda-backend/cuda/src/zk/zk.cu index 6846cba28..9bb20fbcb 100644 --- a/backends/tfhe-cuda-backend/cuda/src/zk/zk.cu +++ b/backends/tfhe-cuda-backend/cuda/src/zk/zk.cu @@ -10,7 +10,7 @@ uint64_t scratch_cuda_expand_without_verification_64( uint32_t grouping_factor, const uint32_t *num_lwes_per_compact_list, const bool *is_boolean_array, uint32_t num_compact_lists, uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type, - KS_TYPE casting_key_type, bool allocate_gpu_memory, + KS_TYPE casting_key_type, bool allocate_gpu_memory, EXPAND_KIND expand_kind, PBS_MS_REDUCTION_T noise_reduction_type) { // Since CUDA backend works with the concept of "big" and "small" key, instead @@ -37,7 +37,8 @@ uint64_t scratch_cuda_expand_without_verification_64( CudaStreams(streams), reinterpret_cast<zk_expand_mem<uint64_t> **>(mem_ptr), num_lwes_per_compact_list, is_boolean_array, num_compact_lists, - computing_params, casting_params, casting_key_type, allocate_gpu_memory); + computing_params, casting_params, casting_key_type, allocate_gpu_memory, + expand_kind); } void cuda_expand_without_verification_64( diff --git a/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh b/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh index 912387fd2..7697b9576 100644 --- a/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh @@ -12,6 +12,7 @@ #include "utils/helper.cuh" #include "utils/helper_multi_gpu.cuh" #include "utils/kernel_dimensions.cuh" +#include "zk/zk_enums.h" #include "zk/zk_utilities.h" #include @@ -54,15 +55,24 @@ __host__ void host_expand_without_verification( compact_lwe_lists.total_num_lwes * sizeof(expand_job<Torus>), streams.stream(0), streams.gpu_index(0), true); + if (mem_ptr->expand_kind == EXPAND_KIND::NO_CASTING) { + host_lwe_expand(streams.stream(0), streams.gpu_index(0), + lwe_array_out, d_expand_jobs, num_lwes); + return; + } + host_lwe_expand(streams.stream(0), streams.gpu_index(0), expanded_lwes, d_expand_jobs, num_lwes); - auto ksks = casting_keys; auto lwe_array_input = expanded_lwes; + auto ksks = casting_keys; auto message_and_carry_extract_luts = mem_ptr->message_and_carry_extract_luts; auto lut = mem_ptr->message_and_carry_extract_luts; if (casting_key_type == SMALL_TO_BIG) { + if (mem_ptr->expand_kind == EXPAND_KIND::SANITY_CHECK) { + PANIC("SANITY_CHECK not supported for SMALL_TO_BIG casting"); + } // Keyswitch from small to big key if needed auto ksed_small_to_big_expanded_lwes = mem_ptr->tmp_ksed_small_to_big_expanded_lwes; @@ -95,6 +105,17 @@ __host__ void host_expand_without_verification( into_radix_ciphertext(output, lwe_array_out, 2 * num_lwes, lwe_dimension); auto input = new CudaRadixCiphertextFFI; into_radix_ciphertext(input, lwe_array_input, 2 * num_lwes, lwe_dimension); + + // This is a special case only for our noise sanity checks + // If we are doing a SANITY_CHECK expand, we just apply the identity LUT + // This replicates the CPU fallback behaviour of the casting expand + if (mem_ptr->expand_kind == EXPAND_KIND::SANITY_CHECK) { + integer_radix_apply_univariate_lookup_table<Torus>( + streams, output, input, bsks, ksks, mem_ptr->identity_lut, + 2 * num_lwes); + return; + } + integer_radix_apply_univariate_lookup_table<Torus>( streams, output, input, bsks, ksks, message_and_carry_extract_luts, 2 * num_lwes); @@ -106,13 +127,13 @@ __host__ uint64_t scratch_cuda_expand_without_verification( const uint32_t *num_lwes_per_compact_list, const bool *is_boolean_array, uint32_t num_compact_lists, int_radix_params computing_params, int_radix_params casting_params, KS_TYPE casting_key_type, - bool allocate_gpu_memory) { + bool allocate_gpu_memory, EXPAND_KIND expand_kind) { uint64_t size_tracker = 0; *mem_ptr = new zk_expand_mem<Torus>( streams, computing_params, casting_params, casting_key_type, num_lwes_per_compact_list, is_boolean_array, num_compact_lists, - allocate_gpu_memory, size_tracker); + allocate_gpu_memory, size_tracker, expand_kind); return size_tracker; } diff --git a/backends/tfhe-cuda-backend/src/bindings.rs b/backends/tfhe-cuda-backend/src/bindings.rs index 86efa2367..fabde9851 100644 --- a/backends/tfhe-cuda-backend/src/bindings.rs +++ b/backends/tfhe-cuda-backend/src/bindings.rs @@ -2471,6 +2471,10 @@ unsafe extern "C" { pub const KS_TYPE_BIG_TO_SMALL: KS_TYPE = 0; pub const KS_TYPE_SMALL_TO_BIG: KS_TYPE = 1; pub type KS_TYPE = ffi::c_uint; +pub const EXPAND_KIND_NO_CASTING: EXPAND_KIND = 0; +pub const EXPAND_KIND_CASTING: EXPAND_KIND = 1; +pub const EXPAND_KIND_SANITY_CHECK: EXPAND_KIND = 2; +pub type EXPAND_KIND = ffi::c_uint; unsafe extern "C" { pub fn scratch_cuda_expand_without_verification_64( streams: CudaStreamsFFI, @@ -2496,6 +2500,7 @@ unsafe extern "C" { pbs_type: PBS_TYPE, casting_key_type: KS_TYPE, allocate_gpu_memory: bool, + expand_kind: EXPAND_KIND, noise_reduction_type: PBS_MS_REDUCTION_T, ) -> u64; } diff --git a/tfhe/src/high_level_api/compact_list.rs b/tfhe/src/high_level_api/compact_list.rs index
f1180ea47..8968d4288 100644 --- a/tfhe/src/high_level_api/compact_list.rs +++ b/tfhe/src/high_level_api/compact_list.rs @@ -350,7 +350,8 @@ impl CompactCiphertextList { .unwrap(), dest_server_key: &cuda_key.key.key, }; - let expander = gpu_inner.expand(&ksk, streams)?; + let expander = + gpu_inner.expand(&ksk, crate::integer::gpu::ZKType::Casting, streams)?; Ok(CompactCiphertextListExpander { inner: InnerCompactCiphertextListExpander::Cuda(expander), @@ -390,7 +391,8 @@ impl CompactCiphertextList { dest_server_key: &cuda_key.key.key, }; let streams = &cuda_key.streams; - let expander = gpu_inner.expand(&ksk, streams)?; + let expander = + gpu_inner.expand(&ksk, crate::integer::gpu::ZKType::Casting, streams)?; Ok(CompactCiphertextListExpander { inner: InnerCompactCiphertextListExpander::Cuda(expander), diff --git a/tfhe/src/integer/gpu/ciphertext/compact_list.rs b/tfhe/src/integer/gpu/ciphertext/compact_list.rs index b5637a644..4fe9e5d82 100644 --- a/tfhe/src/integer/gpu/ciphertext/compact_list.rs +++ b/tfhe/src/integer/gpu/ciphertext/compact_list.rs @@ -21,7 +21,6 @@ use crate::GpuIndex; use itertools::Itertools; use serde::Deserializer; use tfhe_cuda_backend::cuda_bind::cuda_memcpy_async_to_gpu; - #[derive(Clone)] pub struct CudaCompactCiphertextListInfo { pub info: CudaBlockInfo, @@ -377,6 +376,7 @@ impl CudaFlattenedVecCompactCiphertextList { pub fn expand( &self, key: &CudaKeySwitchingKey, + zk_type: crate::integer::gpu::ZKType, streams: &CudaStreams, ) -> crate::Result<CudaCompactCiphertextListExpander> { assert!( @@ -441,6 +441,7 @@ impl CudaFlattenedVecCompactCiphertextList { LweBskGroupingFactor(0), self.num_lwe_per_compact_list.as_slice(), self.is_boolean.as_slice(), + zk_type, d_bsk.ms_noise_reduction_configuration.as_ref(), ); } @@ -476,6 +477,7 @@ impl CudaFlattenedVecCompactCiphertextList { d_multibit_bsk.grouping_factor, self.num_lwe_per_compact_list.as_slice(), self.is_boolean.as_slice(), + zk_type, None, ); } diff --git a/tfhe/src/integer/gpu/mod.rs b/tfhe/src/integer/gpu/mod.rs index 81a9eee08..32444a38e 100644 --- a/tfhe/src/integer/gpu/mod.rs +++ b/tfhe/src/integer/gpu/mod.rs @@ -80,6 +80,12 @@ pub enum ComparisonType { MAX = 6, MIN = 7, } +#[repr(u32)] +pub enum ZKType { + NoCasting = 0, + Casting = 1, + SanityCheck = 2, +} fn resolve_noise_reduction_type( ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>, @@ -7597,6 +7603,7 @@ pub(crate) unsafe fn cuda_backend_expand( grouping_factor: LweBskGroupingFactor, num_lwes_per_compact_list: &[u32], is_boolean: &[bool], + zk_type: ZKType, ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>, ) { assert_eq!( @@ -7665,6 +7672,7 @@ pub(crate) unsafe fn cuda_backend_expand( pbs_type as u32, casting_key_type as u32, true, + zk_type as u32, noise_reduction_type as u32, ); cuda_expand_without_verification_64( @@ -10218,12 +10226,13 @@ pub unsafe fn unchecked_small_scalar_mul_integer_async( cuda_small_scalar_multiplication_integer_64_inplace( streams.ffi(), - &raw mut cuda_ffi_lwe_array, + &mut cuda_ffi_lwe_array, small_scalar, message_modulus.0 as u32, carry_modulus.0 as u32, ); } + #[allow(clippy::too_many_arguments)] /// # Safety /// diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/cpk_ks_ms.rs b/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/cpk_ks_ms.rs new file mode 100644 index 000000000..65137b358 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/cpk_ks_ms.rs @@ -0,0 +1,818 @@ +use
crate::integer::gpu::ciphertext::compact_list::CudaFlattenedVecCompactCiphertextList; + +use crate::core_crypto::commons::parameters::CiphertextModulusLog; +use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey; +use crate::shortint::encoding::ShortintEncoding; +use crate::shortint::engine::ShortintEngine; +use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128; +use crate::shortint::parameters::{ + AtomicPatternParameters, CarryModulus, CompactCiphertextListExpansionKind, + CompactPublicKeyEncryptionParameters, MetaParameters, ShortintCompactCiphertextListCastingMode, + ShortintKeySwitchingParameters, +}; +use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{ + DynLwe, NoiseSimulationLwe, NoiseSimulationLweKeyswitchKey, NoiseSimulationModulusSwitchConfig, +}; +use crate::shortint::server_key::tests::noise_distribution::utils::{ + mean_and_variance_check, normality_check, pfail_check, update_ap_params_for_pfail, + DecryptionAndNoiseResult, NoiseSample, PfailTestMeta, PfailTestResult, +}; +use crate::shortint::server_key::tests::noise_distribution::{ + should_run_short_pfail_tests_debug, should_use_single_key_debug, +}; + +use crate::shortint::server_key::tests::parameterized_test::create_parameterized_test; +use crate::shortint::PaddingBit; +use rayon::prelude::*; +use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext; +use crate::integer::gpu::CudaServerKey; +use crate::integer::{ClientKey, CompressedServerKey}; +use crate::GpuIndex; +use crate::core_crypto::gpu::{CudaSideResources, CudaStreams}; +use crate::shortint::ShortintParameterSet; +use crate::integer::gpu::key_switching_key::CudaKeySwitchingKey; +use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList; +use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList; +use crate::shortint::server_key::tests::noise_distribution::cpk_ks_ms::cpk_ks_any_ms; +use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::noise_simulation::CudaDynLwe; +use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::lwe_programmable_bootstrap::LweClassicFftBootstrap; +use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::lwe_programmable_bootstrap::AllocateLweBootstrapResult; +use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::{ + AllocateLweKeyswitchResult, LweKeyswitch, +}; +use crate::core_crypto::gpu::algorithms::lwe_keyswitch::cuda_keyswitch_lwe_ciphertext; +use crate::core_crypto::gpu::vec::CudaVec; +use crate::core_crypto::prelude::LweCiphertextCount; +use crate::shortint::key_switching_key::CudaKeySwitchingKeyMaterial; +use crate::integer::key_switching_key::KeySwitchingKey; +use crate::integer::{CompactPublicKey, CompactPrivateKey}; +use crate::core_crypto::prelude::LweCiphertext; +use crate::integer::ciphertext::DataKind; +use std::num::NonZeroUsize; +use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::key_switching_test_utils::new_key_switching_key_for_pfail_test; + +#[allow(clippy::too_many_arguments)] +#[allow(clippy::type_complexity)] +fn cpk_ks_any_ms_inner_helper_gpu( + params: AtomicPatternParameters, + cpk_params: CompactPublicKeyEncryptionParameters, + ksk_ds_params: ShortintKeySwitchingParameters, + single_cpk_private_key: &CompactPrivateKey<Vec<u64>>, + single_cpk: &CompactPublicKey, + single_cuda_ksk: &CudaKeySwitchingKey<'_>, + single_cks: &ClientKey, + single_cuda_sks:
&CudaServerKey, + msg: u64, + br_input_modulus_log: CiphertextModulusLog, + streams: &CudaStreams, +) -> ( + DecryptionAndNoiseResult, + DecryptionAndNoiseResult, + DecryptionAndNoiseResult, + DecryptionAndNoiseResult, +) { + let mut engine = ShortintEngine::new(); + let thread_cpk_private_key; + let thread_cpk; + let thread_cuda_ksk; + let thread_cks; + let thread_sks; + let thread_cuda_sks; + let thread_cuda_ksk_material; + let (cpk_private_key, cpk, cuda_ksk, cks, cuda_sks) = if should_use_single_key_debug() { + ( + single_cpk_private_key, + single_cpk, + single_cuda_ksk, + single_cks, + single_cuda_sks, + ) + } else { + thread_cpk_private_key = CompactPrivateKey::new(cpk_params); + thread_cpk = CompactPublicKey::new(&thread_cpk_private_key); + + let block_params: ShortintParameterSet = params.into(); + thread_cks = crate::integer::ClientKey::new(block_params); + let compressed_server_key = + CompressedServerKey::new_radix_compressed_server_key(&thread_cks); + thread_sks = compressed_server_key.decompress(); + thread_cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams); + let ksk = new_key_switching_key_for_pfail_test( + (&thread_cpk_private_key, None), + (&thread_cks, &thread_sks), + ksk_ds_params, + ); + thread_cuda_ksk_material = + CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams); + thread_cuda_ksk = CudaKeySwitchingKey::from_cuda_key_switching_key_material( + &thread_cuda_ksk_material, + &thread_cuda_sks, + ); + + ( + &thread_cpk_private_key, + &thread_cpk, + &thread_cuda_ksk, + &thread_cks, + &thread_cuda_sks, + ) + }; + + //let br_input_modulus_log = sks.br_input_modulus_log(); + let modulus_switch_config = cuda_sks.noise_simulation_modulus_switch_config(); + let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo { + degree: crate::shortint::ciphertext::Degree::new(1), + message_modulus: params.message_modulus(), + carry_modulus: params.carry_modulus(), + atomic_pattern: crate::shortint::AtomicPatternKind::Standard( + crate::shortint::PBSOrder::KeyswitchBootstrap, + ), + noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL, + }; + let mut cuda_side_resources = CudaSideResources::new(streams, cuda_block_info); + let ct = { + let compact_list = cpk.key.encrypt_iter_with_modulus_with_engine( + core::iter::once(msg), + cpk.key.parameters.message_modulus.0, + &mut engine, + ); + + let num_blocks = 1usize; + + let data_info = vec![DataKind::Unsigned(NonZeroUsize::new(num_blocks).unwrap())]; + let cuda_casting_compact_list = + CudaFlattenedVecCompactCiphertextList::from_vec_shortint_compact_ciphertext_list( + vec![compact_list.clone()], + data_info, + &cuda_side_resources.streams, + ); + let cuda_compact_list_expander = cuda_casting_compact_list + .expand( + &cuda_ksk, + crate::integer::gpu::ZKType::NoCasting, + &cuda_side_resources.streams, + ) + .unwrap(); + + let cuda_expanded_ct: CudaUnsignedRadixCiphertext = cuda_compact_list_expander + .get(0usize, &cuda_side_resources.streams) + .unwrap() + .unwrap(); + + CudaDynLwe::U64(cuda_expanded_ct.ciphertext.d_blocks) + }; + + let (input_gpu, after_ks_ds_gpu, after_drift_gpu, after_ms_gpu) = cpk_ks_any_ms( + ct, + cuda_ksk, + modulus_switch_config, + br_input_modulus_log, + &mut cuda_side_resources, + ); + let input_list = input_gpu + .as_lwe_64() + .to_lwe_ciphertext_list(&cuda_side_resources.streams); + let input_ct = LweCiphertext::from_container( + input_list.clone().into_container(), + input_list.ciphertext_modulus(), + ); + let input = DynLwe::U64(input_ct); + 
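+ // The after-KS, before-MS and after-MS GPU outputs below are brought back to the CPU in the + // same way, so the shared shortint decryption and noise-measurement helpers can inspect them.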
+ + let after_ks_ds_list = after_ks_ds_gpu + .as_lwe_64() + .to_lwe_ciphertext_list(&cuda_side_resources.streams); + let after_ks_ds_ct = LweCiphertext::from_container( + after_ks_ds_list.clone().into_container(), + after_ks_ds_list.ciphertext_modulus(), + ); + let after_ks_ds = DynLwe::U64(after_ks_ds_ct); + + let before_ms_gpu: &CudaDynLwe = after_drift_gpu.as_ref().unwrap_or(&after_ks_ds_gpu); + let before_ms_list = before_ms_gpu + .as_lwe_64() + .to_lwe_ciphertext_list(&cuda_side_resources.streams); + let before_ms_ct = LweCiphertext::from_container( + before_ms_list.clone().into_container(), + before_ms_list.ciphertext_modulus(), + ); + let before_ms = DynLwe::U64(before_ms_ct); + let after_ms_list = after_ms_gpu + .as_lwe_64() + .to_lwe_ciphertext_list(&cuda_side_resources.streams); + let after_ms_ct = LweCiphertext::from_container( + after_ms_list.clone().into_container(), + after_ms_list.ciphertext_modulus(), + ); + let after_ms = DynLwe::U64(after_ms_ct); + match &cks.key.atomic_pattern { + AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => { + let params = standard_atomic_pattern_client_key.parameters; + let encoding = ShortintEncoding { + ciphertext_modulus: params.ciphertext_modulus(), + message_modulus: params.message_modulus(), + carry_modulus: params.carry_modulus(), + padding_bit: PaddingBit::Yes, + }; + + let cpk_lwe_secret_key = cpk_private_key.key.key(); + + let small_compute_lwe_secret_key = + standard_atomic_pattern_client_key.small_lwe_secret_key(); + ( + DecryptionAndNoiseResult::new_from_lwe( + &input.as_lwe_64(), + &cpk_lwe_secret_key, + msg, + &encoding, + ), + DecryptionAndNoiseResult::new_from_lwe( + &after_ks_ds.as_lwe_64(), + &small_compute_lwe_secret_key, + msg, + &encoding, + ), + DecryptionAndNoiseResult::new_from_lwe( + &before_ms.as_lwe_64(), + &small_compute_lwe_secret_key, + msg, + &encoding, + ), + DecryptionAndNoiseResult::new_from_lwe( + &after_ms.as_lwe_64(), + &small_compute_lwe_secret_key, + msg, + &encoding, + ), + ) + } + AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => { + panic!("KeySwitch32 atomic pattern is not supported on GPU yet"); + } + } +} + +#[allow(clippy::too_many_arguments)] +#[allow(clippy::type_complexity)] +fn cpk_ks_any_ms_noise_helper_gpu( + params: AtomicPatternParameters, + cpk_params: CompactPublicKeyEncryptionParameters, + ksk_ds_params: ShortintKeySwitchingParameters, + single_cpk_private_key: &CompactPrivateKey<Vec<u64>>, + single_cpk: &CompactPublicKey, + single_cuda_ksk_ds: &CudaKeySwitchingKey<'_>, + single_cks: &ClientKey, + single_cuda_sks: &CudaServerKey, + msg: u64, + br_input_modulus_log: CiphertextModulusLog, + streams: &CudaStreams, +) -> (NoiseSample, NoiseSample, NoiseSample, NoiseSample) { + let (input, after_ks_ds, before_ms, after_ms) = cpk_ks_any_ms_inner_helper_gpu( + params, + cpk_params, + ksk_ds_params, + single_cpk_private_key, + single_cpk, + single_cuda_ksk_ds, + single_cks, + single_cuda_sks, + msg, + br_input_modulus_log, + streams, + ); + + ( + input + .get_noise_if_decryption_was_correct() + .expect("Decryption Failed"), + after_ks_ds + .get_noise_if_decryption_was_correct() + .expect("Decryption Failed"), + before_ms + .get_noise_if_decryption_was_correct() + .expect("Decryption Failed"), + after_ms + .get_noise_if_decryption_was_correct() + .expect("Decryption Failed"), + ) +} + +#[allow(clippy::too_many_arguments)] +#[allow(clippy::type_complexity)] +fn cpk_ks_any_ms_pfail_helper_gpu( + params: AtomicPatternParameters, + cpk_params:
CompactPublicKeyEncryptionParameters, + ksk_ds_params: ShortintKeySwitchingParameters, + single_cpk_private_key: &CompactPrivateKey<Vec<u64>>, + single_cpk: &CompactPublicKey, + single_cuda_ksk_ds: &CudaKeySwitchingKey<'_>, + single_cks: &ClientKey, + single_cuda_sks: &CudaServerKey, + msg: u64, + br_input_modulus_log: CiphertextModulusLog, + streams: &CudaStreams, +) -> DecryptionAndNoiseResult { + let (_input, _after_ks_ds, _before_ms, after_ms) = cpk_ks_any_ms_inner_helper_gpu( + params, + cpk_params, + ksk_ds_params, + single_cpk_private_key, + single_cpk, + single_cuda_ksk_ds, + single_cks, + single_cuda_sks, + msg, + br_input_modulus_log, + streams, + ); + + after_ms +} + +fn noise_check_encrypt_cpk_ks_ms_noise_gpu(meta_params: MetaParameters) { + let (params, cpk_params, ksk_ds_params) = { + let compute_params = meta_params.compute_parameters; + let dedicated_cpk_params = meta_params.dedicated_compact_public_key_parameters.unwrap(); + // To avoid the expand logic of shortint which would force a keyswitch + LUT eval after + // expand + let cpk_params = { + let mut cpk_params = dedicated_cpk_params.pke_params; + cpk_params.expansion_kind = CompactCiphertextListExpansionKind::NoCasting( + compute_params.encryption_key_choice().into_pbs_order(), + ); + cpk_params + }; + + (compute_params, cpk_params, dedicated_cpk_params.ksk_params) + }; + let gpu_index = 0; + let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)); + let cpk_private_key = CompactPrivateKey::new(cpk_params); + let cpk = CompactPublicKey::new(&cpk_private_key); + + let block_params: ShortintParameterSet = params.into(); + let cks = crate::integer::ClientKey::new(block_params); + let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks); + let sks = compressed_server_key.decompress(); + let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams); + let ksk = KeySwitchingKey::new((&cpk_private_key, None), (&cks, &sks), ksk_ds_params); + let cuda_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams); + let cuda_ksk = + CudaKeySwitchingKey::from_cuda_key_switching_key_material(&cuda_ksk_material, &cuda_sks); + + let noise_simulation_ksk = + NoiseSimulationLweKeyswitchKey::new_from_atomic_pattern_parameters(params); + let noise_simulation_ksk_ds = + NoiseSimulationLweKeyswitchKey::new_from_cpk_params(cpk_params, ksk_ds_params, params); + let noise_simulation_modulus_switch_config = + NoiseSimulationModulusSwitchConfig::new_from_atomic_pattern_parameters(params); + + let modulus_switch_config = sks.key.noise_simulation_modulus_switch_config(); + let cuda_modulus_switch_config = cuda_sks.noise_simulation_modulus_switch_config(); + let compute_br_input_modulus_log = sks.key.br_input_modulus_log(); + let expected_average_after_ms = + modulus_switch_config.expected_average_after_ms(params.polynomial_size()); + + assert!(noise_simulation_ksk.matches_actual_shortint_server_key(&sks.key)); + assert!(noise_simulation_ksk_ds.matches_actual_shortint_keyswitching_key(&ksk.key.as_view())); + assert!(noise_simulation_modulus_switch_config + .matches_shortint_server_key_modulus_switch_config(modulus_switch_config)); + + let (_input_sim, _after_ks_ds_sim, _after_drift_sim, after_ms_sim) = { + let noise_simulation_input = NoiseSimulationLwe::encrypt_with_cpk(&cpk.key); + cpk_ks_any_ms( + noise_simulation_input, + &noise_simulation_ksk_ds, + noise_simulation_modulus_switch_config.as_ref(), + compute_br_input_modulus_log, + &mut (), + ) + }; + + let
sample_input = { + let compact_list = cpk.key.encrypt_slice(&[0]); + let mut expanded = compact_list + .expand(ShortintCompactCiphertextListCastingMode::NoCasting) + .unwrap(); + assert_eq!(expanded.len(), 1); + + DynLwe::U64(expanded.pop().unwrap().ct) + }; + let d_ct_input = + CudaLweCiphertextList::from_lwe_ciphertext(&sample_input.as_lwe_64(), &streams); + let gpu_sample_input = CudaDynLwe::U64(d_ct_input); + + let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo { + degree: crate::shortint::ciphertext::Degree::new(1), + message_modulus: params.message_modulus(), + carry_modulus: params.carry_modulus(), + atomic_pattern: crate::shortint::AtomicPatternKind::Standard( + crate::shortint::PBSOrder::KeyswitchBootstrap, + ), + noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL, + }; + let mut cuda_side_resources = CudaSideResources::new(&streams, cuda_block_info); + // Check that the circuit is correct with respect to the core implementation, i.e. that it + // does not crash on dimension checks + let (expected_lwe_dimension_out, expected_modulus_f64_out) = { + let (_input, _after_ks_ds, _before_ms, after_ms) = cpk_ks_any_ms( + gpu_sample_input, + &cuda_ksk, + cuda_modulus_switch_config, + compute_br_input_modulus_log, + &mut cuda_side_resources, + ); + + (after_ms.lwe_dimension(), after_ms.raw_modulus_float()) + }; + + assert_eq!(after_ms_sim.lwe_dimension(), expected_lwe_dimension_out); + assert_eq!(after_ms_sim.modulus().as_f64(), expected_modulus_f64_out); + + let cleartext_modulus = params.message_modulus().0 * params.carry_modulus().0; + let mut noise_samples_before_ms = vec![]; + let mut noise_samples_after_ms = vec![]; + + let sample_count_per_msg = 1000usize; + let chunk_size = 8; + let vec_local_streams = (0..chunk_size) + .map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index))) + .collect::<Vec<_>>(); + + for _ in 0..cleartext_modulus { + let (current_noise_samples_before_ms, current_noise_samples_after_ms): (Vec<_>, Vec<_>) = (0 + ..sample_count_per_msg) + .collect::<Vec<_>>() + .chunks(chunk_size) + .flat_map(|chunk| { + chunk + .iter() + .collect::<Vec<_>>() + .into_par_iter() + .map(|i| { + let local_stream = &vec_local_streams[*i % chunk_size]; + let (_input, _after_ks_ds, before_ms, after_ms) = + cpk_ks_any_ms_noise_helper_gpu( + params, + cpk_params, + ksk_ds_params, + &cpk_private_key, + &cpk, + &cuda_ksk, + &cks, + &cuda_sks, + 0, + compute_br_input_modulus_log, + local_stream, + ); + (before_ms.value, after_ms.value) + }) + .collect::<Vec<_>>() + }) + .unzip(); + + noise_samples_before_ms.extend(current_noise_samples_before_ms); + noise_samples_after_ms.extend(current_noise_samples_after_ms); + } + + let before_ms_normality = normality_check(&noise_samples_before_ms, "before ms", 0.01); + + let after_ms_is_ok = mean_and_variance_check( + &noise_samples_after_ms, + "after_ms", + expected_average_after_ms, + after_ms_sim.variance(), + params.lwe_noise_distribution(), + after_ms_sim.lwe_dimension(), + after_ms_sim.modulus().as_f64(), + ); + + assert!(before_ms_normality.null_hypothesis_is_valid && after_ms_is_ok); +} + +create_parameterized_test!(noise_check_encrypt_cpk_ks_ms_noise_gpu { + TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128, +}); + +fn noise_check_encrypt_cpk_ks_ms_pfail_gpu(meta_params: MetaParameters) { + let (params, cpk_params, ksk_ds_params) = { + let compute_params = meta_params.compute_parameters; + let dedicated_cpk_params = meta_params.dedicated_compact_public_key_parameters.unwrap(); + // To avoid the expand logic of shortint which would
force a keyswitch + LUT eval after + // expand + let cpk_params = { + let mut cpk_params = dedicated_cpk_params.pke_params; + cpk_params.expansion_kind = CompactCiphertextListExpansionKind::NoCasting( + compute_params.encryption_key_choice().into_pbs_order(), + ); + cpk_params + }; + + (compute_params, cpk_params, dedicated_cpk_params.ksk_params) + }; + + let (pfail_test_meta, params) = { + let mut ap_params = params; + + let original_message_modulus = ap_params.message_modulus(); + let original_carry_modulus = ap_params.carry_modulus(); + + // For now only allow 2_2 parameters; heuristics for other parameters can come later + assert_eq!(original_message_modulus.0, 4); + assert_eq!(original_carry_modulus.0, 4); + + // Update parameters to fail more frequently by inflating the carry modulus, which allows + // keeping the max multiplication without risk of message overflow + let (original_pfail_and_precision, new_expected_pfail_and_precision) = + update_ap_params_for_pfail( + &mut ap_params, + original_message_modulus, + CarryModulus(1 << 5), + ); + + let pfail_test_meta = if should_run_short_pfail_tests_debug() { + let expected_fails = 200; + PfailTestMeta::new_with_desired_expected_fails( + original_pfail_and_precision, + new_expected_pfail_and_precision, + expected_fails, + ) + } else { + let total_runs = 1_000_000; + PfailTestMeta::new_with_total_runs( + original_pfail_and_precision, + new_expected_pfail_and_precision, + total_runs, + ) + }; + + (pfail_test_meta, ap_params) + }; + let gpu_index = 0; + let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)); + let cpk_private_key = CompactPrivateKey::new(cpk_params); + let cpk = CompactPublicKey::new(&cpk_private_key); + + let block_params: ShortintParameterSet = params.into(); + let cks = crate::integer::ClientKey::new(block_params); + let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks); + let sks = compressed_server_key.decompress(); + let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams); + let ksk = + new_key_switching_key_for_pfail_test((&cpk_private_key, None), (&cks, &sks), ksk_ds_params); + let cuda_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams); + let cuda_ksk = + CudaKeySwitchingKey::from_cuda_key_switching_key_material(&cuda_ksk_material, &cuda_sks); + + let total_runs_for_expected_fails = pfail_test_meta.total_runs_for_expected_fails(); + let chunk_size = 8; + let vec_local_streams = (0..chunk_size) + .map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index))) + .collect::<Vec<_>>(); + let measured_fails: f64 = (0..total_runs_for_expected_fails) + .collect::<Vec<_>>() + .chunks(chunk_size) + .flat_map(|chunk| { + chunk + .iter() + .collect::<Vec<_>>() + .into_par_iter() + .map(|i| { + let local_stream = &vec_local_streams[*i as usize % chunk_size]; + let after_ms_decryption_result = cpk_ks_any_ms_pfail_helper_gpu( + params, + cpk_params, + ksk_ds_params, + &cpk_private_key, + &cpk, + &cuda_ksk, + &cks, + &cuda_sks, + 0, + sks.key.br_input_modulus_log(), + local_stream, + ); + after_ms_decryption_result.failure_as_f64() + }) + .collect::<Vec<_>>() + }) + .sum(); + + let test_result = PfailTestResult { measured_fails }; + + pfail_check(&pfail_test_meta, test_result); +} + +create_parameterized_test!(noise_check_encrypt_cpk_ks_ms_pfail_gpu { + TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128, +}); + +fn sanity_check_encrypt_cpk_ks_ms_pbs_gpu(meta_params: MetaParameters) { + let (params, cpk_params, ksk_ds_params) = { + let compute_params =
meta_params.compute_parameters; + let dedicated_cpk_params = meta_params.dedicated_compact_public_key_parameters.unwrap(); + // To avoid the expand logic of shortint which would force a keyswitch + LUT eval after + // expand + let (cpk_params, orig_cast_mode) = { + let mut cpk_params = dedicated_cpk_params.pke_params; + let orig_cast_mode = cpk_params.expansion_kind; + cpk_params.expansion_kind = CompactCiphertextListExpansionKind::NoCasting( + compute_params.encryption_key_choice().into_pbs_order(), + ); + (cpk_params, orig_cast_mode) + }; + + assert!(matches!( + orig_cast_mode, + CompactCiphertextListExpansionKind::RequiresCasting + )); + + (compute_params, cpk_params, dedicated_cpk_params.ksk_params) + }; + let gpu_index = 0; + let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)); + let cpk_private_key = CompactPrivateKey::new(cpk_params); + let cpk = CompactPublicKey::new(&cpk_private_key); + + let block_params: ShortintParameterSet = params.into(); + let cks = crate::integer::ClientKey::new(block_params); + let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks); + let sks = compressed_server_key.decompress(); + let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams); + let ksk = KeySwitchingKey::new((&cpk_private_key, None), (&cks, &sks), ksk_ds_params); + let cuda_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams); + let cuda_ksk = + CudaKeySwitchingKey::from_cuda_key_switching_key_material(&cuda_ksk_material, &cuda_sks); + let modulus_switch_config = cuda_sks.noise_simulation_modulus_switch_config(); + let compute_br_input_modulus_log = sks.key.br_input_modulus_log(); + + let id_lut = cuda_sks.generate_lookup_table(|x| x); + let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams); + + let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo { + degree: crate::shortint::ciphertext::Degree::new(1), + message_modulus: params.message_modulus(), + carry_modulus: params.carry_modulus(), + atomic_pattern: crate::shortint::AtomicPatternKind::Standard( + crate::shortint::PBSOrder::KeyswitchBootstrap, + ), + noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL, + }; + let mut cuda_side_resources = CudaSideResources::new(&streams, cuda_block_info); + + for _ in 0..10 { + let (gpu_sample_input, shortint_res) = { + let mut engine = ShortintEngine::new(); + let no_casting_compact_list = cpk.key.encrypt_iter_with_modulus_with_engine( + core::iter::once(0), + cpk.key.parameters.message_modulus.0, + &mut engine, + ); + + let num_blocks = 1usize; + let data_info = vec![DataKind::Unsigned(NonZeroUsize::new(num_blocks).unwrap())]; + // This is for the AP + let cuda_no_casting_compact_list = + CudaFlattenedVecCompactCiphertextList::from_vec_shortint_compact_ciphertext_list( + vec![no_casting_compact_list.clone()], + data_info, + &cuda_side_resources.streams, + ); + + // This is for the verification + let cuda_casting_compact_list = + cuda_no_casting_compact_list.duplicate(&cuda_side_resources.streams); + + let cuda_no_casting_compact_list_expander = cuda_no_casting_compact_list + .expand( + &cuda_ksk, + crate::integer::gpu::ZKType::NoCasting, + &cuda_side_resources.streams, + ) + .unwrap(); + + let cuda_ap_input_expanded: CudaUnsignedRadixCiphertext = + cuda_no_casting_compact_list_expander + .get(0usize, &cuda_side_resources.streams) + .unwrap() + .unwrap(); + + let cuda_casting_compact_list_expander = cuda_casting_compact_list + .expand(
&cuda_ksk, + crate::integer::gpu::ZKType::SanityCheck, + &cuda_side_resources.streams, + ) + .unwrap(); + + let cuda_int_res: CudaUnsignedRadixCiphertext = cuda_casting_compact_list_expander + .get(0usize, &cuda_side_resources.streams) + .unwrap() + .unwrap(); + + ( + CudaDynLwe::U64( + cuda_ap_input_expanded + .ciphertext + .d_blocks + .duplicate(&cuda_side_resources.streams), + ), + cuda_int_res + .ciphertext + .d_blocks + .to_lwe_ciphertext_list(&cuda_side_resources.streams), + ) + }; + + let (_input, _after_ks, _before_ms, after_ms) = cpk_ks_any_ms( + gpu_sample_input, + &cuda_ksk, + modulus_switch_config, + compute_br_input_modulus_log, + &mut cuda_side_resources, + ); + + // Complete the AP by computing the PBS to match shortint + let mut pbs_result = d_accumulator.allocate_lwe_bootstrap_result(&mut cuda_side_resources); + cuda_sks.lwe_classic_fft_pbs( + &after_ms, + &mut pbs_result, + &d_accumulator, + &mut cuda_side_resources, + ); + + let pbs_result_list = pbs_result + .as_lwe_64() + .to_lwe_ciphertext_list(&cuda_side_resources.streams); + + assert_eq!(pbs_result_list, shortint_res); + } +} + +// Trait implementations for CudaKeySwitchingKey to enable noise distribution tests +impl AllocateLweKeyswitchResult for CudaKeySwitchingKey<'_> { + type Output = CudaDynLwe; + type SideResources = CudaSideResources; + + fn allocate_lwe_keyswitch_result( + &self, + side_resources: &mut Self::SideResources, + ) -> Self::Output { + let output_lwe_dimension = self + .key_switching_key_material + .lwe_keyswitch_key + .output_key_lwe_size() + .to_lwe_dimension(); + let lwe_ciphertext_count = LweCiphertextCount(1); + let ciphertext_modulus = self.dest_server_key.ciphertext_modulus; + + let cuda_lwe = CudaLweCiphertextList::new( + output_lwe_dimension, + lwe_ciphertext_count, + ciphertext_modulus, + &side_resources.streams, + ); + CudaDynLwe::U64(cuda_lwe) + } +} + +impl LweKeyswitch for CudaKeySwitchingKey<'_> { + type SideResources = CudaSideResources; + + fn lwe_keyswitch( + &self, + input: &CudaDynLwe, + output: &mut CudaDynLwe, + side_resources: &mut Self::SideResources, + ) { + match (input, output) { + (CudaDynLwe::U64(input_cuda_lwe), CudaDynLwe::U64(output_cuda_lwe)) => { + let d_input_indexes = CudaVec::<u64>::new(1, &side_resources.streams, 0); + let d_output_indexes = CudaVec::<u64>::new(1, &side_resources.streams, 0); + + cuda_keyswitch_lwe_ciphertext( + &self.key_switching_key_material.lwe_keyswitch_key, + input_cuda_lwe, + output_cuda_lwe, + &d_input_indexes, + &d_output_indexes, + false, + &side_resources.streams, + false, + ); + } + (CudaDynLwe::U32(_), CudaDynLwe::U32(_)) => { + panic!( + "U32 keyswitch not implemented for CudaKeySwitchingKey - only U64 is supported" + ); + } + (CudaDynLwe::U128(_), CudaDynLwe::U128(_)) => { + panic!("U128 keyswitch not implemented for CudaKeySwitchingKey - only U64 is supported"); + } + _ => panic!("Inconsistent input/output types for CudaDynLwe keyswitch"), + } + } +} + +create_parameterized_test!(sanity_check_encrypt_cpk_ks_ms_pbs_gpu { + TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128, +}); diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/mod.rs b/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/mod.rs index 6ae93d65e..a9c57038a 100644 --- a/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/mod.rs +++ b/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/mod.rs @@ -1,5 +1,6 @@ pub mod br_dp_ks_ms; pub mod br_dp_packingks_ms; +pub mod cpk_ks_ms; pub mod dp_ks_ms; pub mod
dp_ks_pbs_128_packingks; pub mod utils; diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/utils/key_switching_test_utils.rs b/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/utils/key_switching_test_utils.rs new file mode 100644 index 000000000..a31134f29 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/utils/key_switching_test_utils.rs @@ -0,0 +1,28 @@ +use crate::integer::client_key::secret_encryption_key::SecretEncryptionKeyView; +use crate::integer::key_switching_key::KeySwitchingKey; +use crate::integer::{ClientKey, ServerKey}; +use crate::shortint::parameters::ShortintKeySwitchingParameters; + +/// Test-only implementation of KeySwitchingKey::new that skips the cast_rshift assertion. +/// This is needed for pfail tests where we intentionally use different message modulus and carry. +pub fn new_key_switching_key_for_pfail_test<'input_key, InputEncryptionKey, ClientKeyType>( + input_key_pair: (InputEncryptionKey, Option<&ServerKey>), + output_key_pair: (&ClientKeyType, &ServerKey), + params: ShortintKeySwitchingParameters, +) -> KeySwitchingKey +where + InputEncryptionKey: Into>, + ClientKeyType: AsRef, +{ + let input_secret_encryption_key: SecretEncryptionKeyView<'_> = input_key_pair.0.into(); + KeySwitchingKey { + key: crate::shortint::KeySwitchingKey::new( + ( + input_secret_encryption_key.key, + input_key_pair.1.map(|k| &k.key), + ), + (&output_key_pair.0.as_ref().key, &output_key_pair.1.key), + params, + ), + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/utils/mod.rs b/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/utils/mod.rs index 44d0c45d7..af1f3f5f0 100644 --- a/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/utils/mod.rs +++ b/tfhe/src/integer/gpu/server_key/radix/tests_noise_distribution/utils/mod.rs @@ -1 +1,2 @@ +pub mod key_switching_test_utils; pub mod noise_simulation; diff --git a/tfhe/src/integer/gpu/zk/mod.rs b/tfhe/src/integer/gpu/zk/mod.rs index a08565334..af829499d 100644 --- a/tfhe/src/integer/gpu/zk/mod.rs +++ b/tfhe/src/integer/gpu/zk/mod.rs @@ -73,7 +73,8 @@ impl CudaProvenCompactCiphertextList { key: &CudaKeySwitchingKey, streams: &CudaStreams, ) -> crate::Result { - self.d_flattened_compact_lists.expand(key, streams) + self.d_flattened_compact_lists + .expand(key, super::ZKType::Casting, streams) } pub fn from_proven_compact_ciphertext_list(