feat(gpu): create noise and pfail tests for cpk

Guillermo Oyarzun
2025-12-02 18:40:06 +01:00
parent 636de3721b
commit 1eb1ff9d89
14 changed files with 937 additions and 16 deletions

View File

@@ -3,6 +3,7 @@
#include "../keyswitch/ks_enums.h"
#include "../pbs/pbs_enums.h"
#include "zk_enums.h"
#include <stdint.h>
extern "C" {
@@ -16,7 +17,7 @@ uint64_t scratch_cuda_expand_without_verification_64(
uint32_t grouping_factor, const uint32_t *num_lwes_per_compact_list,
const bool *is_boolean_array, uint32_t num_compact_lists,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
KS_TYPE casting_key_type, bool allocate_gpu_memory,
KS_TYPE casting_key_type, bool allocate_gpu_memory, EXPAND_KIND expand_kind,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_expand_without_verification_64(

View File

@@ -0,0 +1,7 @@
#ifndef CUDA_ZK_ENUMS_H
#define CUDA_ZK_ENUMS_H
#include <stdint.h>
// In addition to the two kinds of expand (no_casting and casting), we have a
// third kind that is used only in the noise tests
enum EXPAND_KIND { NO_CASTING = 0, CASTING = 1, SANITY_CHECK = 2 };
#endif // CUDA_ZK_ENUMS_H
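
For orientation, these three values map one-to-one onto the Rust-side ZKType enum added later in this commit (NoCasting = 0, Casting = 1, SanityCheck = 2), and onto the three branches of host_expand_without_verification. A minimal, standalone Rust sketch of that correspondence (the describe helper is illustrative only, not part of the commit):

// Mirrors EXPAND_KIND from zk_enums.h: NO_CASTING = 0, CASTING = 1, SANITY_CHECK = 2.
#[repr(u32)]
#[derive(Clone, Copy, Debug)]
enum ZkType {
    NoCasting = 0,
    Casting = 1,
    SanityCheck = 2,
}

// Rough summary of what each kind triggers on the GPU expand path.
fn describe(kind: ZkType) -> &'static str {
    match kind {
        ZkType::NoCasting => "expand only; skip keyswitch and LUT evaluation",
        ZkType::Casting => "expand, keyswitch, then message/carry extraction LUTs",
        ZkType::SanityCheck => "expand, keyswitch, then an identity LUT (noise tests only)",
    }
}

fn main() {
    for kind in [ZkType::NoCasting, ZkType::Casting, ZkType::SanityCheck] {
        // The discriminant is what crosses the FFI boundary (`zk_type as u32`).
        println!("{} -> {}", kind as u32, describe(kind));
    }
}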

View File

@@ -1,6 +1,5 @@
#ifndef ZK_UTILITIES_H
#define ZK_UTILITIES_H
#include "../integer/integer_utilities.h"
#include "integer/integer.cuh"
#include <cstdint>
@@ -103,6 +102,7 @@ template <typename Torus> struct zk_expand_mem {
uint32_t num_compact_lists;
int_radix_lut<Torus> *message_and_carry_extract_luts;
int_radix_lut<Torus> *identity_lut;
Torus *tmp_expanded_lwes;
Torus *tmp_ksed_small_to_big_expanded_lwes;
@@ -113,15 +113,17 @@ template <typename Torus> struct zk_expand_mem {
expand_job<Torus> *d_expand_jobs;
expand_job<Torus> *h_expand_jobs;
EXPAND_KIND expand_kind;
zk_expand_mem(CudaStreams streams, int_radix_params computing_params,
int_radix_params casting_params, KS_TYPE casting_key_type,
const uint32_t *num_lwes_per_compact_list,
const bool *is_boolean_array, uint32_t num_compact_lists,
bool allocate_gpu_memory, uint64_t &size_tracker)
bool allocate_gpu_memory, uint64_t &size_tracker,
EXPAND_KIND expand_kind_in)
: computing_params(computing_params), casting_params(casting_params),
num_compact_lists(num_compact_lists),
casting_key_type(casting_key_type) {
casting_key_type(casting_key_type), expand_kind(expand_kind_in) {
gpu_memory_allocated = allocate_gpu_memory;
// We copy num_lwes_per_compact_list so we get protection against
@@ -136,10 +138,27 @@ template <typename Torus> struct zk_expand_mem {
num_lwes += this->num_lwes_per_compact_list[i];
}
if (computing_params.carry_modulus != computing_params.message_modulus) {
if (computing_params.carry_modulus != computing_params.message_modulus &&
expand_kind == EXPAND_KIND::CASTING) {
PANIC("GPU backend requires carry_modulus equal to message_modulus")
}
// We create the identity LUT only if we are doing a SANITY_CHECK
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
identity_lut =
new int_radix_lut<Torus>(streams, computing_params, 1, 2 * num_lwes,
allocate_gpu_memory, size_tracker);
auto identity_lut_f = [](Torus x) -> Torus { return x; };
generate_device_accumulator<Torus>(
streams.stream(0), streams.gpu_index(0), identity_lut->get_lut(0, 0),
identity_lut->get_degree(0), identity_lut->get_max_degree(0),
casting_params.glwe_dimension, casting_params.polynomial_size,
casting_params.message_modulus, casting_params.carry_modulus,
identity_lut_f, gpu_memory_allocated);
}
auto message_extract_lut_f = [casting_params](Torus x) -> Torus {
return x % casting_params.message_modulus;
};
@@ -317,6 +336,11 @@ template <typename Torus> struct zk_expand_mem {
message_and_carry_extract_luts->release(streams);
delete message_and_carry_extract_luts;
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
identity_lut->release(streams);
delete identity_lut;
}
cuda_drop_with_size_tracking_async(tmp_expanded_lwes, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);

View File

@@ -10,7 +10,7 @@ uint64_t scratch_cuda_expand_without_verification_64(
uint32_t grouping_factor, const uint32_t *num_lwes_per_compact_list,
const bool *is_boolean_array, uint32_t num_compact_lists,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
KS_TYPE casting_key_type, bool allocate_gpu_memory,
KS_TYPE casting_key_type, bool allocate_gpu_memory, EXPAND_KIND expand_kind,
PBS_MS_REDUCTION_T noise_reduction_type) {
// Since CUDA backend works with the concept of "big" and "small" key, instead
@@ -37,7 +37,8 @@ uint64_t scratch_cuda_expand_without_verification_64(
CudaStreams(streams),
reinterpret_cast<zk_expand_mem<uint64_t> **>(mem_ptr),
num_lwes_per_compact_list, is_boolean_array, num_compact_lists,
computing_params, casting_params, casting_key_type, allocate_gpu_memory);
computing_params, casting_params, casting_key_type, allocate_gpu_memory,
expand_kind);
}
void cuda_expand_without_verification_64(

View File

@@ -12,6 +12,7 @@
#include "utils/helper.cuh"
#include "utils/helper_multi_gpu.cuh"
#include "utils/kernel_dimensions.cuh"
#include "zk/zk_enums.h"
#include "zk/zk_utilities.h"
#include <functional>
@@ -54,15 +55,24 @@ __host__ void host_expand_without_verification(
compact_lwe_lists.total_num_lwes * sizeof(expand_job<Torus>),
streams.stream(0), streams.gpu_index(0), true);
if (mem_ptr->expand_kind == EXPAND_KIND::NO_CASTING) {
host_lwe_expand<Torus, params>(streams.stream(0), streams.gpu_index(0),
lwe_array_out, d_expand_jobs, num_lwes);
return;
}
host_lwe_expand<Torus, params>(streams.stream(0), streams.gpu_index(0),
expanded_lwes, d_expand_jobs, num_lwes);
auto ksks = casting_keys;
auto lwe_array_input = expanded_lwes;
auto ksks = casting_keys;
auto message_and_carry_extract_luts = mem_ptr->message_and_carry_extract_luts;
auto lut = mem_ptr->message_and_carry_extract_luts;
if (casting_key_type == SMALL_TO_BIG) {
if (mem_ptr->expand_kind == EXPAND_KIND::SANITY_CHECK) {
PANIC("SANITY_CHECK not supported for SMALL_TO_BIG casting");
}
// Keyswitch from small to big key if needed
auto ksed_small_to_big_expanded_lwes =
mem_ptr->tmp_ksed_small_to_big_expanded_lwes;
@@ -95,6 +105,17 @@ __host__ void host_expand_without_verification(
into_radix_ciphertext(output, lwe_array_out, 2 * num_lwes, lwe_dimension);
auto input = new CudaRadixCiphertextFFI;
into_radix_ciphertext(input, lwe_array_input, 2 * num_lwes, lwe_dimension);
// This is a special case only for our noise sanity checks
// If we are doing a SANITY_CHECK expand, we just apply the identity LUT
// This replicates the CPU fallback behaviour of the casting expand
if (mem_ptr->expand_kind == EXPAND_KIND::SANITY_CHECK) {
integer_radix_apply_univariate_lookup_table<Torus>(
streams, output, input, bsks, ksks, mem_ptr->identity_lut,
2 * num_lwes);
return;
}
integer_radix_apply_univariate_lookup_table<Torus>(
streams, output, input, bsks, ksks, message_and_carry_extract_luts,
2 * num_lwes);
@@ -106,13 +127,13 @@ __host__ uint64_t scratch_cuda_expand_without_verification(
const uint32_t *num_lwes_per_compact_list, const bool *is_boolean_array,
uint32_t num_compact_lists, int_radix_params computing_params,
int_radix_params casting_params, KS_TYPE casting_key_type,
bool allocate_gpu_memory) {
bool allocate_gpu_memory, EXPAND_KIND expand_kind) {
uint64_t size_tracker = 0;
*mem_ptr = new zk_expand_mem<Torus>(
streams, computing_params, casting_params, casting_key_type,
num_lwes_per_compact_list, is_boolean_array, num_compact_lists,
allocate_gpu_memory, size_tracker);
allocate_gpu_memory, size_tracker, expand_kind);
return size_tracker;
}

View File

@@ -2471,6 +2471,10 @@ unsafe extern "C" {
pub const KS_TYPE_BIG_TO_SMALL: KS_TYPE = 0;
pub const KS_TYPE_SMALL_TO_BIG: KS_TYPE = 1;
pub type KS_TYPE = ffi::c_uint;
pub const EXPAND_KIND_NO_CASTING: EXPAND_KIND = 0;
pub const EXPAND_KIND_CASTING: EXPAND_KIND = 1;
pub const EXPAND_KIND_SANITY_CHECK: EXPAND_KIND = 2;
pub type EXPAND_KIND = ffi::c_uint;
unsafe extern "C" {
pub fn scratch_cuda_expand_without_verification_64(
streams: CudaStreamsFFI,
@@ -2496,6 +2500,7 @@ unsafe extern "C" {
pbs_type: PBS_TYPE,
casting_key_type: KS_TYPE,
allocate_gpu_memory: bool,
expand_kind: EXPAND_KIND,
noise_reduction_type: PBS_MS_REDUCTION_T,
) -> u64;
}

View File

@@ -350,7 +350,8 @@ impl CompactCiphertextList {
.unwrap(),
dest_server_key: &cuda_key.key.key,
};
let expander = gpu_inner.expand(&ksk, streams)?;
let expander =
gpu_inner.expand(&ksk, crate::integer::gpu::ZKType::Casting, streams)?;
Ok(CompactCiphertextListExpander {
inner: InnerCompactCiphertextListExpander::Cuda(expander),
@@ -390,7 +391,8 @@ impl CompactCiphertextList {
dest_server_key: &cuda_key.key.key,
};
let streams = &cuda_key.streams;
let expander = gpu_inner.expand(&ksk, streams)?;
let expander =
gpu_inner.expand(&ksk, crate::integer::gpu::ZKType::Casting, streams)?;
Ok(CompactCiphertextListExpander {
inner: InnerCompactCiphertextListExpander::Cuda(expander),

View File

@@ -21,7 +21,6 @@ use crate::GpuIndex;
use itertools::Itertools;
use serde::Deserializer;
use tfhe_cuda_backend::cuda_bind::cuda_memcpy_async_to_gpu;
#[derive(Clone)]
pub struct CudaCompactCiphertextListInfo {
pub info: CudaBlockInfo,
@@ -377,6 +376,7 @@ impl CudaFlattenedVecCompactCiphertextList {
pub fn expand(
&self,
key: &CudaKeySwitchingKey,
zk_type: crate::integer::gpu::ZKType,
streams: &CudaStreams,
) -> crate::Result<CudaCompactCiphertextListExpander> {
assert!(
@@ -441,6 +441,7 @@ impl CudaFlattenedVecCompactCiphertextList {
LweBskGroupingFactor(0),
self.num_lwe_per_compact_list.as_slice(),
self.is_boolean.as_slice(),
zk_type,
d_bsk.ms_noise_reduction_configuration.as_ref(),
);
}
@@ -476,6 +477,7 @@ impl CudaFlattenedVecCompactCiphertextList {
d_multibit_bsk.grouping_factor,
self.num_lwe_per_compact_list.as_slice(),
self.is_boolean.as_slice(),
zk_type,
None,
);
}

View File

@@ -80,6 +80,12 @@ pub enum ComparisonType {
MAX = 6,
MIN = 7,
}
#[repr(u32)]
pub enum ZKType {
NoCasting = 0,
Casting = 1,
SanityCheck = 2,
}
fn resolve_noise_reduction_type(
ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
@@ -7597,6 +7603,7 @@ pub(crate) unsafe fn cuda_backend_expand<T: UnsignedInteger, B: Numeric>(
grouping_factor: LweBskGroupingFactor,
num_lwes_per_compact_list: &[u32],
is_boolean: &[bool],
zk_type: ZKType,
ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
) {
assert_eq!(
@@ -7665,6 +7672,7 @@ pub(crate) unsafe fn cuda_backend_expand<T: UnsignedInteger, B: Numeric>(
pbs_type as u32,
casting_key_type as u32,
true,
zk_type as u32,
noise_reduction_type as u32,
);
cuda_expand_without_verification_64(
@@ -10218,12 +10226,13 @@ pub unsafe fn unchecked_small_scalar_mul_integer_async(
cuda_small_scalar_multiplication_integer_64_inplace(
streams.ffi(),
&raw mut cuda_ffi_lwe_array,
&mut cuda_ffi_lwe_array,
small_scalar,
message_modulus.0 as u32,
carry_modulus.0 as u32,
);
}
#[allow(clippy::too_many_arguments)]
/// # Safety
///

View File

@@ -0,0 +1,818 @@
use crate::integer::gpu::ciphertext::compact_list::CudaFlattenedVecCompactCiphertextList;
use crate::core_crypto::commons::parameters::CiphertextModulusLog;
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::encoding::ShortintEncoding;
use crate::shortint::engine::ShortintEngine;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{
AtomicPatternParameters, CarryModulus, CompactCiphertextListExpansionKind,
CompactPublicKeyEncryptionParameters, MetaParameters, ShortintCompactCiphertextListCastingMode,
ShortintKeySwitchingParameters,
};
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
DynLwe, NoiseSimulationLwe, NoiseSimulationLweKeyswitchKey, NoiseSimulationModulusSwitchConfig,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
mean_and_variance_check, normality_check, pfail_check, update_ap_params_for_pfail,
DecryptionAndNoiseResult, NoiseSample, PfailTestMeta, PfailTestResult,
};
use crate::shortint::server_key::tests::noise_distribution::{
should_run_short_pfail_tests_debug, should_use_single_key_debug,
};
use crate::shortint::server_key::tests::parameterized_test::create_parameterized_test;
use crate::shortint::PaddingBit;
use rayon::prelude::*;
use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext;
use crate::integer::gpu::CudaServerKey;
use crate::integer::{ClientKey, CompressedServerKey};
use crate::GpuIndex;
use crate::core_crypto::gpu::{CudaSideResources, CudaStreams};
use crate::shortint::ShortintParameterSet;
use crate::integer::gpu::key_switching_key::CudaKeySwitchingKey;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::shortint::server_key::tests::noise_distribution::cpk_ks_ms::cpk_ks_any_ms;
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::noise_simulation::CudaDynLwe;
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::lwe_programmable_bootstrap::LweClassicFftBootstrap;
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::lwe_programmable_bootstrap::AllocateLweBootstrapResult;
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::{
AllocateLweKeyswitchResult, LweKeyswitch,
};
use crate::core_crypto::gpu::algorithms::lwe_keyswitch::cuda_keyswitch_lwe_ciphertext;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::prelude::LweCiphertextCount;
use crate::shortint::key_switching_key::CudaKeySwitchingKeyMaterial;
use crate::integer::key_switching_key::KeySwitchingKey;
use crate::integer::{CompactPublicKey, CompactPrivateKey};
use crate::core_crypto::prelude::LweCiphertext;
use crate::integer::ciphertext::DataKind;
use std::num::NonZeroUsize;
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::key_switching_test_utils::new_key_switching_key_for_pfail_test;
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn cpk_ks_any_ms_inner_helper_gpu(
params: AtomicPatternParameters,
cpk_params: CompactPublicKeyEncryptionParameters,
ksk_ds_params: ShortintKeySwitchingParameters,
single_cpk_private_key: &CompactPrivateKey<Vec<u64>>,
single_cpk: &CompactPublicKey,
single_cuda_ksk: &CudaKeySwitchingKey<'_>,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
msg: u64,
br_input_modulus_log: CiphertextModulusLog,
streams: &CudaStreams,
) -> (
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
) {
let mut engine = ShortintEngine::new();
let thread_cpk_private_key;
let thread_cpk;
let thread_cuda_ksk;
let thread_cks;
let thread_sks;
let thread_cuda_sks;
let thread_cuda_ksk_material;
let (cpk_private_key, cpk, cuda_ksk, cks, cuda_sks) = if should_use_single_key_debug() {
(
single_cpk_private_key,
single_cpk,
single_cuda_ksk,
single_cks,
single_cuda_sks,
)
} else {
thread_cpk_private_key = CompactPrivateKey::new(cpk_params);
thread_cpk = CompactPublicKey::new(&thread_cpk_private_key);
let block_params: ShortintParameterSet = params.into();
thread_cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key =
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
thread_sks = compressed_server_key.decompress();
thread_cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, streams);
let ksk = new_key_switching_key_for_pfail_test(
(&thread_cpk_private_key, None),
(&thread_cks, &thread_sks),
ksk_ds_params,
);
thread_cuda_ksk_material =
CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, streams);
thread_cuda_ksk = CudaKeySwitchingKey::from_cuda_key_switching_key_material(
&thread_cuda_ksk_material,
&thread_cuda_sks,
);
(
&thread_cpk_private_key,
&thread_cpk,
&thread_cuda_ksk,
&thread_cks,
&thread_cuda_sks,
)
};
let modulus_switch_config = cuda_sks.noise_simulation_modulus_switch_config();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: crate::shortint::AtomicPatternKind::Standard(
crate::shortint::PBSOrder::KeyswitchBootstrap,
),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources = CudaSideResources::new(streams, cuda_block_info);
let ct = {
let compact_list = cpk.key.encrypt_iter_with_modulus_with_engine(
core::iter::once(msg),
cpk.key.parameters.message_modulus.0,
&mut engine,
);
let num_blocks = 1usize;
let data_info = vec![DataKind::Unsigned(NonZeroUsize::new(num_blocks).unwrap())];
let cuda_casting_compact_list =
CudaFlattenedVecCompactCiphertextList::from_vec_shortint_compact_ciphertext_list(
vec![compact_list.clone()],
data_info,
&cuda_side_resources.streams,
);
let cuda_compact_list_expander = cuda_casting_compact_list
.expand(
cuda_ksk,
crate::integer::gpu::ZKType::NoCasting,
&cuda_side_resources.streams,
)
.unwrap();
let cuda_expanded_ct: CudaUnsignedRadixCiphertext = cuda_compact_list_expander
.get(0usize, &cuda_side_resources.streams)
.unwrap()
.unwrap();
CudaDynLwe::U64(cuda_expanded_ct.ciphertext.d_blocks)
};
let (input_gpu, after_ks_ds_gpu, after_drift_gpu, after_ms_gpu) = cpk_ks_any_ms(
ct,
cuda_ksk,
modulus_switch_config,
br_input_modulus_log,
&mut cuda_side_resources,
);
let input_list = input_gpu
.as_lwe_64()
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
let input_ct = LweCiphertext::from_container(
input_list.clone().into_container(),
input_list.ciphertext_modulus(),
);
let input = DynLwe::U64(input_ct);
let after_ks_ds_list = after_ks_ds_gpu
.as_lwe_64()
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
let after_ks_ds_ct = LweCiphertext::from_container(
after_ks_ds_list.clone().into_container(),
after_ks_ds_list.ciphertext_modulus(),
);
let after_ks_ds = DynLwe::U64(after_ks_ds_ct);
let before_ms_gpu: &CudaDynLwe = after_drift_gpu.as_ref().unwrap_or(&after_ks_ds_gpu);
let before_ms_list = before_ms_gpu
.as_lwe_64()
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
let before_ms_ct = LweCiphertext::from_container(
before_ms_list.clone().into_container(),
before_ms_list.ciphertext_modulus(),
);
let before_ms = DynLwe::U64(before_ms_ct);
let after_ms_list = after_ms_gpu
.as_lwe_64()
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
let after_ms_ct = LweCiphertext::from_container(
after_ms_list.clone().into_container(),
after_ms_list.ciphertext_modulus(),
);
let after_ms = DynLwe::U64(after_ms_ct);
match &cks.key.atomic_pattern {
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
let params = standard_atomic_pattern_client_key.parameters;
let encoding = ShortintEncoding {
ciphertext_modulus: params.ciphertext_modulus(),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let cpk_lwe_secret_key = cpk_private_key.key.key();
let small_compute_lwe_secret_key =
standard_atomic_pattern_client_key.small_lwe_secret_key();
(
DecryptionAndNoiseResult::new_from_lwe(
&input.as_lwe_64(),
&cpk_lwe_secret_key,
msg,
&encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_ks_ds.as_lwe_64(),
&small_compute_lwe_secret_key,
msg,
&encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&before_ms.as_lwe_64(),
&small_compute_lwe_secret_key,
msg,
&encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_ms.as_lwe_64(),
&small_compute_lwe_secret_key,
msg,
&encoding,
),
)
}
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
panic!("KeySwitch32 atomic pattern is not supported on GPU yet");
}
}
}
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn cpk_ks_any_ms_noise_helper_gpu(
params: AtomicPatternParameters,
cpk_params: CompactPublicKeyEncryptionParameters,
ksk_ds_params: ShortintKeySwitchingParameters,
single_cpk_private_key: &CompactPrivateKey<Vec<u64>>,
single_cpk: &CompactPublicKey,
single_cuda_ksk_ds: &CudaKeySwitchingKey<'_>,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
msg: u64,
br_input_modulus_log: CiphertextModulusLog,
streams: &CudaStreams,
) -> (NoiseSample, NoiseSample, NoiseSample, NoiseSample) {
let (input, after_ks_ds, before_ms, after_ms) = cpk_ks_any_ms_inner_helper_gpu(
params,
cpk_params,
ksk_ds_params,
single_cpk_private_key,
single_cpk,
single_cuda_ksk_ds,
single_cks,
single_cuda_sks,
msg,
br_input_modulus_log,
streams,
);
(
input
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_ks_ds
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
before_ms
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_ms
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
)
}
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn cpk_ks_any_ms_pfail_helper_gpu(
params: AtomicPatternParameters,
cpk_params: CompactPublicKeyEncryptionParameters,
ksk_ds_params: ShortintKeySwitchingParameters,
single_cpk_private_key: &CompactPrivateKey<Vec<u64>>,
single_cpk: &CompactPublicKey,
single_cuda_ksk_ds: &CudaKeySwitchingKey<'_>,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
msg: u64,
br_input_modulus_log: CiphertextModulusLog,
streams: &CudaStreams,
) -> DecryptionAndNoiseResult {
let (_input, _after_ks_ds, _before_ms, after_ms) = cpk_ks_any_ms_inner_helper_gpu(
params,
cpk_params,
ksk_ds_params,
single_cpk_private_key,
single_cpk,
single_cuda_ksk_ds,
single_cks,
single_cuda_sks,
msg,
br_input_modulus_log,
streams,
);
after_ms
}
fn noise_check_encrypt_cpk_ks_ms_noise_gpu(meta_params: MetaParameters) {
let (params, cpk_params, ksk_ds_params) = {
let compute_params = meta_params.compute_parameters;
let dedicated_cpk_params = meta_params.dedicated_compact_public_key_parameters.unwrap();
// To avoid the expand logic of shortint which would force a keyswitch + LUT eval after
// expand
let cpk_params = {
let mut cpk_params = dedicated_cpk_params.pke_params;
cpk_params.expansion_kind = CompactCiphertextListExpansionKind::NoCasting(
compute_params.encryption_key_choice().into_pbs_order(),
);
cpk_params
};
(compute_params, cpk_params, dedicated_cpk_params.ksk_params)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let cpk_private_key = CompactPrivateKey::new(cpk_params);
let cpk = CompactPublicKey::new(&cpk_private_key);
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let sks = compressed_server_key.decompress();
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let ksk = KeySwitchingKey::new((&cpk_private_key, None), (&cks, &sks), ksk_ds_params);
let cuda_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams);
let cuda_ksk =
CudaKeySwitchingKey::from_cuda_key_switching_key_material(&cuda_ksk_material, &cuda_sks);
let noise_simulation_ksk =
NoiseSimulationLweKeyswitchKey::new_from_atomic_pattern_parameters(params);
let noise_simulation_ksk_ds =
NoiseSimulationLweKeyswitchKey::new_from_cpk_params(cpk_params, ksk_ds_params, params);
let noise_simulation_modulus_switch_config =
NoiseSimulationModulusSwitchConfig::new_from_atomic_pattern_parameters(params);
let modulus_switch_config = sks.key.noise_simulation_modulus_switch_config();
let cuda_modulus_switch_config = cuda_sks.noise_simulation_modulus_switch_config();
let compute_br_input_modulus_log = sks.key.br_input_modulus_log();
let expected_average_after_ms =
modulus_switch_config.expected_average_after_ms(params.polynomial_size());
assert!(noise_simulation_ksk.matches_actual_shortint_server_key(&sks.key));
assert!(noise_simulation_ksk_ds.matches_actual_shortint_keyswitching_key(&ksk.key.as_view()));
assert!(noise_simulation_modulus_switch_config
.matches_shortint_server_key_modulus_switch_config(modulus_switch_config));
let (_input_sim, _after_ks_ds_sim, _after_drift_sim, after_ms_sim) = {
let noise_simulation_input = NoiseSimulationLwe::encrypt_with_cpk(&cpk.key);
cpk_ks_any_ms(
noise_simulation_input,
&noise_simulation_ksk_ds,
noise_simulation_modulus_switch_config.as_ref(),
compute_br_input_modulus_log,
&mut (),
)
};
let sample_input = {
let compact_list = cpk.key.encrypt_slice(&[0]);
let mut expanded = compact_list
.expand(ShortintCompactCiphertextListCastingMode::NoCasting)
.unwrap();
assert_eq!(expanded.len(), 1);
DynLwe::U64(expanded.pop().unwrap().ct)
};
let d_ct_input =
CudaLweCiphertextList::from_lwe_ciphertext(&sample_input.as_lwe_64(), &streams);
let gpu_sample_input = CudaDynLwe::U64(d_ct_input);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: crate::shortint::AtomicPatternKind::Standard(
crate::shortint::PBSOrder::KeyswitchBootstrap,
),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources = CudaSideResources::new(&streams, cuda_block_info);
// Check that the circuit is correct with respect to core implementation, i.e. does not crash on
// dimension checks
let (expected_lwe_dimension_out, expected_modulus_f64_out) = {
let (_input, _after_ks_ds, _before_ms, after_ms) = cpk_ks_any_ms(
gpu_sample_input,
&cuda_ksk,
cuda_modulus_switch_config,
compute_br_input_modulus_log,
&mut cuda_side_resources,
);
(after_ms.lwe_dimension(), after_ms.raw_modulus_float())
};
assert_eq!(after_ms_sim.lwe_dimension(), expected_lwe_dimension_out);
assert_eq!(after_ms_sim.modulus().as_f64(), expected_modulus_f64_out);
let cleartext_modulus = params.message_modulus().0 * params.carry_modulus().0;
let mut noise_samples_before_ms = vec![];
let mut noise_samples_after_ms = vec![];
let sample_count_per_msg = 1000usize;
let chunk_size = 8;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
for _ in 0..cleartext_modulus {
let (current_noise_sample_before_ms, current_noise_samples_after_ms): (Vec<_>, Vec<_>) = (0
..sample_count_per_msg)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.iter()
.collect::<Vec<_>>()
.into_par_iter()
.map(|i| {
let local_stream = &vec_local_streams[*i % chunk_size];
let (_input, _after_ks_ds, before_ms, after_ms) =
cpk_ks_any_ms_noise_helper_gpu(
params,
cpk_params,
ksk_ds_params,
&cpk_private_key,
&cpk,
&cuda_ksk,
&cks,
&cuda_sks,
0,
compute_br_input_modulus_log,
local_stream,
);
(before_ms.value, after_ms.value)
})
.collect::<Vec<_>>()
})
.unzip();
noise_samples_before_ms.extend(current_noise_sample_before_ms);
noise_samples_after_ms.extend(current_noise_samples_after_ms);
}
let before_ms_normality = normality_check(&noise_samples_before_ms, "before ms", 0.01);
let after_ms_is_ok = mean_and_variance_check(
&noise_samples_after_ms,
"after_ms",
expected_average_after_ms,
after_ms_sim.variance(),
params.lwe_noise_distribution(),
after_ms_sim.lwe_dimension(),
after_ms_sim.modulus().as_f64(),
);
assert!(before_ms_normality.null_hypothesis_is_valid && after_ms_is_ok);
}
create_parameterized_test!(noise_check_encrypt_cpk_ks_ms_noise_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
fn noise_check_encrypt_cpk_ks_ms_pfail_gpu(meta_params: MetaParameters) {
let (params, cpk_params, ksk_ds_params) = {
let compute_params = meta_params.compute_parameters;
let dedicated_cpk_params = meta_params.dedicated_compact_public_key_parameters.unwrap();
// To avoid the expand logic of shortint which would force a keyswitch + LUT eval after
// expand
let cpk_params = {
let mut cpk_params = dedicated_cpk_params.pke_params;
cpk_params.expansion_kind = CompactCiphertextListExpansionKind::NoCasting(
compute_params.encryption_key_choice().into_pbs_order(),
);
cpk_params
};
(compute_params, cpk_params, dedicated_cpk_params.ksk_params)
};
let (pfail_test_meta, params) = {
let mut ap_params = params;
let original_message_modulus = ap_params.message_modulus();
let original_carry_modulus = ap_params.carry_modulus();
// For now only allow 2_2 parameters, and see later for heuristics to use
assert_eq!(original_message_modulus.0, 4);
assert_eq!(original_carry_modulus.0, 4);
// Update parameters to fail more frequently by inflating the carry modulus; this keeps
// the max multiplication possible without risking message overflow
let (original_pfail_and_precision, new_expected_pfail_and_precision) =
update_ap_params_for_pfail(
&mut ap_params,
original_message_modulus,
CarryModulus(1 << 5),
);
let pfail_test_meta = if should_run_short_pfail_tests_debug() {
let expected_fails = 200;
PfailTestMeta::new_with_desired_expected_fails(
original_pfail_and_precision,
new_expected_pfail_and_precision,
expected_fails,
)
} else {
let total_runs = 1_000_000;
PfailTestMeta::new_with_total_runs(
original_pfail_and_precision,
new_expected_pfail_and_precision,
total_runs,
)
};
(pfail_test_meta, ap_params)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let cpk_private_key = CompactPrivateKey::new(cpk_params);
let cpk = CompactPublicKey::new(&cpk_private_key);
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let sks = compressed_server_key.decompress();
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let ksk =
new_key_switching_key_for_pfail_test((&cpk_private_key, None), (&cks, &sks), ksk_ds_params);
let cuda_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams);
let cuda_ksk =
CudaKeySwitchingKey::from_cuda_key_switching_key_material(&cuda_ksk_material, &cuda_sks);
let total_runs_for_expected_fails = pfail_test_meta.total_runs_for_expected_fails();
let chunk_size = 8;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
let measured_fails: f64 = (0..total_runs_for_expected_fails)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.iter()
.collect::<Vec<_>>()
.into_par_iter()
.map(|i| {
let local_stream = &vec_local_streams[*i as usize % chunk_size];
let after_ms_decryption_result = cpk_ks_any_ms_pfail_helper_gpu(
params,
cpk_params,
ksk_ds_params,
&cpk_private_key,
&cpk,
&cuda_ksk,
&cks,
&cuda_sks,
0,
sks.key.br_input_modulus_log(),
local_stream,
);
after_ms_decryption_result.failure_as_f64()
})
.collect::<Vec<_>>()
})
.sum();
let test_result = PfailTestResult { measured_fails };
pfail_check(&pfail_test_meta, test_result);
}
create_parameterized_test!(noise_check_encrypt_cpk_ks_ms_pfail_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
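
Why inflating the carry modulus raises the failure rate: with a padding bit, the shortint encoding places each plaintext at a multiple of delta = 2^63 / (message_modulus * carry_modulus), so a larger carry modulus shrinks delta and lets the same noise flip decryptions measurably often. A minimal sketch of that arithmetic (assuming the usual padded shortint encoding; not code from this commit):

fn main() {
    // With a padding bit: delta = 2^63 / (message_modulus * carry_modulus).
    let delta = |msg: u64, carry: u64| (1u64 << 63) / (msg * carry);

    let nominal = delta(4, 4); // 2_2 parameters
    let inflated = delta(4, 1 << 5); // carry modulus raised to 2^5, as in the test

    assert_eq!(nominal, 1 << 59);
    assert_eq!(inflated, 1 << 56);
    // Noise larger than delta / 2 flips the decrypted value, so dividing delta
    // by 8 makes failures frequent enough to count in ~1M runs.
    println!("nominal delta = 2^{}", nominal.trailing_zeros());
    println!("inflated delta = 2^{}", inflated.trailing_zeros());
}
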
fn sanity_check_encrypt_cpk_ks_ms_pbs_gpu(meta_params: MetaParameters) {
let (params, cpk_params, ksk_ds_params) = {
let compute_params = meta_params.compute_parameters;
let dedicated_cpk_params = meta_params.dedicated_compact_public_key_parameters.unwrap();
// To avoid the expand logic of shortint which would force a keyswitch + LUT eval after
// expand
let (cpk_params, orig_cast_mode) = {
let mut cpk_params = dedicated_cpk_params.pke_params;
let orig_cast_mode = cpk_params.expansion_kind;
cpk_params.expansion_kind = CompactCiphertextListExpansionKind::NoCasting(
compute_params.encryption_key_choice().into_pbs_order(),
);
(cpk_params, orig_cast_mode)
};
assert!(matches!(
orig_cast_mode,
CompactCiphertextListExpansionKind::RequiresCasting
));
(compute_params, cpk_params, dedicated_cpk_params.ksk_params)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let cpk_private_key = CompactPrivateKey::new(cpk_params);
let cpk = CompactPublicKey::new(&cpk_private_key);
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let sks = compressed_server_key.decompress();
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let ksk = KeySwitchingKey::new((&cpk_private_key, None), (&cks, &sks), ksk_ds_params);
let cuda_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams);
let cuda_ksk =
CudaKeySwitchingKey::from_cuda_key_switching_key_material(&cuda_ksk_material, &cuda_sks);
let modulus_switch_config = cuda_sks.noise_simulation_modulus_switch_config();
let compute_br_input_modulus_log = sks.key.br_input_modulus_log();
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: crate::shortint::AtomicPatternKind::Standard(
crate::shortint::PBSOrder::KeyswitchBootstrap,
),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources = CudaSideResources::new(&streams, cuda_block_info);
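// Each iteration expands the same compact list twice: once with NoCasting to
// feed the manual KS + MS + PBS pipeline below, and once with SanityCheck,
// whose expand already ends in an identity-LUT PBS. The two results are
// compared bit for bit at the end of the loop.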
for _ in 0..10 {
let (gpu_sample_input, shortint_res) = {
let mut engine = ShortintEngine::new();
let no_casting_compact_list = cpk.key.encrypt_iter_with_modulus_with_engine(
core::iter::once(0),
cpk.key.parameters.message_modulus.0,
&mut engine,
);
let num_blocks = 1usize;
let data_info = vec![DataKind::Unsigned(NonZeroUsize::new(num_blocks).unwrap())];
// This list is for the AP (atomic pattern) input
let cuda_no_casting_compact_list =
CudaFlattenedVecCompactCiphertextList::from_vec_shortint_compact_ciphertext_list(
vec![no_casting_compact_list.clone()],
data_info,
&cuda_side_resources.streams,
);
// This duplicate is for the sanity-check verification
let cuda_casting_compact_list =
cuda_no_casting_compact_list.duplicate(&cuda_side_resources.streams);
let cuda_no_casting_compact_list_expander = cuda_no_casting_compact_list
.expand(
&cuda_ksk,
crate::integer::gpu::ZKType::NoCasting,
&cuda_side_resources.streams,
)
.unwrap();
let cuda_ap_input_expanded: CudaUnsignedRadixCiphertext =
cuda_no_casting_compact_list_expander
.get(0usize, &cuda_side_resources.streams)
.unwrap()
.unwrap();
let cuda_casting_compact_list_expander = cuda_casting_compact_list
.expand(
&cuda_ksk,
crate::integer::gpu::ZKType::SanityCheck,
&cuda_side_resources.streams,
)
.unwrap();
let cuda_int_res: CudaUnsignedRadixCiphertext = cuda_casting_compact_list_expander
.get(0usize, &cuda_side_resources.streams)
.unwrap()
.unwrap();
(
CudaDynLwe::U64(
cuda_ap_input_expanded
.ciphertext
.d_blocks
.duplicate(&cuda_side_resources.streams),
),
cuda_int_res
.ciphertext
.d_blocks
.to_lwe_ciphertext_list(&cuda_side_resources.streams),
)
};
let (_input, _after_ks, _before_ms, after_ms) = cpk_ks_any_ms(
gpu_sample_input,
&cuda_ksk,
modulus_switch_config,
compute_br_input_modulus_log,
&mut cuda_side_resources,
);
// Complete the AP by computing the PBS to match shortint
let mut pbs_result = d_accumulator.allocate_lwe_bootstrap_result(&mut cuda_side_resources);
cuda_sks.lwe_classic_fft_pbs(
&after_ms,
&mut pbs_result,
&d_accumulator,
&mut cuda_side_resources,
);
let pbs_result_list = pbs_result
.as_lwe_64()
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
assert_eq!(pbs_result_list, shortint_res);
}
}
// Trait implementations for CudaKeySwitchingKey to enable noise distribution tests
impl AllocateLweKeyswitchResult for CudaKeySwitchingKey<'_> {
type Output = CudaDynLwe;
type SideResources = CudaSideResources;
fn allocate_lwe_keyswitch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
let output_lwe_dimension = self
.key_switching_key_material
.lwe_keyswitch_key
.output_key_lwe_size()
.to_lwe_dimension();
let lwe_ciphertext_count = LweCiphertextCount(1);
let ciphertext_modulus = self.dest_server_key.ciphertext_modulus;
let cuda_lwe = CudaLweCiphertextList::new(
output_lwe_dimension,
lwe_ciphertext_count,
ciphertext_modulus,
&side_resources.streams,
);
CudaDynLwe::U64(cuda_lwe)
}
}
impl LweKeyswitch<CudaDynLwe, CudaDynLwe> for CudaKeySwitchingKey<'_> {
type SideResources = CudaSideResources;
fn lwe_keyswitch(
&self,
input: &CudaDynLwe,
output: &mut CudaDynLwe,
side_resources: &mut Self::SideResources,
) {
match (input, output) {
(CudaDynLwe::U64(input_cuda_lwe), CudaDynLwe::U64(output_cuda_lwe)) => {
let d_input_indexes = CudaVec::<u64>::new(1, &side_resources.streams, 0);
let d_output_indexes = CudaVec::<u64>::new(1, &side_resources.streams, 0);
cuda_keyswitch_lwe_ciphertext(
&self.key_switching_key_material.lwe_keyswitch_key,
input_cuda_lwe,
output_cuda_lwe,
&d_input_indexes,
&d_output_indexes,
false,
&side_resources.streams,
false,
);
}
(CudaDynLwe::U32(_), CudaDynLwe::U32(_)) => {
panic!(
"U32 keyswitch not implemented for CudaKeySwitchingKey - only U64 is supported"
);
}
(CudaDynLwe::U128(_), CudaDynLwe::U128(_)) => {
panic!("U128 keyswitch not implemented for CudaKeySwitchingKey - only U64 is supported");
}
_ => panic!("Inconsistent input/output types for CudaDynLwe keyswitch"),
}
}
}
create_parameterized_test!(sanity_check_encrypt_cpk_ks_ms_pbs_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
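
Both sampling loops above fan work out with the same pattern: build a fixed pool of chunk_size single-GPU streams up front, then let rayon run each chunk of iterations in parallel, with every task borrowing the stream at its slot index. A stripped-down sketch of that pattern, with plain integers standing in for CudaStreams (all names illustrative):

use rayon::prelude::*;

fn main() {
    let chunk_size = 8;
    // Stand-in for the pool built via CudaStreams::new_single_gpu(..).
    let pool: Vec<usize> = (0..chunk_size).collect();

    let samples: Vec<u64> = (0..100usize)
        .collect::<Vec<_>>()
        .chunks(chunk_size)
        .flat_map(|chunk| {
            chunk
                .par_iter()
                .map(|i| {
                    // Each task reuses the "stream" at its slot, so at most
                    // chunk_size tasks hit the GPU concurrently.
                    let stream = pool[*i % chunk_size];
                    (*i + stream) as u64 // placeholder for a real noise sample
                })
                .collect::<Vec<_>>()
        })
        .collect();

    assert_eq!(samples.len(), 100);
}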

View File

@@ -1,5 +1,6 @@
pub mod br_dp_ks_ms;
pub mod br_dp_packingks_ms;
pub mod cpk_ks_ms;
pub mod dp_ks_ms;
pub mod dp_ks_pbs_128_packingks;
pub mod utils;

View File

@@ -0,0 +1,28 @@
use crate::integer::client_key::secret_encryption_key::SecretEncryptionKeyView;
use crate::integer::key_switching_key::KeySwitchingKey;
use crate::integer::{ClientKey, ServerKey};
use crate::shortint::parameters::ShortintKeySwitchingParameters;
/// Test-only variant of KeySwitchingKey::new that skips the cast_rshift assertion.
/// This is needed for pfail tests, where we intentionally mismatch the message and carry moduli.
pub fn new_key_switching_key_for_pfail_test<'input_key, InputEncryptionKey, ClientKeyType>(
input_key_pair: (InputEncryptionKey, Option<&ServerKey>),
output_key_pair: (&ClientKeyType, &ServerKey),
params: ShortintKeySwitchingParameters,
) -> KeySwitchingKey
where
InputEncryptionKey: Into<SecretEncryptionKeyView<'input_key>>,
ClientKeyType: AsRef<ClientKey>,
{
let input_secret_encryption_key: SecretEncryptionKeyView<'_> = input_key_pair.0.into();
KeySwitchingKey {
key: crate::shortint::KeySwitchingKey::new(
(
input_secret_encryption_key.key,
input_key_pair.1.map(|k| &k.key),
),
(&output_key_pair.0.as_ref().key, &output_key_pair.1.key),
params,
),
}
}

View File

@@ -1 +1,2 @@
pub mod key_switching_test_utils;
pub mod noise_simulation;

View File

@@ -73,7 +73,8 @@ impl CudaProvenCompactCiphertextList {
key: &CudaKeySwitchingKey,
streams: &CudaStreams,
) -> crate::Result<CudaCompactCiphertextListExpander> {
self.d_flattened_compact_lists.expand(key, streams)
self.d_flattened_compact_lists
.expand(key, super::ZKType::Casting, streams)
}
pub fn from_proven_compact_ciphertext_list(