mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-08 22:28:01 -05:00
feat(gpu): create noise and pfail tests cpk
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
|
||||
#include "../keyswitch/ks_enums.h"
|
||||
#include "../pbs/pbs_enums.h"
|
||||
#include "zk_enums.h"
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C" {
|
||||
@@ -16,7 +17,7 @@ uint64_t scratch_cuda_expand_without_verification_64(
|
||||
uint32_t grouping_factor, const uint32_t *num_lwes_per_compact_list,
|
||||
const bool *is_boolean_array, uint32_t num_compact_lists,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
KS_TYPE casting_key_type, bool allocate_gpu_memory,
|
||||
KS_TYPE casting_key_type, bool allocate_gpu_memory, EXPAND_KIND expand_kind,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_expand_without_verification_64(
|
||||
|
||||
7
backends/tfhe-cuda-backend/cuda/include/zk/zk_enums.h
Normal file
7
backends/tfhe-cuda-backend/cuda/include/zk/zk_enums.h
Normal file
@@ -0,0 +1,7 @@
|
||||
#ifndef CUDA_ZK_ENUMS_H
|
||||
#define CUDA_ZK_ENUMS_H
|
||||
#include <stdint.h>
|
||||
// In addition to the two regular kinds of expand (NO_CASTING and CASTING), there is a
|
||||
// third kind, SANITY_CHECK, that is used only in the noise tests
|
||||
enum EXPAND_KIND { NO_CASTING = 0, CASTING = 1, SANITY_CHECK = 2 };
|
||||
#endif // CUDA_ZK_ENUMS_H
|
||||
@@ -1,6 +1,5 @@
|
||||
#ifndef ZK_UTILITIES_H
|
||||
#define ZK_UTILITIES_H
|
||||
|
||||
#include "../integer/integer_utilities.h"
|
||||
#include "integer/integer.cuh"
|
||||
#include <cstdint>
|
||||
@@ -103,6 +102,7 @@ template <typename Torus> struct zk_expand_mem {
|
||||
uint32_t num_compact_lists;
|
||||
|
||||
int_radix_lut<Torus> *message_and_carry_extract_luts;
|
||||
int_radix_lut<Torus> *identity_lut;
|
||||
|
||||
Torus *tmp_expanded_lwes;
|
||||
Torus *tmp_ksed_small_to_big_expanded_lwes;
|
||||
@@ -113,15 +113,17 @@ template <typename Torus> struct zk_expand_mem {
|
||||
expand_job<Torus> *d_expand_jobs;
|
||||
expand_job<Torus> *h_expand_jobs;
|
||||
|
||||
EXPAND_KIND expand_kind;
|
||||
|
||||
zk_expand_mem(CudaStreams streams, int_radix_params computing_params,
|
||||
int_radix_params casting_params, KS_TYPE casting_key_type,
|
||||
const uint32_t *num_lwes_per_compact_list,
|
||||
const bool *is_boolean_array, uint32_t num_compact_lists,
|
||||
bool allocate_gpu_memory, uint64_t &size_tracker)
|
||||
bool allocate_gpu_memory, uint64_t &size_tracker,
|
||||
EXPAND_KIND expand_kind_in)
|
||||
: computing_params(computing_params), casting_params(casting_params),
|
||||
num_compact_lists(num_compact_lists),
|
||||
casting_key_type(casting_key_type) {
|
||||
|
||||
casting_key_type(casting_key_type), expand_kind(expand_kind_in) {
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
|
||||
// We copy num_lwes_per_compact_list so we get protection against
|
||||
@@ -136,10 +138,27 @@ template <typename Torus> struct zk_expand_mem {
|
||||
num_lwes += this->num_lwes_per_compact_list[i];
|
||||
}
|
||||
|
||||
if (computing_params.carry_modulus != computing_params.message_modulus) {
|
||||
if (computing_params.carry_modulus != computing_params.message_modulus &&
|
||||
expand_kind == EXPAND_KIND::CASTING) {
|
||||
PANIC("GPU backend requires carry_modulus equal to message_modulus")
|
||||
}
|
||||
|
||||
// We create the identity LUT only if we are doing a SANITY_CHECK
|
||||
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
|
||||
identity_lut =
|
||||
new int_radix_lut<Torus>(streams, computing_params, 1, 2 * num_lwes,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
auto identity_lut_f = [](Torus x) -> Torus { return x; };
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), identity_lut->get_lut(0, 0),
|
||||
identity_lut->get_degree(0), identity_lut->get_max_degree(0),
|
||||
casting_params.glwe_dimension, casting_params.polynomial_size,
|
||||
casting_params.message_modulus, casting_params.carry_modulus,
|
||||
identity_lut_f, gpu_memory_allocated);
|
||||
}
|
||||
|
||||
auto message_extract_lut_f = [casting_params](Torus x) -> Torus {
|
||||
return x % casting_params.message_modulus;
|
||||
};
|
||||
@@ -317,6 +336,11 @@ template <typename Torus> struct zk_expand_mem {
|
||||
message_and_carry_extract_luts->release(streams);
|
||||
delete message_and_carry_extract_luts;
|
||||
|
||||
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
|
||||
identity_lut->release(streams);
|
||||
delete identity_lut;
|
||||
}
|
||||
|
||||
cuda_drop_with_size_tracking_async(tmp_expanded_lwes, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
|
||||
@@ -10,7 +10,7 @@ uint64_t scratch_cuda_expand_without_verification_64(
|
||||
uint32_t grouping_factor, const uint32_t *num_lwes_per_compact_list,
|
||||
const bool *is_boolean_array, uint32_t num_compact_lists,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
KS_TYPE casting_key_type, bool allocate_gpu_memory,
|
||||
KS_TYPE casting_key_type, bool allocate_gpu_memory, EXPAND_KIND expand_kind,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
// Since CUDA backend works with the concept of "big" and "small" key, instead
|
||||
@@ -37,7 +37,8 @@ uint64_t scratch_cuda_expand_without_verification_64(
|
||||
CudaStreams(streams),
|
||||
reinterpret_cast<zk_expand_mem<uint64_t> **>(mem_ptr),
|
||||
num_lwes_per_compact_list, is_boolean_array, num_compact_lists,
|
||||
computing_params, casting_params, casting_key_type, allocate_gpu_memory);
|
||||
computing_params, casting_params, casting_key_type, allocate_gpu_memory,
|
||||
expand_kind);
|
||||
}
|
||||
|
||||
void cuda_expand_without_verification_64(
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/helper_multi_gpu.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include "zk/zk_enums.h"
|
||||
#include "zk/zk_utilities.h"
|
||||
#include <functional>
|
||||
|
||||
@@ -54,15 +55,24 @@ __host__ void host_expand_without_verification(
|
||||
compact_lwe_lists.total_num_lwes * sizeof(expand_job<Torus>),
|
||||
streams.stream(0), streams.gpu_index(0), true);
|
||||
|
||||
if (mem_ptr->expand_kind == EXPAND_KIND::NO_CASTING) {
|
||||
host_lwe_expand<Torus, params>(streams.stream(0), streams.gpu_index(0),
|
||||
lwe_array_out, d_expand_jobs, num_lwes);
|
||||
return;
|
||||
}
|
||||
|
||||
host_lwe_expand<Torus, params>(streams.stream(0), streams.gpu_index(0),
|
||||
expanded_lwes, d_expand_jobs, num_lwes);
|
||||
|
||||
auto ksks = casting_keys;
|
||||
auto lwe_array_input = expanded_lwes;
|
||||
auto ksks = casting_keys;
|
||||
auto message_and_carry_extract_luts = mem_ptr->message_and_carry_extract_luts;
|
||||
|
||||
auto lut = mem_ptr->message_and_carry_extract_luts;
|
||||
if (casting_key_type == SMALL_TO_BIG) {
|
||||
if (mem_ptr->expand_kind == EXPAND_KIND::SANITY_CHECK) {
|
||||
PANIC("SANITY_CHECK not supported for SMALL_TO_BIG casting");
|
||||
}
|
||||
// Keyswitch from small to big key if needed
|
||||
auto ksed_small_to_big_expanded_lwes =
|
||||
mem_ptr->tmp_ksed_small_to_big_expanded_lwes;
|
||||
@@ -95,6 +105,17 @@ __host__ void host_expand_without_verification(
|
||||
into_radix_ciphertext(output, lwe_array_out, 2 * num_lwes, lwe_dimension);
|
||||
auto input = new CudaRadixCiphertextFFI;
|
||||
into_radix_ciphertext(input, lwe_array_input, 2 * num_lwes, lwe_dimension);
|
||||
|
||||
// This is a special case only for our noise sanity checks
|
||||
// If we are doing a SANITY_CHECK expand, we just apply the identity LUT
|
||||
// This replicates the CPU fallback behaviour of the casting expand
|
||||
if (mem_ptr->expand_kind == EXPAND_KIND::SANITY_CHECK) {
|
||||
integer_radix_apply_univariate_lookup_table<Torus>(
|
||||
streams, output, input, bsks, ksks, mem_ptr->identity_lut,
|
||||
2 * num_lwes);
|
||||
return;
|
||||
}
|
||||
|
||||
integer_radix_apply_univariate_lookup_table<Torus>(
|
||||
streams, output, input, bsks, ksks, message_and_carry_extract_luts,
|
||||
2 * num_lwes);
|
||||
@@ -106,13 +127,13 @@ __host__ uint64_t scratch_cuda_expand_without_verification(
|
||||
const uint32_t *num_lwes_per_compact_list, const bool *is_boolean_array,
|
||||
uint32_t num_compact_lists, int_radix_params computing_params,
|
||||
int_radix_params casting_params, KS_TYPE casting_key_type,
|
||||
bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory, EXPAND_KIND expand_kind) {
|
||||
|
||||
uint64_t size_tracker = 0;
|
||||
*mem_ptr = new zk_expand_mem<Torus>(
|
||||
streams, computing_params, casting_params, casting_key_type,
|
||||
num_lwes_per_compact_list, is_boolean_array, num_compact_lists,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
allocate_gpu_memory, size_tracker, expand_kind);
|
||||
return size_tracker;
|
||||
}
|
||||
|
||||
|
||||
@@ -2471,6 +2471,10 @@ unsafe extern "C" {
|
||||
pub const KS_TYPE_BIG_TO_SMALL: KS_TYPE = 0;
|
||||
pub const KS_TYPE_SMALL_TO_BIG: KS_TYPE = 1;
|
||||
pub type KS_TYPE = ffi::c_uint;
|
||||
pub const EXPAND_KIND_NO_CASTING: EXPAND_KIND = 0;
|
||||
pub const EXPAND_KIND_CASTING: EXPAND_KIND = 1;
|
||||
pub const EXPAND_KIND_SANITY_CHECK: EXPAND_KIND = 2;
|
||||
pub type EXPAND_KIND = ffi::c_uint;
|
||||
unsafe extern "C" {
|
||||
pub fn scratch_cuda_expand_without_verification_64(
|
||||
streams: CudaStreamsFFI,
|
||||
@@ -2496,6 +2500,7 @@ unsafe extern "C" {
|
||||
pbs_type: PBS_TYPE,
|
||||
casting_key_type: KS_TYPE,
|
||||
allocate_gpu_memory: bool,
|
||||
expand_kind: EXPAND_KIND,
|
||||
noise_reduction_type: PBS_MS_REDUCTION_T,
|
||||
) -> u64;
|
||||
}
|
||||
|
||||
@@ -350,7 +350,8 @@ impl CompactCiphertextList {
|
||||
.unwrap(),
|
||||
dest_server_key: &cuda_key.key.key,
|
||||
};
|
||||
let expander = gpu_inner.expand(&ksk, streams)?;
|
||||
let expander =
|
||||
gpu_inner.expand(&ksk, crate::integer::gpu::ZKType::Casting, streams)?;
|
||||
|
||||
Ok(CompactCiphertextListExpander {
|
||||
inner: InnerCompactCiphertextListExpander::Cuda(expander),
|
||||
@@ -390,7 +391,8 @@ impl CompactCiphertextList {
|
||||
dest_server_key: &cuda_key.key.key,
|
||||
};
|
||||
let streams = &cuda_key.streams;
|
||||
let expander = gpu_inner.expand(&ksk, streams)?;
|
||||
let expander =
|
||||
gpu_inner.expand(&ksk, crate::integer::gpu::ZKType::Casting, streams)?;
|
||||
|
||||
Ok(CompactCiphertextListExpander {
|
||||
inner: InnerCompactCiphertextListExpander::Cuda(expander),
|
||||
|
||||
@@ -21,7 +21,6 @@ use crate::GpuIndex;
|
||||
use itertools::Itertools;
|
||||
use serde::Deserializer;
|
||||
use tfhe_cuda_backend::cuda_bind::cuda_memcpy_async_to_gpu;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CudaCompactCiphertextListInfo {
|
||||
pub info: CudaBlockInfo,
|
||||
@@ -377,6 +376,7 @@ impl CudaFlattenedVecCompactCiphertextList {
|
||||
pub fn expand(
|
||||
&self,
|
||||
key: &CudaKeySwitchingKey,
|
||||
zk_type: crate::integer::gpu::ZKType,
|
||||
streams: &CudaStreams,
|
||||
) -> crate::Result<CudaCompactCiphertextListExpander> {
|
||||
assert!(
|
||||
@@ -441,6 +441,7 @@ impl CudaFlattenedVecCompactCiphertextList {
|
||||
LweBskGroupingFactor(0),
|
||||
self.num_lwe_per_compact_list.as_slice(),
|
||||
self.is_boolean.as_slice(),
|
||||
zk_type,
|
||||
d_bsk.ms_noise_reduction_configuration.as_ref(),
|
||||
);
|
||||
}
|
||||
@@ -476,6 +477,7 @@ impl CudaFlattenedVecCompactCiphertextList {
|
||||
d_multibit_bsk.grouping_factor,
|
||||
self.num_lwe_per_compact_list.as_slice(),
|
||||
self.is_boolean.as_slice(),
|
||||
zk_type,
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -80,6 +80,12 @@ pub enum ComparisonType {
|
||||
MAX = 6,
|
||||
MIN = 7,
|
||||
}
|
||||
/// Selects which flavour of compact-list expansion the CUDA backend runs.
///
/// The value is cast to `u32` and forwarded as the backend's `expand_kind` argument, so the
/// discriminants must stay in sync with the `EXPAND_KIND_*` constants of the generated bindings
/// (`NO_CASTING = 0`, `CASTING = 1`, `SANITY_CHECK = 2`).
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ZKType {
    /// Only expand the compact list; the backend returns right after the LWE expansion.
    NoCasting = 0,
    /// Expand, then apply the message/carry extraction lookup tables through the casting key.
    Casting = 1,
    /// Expand, then apply an identity lookup table. Used only by the noise sanity checks.
    SanityCheck = 2,
}
|
||||
|
||||
fn resolve_noise_reduction_type(
|
||||
ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
|
||||
@@ -7597,6 +7603,7 @@ pub(crate) unsafe fn cuda_backend_expand<T: UnsignedInteger, B: Numeric>(
|
||||
grouping_factor: LweBskGroupingFactor,
|
||||
num_lwes_per_compact_list: &[u32],
|
||||
is_boolean: &[bool],
|
||||
zk_type: ZKType,
|
||||
ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
|
||||
) {
|
||||
assert_eq!(
|
||||
@@ -7665,6 +7672,7 @@ pub(crate) unsafe fn cuda_backend_expand<T: UnsignedInteger, B: Numeric>(
|
||||
pbs_type as u32,
|
||||
casting_key_type as u32,
|
||||
true,
|
||||
zk_type as u32,
|
||||
noise_reduction_type as u32,
|
||||
);
|
||||
cuda_expand_without_verification_64(
|
||||
@@ -10218,12 +10226,13 @@ pub unsafe fn unchecked_small_scalar_mul_integer_async(
|
||||
|
||||
cuda_small_scalar_multiplication_integer_64_inplace(
|
||||
streams.ffi(),
|
||||
&raw mut cuda_ffi_lwe_array,
|
||||
&mut cuda_ffi_lwe_array,
|
||||
small_scalar,
|
||||
message_modulus.0 as u32,
|
||||
carry_modulus.0 as u32,
|
||||
);
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// # Safety
|
||||
///
|
||||
|
||||
@@ -0,0 +1,818 @@
|
||||
use crate::integer::gpu::ciphertext::compact_list::CudaFlattenedVecCompactCiphertextList;
|
||||
|
||||
use crate::core_crypto::commons::parameters::CiphertextModulusLog;
|
||||
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
|
||||
use crate::shortint::encoding::ShortintEncoding;
|
||||
use crate::shortint::engine::ShortintEngine;
|
||||
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
|
||||
use crate::shortint::parameters::{
|
||||
AtomicPatternParameters, CarryModulus, CompactCiphertextListExpansionKind,
|
||||
CompactPublicKeyEncryptionParameters, MetaParameters, ShortintCompactCiphertextListCastingMode,
|
||||
ShortintKeySwitchingParameters,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
|
||||
DynLwe, NoiseSimulationLwe, NoiseSimulationLweKeyswitchKey, NoiseSimulationModulusSwitchConfig,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::{
|
||||
mean_and_variance_check, normality_check, pfail_check, update_ap_params_for_pfail,
|
||||
DecryptionAndNoiseResult, NoiseSample, PfailTestMeta, PfailTestResult,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::{
|
||||
should_run_short_pfail_tests_debug, should_use_single_key_debug,
|
||||
};
|
||||
|
||||
use crate::shortint::server_key::tests::parameterized_test::create_parameterized_test;
|
||||
use crate::shortint::PaddingBit;
|
||||
use rayon::prelude::*;
|
||||
use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext;
|
||||
use crate::integer::gpu::CudaServerKey;
|
||||
use crate::integer::{ClientKey, CompressedServerKey};
|
||||
use crate::GpuIndex;
|
||||
use crate::core_crypto::gpu::{CudaSideResources, CudaStreams};
|
||||
use crate::shortint::ShortintParameterSet;
|
||||
use crate::integer::gpu::key_switching_key::CudaKeySwitchingKey;
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::shortint::server_key::tests::noise_distribution::cpk_ks_ms::cpk_ks_any_ms;
|
||||
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::noise_simulation::CudaDynLwe;
|
||||
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::lwe_programmable_bootstrap::LweClassicFftBootstrap;
|
||||
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::lwe_programmable_bootstrap::AllocateLweBootstrapResult;
|
||||
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::{
|
||||
AllocateLweKeyswitchResult, LweKeyswitch,
|
||||
};
|
||||
use crate::core_crypto::gpu::algorithms::lwe_keyswitch::cuda_keyswitch_lwe_ciphertext;
|
||||
use crate::core_crypto::gpu::vec::CudaVec;
|
||||
use crate::core_crypto::prelude::LweCiphertextCount;
|
||||
use crate::shortint::key_switching_key::CudaKeySwitchingKeyMaterial;
|
||||
use crate::integer::key_switching_key::KeySwitchingKey;
|
||||
use crate::integer::{CompactPublicKey, CompactPrivateKey};
|
||||
use crate::core_crypto::prelude::LweCiphertext;
|
||||
use crate::integer::ciphertext::DataKind;
|
||||
use std::num::NonZeroUsize;
|
||||
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::key_switching_test_utils::new_key_switching_key_for_pfail_test;
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn cpk_ks_any_ms_inner_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
cpk_params: CompactPublicKeyEncryptionParameters,
|
||||
ksk_ds_params: ShortintKeySwitchingParameters,
|
||||
single_cpk_private_key: &CompactPrivateKey<Vec<u64>>,
|
||||
single_cpk: &CompactPublicKey,
|
||||
single_cuda_ksk: &CudaKeySwitchingKey<'_>,
|
||||
single_cks: &ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
msg: u64,
|
||||
br_input_modulus_log: CiphertextModulusLog,
|
||||
streams: &CudaStreams,
|
||||
) -> (
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
) {
|
||||
let mut engine = ShortintEngine::new();
|
||||
let thread_cpk_private_key;
|
||||
let thread_cpk;
|
||||
let thread_cuda_ksk;
|
||||
let thread_cks;
|
||||
let thread_sks;
|
||||
let thread_cuda_sks;
|
||||
let thread_cuda_ksk_material;
|
||||
let (cpk_private_key, cpk, cuda_ksk, cks, cuda_sks) = if should_use_single_key_debug() {
|
||||
(
|
||||
single_cpk_private_key,
|
||||
single_cpk,
|
||||
single_cuda_ksk,
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
)
|
||||
} else {
|
||||
thread_cpk_private_key = CompactPrivateKey::new(cpk_params);
|
||||
thread_cpk = CompactPublicKey::new(&thread_cpk_private_key);
|
||||
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
thread_cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key =
|
||||
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
|
||||
thread_sks = compressed_server_key.decompress();
|
||||
thread_cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
let ksk = new_key_switching_key_for_pfail_test(
|
||||
(&thread_cpk_private_key, None),
|
||||
(&thread_cks, &thread_sks),
|
||||
ksk_ds_params,
|
||||
);
|
||||
thread_cuda_ksk_material =
|
||||
CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams);
|
||||
thread_cuda_ksk = CudaKeySwitchingKey::from_cuda_key_switching_key_material(
|
||||
&thread_cuda_ksk_material,
|
||||
&thread_cuda_sks,
|
||||
);
|
||||
|
||||
(
|
||||
&thread_cpk_private_key,
|
||||
&thread_cpk,
|
||||
&thread_cuda_ksk,
|
||||
&thread_cks,
|
||||
&thread_cuda_sks,
|
||||
)
|
||||
};
|
||||
|
||||
//let br_input_modulus_log = sks.br_input_modulus_log();
|
||||
let modulus_switch_config = cuda_sks.noise_simulation_modulus_switch_config();
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(1),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
atomic_pattern: crate::shortint::AtomicPatternKind::Standard(
|
||||
crate::shortint::PBSOrder::KeyswitchBootstrap,
|
||||
),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources = CudaSideResources::new(streams, cuda_block_info);
|
||||
let ct = {
|
||||
let compact_list = cpk.key.encrypt_iter_with_modulus_with_engine(
|
||||
core::iter::once(msg),
|
||||
cpk.key.parameters.message_modulus.0,
|
||||
&mut engine,
|
||||
);
|
||||
|
||||
let num_blocks = 1usize;
|
||||
|
||||
let data_info = vec![DataKind::Unsigned(NonZeroUsize::new(num_blocks).unwrap())];
|
||||
let cuda_casting_compact_list =
|
||||
CudaFlattenedVecCompactCiphertextList::from_vec_shortint_compact_ciphertext_list(
|
||||
vec![compact_list.clone()],
|
||||
data_info,
|
||||
&cuda_side_resources.streams,
|
||||
);
|
||||
let cuda_compact_list_expander = cuda_casting_compact_list
|
||||
.expand(
|
||||
&cuda_ksk,
|
||||
crate::integer::gpu::ZKType::NoCasting,
|
||||
&cuda_side_resources.streams,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let cuda_expanded_ct: CudaUnsignedRadixCiphertext = cuda_compact_list_expander
|
||||
.get(0usize, &cuda_side_resources.streams)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
CudaDynLwe::U64(cuda_expanded_ct.ciphertext.d_blocks)
|
||||
};
|
||||
|
||||
let (input_gpu, after_ks_ds_gpu, after_drift_gpu, after_ms_gpu) = cpk_ks_any_ms(
|
||||
ct,
|
||||
cuda_ksk,
|
||||
modulus_switch_config,
|
||||
br_input_modulus_log,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
let input_list = input_gpu
|
||||
.as_lwe_64()
|
||||
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
|
||||
let input_ct = LweCiphertext::from_container(
|
||||
input_list.clone().into_container(),
|
||||
input_list.ciphertext_modulus(),
|
||||
);
|
||||
let input = DynLwe::U64(input_ct);
|
||||
|
||||
let after_ks_ds_list = after_ks_ds_gpu
|
||||
.as_lwe_64()
|
||||
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
|
||||
let after_ks_ds_ct = LweCiphertext::from_container(
|
||||
after_ks_ds_list.clone().into_container(),
|
||||
after_ks_ds_list.ciphertext_modulus(),
|
||||
);
|
||||
let after_ks_ds = DynLwe::U64(after_ks_ds_ct);
|
||||
|
||||
let before_ms_gpu: &CudaDynLwe = after_drift_gpu.as_ref().unwrap_or(&after_ks_ds_gpu);
|
||||
let before_ms_list = before_ms_gpu
|
||||
.as_lwe_64()
|
||||
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
|
||||
let before_ms_ct = LweCiphertext::from_container(
|
||||
before_ms_list.clone().into_container(),
|
||||
before_ms_list.ciphertext_modulus(),
|
||||
);
|
||||
let before_ms = DynLwe::U64(before_ms_ct);
|
||||
let after_ms_list = after_ms_gpu
|
||||
.as_lwe_64()
|
||||
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
|
||||
let after_ms_ct = LweCiphertext::from_container(
|
||||
after_ms_list.clone().into_container(),
|
||||
after_ms_list.ciphertext_modulus(),
|
||||
);
|
||||
let after_ms = DynLwe::U64(after_ms_ct);
|
||||
match &cks.key.atomic_pattern {
|
||||
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
|
||||
let params = standard_atomic_pattern_client_key.parameters;
|
||||
let encoding = ShortintEncoding {
|
||||
ciphertext_modulus: params.ciphertext_modulus(),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
padding_bit: PaddingBit::Yes,
|
||||
};
|
||||
|
||||
let cpk_lwe_secret_key = cpk_private_key.key.key();
|
||||
|
||||
let small_compute_lwe_secret_key =
|
||||
standard_atomic_pattern_client_key.small_lwe_secret_key();
|
||||
(
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&input.as_lwe_64(),
|
||||
&cpk_lwe_secret_key,
|
||||
msg,
|
||||
&encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_ks_ds.as_lwe_64(),
|
||||
&small_compute_lwe_secret_key,
|
||||
msg,
|
||||
&encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&before_ms.as_lwe_64(),
|
||||
&small_compute_lwe_secret_key,
|
||||
msg,
|
||||
&encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_ms.as_lwe_64(),
|
||||
&small_compute_lwe_secret_key,
|
||||
msg,
|
||||
&encoding,
|
||||
),
|
||||
)
|
||||
}
|
||||
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
|
||||
panic!("KeySwitch32 atomic pattern is not supported on GPU yet");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn cpk_ks_any_ms_noise_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
cpk_params: CompactPublicKeyEncryptionParameters,
|
||||
ksk_ds_params: ShortintKeySwitchingParameters,
|
||||
single_cpk_private_key: &CompactPrivateKey<Vec<u64>>,
|
||||
single_cpk: &CompactPublicKey,
|
||||
single_cuda_ksk_ds: &CudaKeySwitchingKey<'_>,
|
||||
single_cks: &ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
msg: u64,
|
||||
br_input_modulus_log: CiphertextModulusLog,
|
||||
streams: &CudaStreams,
|
||||
) -> (NoiseSample, NoiseSample, NoiseSample, NoiseSample) {
|
||||
let (input, after_ks_ds, before_ms, after_ms) = cpk_ks_any_ms_inner_helper_gpu(
|
||||
params,
|
||||
cpk_params,
|
||||
ksk_ds_params,
|
||||
single_cpk_private_key,
|
||||
single_cpk,
|
||||
single_cuda_ksk_ds,
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
msg,
|
||||
br_input_modulus_log,
|
||||
streams,
|
||||
);
|
||||
|
||||
(
|
||||
input
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_ks_ds
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
before_ms
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_ms
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn cpk_ks_any_ms_pfail_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
cpk_params: CompactPublicKeyEncryptionParameters,
|
||||
ksk_ds_params: ShortintKeySwitchingParameters,
|
||||
single_cpk_private_key: &CompactPrivateKey<Vec<u64>>,
|
||||
single_cpk: &CompactPublicKey,
|
||||
single_cuda_ksk_ds: &CudaKeySwitchingKey<'_>,
|
||||
single_cks: &ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
msg: u64,
|
||||
br_input_modulus_log: CiphertextModulusLog,
|
||||
streams: &CudaStreams,
|
||||
) -> DecryptionAndNoiseResult {
|
||||
let (_input, _after_ks_ds, _before_ms, after_ms) = cpk_ks_any_ms_inner_helper_gpu(
|
||||
params,
|
||||
cpk_params,
|
||||
ksk_ds_params,
|
||||
single_cpk_private_key,
|
||||
single_cpk,
|
||||
single_cuda_ksk_ds,
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
msg,
|
||||
br_input_modulus_log,
|
||||
streams,
|
||||
);
|
||||
|
||||
after_ms
|
||||
}
|
||||
|
||||
fn noise_check_encrypt_cpk_ks_ms_noise_gpu(meta_params: MetaParameters) {
|
||||
let (params, cpk_params, ksk_ds_params) = {
|
||||
let compute_params = meta_params.compute_parameters;
|
||||
let dedicated_cpk_params = meta_params.dedicated_compact_public_key_parameters.unwrap();
|
||||
// To avoid the expand logic of shortint which would force a keyswitch + LUT eval after
|
||||
// expand
|
||||
let cpk_params = {
|
||||
let mut cpk_params = dedicated_cpk_params.pke_params;
|
||||
cpk_params.expansion_kind = CompactCiphertextListExpansionKind::NoCasting(
|
||||
compute_params.encryption_key_choice().into_pbs_order(),
|
||||
);
|
||||
cpk_params
|
||||
};
|
||||
|
||||
(compute_params, cpk_params, dedicated_cpk_params.ksk_params)
|
||||
};
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
let cpk_private_key = CompactPrivateKey::new(cpk_params);
|
||||
let cpk = CompactPublicKey::new(&cpk_private_key);
|
||||
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let sks = compressed_server_key.decompress();
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
let ksk = KeySwitchingKey::new((&cpk_private_key, None), (&cks, &sks), ksk_ds_params);
|
||||
let cuda_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams);
|
||||
let cuda_ksk =
|
||||
CudaKeySwitchingKey::from_cuda_key_switching_key_material(&cuda_ksk_material, &cuda_sks);
|
||||
|
||||
let noise_simulation_ksk =
|
||||
NoiseSimulationLweKeyswitchKey::new_from_atomic_pattern_parameters(params);
|
||||
let noise_simulation_ksk_ds =
|
||||
NoiseSimulationLweKeyswitchKey::new_from_cpk_params(cpk_params, ksk_ds_params, params);
|
||||
let noise_simulation_modulus_switch_config =
|
||||
NoiseSimulationModulusSwitchConfig::new_from_atomic_pattern_parameters(params);
|
||||
|
||||
let modulus_switch_config = sks.key.noise_simulation_modulus_switch_config();
|
||||
let cuda_modulus_switch_config = cuda_sks.noise_simulation_modulus_switch_config();
|
||||
let compute_br_input_modulus_log = sks.key.br_input_modulus_log();
|
||||
let expected_average_after_ms =
|
||||
modulus_switch_config.expected_average_after_ms(params.polynomial_size());
|
||||
|
||||
assert!(noise_simulation_ksk.matches_actual_shortint_server_key(&sks.key));
|
||||
assert!(noise_simulation_ksk_ds.matches_actual_shortint_keyswitching_key(&ksk.key.as_view()));
|
||||
assert!(noise_simulation_modulus_switch_config
|
||||
.matches_shortint_server_key_modulus_switch_config(modulus_switch_config));
|
||||
|
||||
let (_input_sim, _after_ks_ds_sim, _after_drift_sim, after_ms_sim) = {
|
||||
let noise_simulation_input = NoiseSimulationLwe::encrypt_with_cpk(&cpk.key);
|
||||
cpk_ks_any_ms(
|
||||
noise_simulation_input,
|
||||
&noise_simulation_ksk_ds,
|
||||
noise_simulation_modulus_switch_config.as_ref(),
|
||||
compute_br_input_modulus_log,
|
||||
&mut (),
|
||||
)
|
||||
};
|
||||
|
||||
let sample_input = {
|
||||
let compact_list = cpk.key.encrypt_slice(&[0]);
|
||||
let mut expanded = compact_list
|
||||
.expand(ShortintCompactCiphertextListCastingMode::NoCasting)
|
||||
.unwrap();
|
||||
assert_eq!(expanded.len(), 1);
|
||||
|
||||
DynLwe::U64(expanded.pop().unwrap().ct)
|
||||
};
|
||||
let d_ct_input =
|
||||
CudaLweCiphertextList::from_lwe_ciphertext(&sample_input.as_lwe_64(), &streams);
|
||||
let gpu_sample_input = CudaDynLwe::U64(d_ct_input);
|
||||
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(1),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
atomic_pattern: crate::shortint::AtomicPatternKind::Standard(
|
||||
crate::shortint::PBSOrder::KeyswitchBootstrap,
|
||||
),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources = CudaSideResources::new(&streams, cuda_block_info);
|
||||
// Check that the circuit is correct with respect to core implementation, i.e. does not crash on
|
||||
// dimension checks
|
||||
let (expected_lwe_dimension_out, expected_modulus_f64_out) = {
|
||||
let (_input, _after_ks_ds, _before_ms, after_ms) = cpk_ks_any_ms(
|
||||
gpu_sample_input,
|
||||
&cuda_ksk,
|
||||
cuda_modulus_switch_config,
|
||||
compute_br_input_modulus_log,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
|
||||
(after_ms.lwe_dimension(), after_ms.raw_modulus_float())
|
||||
};
|
||||
|
||||
assert_eq!(after_ms_sim.lwe_dimension(), expected_lwe_dimension_out);
|
||||
assert_eq!(after_ms_sim.modulus().as_f64(), expected_modulus_f64_out);
|
||||
|
||||
let cleartext_modulus = params.message_modulus().0 * params.carry_modulus().0;
|
||||
let mut noise_samples_before_ms = vec![];
|
||||
let mut noise_samples_after_ms = vec![];
|
||||
|
||||
let sample_count_per_msg = 1000usize;
|
||||
let chunk_size = 8;
|
||||
let vec_local_streams = (0..chunk_size)
|
||||
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for _ in 0..cleartext_modulus {
|
||||
let (current_noise_sample_before_ms, current_noise_samples_after_ms): (Vec<_>, Vec<_>) = (0
|
||||
..sample_count_per_msg)
|
||||
.collect::<Vec<_>>()
|
||||
.chunks(chunk_size)
|
||||
.flat_map(|chunk| {
|
||||
chunk
|
||||
.iter()
|
||||
.collect::<Vec<_>>()
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
let local_stream = &vec_local_streams[*i % chunk_size];
|
||||
let (_input, _after_ks_ds, before_ms, after_ms) =
|
||||
cpk_ks_any_ms_noise_helper_gpu(
|
||||
params,
|
||||
cpk_params,
|
||||
ksk_ds_params,
|
||||
&cpk_private_key,
|
||||
&cpk,
|
||||
&cuda_ksk,
|
||||
&cks,
|
||||
&cuda_sks,
|
||||
0,
|
||||
compute_br_input_modulus_log,
|
||||
local_stream,
|
||||
);
|
||||
(before_ms.value, after_ms.value)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unzip();
|
||||
|
||||
noise_samples_before_ms.extend(current_noise_sample_before_ms);
|
||||
noise_samples_after_ms.extend(current_noise_samples_after_ms);
|
||||
}
|
||||
|
||||
let before_ms_normality = normality_check(&noise_samples_before_ms, "before ms", 0.01);
|
||||
|
||||
let after_ms_is_ok = mean_and_variance_check(
|
||||
&noise_samples_after_ms,
|
||||
"after_ms",
|
||||
expected_average_after_ms,
|
||||
after_ms_sim.variance(),
|
||||
params.lwe_noise_distribution(),
|
||||
after_ms_sim.lwe_dimension(),
|
||||
after_ms_sim.modulus().as_f64(),
|
||||
);
|
||||
|
||||
assert!(before_ms_normality.null_hypothesis_is_valid && after_ms_is_ok);
|
||||
}
|
||||
|
||||
// Register the GPU noise check as a parameterized test for the 2_2 CPK parameter set.
create_parameterized_test!(noise_check_encrypt_cpk_ks_ms_noise_gpu {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
|
||||
|
||||
fn noise_check_encrypt_cpk_ks_ms_pfail_gpu(meta_params: MetaParameters) {
|
||||
let (params, cpk_params, ksk_ds_params) = {
|
||||
let compute_params = meta_params.compute_parameters;
|
||||
let dedicated_cpk_params = meta_params.dedicated_compact_public_key_parameters.unwrap();
|
||||
// To avoid the expand logic of shortint which would force a keyswitch + LUT eval after
|
||||
// expand
|
||||
let cpk_params = {
|
||||
let mut cpk_params = dedicated_cpk_params.pke_params;
|
||||
cpk_params.expansion_kind = CompactCiphertextListExpansionKind::NoCasting(
|
||||
compute_params.encryption_key_choice().into_pbs_order(),
|
||||
);
|
||||
cpk_params
|
||||
};
|
||||
|
||||
(compute_params, cpk_params, dedicated_cpk_params.ksk_params)
|
||||
};
|
||||
|
||||
let (pfail_test_meta, params) = {
|
||||
let mut ap_params = params;
|
||||
|
||||
let original_message_modulus = ap_params.message_modulus();
|
||||
let original_carry_modulus = ap_params.carry_modulus();
|
||||
|
||||
// For now only allow 2_2 parameters, and see later for heuristics to use
|
||||
assert_eq!(original_message_modulus.0, 4);
|
||||
assert_eq!(original_carry_modulus.0, 4);
|
||||
|
||||
// Update parameters to fail more frequently by inflating the carry modulus, allows to keep
|
||||
// the max multiplication without risks of message overflow
|
||||
let (original_pfail_and_precision, new_expected_pfail_and_precision) =
|
||||
update_ap_params_for_pfail(
|
||||
&mut ap_params,
|
||||
original_message_modulus,
|
||||
CarryModulus(1 << 5),
|
||||
);
|
||||
|
||||
let pfail_test_meta = if should_run_short_pfail_tests_debug() {
|
||||
let expected_fails = 200;
|
||||
PfailTestMeta::new_with_desired_expected_fails(
|
||||
original_pfail_and_precision,
|
||||
new_expected_pfail_and_precision,
|
||||
expected_fails,
|
||||
)
|
||||
} else {
|
||||
let total_runs = 1_000_000;
|
||||
PfailTestMeta::new_with_total_runs(
|
||||
original_pfail_and_precision,
|
||||
new_expected_pfail_and_precision,
|
||||
total_runs,
|
||||
)
|
||||
};
|
||||
|
||||
(pfail_test_meta, ap_params)
|
||||
};
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
let cpk_private_key = CompactPrivateKey::new(cpk_params);
|
||||
let cpk = CompactPublicKey::new(&cpk_private_key);
|
||||
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let sks = compressed_server_key.decompress();
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
let ksk =
|
||||
new_key_switching_key_for_pfail_test((&cpk_private_key, None), (&cks, &sks), ksk_ds_params);
|
||||
let cuda_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams);
|
||||
let cuda_ksk =
|
||||
CudaKeySwitchingKey::from_cuda_key_switching_key_material(&cuda_ksk_material, &cuda_sks);
|
||||
|
||||
let total_runs_for_expected_fails = pfail_test_meta.total_runs_for_expected_fails();
|
||||
let chunk_size = 8;
|
||||
let vec_local_streams = (0..chunk_size)
|
||||
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
|
||||
.collect::<Vec<_>>();
|
||||
let measured_fails: f64 = (0..total_runs_for_expected_fails)
|
||||
.collect::<Vec<_>>()
|
||||
.chunks(chunk_size)
|
||||
.flat_map(|chunk| {
|
||||
chunk
|
||||
.iter()
|
||||
.collect::<Vec<_>>()
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
let local_stream = &vec_local_streams[*i as usize % chunk_size];
|
||||
let after_ms_decryption_result = cpk_ks_any_ms_pfail_helper_gpu(
|
||||
params,
|
||||
cpk_params,
|
||||
ksk_ds_params,
|
||||
&cpk_private_key,
|
||||
&cpk,
|
||||
&cuda_ksk,
|
||||
&cks,
|
||||
&cuda_sks,
|
||||
0,
|
||||
sks.key.br_input_modulus_log(),
|
||||
local_stream,
|
||||
);
|
||||
after_ms_decryption_result.failure_as_f64()
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.sum();
|
||||
|
||||
let test_result = PfailTestResult { measured_fails };
|
||||
|
||||
pfail_check(&pfail_test_meta, test_result);
|
||||
}
|
||||
|
||||
// Register the GPU pfail check as a parameterized test for the 2_2 CPK parameter set.
create_parameterized_test!(noise_check_encrypt_cpk_ks_ms_pfail_gpu {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
|
||||
|
||||
fn sanity_check_encrypt_cpk_ks_ms_pbs_gpu(meta_params: MetaParameters) {
|
||||
let (params, cpk_params, ksk_ds_params) = {
|
||||
let compute_params = meta_params.compute_parameters;
|
||||
let dedicated_cpk_params = meta_params.dedicated_compact_public_key_parameters.unwrap();
|
||||
// To avoid the expand logic of shortint which would force a keyswitch + LUT eval after
|
||||
// expand
|
||||
let (cpk_params, orig_cast_mode) = {
|
||||
let mut cpk_params = dedicated_cpk_params.pke_params;
|
||||
let orig_cast_mode = cpk_params.expansion_kind;
|
||||
cpk_params.expansion_kind = CompactCiphertextListExpansionKind::NoCasting(
|
||||
compute_params.encryption_key_choice().into_pbs_order(),
|
||||
);
|
||||
(cpk_params, orig_cast_mode)
|
||||
};
|
||||
|
||||
assert!(matches!(
|
||||
orig_cast_mode,
|
||||
CompactCiphertextListExpansionKind::RequiresCasting
|
||||
));
|
||||
|
||||
(compute_params, cpk_params, dedicated_cpk_params.ksk_params)
|
||||
};
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
let cpk_private_key = CompactPrivateKey::new(cpk_params);
|
||||
let cpk = CompactPublicKey::new(&cpk_private_key);
|
||||
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let sks = compressed_server_key.decompress();
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
let ksk = KeySwitchingKey::new((&cpk_private_key, None), (&cks, &sks), ksk_ds_params);
|
||||
let cuda_ksk_material = CudaKeySwitchingKeyMaterial::from_key_switching_key(&ksk, &streams);
|
||||
let cuda_ksk =
|
||||
CudaKeySwitchingKey::from_cuda_key_switching_key_material(&cuda_ksk_material, &cuda_sks);
|
||||
let modulus_switch_config = cuda_sks.noise_simulation_modulus_switch_config();
|
||||
let compute_br_input_modulus_log = sks.key.br_input_modulus_log();
|
||||
|
||||
let id_lut = cuda_sks.generate_lookup_table(|x| x);
|
||||
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
|
||||
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(1),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
atomic_pattern: crate::shortint::AtomicPatternKind::Standard(
|
||||
crate::shortint::PBSOrder::KeyswitchBootstrap,
|
||||
),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources = CudaSideResources::new(&streams, cuda_block_info);
|
||||
|
||||
for _ in 0..10 {
|
||||
let (gpu_sample_input, shortint_res) = {
|
||||
let mut engine = ShortintEngine::new();
|
||||
let no_casting_compact_list = cpk.key.encrypt_iter_with_modulus_with_engine(
|
||||
core::iter::once(0),
|
||||
cpk.key.parameters.message_modulus.0,
|
||||
&mut engine,
|
||||
);
|
||||
|
||||
let num_blocks = 1usize;
|
||||
let data_info = vec![DataKind::Unsigned(NonZeroUsize::new(num_blocks).unwrap())];
|
||||
//This is for the ap
|
||||
let cuda_no_casting_compact_list =
|
||||
CudaFlattenedVecCompactCiphertextList::from_vec_shortint_compact_ciphertext_list(
|
||||
vec![no_casting_compact_list.clone()],
|
||||
data_info,
|
||||
&cuda_side_resources.streams,
|
||||
);
|
||||
|
||||
//This is for the verification
|
||||
let cuda_casting_compact_list =
|
||||
cuda_no_casting_compact_list.duplicate(&cuda_side_resources.streams);
|
||||
|
||||
let cuda_no_casting_compact_list_expander = cuda_no_casting_compact_list
|
||||
.expand(
|
||||
&cuda_ksk,
|
||||
crate::integer::gpu::ZKType::NoCasting,
|
||||
&cuda_side_resources.streams,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let cuda_ap_input_expanded: CudaUnsignedRadixCiphertext =
|
||||
cuda_no_casting_compact_list_expander
|
||||
.get(0usize, &cuda_side_resources.streams)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let cuda_casting_compact_list_expander = cuda_casting_compact_list
|
||||
.expand(
|
||||
&cuda_ksk,
|
||||
crate::integer::gpu::ZKType::SanityCheck,
|
||||
&cuda_side_resources.streams,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let cuda_int_res: CudaUnsignedRadixCiphertext = cuda_casting_compact_list_expander
|
||||
.get(0usize, &cuda_side_resources.streams)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
(
|
||||
CudaDynLwe::U64(
|
||||
cuda_ap_input_expanded
|
||||
.ciphertext
|
||||
.d_blocks
|
||||
.duplicate(&cuda_side_resources.streams),
|
||||
),
|
||||
cuda_int_res
|
||||
.ciphertext
|
||||
.d_blocks
|
||||
.to_lwe_ciphertext_list(&cuda_side_resources.streams),
|
||||
)
|
||||
};
|
||||
|
||||
let (_input, _after_ks, _before_ms, after_ms) = cpk_ks_any_ms(
|
||||
gpu_sample_input,
|
||||
&cuda_ksk,
|
||||
modulus_switch_config,
|
||||
compute_br_input_modulus_log,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
|
||||
// Complete the AP by computing the PBS to match shortint
|
||||
let mut pbs_result = d_accumulator.allocate_lwe_bootstrap_result(&mut cuda_side_resources);
|
||||
cuda_sks.lwe_classic_fft_pbs(
|
||||
&after_ms,
|
||||
&mut pbs_result,
|
||||
&d_accumulator,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
|
||||
let pbs_result_list = pbs_result
|
||||
.as_lwe_64()
|
||||
.to_lwe_ciphertext_list(&cuda_side_resources.streams);
|
||||
|
||||
assert_eq!(pbs_result_list, shortint_res);
|
||||
}
|
||||
}
|
||||
|
||||
// Trait implementations for CudaKeySwitchingKey to enable noise distribution tests
|
||||
impl AllocateLweKeyswitchResult for CudaKeySwitchingKey<'_> {
|
||||
type Output = CudaDynLwe;
|
||||
type SideResources = CudaSideResources;
|
||||
|
||||
fn allocate_lwe_keyswitch_result(
|
||||
&self,
|
||||
side_resources: &mut Self::SideResources,
|
||||
) -> Self::Output {
|
||||
let output_lwe_dimension = self
|
||||
.key_switching_key_material
|
||||
.lwe_keyswitch_key
|
||||
.output_key_lwe_size()
|
||||
.to_lwe_dimension();
|
||||
let lwe_ciphertext_count = LweCiphertextCount(1);
|
||||
let ciphertext_modulus = self.dest_server_key.ciphertext_modulus;
|
||||
|
||||
let cuda_lwe = CudaLweCiphertextList::new(
|
||||
output_lwe_dimension,
|
||||
lwe_ciphertext_count,
|
||||
ciphertext_modulus,
|
||||
&side_resources.streams,
|
||||
);
|
||||
CudaDynLwe::U64(cuda_lwe)
|
||||
}
|
||||
}
|
||||
|
||||
impl LweKeyswitch<CudaDynLwe, CudaDynLwe> for CudaKeySwitchingKey<'_> {
|
||||
type SideResources = CudaSideResources;
|
||||
|
||||
fn lwe_keyswitch(
|
||||
&self,
|
||||
input: &CudaDynLwe,
|
||||
output: &mut CudaDynLwe,
|
||||
side_resources: &mut Self::SideResources,
|
||||
) {
|
||||
match (input, output) {
|
||||
(CudaDynLwe::U64(input_cuda_lwe), CudaDynLwe::U64(output_cuda_lwe)) => {
|
||||
let d_input_indexes = CudaVec::<u64>::new(1, &side_resources.streams, 0);
|
||||
let d_output_indexes = CudaVec::<u64>::new(1, &side_resources.streams, 0);
|
||||
|
||||
cuda_keyswitch_lwe_ciphertext(
|
||||
&self.key_switching_key_material.lwe_keyswitch_key,
|
||||
input_cuda_lwe,
|
||||
output_cuda_lwe,
|
||||
&d_input_indexes,
|
||||
&d_output_indexes,
|
||||
false,
|
||||
&side_resources.streams,
|
||||
false,
|
||||
);
|
||||
}
|
||||
(CudaDynLwe::U32(_), CudaDynLwe::U32(_)) => {
|
||||
panic!(
|
||||
"U32 keyswitch not implemented for CudaKeySwitchingKey - only U64 is supported"
|
||||
);
|
||||
}
|
||||
(CudaDynLwe::U128(_), CudaDynLwe::U128(_)) => {
|
||||
panic!("U128 keyswitch not implemented for CudaKeySwitchingKey - only U64 is supported");
|
||||
}
|
||||
_ => panic!("Inconsistent input/output types for CudaDynLwe keyswitch"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Register the GPU sanity check as a parameterized test for the 2_2 CPK parameter set.
create_parameterized_test!(sanity_check_encrypt_cpk_ks_ms_pbs_gpu {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
|
||||
@@ -1,5 +1,6 @@
|
||||
pub mod br_dp_ks_ms;
|
||||
pub mod br_dp_packingks_ms;
|
||||
pub mod cpk_ks_ms;
|
||||
pub mod dp_ks_ms;
|
||||
pub mod dp_ks_pbs_128_packingks;
|
||||
pub mod utils;
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
use crate::integer::client_key::secret_encryption_key::SecretEncryptionKeyView;
|
||||
use crate::integer::key_switching_key::KeySwitchingKey;
|
||||
use crate::integer::{ClientKey, ServerKey};
|
||||
use crate::shortint::parameters::ShortintKeySwitchingParameters;
|
||||
|
||||
/// Test-only implementation of KeySwitchingKey::new that skips the cast_rshift assertion.
|
||||
/// This is needed for pfail tests where we intentionally use different message modulus and carry.
|
||||
pub fn new_key_switching_key_for_pfail_test<'input_key, InputEncryptionKey, ClientKeyType>(
|
||||
input_key_pair: (InputEncryptionKey, Option<&ServerKey>),
|
||||
output_key_pair: (&ClientKeyType, &ServerKey),
|
||||
params: ShortintKeySwitchingParameters,
|
||||
) -> KeySwitchingKey
|
||||
where
|
||||
InputEncryptionKey: Into<SecretEncryptionKeyView<'input_key>>,
|
||||
ClientKeyType: AsRef<ClientKey>,
|
||||
{
|
||||
let input_secret_encryption_key: SecretEncryptionKeyView<'_> = input_key_pair.0.into();
|
||||
KeySwitchingKey {
|
||||
key: crate::shortint::KeySwitchingKey::new(
|
||||
(
|
||||
input_secret_encryption_key.key,
|
||||
input_key_pair.1.map(|k| &k.key),
|
||||
),
|
||||
(&output_key_pair.0.as_ref().key, &output_key_pair.1.key),
|
||||
params,
|
||||
),
|
||||
}
|
||||
}
|
||||
@@ -1 +1,2 @@
|
||||
pub mod key_switching_test_utils;
|
||||
pub mod noise_simulation;
|
||||
|
||||
@@ -73,7 +73,8 @@ impl CudaProvenCompactCiphertextList {
|
||||
key: &CudaKeySwitchingKey,
|
||||
streams: &CudaStreams,
|
||||
) -> crate::Result<CudaCompactCiphertextListExpander> {
|
||||
self.d_flattened_compact_lists.expand(key, streams)
|
||||
self.d_flattened_compact_lists
|
||||
.expand(key, super::ZKType::Casting, streams)
|
||||
}
|
||||
|
||||
pub fn from_proven_compact_ciphertext_list(
|
||||
|
||||
Reference in New Issue
Block a user