Compare commits

...

51 Commits

Author SHA1 Message Date
Guillermo Oyarzun
35b3430404 chore(gpu): fix some pke bugs 2025-06-19 12:22:39 +02:00
Guillermo Oyarzun
a56a8107ab chore(gpu): reduce the pool size 2025-06-17 13:20:58 +02:00
Guillermo Oyarzun
b322708cbd chore(gpu): add drift to pke gpu tests 2025-06-16 17:36:11 +02:00
Guillermo Oyarzun
25da5ef721 chore(gpu): add pke gpu tests 2025-06-16 15:17:54 +02:00
Guillermo Oyarzun
43b95aaf7c chore(gpu): fix input_br variance formula for multi-bit 2025-06-16 12:27:27 +02:00
Guillermo Oyarzun
cfc19d3bd2 chore(gpu): correct ms formula in pbs128 2025-06-16 12:27:27 +02:00
Guillermo Oyarzun
51427fc9ae chore(gpu): add multiplication and extra logs to pbs128 test 2025-06-16 12:27:26 +02:00
Guillermo Oyarzun
a45baaa3d0 chore(gpu): fixes after rebase 2025-06-16 12:27:26 +02:00
Guillermo Oyarzun
a24adf528b chore(gpu): use the m128 params 2025-06-16 12:27:26 +02:00
Guillermo Oyarzun
3ddf9bdba6 chore(gpu): fix pbs128 after rebasing 2025-06-16 12:27:25 +02:00
Guillermo Oyarzun
de4902fb9f chore(gpu): add noise squashing params without using it 2025-06-16 12:27:25 +02:00
Guillermo Oyarzun
b89bca0d13 chore(gpu): fix pbs128 with old params 2025-06-16 12:27:25 +02:00
Guillermo Oyarzun
f0dc0e18ab chore(gpu): add pbs128 2025-06-16 12:27:24 +02:00
Guillermo Oyarzun
98ca66581e chore(gpu): fix errors after rebasing 2025-06-16 12:27:24 +02:00
Guillermo Oyarzun
c94ccc3a23 feat(gpu): fix bugs and add extra logs 2025-06-16 12:27:24 +02:00
Guillermo Oyarzun
7e6573a1d2 feat(gpu): fix compression tests after rebase 2025-06-16 12:27:23 +02:00
Guillermo Oyarzun
94ff21b089 feat(gpu): add noise checks with multi-bit pbs 2025-06-16 12:27:23 +02:00
Guillermo Oyarzun
71420f0d92 feat(gpu): add noise checks 2025-06-16 12:27:23 +02:00
Arthur Meyre
d81bd4ebd6 wip: long pfail runs 2025-06-16 12:27:22 +02:00
Nicolas Sarlin
83d1d6a46c wip: fix packing ks noise formula 2025-06-16 12:27:22 +02:00
Nicolas Sarlin
ac693f97e1 wip: remove check that makes compression pfail test fail 2025-06-16 12:27:22 +02:00
Nicolas Sarlin
d06656cfb4 wip: remove check that makes compression pfail test fail 2025-06-16 12:27:21 +02:00
Nicolas Sarlin
472ea682ae wip: update noise formulas 2025-06-16 12:27:21 +02:00
Nicolas Sarlin
258524f5e2 chore(zk): add noise tests for zkv1 2025-06-16 12:27:21 +02:00
Nicolas Sarlin
6b98865515 fix: new param naming 2025-06-16 12:27:20 +02:00
Arthur Meyre
e47731b1ee fix(shortint): fix sample count in pbs128 pfail 2025-06-16 12:27:20 +02:00
Arthur Meyre
d1b9bc676d test(shortint): add variance check after KS in classic PBS AP 2025-06-16 12:27:19 +02:00
Arthur Meyre
405952e323 test(shortint): add normality check after KS in classic AP 2025-06-16 12:27:19 +02:00
Arthur Meyre
7556a8e05f fix(test): fix slighlty wrong log message 2025-06-16 12:27:19 +02:00
Arthur Meyre
ebda1426e4 fix(test): fix test parameters for PBS 128
Disable the bound check as our computation disagrees with the RO
2025-06-16 12:27:18 +02:00
Arthur Meyre
da91075b26 test(shortint): add pfail estimation after ms in compression 2025-06-16 12:27:18 +02:00
Arthur Meyre
f95eb2cf2c wip: re-exported pbs 128 with symbolic mantissa
- add pbs 128 params with a mantissa setting
- noise with this settings is in line with RO prediction
2025-06-16 12:27:17 +02:00
Arthur Meyre
45da14c7dd wip: add pbs 128 tests (noise and pfail) 2025-06-16 12:27:16 +02:00
Arthur Meyre
75e03ae800 feat(core): add pbs 128 noise formulas 2025-06-16 12:27:16 +02:00
Arthur Meyre
9976cbe1f2 test(shortint): add pfail measurement for full compression + AP 2025-06-16 12:27:15 +02:00
Arthur Meyre
a9006486e8 test(shortint): slightly change decryption logic to also get the padding bit out 2025-06-16 12:27:14 +02:00
Arthur Meyre
7f8778f178 test(shortint): add pfail check for first part of compression 2025-06-16 12:27:14 +02:00
Arthur Meyre
370e4ae2e6 test(shortint): add first part of the compression AP 2025-06-16 12:27:13 +02:00
Arthur Meyre
b6db9d8ba0 chore: rename helper function to match test usage 2025-06-16 12:27:12 +02:00
Arthur Meyre
54b139b1b5 feat(core): add noise formula for packing keyswitch 2025-06-16 12:27:12 +02:00
Arthur Meyre
d3ccf08f2c test(shortint): add pfail test for PKE -> KS -> (DP -> KS Compute) -> MS 2025-06-16 12:27:11 +02:00
Arthur Meyre
6f1492766f refactor(test): also accept PKE -> KS to big -> DP -> KS -> MS 2025-06-16 12:27:10 +02:00
Arthur Meyre
03fa607209 test(shortint): add noise check for PKE -> KS to small key + MS 2025-06-16 12:27:09 +02:00
Arthur Meyre
29ab6c0709 wip: hotfix for borrow mut error, this needs a design pass/reflection 2025-06-16 12:27:09 +02:00
Arthur Meyre
1c0b428cd3 feat: add noise formulas for TUniform 132 bits of security 2025-06-16 12:27:08 +02:00
Arthur Meyre
dab2d39749 test(hl): add noise check for CompactPublicKey encryption w/ TUniform param 2025-06-16 12:27:07 +02:00
Arthur Meyre
ea81ef5d15 test(shortint): add pfail test for the classic AP 2025-06-16 12:27:06 +02:00
Arthur Meyre
57a7a5a084 refactor: prepare code factorization for noise and pfail 2025-06-16 12:27:06 +02:00
Arthur Meyre
9010ded3d5 test: add shortint atomic pattern noise measurement 2025-06-16 12:27:05 +02:00
Arthur Meyre
1ebd2848ad test(hl): test encryption noise of FheUint ciphertext in HL API 2025-06-16 12:27:04 +02:00
Nicolas Sarlin
6a1a024e6d chore(zk)!: store inside the pke params the supported zk scheme
BREAKING_CHANGE:
- Zk for compact PKE now requires dedicated encryption parameters
2025-06-16 12:27:04 +02:00
52 changed files with 10753 additions and 125 deletions

View File

@@ -37,6 +37,17 @@ void cuda_glwe_sample_extract_128(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *glwe_array_in, uint32_t const *nth_array, uint32_t num_nths,
uint32_t lwe_per_glwe, uint32_t glwe_dimension, uint32_t polynomial_size);
void cuda_modulus_switch_multi_bit_64(void *stream, uint32_t gpu_index,
void *lwe_array_out, void *lwe_array_in,
uint32_t size, uint32_t log_modulus,
uint32_t degree,
uint32_t grouping_factor);
void cuda_modulus_switch_multi_bit_128(void *stream, uint32_t gpu_index,
void *lwe_array_out, void *lwe_array_in,
uint32_t size, uint32_t log_modulus,
uint32_t degree,
uint32_t grouping_factor);
}
#endif

View File

@@ -40,6 +40,11 @@ void cleanup_cuda_integer_compress_radix_ciphertext_64(
void cleanup_cuda_integer_decompress_radix_ciphertext_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
}
void cuda_integer_extract_glwe_64(
void *const *streams, uint32_t const *gpu_indexes, void *lwe_array_out,
void const *glwe_list, uint32_t const glwe_index,
uint32_t const log_modulus, uint32_t const polynomial_size,
uint32_t const glwe_dimension, uint32_t const body_count);
}
#endif

View File

@@ -590,6 +590,9 @@ void cleanup_cuda_apply_noise_squashing_kb(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count,
int8_t **mem_ptr_void);
void cuda_small_scalar_multiplication_integer_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array, uint64_t scalar);
uint64_t scratch_cuda_sub_and_propagate_single_carry_kb_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,

View File

@@ -65,6 +65,9 @@ void cuda_add_lwe_ciphertext_vector_plaintext_64(
void const *lwe_array_in, const uint64_t plaintext_in,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_sub_lwe_ciphertext_vector_plaintext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
void const *plaintext_array_in, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
}
#endif // CUDA_LINALG_H_

View File

@@ -10,8 +10,8 @@ extern "C" {
void cuda_lwe_expand_64(void *const stream, uint32_t gpu_index,
void *lwe_array_out, const void *lwe_compact_array_in,
uint32_t lwe_dimension, uint32_t num_lwe,
const uint32_t *lwe_compact_input_indexes,
const uint32_t *output_body_id_per_compact_list);
const void *lwe_compact_input_indexes,
const void *output_body_id_per_compact_list);
uint64_t scratch_cuda_expand_without_verification_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,

View File

@@ -1,5 +1,6 @@
#include "ciphertext.cuh"
#include "polynomial/parameters.cuh"
#include "torus.cuh"
void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *stream,
uint32_t gpu_index,
@@ -142,3 +143,29 @@ void cuda_glwe_sample_extract_128(
"N's are powers of two in the interval [256..4096].")
}
}
void cuda_modulus_switch_multi_bit_64(void *stream, uint32_t gpu_index,
void *lwe_array_out, void *lwe_array_in,
uint32_t size, uint32_t log_modulus,
uint32_t degree,
uint32_t grouping_factor) {
host_modulus_switch_multi_bit<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
static_cast<uint64_t *>(lwe_array_in), size, log_modulus, degree,
grouping_factor);
}
void cuda_modulus_switch_multi_bit_128(void *stream, uint32_t gpu_index,
void *lwe_array_out, void *lwe_array_in,
uint32_t size, uint32_t log_modulus,
uint32_t degree,
uint32_t grouping_factor) {
host_modulus_switch_multi_bit<__uint128_t>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<__uint128_t *>(lwe_array_out),
static_cast<__uint128_t *>(lwe_array_in), size, log_modulus, degree,
grouping_factor);
}

View File

@@ -295,4 +295,87 @@ __host__ void host_improve_noise_modulus_switch(
check_cuda_error(cudaGetLastError());
}
template <typename Torus, class params>
__device__ uint32_t calculates_monomial_degree(const Torus *lwe_array_group,
uint32_t ggsw_idx,
uint32_t grouping_factor) {
Torus x = 0;
for (int i = 0; i < grouping_factor; i++) {
uint32_t mask_position = grouping_factor - (i + 1);
int selection_bit = (ggsw_idx >> mask_position) & 1;
x += selection_bit * lwe_array_group[i];
}
return modulus_switch(x, params::log2_degree + 1);
}
template <typename Torus, class params>
__global__ void
modulus_switch_multi_bit(Torus *array_out, const Torus *array_in, int size,
uint32_t log_modulus, uint32_t grouping_factor) {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid < size) {
int num_monomials = 1 << grouping_factor;
int input_offset = tid * grouping_factor;
int output_offset = tid * (num_monomials - 1); // First monomial is skipped
for (int ggsw_idx = 1; ggsw_idx < num_monomials; ggsw_idx++) {
array_out[ggsw_idx - 1 + output_offset] =
calculates_monomial_degree<Torus, params>(&array_in[input_offset],
ggsw_idx, grouping_factor);
}
}
}
template <typename Torus>
__host__ void host_modulus_switch_multi_bit(
cudaStream_t stream, uint32_t gpu_index, Torus *array_out, Torus *array_in,
int size, uint32_t log_modulus, uint32_t degree, uint32_t grouping_factor) {
cudaSetDevice(gpu_index);
int multibit_size = size / grouping_factor;
int num_threads = 0, num_blocks = 0;
getNumBlocksAndThreads(multibit_size, 1024, num_blocks, num_threads);
switch (degree) {
case 256:
modulus_switch_multi_bit<Torus, Degree<256>>
<<<num_blocks, num_threads, 0, stream>>>(
array_out, array_in, multibit_size, log_modulus, grouping_factor);
break;
case 512:
modulus_switch_multi_bit<Torus, Degree<512>>
<<<num_blocks, num_threads, 0, stream>>>(
array_out, array_in, multibit_size, log_modulus, grouping_factor);
break;
case 1024:
modulus_switch_multi_bit<Torus, Degree<1024>>
<<<num_blocks, num_threads, 0, stream>>>(
array_out, array_in, multibit_size, log_modulus, grouping_factor);
break;
case 2048:
modulus_switch_multi_bit<Torus, Degree<2048>>
<<<num_blocks, num_threads, 0, stream>>>(
array_out, array_in, multibit_size, log_modulus, grouping_factor);
break;
case 4096:
modulus_switch_multi_bit<Torus, Degree<4096>>
<<<num_blocks, num_threads, 0, stream>>>(
array_out, array_in, multibit_size, log_modulus, grouping_factor);
break;
case 8192:
modulus_switch_multi_bit<Torus, Degree<8192>>
<<<num_blocks, num_threads, 0, stream>>>(
array_out, array_in, multibit_size, log_modulus, grouping_factor);
break;
case 16384:
modulus_switch_multi_bit<Torus, Degree<16384>>
<<<num_blocks, num_threads, 0, stream>>>(
array_out, array_in, multibit_size, log_modulus, grouping_factor);
break;
default:
PANIC("Cuda error: unsupported polynomial size. Supported "
"N's are powers of two in the interval [256..16384].")
};
check_cuda_error(cudaGetLastError());
}
#endif // CNCRT_TORUS_H

View File

@@ -89,3 +89,16 @@ void cleanup_cuda_integer_decompress_radix_ciphertext_64(
(int_decompression<uint64_t> *)(*mem_ptr_void);
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
}
void cuda_integer_extract_glwe_64(
void *const *streams, uint32_t const *gpu_indexes, void *lwe_array_out,
void const *glwe_list, uint32_t const glwe_index,
uint32_t const log_modulus, uint32_t const polynomial_size,
uint32_t const glwe_dimension, uint32_t const body_count) {
auto casted_streams = (cudaStream_t *)(streams);
host_extract_mem_alloc_free<uint64_t>(
casted_streams[0], gpu_indexes[0], static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(glwe_list), glwe_index, log_modulus,
polynomial_size, glwe_dimension, body_count);
}

View File

@@ -231,6 +231,84 @@ __host__ void host_extract(cudaStream_t stream, uint32_t gpu_index,
check_cuda_error(cudaGetLastError());
}
/// Extracts the glwe_index-nth GLWE ciphertext
/// This function does not require memory allocation
template <typename Torus>
__host__ void host_extract_mem_alloc_free(
cudaStream_t stream, uint32_t gpu_index, Torus *glwe_array_out,
Torus const *array_in, const uint32_t glwe_index,
const uint32_t log_modulus, const uint32_t polynomial_size,
const uint32_t glwe_dimension, const uint32_t body_count_in) {
if (array_in == glwe_array_out)
PANIC("Cuda error: Input and output must be different");
cuda_set_device(gpu_index);
auto glwe_ciphertext_size = (glwe_dimension + 1) * polynomial_size;
auto num_glwes = (body_count_in + polynomial_size - 1) / polynomial_size;
// Compressed length of the compressed GLWE we want to extract
uint32_t body_count = 0;
if (body_count_in % polynomial_size == 0)
body_count = polynomial_size;
else if (glwe_index == num_glwes - 1)
body_count = body_count_in % polynomial_size;
else
body_count = polynomial_size;
auto initial_out_len = glwe_dimension * polynomial_size + body_count;
// Calculates how many bits this particular GLWE shall use
auto number_bits_to_unpack = initial_out_len * log_modulus;
auto nbits = sizeof(Torus) * 8;
auto input_len = (number_bits_to_unpack + nbits - 1) / nbits;
// Calculates how many bits a full-packed GLWE shall use
number_bits_to_unpack = glwe_ciphertext_size * log_modulus;
auto len = (number_bits_to_unpack + nbits - 1) / nbits;
// Uses that length to set the input pointer
auto chunk_array_in = array_in + glwe_index * len;
// Ensure the tail of the GLWE is zeroed
if (initial_out_len < glwe_ciphertext_size) {
auto zeroed_slice = glwe_array_out + initial_out_len;
cuda_memset_async(glwe_array_out, 0,
(glwe_ciphertext_size - initial_out_len) * sizeof(Torus),
stream, gpu_index);
}
int num_blocks = 0, num_threads = 0;
getNumBlocksAndThreads(initial_out_len, 128, num_blocks, num_threads);
dim3 grid(num_blocks);
dim3 threads(num_threads);
extract<Torus><<<grid, threads, 0, stream>>>(glwe_array_out, chunk_array_in,
log_modulus, initial_out_len);
// uint32_t body_count = std::min(body_count_in, polynomial_size);
// auto initial_out_len = glwe_dimension * polynomial_size + body_count;
// auto compressed_glwe_accumulator_size =
// (glwe_dimension + 1) * polynomial_size;
// auto number_bits_to_unpack = compressed_glwe_accumulator_size *
// log_modulus; auto nbits = sizeof(Torus) * 8;
// // number_bits_to_unpack.div_ceil(Scalar::BITS)
// auto input_len = (number_bits_to_unpack + nbits - 1) / nbits;
// // We assure the tail of the glwe is zeroed
// auto zeroed_slice = glwe_array_out + initial_out_len;
// cuda_memset_async(zeroed_slice, 0,
// (polynomial_size - body_count) * sizeof(Torus),
// streams[0], gpu_indexes[0]);
// int num_blocks = 0, num_threads = 0;
// getNumBlocksAndThreads(initial_out_len, 128, num_blocks, num_threads);
// dim3 grid(num_blocks);
// dim3 threads(num_threads);
// extract<Torus><<<grid, threads, 0, streams[0]>>>(glwe_array_out, array_in,
// glwe_index, log_modulus,
// input_len,
// initial_out_len);
check_cuda_error(cudaGetLastError());
}
template <typename Torus>
__host__ void host_integer_decompress(
cudaStream_t const *streams, uint32_t const *gpu_indexes,

View File

@@ -139,3 +139,12 @@ void cleanup_cuda_integer_radix_scalar_mul_high_kb_64(
mem_ptr->release((cudaStream_t *)streams, gpu_indexes, gpu_count);
}
void cuda_small_scalar_multiplication_integer_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array, uint64_t scalar) {
host_integer_small_scalar_mul_radix<uint64_t>((cudaStream_t *)(streams),
gpu_indexes, gpu_count,
lwe_array, lwe_array, scalar);
}

View File

@@ -147,3 +147,44 @@ void cuda_add_lwe_ciphertext_vector_plaintext_64(
static_cast<const uint64_t *>(lwe_array_in), plaintext_in,
input_lwe_dimension, input_lwe_ciphertext_count);
}
/*
* Perform the subtraction of a u64 input LWE ciphertext vector with a u64 input
* plaintext vector.
* - `stream` is a void pointer to the Cuda stream to be used in the kernel
* launch
* - `gpu_index` is the index of the GPU to be used in the kernel launch
* - `lwe_array_out` is an array of size
* `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have
* been allocated on the GPU before calling this function, and that will hold
* the result of the computation.
* - `lwe_array_in` is the LWE ciphertext vector used as input, it should have
* been allocated and initialized before calling this function. It has the same
* size as the output array.
* - `plaintext_array_in` is the plaintext vector used as input, it should have
* been allocated and initialized before calling this function. It should be of
* size `input_lwe_ciphertext_count`.
* - `input_lwe_dimension` is the number of mask elements in the input and
* output LWE ciphertext vectors
* - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the
* input LWE ciphertext vector, as well as in the output. It is also the number
* of plaintexts in the input plaintext vector.
*
* Each plaintext of the input plaintext vector is subtracted to the body of the
* corresponding LWE ciphertext in the LWE ciphertext vector. The result of the
* operation is stored in the output LWE ciphertext vector. The two input
* vectors are unchanged. This function is a wrapper to a device function that
* performs the operation on the GPU.
*/
void cuda_sub_lwe_ciphertext_vector_plaintext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
void const *plaintext_array_in, uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
host_subtraction_plaintext<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
static_cast<uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(plaintext_array_in), input_lwe_dimension,
input_lwe_ciphertext_count);
}

View File

@@ -271,7 +271,8 @@ __host__ void host_subtraction(cudaStream_t stream, uint32_t gpu_index,
}
template <typename T>
__global__ void radix_body_subtraction_inplace(T *lwe_ct, T *plaintext_input,
__global__ void radix_body_subtraction_inplace(T *lwe_ct,
const T *plaintext_input,
uint32_t input_lwe_dimension,
uint32_t num_entries) {
@@ -288,7 +289,7 @@ __global__ void radix_body_subtraction_inplace(T *lwe_ct, T *plaintext_input,
template <typename T>
__host__ void host_subtraction_plaintext(cudaStream_t stream,
uint32_t gpu_index, T *output,
T *lwe_input, T *plaintext_input,
T *lwe_input, const T *plaintext_input,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {

View File

@@ -141,7 +141,7 @@ void host_programmable_bootstrap_lwe_ciphertext_vector_128(
PANIC("Cuda error (classical PBS): base log should be <= 64")
// If the parameters contain noise reduction key, then apply it
if (ms_noise_reduction_key->num_zeros != 0) {
if (buffer->uses_noise_reduction) {
uint32_t log_modulus = log2(polynomial_size) + 1;
host_improve_noise_modulus_switch<InputTorus>(
static_cast<cudaStream_t>(stream), gpu_index,

View File

@@ -18,20 +18,6 @@
#include "types/complex/operations.cuh"
#include <vector>
template <typename Torus, class params>
__device__ uint32_t calculates_monomial_degree(const Torus *lwe_array_group,
uint32_t ggsw_idx,
uint32_t grouping_factor) {
Torus x = 0;
for (int i = 0; i < grouping_factor; i++) {
uint32_t mask_position = grouping_factor - (i + 1);
int selection_bit = (ggsw_idx >> mask_position) & 1;
x += selection_bit * lwe_array_group[i];
}
return modulus_switch(x, params::log2_degree + 1);
}
__device__ __forceinline__ int
get_start_ith_ggsw_offset(uint32_t polynomial_size, int glwe_dimension,
uint32_t level_count) {

View File

@@ -3,8 +3,8 @@
void cuda_lwe_expand_64(void *const stream, uint32_t gpu_index,
void *lwe_array_out, const void *lwe_compact_array_in,
uint32_t lwe_dimension, uint32_t num_lwe,
const uint32_t *lwe_compact_input_indexes,
const uint32_t *output_body_id_per_compact_list) {
const void *lwe_compact_input_indexes,
const void *output_body_id_per_compact_list) {
switch (lwe_dimension) {
case 256:
@@ -12,49 +12,56 @@ void cuda_lwe_expand_64(void *const stream, uint32_t gpu_index,
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_compact_array_in), num_lwe,
lwe_compact_input_indexes, output_body_id_per_compact_list);
static_cast<const uint32_t *>(lwe_compact_input_indexes),
static_cast<const uint32_t *>(output_body_id_per_compact_list));
break;
case 512:
host_lwe_expand<uint64_t, AmortizedDegree<512>>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_compact_array_in), num_lwe,
lwe_compact_input_indexes, output_body_id_per_compact_list);
static_cast<const uint32_t *>(lwe_compact_input_indexes),
static_cast<const uint32_t *>(output_body_id_per_compact_list));
break;
case 1024:
host_lwe_expand<uint64_t, AmortizedDegree<1024>>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_compact_array_in), num_lwe,
lwe_compact_input_indexes, output_body_id_per_compact_list);
static_cast<const uint32_t *>(lwe_compact_input_indexes),
static_cast<const uint32_t *>(output_body_id_per_compact_list));
break;
case 2048:
host_lwe_expand<uint64_t, AmortizedDegree<2048>>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_compact_array_in), num_lwe,
lwe_compact_input_indexes, output_body_id_per_compact_list);
static_cast<const uint32_t *>(lwe_compact_input_indexes),
static_cast<const uint32_t *>(output_body_id_per_compact_list));
break;
case 4096:
host_lwe_expand<uint64_t, AmortizedDegree<4096>>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_compact_array_in), num_lwe,
lwe_compact_input_indexes, output_body_id_per_compact_list);
static_cast<const uint32_t *>(lwe_compact_input_indexes),
static_cast<const uint32_t *>(output_body_id_per_compact_list));
break;
case 8192:
host_lwe_expand<uint64_t, AmortizedDegree<8192>>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_compact_array_in), num_lwe,
lwe_compact_input_indexes, output_body_id_per_compact_list);
static_cast<const uint32_t *>(lwe_compact_input_indexes),
static_cast<const uint32_t *>(output_body_id_per_compact_list));
break;
case 16384:
host_lwe_expand<uint64_t, AmortizedDegree<16384>>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_compact_array_in), num_lwe,
lwe_compact_input_indexes, output_body_id_per_compact_list);
static_cast<const uint32_t *>(lwe_compact_input_indexes),
static_cast<const uint32_t *>(output_body_id_per_compact_list));
break;
default:
PANIC("CUDA error: lwe_dimension not supported."

View File

@@ -34,7 +34,8 @@ __host__ void host_expand_without_verification(
if (sizeof(Torus) == 8) {
cuda_lwe_expand_64(streams[0], gpu_indexes[0], expanded_lwes,
lwe_flattened_compact_array_in, lwe_dimension, num_lwes,
d_lwe_compact_input_indexes, d_body_id_per_compact_list);
(void *)d_lwe_compact_input_indexes,
(void *)d_body_id_per_compact_list);
} else
PANIC("Cuda error: expand is only supported on 64 bits")

View File

@@ -74,6 +74,30 @@ unsafe extern "C" {
polynomial_size: u32,
);
}
unsafe extern "C" {
pub fn cuda_modulus_switch_multi_bit_64(
stream: *mut ffi::c_void,
gpu_index: u32,
lwe_array_out: *mut ffi::c_void,
lwe_array_in: *mut ffi::c_void,
size: u32,
log_modulus: u32,
degree: u32,
grouping_factor: u32,
);
}
unsafe extern "C" {
pub fn cuda_modulus_switch_multi_bit_128(
stream: *mut ffi::c_void,
gpu_index: u32,
lwe_array_out: *mut ffi::c_void,
lwe_array_in: *mut ffi::c_void,
size: u32,
log_modulus: u32,
degree: u32,
grouping_factor: u32,
);
}
pub const PBS_TYPE_MULTI_BIT: PBS_TYPE = 0;
pub const PBS_TYPE_CLASSICAL: PBS_TYPE = 1;
pub type PBS_TYPE = ffi::c_uint;
@@ -189,6 +213,19 @@ unsafe extern "C" {
mem_ptr_void: *mut *mut i8,
);
}
unsafe extern "C" {
pub fn cuda_integer_extract_glwe_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
lwe_array_out: *mut ffi::c_void,
glwe_list: *const ffi::c_void,
glwe_index: u32,
log_modulus: u32,
polynomial_size: u32,
glwe_dimension: u32,
body_count: u32,
);
}
pub const SHIFT_OR_ROTATE_TYPE_LEFT_SHIFT: SHIFT_OR_ROTATE_TYPE = 0;
pub const SHIFT_OR_ROTATE_TYPE_RIGHT_SHIFT: SHIFT_OR_ROTATE_TYPE = 1;
pub const SHIFT_OR_ROTATE_TYPE_LEFT_ROTATE: SHIFT_OR_ROTATE_TYPE = 2;
@@ -1425,6 +1462,15 @@ unsafe extern "C" {
mem_ptr_void: *mut *mut i8,
);
}
unsafe extern "C" {
pub fn cuda_small_scalar_multiplication_integer_64_inplace(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
lwe_array: *mut CudaRadixCiphertextFFI,
scalar: u64,
);
}
unsafe extern "C" {
pub fn scratch_cuda_sub_and_propagate_single_carry_kb_64_inplace(
streams: *const *mut ffi::c_void,
@@ -1485,8 +1531,8 @@ unsafe extern "C" {
lwe_compact_array_in: *const ffi::c_void,
lwe_dimension: u32,
num_lwe: u32,
lwe_compact_input_indexes: *const u32,
output_body_id_per_compact_list: *const u32,
lwe_compact_input_indexes: *const ffi::c_void,
output_body_id_per_compact_list: *const ffi::c_void,
);
}
unsafe extern "C" {
@@ -1770,6 +1816,17 @@ unsafe extern "C" {
input_lwe_ciphertext_count: u32,
);
}
unsafe extern "C" {
pub fn cuda_sub_lwe_ciphertext_vector_plaintext_vector_64(
stream: *mut ffi::c_void,
gpu_index: u32,
lwe_array_out: *mut ffi::c_void,
lwe_array_in: *mut ffi::c_void,
plaintext_array_in: *const ffi::c_void,
input_lwe_dimension: u32,
input_lwe_ciphertext_count: u32,
);
}
unsafe extern "C" {
pub fn cuda_fourier_transform_forward_as_torus_f128_async(
stream: *mut ffi::c_void,

View File

@@ -489,6 +489,7 @@ fn hpu_bench_transfer_throughput<FheType, F>(
#[cfg(not(any(feature = "gpu", feature = "hpu")))]
fn main() {
#[cfg(feature = "pbs-stats")]
use crate::pbs_stats::print_transfer_pbs_counts;
let params = benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
@@ -607,10 +608,11 @@ fn main() {
c.final_summary();
}
#[cfg(feature = "gpu")]
fn main() {
#[cfg(feature = "pbs-stats")]
use crate::pbs_stats::print_transfer_pbs_counts;
let params = benchmark::params_aliases::BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let params = benchmark::params_aliases::BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = tfhe::ConfigBuilder::with_custom_parameters(params).build();
let cks = ClientKey::generate(config);

View File

@@ -16,6 +16,9 @@ Note that you need to use dedicated parameters for the compact public key encryp
The following example shows how a client can encrypt and prove a ciphertext, and how a server can verify and compute the ciphertext:
Note that you need to use dedicated parameters for the compact public key encryption. This helps to reduce the size of encrypted data and speed up the zero-knowledge proof computation.
The following example shows how to generate a CRS and proofs for ZKV1. Compared to the previous example, only the parameters are changed:
```rust
use rand::prelude::*;
use tfhe::prelude::*;

View File

@@ -569,15 +569,15 @@ test("hlapi_compact_ciphertext_list_with_proof", (t) => {
ShortintCompactPublicKeyEncryptionParametersName.PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
);
let config = TfheConfigBuilder.default()
.use_custom_parameters(block_params)
.use_dedicated_compact_public_key_parameters(publicKeyParams)
.build();
let config = TfheConfigBuilder.default()
.use_custom_parameters(block_params)
.use_dedicated_compact_public_key_parameters(publicKeyParams)
.build();
let clientKey = TfheClientKey.generate(config);
let publicKey = TfheCompactPublicKey.new(clientKey);
let crs = CompactPkeCrs.from_config(config, 2 + 32 + 1 + 256);
let crs = CompactPkeCrs.from_config(config, 2 + 32 + 1 + 256);
const compress = false; // We don't compress as it's too slow on wasm
let serialized_pke_crs = crs.serialize(compress);

View File

@@ -131,6 +131,73 @@ pub mod test_tools {
}
}
pub fn mean_confidence_interval(
sample_count: f64,
measured_mean: f64,
measured_std_dev: StandardDev,
probability_to_be_in_the_interval: f64,
) -> MeanConfidenceInterval {
let standard_score = core::f64::consts::SQRT_2
* statrs::function::erf::erfc_inv(1.0 - probability_to_be_in_the_interval);
let interval_delta = standard_score * measured_std_dev.0 / f64::sqrt(sample_count);
let lower_bound = measured_mean - interval_delta;
let upper_bound = measured_mean + interval_delta;
assert!(lower_bound <= upper_bound);
MeanConfidenceInterval {
lower_bound,
upper_bound,
}
}
/// Return a MeanConfidenceInterval when you cannot evaluate the standard deviation of a random
/// variable
pub fn clopper_pearson_exact_confidence_interval(
sample_count: f64,
measured_fails: f64,
confidence_level: f64,
) -> MeanConfidenceInterval {
// import scipy.stats as stats
// import math
//
// # Parameters
// n_tests = 10000 # Number of trials
// p_fail = 2**-5.5 # Theoretical probability of failure
// alpha = 0.01 # Significance level (1 - confidence level)
//
// # /!\ to be replaced by the experimental number of failures
// observed_failures = round(n_tests * p_fail)
//
// # Clopper-Pearson Exact Confidence Interval
// lower_bound = stats.beta.ppf(alpha / 2, observed_failures, n_tests - observed_failures +
// 1) upper_bound = stats.beta.ppf(1 - alpha / 2, observed_failures + 1, n_tests -
// observed_failures)
//
// print("Observed number of failures (k):", observed_failures)
// print(f"Confidence Interval ({(1-alpha)*100}%): [2^{round(math.log2(lower_bound),3)},
// 2^{round(math.log2(upper_bound),3)}]")
let alpha = 1.0 - confidence_level;
let beta_distribution_lower_bound =
statrs::distribution::Beta::new(measured_fails, sample_count - measured_fails + 1.0)
.unwrap();
let beta_distribution_upper_bound =
statrs::distribution::Beta::new(measured_fails + 1.0, sample_count - measured_fails)
.unwrap();
let lower_bound = beta_distribution_lower_bound.inverse_cdf(alpha / 2.0);
let upper_bound = beta_distribution_upper_bound.inverse_cdf(1.0 - alpha / 2.0);
assert!(lower_bound <= upper_bound);
MeanConfidenceInterval {
lower_bound,
upper_bound,
}
}
pub fn variance(samples: &[f64]) -> Variance {
let sample_count = samples.len();
@@ -202,6 +269,32 @@ pub mod test_tools {
}
}
pub fn variance_confidence_interval(
sample_count: f64,
measured_variance: Variance,
probability_to_be_in_the_interval: f64,
) -> VarianceConfidenceInterval {
assert!(probability_to_be_in_the_interval >= 0.0);
assert!(probability_to_be_in_the_interval <= 1.0);
let alpha = 1.0 - probability_to_be_in_the_interval;
let degrees_of_freedom = sample_count - 1.0;
let chi2 = ChiSquared::new(degrees_of_freedom).unwrap();
let chi2_lower = chi2.inverse_cdf(alpha / 2.0);
let chi2_upper = chi2.inverse_cdf(1.0 - alpha / 2.0);
// Lower bound is divided by Chi_right^2 so by chi2_upper, upper bound divided by Chi_left^2
// so chi2_lower
let lower_bound = Variance(degrees_of_freedom * measured_variance.0 / chi2_upper);
let upper_bound = Variance(degrees_of_freedom * measured_variance.0 / chi2_lower);
assert!(lower_bound <= upper_bound);
VarianceConfidenceInterval {
lower_bound,
upper_bound,
}
}
pub fn new_random_generator() -> RandomGenerator<DefaultRandomGenerator> {
RandomGenerator::new(random_seed())
}

View File

@@ -0,0 +1,33 @@
// This file was autogenerated, do not modify by hand.
use crate::core_crypto::commons::dispersion::Variance;
use crate::core_crypto::commons::parameters::*;
/// This formula is only valid when going from a larger to a smaller modulus
/// This formula is based on a heuristic, so may not always be valid
#[allow(dead_code)]
pub(crate) fn generalized_modulus_switch_additive_variance(
input_lwe_dimension: LweDimension,
modulus: f64,
new_modulus: f64,
) -> Variance {
Variance(generalized_modulus_switch_additive_variance_impl(
input_lwe_dimension.0 as f64,
modulus,
new_modulus,
))
}
/// This formula is only valid when going from a larger to a smaller modulus
/// This formula is based on a heuristic, so may not always be valid
#[allow(dead_code)]
pub(crate) fn generalized_modulus_switch_additive_variance_impl(
input_lwe_dimension: f64,
modulus: f64,
new_modulus: f64,
) -> f64 {
(1_f64 / 2.0)
* input_lwe_dimension
* (0.0208333333333333 * modulus.powf(-2.0) + 0.0416666666666667 * new_modulus.powf(-2.0))
+ (1_f64 / 12.0) * modulus.powf(-2.0)
+ (1_f64 / 24.0) * new_modulus.powf(-2.0)
}

View File

@@ -33,12 +33,60 @@ pub fn keyswitch_additive_variance_132_bits_security_gaussian_impl(
decomposition_level_count: f64,
modulus: f64,
) -> f64 {
(1_f64 / 3.0)
* decomposition_level_count
decomposition_level_count
* input_lwe_dimension
* ((5.31469187675068 - 0.0497829131652661 * output_lwe_dimension).exp2()
+ 16.0 * modulus.powf(-2.0))
* ((1_f64 / 4.0) * decomposition_base.powf(2.0) + 0.5)
* ((4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (5.31469187675068 - 0.0497829131652661 * output_lwe_dimension).exp2())
* ((1_f64 / 12.0) * decomposition_base.powf(2.0) + 0.166666666666667)
+ input_lwe_dimension
* (0.0208333333333333 * modulus.powf(-2.0)
+ 0.0416666666666667 * decomposition_base.powf(-2.0 * decomposition_level_count))
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn keyswitch_additive_variance_132_bits_security_tuniform(
input_lwe_dimension: LweDimension,
output_lwe_dimension: LweDimension,
decomposition_base_log: DecompositionBaseLog,
decomposition_level_count: DecompositionLevelCount,
modulus: f64,
) -> Variance {
Variance(keyswitch_additive_variance_132_bits_security_tuniform_impl(
input_lwe_dimension.0 as f64,
output_lwe_dimension.0 as f64,
2.0f64.powi(decomposition_base_log.0 as i32),
decomposition_level_count.0 as f64,
modulus,
))
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn keyswitch_additive_variance_132_bits_security_tuniform_impl(
input_lwe_dimension: f64,
output_lwe_dimension: f64,
decomposition_base: f64,
decomposition_level_count: f64,
modulus: f64,
) -> f64 {
decomposition_level_count
* input_lwe_dimension
* ((4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (1_f64 / 3.0)
* modulus.powf(-2.0)
* ((2.0
* (-0.025167785 * output_lwe_dimension
+ 1.44269504088896 * modulus.ln()
+ 4.10067100000001)
.ceil())
.exp2()
+ 0.5))
* ((1_f64 / 12.0) * decomposition_base.powf(2.0) + 0.166666666666667)
+ input_lwe_dimension
* (0.0208333333333333 * modulus.powf(-2.0)
+ 0.0416666666666667 * decomposition_base.powf(-2.0 * decomposition_level_count))

View File

@@ -0,0 +1,115 @@
// This file was autogenerated, do not modify by hand.
use crate::core_crypto::commons::dispersion::Variance;
use crate::core_crypto::commons::parameters::*;
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn packing_keyswitch_additive_variance_132_bits_security_gaussian(
input_lwe_dimension: LweDimension,
output_glwe_dimension: GlweDimension,
output_polynomial_size: PolynomialSize,
decomposition_base_log: DecompositionBaseLog,
decomposition_level_count: DecompositionLevelCount,
lwe_to_pack: f64,
modulus: f64,
) -> Variance {
Variance(
packing_keyswitch_additive_variance_132_bits_security_gaussian_impl(
input_lwe_dimension.0 as f64,
output_glwe_dimension.0 as f64,
output_polynomial_size.0 as f64,
2.0f64.powi(decomposition_base_log.0 as i32),
decomposition_level_count.0 as f64,
lwe_to_pack,
modulus,
),
)
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn packing_keyswitch_additive_variance_132_bits_security_gaussian_impl(
input_lwe_dimension: f64,
output_glwe_dimension: f64,
output_polynomial_size: f64,
decomposition_base: f64,
decomposition_level_count: f64,
lwe_to_pack: f64,
modulus: f64,
) -> f64 {
decomposition_level_count
* input_lwe_dimension
* lwe_to_pack
* ((4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (-0.0497829131652661 * output_glwe_dimension * output_polynomial_size
+ 5.31469187675068)
.exp2())
* ((1_f64 / 12.0) * decomposition_base.powf(2.0) + 0.166666666666667)
+ 0.5
* input_lwe_dimension
* ((1_f64 / 6.0) * modulus.powf(-2.0)
+ (1_f64 / 12.0) * decomposition_base.powf(-2.0 * decomposition_level_count))
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn packing_keyswitch_additive_variance_132_bits_security_tuniform(
input_lwe_dimension: LweDimension,
output_glwe_dimension: GlweDimension,
output_polynomial_size: PolynomialSize,
decomposition_base_log: DecompositionBaseLog,
decomposition_level_count: DecompositionLevelCount,
lwe_to_pack: f64,
modulus: f64,
) -> Variance {
Variance(
packing_keyswitch_additive_variance_132_bits_security_tuniform_impl(
input_lwe_dimension.0 as f64,
output_glwe_dimension.0 as f64,
output_polynomial_size.0 as f64,
2.0f64.powi(decomposition_base_log.0 as i32),
decomposition_level_count.0 as f64,
lwe_to_pack,
modulus,
),
)
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn packing_keyswitch_additive_variance_132_bits_security_tuniform_impl(
input_lwe_dimension: f64,
output_glwe_dimension: f64,
output_polynomial_size: f64,
decomposition_base: f64,
decomposition_level_count: f64,
lwe_to_pack: f64,
modulus: f64,
) -> f64 {
decomposition_level_count
* input_lwe_dimension
* lwe_to_pack
* ((4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (1_f64 / 3.0)
* modulus.powf(-2.0)
* ((2.0
* (-0.025167785 * output_glwe_dimension * output_polynomial_size
+ 1.44269504088896 * modulus.ln()
+ 4.10067100000001)
.ceil())
.exp2()
+ 0.5))
* ((1_f64 / 12.0) * decomposition_base.powf(2.0) + 0.166666666666667)
+ 0.5
* input_lwe_dimension
* ((1_f64 / 6.0) * modulus.powf(-2.0)
+ (1_f64 / 12.0) * decomposition_base.powf(-2.0 * decomposition_level_count))
}

View File

@@ -28,6 +28,7 @@ pub fn pbs_variance_132_bits_security_gaussian(
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
#[allow(clippy::suspicious_operation_groupings)]
pub fn pbs_variance_132_bits_security_gaussian_impl(
input_lwe_dimension: f64,
output_glwe_dimension: f64,
@@ -42,14 +43,78 @@ pub fn pbs_variance_132_bits_security_gaussian_impl(
* decomposition_level_count
* output_polynomial_size.powf(2.0)
* (output_glwe_dimension + 1.0)
+ (1_f64 / 3.0)
* decomposition_level_count
+ decomposition_level_count
* output_polynomial_size
* ((-0.0497829131652661 * output_glwe_dimension * output_polynomial_size
+ 5.31469187675068)
.exp2()
+ 16.0 * modulus.powf(-2.0))
* ((1_f64 / 4.0) * decomposition_base.powf(2.0) + 0.5)
* ((4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (-0.0497829131652661 * output_glwe_dimension * output_polynomial_size
+ 5.31469187675068)
.exp2())
* ((1_f64 / 12.0) * decomposition_base.powf(2.0) + 0.166666666666667)
* (output_glwe_dimension + 1.0)
+ (1_f64 / 12.0) * modulus.powf(-2.0)
+ (1_f64 / 2.0)
* output_glwe_dimension
* output_polynomial_size
* (0.0208333333333333 * modulus.powf(-2.0)
+ 0.0416666666666667
* decomposition_base.powf(-2.0 * decomposition_level_count))
+ (1_f64 / 24.0) * decomposition_base.powf(-2.0 * decomposition_level_count))
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn pbs_variance_132_bits_security_tuniform(
input_lwe_dimension: LweDimension,
output_glwe_dimension: GlweDimension,
output_polynomial_size: PolynomialSize,
decomposition_base_log: DecompositionBaseLog,
decomposition_level_count: DecompositionLevelCount,
modulus: f64,
) -> Variance {
Variance(pbs_variance_132_bits_security_tuniform_impl(
input_lwe_dimension.0 as f64,
output_glwe_dimension.0 as f64,
output_polynomial_size.0 as f64,
2.0f64.powi(decomposition_base_log.0 as i32),
decomposition_level_count.0 as f64,
modulus,
))
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
#[allow(clippy::suspicious_operation_groupings)]
pub fn pbs_variance_132_bits_security_tuniform_impl(
input_lwe_dimension: f64,
output_glwe_dimension: f64,
output_polynomial_size: f64,
decomposition_base: f64,
decomposition_level_count: f64,
modulus: f64,
) -> f64 {
input_lwe_dimension
* (2.06537277069845e-33
* decomposition_base.powf(2.0)
* decomposition_level_count
* output_polynomial_size.powf(2.0)
* (output_glwe_dimension + 1.0)
+ decomposition_level_count
* output_polynomial_size
* ((4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (1_f64 / 3.0)
* modulus.powf(-2.0)
* ((2.0
* (-0.025167785 * output_glwe_dimension * output_polynomial_size
+ 1.44269504088896 * modulus.ln()
+ 4.10067100000001)
.ceil())
.exp2()
+ 0.5))
* ((1_f64 / 12.0) * decomposition_base.powf(2.0) + 0.166666666666667)
* (output_glwe_dimension + 1.0)
+ (1_f64 / 12.0) * modulus.powf(-2.0)
+ (1_f64 / 2.0)

View File

@@ -0,0 +1,133 @@
// This file was autogenerated, do not modify by hand.
use crate::core_crypto::commons::dispersion::Variance;
use crate::core_crypto::commons::parameters::*;
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn pbs_128_variance_132_bits_security_gaussian(
input_lwe_dimension: LweDimension,
output_glwe_dimension: GlweDimension,
output_polynomial_size: PolynomialSize,
decomposition_base_log: DecompositionBaseLog,
decomposition_level_count: DecompositionLevelCount,
mantissa_size: f64,
modulus: f64,
) -> Variance {
Variance(pbs_128_variance_132_bits_security_gaussian_impl(
input_lwe_dimension.0 as f64,
output_glwe_dimension.0 as f64,
output_polynomial_size.0 as f64,
2.0f64.powi(decomposition_base_log.0 as i32),
decomposition_level_count.0 as f64,
mantissa_size,
modulus,
))
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn pbs_128_variance_132_bits_security_gaussian_impl(
input_lwe_dimension: f64,
output_glwe_dimension: f64,
output_polynomial_size: f64,
decomposition_base: f64,
decomposition_level_count: f64,
mantissa_size: f64,
modulus: f64,
) -> f64 {
input_lwe_dimension
* (decomposition_level_count
* output_polynomial_size
* ((4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (-0.0497829131652661 * output_glwe_dimension * output_polynomial_size
+ 5.31469187675068)
.exp2())
* ((1_f64 / 12.0) * decomposition_base.powf(2.0) + 0.166666666666667)
* (output_glwe_dimension + 1.0)
+ (1_f64 / 12.0) * modulus.powf(-2.0)
+ (1_f64 / 2.0)
* output_glwe_dimension
* output_polynomial_size
* (0.0208333333333333 * modulus.powf(-2.0)
+ 0.0416666666666667
* decomposition_base.powf(-2.0 * decomposition_level_count))
+ (1_f64 / 24.0) * decomposition_base.powf(-2.0 * decomposition_level_count)
+ 0.16756294607814
* (-2.0 * mantissa_size).exp2()
* decomposition_base.powf(2.0)
* decomposition_level_count
* output_polynomial_size.powf(2.0)
* (output_glwe_dimension + 1.0))
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn pbs_128_variance_132_bits_security_tuniform(
input_lwe_dimension: LweDimension,
output_glwe_dimension: GlweDimension,
output_polynomial_size: PolynomialSize,
decomposition_base_log: DecompositionBaseLog,
decomposition_level_count: DecompositionLevelCount,
mantissa_size: f64,
modulus: f64,
) -> Variance {
Variance(pbs_128_variance_132_bits_security_tuniform_impl(
input_lwe_dimension.0 as f64,
output_glwe_dimension.0 as f64,
output_polynomial_size.0 as f64,
2.0f64.powi(decomposition_base_log.0 as i32),
decomposition_level_count.0 as f64,
mantissa_size,
modulus,
))
}
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn pbs_128_variance_132_bits_security_tuniform_impl(
input_lwe_dimension: f64,
output_glwe_dimension: f64,
output_polynomial_size: f64,
decomposition_base: f64,
decomposition_level_count: f64,
mantissa_size: f64,
modulus: f64,
) -> f64 {
input_lwe_dimension
* (decomposition_level_count
* output_polynomial_size
* ((4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (1_f64 / 3.0)
* modulus.powf(-2.0)
* ((2.0
* (-0.025167785 * output_glwe_dimension * output_polynomial_size
+ 1.44269504088896 * modulus.ln()
+ 4.10067100000001)
.ceil())
.exp2()
+ 0.5))
* ((1_f64 / 12.0) * decomposition_base.powf(2.0) + 0.166666666666667)
* (output_glwe_dimension + 1.0)
+ (1_f64 / 12.0) * modulus.powf(-2.0)
+ (1_f64 / 2.0)
* output_glwe_dimension
* output_polynomial_size
* (0.0208333333333333 * modulus.powf(-2.0)
+ 0.0416666666666667
* decomposition_base.powf(-2.0 * decomposition_level_count))
+ (1_f64 / 24.0) * decomposition_base.powf(-2.0 * decomposition_level_count)
+ 0.16756294607814
* (-2.0 * mantissa_size).exp2()
* decomposition_base.powf(2.0)
* decomposition_level_count
* output_polynomial_size.powf(2.0)
* (output_glwe_dimension + 1.0))
}

View File

@@ -1,5 +1,9 @@
// This file was autogenerated, do not modify by hand.
pub mod generalized_modulus_switch;
pub mod lwe_keyswitch;
pub mod lwe_multi_bit_programmable_bootstrap;
pub mod lwe_packing_keyswitch;
pub mod lwe_programmable_bootstrap;
pub mod lwe_programmable_bootstrap_128;
pub mod modulus_switch;
pub mod secure_noise;

View File

@@ -0,0 +1,47 @@
// This file was autogenerated, do not modify by hand.
use crate::core_crypto::commons::dispersion::Variance;
use crate::core_crypto::commons::parameters::*;
/// This formula is only valid when going from a larger to a smaller modulus
pub fn modulus_switch_additive_variance(
input_lwe_dimension: LweDimension,
modulus: f64,
new_modulus: f64,
) -> Variance {
Variance(modulus_switch_additive_variance_impl(
input_lwe_dimension.0 as f64,
modulus,
new_modulus,
))
}
/// This formula is only valid when going from a larger to a smaller modulus
pub fn modulus_switch_additive_variance_impl(
input_lwe_dimension: f64,
modulus: f64,
new_modulus: f64,
) -> f64 {
input_lwe_dimension
* (0.0208333333333333 * modulus.powf(-2.0) + 0.0416666666666667 * new_modulus.powf(-2.0))
+ (1_f64 / 6.0) * modulus.powf(-2.0)
+ (1_f64 / 12.0) * new_modulus.powf(-2.0)
}
/// This formula is only valid when going from a larger to a smaller modulus
pub fn modulus_switch_multi_bit_additive_variance(
input_lwe_dimension: LweDimension,
modulus: f64,
new_modulus: f64,
grouping_factor: f64,
) -> Variance {
let multi_bit_factor = (2_f64 / grouping_factor) * (2_f64.powf(grouping_factor) - 1_f64)
/ 2_f64.powf(grouping_factor);
Variance(
multi_bit_factor
* modulus_switch_additive_variance_impl(
input_lwe_dimension.0 as f64,
modulus,
new_modulus,
),
)
}

View File

@@ -25,7 +25,64 @@ pub fn minimal_variance_for_132_bits_security_gaussian_impl(
lwe_dimension: f64,
modulus: f64,
) -> f64 {
(5.31469187675068 - 0.0497829131652661 * lwe_dimension).exp2() + 16.0 * modulus.powf(-2.0)
(4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (5.31469187675068 - 0.0497829131652661 * lwe_dimension).exp2()
}
// pub fn minimal_glwe_variance_for_132_bits_security_tuniform(
// glwe_dimension: GlweDimension,
// polynomial_size: PolynomialSize,
// modulus: f64,
// ) -> Variance {
// let lwe_dimension = glwe_dimension.to_equivalent_lwe_dimension(polynomial_size);
// minimal_lwe_variance_for_132_bits_security_tuniform(lwe_dimension, modulus)
// }
// pub fn minimal_lwe_variance_for_132_bits_security_tuniform(
// lwe_dimension: LweDimension,
// modulus: f64,
// ) -> Variance {
// Variance(minimal_variance_for_132_bits_security_tuniform_impl(
// lwe_dimension.0 as f64,
// modulus,
// ))
// }
pub fn minimal_variance_for_132_bits_security_tuniform_impl(
lwe_dimension: f64,
modulus: f64,
) -> f64 {
(4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (1_f64 / 3.0)
* modulus.powf(-2.0)
* ((2.0
* (-0.025167785 * lwe_dimension
+ 1.44269504088896 * modulus.ln()
+ 4.10067100000001)
.ceil())
.exp2()
+ 0.5)
}
pub fn minimal_glwe_bound_for_132_bits_security_tuniform(
glwe_dimension: GlweDimension,
polynomial_size: PolynomialSize,
modulus: f64,
) -> u32 {
let lwe_dimension = glwe_dimension.to_equivalent_lwe_dimension(polynomial_size);
minimal_lwe_bound_for_132_bits_security_tuniform(lwe_dimension, modulus)
}
pub fn minimal_lwe_bound_for_132_bits_security_tuniform(
lwe_dimension: LweDimension,
modulus: f64,
) -> u32 {
minimal_bound_for_132_bits_security_tuniform_impl(lwe_dimension.0 as f64, modulus)
}
pub fn minimal_bound_for_132_bits_security_tuniform_impl(lwe_dimension: f64, modulus: f64) -> u32 {
((-0.025167785 * lwe_dimension + 1.44269504088896 * modulus.ln() + 4.10067100000001).ceil())
as u32
}
pub fn minimal_glwe_variance_for_132_bits_security_tuniform(
@@ -47,18 +104,18 @@ pub fn minimal_lwe_variance_for_132_bits_security_tuniform(
))
}
pub fn minimal_variance_for_132_bits_security_tuniform_impl(
lwe_dimension: f64,
modulus: f64,
) -> f64 {
(4.0 - 2.88539008177793 * modulus.ln()).exp2()
+ (1_f64 / 3.0)
* modulus.powf(-2.0)
* ((2.0
* (-0.025167785 * lwe_dimension
+ std::f64::consts::LOG2_E * modulus.ln()
+ 4.10067100000001)
.ceil())
.exp2()
+ 0.5)
}
// pub fn minimal_variance_for_132_bits_security_tuniform_impl(
// lwe_dimension: f64,
// modulus: f64,
// ) -> f64 {
// (4.0 - 2.88539008177793 * modulus.ln()).exp2()
// + (1_f64 / 3.0)
// * modulus.powf(-2.0)
// * ((2.0
// * (-0.025167785 * lwe_dimension
// + std::f64::consts::LOG2_E * modulus.ln()
// + 4.10067100000001)
// .ceil())
// .exp2()
// + 0.5)
// }

View File

@@ -1,7 +1,10 @@
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_keyswitch_key::CudaLweKeyswitchKey;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::{keyswitch_async, CudaStreams};
use crate::core_crypto::gpu::{
cuda_modulus_switch_ciphertext_async, cuda_modulus_switch_multi_bit_ciphertext_async,
cuda_modulus_switch_multi_bit_ciphertext_u128_async, keyswitch_async, CudaStreams,
};
use crate::core_crypto::prelude::UnsignedInteger;
/// # Safety
@@ -107,3 +110,65 @@ pub fn cuda_keyswitch_lwe_ciphertext<Scalar>(
}
streams.synchronize();
}
pub fn cuda_modulus_switch_ciphertext<Scalar>(
output_lwe_ciphertext: &mut CudaLweCiphertextList<Scalar>,
log_modulus: u32,
streams: &CudaStreams,
) where
Scalar: UnsignedInteger,
{
unsafe {
cuda_modulus_switch_ciphertext_async(
streams,
&mut output_lwe_ciphertext.0.d_vec,
log_modulus,
);
}
streams.synchronize();
}
pub fn cuda_modulus_switch_multi_bit_ciphertext<Scalar>(
lwe_array_out: &mut CudaVec<Scalar>,
input_lwe_ciphertext: &mut CudaLweCiphertextList<Scalar>,
log_modulus: u32,
polynomial_size: u32,
grouping_factor: u32,
streams: &CudaStreams,
) where
Scalar: UnsignedInteger,
{
unsafe {
cuda_modulus_switch_multi_bit_ciphertext_async(
streams,
lwe_array_out,
&mut input_lwe_ciphertext.0.d_vec,
log_modulus,
polynomial_size,
grouping_factor,
);
}
streams.synchronize();
}
pub fn cuda_modulus_switch_multi_bit_ciphertext_u128<Scalar>(
lwe_array_out: &mut CudaVec<Scalar>,
input_lwe_ciphertext: &mut CudaLweCiphertextList<Scalar>,
log_modulus: u32,
polynomial_size: u32,
grouping_factor: u32,
streams: &CudaStreams,
) where
Scalar: UnsignedInteger,
{
unsafe {
cuda_modulus_switch_multi_bit_ciphertext_u128_async(
streams,
lwe_array_out,
&mut input_lwe_ciphertext.0.d_vec,
log_modulus,
polynomial_size,
grouping_factor,
);
}
streams.synchronize();
}

View File

@@ -5,7 +5,8 @@ use crate::core_crypto::gpu::{
add_lwe_ciphertext_vector_plaintext_vector_assign_async,
add_lwe_ciphertext_vector_plaintext_vector_async, mult_lwe_ciphertext_vector_cleartext_vector,
mult_lwe_ciphertext_vector_cleartext_vector_assign_async,
negate_lwe_ciphertext_vector_assign_async, negate_lwe_ciphertext_vector_async, CudaStreams,
negate_lwe_ciphertext_vector_assign_async, negate_lwe_ciphertext_vector_async,
sub_lwe_ciphertext_vector_plaintext_vector_assign_async, CudaStreams,
};
use crate::core_crypto::prelude::UnsignedInteger;
@@ -222,6 +223,29 @@ pub unsafe fn cuda_lwe_ciphertext_plaintext_add_assign_async<Scalar>(
);
}
/// # Safety
///
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not
/// be dropped until stream is synchronised
pub unsafe fn cuda_lwe_ciphertext_plaintext_sub_assign_async<Scalar>(
lhs: &mut CudaLweCiphertextList<Scalar>,
rhs: &CudaVec<Scalar>,
stream: &CudaStreams,
) where
Scalar: UnsignedInteger,
{
let num_samples = lhs.lwe_ciphertext_count().0 as u32;
let lwe_dimension = &lhs.lwe_dimension();
sub_lwe_ciphertext_vector_plaintext_vector_assign_async(
stream,
&mut lhs.0.d_vec,
rhs,
*lwe_dimension,
num_samples,
);
}
/// # Safety
///
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must not
@@ -417,6 +441,19 @@ pub fn cuda_lwe_ciphertext_plaintext_add_assign<Scalar>(
streams.synchronize();
}
pub fn cuda_lwe_ciphertext_plaintext_sub_assign<Scalar>(
lhs: &mut CudaLweCiphertextList<Scalar>,
rhs: &CudaVec<Scalar>,
stream: &CudaStreams,
) where
Scalar: UnsignedInteger,
{
unsafe {
cuda_lwe_ciphertext_plaintext_sub_assign_async(lhs, rhs, stream);
}
stream.synchronize();
}
pub fn cuda_lwe_ciphertext_negate<Scalar>(
output: &mut CudaLweCiphertextList<Scalar>,
input: &CudaLweCiphertextList<Scalar>,

View File

@@ -150,6 +150,149 @@ pub unsafe fn cuda_programmable_bootstrap_lwe_ciphertext_async<Scalar>(
);
}
/// # Safety
///
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must not
/// be dropped until streams is synchronised
#[allow(clippy::too_many_arguments)]
pub unsafe fn cuda_programmable_bootstrap_lwe_ciphertext_no_ms_noise_reduction_async<Scalar>(
input: &CudaLweCiphertextList<Scalar>,
output: &mut CudaLweCiphertextList<Scalar>,
accumulator: &CudaGlweCiphertextList<Scalar>,
lut_indexes: &CudaVec<Scalar>,
output_indexes: &CudaVec<Scalar>,
input_indexes: &CudaVec<Scalar>,
num_samples: LweCiphertextCount,
bsk: &CudaLweBootstrapKey,
streams: &CudaStreams,
) where
// CastInto required for PBS modulus switch which returns a usize
Scalar: UnsignedTorus + CastInto<usize>,
{
assert_eq!(
input.lwe_dimension(),
bsk.input_lwe_dimension(),
"Mismatched input LweDimension. LweCiphertext input LweDimension {:?}. \
FourierLweMultiBitBootstrapKey input LweDimension {:?}.",
input.lwe_dimension(),
bsk.input_lwe_dimension(),
);
assert_eq!(
output.lwe_dimension(),
bsk.output_lwe_dimension(),
"Mismatched output LweDimension. LweCiphertext output LweDimension {:?}. \
FourierLweMultiBitBootstrapKey output LweDimension {:?}.",
output.lwe_dimension(),
bsk.output_lwe_dimension(),
);
assert_eq!(
accumulator.glwe_dimension(),
bsk.glwe_dimension(),
"Mismatched GlweSize. Accumulator GlweSize {:?}. \
FourierLweMultiBitBootstrapKey GlweSize {:?}.",
accumulator.glwe_dimension(),
bsk.glwe_dimension(),
);
assert_eq!(
accumulator.polynomial_size(),
bsk.polynomial_size(),
"Mismatched PolynomialSize. Accumulator PolynomialSize {:?}. \
FourierLweMultiBitBootstrapKey PolynomialSize {:?}.",
accumulator.polynomial_size(),
bsk.polynomial_size(),
);
assert_eq!(
input.ciphertext_modulus(),
output.ciphertext_modulus(),
"Mismatched CiphertextModulus between input ({:?}) and output ({:?})",
input.ciphertext_modulus(),
output.ciphertext_modulus(),
);
assert_eq!(
input.ciphertext_modulus(),
accumulator.ciphertext_modulus(),
"Mismatched CiphertextModulus between input ({:?}) and accumulator ({:?})",
input.ciphertext_modulus(),
accumulator.ciphertext_modulus(),
);
assert_eq!(
streams.gpu_indexes[0],
bsk.d_vec.gpu_index(0),
"GPU error: first stream is on GPU {}, first bsk pointer is on GPU {}",
streams.gpu_indexes[0].get(),
bsk.d_vec.gpu_index(0).get(),
);
assert_eq!(
streams.gpu_indexes[0],
input.0.d_vec.gpu_index(0),
"GPU error: first stream is on GPU {}, first input pointer is on GPU {}",
streams.gpu_indexes[0].get(),
input.0.d_vec.gpu_index(0).get(),
);
assert_eq!(
streams.gpu_indexes[0],
output.0.d_vec.gpu_index(0),
"GPU error: first stream is on GPU {}, first output pointer is on GPU {}",
streams.gpu_indexes[0].get(),
output.0.d_vec.gpu_index(0).get(),
);
assert_eq!(
streams.gpu_indexes[0],
accumulator.0.d_vec.gpu_index(0),
"GPU error: first stream is on GPU {}, first accumulator pointer is on GPU {}",
streams.gpu_indexes[0].get(),
accumulator.0.d_vec.gpu_index(0).get(),
);
assert_eq!(
streams.gpu_indexes[0],
input_indexes.gpu_index(0),
"GPU error: first stream is on GPU {}, first input indexes pointer is on GPU {}",
streams.gpu_indexes[0].get(),
input_indexes.gpu_index(0).get(),
);
assert_eq!(
streams.gpu_indexes[0],
output_indexes.gpu_index(0),
"GPU error: first stream is on GPU {}, first output indexes pointer is on GPU {}",
streams.gpu_indexes[0].get(),
output_indexes.gpu_index(0).get(),
);
assert_eq!(
streams.gpu_indexes[0],
lut_indexes.gpu_index(0),
"GPU error: first stream is on GPU {}, first lut indexes pointer is on GPU {}",
streams.gpu_indexes[0].get(),
lut_indexes.gpu_index(0).get(),
);
let lwe_dimension = input.lwe_dimension();
let ct_modulus = input.ciphertext_modulus().raw_modulus_float();
programmable_bootstrap_async(
streams,
&mut output.0.d_vec,
output_indexes,
&accumulator.0.d_vec,
lut_indexes,
&input.0.d_vec,
input_indexes,
&bsk.d_vec,
lwe_dimension,
bsk.glwe_dimension(),
bsk.polynomial_size(),
bsk.decomp_base_log(),
bsk.decomp_level_count(),
num_samples.0 as u32,
None,
ct_modulus,
);
}
/// # Safety
///
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must not
@@ -302,6 +445,36 @@ pub fn cuda_programmable_bootstrap_lwe_ciphertext<Scalar>(
streams.synchronize();
}
#[allow(clippy::too_many_arguments)]
pub fn cuda_programmable_bootstrap_lwe_ciphertext_no_ms_noise_reduction<Scalar>(
input: &CudaLweCiphertextList<Scalar>,
output: &mut CudaLweCiphertextList<Scalar>,
accumulator: &CudaGlweCiphertextList<Scalar>,
lut_indexes: &CudaVec<Scalar>,
output_indexes: &CudaVec<Scalar>,
input_indexes: &CudaVec<Scalar>,
num_samples: LweCiphertextCount,
bsk: &CudaLweBootstrapKey,
streams: &CudaStreams,
) where
Scalar: UnsignedTorus + CastInto<usize>,
{
unsafe {
cuda_programmable_bootstrap_lwe_ciphertext_no_ms_noise_reduction_async(
input,
output,
accumulator,
lut_indexes,
output_indexes,
input_indexes,
num_samples,
bsk,
streams,
);
}
streams.synchronize();
}
/// Performs a programmable bootstrap (PBS) on a list of 128-bit LWE ciphertexts,
/// storing the result back into the provided `output` list at matching indices.
///

View File

@@ -3,7 +3,7 @@
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::{
convert_lwe_keyswitch_key_async, CiphertextModulus, CudaStreams, DecompositionBaseLog,
DecompositionLevelCount,
DecompositionLevelCount, LweDimension,
};
use crate::core_crypto::prelude::{
lwe_keyswitch_key_input_key_element_encrypted_size, LweKeyswitchKeyOwned, LweSize,
@@ -69,4 +69,14 @@ impl<T: UnsignedInteger> CudaLweKeyswitchKey<T> {
pub(crate) fn decomposition_level_count(&self) -> DecompositionLevelCount {
self.decomp_level_count
}
pub(crate) fn input_key_lwe_dimension(&self) -> LweDimension {
self.input_key_lwe_size().to_lwe_dimension()
}
pub(crate) fn output_key_lwe_dimension(&self) -> LweDimension {
self.output_key_lwe_size().to_lwe_dimension()
}
pub(crate) fn ciphertext_modulus(&self) -> CiphertextModulus<T> {
self.ciphertext_modulus
}
}

View File

@@ -8,9 +8,9 @@ use crate::core_crypto::gpu::lwe_bootstrap_key::{
};
use crate::core_crypto::gpu::vec::{CudaVec, GpuIndex};
use crate::core_crypto::prelude::{
CiphertextModulus, DecompositionBaseLog, DecompositionLevelCount, GlweCiphertextCount,
GlweDimension, LweBskGroupingFactor, LweCiphertextCount, LweDimension, PolynomialSize,
UnsignedInteger,
CiphertextModulus, DecompositionBaseLog, DecompositionLevelCount, DispersionParameter,
GlweCiphertextCount, GlweDimension, LweBskGroupingFactor, LweCiphertextCount, LweDimension,
PolynomialSize, UnsignedInteger,
};
pub use algorithms::*;
pub use entities::*;
@@ -697,6 +697,91 @@ pub unsafe fn cuda_modulus_switch_ciphertext_async<T: UnsignedInteger>(
);
}
#[allow(clippy::too_many_arguments)]
pub fn cuda_improve_noise_modulus_switch_ciphertext<T: UnsignedInteger>(
lwe_array_out: &mut CudaVec<T>,
lwe_array_in: &CudaVec<T>,
lwe_in_indexes: &CudaVec<T>,
lwe_dimension: LweDimension,
num_samples: u32,
log_modulus: u32,
modulus: f64,
noise_reduction_key: &CudaModulusSwitchNoiseReductionKey,
streams: &CudaStreams,
) {
unsafe {
cuda_improve_noise_modulus_switch_64(
streams.ptr[0],
streams.gpu_indexes[0].get(),
lwe_array_out.as_mut_c_ptr(0),
lwe_array_in.as_c_ptr(0),
lwe_in_indexes.as_c_ptr(0),
noise_reduction_key.modulus_switch_zeros.as_c_ptr(0),
lwe_dimension.to_lwe_size().0 as u32,
num_samples,
noise_reduction_key.num_zeros,
noise_reduction_key
.ms_input_variance
.get_modular_variance(modulus)
.value,
noise_reduction_key.ms_r_sigma_factor.0,
noise_reduction_key.ms_bound.0,
log_modulus,
);
}
streams.synchronize_one(0);
}
/// # Safety
///
/// [CudaStreams::synchronize] __must__ be called as soon as synchronization is
/// required
#[allow(clippy::too_many_arguments)]
pub unsafe fn cuda_modulus_switch_multi_bit_ciphertext_async<T: UnsignedInteger>(
streams: &CudaStreams,
lwe_array_out: &mut CudaVec<T>,
lwe_array_in: &mut CudaVec<T>,
log_modulus: u32,
polynomial_size: u32,
grouping_factor: u32,
) {
cuda_modulus_switch_multi_bit_64(
streams.ptr[0],
streams.gpu_indexes[0].get(),
lwe_array_out.as_mut_c_ptr(0),
lwe_array_in.as_mut_c_ptr(0),
lwe_array_in.len() as u32,
log_modulus,
polynomial_size,
grouping_factor,
);
}
/// # Safety
///
/// [CudaStreams::synchronize] __must__ be called as soon as synchronization is
/// required
#[allow(clippy::too_many_arguments)]
pub unsafe fn cuda_modulus_switch_multi_bit_ciphertext_u128_async<T: UnsignedInteger>(
streams: &CudaStreams,
lwe_array_out: &mut CudaVec<T>,
lwe_array_in: &mut CudaVec<T>,
log_modulus: u32,
polynomial_size: u32,
grouping_factor: u32,
) {
cuda_modulus_switch_multi_bit_128(
streams.ptr[0],
streams.gpu_indexes[0].get(),
lwe_array_out.as_mut_c_ptr(0),
lwe_array_in.as_mut_c_ptr(0),
lwe_array_in.len() as u32,
log_modulus,
polynomial_size,
grouping_factor,
);
}
/// Addition of a vector of LWE ciphertexts
///
/// # Safety
@@ -866,6 +951,30 @@ pub unsafe fn add_lwe_ciphertext_vector_plaintext_vector_assign_async<T: Unsigne
);
}
/// Assigned subtraction of a vector of LWE ciphertexts with a vector of plaintexts
///
/// # Safety
///
/// [CudaStreams::synchronize] __must__ be called as soon as synchronization is
/// required
pub unsafe fn sub_lwe_ciphertext_vector_plaintext_vector_assign_async<T: UnsignedInteger>(
streams: &CudaStreams,
lwe_array_out: &mut CudaVec<T>,
plaintext_in: &CudaVec<T>,
lwe_dimension: LweDimension,
num_samples: u32,
) {
cuda_sub_lwe_ciphertext_vector_plaintext_vector_64(
streams.ptr[0],
streams.gpu_indexes[0].get(),
lwe_array_out.as_mut_c_ptr(0),
lwe_array_out.as_mut_c_ptr(0),
plaintext_in.as_c_ptr(0),
lwe_dimension.0 as u32,
num_samples,
);
}
/// Negation of a vector of LWE ciphertexts
///
/// # Safety

View File

@@ -1,3 +1,16 @@
// use shortint::parameters::compact_public_key_only::p_fail_2_minus_64::ks_pbs::{
// V0_11_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64_ZKV1,
// V0_11_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64_ZKV1,
// };
// use shortint::parameters::key_switching::p_fail_2_minus_64::ks_pbs::{
// V0_11_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64_ZKV1,
// V0_11_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64_ZKV1,
// };
use crate::shortint::parameters::{
CompactPublicKeyEncryptionParameters, ShortintKeySwitchingParameters,
};
use crate::shortint::ClassicPBSParameters;
use crate::core_crypto::algorithms::lwe_encryption::decrypt_lwe_ciphertext;
use crate::core_crypto::algorithms::test::noise_distribution::lwe_encryption_noise::lwe_compact_public_key_encryption_expected_variance;
use crate::core_crypto::commons::dispersion::DispersionParameter;
@@ -11,10 +24,7 @@ use crate::shortint::parameters::test_params::{
TEST_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
TEST_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
};
use crate::shortint::parameters::{
AtomicPatternParameters, ClassicPBSParameters, CompactPublicKeyEncryptionParameters,
ShortintKeySwitchingParameters,
};
use crate::shortint::parameters::AtomicPatternParameters;
use crate::{ClientKey, CompactCiphertextList, CompactPublicKey, ConfigBuilder, FheUint2};
use rayon::prelude::*;

View File

@@ -197,7 +197,7 @@ impl CudaCompactCiphertextListExpander {
// lwe_dimension and lwe_ciphertext_count at runtime, these and other attributes are pre-computed
// and stored directly in the CudaVec structure.
pub struct CudaFlattenedVecCompactCiphertextList {
d_flattened_vec: CudaVec<u64>,
pub(crate) d_flattened_vec: CudaVec<u64>,
num_lwe_per_compact_list: Vec<u32>,
pub(crate) data_info: Vec<DataKind>,
is_boolean: Vec<bool>,

View File

@@ -1,6 +1,6 @@
use crate::shortint::ciphertext::{Degree, NoiseLevel};
use crate::shortint::parameters::AtomicPatternKind;
use crate::shortint::{CarryModulus, MessageModulus};
use crate::shortint::{CarryModulus, MaxNoiseLevel, MessageModulus};
#[derive(Clone, Copy)]
pub struct CudaBlockInfo {
@@ -15,6 +15,16 @@ impl CudaBlockInfo {
pub fn carry_is_empty(&self) -> bool {
self.degree.get() < self.message_modulus.0
}
#[cfg_attr(any(feature = "noise-asserts", test), track_caller)]
pub fn set_noise_level(&mut self, noise_level: NoiseLevel, max_noise_level: MaxNoiseLevel) {
if cfg!(feature = "noise-asserts") || cfg!(test) {
max_noise_level.validate(noise_level).unwrap()
} else {
let _ = max_noise_level;
}
self.noise_level = noise_level;
}
}
#[derive(Clone)]

View File

@@ -28,6 +28,12 @@ impl RadixClientKey {
panic!("Only the standard atomic pattern is supported on GPU")
};
// assert_eq!(
// cks_params.encryption_key_choice,
// EncryptionKeyChoice::Big,
// "Compression is only compatible with ciphertext in post PBS dimension"
// );
assert_eq!(
self.parameters().encryption_key_choice(),
EncryptionKeyChoice::Big,

View File

@@ -1,4 +1,5 @@
use crate::core_crypto::gpu::entities::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::CudaStreams;
@@ -13,7 +14,7 @@ use crate::integer::gpu::ciphertext::CudaRadixCiphertext;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
use crate::integer::gpu::{
compress_integer_radix_async, cuda_memcpy_async_gpu_to_gpu, decompress_integer_radix_async,
get_compression_size_on_gpu, get_decompression_size_on_gpu,
extract_glwe_async, get_compression_size_on_gpu, get_decompression_size_on_gpu,
};
use crate::shortint::ciphertext::{Degree, NoiseLevel};
use crate::shortint::parameters::AtomicPatternKind;
@@ -96,6 +97,37 @@ impl Clone for CudaPackedGlweCiphertextList {
}
}
impl CudaPackedGlweCiphertextList {
pub fn extract_glwe(
&self,
glwe_index: usize,
streams: &CudaStreams,
) -> CudaGlweCiphertextList<u64> {
let mut output_cuda_glwe_list = CudaGlweCiphertextList::new(
self.glwe_dimension,
self.polynomial_size,
GlweCiphertextCount(1),
self.ciphertext_modulus,
streams,
);
unsafe {
extract_glwe_async(
streams,
&mut output_cuda_glwe_list.0.d_vec,
&self.data,
glwe_index as u32,
self.storage_log_modulus.0 as u32,
self.glwe_dimension,
self.polynomial_size,
self.bodies_count as u32,
);
}
streams.synchronize();
output_cuda_glwe_list
}
}
impl CudaCompressionKey {
pub fn from_compression_key(compression_key: &CompressionKey, streams: &CudaStreams) -> Self {
Self {
@@ -258,13 +290,13 @@ impl CudaDecompressionKey {
end_block_index: usize,
streams: &CudaStreams,
) -> Result<CudaRadixCiphertext, crate::Error> {
if self.message_modulus.0 != self.carry_modulus.0 {
return Err(crate::Error::new(format!(
"Tried to unpack values from a list where message modulus \
({:?}) is != carry modulus ({:?}), this is not supported.",
self.message_modulus, self.carry_modulus,
)));
}
// if self.message_modulus.0 != self.carry_modulus.0 {
// return Err(crate::Error::new(format!(
// "Tried to unpack values from a list where message modulus \
// ({:?}) is != carry modulus ({:?}), this is not supported.",
// self.message_modulus, self.carry_modulus,
// )));
// }
if end_block_index >= packed_list.bodies_count {
return Err(crate::Error::new(format!(

View File

@@ -454,6 +454,43 @@ pub fn get_scalar_mul_integer_radix_kb_size_on_gpu(
size_tracker
}
#[allow(clippy::too_many_arguments)]
/// # Safety
///
/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
/// is required
pub unsafe fn unchecked_small_scalar_mul_integer_async(
streams: &CudaStreams,
lwe_array: &mut CudaRadixCiphertext,
small_scalar: u64,
) {
assert_eq!(
streams.gpu_indexes[0],
lwe_array.d_blocks.0.d_vec.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
let mut lwe_array_degrees = lwe_array.info.blocks.iter().map(|b| b.degree.0).collect();
let mut lwe_array_noise_levels = lwe_array
.info
.blocks
.iter()
.map(|b| b.noise_level.0)
.collect();
let mut cuda_ffi_lwe_array = prepare_cuda_radix_ffi(
lwe_array,
&mut lwe_array_degrees,
&mut lwe_array_noise_levels,
);
cuda_small_scalar_multiplication_integer_64_inplace(
streams.ptr.as_ptr(),
streams.gpu_indexes_ptr(),
streams.len() as u32,
&mut cuda_ffi_lwe_array,
small_scalar,
);
}
#[allow(clippy::too_many_arguments)]
/// # Safety
///
@@ -727,6 +764,39 @@ pub fn get_decompression_size_on_gpu(
///
/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
/// is required
pub unsafe fn extract_glwe_async<T: UnsignedInteger>(
streams: &CudaStreams,
lwe_array_out: &mut CudaVec<T>,
glwe_list: &CudaVec<T>,
glwe_index: u32,
storage_log_modulus: u32,
compression_glwe_dimension: GlweDimension,
compression_polynomial_size: PolynomialSize,
bodies_count: u32,
) {
assert_eq!(
streams.gpu_indexes[0],
lwe_array_out.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
assert_eq!(
streams.gpu_indexes[0],
glwe_list.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
cuda_integer_extract_glwe_64(
streams.ptr.as_ptr(),
streams.gpu_indexes_ptr(),
lwe_array_out.as_mut_c_ptr(0),
glwe_list.as_c_ptr(0),
glwe_index,
storage_log_modulus,
compression_polynomial_size.0 as u32,
compression_glwe_dimension.0 as u32,
bodies_count,
);
}
pub unsafe fn unchecked_add_integer_radix_assign_async(
streams: &CudaStreams,
radix_lwe_left: &mut CudaRadixCiphertext,

View File

@@ -60,6 +60,8 @@ mod vector_find;
#[cfg(test)]
mod tests_long_run;
#[cfg(test)]
mod tests_noise_distribution;
#[cfg(test)]
mod tests_signed;
#[cfg(test)]
mod tests_unsigned;
@@ -186,9 +188,12 @@ impl CudaServerKey {
PBSOrder::BootstrapKeyswitch => self.key_switching_key.output_key_lwe_size(),
};
let mut bits_per_block = self.message_modulus.0.ilog2();
if bits_per_block == 0 {
bits_per_block = 1;
}
let decomposer =
BlockDecomposer::with_block_count(scalar, self.message_modulus.0.ilog2(), num_blocks)
.iter_as::<u64>();
BlockDecomposer::with_block_count(scalar, bits_per_block, num_blocks).iter_as::<u64>();
let mut cpu_lwe_list = LweCiphertextList::new(
0,
lwe_size,

View File

@@ -113,6 +113,10 @@ impl CudaServerKey {
return;
}
if decomposed_scalar.is_empty() {
return;
}
match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
unchecked_scalar_mul_integer_radix_kb_async(

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
pub mod atomic_pattern;

View File

@@ -133,13 +133,13 @@ impl DecompressionKey {
packed: &CompressedCiphertextList,
index: usize,
) -> Result<Ciphertext, crate::Error> {
if packed.message_modulus.0 != packed.carry_modulus.0 {
return Err(crate::Error::new(format!(
"Tried to unpack values from a list where message modulus \
({:?}) is != carry modulus ({:?}), this is not supported.",
packed.message_modulus, packed.carry_modulus,
)));
}
// if packed.message_modulus.0 != packed.carry_modulus.0 {
// return Err(crate::Error::new(format!(
// "Tried to unpack values from a list where message modulus \
// ({:?}) is != carry modulus ({:?}), this is not supported.",
// packed.message_modulus, packed.carry_modulus,
// )));
// }
if index >= packed.count.0 {
return Err(crate::Error::new(format!(

View File

@@ -30,14 +30,13 @@ impl ClientKey {
EncryptionKeyChoice::Big,
"Compression is only compatible with ciphertext in post PBS dimension"
);
let mut engine = ShortintEngine::new();
let post_packing_ks_key = ShortintEngine::with_thread_local_mut(|engine| {
allocate_and_generate_new_binary_glwe_secret_key(
params.packing_ks_glwe_dimension,
params.packing_ks_polynomial_size,
&mut engine.secret_generator,
)
});
let post_packing_ks_key = allocate_and_generate_new_binary_glwe_secret_key(
params.packing_ks_glwe_dimension,
params.packing_ks_polynomial_size,
&mut engine.secret_generator,
);
CompressionPrivateKeys {
post_packing_ks_key,

View File

@@ -8,6 +8,7 @@ use crate::shortint::parameters::{
ShortintParameterSet, SupportedCompactPkeZkScheme,
};
use crate::shortint::{KeySwitchingKeyView, PaddingBit, ShortintEncoding};
use crate::Error;
use serde::{Deserialize, Serialize};
use tfhe_versionable::Versionize;
@@ -111,6 +112,9 @@ impl CompactPublicKeyEncryptionParameters {
padding_bit: PaddingBit::Yes,
}
}
pub fn set_carry_modulus(&mut self, new_carry_modulus: CarryModulus) {
self.carry_modulus = new_carry_modulus;
}
}
impl TryFrom<ShortintParameterSet> for CompactPublicKeyEncryptionParameters {

View File

@@ -313,6 +313,33 @@ impl PBSParameters {
Self::MultiBitPBS(params) => params.grouping_factor,
}
}
pub const fn log2_p_fail(&self) -> f64 {
match self {
Self::PBS(params) => params.log2_p_fail,
Self::MultiBitPBS(params) => params.log2_p_fail,
}
}
pub fn set_log2_p_fail(&mut self, new_p_fail: f64) {
match self {
Self::PBS(params) => params.log2_p_fail = new_p_fail,
Self::MultiBitPBS(params) => params.log2_p_fail = new_p_fail,
}
}
pub fn set_message_modulus(&mut self, new_message_modulus: MessageModulus) {
match self {
Self::PBS(params) => params.message_modulus = new_message_modulus,
Self::MultiBitPBS(params) => params.message_modulus = new_message_modulus,
}
}
pub fn set_carry_modulus(&mut self, new_carry_modulus: CarryModulus) {
match self {
Self::PBS(params) => params.carry_modulus = new_carry_modulus,
Self::MultiBitPBS(params) => params.carry_modulus = new_carry_modulus,
}
}
pub const fn is_pbs(&self) -> bool {
matches!(self, Self::PBS(_))

View File

@@ -314,38 +314,36 @@ impl CompactPublicKey {
let encryption_noise_distribution = self.parameters.encryption_noise_distribution;
let mut engine = ShortintEngine::new();
// No parallelism allowed
#[cfg(all(feature = "__wasm_api", not(feature = "parallel-wasm-api")))]
{
use crate::core_crypto::prelude::encrypt_lwe_compact_ciphertext_list_with_compact_public_key;
ShortintEngine::with_thread_local_mut(|engine| {
encrypt_lwe_compact_ciphertext_list_with_compact_public_key(
&self.key,
&mut ct_list,
&plaintext_list,
encryption_noise_distribution,
encryption_noise_distribution,
&mut engine.secret_generator,
&mut engine.encryption_generator,
);
});
encrypt_lwe_compact_ciphertext_list_with_compact_public_key(
&self.key,
&mut ct_list,
&plaintext_list,
encryption_noise_distribution,
encryption_noise_distribution,
&mut engine.secret_generator,
&mut engine.encryption_generator,
);
}
// Parallelism allowed
#[cfg(any(not(feature = "__wasm_api"), feature = "parallel-wasm-api"))]
{
use crate::core_crypto::prelude::par_encrypt_lwe_compact_ciphertext_list_with_compact_public_key;
ShortintEngine::with_thread_local_mut(|engine| {
par_encrypt_lwe_compact_ciphertext_list_with_compact_public_key(
&self.key,
&mut ct_list,
&plaintext_list,
encryption_noise_distribution,
encryption_noise_distribution,
&mut engine.secret_generator,
&mut engine.encryption_generator,
);
});
par_encrypt_lwe_compact_ciphertext_list_with_compact_public_key(
&self.key,
&mut ct_list,
&plaintext_list,
encryption_noise_distribution,
encryption_noise_distribution,
&mut engine.secret_generator,
&mut engine.encryption_generator,
);
}
let message_modulus = self.parameters.message_modulus;

View File

@@ -1,4 +1,5 @@
pub mod modulus_switch_compression;
pub mod noise_distribution;
pub mod noise_level;
pub mod parameterized_test;
pub mod parameterized_test_bivariate_pbs_compliant;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,32 @@
pub mod atomic_pattern;
use crate::core_crypto::commons::dispersion::Variance;
/// Return the [`Variance`] if a [`Ciphertext`](`crate::shortint::Ciphertext`) with input variance
/// `input_variance` is multiplied by the provided `scalar`.
pub fn scalar_multiplication_variance(input_variance: Variance, scalar: u64) -> Variance {
let multiplicative_factor = scalar.checked_pow(2).unwrap();
Variance(input_variance.0 * multiplicative_factor as f64)
}
pub fn should_use_one_key_per_sample() -> bool {
static ONE_KEY_PER_SAMPLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
*ONE_KEY_PER_SAMPLE.get_or_init(|| {
std::env::var("NOISE_MEASUREMENT_USE_PER_SAMPLE_KEY").map_or(false, |val| {
let val = val.parse::<u32>();
val.map_or(false, |val| val != 0)
})
})
}
pub fn should_run_long_pfail_tests() -> bool {
static LONG_PFAIL_TESTS: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
*LONG_PFAIL_TESTS.get_or_init(|| {
std::env::var("NOISE_MEASUREMENT_LONG_PFAIL_TESTS").map_or(false, |val| {
let val = val.parse::<u32>();
val.map_or(false, |val| val != 0)
})
})
}