mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-11 15:48:20 -05:00
Compare commits
4 Commits
al/pfail_g
...
bb/signed_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1b2eacf0ce | ||
|
|
d56e7e0b2a | ||
|
|
6d2206e5ac | ||
|
|
015b11d309 |
21
.github/workflows/benchmark_tfhe_fft.yml
vendored
21
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -65,11 +65,11 @@ jobs:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
|
||||
- name: Run benchmarks
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make FFT128_SUPPORT=ON bench
|
||||
make bench_fft
|
||||
|
||||
- name: Parse results
|
||||
- name: Parse AVX512 results
|
||||
run: |
|
||||
python3 ./ci/fft_benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database concrete_fft \
|
||||
@@ -77,19 +77,8 @@ jobs:
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}"
|
||||
|
||||
rm -rf target/criterion benchmarks_parameters/
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make FFT128_SUPPORT=ON AVX512_SUPPORT=ON bench
|
||||
|
||||
- name: Parse AVX512 results
|
||||
run: |
|
||||
python3 ./ci/fft_benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--name-suffix avx512 \
|
||||
--append-results
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
|
||||
@@ -357,16 +357,17 @@ void cleanup_cuda_integer_radix_scalar_mul(void *const *streams,
|
||||
|
||||
void scratch_cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
bool is_signed, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *quotient, void *remainder, void const *numerator, void const *divisor,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
bool is_signed, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks_in_radix);
|
||||
|
||||
void cleanup_cuda_integer_div_rem(void *const *streams,
|
||||
|
||||
@@ -2264,7 +2264,7 @@ template <typename Torus> struct int_comparison_buffer {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Torus> struct int_div_rem_memory {
|
||||
template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
int_radix_params params;
|
||||
uint32_t active_gpu_count;
|
||||
|
||||
@@ -2501,9 +2501,10 @@ template <typename Torus> struct int_div_rem_memory {
|
||||
}
|
||||
}
|
||||
|
||||
int_div_rem_memory(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params params,
|
||||
uint32_t num_blocks, bool allocate_gpu_memory) {
|
||||
unsigned_int_div_rem_memory(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int_radix_params params, uint32_t num_blocks,
|
||||
bool allocate_gpu_memory) {
|
||||
active_gpu_count = get_active_gpu_count(2 * num_blocks, gpu_count);
|
||||
|
||||
this->params = params;
|
||||
@@ -3060,4 +3061,174 @@ template <typename Torus> struct int_abs_buffer {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Torus> struct int_div_rem_memory {
|
||||
int_radix_params params;
|
||||
uint32_t active_gpu_count;
|
||||
bool is_signed;
|
||||
// memory objects for other operations
|
||||
unsigned_int_div_rem_memory<Torus> *unsigned_mem;
|
||||
int_abs_buffer<Torus> *abs_mem_1;
|
||||
int_abs_buffer<Torus> *abs_mem_2;
|
||||
int_sc_prop_memory<Torus> *scp_mem_1;
|
||||
int_sc_prop_memory<Torus> *scp_mem_2;
|
||||
int_cmux_buffer<Torus> *cmux_quotient_mem;
|
||||
int_cmux_buffer<Torus> *cmux_remainder_mem;
|
||||
|
||||
// lookup tables
|
||||
int_radix_lut<Torus> *compare_signed_bits_lut;
|
||||
|
||||
// sub streams
|
||||
cudaStream_t *sub_streams_1;
|
||||
cudaStream_t *sub_streams_2;
|
||||
cudaStream_t *sub_streams_3;
|
||||
|
||||
// temporary device buffers
|
||||
Torus *positive_numerator;
|
||||
Torus *positive_divisor;
|
||||
Torus *sign_bits_are_different;
|
||||
Torus *negated_quotient;
|
||||
Torus *negated_remainder;
|
||||
|
||||
int_div_rem_memory(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params params,
|
||||
bool is_signed, uint32_t num_blocks,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
this->active_gpu_count = get_active_gpu_count(2 * num_blocks, gpu_count);
|
||||
this->params = params;
|
||||
this->is_signed = is_signed;
|
||||
|
||||
unsigned_mem = new unsigned_int_div_rem_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_blocks,
|
||||
allocate_gpu_memory);
|
||||
|
||||
if (is_signed) {
|
||||
uint32_t big_lwe_size = params.big_lwe_dimension + 1;
|
||||
Torus sign_bit_pos = 31 - __builtin_clz(params.message_modulus) - 1;
|
||||
|
||||
// init memory objects for other integer operations
|
||||
abs_mem_1 =
|
||||
new int_abs_buffer<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
num_blocks, allocate_gpu_memory);
|
||||
abs_mem_2 =
|
||||
new int_abs_buffer<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
num_blocks, allocate_gpu_memory);
|
||||
scp_mem_1 =
|
||||
new int_sc_prop_memory<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
num_blocks, allocate_gpu_memory);
|
||||
scp_mem_2 =
|
||||
new int_sc_prop_memory<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
num_blocks, allocate_gpu_memory);
|
||||
|
||||
std::function<uint64_t(uint64_t)> quotient_predicate_lut_f =
|
||||
[](uint64_t x) -> uint64_t { return x == 1; };
|
||||
std::function<uint64_t(uint64_t)> remainder_predicate_lut_f =
|
||||
[sign_bit_pos](uint64_t x) -> uint64_t {
|
||||
return (x >> sign_bit_pos) == 1;
|
||||
};
|
||||
|
||||
cmux_quotient_mem = new int_cmux_buffer<Torus>(
|
||||
streams, gpu_indexes, gpu_count, quotient_predicate_lut_f, params,
|
||||
num_blocks, allocate_gpu_memory);
|
||||
cmux_remainder_mem = new int_cmux_buffer<Torus>(
|
||||
streams, gpu_indexes, gpu_count, remainder_predicate_lut_f, params,
|
||||
num_blocks, allocate_gpu_memory);
|
||||
// init temporary memory buffers
|
||||
positive_numerator =
|
||||
(Torus *)cuda_malloc_async(big_lwe_size * num_blocks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
positive_divisor =
|
||||
(Torus *)cuda_malloc_async(big_lwe_size * num_blocks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
negated_quotient =
|
||||
(Torus *)cuda_malloc_async(big_lwe_size * num_blocks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
negated_remainder =
|
||||
(Torus *)cuda_malloc_async(big_lwe_size * num_blocks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
// init boolean temporary buffers
|
||||
sign_bits_are_different = (Torus *)cuda_malloc_async(
|
||||
big_lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
|
||||
// init sub streams
|
||||
sub_streams_1 =
|
||||
(cudaStream_t *)malloc(active_gpu_count * sizeof(cudaStream_t));
|
||||
sub_streams_2 =
|
||||
(cudaStream_t *)malloc(active_gpu_count * sizeof(cudaStream_t));
|
||||
sub_streams_3 =
|
||||
(cudaStream_t *)malloc(active_gpu_count * sizeof(cudaStream_t));
|
||||
for (uint j = 0; j < active_gpu_count; j++) {
|
||||
sub_streams_1[j] = cuda_create_stream(gpu_indexes[j]);
|
||||
sub_streams_2[j] = cuda_create_stream(gpu_indexes[j]);
|
||||
sub_streams_3[j] = cuda_create_stream(gpu_indexes[j]);
|
||||
}
|
||||
|
||||
// init lookup tables
|
||||
// to extract and compare signed bits
|
||||
auto f_compare_extracted_signed_bits = [sign_bit_pos](Torus x,
|
||||
Torus y) -> Torus {
|
||||
Torus x_sign_bit = (x >> sign_bit_pos) & 1;
|
||||
Torus y_sign_bit = (y >> sign_bit_pos) & 1;
|
||||
return (Torus)(x_sign_bit != y_sign_bit);
|
||||
};
|
||||
|
||||
compare_signed_bits_lut = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, 1, 1, true);
|
||||
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
compare_signed_bits_lut->get_lut(gpu_indexes[0], 0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, f_compare_extracted_signed_bits);
|
||||
compare_signed_bits_lut->broadcast_lut(streams, gpu_indexes,
|
||||
gpu_indexes[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
unsigned_mem->release(streams, gpu_indexes, gpu_count);
|
||||
delete unsigned_mem;
|
||||
|
||||
if (is_signed) {
|
||||
// release objects for other integer operations
|
||||
abs_mem_1->release(streams, gpu_indexes, gpu_count);
|
||||
abs_mem_2->release(streams, gpu_indexes, gpu_count);
|
||||
scp_mem_1->release(streams, gpu_indexes, gpu_count);
|
||||
scp_mem_2->release(streams, gpu_indexes, gpu_count);
|
||||
cmux_quotient_mem->release(streams, gpu_indexes, gpu_count);
|
||||
cmux_remainder_mem->release(streams, gpu_indexes, gpu_count);
|
||||
|
||||
delete abs_mem_1;
|
||||
delete abs_mem_2;
|
||||
delete scp_mem_1;
|
||||
delete scp_mem_2;
|
||||
delete cmux_quotient_mem;
|
||||
delete cmux_remainder_mem;
|
||||
|
||||
// release lookup tables
|
||||
compare_signed_bits_lut->release(streams, gpu_indexes, gpu_count);
|
||||
delete compare_signed_bits_lut;
|
||||
|
||||
// release sub streams
|
||||
for (uint i = 0; i < active_gpu_count; i++) {
|
||||
cuda_destroy_stream(sub_streams_1[i], gpu_indexes[i]);
|
||||
cuda_destroy_stream(sub_streams_2[i], gpu_indexes[i]);
|
||||
cuda_destroy_stream(sub_streams_3[i], gpu_indexes[i]);
|
||||
}
|
||||
free(sub_streams_1);
|
||||
free(sub_streams_2);
|
||||
free(sub_streams_3);
|
||||
|
||||
// drop temporary buffers
|
||||
cuda_drop_async(positive_numerator, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(positive_divisor, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(sign_bits_are_different, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(negated_quotient, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(negated_remainder, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#endif // CUDA_INTEGER_UTILITIES_H
|
||||
|
||||
@@ -2,11 +2,12 @@
|
||||
|
||||
void scratch_cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
bool is_signed, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -14,7 +15,7 @@ void scratch_cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_div_rem_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, is_signed,
|
||||
(int_div_rem_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
@@ -22,7 +23,7 @@ void scratch_cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
void cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *quotient, void *remainder, void const *numerator, void const *divisor,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
bool is_signed, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
auto mem = (int_div_rem_memory<uint64_t> *)mem_ptr;
|
||||
@@ -31,8 +32,8 @@ void cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(quotient), static_cast<uint64_t *>(remainder),
|
||||
static_cast<const uint64_t *>(numerator),
|
||||
static_cast<const uint64_t *>(divisor), bsks, (uint64_t **)(ksks), mem,
|
||||
num_blocks);
|
||||
static_cast<const uint64_t *>(divisor), is_signed, bsks,
|
||||
(uint64_t **)(ksks), mem, num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_div_rem(void *const *streams,
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
#include "crypto/keyswitch.cuh"
|
||||
#include "device.h"
|
||||
#include "integer/abs.cuh"
|
||||
#include "integer/comparison.cuh"
|
||||
#include "integer/integer.cuh"
|
||||
#include "integer/integer_utilities.h"
|
||||
@@ -161,22 +162,21 @@ template <typename Torus> struct lwe_ciphertext_list {
|
||||
template <typename Torus>
|
||||
__host__ void scratch_cuda_integer_div_rem_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_div_rem_memory<Torus> **mem_ptr,
|
||||
uint32_t gpu_count, bool is_signed, int_div_rem_memory<Torus> **mem_ptr,
|
||||
uint32_t num_blocks, int_radix_params params, bool allocate_gpu_memory) {
|
||||
|
||||
*mem_ptr = new int_div_rem_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_blocks, allocate_gpu_memory);
|
||||
*mem_ptr =
|
||||
new int_div_rem_memory<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
is_signed, num_blocks, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *quotient,
|
||||
Torus *remainder, Torus const *numerator,
|
||||
Torus const *divisor, void *const *bsks,
|
||||
uint64_t *const *ksks,
|
||||
int_div_rem_memory<uint64_t> *mem_ptr,
|
||||
uint32_t num_blocks) {
|
||||
__host__ void host_unsigned_integer_div_rem_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *quotient, Torus *remainder,
|
||||
Torus const *numerator, Torus const *divisor, void *const *bsks,
|
||||
uint64_t *const *ksks, unsigned_int_div_rem_memory<uint64_t> *mem_ptr,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
|
||||
@@ -594,4 +594,105 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *quotient,
|
||||
Torus *remainder, Torus const *numerator,
|
||||
Torus const *divisor, bool is_signed,
|
||||
void *const *bsks, uint64_t *const *ksks,
|
||||
int_div_rem_memory<uint64_t> *int_mem_ptr,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
if (is_signed) {
|
||||
auto radix_params = int_mem_ptr->params;
|
||||
uint32_t big_lwe_size = radix_params.big_lwe_dimension + 1;
|
||||
|
||||
// temporary memory
|
||||
lwe_ciphertext_list<Torus> positive_numerator(
|
||||
int_mem_ptr->positive_numerator, radix_params, num_blocks);
|
||||
lwe_ciphertext_list<Torus> positive_divisor(int_mem_ptr->positive_divisor,
|
||||
radix_params, num_blocks);
|
||||
|
||||
positive_numerator.clone_from((Torus *)numerator, 0, num_blocks - 1,
|
||||
streams[0], gpu_indexes[0]);
|
||||
positive_divisor.clone_from((Torus *)divisor, 0, num_blocks - 1, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_1, gpu_indexes,
|
||||
gpu_count, positive_numerator.data, bsks, ksks,
|
||||
int_mem_ptr->abs_mem_1, true, num_blocks);
|
||||
host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_2, gpu_indexes,
|
||||
gpu_count, positive_divisor.data, bsks, ksks,
|
||||
int_mem_ptr->abs_mem_2, true, num_blocks);
|
||||
for (uint j = 0; j < int_mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
host_unsigned_integer_div_rem_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count, quotient, remainder,
|
||||
positive_numerator.data, positive_divisor.data, bsks, ksks,
|
||||
int_mem_ptr->unsigned_mem, num_blocks);
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->sign_bits_are_different,
|
||||
&numerator[big_lwe_size * (num_blocks - 1)],
|
||||
&divisor[big_lwe_size * (num_blocks - 1)], bsks, ksks, 1,
|
||||
int_mem_ptr->compare_signed_bits_lut,
|
||||
int_mem_ptr->compare_signed_bits_lut->params.message_modulus);
|
||||
|
||||
for (uint j = 0; j < int_mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
host_integer_radix_negation(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_quotient, quotient, radix_params.big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
|
||||
host_propagate_single_carry<Torus>(int_mem_ptr->sub_streams_1, gpu_indexes,
|
||||
gpu_count, int_mem_ptr->negated_quotient,
|
||||
nullptr, nullptr, int_mem_ptr->scp_mem_1,
|
||||
bsks, ksks, num_blocks);
|
||||
|
||||
host_integer_radix_negation(int_mem_ptr->sub_streams_2, gpu_indexes,
|
||||
gpu_count, int_mem_ptr->negated_remainder,
|
||||
remainder, radix_params.big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus,
|
||||
radix_params.carry_modulus);
|
||||
|
||||
host_propagate_single_carry<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_remainder, nullptr, nullptr,
|
||||
int_mem_ptr->scp_mem_2, bsks, ksks, num_blocks);
|
||||
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count, quotient,
|
||||
int_mem_ptr->sign_bits_are_different, int_mem_ptr->negated_quotient,
|
||||
quotient, int_mem_ptr->cmux_quotient_mem, bsks, ksks, num_blocks);
|
||||
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count, remainder,
|
||||
&numerator[big_lwe_size * (num_blocks - 1)],
|
||||
int_mem_ptr->negated_remainder, remainder,
|
||||
int_mem_ptr->cmux_remainder_mem, bsks, ksks, num_blocks);
|
||||
|
||||
for (uint j = 0; j < int_mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
} else {
|
||||
host_unsigned_integer_div_rem_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, quotient, remainder, numerator,
|
||||
divisor, bsks, ksks, int_mem_ptr->unsigned_mem, num_blocks);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // TFHE_RS_DIV_REM_CUH
|
||||
|
||||
@@ -896,6 +896,7 @@ extern "C" {
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
is_signed: bool,
|
||||
mem_ptr: *mut *mut i8,
|
||||
glwe_dimension: u32,
|
||||
polynomial_size: u32,
|
||||
@@ -922,6 +923,7 @@ extern "C" {
|
||||
remainder: *mut ffi::c_void,
|
||||
numerator: *const ffi::c_void,
|
||||
divisor: *const ffi::c_void,
|
||||
is_signed: bool,
|
||||
mem_ptr: *mut i8,
|
||||
bsks: *const *mut ffi::c_void,
|
||||
ksks: *const *mut ffi::c_void,
|
||||
|
||||
@@ -1,86 +0,0 @@
|
||||
use rayon::prelude::*;
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
use tfhe::shortint::ciphertext::MaxNoiseLevel;
|
||||
use tfhe::shortint::engine::ShortintEngine;
|
||||
use tfhe::shortint::gen_keys;
|
||||
use tfhe::shortint::parameters::multi_bit::MultiBitPBSParameters;
|
||||
use tfhe::shortint::parameters::{CarryModulus, MessageModulus};
|
||||
|
||||
pub const PARAM_MULTI_BIT_GROUP_3_MESSAGE_4_CARRY_2_KS_PBS_GAUSSIAN_2M5_5: MultiBitPBSParameters =
|
||||
MultiBitPBSParameters {
|
||||
lwe_dimension: LweDimension(891),
|
||||
glwe_dimension: GlweDimension(1),
|
||||
polynomial_size: PolynomialSize(2048),
|
||||
lwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
1.3292631075564801e-06,
|
||||
)),
|
||||
glwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
2.845267479601915e-15,
|
||||
)),
|
||||
pbs_base_log: DecompositionBaseLog(21),
|
||||
pbs_level: DecompositionLevelCount(1),
|
||||
ks_base_log: DecompositionBaseLog(4),
|
||||
ks_level: DecompositionLevelCount(4),
|
||||
message_modulus: MessageModulus(8),
|
||||
carry_modulus: CarryModulus(4),
|
||||
max_noise_level: MaxNoiseLevel::new(5),
|
||||
log2_p_fail: -5.5,
|
||||
ciphertext_modulus: CiphertextModulus::new_native(),
|
||||
encryption_key_choice: EncryptionKeyChoice::Big,
|
||||
grouping_factor: LweBskGroupingFactor(3),
|
||||
deterministic_execution: false,
|
||||
};
|
||||
|
||||
pub fn main() {
|
||||
let fhe_params = PARAM_MULTI_BIT_GROUP_3_MESSAGE_4_CARRY_2_KS_PBS_GAUSSIAN_2M5_5;
|
||||
|
||||
let max_scalar_mul = fhe_params.max_noise_level.get() as u8;
|
||||
|
||||
let expected_fails = 500;
|
||||
|
||||
println!("running");
|
||||
// let num_pbs = (1 << 6) * expected_fails;
|
||||
let num_pbs = (2.0_f32.powf(5.5).ceil() as i32) * expected_fails;
|
||||
|
||||
let (cks, sks) = gen_keys(fhe_params);
|
||||
let lut = sks.generate_lookup_table(|x| x);
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let actual_fails: u32 = (0..num_pbs)
|
||||
.into_par_iter()
|
||||
.map(|_i| {
|
||||
// let mut engine = ShortintEngine::new();
|
||||
// let cks = engine.new_client_key(fhe_params.into());
|
||||
// let sks = engine.new_server_key(&cks);
|
||||
|
||||
// let mut ct = engine.encrypt(&cks, 0);
|
||||
|
||||
// let lut = sks.generate_lookup_table(|x| x);
|
||||
|
||||
let mut ct = cks.encrypt(0);
|
||||
|
||||
// Get baseline noise after PBS
|
||||
sks.unchecked_scalar_mul_assign(&mut ct, max_scalar_mul);
|
||||
sks.apply_lookup_table_assign(&mut ct, &lut);
|
||||
|
||||
// // PBS with baseline noise as input
|
||||
// sks.unchecked_scalar_mul_assign(&mut ct, max_scalar_mul);
|
||||
// sks.apply_lookup_table_assign(&mut ct, &lut);
|
||||
|
||||
let dec = cks.decrypt(&ct);
|
||||
|
||||
if dec != 0 {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
}
|
||||
})
|
||||
.sum();
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
|
||||
println!("Elapsed: {elapsed:?}");
|
||||
println!("Expected fails: {expected_fails}");
|
||||
println!("Got fails: {actual_fails}");
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,211 +0,0 @@
|
||||
use super::*;
|
||||
use crate::core_crypto::commons::noise_formulas::lwe_multi_bit_programmable_bootstrap::multi_bit_pbs_variance_132_bits_security_gaussian_gf_3;
|
||||
use crate::core_crypto::commons::noise_formulas::secure_noise::minimal_lwe_variance_for_132_bits_security_gaussian;
|
||||
use crate::core_crypto::commons::test_tools::{torus_modular_diff, variance};
|
||||
use rayon::prelude::*;
|
||||
|
||||
// This is 1 / 16 which is exactly representable in an f64 (even an f32)
|
||||
// 1 / 32 is too strict and fails the tests
|
||||
const RELATIVE_TOLERANCE: f64 = 0.0625;
|
||||
|
||||
const NB_TESTS: usize = 1000;
|
||||
|
||||
fn lwe_encrypt_multi_bit_pbs_group_3_decrypt_custom_mod<Scalar>(params: MultiBitTestParams<Scalar>)
|
||||
where
|
||||
Scalar: UnsignedTorus + Sync + Send + CastFrom<usize> + CastInto<usize>,
|
||||
{
|
||||
let input_lwe_dimension = params.input_lwe_dimension;
|
||||
let lwe_noise_distribution = params.lwe_noise_distribution;
|
||||
let glwe_noise_distribution = params.glwe_noise_distribution;
|
||||
let ciphertext_modulus = params.ciphertext_modulus;
|
||||
let message_modulus_log = params.message_modulus_log;
|
||||
let msg_modulus = Scalar::ONE.shl(message_modulus_log.0);
|
||||
let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus);
|
||||
let glwe_dimension = params.glwe_dimension;
|
||||
let polynomial_size = params.polynomial_size;
|
||||
let pbs_decomposition_base_log = params.decomp_base_log;
|
||||
let pbs_decomposition_level_count = params.decomp_level_count;
|
||||
let grouping_factor = params.grouping_factor;
|
||||
assert_eq!(grouping_factor.0, 3);
|
||||
|
||||
let modulus_as_f64 = if ciphertext_modulus.is_native_modulus() {
|
||||
2.0f64.powi(Scalar::BITS as i32)
|
||||
} else {
|
||||
ciphertext_modulus.get_custom_modulus() as f64
|
||||
};
|
||||
|
||||
let expected_variance = multi_bit_pbs_variance_132_bits_security_gaussian_gf_3(
|
||||
input_lwe_dimension,
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
pbs_decomposition_base_log,
|
||||
pbs_decomposition_level_count,
|
||||
modulus_as_f64,
|
||||
);
|
||||
|
||||
let mut rsc = TestResources::new();
|
||||
|
||||
let f = |x: Scalar| x;
|
||||
|
||||
let delta: Scalar = encoding_with_padding / msg_modulus;
|
||||
let mut msg = msg_modulus;
|
||||
|
||||
let num_samples = NB_TESTS * <Scalar as CastInto<usize>>::cast_into(msg);
|
||||
let mut noise_samples = Vec::with_capacity(num_samples);
|
||||
|
||||
let input_lwe_secret_key = allocate_and_generate_new_binary_lwe_secret_key(
|
||||
input_lwe_dimension,
|
||||
&mut rsc.secret_random_generator,
|
||||
);
|
||||
|
||||
let output_glwe_secret_key = allocate_and_generate_new_binary_glwe_secret_key(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
&mut rsc.secret_random_generator,
|
||||
);
|
||||
|
||||
let output_lwe_secret_key = output_glwe_secret_key.as_lwe_secret_key();
|
||||
|
||||
let fbsk = {
|
||||
let bsk = allocate_and_generate_new_lwe_multi_bit_bootstrap_key(
|
||||
&input_lwe_secret_key,
|
||||
&output_glwe_secret_key,
|
||||
pbs_decomposition_base_log,
|
||||
pbs_decomposition_level_count,
|
||||
grouping_factor,
|
||||
glwe_noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&*bsk,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let mut fbsk = FourierLweMultiBitBootstrapKey::new(
|
||||
bsk.input_lwe_dimension(),
|
||||
bsk.glwe_size(),
|
||||
bsk.polynomial_size(),
|
||||
bsk.decomposition_base_log(),
|
||||
bsk.decomposition_level_count(),
|
||||
bsk.grouping_factor(),
|
||||
);
|
||||
|
||||
par_convert_standard_lwe_multi_bit_bootstrap_key_to_fourier(&bsk, &mut fbsk);
|
||||
|
||||
fbsk
|
||||
};
|
||||
|
||||
let accumulator = generate_programmable_bootstrap_glwe_lut(
|
||||
polynomial_size,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
msg_modulus.cast_into(),
|
||||
ciphertext_modulus,
|
||||
delta,
|
||||
f,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&accumulator,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
while msg != Scalar::ZERO {
|
||||
msg = msg.wrapping_sub(Scalar::ONE);
|
||||
|
||||
let current_run_samples: Vec<_> = (0..NB_TESTS)
|
||||
.into_par_iter()
|
||||
.map(|_| {
|
||||
let mut rsc = TestResources::new();
|
||||
|
||||
let plaintext = Plaintext(msg * delta);
|
||||
|
||||
let lwe_ciphertext_in = allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&input_lwe_secret_key,
|
||||
plaintext,
|
||||
lwe_noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&lwe_ciphertext_in,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let mut out_pbs_ct = LweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
multi_bit_programmable_bootstrap_lwe_ciphertext(
|
||||
&lwe_ciphertext_in,
|
||||
&mut out_pbs_ct,
|
||||
&accumulator,
|
||||
&fbsk,
|
||||
params.thread_count,
|
||||
true,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&out_pbs_ct,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let decrypted = decrypt_lwe_ciphertext(&output_lwe_secret_key, &out_pbs_ct);
|
||||
|
||||
let decoded = round_decode(decrypted.0, delta) % msg_modulus;
|
||||
|
||||
assert_eq!(decoded, f(msg));
|
||||
|
||||
torus_modular_diff(plaintext.0, decrypted.0, ciphertext_modulus)
|
||||
})
|
||||
.collect();
|
||||
|
||||
noise_samples.extend(current_run_samples);
|
||||
}
|
||||
|
||||
let measured_variance = variance(&noise_samples);
|
||||
|
||||
let minimal_variance = minimal_lwe_variance_for_132_bits_security_gaussian(
|
||||
fbsk.output_lwe_dimension(),
|
||||
if ciphertext_modulus.is_native_modulus() {
|
||||
2.0f64.powi(Scalar::BITS as i32)
|
||||
} else {
|
||||
ciphertext_modulus.get_custom_modulus() as f64
|
||||
},
|
||||
);
|
||||
|
||||
// Have a log even if it's a test to have a trace in no capture mode to eyeball variances
|
||||
println!("measured_variance={measured_variance:?}");
|
||||
println!("expected_variance={expected_variance:?}");
|
||||
println!("minimal_variance={minimal_variance:?}");
|
||||
|
||||
if measured_variance.0 < expected_variance.0 {
|
||||
// We are in the clear as long as we have at least the noise for security
|
||||
assert!(
|
||||
measured_variance.0 >= minimal_variance.0,
|
||||
"Found insecure variance after PBS\n\
|
||||
measure_variance={measured_variance:?}\n\
|
||||
minimal_variance={minimal_variance:?}"
|
||||
);
|
||||
} else {
|
||||
// Check we are not too far from the expected variance if we are bigger
|
||||
let var_abs_diff = (expected_variance.0 - measured_variance.0).abs();
|
||||
let tolerance_threshold = RELATIVE_TOLERANCE * expected_variance.0;
|
||||
|
||||
assert!(
|
||||
var_abs_diff < tolerance_threshold,
|
||||
"Absolute difference for variance: {var_abs_diff}, \
|
||||
tolerance threshold: {tolerance_threshold}, \
|
||||
got variance: {measured_variance:?}, \
|
||||
expected variance: {expected_variance:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Hands the noise test `lwe_encrypt_multi_bit_pbs_group_3_decrypt_custom_mod`
// and the parameter constant to run it with to `create_parametrized_test!`.
// Presumably the macro expands to one `#[test]` per listed constant -- confirm
// against the macro definition.
create_parametrized_test!(lwe_encrypt_multi_bit_pbs_group_3_decrypt_custom_mod {
|
||||
NOISE_TEST_PARAMS_MULTI_BIT_GROUP_3_4_BITS_NATIVE_U64_132_BITS_GAUSSIAN
|
||||
});
|
||||
@@ -2,9 +2,7 @@ use super::*;
|
||||
|
||||
mod lwe_encryption_noise;
|
||||
mod lwe_keyswitch_noise;
|
||||
mod lwe_multi_bit_programmable_bootstrapping_noise;
|
||||
mod lwe_programmable_bootstrapping_noise;
|
||||
mod pfail_multi_bit;
|
||||
|
||||
#[allow(clippy::excessive_precision)]
|
||||
pub const NOISE_TEST_PARAMS_4_BITS_NATIVE_U64_132_BITS_GAUSSIAN: ClassicTestParams<u64> =
|
||||
@@ -30,41 +28,3 @@ pub const NOISE_TEST_PARAMS_4_BITS_NATIVE_U64_132_BITS_GAUSSIAN: ClassicTestPara
|
||||
message_modulus_log: MessageModulusLog(4),
|
||||
ciphertext_modulus: CiphertextModulus::new_native(),
|
||||
};
|
||||
#[allow(clippy::excessive_precision)]
|
||||
pub const NOISE_TEST_PARAMS_MULTI_BIT_GROUP_3_4_BITS_NATIVE_U64_132_BITS_GAUSSIAN:
|
||||
MultiBitTestParams<u64> = MultiBitTestParams {
|
||||
input_lwe_dimension: LweDimension(837),
|
||||
lwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
3.3747142481837397e-06,
|
||||
)),
|
||||
decomp_base_log: DecompositionBaseLog(21),
|
||||
decomp_level_count: DecompositionLevelCount(1),
|
||||
glwe_dimension: GlweDimension(1),
|
||||
polynomial_size: PolynomialSize(2048),
|
||||
glwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
2.845267479601915e-15,
|
||||
)),
|
||||
message_modulus_log: MessageModulusLog(4),
|
||||
ciphertext_modulus: CiphertextModulus::new_native(),
|
||||
grouping_factor: LweBskGroupingFactor(3),
|
||||
thread_count: ThreadCount(8),
|
||||
};
|
||||
#[allow(clippy::excessive_precision)]
|
||||
pub const PFAIL_TEST_PARAMS_MULTI_BIT_GROUP_3_6_BITS_NATIVE_U64_132_BITS_GAUSSIAN:
|
||||
MultiBitTestParams<u64> = MultiBitTestParams {
|
||||
input_lwe_dimension: LweDimension(522),
|
||||
lwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
0.0007736698118352694,
|
||||
)),
|
||||
decomp_base_log: DecompositionBaseLog(21),
|
||||
decomp_level_count: DecompositionLevelCount(1),
|
||||
glwe_dimension: GlweDimension(1),
|
||||
polynomial_size: PolynomialSize(4096),
|
||||
glwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
0.0000000000000000002168404344971009,
|
||||
)),
|
||||
message_modulus_log: MessageModulusLog(6),
|
||||
ciphertext_modulus: CiphertextModulus::new_native(),
|
||||
grouping_factor: LweBskGroupingFactor(3),
|
||||
thread_count: ThreadCount(8),
|
||||
};
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
use super::*;
|
||||
use crate::core_crypto::commons::noise_formulas::lwe_multi_bit_programmable_bootstrap::multi_bit_pbs_variance_132_bits_security_gaussian_gf_3;
|
||||
use crate::core_crypto::commons::noise_formulas::secure_noise::minimal_lwe_variance_for_132_bits_security_gaussian;
|
||||
use crate::core_crypto::commons::test_tools::{torus_modular_diff, variance};
|
||||
use rayon::prelude::*;
|
||||
|
||||
// This is 1 / 16 which is exactly representable in an f64 (even an f32)
|
||||
// 1 / 32 is too strict and fails the tests
|
||||
const RELATIVE_TOLERANCE: f64 = 0.0625;
|
||||
|
||||
const NB_TESTS: usize = 1000;
|
||||
fn pfail_multi_bit_pbs_group_3<Scalar>(params: MultiBitTestParams<Scalar>)
|
||||
where
|
||||
Scalar: UnsignedTorus + Sync + Send + CastFrom<usize> + CastInto<usize>,
|
||||
{
|
||||
let input_lwe_dimension = params.input_lwe_dimension;
|
||||
let lwe_noise_distribution = params.lwe_noise_distribution;
|
||||
let glwe_noise_distribution = params.glwe_noise_distribution;
|
||||
let ciphertext_modulus = params.ciphertext_modulus;
|
||||
let message_modulus_log = params.message_modulus_log;
|
||||
let msg_modulus = Scalar::ONE.shl(message_modulus_log.0);
|
||||
let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus);
|
||||
let glwe_dimension = params.glwe_dimension;
|
||||
let polynomial_size = params.polynomial_size;
|
||||
let pbs_decomposition_base_log = params.decomp_base_log;
|
||||
let pbs_decomposition_level_count = params.decomp_level_count;
|
||||
let grouping_factor = params.grouping_factor;
|
||||
assert_eq!(grouping_factor.0, 3);
|
||||
|
||||
let modulus_as_f64 = if ciphertext_modulus.is_native_modulus() {
|
||||
2.0f64.powi(Scalar::BITS as i32)
|
||||
} else {
|
||||
ciphertext_modulus.get_custom_modulus() as f64
|
||||
};
|
||||
|
||||
let mut rsc = TestResources::new();
|
||||
|
||||
let f = |x: Scalar| x;
|
||||
|
||||
let delta: Scalar = encoding_with_padding / msg_modulus;
|
||||
let mut msg = msg_modulus;
|
||||
|
||||
let expected_fails = 100;
|
||||
let pfail = 2.0_f64.powi(-14);
|
||||
let num_samples = ((expected_fails as f64) / pfail) as usize;
|
||||
|
||||
let input_lwe_secret_key = allocate_and_generate_new_binary_lwe_secret_key(
|
||||
input_lwe_dimension,
|
||||
&mut rsc.secret_random_generator,
|
||||
);
|
||||
|
||||
let output_glwe_secret_key = allocate_and_generate_new_binary_glwe_secret_key(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
&mut rsc.secret_random_generator,
|
||||
);
|
||||
|
||||
let output_lwe_secret_key = output_glwe_secret_key.as_lwe_secret_key();
|
||||
|
||||
let fbsk = {
|
||||
let bsk = allocate_and_generate_new_lwe_multi_bit_bootstrap_key(
|
||||
&input_lwe_secret_key,
|
||||
&output_glwe_secret_key,
|
||||
pbs_decomposition_base_log,
|
||||
pbs_decomposition_level_count,
|
||||
grouping_factor,
|
||||
glwe_noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
|
||||
let mut fbsk = FourierLweMultiBitBootstrapKey::new(
|
||||
bsk.input_lwe_dimension(),
|
||||
bsk.glwe_size(),
|
||||
bsk.polynomial_size(),
|
||||
bsk.decomposition_base_log(),
|
||||
bsk.decomposition_level_count(),
|
||||
bsk.grouping_factor(),
|
||||
);
|
||||
|
||||
par_convert_standard_lwe_multi_bit_bootstrap_key_to_fourier(&bsk, &mut fbsk);
|
||||
|
||||
fbsk
|
||||
};
|
||||
|
||||
let accumulator = generate_programmable_bootstrap_glwe_lut(
|
||||
polynomial_size,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
msg_modulus.cast_into(),
|
||||
ciphertext_modulus,
|
||||
delta,
|
||||
f,
|
||||
);
|
||||
|
||||
let msg = Scalar::ZERO;
|
||||
let fails: i32 = (0..num_samples).into_par_iter().map(|_| {
|
||||
let mut rsc = TestResources::new();
|
||||
|
||||
let plaintext = Plaintext(msg * delta);
|
||||
|
||||
let lwe_ciphertext_in = allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&input_lwe_secret_key,
|
||||
plaintext,
|
||||
lwe_noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
|
||||
let mut out_pbs_ct = LweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
multi_bit_programmable_bootstrap_lwe_ciphertext(
|
||||
&lwe_ciphertext_in,
|
||||
&mut out_pbs_ct,
|
||||
&accumulator,
|
||||
&fbsk,
|
||||
params.thread_count,
|
||||
true,
|
||||
);
|
||||
|
||||
let decrypted = decrypt_lwe_ciphertext(&output_lwe_secret_key, &out_pbs_ct);
|
||||
|
||||
let decoded = round_decode(decrypted.0, delta) % msg_modulus;
|
||||
|
||||
if decoded == f(msg) {
|
||||
0
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}).sum();
|
||||
panic!("Got fails: {}, expected fails: {}", fails, expected_fails);
|
||||
}
|
||||
|
||||
|
||||
create_parametrized_test!(pfail_multi_bit_pbs_group_3 {
|
||||
PFAIL_TEST_PARAMS_MULTI_BIT_GROUP_3_6_BITS_NATIVE_U64_132_BITS_GAUSSIAN
|
||||
});
|
||||
@@ -1,63 +0,0 @@
|
||||
// This file was autogenerated, do not modify by hand.
|
||||
use crate::core_crypto::commons::dispersion::Variance;
|
||||
use crate::core_crypto::commons::parameters::*;
|
||||
|
||||
/// This formula is only valid if the proper noise distributions are used and
|
||||
/// if the keys used are encrypted using secure noise given by the
|
||||
/// [`minimal_glwe_variance`](`super::secure_noise`)
|
||||
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
|
||||
pub fn multi_bit_pbs_variance_132_bits_security_gaussian_gf_3(
|
||||
input_lwe_dimension: LweDimension,
|
||||
output_glwe_dimension: GlweDimension,
|
||||
output_polynomial_size: PolynomialSize,
|
||||
decomposition_base_log: DecompositionBaseLog,
|
||||
decomposition_level_count: DecompositionLevelCount,
|
||||
modulus: f64,
|
||||
) -> Variance {
|
||||
Variance(multi_bit_pbs_variance_132_bits_security_gaussian_gf_3_impl(
|
||||
input_lwe_dimension.0 as f64,
|
||||
output_glwe_dimension.0 as f64,
|
||||
output_polynomial_size.0 as f64,
|
||||
2.0f64.powi(decomposition_base_log.0 as i32),
|
||||
decomposition_level_count.0 as f64,
|
||||
modulus,
|
||||
))
|
||||
}
|
||||
|
||||
/// Evaluates the multi-bit PBS output variance formula on plain `f64` inputs.
///
/// `decomposition_base` is the base itself (not its logarithm). The result is
/// `(1 / 3) * input_lwe_dimension` times a sum of five terms; the sub-expressions are named
/// below but evaluated in the same order as in the generated formula so the floating-point
/// result is unchanged.
///
/// This formula is only valid if the proper noise distributions are used and
/// if the keys used are encrypted using secure noise given by the
/// [`minimal_glwe_variance`](`super::secure_noise`)
/// and [`minimal_lwe_variance`](`super::secure_noise`) family of functions.
pub fn multi_bit_pbs_variance_132_bits_security_gaussian_gf_3_impl(
    input_lwe_dimension: f64,
    output_glwe_dimension: f64,
    output_polynomial_size: f64,
    decomposition_base: f64,
    decomposition_level_count: f64,
    modulus: f64,
) -> f64 {
    // Sub-expressions that appear more than once in the formula.
    let glwe_size = output_glwe_dimension + 1.0;
    let base_squared = decomposition_base.powf(2.0);
    let inverse_modulus_squared = modulus.powf(-2.0);
    let inverse_full_base_squared = decomposition_base.powf(-2.0 * decomposition_level_count);

    let first_term = 3.44492863492271e-32
        * base_squared
        * decomposition_level_count
        * output_polynomial_size.powf(2.0)
        * glwe_size;

    let exponential_sum = (4.0 - 2.88539008177793 * modulus.ln()).exp2()
        + (-0.0497829131652661 * output_glwe_dimension * output_polynomial_size
            + 5.31469187675068)
            .exp2();
    let base_factor = (1_f64 / 12.0) * base_squared + 0.166666666666667;
    let second_term = 8.0
        * decomposition_level_count
        * output_polynomial_size
        * exponential_sum
        * base_factor
        * glwe_size;

    let third_term = (1_f64 / 12.0) * inverse_modulus_squared;

    let fourth_term = (1_f64 / 2.0)
        * output_glwe_dimension
        * output_polynomial_size
        * (0.0208333333333333 * inverse_modulus_squared
            + 0.0416666666666667 * inverse_full_base_squared);

    let fifth_term = (1_f64 / 24.0) * inverse_full_base_squared;

    (1_f64 / 3_f64)
        * input_lwe_dimension
        * (first_term + second_term + third_term + fourth_term + fifth_term)
}
|
||||
@@ -1,5 +1,4 @@
|
||||
// This file was autogenerated, do not modify by hand.
|
||||
pub mod lwe_keyswitch;
|
||||
pub mod lwe_multi_bit_programmable_bootstrap;
|
||||
pub mod lwe_programmable_bootstrap;
|
||||
pub mod secure_noise;
|
||||
|
||||
@@ -6,7 +6,6 @@ mod lwe_linear_algebra;
|
||||
mod lwe_multi_bit_programmable_bootstrapping;
|
||||
mod lwe_packing_keyswitch;
|
||||
mod lwe_programmable_bootstrapping;
|
||||
mod noise_distribution;
|
||||
|
||||
pub struct CudaPackingKeySwitchKeys<Scalar: UnsignedInteger> {
|
||||
pub lwe_sk: LweSecretKey<Vec<Scalar>>,
|
||||
|
||||
@@ -1,251 +0,0 @@
|
||||
use super::*;
|
||||
use crate::core_crypto::commons::noise_formulas::lwe_multi_bit_programmable_bootstrap::multi_bit_pbs_variance_132_bits_security_gaussian_gf_3;
|
||||
use crate::core_crypto::commons::noise_formulas::secure_noise::minimal_lwe_variance_for_132_bits_security_gaussian;
|
||||
use crate::core_crypto::commons::test_tools::{torus_modular_diff, variance};
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaLweBootstrapKey;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_multi_bit_bootstrap_key::CudaLweMultiBitBootstrapKey;
|
||||
use crate::core_crypto::gpu::vec::CudaVec;
|
||||
use crate::core_crypto::gpu::{cuda_multi_bit_programmable_bootstrap_lwe_ciphertext, CudaStreams};
|
||||
use itertools::Itertools;
|
||||
use rayon::prelude::*;
|
||||
|
||||
// This is 1 / 16 which is exactly representable in an f64 (even an f32)
|
||||
// 1 / 32 is too strict and fails the tests
|
||||
const RELATIVE_TOLERANCE: f64 = 0.0625;
|
||||
|
||||
const NB_TESTS: usize = 1000;
|
||||
|
||||
fn lwe_encrypt_multi_bit_pbs_decrypt_custom_mod<Scalar>(params: MultiBitTestParams<Scalar>)
|
||||
where
|
||||
Scalar: UnsignedTorus + Sync + Send + CastFrom<usize> + CastInto<usize>,
|
||||
{
|
||||
let input_lwe_dimension = params.input_lwe_dimension;
|
||||
let lwe_noise_distribution = params.lwe_noise_distribution;
|
||||
let glwe_noise_distribution = params.glwe_noise_distribution;
|
||||
let ciphertext_modulus = params.ciphertext_modulus;
|
||||
let message_modulus_log = params.message_modulus_log;
|
||||
let msg_modulus = Scalar::ONE.shl(message_modulus_log.0);
|
||||
let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus);
|
||||
let glwe_dimension = params.glwe_dimension;
|
||||
let polynomial_size = params.polynomial_size;
|
||||
let pbs_decomposition_base_log = params.decomp_base_log;
|
||||
let pbs_decomposition_level_count = params.decomp_level_count;
|
||||
let grouping_factor = params.grouping_factor;
|
||||
let number_of_messages = 1;
|
||||
|
||||
let gpu_index = 0;
|
||||
let stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
|
||||
let modulus_as_f64 = if ciphertext_modulus.is_native_modulus() {
|
||||
2.0f64.powi(Scalar::BITS as i32)
|
||||
} else {
|
||||
ciphertext_modulus.get_custom_modulus() as f64
|
||||
};
|
||||
|
||||
let expected_variance = multi_bit_pbs_variance_132_bits_security_gaussian_gf_3(
|
||||
input_lwe_dimension,
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
pbs_decomposition_base_log,
|
||||
pbs_decomposition_level_count,
|
||||
modulus_as_f64,
|
||||
);
|
||||
|
||||
let mut rsc = TestResources::new();
|
||||
|
||||
let f = |x: Scalar| x;
|
||||
|
||||
let delta: Scalar = encoding_with_padding / msg_modulus;
|
||||
let mut msg = msg_modulus;
|
||||
|
||||
let num_samples = NB_TESTS * <Scalar as CastInto<usize>>::cast_into(msg);
|
||||
let mut noise_samples = Vec::with_capacity(num_samples);
|
||||
|
||||
let input_lwe_secret_key = allocate_and_generate_new_binary_lwe_secret_key(
|
||||
input_lwe_dimension,
|
||||
&mut rsc.secret_random_generator,
|
||||
);
|
||||
|
||||
let output_glwe_secret_key = allocate_and_generate_new_binary_glwe_secret_key(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
&mut rsc.secret_random_generator,
|
||||
);
|
||||
|
||||
let output_lwe_secret_key = output_glwe_secret_key.as_lwe_secret_key();
|
||||
let output_lwe_dimension = output_lwe_secret_key.lwe_dimension();
|
||||
|
||||
let accumulator = generate_programmable_bootstrap_glwe_lut(
|
||||
polynomial_size,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
msg_modulus.cast_into(),
|
||||
ciphertext_modulus,
|
||||
delta,
|
||||
f,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&accumulator,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let mut bsk = LweMultiBitBootstrapKey::new(
|
||||
Scalar::ZERO,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
polynomial_size,
|
||||
pbs_decomposition_base_log,
|
||||
pbs_decomposition_level_count,
|
||||
input_lwe_dimension,
|
||||
grouping_factor,
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
par_generate_lwe_multi_bit_bootstrap_key(
|
||||
&input_lwe_secret_key,
|
||||
&output_glwe_secret_key,
|
||||
&mut bsk,
|
||||
glwe_noise_distribution,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&*bsk,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let d_bsk = CudaLweMultiBitBootstrapKey::from_lwe_multi_bit_bootstrap_key(&bsk, &stream);
|
||||
|
||||
while msg != Scalar::ZERO {
|
||||
msg = msg.wrapping_sub(Scalar::ONE);
|
||||
|
||||
let current_run_samples: Vec<_> = (0..NB_TESTS)
|
||||
.into_par_iter()
|
||||
.map(|_| {
|
||||
let mut rsc = TestResources::new();
|
||||
|
||||
let plaintext = Plaintext(msg * delta);
|
||||
|
||||
let lwe_ciphertext_in = allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&input_lwe_secret_key,
|
||||
plaintext,
|
||||
lwe_noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&lwe_ciphertext_in,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let d_lwe_ciphertext_in =
|
||||
CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &stream);
|
||||
let mut d_out_pbs_ct = CudaLweCiphertextList::new(
|
||||
output_lwe_dimension,
|
||||
LweCiphertextCount(1),
|
||||
ciphertext_modulus,
|
||||
&stream,
|
||||
);
|
||||
let d_accumulator =
|
||||
CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &stream);
|
||||
|
||||
let mut test_vector_indexes: Vec<Scalar> = vec![Scalar::ZERO; number_of_messages];
|
||||
for (i, ind) in test_vector_indexes.iter_mut().enumerate() {
|
||||
*ind = <usize as CastInto<Scalar>>::cast_into(i);
|
||||
}
|
||||
|
||||
let mut d_test_vector_indexes =
|
||||
unsafe { CudaVec::<Scalar>::new_async(number_of_messages, &stream, 0) };
|
||||
unsafe {
|
||||
d_test_vector_indexes.copy_from_cpu_async(&test_vector_indexes, &stream, 0)
|
||||
};
|
||||
|
||||
let num_blocks = d_lwe_ciphertext_in.0.lwe_ciphertext_count.0;
|
||||
let lwe_indexes_usize: Vec<usize> = (0..num_blocks).collect_vec();
|
||||
let lwe_indexes = lwe_indexes_usize
|
||||
.iter()
|
||||
.map(|&x| <usize as CastInto<Scalar>>::cast_into(x))
|
||||
.collect_vec();
|
||||
let mut d_output_indexes =
|
||||
unsafe { CudaVec::<Scalar>::new_async(num_blocks, &stream, 0) };
|
||||
let mut d_input_indexes =
|
||||
unsafe { CudaVec::<Scalar>::new_async(num_blocks, &stream, 0) };
|
||||
unsafe {
|
||||
d_input_indexes.copy_from_cpu_async(&lwe_indexes, &stream, 0);
|
||||
d_output_indexes.copy_from_cpu_async(&lwe_indexes, &stream, 0);
|
||||
}
|
||||
|
||||
cuda_multi_bit_programmable_bootstrap_lwe_ciphertext(
|
||||
&d_lwe_ciphertext_in,
|
||||
&mut d_out_pbs_ct,
|
||||
&d_accumulator,
|
||||
&d_test_vector_indexes,
|
||||
&d_output_indexes,
|
||||
&d_input_indexes,
|
||||
&d_bsk,
|
||||
&stream,
|
||||
);
|
||||
|
||||
let out_pbs_ct = d_out_pbs_ct.into_lwe_ciphertext(&stream);
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&out_pbs_ct,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let decrypted = decrypt_lwe_ciphertext(&output_lwe_secret_key, &out_pbs_ct);
|
||||
|
||||
let decoded = round_decode(decrypted.0, delta) % msg_modulus;
|
||||
|
||||
assert_eq!(decoded, f(msg));
|
||||
|
||||
torus_modular_diff(plaintext.0, decrypted.0, ciphertext_modulus)
|
||||
})
|
||||
.collect();
|
||||
|
||||
noise_samples.extend(current_run_samples);
|
||||
}
|
||||
|
||||
let measured_variance = variance(&noise_samples);
|
||||
|
||||
let minimal_variance = minimal_lwe_variance_for_132_bits_security_gaussian(
|
||||
bsk.output_lwe_dimension(),
|
||||
if ciphertext_modulus.is_native_modulus() {
|
||||
2.0f64.powi(Scalar::BITS as i32)
|
||||
} else {
|
||||
ciphertext_modulus.get_custom_modulus() as f64
|
||||
},
|
||||
);
|
||||
|
||||
// Have a log even if it's a test to have a trace in no capture mode to eyeball variances
|
||||
println!("measured_variance={measured_variance:?}");
|
||||
println!("expected_variance={expected_variance:?}");
|
||||
println!("minimal_variance={minimal_variance:?}");
|
||||
|
||||
if measured_variance.0 < expected_variance.0 {
|
||||
// We are in the clear as long as we have at least the noise for security
|
||||
assert!(
|
||||
measured_variance.0 >= minimal_variance.0,
|
||||
"Found insecure variance after PBS\n\
|
||||
measure_variance={measured_variance:?}\n\
|
||||
minimal_variance={minimal_variance:?}"
|
||||
);
|
||||
} else {
|
||||
// Check we are not too far from the expected variance if we are bigger
|
||||
let var_abs_diff = (expected_variance.0 - measured_variance.0).abs();
|
||||
let tolerance_threshold = RELATIVE_TOLERANCE * expected_variance.0;
|
||||
|
||||
assert!(
|
||||
var_abs_diff < tolerance_threshold,
|
||||
"Absolute difference for variance: {var_abs_diff}, \
|
||||
tolerance threshold: {tolerance_threshold}, \
|
||||
got variance: {measured_variance:?}, \
|
||||
expected variance: {expected_variance:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
create_parametrized_test!(lwe_encrypt_multi_bit_pbs_decrypt_custom_mod {
|
||||
NOISE_TEST_PARAMS_GPU_MULTI_BIT_GROUP_3_4_BITS_NATIVE_U64_132_BITS_GAUSSIAN
|
||||
});
|
||||
@@ -1,248 +0,0 @@
|
||||
use super::*;
|
||||
use crate::core_crypto::commons::noise_formulas::lwe_programmable_bootstrap::pbs_variance_132_bits_security_gaussian;
|
||||
use crate::core_crypto::commons::noise_formulas::secure_noise::minimal_lwe_variance_for_132_bits_security_gaussian;
|
||||
use crate::core_crypto::commons::test_tools::{torus_modular_diff, variance};
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaLweBootstrapKey;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::vec::CudaVec;
|
||||
use crate::core_crypto::gpu::{cuda_programmable_bootstrap_lwe_ciphertext, CudaStreams};
|
||||
use itertools::Itertools;
|
||||
use rayon::prelude::*;
|
||||
|
||||
// This is 1 / 16 which is exactly representable in an f64 (even an f32)
|
||||
// 1 / 32 is too strict and fails the tests
|
||||
const RELATIVE_TOLERANCE: f64 = 0.0625;
|
||||
|
||||
const NB_TESTS: usize = 1000;
|
||||
|
||||
fn lwe_encrypt_pbs_decrypt_custom_mod<Scalar>(params: ClassicTestParams<Scalar>)
|
||||
where
|
||||
Scalar: UnsignedTorus + Sync + Send + CastFrom<usize> + CastInto<usize>,
|
||||
{
|
||||
let input_lwe_dimension = params.lwe_dimension;
|
||||
let lwe_noise_distribution = params.lwe_noise_distribution;
|
||||
let glwe_noise_distribution = params.glwe_noise_distribution;
|
||||
let ciphertext_modulus = params.ciphertext_modulus;
|
||||
let message_modulus_log = params.message_modulus_log;
|
||||
let msg_modulus = Scalar::ONE.shl(message_modulus_log.0);
|
||||
let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus);
|
||||
let glwe_dimension = params.glwe_dimension;
|
||||
let polynomial_size = params.polynomial_size;
|
||||
let pbs_decomposition_base_log = params.pbs_base_log;
|
||||
let pbs_decomposition_level_count = params.pbs_level;
|
||||
let number_of_messages = 1;
|
||||
|
||||
let gpu_index = 0;
|
||||
let stream = CudaStreams::new_single_gpu(gpu_index);
|
||||
|
||||
let modulus_as_f64 = if ciphertext_modulus.is_native_modulus() {
|
||||
2.0f64.powi(Scalar::BITS as i32)
|
||||
} else {
|
||||
ciphertext_modulus.get_custom_modulus() as f64
|
||||
};
|
||||
|
||||
let expected_variance = pbs_variance_132_bits_security_gaussian(
|
||||
input_lwe_dimension,
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
pbs_decomposition_base_log,
|
||||
pbs_decomposition_level_count,
|
||||
modulus_as_f64,
|
||||
);
|
||||
|
||||
let mut rsc = TestResources::new();
|
||||
|
||||
let f = |x: Scalar| x;
|
||||
|
||||
let delta: Scalar = encoding_with_padding / msg_modulus;
|
||||
let mut msg = msg_modulus;
|
||||
|
||||
let num_samples = NB_TESTS * <Scalar as CastInto<usize>>::cast_into(msg);
|
||||
let mut noise_samples = Vec::with_capacity(num_samples);
|
||||
|
||||
let input_lwe_secret_key = allocate_and_generate_new_binary_lwe_secret_key(
|
||||
input_lwe_dimension,
|
||||
&mut rsc.secret_random_generator,
|
||||
);
|
||||
|
||||
let output_glwe_secret_key = allocate_and_generate_new_binary_glwe_secret_key(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
&mut rsc.secret_random_generator,
|
||||
);
|
||||
|
||||
let output_lwe_secret_key = output_glwe_secret_key.as_lwe_secret_key();
|
||||
let output_lwe_dimension = output_lwe_secret_key.lwe_dimension();
|
||||
|
||||
let accumulator = generate_programmable_bootstrap_glwe_lut(
|
||||
polynomial_size,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
msg_modulus.cast_into(),
|
||||
ciphertext_modulus,
|
||||
delta,
|
||||
f,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&accumulator,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let mut bsk = LweBootstrapKey::new(
|
||||
Scalar::ZERO,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
polynomial_size,
|
||||
pbs_decomposition_base_log,
|
||||
pbs_decomposition_level_count,
|
||||
input_lwe_dimension,
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
par_generate_lwe_bootstrap_key(
|
||||
&input_lwe_secret_key,
|
||||
&output_glwe_secret_key,
|
||||
&mut bsk,
|
||||
glwe_noise_distribution,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&*bsk,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let d_bsk = CudaLweBootstrapKey::from_lwe_bootstrap_key(&bsk, &stream);
|
||||
while msg != Scalar::ZERO {
|
||||
msg = msg.wrapping_sub(Scalar::ONE);
|
||||
|
||||
let current_run_samples: Vec<_> = (0..NB_TESTS)
|
||||
.into_par_iter()
|
||||
.map(|_| {
|
||||
let mut rsc = TestResources::new();
|
||||
|
||||
let plaintext = Plaintext(msg * delta);
|
||||
|
||||
let lwe_ciphertext_in = allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&input_lwe_secret_key,
|
||||
plaintext,
|
||||
lwe_noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut rsc.encryption_random_generator,
|
||||
);
|
||||
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&lwe_ciphertext_in,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let d_lwe_ciphertext_in =
|
||||
CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &stream);
|
||||
let mut d_out_pbs_ct = CudaLweCiphertextList::new(
|
||||
output_lwe_dimension,
|
||||
LweCiphertextCount(1),
|
||||
ciphertext_modulus,
|
||||
&stream,
|
||||
);
|
||||
let d_accumulator =
|
||||
CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &stream);
|
||||
|
||||
let mut test_vector_indexes: Vec<Scalar> = vec![Scalar::ZERO; number_of_messages];
|
||||
for (i, ind) in test_vector_indexes.iter_mut().enumerate() {
|
||||
*ind = <usize as CastInto<Scalar>>::cast_into(i);
|
||||
}
|
||||
|
||||
let mut d_test_vector_indexes =
|
||||
unsafe { CudaVec::<Scalar>::new_async(number_of_messages, &stream, 0) };
|
||||
unsafe {
|
||||
d_test_vector_indexes.copy_from_cpu_async(&test_vector_indexes, &stream, 0)
|
||||
};
|
||||
|
||||
let num_blocks = d_lwe_ciphertext_in.0.lwe_ciphertext_count.0;
|
||||
let lwe_indexes_usize: Vec<usize> = (0..num_blocks).collect_vec();
|
||||
let lwe_indexes = lwe_indexes_usize
|
||||
.iter()
|
||||
.map(|&x| <usize as CastInto<Scalar>>::cast_into(x))
|
||||
.collect_vec();
|
||||
let mut d_output_indexes =
|
||||
unsafe { CudaVec::<Scalar>::new_async(num_blocks, &stream, 0) };
|
||||
let mut d_input_indexes =
|
||||
unsafe { CudaVec::<Scalar>::new_async(num_blocks, &stream, 0) };
|
||||
unsafe {
|
||||
d_input_indexes.copy_from_cpu_async(&lwe_indexes, &stream, 0);
|
||||
d_output_indexes.copy_from_cpu_async(&lwe_indexes, &stream, 0);
|
||||
}
|
||||
|
||||
cuda_programmable_bootstrap_lwe_ciphertext(
|
||||
&d_lwe_ciphertext_in,
|
||||
&mut d_out_pbs_ct,
|
||||
&d_accumulator,
|
||||
&d_test_vector_indexes,
|
||||
&d_output_indexes,
|
||||
&d_input_indexes,
|
||||
LweCiphertextCount(num_blocks),
|
||||
&d_bsk,
|
||||
&stream,
|
||||
);
|
||||
|
||||
let out_pbs_ct = d_out_pbs_ct.into_lwe_ciphertext(&stream);
|
||||
assert!(check_encrypted_content_respects_mod(
|
||||
&out_pbs_ct,
|
||||
ciphertext_modulus
|
||||
));
|
||||
|
||||
let decrypted = decrypt_lwe_ciphertext(&output_lwe_secret_key, &out_pbs_ct);
|
||||
|
||||
let decoded = round_decode(decrypted.0, delta) % msg_modulus;
|
||||
|
||||
assert_eq!(decoded, f(msg));
|
||||
|
||||
torus_modular_diff(plaintext.0, decrypted.0, ciphertext_modulus)
|
||||
})
|
||||
.collect();
|
||||
|
||||
noise_samples.extend(current_run_samples);
|
||||
}
|
||||
|
||||
let measured_variance = variance(&noise_samples);
|
||||
|
||||
let minimal_variance = minimal_lwe_variance_for_132_bits_security_gaussian(
|
||||
bsk.output_lwe_dimension(),
|
||||
if ciphertext_modulus.is_native_modulus() {
|
||||
2.0f64.powi(Scalar::BITS as i32)
|
||||
} else {
|
||||
ciphertext_modulus.get_custom_modulus() as f64
|
||||
},
|
||||
);
|
||||
|
||||
// Have a log even if it's a test to have a trace in no capture mode to eyeball variances
|
||||
println!("measured_variance={measured_variance:?}");
|
||||
println!("expected_variance={expected_variance:?}");
|
||||
println!("minimal_variance={minimal_variance:?}");
|
||||
|
||||
if measured_variance.0 < expected_variance.0 {
|
||||
// We are in the clear as long as we have at least the noise for security
|
||||
assert!(
|
||||
measured_variance.0 >= minimal_variance.0,
|
||||
"Found insecure variance after PBS\n\
|
||||
measure_variance={measured_variance:?}\n\
|
||||
minimal_variance={minimal_variance:?}"
|
||||
);
|
||||
} else {
|
||||
// Check we are not too far from the expected variance if we are bigger
|
||||
let var_abs_diff = (expected_variance.0 - measured_variance.0).abs();
|
||||
let tolerance_threshold = RELATIVE_TOLERANCE * expected_variance.0;
|
||||
|
||||
assert!(
|
||||
var_abs_diff < tolerance_threshold,
|
||||
"Absolute difference for variance: {var_abs_diff}, \
|
||||
tolerance threshold: {tolerance_threshold}, \
|
||||
got variance: {measured_variance:?}, \
|
||||
expected variance: {expected_variance:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
create_parametrized_test!(lwe_encrypt_pbs_decrypt_custom_mod {
|
||||
NOISE_TEST_PARAMS_4_BITS_NATIVE_U64_132_BITS_GAUSSIAN
|
||||
});
|
||||
@@ -1,48 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
mod lwe_multi_bit_programmable_bootstrapping_noise;
|
||||
mod lwe_programmable_bootstrapping_noise;
|
||||
|
||||
#[allow(clippy::excessive_precision)]
|
||||
pub const NOISE_TEST_PARAMS_4_BITS_NATIVE_U64_132_BITS_GAUSSIAN: ClassicTestParams<u64> =
|
||||
ClassicTestParams {
|
||||
lwe_dimension: LweDimension(841),
|
||||
glwe_dimension: GlweDimension(1),
|
||||
polynomial_size: PolynomialSize(2048),
|
||||
lwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
3.1496674685772435e-06,
|
||||
)),
|
||||
glwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
2.845267479601915e-15,
|
||||
)),
|
||||
pbs_base_log: DecompositionBaseLog(22),
|
||||
pbs_level: DecompositionLevelCount(1),
|
||||
ks_level: DecompositionLevelCount(5),
|
||||
ks_base_log: DecompositionBaseLog(3),
|
||||
pfks_level: DecompositionLevelCount(0),
|
||||
pfks_base_log: DecompositionBaseLog(0),
|
||||
pfks_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(0.0)),
|
||||
cbs_level: DecompositionLevelCount(0),
|
||||
cbs_base_log: DecompositionBaseLog(0),
|
||||
message_modulus_log: MessageModulusLog(4),
|
||||
ciphertext_modulus: CiphertextModulus::new_native(),
|
||||
};
|
||||
#[allow(clippy::excessive_precision)]
|
||||
pub const NOISE_TEST_PARAMS_GPU_MULTI_BIT_GROUP_3_4_BITS_NATIVE_U64_132_BITS_GAUSSIAN:
|
||||
MultiBitTestParams<u64> = MultiBitTestParams {
|
||||
input_lwe_dimension: LweDimension(837),
|
||||
lwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
3.3747142481837397e-06,
|
||||
)),
|
||||
decomp_base_log: DecompositionBaseLog(21),
|
||||
decomp_level_count: DecompositionLevelCount(1),
|
||||
glwe_dimension: GlweDimension(1),
|
||||
polynomial_size: PolynomialSize(2048),
|
||||
glwe_noise_distribution: DynamicDistribution::new_gaussian_from_std_dev(StandardDev(
|
||||
2.845267479601915e-15,
|
||||
)),
|
||||
message_modulus_log: MessageModulusLog(4),
|
||||
ciphertext_modulus: CiphertextModulus::new_native(),
|
||||
grouping_factor: LweBskGroupingFactor(3),
|
||||
thread_count: ThreadCount(1),
|
||||
};
|
||||
@@ -631,8 +631,8 @@ where
|
||||
#[cfg(feature = "gpu")]
|
||||
InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| {
|
||||
let inner_result = cuda_key.key.key.div_rem(
|
||||
&self.ciphertext.on_gpu(),
|
||||
&rhs.ciphertext.on_gpu(),
|
||||
&*self.ciphertext.on_gpu(),
|
||||
&*rhs.ciphertext.on_gpu(),
|
||||
streams,
|
||||
);
|
||||
(
|
||||
@@ -977,7 +977,7 @@ generic_integer_impl_operation!(
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.div(&lhs.ciphertext.on_gpu(), &rhs.ciphertext.on_gpu(), streams);
|
||||
.div(&*lhs.ciphertext.on_gpu(), &*rhs.ciphertext.on_gpu(), streams);
|
||||
FheUint::new(inner_result, cuda_key.tag.clone())
|
||||
}),
|
||||
})
|
||||
@@ -1028,7 +1028,7 @@ generic_integer_impl_operation!(
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.rem(&lhs.ciphertext.on_gpu(), &rhs.ciphertext.on_gpu(), streams);
|
||||
.rem(&*lhs.ciphertext.on_gpu(), &*rhs.ciphertext.on_gpu(), streams);
|
||||
FheUint::new(inner_result, cuda_key.tag.clone())
|
||||
}),
|
||||
})
|
||||
|
||||
@@ -2514,15 +2514,13 @@ pub unsafe fn apply_bivariate_lut_kb_async<T: UnsignedInteger, B: Numeric>(
|
||||
///
|
||||
/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
|
||||
/// is required
|
||||
pub unsafe fn unchecked_unsigned_div_rem_integer_radix_kb_assign_async<
|
||||
T: UnsignedInteger,
|
||||
B: Numeric,
|
||||
>(
|
||||
pub unsafe fn unchecked_div_rem_integer_radix_kb_assign_async<T: UnsignedInteger, B: Numeric>(
|
||||
streams: &CudaStreams,
|
||||
quotient: &mut CudaVec<T>,
|
||||
remainder: &mut CudaVec<T>,
|
||||
numerator: &CudaVec<T>,
|
||||
divisor: &CudaVec<T>,
|
||||
is_signed: bool,
|
||||
bootstrapping_key: &CudaVec<B>,
|
||||
keyswitch_key: &CudaVec<T>,
|
||||
message_modulus: MessageModulus,
|
||||
@@ -2544,6 +2542,7 @@ pub unsafe fn unchecked_unsigned_div_rem_integer_radix_kb_assign_async<
|
||||
streams.ptr.as_ptr(),
|
||||
streams.gpu_indexes.as_ptr(),
|
||||
streams.len() as u32,
|
||||
is_signed,
|
||||
std::ptr::addr_of_mut!(mem_ptr),
|
||||
glwe_dimension.0 as u32,
|
||||
polynomial_size.0 as u32,
|
||||
@@ -2568,6 +2567,7 @@ pub unsafe fn unchecked_unsigned_div_rem_integer_radix_kb_assign_async<
|
||||
remainder.as_mut_c_ptr(0),
|
||||
numerator.as_c_ptr(0),
|
||||
divisor.as_c_ptr(0),
|
||||
is_signed,
|
||||
mem_ptr,
|
||||
bootstrapping_key.ptr.as_ptr(),
|
||||
keyswitch_key.ptr.as_ptr(),
|
||||
|
||||
@@ -1,32 +1,35 @@
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::LweBskGroupingFactor;
|
||||
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
|
||||
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
|
||||
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey};
|
||||
use crate::integer::gpu::{unchecked_unsigned_div_rem_integer_radix_kb_assign_async, PBSType};
|
||||
use crate::integer::gpu::{unchecked_div_rem_integer_radix_kb_assign_async, PBSType};
|
||||
|
||||
impl CudaServerKey {
|
||||
/// # Safety
|
||||
///
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn unsigned_unchecked_div_rem_assign_async(
|
||||
pub unsafe fn unchecked_div_rem_assign_async<T>(
|
||||
&self,
|
||||
quotient: &mut CudaUnsignedRadixCiphertext,
|
||||
remainder: &mut CudaUnsignedRadixCiphertext,
|
||||
numerator: &CudaUnsignedRadixCiphertext,
|
||||
divisor: &CudaUnsignedRadixCiphertext,
|
||||
quotient: &mut T,
|
||||
remainder: &mut T,
|
||||
numerator: &T,
|
||||
divisor: &T,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
// TODO add asserts from `unsigned_unchecked_div_rem_parallelized`
|
||||
) where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
// TODO add asserts from `unchecked_div_rem_parallelized`
|
||||
let num_blocks = divisor.as_ref().d_blocks.lwe_ciphertext_count().0 as u32;
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
unchecked_unsigned_div_rem_integer_radix_kb_assign_async(
|
||||
unchecked_div_rem_integer_radix_kb_assign_async(
|
||||
streams,
|
||||
&mut quotient.as_mut().d_blocks.0.d_vec,
|
||||
&mut remainder.as_mut().d_blocks.0.d_vec,
|
||||
&numerator.as_ref().d_blocks.0.d_vec,
|
||||
&divisor.as_ref().d_blocks.0.d_vec,
|
||||
T::IS_SIGNED,
|
||||
&d_bsk.d_vec,
|
||||
&self.key_switching_key.d_vec,
|
||||
self.message_modulus,
|
||||
@@ -49,12 +52,13 @@ impl CudaServerKey {
|
||||
);
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
unchecked_unsigned_div_rem_integer_radix_kb_assign_async(
|
||||
unchecked_div_rem_integer_radix_kb_assign_async(
|
||||
streams,
|
||||
&mut quotient.as_mut().d_blocks.0.d_vec,
|
||||
&mut remainder.as_mut().d_blocks.0.d_vec,
|
||||
&numerator.as_ref().d_blocks.0.d_vec,
|
||||
&divisor.as_ref().d_blocks.0.d_vec,
|
||||
T::IS_SIGNED,
|
||||
&d_multibit_bsk.d_vec,
|
||||
&self.key_switching_key.d_vec,
|
||||
self.message_modulus,
|
||||
@@ -82,33 +86,31 @@ impl CudaServerKey {
|
||||
remainder.as_mut().info = remainder.as_ref().info.after_div_rem();
|
||||
}
|
||||
|
||||
pub fn unsigned_unchecked_div_rem_assign(
|
||||
pub fn unchecked_div_rem_assign<T>(
|
||||
&self,
|
||||
quotient: &mut CudaUnsignedRadixCiphertext,
|
||||
remainder: &mut CudaUnsignedRadixCiphertext,
|
||||
numerator: &CudaUnsignedRadixCiphertext,
|
||||
divisor: &CudaUnsignedRadixCiphertext,
|
||||
quotient: &mut T,
|
||||
remainder: &mut T,
|
||||
numerator: &T,
|
||||
divisor: &T,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
) where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
unsafe {
|
||||
self.unsigned_unchecked_div_rem_assign_async(
|
||||
quotient, remainder, numerator, divisor, streams,
|
||||
);
|
||||
self.unchecked_div_rem_assign_async(quotient, remainder, numerator, divisor, streams);
|
||||
}
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
pub fn unchecked_div_rem(
|
||||
&self,
|
||||
numerator: &CudaUnsignedRadixCiphertext,
|
||||
divisor: &CudaUnsignedRadixCiphertext,
|
||||
streams: &CudaStreams,
|
||||
) -> (CudaUnsignedRadixCiphertext, CudaUnsignedRadixCiphertext) {
|
||||
pub fn unchecked_div_rem<T>(&self, numerator: &T, divisor: &T, streams: &CudaStreams) -> (T, T)
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut quotient = unsafe { numerator.duplicate_async(streams) };
|
||||
let mut remainder = unsafe { numerator.duplicate_async(streams) };
|
||||
|
||||
unsafe {
|
||||
self.unsigned_unchecked_div_rem_assign_async(
|
||||
self.unchecked_div_rem_assign_async(
|
||||
&mut quotient,
|
||||
&mut remainder,
|
||||
numerator,
|
||||
@@ -120,12 +122,10 @@ impl CudaServerKey {
|
||||
(quotient, remainder)
|
||||
}
|
||||
|
||||
pub fn div_rem(
|
||||
&self,
|
||||
numerator: &CudaUnsignedRadixCiphertext,
|
||||
divisor: &CudaUnsignedRadixCiphertext,
|
||||
streams: &CudaStreams,
|
||||
) -> (CudaUnsignedRadixCiphertext, CudaUnsignedRadixCiphertext) {
|
||||
pub fn div_rem<T>(&self, numerator: &T, divisor: &T, streams: &CudaStreams) -> (T, T)
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut tmp_numerator;
|
||||
let mut tmp_divisor;
|
||||
|
||||
@@ -158,14 +158,16 @@ impl CudaServerKey {
|
||||
self.unchecked_div_rem(numerator, divisor, streams)
|
||||
}
|
||||
|
||||
pub fn div_rem_assign(
|
||||
pub fn div_rem_assign<T>(
|
||||
&self,
|
||||
quotient: &mut CudaUnsignedRadixCiphertext,
|
||||
remainder: &mut CudaUnsignedRadixCiphertext,
|
||||
numerator: &CudaUnsignedRadixCiphertext,
|
||||
divisor: &CudaUnsignedRadixCiphertext,
|
||||
quotient: &mut T,
|
||||
remainder: &mut T,
|
||||
numerator: &T,
|
||||
divisor: &T,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
) where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut tmp_numerator;
|
||||
let mut tmp_divisor;
|
||||
|
||||
@@ -196,38 +198,30 @@ impl CudaServerKey {
|
||||
};
|
||||
|
||||
unsafe {
|
||||
self.unsigned_unchecked_div_rem_assign_async(
|
||||
quotient, remainder, numerator, divisor, streams,
|
||||
);
|
||||
self.unchecked_div_rem_assign_async(quotient, remainder, numerator, divisor, streams);
|
||||
}
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
pub fn div(
|
||||
&self,
|
||||
numerator: &CudaUnsignedRadixCiphertext,
|
||||
divisor: &CudaUnsignedRadixCiphertext,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaUnsignedRadixCiphertext {
|
||||
pub fn div<T>(&self, numerator: &T, divisor: &T, streams: &CudaStreams) -> T
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let (q, _r) = self.div_rem(numerator, divisor, streams);
|
||||
q
|
||||
}
|
||||
|
||||
pub fn rem(
|
||||
&self,
|
||||
numerator: &CudaUnsignedRadixCiphertext,
|
||||
divisor: &CudaUnsignedRadixCiphertext,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaUnsignedRadixCiphertext {
|
||||
pub fn rem<T>(&self, numerator: &T, divisor: &T, streams: &CudaStreams) -> T
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let (_q, r) = self.div_rem(numerator, divisor, streams);
|
||||
r
|
||||
}
|
||||
pub fn div_assign(
|
||||
&self,
|
||||
numerator: &mut CudaUnsignedRadixCiphertext,
|
||||
divisor: &CudaUnsignedRadixCiphertext,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
pub fn div_assign<T>(&self, numerator: &mut T, divisor: &T, streams: &CudaStreams)
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut remainder = numerator.duplicate(streams);
|
||||
self.div_rem_assign(
|
||||
numerator,
|
||||
@@ -238,12 +232,10 @@ impl CudaServerKey {
|
||||
);
|
||||
}
|
||||
|
||||
pub fn rem_assign(
|
||||
&self,
|
||||
numerator: &mut CudaUnsignedRadixCiphertext,
|
||||
divisor: &CudaUnsignedRadixCiphertext,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
pub fn rem_assign<T>(&self, numerator: &mut T, divisor: &T, streams: &CudaStreams)
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
let mut quotient = numerator.duplicate(streams);
|
||||
self.div_rem_assign(
|
||||
&mut quotient,
|
||||
|
||||
@@ -734,7 +734,7 @@ impl CudaServerKey {
|
||||
T::from(CudaRadixCiphertext::new(trimmed_ct_list, trimmed_ct_info))
|
||||
}
|
||||
|
||||
pub fn generate_lookup_table<F>(&self, f: F) -> LookupTableOwned
|
||||
pub(crate) fn generate_lookup_table<F>(&self, f: F) -> LookupTableOwned
|
||||
where
|
||||
F: Fn(u64) -> u64,
|
||||
{
|
||||
@@ -826,7 +826,7 @@ impl CudaServerKey {
|
||||
///
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
pub unsafe fn apply_lookup_table_async(
|
||||
pub(crate) unsafe fn apply_lookup_table_async(
|
||||
&self,
|
||||
output: &mut CudaRadixCiphertext,
|
||||
input: &CudaRadixCiphertext,
|
||||
@@ -1005,7 +1005,7 @@ impl CudaServerKey {
|
||||
///
|
||||
/// - `stream` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until stream is synchronised
|
||||
pub(crate) unsafe fn apply_many_lookup_table_async(
|
||||
pub unsafe fn apply_many_lookup_table_async(
|
||||
&self,
|
||||
input: &CudaRadixCiphertext,
|
||||
lut: &ManyLookupTableOwned,
|
||||
|
||||
@@ -3,6 +3,7 @@ pub(crate) mod test_add;
|
||||
pub(crate) mod test_bitwise_op;
|
||||
pub(crate) mod test_cmux;
|
||||
pub(crate) mod test_comparison;
|
||||
pub(crate) mod test_div_mod;
|
||||
pub(crate) mod test_ilog2;
|
||||
pub(crate) mod test_mul;
|
||||
pub(crate) mod test_neg;
|
||||
@@ -523,3 +524,44 @@ where
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// for signed div_rem
|
||||
impl<'a, F>
|
||||
FunctionExecutor<
|
||||
(&'a SignedRadixCiphertext, &'a SignedRadixCiphertext),
|
||||
(SignedRadixCiphertext, SignedRadixCiphertext),
|
||||
> for GpuFunctionExecutor<F>
|
||||
where
|
||||
F: Fn(
|
||||
&CudaServerKey,
|
||||
&CudaSignedRadixCiphertext,
|
||||
&CudaSignedRadixCiphertext,
|
||||
&CudaStreams,
|
||||
) -> (CudaSignedRadixCiphertext, CudaSignedRadixCiphertext),
|
||||
{
|
||||
fn setup(&mut self, cks: &RadixClientKey, sks: Arc<ServerKey>) {
|
||||
self.setup_from_keys(cks, &sks);
|
||||
}
|
||||
|
||||
fn execute(
|
||||
&mut self,
|
||||
input: (&'a SignedRadixCiphertext, &'a SignedRadixCiphertext),
|
||||
) -> (SignedRadixCiphertext, SignedRadixCiphertext) {
|
||||
let context = self
|
||||
.context
|
||||
.as_ref()
|
||||
.expect("setup was not properly called");
|
||||
|
||||
let d_ctxt_1: CudaSignedRadixCiphertext =
|
||||
CudaSignedRadixCiphertext::from_signed_radix_ciphertext(input.0, &context.streams);
|
||||
let d_ctxt_2: CudaSignedRadixCiphertext =
|
||||
CudaSignedRadixCiphertext::from_signed_radix_ciphertext(input.1, &context.streams);
|
||||
|
||||
let d_res = (self.func)(&context.sks, &d_ctxt_1, &d_ctxt_2, &context.streams);
|
||||
|
||||
(
|
||||
d_res.0.to_signed_radix_ciphertext(&context.streams),
|
||||
d_res.1.to_signed_radix_ciphertext(&context.streams),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
use crate::integer::gpu::server_key::radix::tests_unsigned::{
|
||||
create_gpu_parametrized_test, GpuFunctionExecutor,
|
||||
};
|
||||
use crate::integer::gpu::CudaServerKey;
|
||||
use crate::integer::server_key::radix_parallel::tests_signed::test_div_rem::signed_unchecked_div_rem_test;
|
||||
use crate::shortint::parameters::*;
|
||||
|
||||
create_gpu_parametrized_test!(integer_signed_unchecked_div_rem);
|
||||
|
||||
fn integer_signed_unchecked_div_rem<P>(param: P)
|
||||
where
|
||||
P: Into<PBSParameters>,
|
||||
{
|
||||
let executor = GpuFunctionExecutor::new(&CudaServerKey::div_rem);
|
||||
signed_unchecked_div_rem_test(param, executor);
|
||||
}
|
||||
@@ -299,7 +299,7 @@ pub(crate) mod test {
|
||||
fn oprf_test_uniformity_ci_run_filter() {
|
||||
let sample_count: usize = 10_000;
|
||||
|
||||
let p_value_limit: f64 = 0.001;
|
||||
let p_value_limit: f64 = 0.000_01;
|
||||
|
||||
let random_bits_count = 3;
|
||||
|
||||
|
||||
@@ -265,7 +265,7 @@ pub(crate) mod test {
|
||||
fn oprf_test_uniformity_ci_run_filter() {
|
||||
let sample_count: usize = 100_000;
|
||||
|
||||
let p_value_limit: f64 = 0.001;
|
||||
let p_value_limit: f64 = 0.000_01;
|
||||
|
||||
use crate::shortint::gen_keys;
|
||||
use crate::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
|
||||
Reference in New Issue
Block a user