mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-08 11:35:02 -05:00
fix(concrete_cuda): add latest fixes on the tests by Pedro
This commit is contained in:
@@ -21,6 +21,7 @@ ExternalProject_Add(
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND cargo build --release COMMAND cargo build --release
|
||||
BINARY_DIR ${CONCRETE_CPU_BINARY_DIR}
|
||||
BUILD_ALWAYS true
|
||||
INSTALL_COMMAND ""
|
||||
LOG_BUILD ON)
|
||||
|
||||
|
||||
@@ -55,6 +55,8 @@ protected:
|
||||
uint64_t *d_lwe_in_ct_array;
|
||||
uint64_t *d_lwe_out_ct_array;
|
||||
int8_t *bit_extract_buffer;
|
||||
int input_lwe_dimension;
|
||||
int output_lwe_dimension;
|
||||
|
||||
public:
|
||||
// Test arithmetic functions
|
||||
@@ -85,24 +87,21 @@ public:
|
||||
concrete_cpu_construct_concrete_csprng(
|
||||
csprng, Uint128{.little_endian_bytes = {*seed}});
|
||||
|
||||
int input_lwe_dimension = glwe_dimension * polynomial_size;
|
||||
int output_lwe_dimension = lwe_dimension;
|
||||
input_lwe_dimension = glwe_dimension * polynomial_size;
|
||||
output_lwe_dimension = lwe_dimension;
|
||||
// Generate the keys
|
||||
generate_lwe_secret_keys(&lwe_sk_in_array, input_lwe_dimension, csprng,
|
||||
REPETITIONS);
|
||||
generate_lwe_secret_keys(&lwe_sk_out_array, output_lwe_dimension, csprng,
|
||||
REPETITIONS);
|
||||
generate_lwe_secret_keys(&lwe_sk_in_array, input_lwe_dimension, csprng, REPETITIONS);
|
||||
generate_lwe_secret_keys(&lwe_sk_out_array, output_lwe_dimension, csprng, REPETITIONS);
|
||||
generate_lwe_keyswitch_keys(
|
||||
stream, gpu_index, &d_ksk_array, lwe_sk_in_array, lwe_sk_out_array,
|
||||
input_lwe_dimension, output_lwe_dimension, ks_level, ks_base_log,
|
||||
csprng, lwe_modular_variance, REPETITIONS);
|
||||
generate_lwe_bootstrap_keys(
|
||||
stream, gpu_index, &d_fourier_bsk_array, lwe_sk_out_array,
|
||||
lwe_sk_in_array, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
lwe_sk_in_array, output_lwe_dimension, glwe_dimension, polynomial_size,
|
||||
pbs_level, pbs_base_log, csprng, glwe_modular_variance, REPETITIONS);
|
||||
plaintexts =
|
||||
generate_plaintexts(number_of_bits_of_message_including_padding, delta,
|
||||
number_of_inputs, REPETITIONS, SAMPLES);
|
||||
plaintexts = generate_plaintexts(
|
||||
number_of_bits_of_message_including_padding, delta, number_of_inputs, REPETITIONS, SAMPLES);
|
||||
|
||||
d_lwe_out_ct_array = (uint64_t *)cuda_malloc_async(
|
||||
(output_lwe_dimension + 1) * number_of_bits_to_extract *
|
||||
@@ -148,15 +147,15 @@ public:
|
||||
TEST_P(BitExtractionTestPrimitives_u64, bit_extraction) {
|
||||
void *v_stream = (void *)stream;
|
||||
int bsk_size = (glwe_dimension + 1) * (glwe_dimension + 1) * pbs_level *
|
||||
polynomial_size * (lwe_dimension + 1);
|
||||
polynomial_size * (output_lwe_dimension + 1);
|
||||
int ksk_size =
|
||||
ks_level * (lwe_dimension + 1) * glwe_dimension * polynomial_size;
|
||||
ks_level * input_lwe_dimension * (output_lwe_dimension + 1);
|
||||
for (uint r = 0; r < REPETITIONS; r++) {
|
||||
double *d_fourier_bsk = d_fourier_bsk_array + (ptrdiff_t)(bsk_size * r);
|
||||
uint64_t *d_ksk = d_ksk_array + (ptrdiff_t)(ksk_size * r);
|
||||
uint64_t *lwe_in_sk =
|
||||
lwe_sk_in_array + (ptrdiff_t)(glwe_dimension * polynomial_size * r);
|
||||
uint64_t *lwe_sk_out = lwe_sk_out_array + (ptrdiff_t)(r * lwe_dimension);
|
||||
lwe_sk_in_array + (ptrdiff_t)(input_lwe_dimension * r);
|
||||
uint64_t *lwe_sk_out = lwe_sk_out_array + (ptrdiff_t)(r * output_lwe_dimension);
|
||||
for (uint s = 0; s < SAMPLES; s++) {
|
||||
for (int i = 0; i < number_of_inputs; i++) {
|
||||
uint64_t plaintext = plaintexts[r * SAMPLES * number_of_inputs +
|
||||
@@ -164,15 +163,13 @@ TEST_P(BitExtractionTestPrimitives_u64, bit_extraction) {
|
||||
uint64_t *lwe_in_ct =
|
||||
lwe_in_ct_array +
|
||||
(ptrdiff_t)(
|
||||
(r * SAMPLES * number_of_inputs + s * number_of_inputs + i) *
|
||||
(glwe_dimension * polynomial_size + 1));
|
||||
i * (input_lwe_dimension + 1));
|
||||
concrete_cpu_encrypt_lwe_ciphertext_u64(
|
||||
lwe_in_sk, lwe_in_ct, plaintext, glwe_dimension * polynomial_size,
|
||||
lwe_in_sk, lwe_in_ct, plaintext, input_lwe_dimension,
|
||||
lwe_modular_variance, csprng, &CONCRETE_CSPRNG_VTABLE);
|
||||
}
|
||||
cuda_synchronize_stream(v_stream);
|
||||
cuda_memcpy_async_to_gpu(d_lwe_in_ct_array, lwe_in_ct_array,
|
||||
(glwe_dimension * polynomial_size + 1) *
|
||||
(input_lwe_dimension + 1) *
|
||||
number_of_inputs * sizeof(uint64_t),
|
||||
stream, gpu_index);
|
||||
|
||||
@@ -181,31 +178,29 @@ TEST_P(BitExtractionTestPrimitives_u64, bit_extraction) {
|
||||
stream, gpu_index, (void *)d_lwe_out_ct_array,
|
||||
(void *)d_lwe_in_ct_array, bit_extract_buffer, (void *)d_ksk,
|
||||
(void *)d_fourier_bsk, number_of_bits_to_extract, delta_log,
|
||||
glwe_dimension * polynomial_size, lwe_dimension, glwe_dimension,
|
||||
input_lwe_dimension, output_lwe_dimension, glwe_dimension,
|
||||
polynomial_size, pbs_base_log, pbs_level, ks_base_log, ks_level,
|
||||
number_of_inputs, cuda_get_max_shared_memory(gpu_index));
|
||||
|
||||
// Copy result back
|
||||
cuda_synchronize_stream(v_stream);
|
||||
cuda_memcpy_async_to_cpu(lwe_out_ct_array, d_lwe_out_ct_array,
|
||||
(lwe_dimension + 1) * number_of_bits_to_extract *
|
||||
(output_lwe_dimension + 1) * number_of_bits_to_extract *
|
||||
number_of_inputs * sizeof(uint64_t),
|
||||
stream, gpu_index);
|
||||
cuda_synchronize_stream(v_stream);
|
||||
|
||||
for (int j = 0; j < number_of_inputs; j++) {
|
||||
uint64_t *result_array =
|
||||
lwe_out_ct_array +
|
||||
(ptrdiff_t)(j * number_of_bits_to_extract * (lwe_dimension + 1));
|
||||
(ptrdiff_t)(j * number_of_bits_to_extract * (output_lwe_dimension + 1));
|
||||
uint64_t plaintext = plaintexts[r * SAMPLES * number_of_inputs +
|
||||
s * number_of_inputs + j];
|
||||
for (int i = 0; i < number_of_bits_to_extract; i++) {
|
||||
uint64_t *result_ct =
|
||||
result_array + (ptrdiff_t)((number_of_bits_to_extract - 1 - i) *
|
||||
(lwe_dimension + 1));
|
||||
(output_lwe_dimension + 1));
|
||||
uint64_t decrypted_message = 0;
|
||||
concrete_cpu_decrypt_lwe_ciphertext_u64(
|
||||
lwe_sk_out, result_ct, lwe_dimension, &decrypted_message);
|
||||
lwe_sk_out, result_ct, output_lwe_dimension, &decrypted_message);
|
||||
// Round after decryption
|
||||
uint64_t decrypted_rounded =
|
||||
closest_representable(decrypted_message, 1, 1);
|
||||
@@ -225,13 +220,11 @@ TEST_P(BitExtractionTestPrimitives_u64, bit_extraction) {
|
||||
bit_extract_params_u64 = ::testing::Values(
|
||||
// n, k, N, lwe_variance, glwe_variance, pbs_base_log, pbs_level,
|
||||
// ks_base_log, ks_level, number_of_message_bits,
|
||||
// number_of_bits_to_extract
|
||||
// number_of_bits_to_extract, number_of_inputs
|
||||
(BitExtractionTestParams){585, 1, 1024, 7.52316384526264e-37,
|
||||
7.52316384526264e-37, 10, 2, 4, 7, 5, 5,
|
||||
1}); //,
|
||||
// (BitExtractionTestParams){585, 1, 1024, 7.52316384526264e-37,
|
||||
// 7.52316384526264e-37, 10, 2, 4, 7, 5, 5,
|
||||
// 2});
|
||||
7.52316384526264e-37, 10, 2, 4, 7, 5, 5, 1},
|
||||
(BitExtractionTestParams){481, 1, 1024, 7.52316384526264e-37,
|
||||
7.52316384526264e-37, 4, 7, 1, 9, 5, 5, 1});
|
||||
|
||||
std::string
|
||||
printParamName(::testing::TestParamInfo<BitExtractionTestParams> p) {
|
||||
|
||||
@@ -81,17 +81,15 @@ public:
|
||||
csprng, Uint128{.little_endian_bytes = {*seed}});
|
||||
|
||||
// Generate the keys
|
||||
generate_lwe_secret_keys(&lwe_sk_in_array, lwe_dimension, csprng,
|
||||
REPETITIONS);
|
||||
generate_lwe_secret_keys(&lwe_sk_in_array, lwe_dimension, csprng, REPETITIONS);
|
||||
generate_lwe_secret_keys(&lwe_sk_out_array,
|
||||
glwe_dimension * polynomial_size, csprng,
|
||||
REPETITIONS);
|
||||
glwe_dimension * polynomial_size, csprng, REPETITIONS);
|
||||
generate_lwe_bootstrap_keys(
|
||||
stream, gpu_index, &d_fourier_bsk_array, lwe_sk_in_array,
|
||||
lwe_sk_out_array, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
pbs_level, pbs_base_log, csprng, glwe_modular_variance, REPETITIONS);
|
||||
plaintexts = generate_plaintexts(payload_modulus, delta, number_of_inputs,
|
||||
REPETITIONS, SAMPLES);
|
||||
plaintexts = generate_plaintexts(payload_modulus, delta, number_of_inputs, REPETITIONS,
|
||||
SAMPLES);
|
||||
|
||||
// Create the LUT
|
||||
uint64_t *lut_pbs_identity = generate_identity_lut_pbs(
|
||||
@@ -227,6 +225,10 @@ TEST_P(BootstrapTestPrimitives_u64, amortized_bootstrap) {
|
||||
}
|
||||
|
||||
TEST_P(BootstrapTestPrimitives_u64, low_latency_bootstrap) {
|
||||
int number_of_sm = 0;
|
||||
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0);
|
||||
if(number_of_inputs > number_of_sm * 4 / (glwe_dimension + 1) / pbs_level)
|
||||
GTEST_SKIP() << "The Low Latency PBS does not support this configuration";
|
||||
uint64_t *lwe_ct_out_array =
|
||||
(uint64_t *)malloc((glwe_dimension * polynomial_size + 1) *
|
||||
number_of_inputs * sizeof(uint64_t));
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
const unsigned REPETITIONS = 5;
|
||||
const unsigned SAMPLES = 100;
|
||||
const unsigned SAMPLES = 10;
|
||||
|
||||
typedef struct {
|
||||
int lwe_dimension;
|
||||
@@ -65,11 +65,13 @@ protected:
|
||||
uint64_t *d_lwe_ct_out_array;
|
||||
uint64_t *d_lut_vector;
|
||||
int8_t *wop_pbs_buffer;
|
||||
int input_lwe_dimension;
|
||||
|
||||
public:
|
||||
// Test arithmetic functions
|
||||
void SetUp() {
|
||||
stream = cuda_create_stream(0);
|
||||
void *v_stream = (void *)stream;
|
||||
|
||||
// TestParams
|
||||
lwe_dimension = (int)GetParam().lwe_dimension;
|
||||
@@ -98,16 +100,14 @@ public:
|
||||
concrete_cpu_construct_concrete_csprng(
|
||||
csprng, Uint128{.little_endian_bytes = {*seed}});
|
||||
|
||||
int input_lwe_dimension = glwe_dimension * polynomial_size;
|
||||
input_lwe_dimension = glwe_dimension * polynomial_size;
|
||||
// Generate the keys
|
||||
generate_lwe_secret_keys(&lwe_sk_in_array, input_lwe_dimension, csprng,
|
||||
REPETITIONS);
|
||||
generate_lwe_secret_keys(&lwe_sk_out_array, lwe_dimension, csprng,
|
||||
REPETITIONS);
|
||||
generate_lwe_keyswitch_keys(
|
||||
stream, gpu_index, &d_ksk_array, lwe_sk_in_array, lwe_sk_out_array,
|
||||
input_lwe_dimension, lwe_dimension, ks_level, ks_base_log, csprng,
|
||||
lwe_modular_variance, REPETITIONS);
|
||||
generate_lwe_secret_keys(&lwe_sk_in_array, input_lwe_dimension, csprng, REPETITIONS);
|
||||
generate_lwe_secret_keys(&lwe_sk_out_array, lwe_dimension, csprng, REPETITIONS);
|
||||
generate_lwe_keyswitch_keys(stream, gpu_index, &d_ksk_array,
|
||||
lwe_sk_in_array, lwe_sk_out_array,
|
||||
input_lwe_dimension, lwe_dimension, ks_level,
|
||||
ks_base_log, csprng, lwe_modular_variance, REPETITIONS);
|
||||
generate_lwe_bootstrap_keys(
|
||||
stream, gpu_index, &d_fourier_bsk_array, lwe_sk_out_array,
|
||||
lwe_sk_in_array, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
@@ -138,7 +138,6 @@ public:
|
||||
cuda_memcpy_async_to_gpu(d_lut_vector, big_lut,
|
||||
lut_num * lut_size * sizeof(uint64_t), stream,
|
||||
gpu_index);
|
||||
free(big_lut);
|
||||
// Execute scratch
|
||||
scratch_cuda_wop_pbs_64(stream, gpu_index, &wop_pbs_buffer,
|
||||
(uint32_t *)&delta_log, &cbs_delta_log,
|
||||
@@ -155,6 +154,9 @@ public:
|
||||
(uint64_t *)malloc((input_lwe_dimension + 1) * tau * sizeof(uint64_t));
|
||||
lwe_out_ct_array =
|
||||
(uint64_t *)malloc((input_lwe_dimension + 1) * tau * sizeof(uint64_t));
|
||||
|
||||
cuda_synchronize_stream(v_stream);
|
||||
free(big_lut);
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
@@ -200,13 +202,11 @@ TEST_P(WopBootstrapTestPrimitives_u64, wop_pbs) {
|
||||
for (int t = 0; t < tau; t++) {
|
||||
uint64_t plaintext = plaintexts[r * SAMPLES * tau + s * tau + t];
|
||||
uint64_t *lwe_in_ct =
|
||||
lwe_in_ct_array + (ptrdiff_t)((r * SAMPLES * tau + s * tau + t) *
|
||||
(input_lwe_dimension + 1));
|
||||
lwe_in_ct_array + (ptrdiff_t)(t * (input_lwe_dimension + 1));
|
||||
concrete_cpu_encrypt_lwe_ciphertext_u64(
|
||||
lwe_sk_in, lwe_in_ct, plaintext, input_lwe_dimension,
|
||||
lwe_modular_variance, csprng, &CONCRETE_CSPRNG_VTABLE);
|
||||
}
|
||||
cuda_synchronize_stream(v_stream);
|
||||
cuda_memcpy_async_to_gpu(d_lwe_ct_in_array, lwe_in_ct_array,
|
||||
(input_lwe_dimension + 1) * tau *
|
||||
sizeof(uint64_t),
|
||||
@@ -223,22 +223,23 @@ TEST_P(WopBootstrapTestPrimitives_u64, wop_pbs) {
|
||||
cuda_get_max_shared_memory(gpu_index));
|
||||
|
||||
//// Copy result back
|
||||
// cuda_memcpy_async_to_cpu(lwe_out_ct_array, d_lwe_ct_out_array,
|
||||
//(input_lwe_dimension + 1) * tau * sizeof(uint64_t), stream, gpu_index);
|
||||
// cuda_synchronize_stream(v_stream);
|
||||
cuda_memcpy_async_to_cpu(lwe_out_ct_array, d_lwe_ct_out_array,
|
||||
(input_lwe_dimension + 1) * tau * sizeof(uint64_t), stream, gpu_index);
|
||||
cuda_synchronize_stream(v_stream);
|
||||
|
||||
// for (int i = 0; i < tau; i++) {
|
||||
// uint64_t *result_ct =
|
||||
// lwe_out_ct_array + (ptrdiff_t)(i * (input_lwe_dimension + 1));
|
||||
// uint64_t decrypted_message = 0;
|
||||
// concrete_cpu_decrypt_lwe_ciphertext_u64(
|
||||
// lwe_sk_in, result_ct, input_lwe_dimension, &decrypted_message);
|
||||
// // Round after decryption
|
||||
// uint64_t decrypted =
|
||||
// closest_representable(decrypted_message, 1, p) >> delta_log;
|
||||
// uint64_t expected = plaintext >> delta_log;
|
||||
// EXPECT_EQ(decrypted, expected);
|
||||
//}
|
||||
for (int i = 0; i < tau; i++) {
|
||||
uint64_t plaintext = plaintexts[r * SAMPLES * tau + s * tau + i];
|
||||
uint64_t *result_ct =
|
||||
lwe_out_ct_array + (ptrdiff_t)(i * (input_lwe_dimension + 1));
|
||||
uint64_t decrypted_message = 0;
|
||||
concrete_cpu_decrypt_lwe_ciphertext_u64(
|
||||
lwe_sk_in, result_ct, input_lwe_dimension, &decrypted_message);
|
||||
// Round after decryption
|
||||
uint64_t decrypted =
|
||||
closest_representable(decrypted_message, 1, p) >> delta_log;
|
||||
uint64_t expected = plaintext >> delta_log;
|
||||
EXPECT_EQ(decrypted, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -250,17 +251,17 @@ TEST_P(WopBootstrapTestPrimitives_u64, wop_pbs) {
|
||||
// n, k, N, lwe_variance, glwe_variance, pbs_base_log, pbs_level,
|
||||
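// ks_base_log, ks_level, tau
// NOTE(review): each initializer below supplies 14 values but this legend
// names only 10 fields; confirm the full field order against the
// WopBootstrapTestParams definition and extend the legend accordingly.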
|
||||
(WopBootstrapTestParams){481, 2, 512, 7.52316384526264e-37,
|
||||
7.52316384526264e-37, 4, 9, 1, 9, 4, 9, 6, 4,
|
||||
1} //,
|
||||
//(WopBootstrapTestParams){481, 2, 512, 7.52316384526264e-37,
|
||||
// 7.52316384526264e-37, 4, 9, 1, 9, 4, 9, 6, 4,
|
||||
// 2} //,
|
||||
//(WopBootstrapTestParams){481, 2, 1024, 7.52316384526264e-37,
|
||||
// 7.52316384526264e-37, 4,
|
||||
// 9, 1, 9, 4, 9, 6, 4, 1},
|
||||
//(WopBootstrapTestParams){481, 2, 1024, 7.52316384526264e-37,
|
||||
// 7.52316384526264e-37, 4,
|
||||
// 9, 1, 9, 4, 9, 6, 4, 2}
|
||||
7.52316384526264e-37, 4,
|
||||
9, 1, 9, 4, 9, 6, 4, 1}
|
||||
// (WopBootstrapTestParams){481, 2, 512, 7.52316384526264e-37,
|
||||
// 7.52316384526264e-37, 4, 9, 1, 9, 4, 9, 6, 4,
|
||||
// 2} ,
|
||||
// (WopBootstrapTestParams){481, 2, 1024, 7.52316384526264e-37,
|
||||
// 7.52316384526264e-37, 4,
|
||||
// 9, 1, 9, 4, 9, 6, 4, 1},
|
||||
// (WopBootstrapTestParams){481, 2, 1024, 7.52316384526264e-37,
|
||||
// 7.52316384526264e-37, 4,
|
||||
// 9, 1, 9, 4, 9, 6, 4, 2}
|
||||
);
|
||||
|
||||
std::string printParamName(::testing::TestParamInfo<WopBootstrapTestParams> p) {
|
||||
|
||||
@@ -12,8 +12,9 @@
|
||||
// The payload_modulus is the message modulus times the carry modulus
|
||||
// (so the total message modulus)
|
||||
uint64_t *generate_plaintexts(uint64_t payload_modulus, uint64_t delta,
|
||||
int number_of_inputs, const unsigned repetitions,
|
||||
const unsigned samples) {
|
||||
int number_of_inputs, const unsigned repetitions, const unsigned
|
||||
samples) {
|
||||
|
||||
uint64_t *plaintext_array = (uint64_t *)malloc(
|
||||
repetitions * samples * number_of_inputs * sizeof(uint64_t));
|
||||
std::random_device rd;
|
||||
@@ -120,8 +121,7 @@ uint64_t *generate_identity_lut_cmux_tree(int polynomial_size, int num_lut,
|
||||
// Generate repetitions LWE secret keys
|
||||
void generate_lwe_secret_keys(uint64_t **lwe_sk_array, int lwe_dimension,
|
||||
Csprng *csprng, const unsigned repetitions) {
|
||||
int lwe_sk_array_size = lwe_dimension * repetitions;
|
||||
*lwe_sk_array = (uint64_t *)malloc(lwe_sk_array_size * sizeof(uint64_t));
|
||||
*lwe_sk_array = (uint64_t *)malloc(lwe_dimension * repetitions * sizeof(uint64_t));
|
||||
int shift = 0;
|
||||
for (uint r = 0; r < repetitions; r++) {
|
||||
// Generate the lwe secret key for each repetition
|
||||
@@ -134,8 +134,7 @@ void generate_lwe_secret_keys(uint64_t **lwe_sk_array, int lwe_dimension,
|
||||
|
||||
// Generate repetitions GLWE secret keys
|
||||
void generate_glwe_secret_keys(uint64_t **glwe_sk_array, int glwe_dimension,
|
||||
int polynomial_size, Csprng *csprng,
|
||||
const unsigned repetitions) {
|
||||
int polynomial_size, Csprng *csprng, const unsigned repetitions) {
|
||||
int glwe_sk_array_size = glwe_dimension * polynomial_size * repetitions;
|
||||
*glwe_sk_array = (uint64_t *)malloc(glwe_sk_array_size * sizeof(uint64_t));
|
||||
int shift = 0;
|
||||
@@ -149,11 +148,13 @@ void generate_glwe_secret_keys(uint64_t **glwe_sk_array, int glwe_dimension,
|
||||
}
|
||||
|
||||
// Generate repetitions LWE bootstrap keys
|
||||
void generate_lwe_bootstrap_keys(
|
||||
cudaStream_t *stream, int gpu_index, double **d_fourier_bsk_array,
|
||||
uint64_t *lwe_sk_in_array, uint64_t *lwe_sk_out_array, int lwe_dimension,
|
||||
int glwe_dimension, int polynomial_size, int pbs_level, int pbs_base_log,
|
||||
Csprng *csprng, double variance, const unsigned repetitions) {
|
||||
void generate_lwe_bootstrap_keys(cudaStream_t *stream, int gpu_index,
|
||||
double **d_fourier_bsk_array,
|
||||
uint64_t *lwe_sk_in_array,
|
||||
uint64_t *lwe_sk_out_array, int lwe_dimension,
|
||||
int glwe_dimension, int polynomial_size,
|
||||
int pbs_level, int pbs_base_log,
|
||||
Csprng *csprng, double variance, const unsigned repetitions) {
|
||||
void *v_stream = (void *)stream;
|
||||
int bsk_size = (glwe_dimension + 1) * (glwe_dimension + 1) * pbs_level *
|
||||
polynomial_size * (lwe_dimension + 1);
|
||||
@@ -174,7 +175,6 @@ void generate_lwe_bootstrap_keys(
|
||||
lwe_sk_out_array + (ptrdiff_t)(shift_out), lwe_dimension,
|
||||
polynomial_size, glwe_dimension, pbs_level, pbs_base_log, variance,
|
||||
Parallelism(1), csprng, &CONCRETE_CSPRNG_VTABLE);
|
||||
cuda_synchronize_stream(v_stream);
|
||||
double *d_fourier_bsk = *d_fourier_bsk_array + (ptrdiff_t)(shift_bsk);
|
||||
uint64_t *bsk = bsk_array + (ptrdiff_t)(shift_bsk);
|
||||
cuda_synchronize_stream(v_stream);
|
||||
@@ -185,18 +185,16 @@ void generate_lwe_bootstrap_keys(
|
||||
shift_out += glwe_dimension * polynomial_size;
|
||||
shift_bsk += bsk_size;
|
||||
}
|
||||
cuda_synchronize_stream(v_stream);
|
||||
free(bsk_array);
|
||||
}
|
||||
|
||||
// Generate repetitions keyswitch keys
|
||||
void generate_lwe_keyswitch_keys(cudaStream_t *stream, int gpu_index,
|
||||
uint64_t **d_ksk_array,
|
||||
uint64_t *lwe_sk_in_array,
|
||||
uint64_t *lwe_sk_out_array,
|
||||
int input_lwe_dimension,
|
||||
int output_lwe_dimension, int ksk_level,
|
||||
int ksk_base_log, Csprng *csprng,
|
||||
double variance, const unsigned repetitions) {
|
||||
void generate_lwe_keyswitch_keys(
|
||||
cudaStream_t *stream, int gpu_index, uint64_t **d_ksk_array,
|
||||
uint64_t *lwe_sk_in_array, uint64_t *lwe_sk_out_array,
|
||||
int input_lwe_dimension, int output_lwe_dimension, int ksk_level,
|
||||
int ksk_base_log, Csprng *csprng, double variance, const unsigned repetitions) {
|
||||
|
||||
int ksk_size = ksk_level * (output_lwe_dimension + 1) * input_lwe_dimension;
|
||||
int ksk_array_size = ksk_size * repetitions;
|
||||
@@ -225,6 +223,7 @@ void generate_lwe_keyswitch_keys(cudaStream_t *stream, int gpu_index,
|
||||
shift_out += output_lwe_dimension;
|
||||
shift_ksk += ksk_size;
|
||||
}
|
||||
cuda_synchronize_stream(stream);
|
||||
free(ksk_array);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user