fix(concrete_cuda): fix cleartext mult and enhance linalg tests

This commit is contained in:
Agnes Leroy
2023-03-16 13:30:32 +01:00
committed by Agnès Leroy
parent fdd39f363f
commit 6efe5f3a3b
3 changed files with 168 additions and 109 deletions

View File

@@ -16,8 +16,8 @@ cleartext_multiplication(T *output, T *lwe_input, T *cleartext_input,
uint32_t input_lwe_dimension, uint32_t num_entries) {
int tid = threadIdx.x;
if (tid < num_entries) {
int index = blockIdx.x * blockDim.x + tid;
int index = blockIdx.x * blockDim.x + tid;
if (index < num_entries) {
int cleartext_index = index / (input_lwe_dimension + 1);
// Here we take advantage of the wrapping behaviour of uint
output[index] = lwe_input[index] * cleartext_input[cleartext_index];

View File

@@ -158,7 +158,7 @@ TEST_P(KeyswitchTestPrimitives_u64, keyswitch) {
// Compute the rounding bit
uint64_t rounding = (decrypted & rounding_bit) << 1;
uint64_t decoded = (decrypted + rounding) / delta;
ASSERT_EQ(decoded, plaintext / delta);
EXPECT_EQ(decoded, plaintext / delta);
}
}
}

View File

@@ -16,6 +16,7 @@ typedef struct {
double noise_variance;
int message_modulus;
int carry_modulus;
int number_of_inputs;
} LinearAlgebraTestParams;
class LinearAlgebraTestPrimitives_u64
@@ -25,6 +26,7 @@ protected:
double noise_variance;
int message_modulus;
int carry_modulus;
int number_of_inputs;
int payload_modulus;
uint64_t delta;
Csprng *csprng;
@@ -49,9 +51,10 @@ public:
// TestParams
lwe_dimension = (int)GetParam().lwe_dimension;
noise_variance = (int)GetParam().noise_variance;
noise_variance = (double)GetParam().noise_variance;
message_modulus = (int)GetParam().message_modulus;
carry_modulus = (int)GetParam().carry_modulus;
number_of_inputs = (int)GetParam().number_of_inputs;
payload_modulus = message_modulus * carry_modulus;
// Value of the shift we multiply our messages by
@@ -66,21 +69,27 @@ public:
// Generate the keys
generate_lwe_secret_keys(&lwe_sk_array, lwe_dimension, csprng, REPETITIONS);
plaintexts_1 =
generate_plaintexts(payload_modulus, delta, 1, REPETITIONS, SAMPLES);
plaintexts_2 =
generate_plaintexts(payload_modulus, delta, 1, REPETITIONS, SAMPLES);
plaintexts_1 = generate_plaintexts(payload_modulus, delta, number_of_inputs,
REPETITIONS, SAMPLES);
plaintexts_2 = generate_plaintexts(payload_modulus, delta, number_of_inputs,
REPETITIONS, SAMPLES);
d_lwe_in_1_ct = (uint64_t *)cuda_malloc_async(
(lwe_dimension + 1) * sizeof(uint64_t), stream, gpu_index);
number_of_inputs * (lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
d_lwe_in_2_ct = (uint64_t *)cuda_malloc_async(
(lwe_dimension + 1) * sizeof(uint64_t), stream, gpu_index);
number_of_inputs * (lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
d_lwe_out_ct = (uint64_t *)cuda_malloc_async(
(lwe_dimension + 1) * sizeof(uint64_t), stream, gpu_index);
number_of_inputs * (lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
lwe_in_1_ct = (uint64_t *)malloc((lwe_dimension + 1) * sizeof(uint64_t));
lwe_in_2_ct = (uint64_t *)malloc((lwe_dimension + 1) * sizeof(uint64_t));
lwe_out_ct = (uint64_t *)malloc((lwe_dimension + 1) * sizeof(uint64_t));
lwe_in_1_ct = (uint64_t *)malloc(number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t));
lwe_in_2_ct = (uint64_t *)malloc(number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t));
lwe_out_ct = (uint64_t *)malloc(number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t));
cuda_synchronize_stream(v_stream);
}
@@ -104,160 +113,210 @@ public:
};
TEST_P(LinearAlgebraTestPrimitives_u64, addition) {
void *v_stream = (void *)stream;
// Here execute the PBS
for (uint r = 0; r < REPETITIONS; r++) {
for (uint s = 0; s < SAMPLES; s++) {
uint64_t plaintext_1 = plaintexts_1[r * SAMPLES + s];
uint64_t plaintext_2 = plaintexts_2[r * SAMPLES + s];
uint64_t *lwe_sk = lwe_sk_array + (ptrdiff_t)(r * lwe_dimension);
concrete_cpu_encrypt_lwe_ciphertext_u64(lwe_sk, lwe_in_1_ct, plaintext_1,
lwe_dimension, noise_variance,
csprng, &CONCRETE_CSPRNG_VTABLE);
concrete_cpu_encrypt_lwe_ciphertext_u64(lwe_sk, lwe_in_2_ct, plaintext_2,
lwe_dimension, noise_variance,
csprng, &CONCRETE_CSPRNG_VTABLE);
cuda_synchronize_stream(v_stream);
for (int i = 0; i < number_of_inputs; i++) {
uint64_t plaintext_1 = plaintexts_1[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
uint64_t plaintext_2 = plaintexts_2[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
concrete_cpu_encrypt_lwe_ciphertext_u64(
lwe_sk, lwe_in_1_ct + i * (lwe_dimension + 1), plaintext_1,
lwe_dimension, noise_variance, csprng, &CONCRETE_CSPRNG_VTABLE);
concrete_cpu_encrypt_lwe_ciphertext_u64(
lwe_sk, lwe_in_2_ct + i * (lwe_dimension + 1), plaintext_2,
lwe_dimension, noise_variance, csprng, &CONCRETE_CSPRNG_VTABLE);
}
cuda_memcpy_async_to_gpu(d_lwe_in_1_ct, lwe_in_1_ct,
(lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t),
stream, gpu_index);
cuda_memcpy_async_to_gpu(d_lwe_in_2_ct, lwe_in_2_ct,
(lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t),
stream, gpu_index);
// Execute addition
cuda_add_lwe_ciphertext_vector_64(
stream, gpu_index, (void *)d_lwe_out_ct, (void *)d_lwe_in_1_ct,
(void *)d_lwe_in_2_ct, lwe_dimension, 1);
(void *)d_lwe_in_2_ct, lwe_dimension, number_of_inputs);
// Copy result back
cuda_memcpy_async_to_cpu(lwe_out_ct, d_lwe_out_ct,
(lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
uint64_t decrypted = 0;
concrete_cpu_decrypt_lwe_ciphertext_u64(lwe_sk, lwe_out_ct, lwe_dimension,
&decrypted);
// The bit before the message
uint64_t rounding_bit = delta >> 1;
// Compute the rounding bit
uint64_t rounding = (decrypted & rounding_bit) << 1;
uint64_t decoded = (decrypted + rounding) / delta;
ASSERT_EQ(decoded, (plaintext_1 + plaintext_2) / delta);
cuda_synchronize_stream(v_stream);
number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t),
stream, gpu_index);
for (int i = 0; i < number_of_inputs; i++) {
uint64_t plaintext_1 = plaintexts_1[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
uint64_t plaintext_2 = plaintexts_2[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
uint64_t decrypted = 0;
concrete_cpu_decrypt_lwe_ciphertext_u64(
lwe_sk, lwe_out_ct + i * (lwe_dimension + 1), lwe_dimension,
&decrypted);
// The bit before the message
uint64_t rounding_bit = delta >> 1;
// Compute the rounding bit
uint64_t rounding = (decrypted & rounding_bit) << 1;
uint64_t decoded = (decrypted + rounding) / delta;
EXPECT_EQ(decoded, (plaintext_1 + plaintext_2) / delta);
}
}
}
}
TEST_P(LinearAlgebraTestPrimitives_u64, plaintext_addition) {
void *v_stream = (void *)stream;
// Here execute the PBS
for (uint r = 0; r < REPETITIONS; r++) {
for (uint s = 0; s < SAMPLES; s++) {
uint64_t plaintext_1 = plaintexts_1[r * SAMPLES + s];
uint64_t plaintext_2 = plaintexts_2[r * SAMPLES + s];
uint64_t *lwe_sk = lwe_sk_array + (ptrdiff_t)(r * lwe_dimension);
concrete_cpu_encrypt_lwe_ciphertext_u64(lwe_sk, lwe_in_1_ct, plaintext_1,
lwe_dimension, noise_variance,
csprng, &CONCRETE_CSPRNG_VTABLE);
cuda_synchronize_stream(v_stream);
for (int i = 0; i < number_of_inputs; i++) {
uint64_t plaintext_1 = plaintexts_1[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
concrete_cpu_encrypt_lwe_ciphertext_u64(
lwe_sk, lwe_in_1_ct + i * (lwe_dimension + 1), plaintext_1,
lwe_dimension, noise_variance, csprng, &CONCRETE_CSPRNG_VTABLE);
}
cuda_memcpy_async_to_gpu(d_lwe_in_1_ct, lwe_in_1_ct,
(lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
cuda_memcpy_async_to_gpu(d_lwe_in_2_ct, &plaintext_2, sizeof(uint64_t),
number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t),
stream, gpu_index);
cuda_memcpy_async_to_gpu(
d_lwe_in_2_ct,
&plaintexts_2[r * SAMPLES * number_of_inputs + s * number_of_inputs],
number_of_inputs * sizeof(uint64_t), stream, gpu_index);
// Execute addition
cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
stream, gpu_index, (void *)d_lwe_out_ct, (void *)d_lwe_in_1_ct,
(void *)d_lwe_in_2_ct, lwe_dimension, 1);
(void *)d_lwe_in_2_ct, lwe_dimension, number_of_inputs);
// Copy result back
cuda_memcpy_async_to_cpu(lwe_out_ct, d_lwe_out_ct,
(lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
uint64_t decrypted = 0;
concrete_cpu_decrypt_lwe_ciphertext_u64(lwe_sk, lwe_out_ct, lwe_dimension,
&decrypted);
// The bit before the message
uint64_t rounding_bit = delta >> 1;
// Compute the rounding bit
uint64_t rounding = (decrypted & rounding_bit) << 1;
uint64_t decoded = (decrypted + rounding) / delta;
ASSERT_EQ(decoded, (plaintext_1 + plaintext_2) / delta);
cuda_synchronize_stream(v_stream);
number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t),
stream, gpu_index);
for (int i = 0; i < number_of_inputs; i++) {
uint64_t plaintext_1 = plaintexts_1[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
uint64_t plaintext_2 = plaintexts_2[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
uint64_t decrypted = 0;
concrete_cpu_decrypt_lwe_ciphertext_u64(
lwe_sk, lwe_out_ct + i * (lwe_dimension + 1), lwe_dimension,
&decrypted);
// The bit before the message
uint64_t rounding_bit = delta >> 1;
// Compute the rounding bit
uint64_t rounding = (decrypted & rounding_bit) << 1;
uint64_t decoded = (decrypted + rounding) / delta;
EXPECT_EQ(decoded, (plaintext_1 + plaintext_2) / delta);
}
}
}
}
TEST_P(LinearAlgebraTestPrimitives_u64, plaintext_multiplication) {
TEST_P(LinearAlgebraTestPrimitives_u64, cleartext_multiplication) {
void *v_stream = (void *)stream;
uint64_t delta_2 =
((uint64_t)(1) << 63) / (uint64_t)(payload_modulus * payload_modulus);
// Here execute the PBS
for (uint r = 0; r < REPETITIONS; r++) {
for (uint s = 0; s < SAMPLES; s++) {
uint64_t plaintext_1 = plaintexts_1[r * SAMPLES + s];
uint64_t plaintext_2 = plaintexts_2[r * SAMPLES + s];
uint64_t *lwe_sk = lwe_sk_array + (ptrdiff_t)(r * lwe_dimension);
concrete_cpu_encrypt_lwe_ciphertext_u64(lwe_sk, lwe_in_1_ct, plaintext_1,
lwe_dimension, noise_variance,
csprng, &CONCRETE_CSPRNG_VTABLE);
uint64_t *cleartext_array =
(uint64_t *)malloc(number_of_inputs * sizeof(uint64_t));
for (int i = 0; i < number_of_inputs; i++) {
uint64_t plaintext_1 = plaintexts_1[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i] /
delta * delta_2;
uint64_t plaintext_2 = plaintexts_2[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
cleartext_array[i] = plaintext_2 / delta;
concrete_cpu_encrypt_lwe_ciphertext_u64(
lwe_sk, lwe_in_1_ct + i * (lwe_dimension + 1), plaintext_1,
lwe_dimension, noise_variance, csprng, &CONCRETE_CSPRNG_VTABLE);
}
cuda_synchronize_stream(v_stream);
cuda_memcpy_async_to_gpu(d_lwe_in_1_ct, lwe_in_1_ct,
(lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
cuda_memcpy_async_to_gpu(d_lwe_in_2_ct, &plaintext_1, sizeof(uint64_t),
number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t),
stream, gpu_index);
// Execute addition
cuda_memcpy_async_to_gpu(d_lwe_in_2_ct, cleartext_array,
number_of_inputs * sizeof(uint64_t), stream,
gpu_index);
// Execute cleartext multiplication
cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
stream, gpu_index, (void *)d_lwe_out_ct, (void *)d_lwe_in_1_ct,
(void *)d_lwe_in_2_ct, lwe_dimension, 1);
(void *)d_lwe_in_2_ct, lwe_dimension, number_of_inputs);
// Copy result back
cuda_memcpy_async_to_cpu(lwe_out_ct, d_lwe_out_ct,
(lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
uint64_t decrypted = 0;
concrete_cpu_decrypt_lwe_ciphertext_u64(lwe_sk, lwe_out_ct, lwe_dimension,
&decrypted);
// The bit before the message
uint64_t rounding_bit = delta >> 1;
// Compute the rounding bit
uint64_t rounding = (decrypted & rounding_bit) << 1;
uint64_t decoded = (decrypted + rounding) / delta;
ASSERT_EQ(decoded, (plaintext_1 * plaintext_2) / delta);
number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t),
stream, gpu_index);
cuda_synchronize_stream(v_stream);
for (int i = 0; i < number_of_inputs; i++) {
uint64_t plaintext = plaintexts_1[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i] /
delta * delta_2;
uint64_t cleartext = plaintexts_2[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i] /
delta;
uint64_t decrypted = 0;
concrete_cpu_decrypt_lwe_ciphertext_u64(
lwe_sk, lwe_out_ct + i * (lwe_dimension + 1), lwe_dimension,
&decrypted);
// The bit before the message
uint64_t rounding_bit = delta_2 >> 1;
// Compute the rounding bit
uint64_t rounding = (decrypted & rounding_bit) << 1;
uint64_t decoded = (decrypted + rounding) / delta_2;
EXPECT_EQ(decoded, plaintext / delta_2 * cleartext);
}
}
}
}
TEST_P(LinearAlgebraTestPrimitives_u64, negate) {
void *v_stream = (void *)stream;
// Here execute the PBS
for (uint r = 0; r < REPETITIONS; r++) {
for (uint s = 0; s < SAMPLES; s++) {
uint64_t plaintext = plaintexts_1[r * SAMPLES + s];
uint64_t *lwe_sk = lwe_sk_array + (ptrdiff_t)(r * lwe_dimension);
concrete_cpu_encrypt_lwe_ciphertext_u64(lwe_sk, lwe_in_1_ct, plaintext,
lwe_dimension, noise_variance,
csprng, &CONCRETE_CSPRNG_VTABLE);
cuda_synchronize_stream(v_stream);
for (int i = 0; i < number_of_inputs; i++) {
uint64_t plaintext = plaintexts_1[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
concrete_cpu_encrypt_lwe_ciphertext_u64(
lwe_sk, lwe_in_1_ct + i * (lwe_dimension + 1), plaintext,
lwe_dimension, noise_variance, csprng, &CONCRETE_CSPRNG_VTABLE);
}
cuda_memcpy_async_to_gpu(d_lwe_in_1_ct, lwe_in_1_ct,
(lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t),
stream, gpu_index);
// Execute addition
cuda_negate_lwe_ciphertext_vector_64(
stream, gpu_index, (void *)d_lwe_out_ct, (void *)d_lwe_in_1_ct,
lwe_dimension, 1);
lwe_dimension, number_of_inputs);
// Copy result back
cuda_memcpy_async_to_cpu(lwe_out_ct, d_lwe_out_ct,
(lwe_dimension + 1) * sizeof(uint64_t), stream,
gpu_index);
uint64_t decrypted = 0;
concrete_cpu_decrypt_lwe_ciphertext_u64(lwe_sk, lwe_out_ct, lwe_dimension,
&decrypted);
// The bit before the message
uint64_t rounding_bit = delta >> 1;
// Compute the rounding bit
uint64_t rounding = (decrypted & rounding_bit) << 1;
uint64_t decoded = (decrypted + rounding) / delta;
ASSERT_EQ(decoded, -plaintext / delta);
cuda_synchronize_stream(v_stream);
number_of_inputs * (lwe_dimension + 1) *
sizeof(uint64_t),
stream, gpu_index);
for (int i = 0; i < number_of_inputs; i++) {
uint64_t plaintext = plaintexts_1[r * SAMPLES * number_of_inputs +
s * number_of_inputs + i];
uint64_t decrypted = 0;
concrete_cpu_decrypt_lwe_ciphertext_u64(
lwe_sk, lwe_out_ct + i * (lwe_dimension + 1), lwe_dimension,
&decrypted);
// The bit before the message
uint64_t rounding_bit = delta >> 1;
// Compute the rounding bit
uint64_t rounding = (decrypted & rounding_bit) << 1;
uint64_t decoded = (decrypted + rounding) / delta;
EXPECT_EQ(decoded, -plaintext / delta);
}
}
}
}
@@ -266,8 +325,8 @@ TEST_P(LinearAlgebraTestPrimitives_u64, negate) {
// tested. It executes each test for all pairs on phis X qs (Cartesian product)
::testing::internal::ParamGenerator<LinearAlgebraTestParams>
linear_algebra_params_u64 = ::testing::Values(
// n, lwe_std_dev, message_modulus, carry_modulus
(LinearAlgebraTestParams){600, 0.000007069849454709433, 4, 4});
// n, lwe_std_dev, message_modulus, carry_modulus, number_of_inputs
(LinearAlgebraTestParams){600, 7.52316384526264e-37, 2, 2, 10});
std::string
printParamName(::testing::TestParamInfo<LinearAlgebraTestParams> p) {