diff --git a/include/bootstrap.h b/include/bootstrap.h index d56cd9d50..6177fdf65 100644 --- a/include/bootstrap.h +++ b/include/bootstrap.h @@ -22,7 +22,8 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32( void *test_vector_indexes, void *lwe_in, void *bootstrapping_key, - uint32_t input_lwe_dimension, + uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, uint32_t l_gadget, @@ -38,7 +39,8 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64( void *test_vector_indexes, void *lwe_in, void *bootstrapping_key, - uint32_t input_lwe_dimension, + uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, uint32_t l_gadget, @@ -54,7 +56,8 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32( void *test_vector_indexes, void *lwe_in, void *bootstrapping_key, - uint32_t input_lwe_dimension, + uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, uint32_t l_gadget, @@ -70,7 +73,8 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64( void *test_vector_indexes, void *lwe_in, void *bootstrapping_key, - uint32_t input_lwe_dimension, + uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, uint32_t l_gadget, @@ -121,6 +125,7 @@ void cuda_extract_bits_32( uint32_t delta_log, uint32_t lwe_dimension_before, uint32_t lwe_dimension_after, + uint32_t glwe_dimension, uint32_t base_log_bsk, uint32_t l_gadget_bsk, uint32_t base_log_ksk, @@ -144,6 +149,7 @@ void cuda_extract_bits_64( uint32_t delta_log, uint32_t lwe_dimension_before, uint32_t lwe_dimension_after, + uint32_t glwe_dimension, uint32_t base_log_bsk, uint32_t l_gadget_bsk, uint32_t base_log_ksk, diff --git a/src/bootstrap_amortized.cu b/src/bootstrap_amortized.cu index 6504d4b9e..5f9af6d76 100644 --- a/src/bootstrap_amortized.cu +++ b/src/bootstrap_amortized.cu @@ -64,7 +64,8 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32( void *lut_vector_indexes, void *lwe_in, void *bootstrapping_key, - uint32_t input_lwe_dimension, + uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, uint32_t l_gadget, @@ -74,6 +75,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32( uint32_t max_shared_memory) { assert(("Error (GPU amortized PBS): base log should be <= 16", base_log <= 16)); + assert(("Error (GPU amortized PBS): glwe_dimension should be equal to 1", glwe_dimension == 1)); assert(("Error (GPU amortized PBS): polynomial size should be one of 512, 1024, 2048, 4096, 8192", polynomial_size == 512 || polynomial_size == 1024 || polynomial_size == 2048 || polynomial_size == 4096 || polynomial_size == 8192)); @@ -83,7 +85,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32( host_bootstrap_amortized>( v_stream, (uint32_t *)lwe_out, (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint32_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; @@ -91,28 +93,28 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_32( host_bootstrap_amortized>( v_stream, (uint32_t *)lwe_out, (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint32_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; case 2048: host_bootstrap_amortized>( v_stream, (uint32_t *)lwe_out, (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint32_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; case 4096: host_bootstrap_amortized>( v_stream, (uint32_t *)lwe_out, (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint32_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; case 8192: host_bootstrap_amortized>( v_stream, (uint32_t *)lwe_out, (uint32_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint32_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; default: @@ -127,7 +129,8 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64( void *lut_vector_indexes, void *lwe_in, void *bootstrapping_key, - uint32_t input_lwe_dimension, + uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, uint32_t l_gadget, @@ -137,6 +140,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64( uint32_t max_shared_memory) { assert(("Error (GPU amortized PBS): base log should be <= 16", base_log <= 16)); + assert(("Error (GPU amortized PBS): glwe_dimension should be equal to 1", glwe_dimension == 1)); assert(("Error (GPU amortized PBS): polynomial size should be one of 512, 1024, 2048, 4096, 8192", polynomial_size == 512 || polynomial_size == 1024 || polynomial_size == 2048 || polynomial_size == 4096 || polynomial_size == 8192)); @@ -146,7 +150,7 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64( host_bootstrap_amortized>( v_stream, (uint64_t *)lwe_out, (uint64_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint64_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; @@ -154,28 +158,28 @@ void cuda_bootstrap_amortized_lwe_ciphertext_vector_64( host_bootstrap_amortized>( v_stream, (uint64_t *)lwe_out, (uint64_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint64_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; case 2048: host_bootstrap_amortized>( v_stream, (uint64_t *)lwe_out, (uint64_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint64_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; case 4096: host_bootstrap_amortized>( v_stream, (uint64_t *)lwe_out, (uint64_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint64_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; case 8192: host_bootstrap_amortized>( v_stream, (uint64_t *)lwe_out, (uint64_t *)lut_vector, (uint32_t *)lut_vector_indexes, (uint64_t *)lwe_in, - (double2 *)bootstrapping_key, input_lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, + (double2 *)bootstrapping_key, lwe_dimension, polynomial_size, base_log, l_gadget, num_samples, num_lut_vectors, lwe_idx, max_shared_memory); break; default: diff --git a/src/bootstrap_low_latency.cu b/src/bootstrap_low_latency.cu index dfa80ce6b..848d4ddaf 100644 --- a/src/bootstrap_low_latency.cu +++ b/src/bootstrap_low_latency.cu @@ -64,6 +64,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32( void *lwe_in, void *bootstrapping_key, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, uint32_t l_gadget, @@ -73,6 +74,8 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32( uint32_t max_shared_memory) { assert(("Error (GPU low latency PBS): base log should be <= 16", base_log <= 16)); + assert(("Error (GPU low latency PBS): glwe_dimension should be equal to 1", + glwe_dimension == 1)); assert(("Error (GPU low latency PBS): polynomial size should be one of 512, 1024, 2048", polynomial_size == 512 || polynomial_size == 1024 || polynomial_size == 2048)); // The number of samples should be lower than SM/(4 * (k + 1) * l) (the @@ -138,6 +141,7 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64( void *lwe_in, void *bootstrapping_key, uint32_t lwe_dimension, + uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log, uint32_t l_gadget, @@ -147,6 +151,8 @@ void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64( uint32_t max_shared_memory) { assert(("Error (GPU low latency PBS): base log should be <= 16", base_log <= 16)); + assert(("Error (GPU low latency PBS): glwe_dimension should be equal to 1", + glwe_dimension == 1)); assert(("Error (GPU low latency PBS): polynomial size should be one of 512, 1024, 2048", polynomial_size == 512 || polynomial_size == 1024 || polynomial_size == 2048)); // The number of samples should be lower than SM/(4 * (k + 1) * l) (the diff --git a/src/bootstrap_wop.cu b/src/bootstrap_wop.cu index 653dead57..1220ae3c1 100644 --- a/src/bootstrap_wop.cu +++ b/src/bootstrap_wop.cu @@ -141,6 +141,7 @@ void cuda_extract_bits_32( uint32_t delta_log, uint32_t lwe_dimension_before, uint32_t lwe_dimension_after, + uint32_t glwe_dimension, uint32_t base_log_bsk, uint32_t l_gadget_bsk, uint32_t base_log_ksk, @@ -148,6 +149,7 @@ void cuda_extract_bits_32( uint32_t number_of_samples) { assert(("Error (GPU extract bits): base log should be <= 16", base_log_bsk <= 16)); + assert(("Error (GPU extract bits): glwe_dimension should be equal to 1", glwe_dimension == 1)); assert(("Error (GPU extract bits): lwe_dimension_before should be one of 512, 1024, 2048", lwe_dimension_before == 512 || lwe_dimension_before == 1024 || lwe_dimension_before == 2048)); @@ -216,6 +218,7 @@ void cuda_extract_bits_64( uint32_t delta_log, uint32_t lwe_dimension_before, uint32_t lwe_dimension_after, + uint32_t glwe_dimension, uint32_t base_log_bsk, uint32_t l_gadget_bsk, uint32_t base_log_ksk, @@ -223,6 +226,7 @@ void cuda_extract_bits_64( uint32_t number_of_samples) { assert(("Error (GPU extract bits): base log should be <= 16", base_log_bsk <= 16)); + assert(("Error (GPU extract bits): glwe_dimension should be equal to 1", glwe_dimension == 1)); assert(("Error (GPU extract bits): lwe_dimension_before should be one of 512, 1024, 2048", lwe_dimension_before == 512 || lwe_dimension_before == 1024 || lwe_dimension_before == 2048));