From e4ba380594b37250e126bdd8a0902eb4d19f1609 Mon Sep 17 00:00:00 2001 From: Agnes Leroy Date: Wed, 14 Dec 2022 11:36:28 +0100 Subject: [PATCH] fix(cuda): remove u32 support for cbs+vp entry point --- include/bootstrap.h | 8 ----- src/bootstrap_wop.cu | 77 -------------------------------------------- 2 files changed, 85 deletions(-) diff --git a/include/bootstrap.h b/include/bootstrap.h index 312c7e3ff..f2c91bb87 100644 --- a/include/bootstrap.h +++ b/include/bootstrap.h @@ -106,14 +106,6 @@ void cuda_circuit_bootstrap_64( uint32_t base_log_pksk, uint32_t level_cbs, uint32_t base_log_cbs, uint32_t number_of_samples, uint32_t max_shared_memory); -void cuda_circuit_bootstrap_vertical_packing_32( - void *v_stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in, - void *fourier_bsk, void *cbs_fpksk, void *lut_vector, - uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t lwe_dimension, - uint32_t level_count_bsk, uint32_t base_log_bsk, uint32_t level_count_pksk, - uint32_t base_log_pksk, uint32_t level_count_cbs, uint32_t base_log_cbs, - uint32_t number_of_inputs, uint32_t lut_number, uint32_t max_shared_memory); - void cuda_circuit_bootstrap_vertical_packing_64( void *v_stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in, void *fourier_bsk, void *cbs_fpksk, void *lut_vector, diff --git a/src/bootstrap_wop.cu b/src/bootstrap_wop.cu index c1eba847e..8c79d4109 100644 --- a/src/bootstrap_wop.cu +++ b/src/bootstrap_wop.cu @@ -521,83 +521,6 @@ void cuda_circuit_bootstrap_64( } } -void cuda_circuit_bootstrap_vertical_packing_32( - void *v_stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in, - void *fourier_bsk, void *cbs_fpksk, void *lut_vector, - uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t lwe_dimension, - uint32_t level_count_bsk, uint32_t base_log_bsk, uint32_t level_count_pksk, - uint32_t base_log_pksk, uint32_t level_count_cbs, uint32_t base_log_cbs, - uint32_t number_of_inputs, uint32_t lut_number, - uint32_t max_shared_memory) { - assert(("Error (GPU circuit bootstrap): glwe_dimension should be equal to 1", - glwe_dimension == 1)); - assert(("Error (GPU circuit bootstrap): polynomial_size should be one of " - "512, 1024, 2048, 4096, 8192", - polynomial_size == 512 || polynomial_size == 1024 || - polynomial_size == 2048 || polynomial_size == 4096 || - polynomial_size == 8192)); - // The number of inputs should be lower than the number of streaming - // multiprocessors divided by (4 * (k + 1) * l) (the factor 4 being related - // to the occupancy of 50%). The only supported value for k is 1, so - // k + 1 = 2 for now. - int number_of_sm = 0; - cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0); - assert(("Error (GPU extract bits): the number of input LWEs must be lower or " - "equal to the " - "number of streaming multiprocessors on the device divided by 8 * " - "level_count_bsk", - number_of_inputs <= number_of_sm / 4. / 2. / level_count_bsk)); - switch (polynomial_size) { - case 512: - host_circuit_bootstrap_vertical_packing>( - v_stream, gpu_index, (uint32_t *)lwe_array_out, - (uint32_t *)lwe_array_in, (uint32_t *)lut_vector, - (double2 *)fourier_bsk, (uint32_t *)cbs_fpksk, glwe_dimension, - lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk, - base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs, - number_of_inputs, lut_number, max_shared_memory); - break; - case 1024: - host_circuit_bootstrap_vertical_packing>( - v_stream, gpu_index, (uint32_t *)lwe_array_out, - (uint32_t *)lwe_array_in, (uint32_t *)lut_vector, - (double2 *)fourier_bsk, (uint32_t *)cbs_fpksk, glwe_dimension, - lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk, - base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs, - number_of_inputs, lut_number, max_shared_memory); - break; - case 2048: - host_circuit_bootstrap_vertical_packing>( - v_stream, gpu_index, (uint32_t *)lwe_array_out, - (uint32_t *)lwe_array_in, (uint32_t *)lut_vector, - (double2 *)fourier_bsk, (uint32_t *)cbs_fpksk, glwe_dimension, - lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk, - base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs, - number_of_inputs, lut_number, max_shared_memory); - break; - case 4096: - host_circuit_bootstrap_vertical_packing>( - v_stream, gpu_index, (uint32_t *)lwe_array_out, - (uint32_t *)lwe_array_in, (uint32_t *)lut_vector, - (double2 *)fourier_bsk, (uint32_t *)cbs_fpksk, glwe_dimension, - lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk, - base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs, - number_of_inputs, lut_number, max_shared_memory); - break; - case 8192: - host_circuit_bootstrap_vertical_packing>( - v_stream, gpu_index, (uint32_t *)lwe_array_out, - (uint32_t *)lwe_array_in, (uint32_t *)lut_vector, - (double2 *)fourier_bsk, (uint32_t *)cbs_fpksk, glwe_dimension, - lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk, - base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs, - number_of_inputs, lut_number, max_shared_memory); - break; - default: - break; - } -} - void cuda_circuit_bootstrap_vertical_packing_64( void *v_stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in, void *fourier_bsk, void *cbs_fpksk, void *lut_vector,