fix(cuda): remove u32 support for cbs+vp entry point

This commit is contained in:
Agnes Leroy
2022-12-14 11:36:28 +01:00
committed by Agnès Leroy
parent 4da789abda
commit e4ba380594
2 changed files with 0 additions and 85 deletions

View File

@@ -106,14 +106,6 @@ void cuda_circuit_bootstrap_64(
uint32_t base_log_pksk, uint32_t level_cbs, uint32_t base_log_cbs,
uint32_t number_of_samples, uint32_t max_shared_memory);
// 32-bit (u32) entry point for circuit bootstrap + vertical packing (cbs+vp).
// The stream handle and every buffer (output/input LWE arrays, Fourier
// bootstrapping key, cbs_fpksk key, LUT vector) are passed as untyped
// `void *`; the remaining arguments are scalar parameters.
// NOTE(review): presumably all buffers live in device memory of `gpu_index`
// -- confirm against the implementation.
void cuda_circuit_bootstrap_vertical_packing_32(
void *v_stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
void *fourier_bsk, void *cbs_fpksk, void *lut_vector,
uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t lwe_dimension,
uint32_t level_count_bsk, uint32_t base_log_bsk, uint32_t level_count_pksk,
uint32_t base_log_pksk, uint32_t level_count_cbs, uint32_t base_log_cbs,
uint32_t number_of_inputs, uint32_t lut_number, uint32_t max_shared_memory);
void cuda_circuit_bootstrap_vertical_packing_64(
void *v_stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
void *fourier_bsk, void *cbs_fpksk, void *lut_vector,

View File

@@ -521,83 +521,6 @@ void cuda_circuit_bootstrap_64(
}
}
/*
 * 32-bit entry point for circuit bootstrap + vertical packing.
 *
 * Validates the parameters with `assert` (compiled out when NDEBUG is
 * defined), reinterprets the untyped buffers as `uint32_t *` (and the Fourier
 * bootstrapping key as `double2 *`), then forwards everything to
 * `host_circuit_bootstrap_vertical_packing<uint32_t, int32_t, Degree<N>>`,
 * where N is selected at runtime from `polynomial_size`.
 *
 * Preconditions checked by the assertions:
 *  - glwe_dimension == 1
 *  - polynomial_size is one of 512, 1024, 2048, 4096, 8192
 *  - number_of_inputs <= (#SMs of device `gpu_index`) / (8 * level_count_bsk)
 *
 * If `polynomial_size` is not one of the supported values and assertions are
 * disabled, the function returns without launching anything.
 */
void cuda_circuit_bootstrap_vertical_packing_32(
    void *v_stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
    void *fourier_bsk, void *cbs_fpksk, void *lut_vector,
    uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t lwe_dimension,
    uint32_t level_count_bsk, uint32_t base_log_bsk, uint32_t level_count_pksk,
    uint32_t base_log_pksk, uint32_t level_count_cbs, uint32_t base_log_cbs,
    uint32_t number_of_inputs, uint32_t lut_number,
    uint32_t max_shared_memory) {
  assert(("Error (GPU circuit bootstrap): glwe_dimension should be equal to 1",
          glwe_dimension == 1));
  assert(("Error (GPU circuit bootstrap): polynomial_size should be one of "
          "512, 1024, 2048, 4096, 8192",
          polynomial_size == 512 || polynomial_size == 1024 ||
              polynomial_size == 2048 || polynomial_size == 4096 ||
              polynomial_size == 8192));

  // The number of inputs should be lower than the number of streaming
  // multiprocessors divided by (4 * (k + 1) * l) (the factor 4 being related
  // to the occupancy of 50%). The only supported value for k is 1, so
  // k + 1 = 2 for now.
  // Query the device the caller selected (gpu_index) rather than device 0, so
  // the limit reflects the GPU this call is actually targeting.
  int number_of_sm = 0;
  cudaError_t sm_query_status = cudaDeviceGetAttribute(
      &number_of_sm, cudaDevAttrMultiProcessorCount,
      static_cast<int>(gpu_index));
  assert(("Error (GPU circuit bootstrap): failed to query the number of "
          "streaming multiprocessors on the device",
          sm_query_status == cudaSuccess));
  (void)sm_query_status; // silence unused-variable warning under NDEBUG
  assert(("Error (GPU circuit bootstrap): the number of input LWEs must be "
          "lower or equal to the "
          "number of streaming multiprocessors on the device divided by 8 * "
          "level_count_bsk",
          number_of_inputs <= number_of_sm / 4. / 2. / level_count_bsk));

  // Reinterpret the untyped buffers once instead of in every switch arm.
  uint32_t *lwe_out = (uint32_t *)lwe_array_out;
  uint32_t *lwe_in = (uint32_t *)lwe_array_in;
  uint32_t *lut = (uint32_t *)lut_vector;
  double2 *bsk = (double2 *)fourier_bsk;
  uint32_t *fpksk = (uint32_t *)cbs_fpksk;

  // The polynomial degree is a template parameter of the host routine, so
  // each supported size needs its own instantiation.
  switch (polynomial_size) {
  case 512:
    host_circuit_bootstrap_vertical_packing<uint32_t, int32_t, Degree<512>>(
        v_stream, gpu_index, lwe_out, lwe_in, lut, bsk, fpksk, glwe_dimension,
        lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk,
        base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs,
        number_of_inputs, lut_number, max_shared_memory);
    break;
  case 1024:
    host_circuit_bootstrap_vertical_packing<uint32_t, int32_t, Degree<1024>>(
        v_stream, gpu_index, lwe_out, lwe_in, lut, bsk, fpksk, glwe_dimension,
        lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk,
        base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs,
        number_of_inputs, lut_number, max_shared_memory);
    break;
  case 2048:
    host_circuit_bootstrap_vertical_packing<uint32_t, int32_t, Degree<2048>>(
        v_stream, gpu_index, lwe_out, lwe_in, lut, bsk, fpksk, glwe_dimension,
        lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk,
        base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs,
        number_of_inputs, lut_number, max_shared_memory);
    break;
  case 4096:
    host_circuit_bootstrap_vertical_packing<uint32_t, int32_t, Degree<4096>>(
        v_stream, gpu_index, lwe_out, lwe_in, lut, bsk, fpksk, glwe_dimension,
        lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk,
        base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs,
        number_of_inputs, lut_number, max_shared_memory);
    break;
  case 8192:
    host_circuit_bootstrap_vertical_packing<uint32_t, int32_t, Degree<8192>>(
        v_stream, gpu_index, lwe_out, lwe_in, lut, bsk, fpksk, glwe_dimension,
        lwe_dimension, polynomial_size, base_log_bsk, level_count_bsk,
        base_log_pksk, level_count_pksk, base_log_cbs, level_count_cbs,
        number_of_inputs, lut_number, max_shared_memory);
    break;
  default:
    // Unsupported size: already rejected by the assertion above in debug
    // builds; nothing is launched otherwise.
    break;
  }
}
void cuda_circuit_bootstrap_vertical_packing_64(
void *v_stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
void *fourier_bsk, void *cbs_fpksk, void *lut_vector,