refactor(cuda): prepare to introduce cmux tree scratch

This commit is contained in:
Agnes Leroy
2023-02-15 09:22:07 +01:00
committed by Agnès Leroy
parent bfb07b961d
commit e6dfb588db
9 changed files with 333 additions and 219 deletions

View File

@@ -106,34 +106,38 @@ void cuda_circuit_bootstrap_64(
uint32_t number_of_samples, uint32_t max_shared_memory);
void scratch_cuda_circuit_bootstrap_vertical_packing_32(
void *v_stream, uint32_t gpu_index, void **cbs_vp_buffer,
void *v_stream, uint32_t gpu_index, int8_t **cbs_vp_buffer,
uint32_t *cbs_delta_log, uint32_t glwe_dimension, uint32_t lwe_dimension,
uint32_t polynomial_size, uint32_t level_count_cbs,
uint32_t number_of_inputs, uint32_t tau, bool allocate_gpu_memory);
uint32_t number_of_inputs, uint32_t tau, uint32_t max_shared_memory,
bool allocate_gpu_memory);
void scratch_cuda_circuit_bootstrap_vertical_packing_64(
void *v_stream, uint32_t gpu_index, void **cbs_vp_buffer,
void *v_stream, uint32_t gpu_index, int8_t **cbs_vp_buffer,
uint32_t *cbs_delta_log, uint32_t glwe_dimension, uint32_t lwe_dimension,
uint32_t polynomial_size, uint32_t level_count_cbs,
uint32_t number_of_inputs, uint32_t tau, bool allocate_gpu_memory);
uint32_t number_of_inputs, uint32_t tau, uint32_t max_shared_memory,
bool allocate_gpu_memory);
void scratch_cuda_wop_pbs_32(
void *v_stream, uint32_t gpu_index, void **wop_pbs_buffer,
void *v_stream, uint32_t gpu_index, int8_t **wop_pbs_buffer,
uint32_t *delta_log, uint32_t *cbs_delta_log, uint32_t glwe_dimension,
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t level_count_cbs,
uint32_t number_of_bits_of_message_including_padding,
uint32_t number_of_bits_to_extract, uint32_t number_of_inputs);
uint32_t number_of_bits_to_extract, uint32_t number_of_inputs,
uint32_t max_shared_memory);
void scratch_cuda_wop_pbs_64(
void *v_stream, uint32_t gpu_index, void **wop_pbs_buffer,
void *v_stream, uint32_t gpu_index, int8_t **wop_pbs_buffer,
uint32_t *delta_log, uint32_t *cbs_delta_log, uint32_t glwe_dimension,
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t level_count_cbs,
uint32_t number_of_bits_of_message_including_padding,
uint32_t number_of_bits_to_extract, uint32_t number_of_inputs);
uint32_t number_of_bits_to_extract, uint32_t number_of_inputs,
uint32_t max_shared_memory);
void cuda_circuit_bootstrap_vertical_packing_64(
void *v_stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
void *fourier_bsk, void *cbs_fpksk, void *lut_vector, void *cbs_vp_buffer,
void *fourier_bsk, void *cbs_fpksk, void *lut_vector, int8_t *cbs_vp_buffer,
uint32_t cbs_delta_log, uint32_t polynomial_size, uint32_t glwe_dimension,
uint32_t lwe_dimension, uint32_t level_count_bsk, uint32_t base_log_bsk,
uint32_t level_count_pksk, uint32_t base_log_pksk, uint32_t level_count_cbs,
@@ -142,7 +146,7 @@ void cuda_circuit_bootstrap_vertical_packing_64(
void cuda_wop_pbs_64(void *v_stream, uint32_t gpu_index, void *lwe_array_out,
void *lwe_array_in, void *lut_vector, void *fourier_bsk,
void *ksk, void *cbs_fpksk, void *wop_pbs_buffer,
void *ksk, void *cbs_fpksk, int8_t *wop_pbs_buffer,
uint32_t cbs_delta_log, uint32_t glwe_dimension,
uint32_t lwe_dimension, uint32_t polynomial_size,
uint32_t base_log_bsk, uint32_t level_count_bsk,
@@ -153,18 +157,12 @@ void cuda_wop_pbs_64(void *v_stream, uint32_t gpu_index, void *lwe_array_out,
uint32_t number_of_bits_to_extract, uint32_t delta_log,
uint32_t number_of_inputs, uint32_t max_shared_memory);
void cleanup_cuda_wop_pbs_32(void *v_stream, uint32_t gpu_index,
void **wop_pbs_buffer);
void cleanup_cuda_wop_pbs_64(void *v_stream, uint32_t gpu_index,
void **wop_pbs_buffer);
void cleanup_cuda_wop_pbs(void *v_stream, uint32_t gpu_index,
int8_t **wop_pbs_buffer);
void cleanup_cuda_circuit_bootstrap_vertical_packing_32(void *v_stream,
uint32_t gpu_index,
void **cbs_vp_buffer);
void cleanup_cuda_circuit_bootstrap_vertical_packing_64(void *v_stream,
uint32_t gpu_index,
void **cbs_vp_buffer);
void cleanup_cuda_circuit_bootstrap_vertical_packing(void *v_stream,
uint32_t gpu_index,
int8_t **cbs_vp_buffer);
}
#ifdef __CUDACC__