mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-08 11:35:02 -05:00
feat(cuda): Check for errors after each kernel launch.
This commit is contained in:
@@ -58,6 +58,7 @@ __host__ void host_addition(void *v_stream, uint32_t gpu_index, T *output,
|
||||
|
||||
auto stream = static_cast<cudaStream_t *>(v_stream);
|
||||
addition<<<grid, thds, 0, *stream>>>(output, input_1, input_2, num_entries);
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
cudaStreamSynchronize(*stream);
|
||||
}
|
||||
@@ -84,6 +85,7 @@ __host__ void host_addition_plaintext(void *v_stream, uint32_t gpu_index,
|
||||
cudaMemcpyDeviceToDevice, *stream));
|
||||
plaintext_addition<<<grid, thds, 0, *stream>>>(
|
||||
output, lwe_input, plaintext_input, input_lwe_dimension, num_entries);
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
cudaStreamSynchronize(*stream);
|
||||
}
|
||||
|
||||
@@ -378,6 +378,8 @@ __host__ void host_bootstrap_amortized(
|
||||
bootstrapping_key, d_mem, input_lwe_dimension, polynomial_size,
|
||||
base_log, level_count, lwe_idx, 0);
|
||||
}
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
// Synchronize the streams before copying the result to lwe_array_out at the
|
||||
// right place
|
||||
cudaStreamSynchronize(*stream);
|
||||
|
||||
@@ -493,6 +493,7 @@ __host__ void host_extract_bits(
|
||||
copy_and_shift_lwe<Torus, params><<<blocks, threads, 0, *stream>>>(
|
||||
lwe_array_in_buffer, lwe_array_in_shifted_buffer, lwe_array_in,
|
||||
1ll << (ciphertext_n_bits - delta_log - 1));
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
for (int bit_idx = 0; bit_idx < number_of_bits; bit_idx++) {
|
||||
cuda_keyswitch_lwe_ciphertext_vector(
|
||||
@@ -502,6 +503,7 @@ __host__ void host_extract_bits(
|
||||
copy_small_lwe<<<1, 256, 0, *stream>>>(
|
||||
list_lwe_array_out, lwe_array_out_ks_buffer, lwe_dimension_out + 1,
|
||||
number_of_bits, number_of_bits - bit_idx - 1);
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
if (bit_idx == number_of_bits - 1) {
|
||||
break;
|
||||
@@ -510,10 +512,12 @@ __host__ void host_extract_bits(
|
||||
add_to_body<Torus><<<1, 1, 0, *stream>>>(lwe_array_out_ks_buffer,
|
||||
lwe_dimension_out,
|
||||
1ll << (ciphertext_n_bits - 2));
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
fill_lut_body_for_current_bit<Torus, params>
|
||||
<<<blocks, threads, 0, *stream>>>(
|
||||
lut_pbs, 0ll - 1ll << (delta_log - 1 + bit_idx));
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
host_bootstrap_low_latency<Torus, params>(
|
||||
v_stream, lwe_array_out_pbs_buffer, lut_pbs, lut_vector_indexes,
|
||||
@@ -524,6 +528,7 @@ __host__ void host_extract_bits(
|
||||
lwe_array_in_shifted_buffer, lwe_array_in_buffer,
|
||||
lwe_array_out_pbs_buffer, 1ll << (delta_log - 1 + bit_idx),
|
||||
1ll << (ciphertext_n_bits - delta_log - bit_idx - 2));
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -143,6 +143,7 @@ __host__ void cuda_keyswitch_lwe_ciphertext_vector(
|
||||
keyswitch<<<grid, threads, shared_mem, *stream>>>(
|
||||
lwe_array_out, lwe_array_in, ksk, lwe_dimension_in, lwe_dimension_out,
|
||||
base_log, level_count, lwe_lower, lwe_upper, cutoff);
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
cudaStreamSynchronize(*stream);
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <helper_cuda.h>
|
||||
#endif
|
||||
|
||||
#include "../include/helper_cuda.h"
|
||||
#include "linear_algebra.h"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
|
||||
@@ -45,6 +46,7 @@ host_cleartext_multiplication(void *v_stream, uint32_t gpu_index, T *output,
|
||||
auto stream = static_cast<cudaStream_t *>(v_stream);
|
||||
cleartext_multiplication<<<grid, thds, 0, *stream>>>(
|
||||
output, lwe_input, cleartext_input, input_lwe_dimension, num_entries);
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
cudaStreamSynchronize(*stream);
|
||||
}
|
||||
|
||||
@@ -7,9 +7,11 @@
|
||||
#include <helper_cuda.h>
|
||||
#endif
|
||||
|
||||
#include "../include/helper_cuda.h"
|
||||
#include "linear_algebra.h"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void negation(T *output, T *input, uint32_t num_entries) {
|
||||
|
||||
@@ -39,6 +41,7 @@ __host__ void host_negation(void *v_stream, uint32_t gpu_index, T *output,
|
||||
|
||||
auto stream = static_cast<cudaStream_t *>(v_stream);
|
||||
negation<<<grid, thds, 0, *stream>>>(output, input, num_entries);
|
||||
checkCudaErrors(cudaGetLastError());
|
||||
|
||||
cudaStreamSynchronize(*stream);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user