mirror of https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-10 07:08:03 -05:00
fix(gpu): add missing synchronize in scalar add, refactor scalar add on cuda side
@@ -18,10 +18,8 @@ __global__ void device_integer_radix_scalar_addition_inplace(

   int tid = blockIdx.x * blockDim.x + threadIdx.x;
   if (tid < num_blocks) {
-    Torus scalar = scalar_input[tid];
-    Torus *body = lwe_array + tid * (lwe_dimension + 1) + lwe_dimension;
-
-    *body += scalar * delta;
+    lwe_array[tid * (lwe_dimension + 1) + lwe_dimension] +=
+        scalar_input[tid] * delta;
   }
 }

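For context, the refactored kernel body can be read as a standalone sketch. This is a minimal, hedged reconstruction assuming a concrete uint64_t torus type and only the parameters visible in the hunk; the actual kernel in the repository is templated over Torus and its full signature is not shown here.

#include <cstdint>

// Sketch of the refactored scalar addition: one thread per radix block, adding
// the delta-encoded scalar digit directly to the body coefficient of that
// block's LWE ciphertext, with no intermediate locals.
__global__ void scalar_addition_inplace_sketch(uint64_t *lwe_array,
                                               const uint64_t *scalar_input,
                                               uint32_t lwe_dimension,
                                               uint32_t num_blocks,
                                               uint64_t delta) {
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid < num_blocks) {
    // Block tid occupies (lwe_dimension + 1) coefficients; the body is the last one.
    lwe_array[tid * (lwe_dimension + 1) + lwe_dimension] +=
        scalar_input[tid] * delta;
  }
}

The computation is unchanged from the removed lines; the refactor only drops the scalar and body temporaries.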
@@ -266,6 +266,7 @@ impl CudaServerKey {
         unsafe {
             carry_out = self.propagate_single_carry_assign_async(ct_left, stream);
         }
+        stream.synchronize();

         let num_scalar_blocks =
             BlockDecomposer::with_early_stop_at_zero(scalar, self.message_modulus.0.ilog2())
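The added stream.synchronize() is the fix named in the commit title: propagate_single_carry_assign_async only enqueues work on the CUDA stream, so the host must wait before continuing with steps that assume the carry propagation has finished. Below is a generic CUDA illustration of that pattern; it is a hedged sketch, not the library's host code, and the add_one kernel and buffer names are invented for the example.

#include <cstdio>
#include <cuda_runtime.h>

// Toy kernel standing in for the asynchronous carry-propagation launch.
__global__ void add_one(int *data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n)
    data[i] += 1;
}

int main() {
  const int n = 4;
  int host[n] = {0, 1, 2, 3};
  int *dev = nullptr;
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  cudaMalloc(&dev, n * sizeof(int));
  cudaMemcpyAsync(dev, host, n * sizeof(int), cudaMemcpyHostToDevice, stream);

  // Asynchronous: control returns to the host as soon as the work is enqueued.
  add_one<<<1, n, 0, stream>>>(dev, n);
  cudaMemcpyAsync(host, dev, n * sizeof(int), cudaMemcpyDeviceToHost, stream);

  // Without this wait the host could read `host` before the stream has finished;
  // the Rust-side stream.synchronize() added above closes the same kind of gap.
  cudaStreamSynchronize(stream);

  for (int i = 0; i < n; ++i)
    printf("%d ", host[i]);
  printf("\n");

  cudaStreamDestroy(stream);
  cudaFree(dev);
  return 0;
}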