fix(gpu): add missing synchronize in scalar add, refactor scalar add on cuda side

This commit is contained in:
Agnes Leroy
2024-09-12 17:49:32 +02:00
committed by Agnès Leroy
parent 8299e1cb9a
commit 9633b61298
2 changed files with 3 additions and 4 deletions

View File

@@ -18,10 +18,8 @@ __global__ void device_integer_radix_scalar_addition_inplace(
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid < num_blocks) {
- Torus scalar = scalar_input[tid];
- Torus *body = lwe_array + tid * (lwe_dimension + 1) + lwe_dimension;
- *body += scalar * delta;
+ lwe_array[tid * (lwe_dimension + 1) + lwe_dimension] +=
+ scalar_input[tid] * delta;
}
}

View File

@@ -266,6 +266,7 @@ impl CudaServerKey {
unsafe {
carry_out = self.propagate_single_carry_assign_async(ct_left, stream);
}
+ stream.synchronize();
let num_scalar_blocks =
BlockDecomposer::with_early_stop_at_zero(scalar, self.message_modulus.0.ilog2())