chore(gpu): update asserts on base log now that we don't cast to u32 in decomposition

This commit is contained in:
Agnes Leroy
2024-11-14 11:17:26 +01:00
committed by Agnès Leroy
parent 0aee4c568e
commit 7aa454ee97
3 changed files with 5 additions and 23 deletions

View File

@@ -654,8 +654,8 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride) {
if (base_log > 32)
PANIC("Cuda error (classical PBS): base log should be <= 32")
if (base_log > 64)
PANIC("Cuda error (classical PBS): base log should be <= 64")
pbs_buffer<uint64_t, CLASSICAL> *buffer =
(pbs_buffer<uint64_t, CLASSICAL> *)mem_ptr;

View File

@@ -69,9 +69,6 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
uint32_t lut_count, uint32_t lut_stride) {
if (base_log > 32)
PANIC("Cuda error (multi-bit PBS): base log should be <= 32")
switch (polynomial_size) {
case 256:
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<256>>(
@@ -147,9 +144,6 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
uint32_t lut_count, uint32_t lut_stride) {
if (base_log > 32)
PANIC("Cuda error (multi-bit PBS): base log should be <= 32")
switch (polynomial_size) {
case 256:
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<256>>(
@@ -224,6 +218,9 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
uint32_t lut_stride) {
if (base_log > 64)
PANIC("Cuda error (multi-bit PBS): base log should be <= 64")
pbs_buffer<uint64_t, MULTI_BIT> *buffer =
(pbs_buffer<uint64_t, MULTI_BIT> *)mem_ptr;

View File

@@ -8,21 +8,6 @@
// Branchless select: return A if C == 0 and B if C == 1.
// -(C) acts as a bit mask over (A ^ B): for C == 0 the mask clears every bit
// and the final XOR yields A; for C == 1 the mask keeps every bit and the
// final XOR with A yields B. C is expected to be exactly 0 or 1.
// Note: A is expanded twice, so avoid passing an expression with side effects.
#define SEL(A, B, C) ((-(C) & ((A) ^ (B))) ^ (A))
/*
 * Packs a decomposed real buffer into a half-size complex buffer for the FFT.
 *
 * Each thread starts at index threadIdx.x and performs params::opt / 2
 * iterations. On every iteration it reads the two consecutive values
 * src[2 * idx] and src[2 * idx + 1], converts them to double, and stores them
 * as the x and y components of dst[idx]. The index then advances by
 * params::degree / params::opt.
 *
 * src: decomposed input values (read-only)
 * dst: output complex buffer, one double2 per pair of input values
 *
 * No synchronization is performed here.
 */
template <class params>
__device__ void real_to_complex_compressed(const int16_t *__restrict__ src,
                                           double2 *dst) {
  int idx = threadIdx.x;
#pragma unroll
  for (int step = 0; step < params::opt / 2;
       step++, idx += params::degree / params::opt) {
    // Even element of the pair goes to x, odd element goes to y.
    const int16_t *pair = src + 2 * idx;
    dst[idx].x = __int2double_rn(pair[0]);
    dst[idx].y = __int2double_rn(pair[1]);
  }
}
template <typename T, int elems_per_thread, int block_size>
__device__ void copy_polynomial(const T *__restrict__ source, T *dst) {
int tid = threadIdx.x;