mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-09 14:47:56 -05:00
chore(gpu): update asserts on base log now that we don't cast to u32 in decomposition
This commit is contained in:
@@ -654,8 +654,8 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride) {
|
||||
- if (base_log > 32)
|
||||
- PANIC("Cuda error (classical PBS): base log should be <= 32")
|
||||
+ if (base_log > 64)
|
||||
+ PANIC("Cuda error (classical PBS): base log should be <= 64")
|
||||
|
||||
pbs_buffer<uint64_t, CLASSICAL> *buffer =
|
||||
(pbs_buffer<uint64_t, CLASSICAL> *)mem_ptr;
|
||||
|
||||
@@ -69,9 +69,6 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
- if (base_log > 32)
|
||||
- PANIC("Cuda error (multi-bit PBS): base log should be <= 32")
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<256>>(
|
||||
@@ -147,9 +144,6 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
- if (base_log > 32)
|
||||
- PANIC("Cuda error (multi-bit PBS): base log should be <= 32")
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<256>>(
|
||||
@@ -224,6 +218,9 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
+ if (base_log > 64)
|
||||
+ PANIC("Cuda error (multi-bit PBS): base log should be <= 64")
|
||||
|
||||
pbs_buffer<uint64_t, MULTI_BIT> *buffer =
|
||||
(pbs_buffer<uint64_t, MULTI_BIT> *)mem_ptr;
|
||||
|
||||
|
||||
@@ -8,21 +8,6 @@
|
||||
// Return A if C == 0 and B if C == 1
|
||||
#define SEL(A, B, C) ((-(C) & ((A) ^ (B))) ^ (A))
|
||||
|
||||
- /*
|
||||
- * function compresses decomposed buffer into half size complex buffer for fft
|
||||
- */
|
||||
- template <class params>
|
||||
- __device__ void real_to_complex_compressed(const int16_t *__restrict__ src,
|
||||
- double2 *dst) {
|
||||
- int tid = threadIdx.x;
|
||||
- #pragma unroll
|
||||
- for (int i = 0; i < params::opt / 2; i++) {
|
||||
- dst[tid].x = __int2double_rn(src[2 * tid]);
|
||||
- dst[tid].y = __int2double_rn(src[2 * tid + 1]);
|
||||
- tid += params::degree / params::opt;
|
||||
- }
|
||||
- }
|
||||
|
||||
template <typename T, int elems_per_thread, int block_size>
|
||||
__device__ void copy_polynomial(const T *__restrict__ source, T *dst) {
|
||||
int tid = threadIdx.x;
|
||||
|
||||
Reference in New Issue
Block a user