chore(gpu): relax base log asserts from 32 to 64 now that we don't cast to u32 in decomposition

2026-01-09 14:47:56 -05:00 · 2024-11-14 11:17:26 +01:00
parent 0aee4c568e
commit 7aa454ee97
3 changed files with 5 additions and 23 deletions
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic.cu
@@ -654,8 +654,8 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
    int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
    uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride) {
-  if (base_log > 32)
-    PANIC("Cuda error (classical PBS): base log should be <= 32")
+  if (base_log > 64)
+    PANIC("Cuda error (classical PBS): base log should be <= 64")

  pbs_buffer<uint64_t, CLASSICAL> *buffer =
      (pbs_buffer<uint64_t, CLASSICAL> *)mem_ptr;
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_multibit.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_multibit.cu
@@ -69,9 +69,6 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
    uint32_t lut_count, uint32_t lut_stride) {

-  if (base_log > 32)
-    PANIC("Cuda error (multi-bit PBS): base log should be <= 32")
-
  switch (polynomial_size) {
  case 256:
    host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<256>>(
@@ -147,9 +144,6 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
    uint32_t lut_count, uint32_t lut_stride) {

-  if (base_log > 32)
-    PANIC("Cuda error (multi-bit PBS): base log should be <= 32")
-
  switch (polynomial_size) {
  case 256:
    host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<256>>(
@@ -224,6 +218,9 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
    uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
    uint32_t lut_stride) {

+  if (base_log > 64)
+    PANIC("Cuda error (multi-bit PBS): base log should be <= 64")
+
  pbs_buffer<uint64_t, MULTI_BIT> *buffer =
      (pbs_buffer<uint64_t, MULTI_BIT> *)mem_ptr;

--- a/backends/tfhe-cuda-backend/cuda/src/polynomial/functions.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/polynomial/functions.cuh
@@ -8,21 +8,6 @@
 // Return A if C == 0 and B if C == 1
 #define SEL(A, B, C) ((-(C) & ((A) ^ (B))) ^ (A))

-/*
- *  function compresses decomposed buffer into half size complex buffer for fft
- */
-template <class params>
-__device__ void real_to_complex_compressed(const int16_t *__restrict__ src,
-                                           double2 *dst) {
-  int tid = threadIdx.x;
-#pragma unroll
-  for (int i = 0; i < params::opt / 2; i++) {
-    dst[tid].x = __int2double_rn(src[2 * tid]);
-    dst[tid].y = __int2double_rn(src[2 * tid + 1]);
-    tid += params::degree / params::opt;
-  }
-}
-
 template <typename T, int elems_per_thread, int block_size>
 __device__ void copy_polynomial(const T *__restrict__ source, T *dst) {
  int tid = threadIdx.x;