chore(gpu): remove support for drift noise reduction

2026-01-09 14:47:56 -05:00 · 2025-10-01 13:59:45 +02:00
parent f3cddb5635
commit f9e876730a
68 changed files with 748 additions and 1900 deletions
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
@@ -125,9 +125,7 @@ uint64_t scratch_cuda_apply_many_univariate_lut_kb_64(
 void cuda_apply_univariate_lut_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks);
+    void *const *ksks, void *const *bsks);

 void cleanup_cuda_apply_univariate_lut_kb_64(CudaStreamsFFI streams,
                                             int8_t **mem_ptr_void);
@@ -145,9 +143,8 @@ void cuda_apply_bivariate_lut_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI const *input_radix_lwe_1,
    CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_radix_blocks, uint32_t shift);
+    void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
+    uint32_t shift);

 void cleanup_cuda_apply_bivariate_lut_kb_64(CudaStreamsFFI streams,
                                            int8_t **mem_ptr_void);
@@ -155,9 +152,8 @@ void cleanup_cuda_apply_bivariate_lut_kb_64(CudaStreamsFFI streams,
 void cuda_apply_many_univariate_lut_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_luts, uint32_t lut_stride);
+    void *const *ksks, void *const *bsks, uint32_t num_luts,
+    uint32_t lut_stride);

 uint64_t scratch_cuda_full_propagation_64(
    CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t lwe_dimension,
@@ -167,11 +163,10 @@ uint64_t scratch_cuda_full_propagation_64(
    PBS_TYPE pbs_type, bool allocate_gpu_memory,
    PBS_MS_REDUCTION_T noise_reduction_type);

-void cuda_full_propagation_64_inplace(
-    CudaStreamsFFI streams, CudaRadixCiphertextFFI *input_blocks,
-    int8_t *mem_ptr, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_blocks);
+void cuda_full_propagation_64_inplace(CudaStreamsFFI streams,
+                                      CudaRadixCiphertextFFI *input_blocks,
+                                      int8_t *mem_ptr, void *const *ksks,
+                                      void *const *bsks, uint32_t num_blocks);

 void cleanup_cuda_full_propagation(CudaStreamsFFI streams,
                                   int8_t **mem_ptr_void);
@@ -189,9 +184,8 @@ void cuda_integer_mult_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
    CudaRadixCiphertextFFI const *radix_lwe_left, bool const is_bool_left,
    CudaRadixCiphertextFFI const *radix_lwe_right, bool const is_bool_right,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    int8_t *mem_ptr, uint32_t polynomial_size, uint32_t num_blocks);
+    void *const *bsks, void *const *ksks, int8_t *mem_ptr,
+    uint32_t polynomial_size, uint32_t num_blocks);

 void cleanup_cuda_integer_mult(CudaStreamsFFI streams, int8_t **mem_ptr_void);

@@ -216,8 +210,7 @@ uint64_t scratch_cuda_integer_radix_logical_scalar_shift_kb_64(

 void cuda_integer_radix_logical_scalar_shift_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array, uint32_t shift,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks);

 uint64_t scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
    CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
@@ -230,8 +223,7 @@ uint64_t scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(

 void cuda_integer_radix_arithmetic_scalar_shift_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array, uint32_t shift,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_radix_logical_scalar_shift(CudaStreamsFFI streams,
                                                     int8_t **mem_ptr_void);
@@ -251,8 +243,7 @@ uint64_t scratch_cuda_integer_radix_shift_and_rotate_kb_64(
 void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array,
    CudaRadixCiphertextFFI const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    void *const *ksks);

 void cleanup_cuda_integer_radix_shift_and_rotate(CudaStreamsFFI streams,
                                                 int8_t **mem_ptr_void);
@@ -271,16 +262,13 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_1,
    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    void *const *bsks, void *const *ksks);

 void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, void const *scalar_blocks,
    void const *h_scalar_blocks, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_scalar_blocks);
+    void *const *ksks, uint32_t num_scalar_blocks);

 void cleanup_cuda_integer_comparison(CudaStreamsFFI streams,
                                     int8_t **mem_ptr_void);
@@ -298,15 +286,13 @@ void cuda_bitop_integer_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_1,
    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    void *const *bsks, void *const *ksks);

 void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
    void const *h_clear_blocks, uint32_t num_clear_blocks, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_bitop(CudaStreamsFFI streams, int8_t **mem_ptr_void);

@@ -324,8 +310,7 @@ void cuda_cmux_integer_radix_ciphertext_kb_64(
    CudaRadixCiphertextFFI const *lwe_condition,
    CudaRadixCiphertextFFI const *lwe_array_true,
    CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_radix_cmux(CudaStreamsFFI streams,
                                     int8_t **mem_ptr_void);
@@ -341,8 +326,7 @@ uint64_t scratch_cuda_integer_radix_scalar_rotate_kb_64(

 void cuda_integer_radix_scalar_rotate_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array, uint32_t n,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_radix_scalar_rotate(CudaStreamsFFI streams,
                                              int8_t **mem_ptr_void);
@@ -369,16 +353,13 @@ void cuda_propagate_single_carry_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array,
    CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
    uint32_t requested_flag, uint32_t uses_carry);

 void cuda_add_and_propagate_single_carry_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lhs_array,
    const CudaRadixCiphertextFFI *rhs_array, CudaRadixCiphertextFFI *carry_out,
    const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t requested_flag, uint32_t uses_carry);
+    void *const *ksks, uint32_t requested_flag, uint32_t uses_carry);

 void cleanup_cuda_propagate_single_carry(CudaStreamsFFI streams,
                                         int8_t **mem_ptr_void);
@@ -400,9 +381,8 @@ void cuda_integer_overflowing_sub_kb_64_inplace(
    const CudaRadixCiphertextFFI *rhs_array,
    CudaRadixCiphertextFFI *overflow_block,
    const CudaRadixCiphertextFFI *input_borrow, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t compute_overflow, uint32_t uses_input_borrow);
+    void *const *bsks, void *const *ksks, uint32_t compute_overflow,
+    uint32_t uses_input_borrow);

 void cleanup_cuda_integer_overflowing_sub(CudaStreamsFFI streams,
                                          int8_t **mem_ptr_void);
@@ -420,8 +400,7 @@ uint64_t scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
 void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
    CudaRadixCiphertextFFI *radix_lwe_vec, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    void *const *ksks);

 void cleanup_cuda_integer_radix_partial_sum_ciphertexts_vec(
    CudaStreamsFFI streams, int8_t **mem_ptr_void);
@@ -438,7 +417,6 @@ void cuda_scalar_multiplication_integer_radix_ciphertext_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array,
    uint64_t const *decomposed_scalar, uint64_t const *has_at_least_one_set,
    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
    uint32_t polynomial_size, uint32_t message_modulus, uint32_t num_scalars);

 void cleanup_cuda_integer_radix_scalar_mul(CudaStreamsFFI streams,
@@ -457,8 +435,7 @@ void cuda_integer_div_rem_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *quotient,
    CudaRadixCiphertextFFI *remainder, CudaRadixCiphertextFFI const *numerator,
    CudaRadixCiphertextFFI const *divisor, bool is_signed, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_div_rem(CudaStreamsFFI streams,
                                  int8_t **mem_ptr_void);
@@ -475,9 +452,7 @@ uint64_t scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
 void cuda_integer_compute_prefix_sum_hillis_steele_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_blocks);
+    void *const *ksks, void *const *bsks, uint32_t num_blocks);

 void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
    CudaStreamsFFI streams, int8_t **mem_ptr_void);
@@ -496,8 +471,7 @@ uint64_t scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(

 void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *ct, int8_t *mem_ptr,
-    bool is_signed, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    bool is_signed, void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_abs_inplace(CudaStreamsFFI streams,
                                      int8_t **mem_ptr_void);
@@ -514,9 +488,7 @@ uint64_t scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
 void cuda_integer_are_all_comparisons_block_true_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks);
+    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);

 void cleanup_cuda_integer_are_all_comparisons_block_true(CudaStreamsFFI streams,
                                                         int8_t **mem_ptr_void);
@@ -533,9 +505,7 @@ uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
 void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks);
+    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);

 void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
    CudaStreamsFFI streams, int8_t **mem_ptr_void);
@@ -561,9 +531,7 @@ uint64_t scratch_cuda_apply_noise_squashing_kb(
 void cuda_apply_noise_squashing_kb(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks);
+    void *const *ksks, void *const *bsks);

 void cleanup_cuda_apply_noise_squashing_kb(CudaStreamsFFI streams,
                                           int8_t **mem_ptr_void);
@@ -581,9 +549,7 @@ void cuda_sub_and_propagate_single_carry_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lhs_array,
    const CudaRadixCiphertextFFI *rhs_array, CudaRadixCiphertextFFI *carry_out,
    const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t requested_flag, uint32_t uses_carry);
+    void *const *ksks, uint32_t requested_flag, uint32_t uses_carry);

 void cleanup_cuda_sub_and_propagate_single_carry(CudaStreamsFFI streams,
                                                 int8_t **mem_ptr_void);
@@ -600,7 +566,6 @@ uint64_t scratch_cuda_integer_unsigned_scalar_div_radix_kb_64(
 void cuda_integer_unsigned_scalar_div_radix_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *numerator_ct,
    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
    const CudaScalarDivisorFFI *scalar_divisor_ffi);

 void cleanup_cuda_integer_unsigned_scalar_div_radix_kb_64(
@@ -615,11 +580,12 @@ uint64_t scratch_cuda_extend_radix_with_sign_msb_64(
    uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
    PBS_MS_REDUCTION_T noise_reduction_type);

-void cuda_extend_radix_with_sign_msb_64(
-    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output,
-    CudaRadixCiphertextFFI const *input, int8_t *mem_ptr,
-    uint32_t num_additional_blocks, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+void cuda_extend_radix_with_sign_msb_64(CudaStreamsFFI streams,
+                                        CudaRadixCiphertextFFI *output,
+                                        CudaRadixCiphertextFFI const *input,
+                                        int8_t *mem_ptr,
+                                        uint32_t num_additional_blocks,
+                                        void *const *bsks, void *const *ksks);

 void cleanup_cuda_extend_radix_with_sign_msb_64(CudaStreamsFFI streams,
                                                int8_t **mem_ptr_void);
@@ -636,7 +602,6 @@ uint64_t scratch_cuda_integer_signed_scalar_div_radix_kb_64(
 void cuda_integer_signed_scalar_div_radix_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *numerator_ct,
    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
    const CudaScalarDivisorFFI *scalar_divisor_ffi, uint32_t numerator_bits);

 void cleanup_cuda_integer_signed_scalar_div_radix_kb_64(CudaStreamsFFI streams,
@@ -655,9 +620,7 @@ uint64_t scratch_integer_unsigned_scalar_div_rem_radix_kb_64(
 void cuda_integer_unsigned_scalar_div_rem_radix_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *quotient_ct,
    CudaRadixCiphertextFFI *remainder_ct, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
-    const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    void *const *ksks, const CudaScalarDivisorFFI *scalar_divisor_ffi,
    uint64_t const *divisor_has_at_least_one_set,
    uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
    void const *clear_blocks, void const *h_clear_blocks,
@@ -679,9 +642,7 @@ uint64_t scratch_integer_signed_scalar_div_rem_radix_kb_64(
 void cuda_integer_signed_scalar_div_rem_radix_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *quotient_ct,
    CudaRadixCiphertextFFI *remainder_ct, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    void *const *ksks, const CudaScalarDivisorFFI *scalar_divisor_ffi,
    uint64_t const *divisor_has_at_least_one_set,
    uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
    uint32_t numerator_bits);
@@ -701,8 +662,7 @@ uint64_t scratch_integer_count_of_consecutive_bits_kb_64(
 void cuda_integer_count_of_consecutive_bits_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_ct,
    CudaRadixCiphertextFFI const *input_ct, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key);
+    void *const *ksks);

 void cleanup_cuda_integer_count_of_consecutive_bits_kb_64(
    CudaStreamsFFI streams, int8_t **mem_ptr_void);
@@ -716,11 +676,11 @@ uint64_t scratch_cuda_integer_grouped_oprf_64(
    bool allocate_gpu_memory, uint32_t message_bits_per_block,
    uint32_t total_random_bits, PBS_MS_REDUCTION_T noise_reduction_type);

-void cuda_integer_grouped_oprf_async_64(
-    CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
-    const void *seeded_lwe_input, uint32_t num_blocks_to_process, int8_t *mem,
-    void *const *bsks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+void cuda_integer_grouped_oprf_async_64(CudaStreamsFFI streams,
+                                        CudaRadixCiphertextFFI *radix_lwe_out,
+                                        const void *seeded_lwe_input,
+                                        uint32_t num_blocks_to_process,
+                                        int8_t *mem, void *const *bsks);

 void cleanup_cuda_integer_grouped_oprf_64(CudaStreamsFFI streams,
                                          int8_t **mem_ptr_void);
@@ -740,8 +700,7 @@ void cuda_integer_ilog2_kb_64(
    CudaRadixCiphertextFFI const *trivial_ct_neg_n,
    CudaRadixCiphertextFFI const *trivial_ct_2,
    CudaRadixCiphertextFFI const *trivial_ct_m_minus_1_block, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key);
+    void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_ilog2_kb_64(CudaStreamsFFI streams,
                                      int8_t **mem_ptr_void);
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_enums.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_enums.h
@@ -3,16 +3,6 @@
 #include <stdint.h>
 enum PBS_TYPE { MULTI_BIT = 0, CLASSICAL = 1 };
 enum PBS_VARIANT { DEFAULT = 0, CG = 1, TBC = 2 };
-enum PBS_MS_REDUCTION_T { NO_REDUCTION = 0, DRIFT = 1, CENTERED = 2 };
-
-extern "C" {
-typedef struct {
-  void *const *ptr;
-  uint32_t num_zeros;
-  double ms_bound;
-  double ms_r_sigma;
-  double ms_input_variance;
-} CudaModulusSwitchNoiseReductionKeyFFI;
-}
+enum PBS_MS_REDUCTION_T { NO_REDUCTION = 0, CENTERED = 1 };

 #endif // CUDA_PBS_ENUMS_H
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
@@ -80,7 +80,6 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {

  Torus *global_accumulator;
  double2 *global_join_buffer;
-  Torus *temp_lwe_array_in;

  PBS_VARIANT pbs_variant;
  PBS_MS_REDUCTION_T noise_reduction_type;
@@ -97,10 +96,6 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
    this->pbs_variant = pbs_variant;

    auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
-    this->temp_lwe_array_in = (Torus *)cuda_malloc_with_size_tracking_async(
-        (lwe_dimension + 1) * input_lwe_ciphertext_count * sizeof(Torus),
-        stream, gpu_index, size_tracker,
-        noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT);
    switch (pbs_variant) {
    case PBS_VARIANT::DEFAULT: {
      uint64_t full_sm_step_one =
@@ -234,10 +229,6 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
    if (pbs_variant == DEFAULT)
      cuda_drop_with_size_tracking_async(global_accumulator, stream, gpu_index,
                                         gpu_memory_allocated);
-
-    if (noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT)
-      cuda_drop_with_size_tracking_async(temp_lwe_array_in, stream, gpu_index,
-                                         gpu_memory_allocated);
  }
 };

@@ -249,8 +240,6 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> {

  __uint128_t *global_accumulator;
  double *global_join_buffer;
-  InputTorus *temp_lwe_array_in;
-  uint64_t *trivial_indexes;

  PBS_VARIANT pbs_variant;
  PBS_MS_REDUCTION_T noise_reduction_type;
@@ -268,27 +257,6 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> {
    cuda_set_device(gpu_index);
    this->pbs_variant = pbs_variant;

-    if (noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT) {
-      this->temp_lwe_array_in =
-          (InputTorus *)cuda_malloc_with_size_tracking_async(
-              (lwe_dimension + 1) * input_lwe_ciphertext_count *
-                  sizeof(InputTorus),
-              stream, gpu_index, size_tracker, allocate_gpu_memory);
-      this->trivial_indexes = (uint64_t *)cuda_malloc_with_size_tracking_async(
-          input_lwe_ciphertext_count * sizeof(uint64_t), stream, gpu_index,
-          size_tracker, allocate_gpu_memory);
-      uint64_t *h_trivial_indexes = new uint64_t[input_lwe_ciphertext_count];
-      for (uint32_t i = 0; i < input_lwe_ciphertext_count; i++)
-        h_trivial_indexes[i] = i;
-
-      cuda_memcpy_with_size_tracking_async_to_gpu(
-          trivial_indexes, h_trivial_indexes,
-          input_lwe_ciphertext_count * sizeof(uint64_t), stream, gpu_index,
-          allocate_gpu_memory);
-
-      cuda_synchronize_stream(stream, gpu_index);
-      delete[] h_trivial_indexes;
-    }
    auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
    size_t global_join_buffer_size = (glwe_dimension + 1) * level_count *
                                     input_lwe_ciphertext_count *
@@ -424,13 +392,6 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> {
    if (pbs_variant == DEFAULT)
      cuda_drop_with_size_tracking_async(global_accumulator, stream, gpu_index,
                                         gpu_memory_allocated);
-
-    if (noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT) {
-      cuda_drop_with_size_tracking_async(temp_lwe_array_in, stream, gpu_index,
-                                         gpu_memory_allocated);
-      cuda_drop_with_size_tracking_async(trivial_indexes, stream, gpu_index,
-                                         gpu_memory_allocated);
-    }
  }
 };

--- a/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap.h
@@ -89,18 +89,14 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
    void const *lwe_output_indexes, void const *lut_vector,
    void const *lut_vector_indexes, void const *lwe_array_in,
    void const *lwe_input_indexes, void const *bootstrapping_key,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *ms_noise_reduction_ptr, int8_t *buffer, uint32_t lwe_dimension,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
-    uint32_t lut_stride);
+    int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
+    uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);

 void cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lut_vector, void const *lwe_array_in,
-    void const *bootstrapping_key,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void const *ms_noise_reduction_ptr, int8_t *buffer, uint32_t lwe_dimension,
+    void const *bootstrapping_key, int8_t *buffer, uint32_t lwe_dimension,
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
    uint32_t level_count, uint32_t num_samples);

--- a/backends/tfhe-cuda-backend/cuda/include/zk/zk.h
+++ b/backends/tfhe-cuda-backend/cuda/include/zk/zk.h
@@ -22,8 +22,7 @@ uint64_t scratch_cuda_expand_without_verification_64(
 void cuda_expand_without_verification_64(
    CudaStreamsFFI streams, void *lwe_array_out,
    const void *lwe_flattened_compact_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *computing_ksks, void *const *casting_keys,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+    void *const *bsks, void *const *computing_ksks, void *const *casting_keys);

 void cleanup_expand_without_verification_64(CudaStreamsFFI streams,
                                            int8_t **mem_ptr_void);
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
@@ -103,23 +103,6 @@ void cuda_centered_modulus_switch_64(void *stream, uint32_t gpu_index,
      lwe_dimension, log_modulus);
 }

-// This end point is used only for testing purposes
-// its output always follows trivial ordering
-void cuda_improve_noise_modulus_switch_64(
-    void *stream, uint32_t gpu_index, void *lwe_array_out,
-    void const *lwe_array_in, void const *lwe_array_indexes,
-    void const *encrypted_zeros, uint32_t lwe_size, uint32_t num_lwes,
-    uint32_t num_zeros, double input_variance, double r_sigma, double bound,
-    uint32_t log_modulus) {
-  host_drift_modulus_switch<uint64_t>(
-      static_cast<cudaStream_t>(stream), gpu_index,
-      static_cast<uint64_t *>(lwe_array_out),
-      static_cast<uint64_t const *>(lwe_array_in),
-      static_cast<uint64_t const *>(lwe_array_indexes),
-      static_cast<const uint64_t *>(encrypted_zeros), lwe_size, num_lwes,
-      num_zeros, input_variance, r_sigma, bound, log_modulus);
-}
-
 void cuda_glwe_sample_extract_128(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *glwe_array_in, uint32_t const *nth_array, uint32_t num_nths,
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
@@ -426,31 +426,4 @@ __global__ void __launch_bounds__(512)
  }
 }

-template <typename Torus>
-__host__ void host_drift_modulus_switch(
-    cudaStream_t stream, uint32_t gpu_index, Torus *array_out,
-    Torus const *array_in, uint64_t const *indexes, const Torus *zeros,
-    uint32_t lwe_size, uint32_t num_lwes, const uint32_t num_zeros,
-    const double input_variance, const double r_sigma, const double bound,
-    uint32_t log_modulus) {
-
-  PANIC_IF_FALSE(lwe_size >= 512,
-                 "The lwe_size (%d) is less than 512, this is not supported\n",
-                 lwe_size);
-  PANIC_IF_FALSE(
-      lwe_size <= 1024,
-      "The lwe_size (%d) is greater than 1024, this is not supported\n",
-      lwe_size);
-
-  cuda_set_device(gpu_index);
-
-  // This reduction requires a power of two num of threads
-  int num_threads = 512, num_blocks = num_lwes;
-
-  improve_noise_modulus_switch<Torus><<<num_blocks, num_threads, 0, stream>>>(
-      array_out, array_in, indexes, zeros, lwe_size, num_zeros, input_variance,
-      r_sigma, bound, log_modulus);
-  check_cuda_error(cudaGetLastError());
-}
-
 #endif // CNCRT_TORUS_H
--- a/backends/tfhe-cuda-backend/cuda/src/integer/abs.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cu
@@ -21,14 +21,12 @@ uint64_t scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(

 void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *ct, int8_t *mem_ptr,
-    bool is_signed, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    bool is_signed, void *const *bsks, void *const *ksks) {

  auto mem = (int_abs_buffer<uint64_t> *)mem_ptr;

  host_integer_abs_kb<uint64_t>(CudaStreams(streams), ct, bsks,
-                                (uint64_t **)(ksks), ms_noise_reduction_key,
-                                mem, is_signed);
+                                (uint64_t **)(ksks), mem, is_signed);
 }

 void cleanup_cuda_integer_abs_inplace(CudaStreamsFFI streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh
@@ -30,11 +30,10 @@ __host__ uint64_t scratch_cuda_integer_abs_kb(
 }

 template <typename Torus>
-__host__ void host_integer_abs_kb(
-    CudaStreams streams, CudaRadixCiphertextFFI *ct, void *const *bsks,
-    uint64_t *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
+__host__ void
+host_integer_abs_kb(CudaStreams streams, CudaRadixCiphertextFFI *ct,
+                    void *const *bsks, uint64_t *const *ksks,
+                    int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
  if (!is_signed)
    return;

@@ -49,19 +48,19 @@ __host__ void host_integer_abs_kb(

  host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
      streams, mask, num_bits_in_ciphertext - 1,
-      mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key);
+      mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);
  host_addition<Torus>(streams.stream(0), streams.gpu_index(0), ct, mask, ct,
                       ct->num_radix_blocks, mem_ptr->params.message_modulus,
                       mem_ptr->params.carry_modulus);

  uint32_t requested_flag = outputFlag::FLAG_NONE;
  uint32_t uses_carry = 0;
-  host_propagate_single_carry<Torus>(
-      streams, ct, nullptr, nullptr, mem_ptr->scp_mem, bsks, ksks,
-      ms_noise_reduction_key, requested_flag, uses_carry);
+  host_propagate_single_carry<Torus>(streams, ct, nullptr, nullptr,
+                                     mem_ptr->scp_mem, bsks, ksks,
+                                     requested_flag, uses_carry);

  host_integer_radix_bitop_kb<Torus>(streams, ct, mask, ct, mem_ptr->bitxor_mem,
-                                     bsks, ksks, ms_noise_reduction_key);
+                                     bsks, ksks);
 }

 #endif // TFHE_RS_ABS_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
@@ -23,13 +23,11 @@ void cuda_bitop_integer_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_1,
    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *bsks, void *const *ksks) {

  host_integer_radix_bitop_kb<uint64_t>(
      CudaStreams(streams), lwe_array_out, lwe_array_1, lwe_array_2,
-      (int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
-      ms_noise_reduction_key);
+      (int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
 }

 void cleanup_cuda_integer_bitop(CudaStreamsFFI streams, int8_t **mem_ptr_void) {
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
@@ -16,8 +16,7 @@ __host__ void host_integer_radix_bitop_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_1,
    CudaRadixCiphertextFFI const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
-    void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *bsks, Torus *const *ksks) {

  PANIC_IF_FALSE(
      lwe_array_out->num_radix_blocks == lwe_array_1->num_radix_blocks &&
@@ -45,9 +44,8 @@ __host__ void host_integer_radix_bitop_kb(
  }

  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
-      streams, lwe_array_out, lwe_array_1, lwe_array_2, bsks, ksks,
-      ms_noise_reduction_key, lut, lwe_array_out->num_radix_blocks,
-      lut->params.message_modulus);
+      streams, lwe_array_out, lwe_array_1, lwe_array_2, bsks, ksks, lut,
+      lwe_array_out->num_radix_blocks, lut->params.message_modulus);

  memcpy(lwe_array_out->degrees, degrees,
         lwe_array_out->num_radix_blocks * sizeof(uint64_t));
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cast.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cast.cu
@@ -35,16 +35,17 @@ uint64_t scratch_cuda_extend_radix_with_sign_msb_64(
      num_blocks, num_additional_blocks, allocate_gpu_memory);
 }

-void cuda_extend_radix_with_sign_msb_64(
-    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output,
-    CudaRadixCiphertextFFI const *input, int8_t *mem_ptr,
-    uint32_t num_additional_blocks, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+void cuda_extend_radix_with_sign_msb_64(CudaStreamsFFI streams,
+                                        CudaRadixCiphertextFFI *output,
+                                        CudaRadixCiphertextFFI const *input,
+                                        int8_t *mem_ptr,
+                                        uint32_t num_additional_blocks,
+                                        void *const *bsks, void *const *ksks) {
  PUSH_RANGE("cast")
  host_extend_radix_with_sign_msb<uint64_t>(
      CudaStreams(streams), output, input,
      (int_extend_radix_with_sign_msb_buffer<uint64_t> *)mem_ptr,
-      num_additional_blocks, bsks, (uint64_t **)ksks, ms_noise_reduction_key);
+      num_additional_blocks, bsks, (uint64_t **)ksks);
  POP_RANGE()
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/cast.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cast.cuh
@@ -55,8 +55,7 @@ __host__ void host_extend_radix_with_sign_msb(
    CudaStreams streams, CudaRadixCiphertextFFI *output,
    CudaRadixCiphertextFFI const *input,
    int_extend_radix_with_sign_msb_buffer<Torus> *mem_ptr,
-    uint32_t num_additional_blocks, void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    uint32_t num_additional_blocks, void *const *bsks, Torus *const *ksks) {

  if (num_additional_blocks == 0) {
    PUSH_RANGE("cast/extend no addblocks")
@@ -79,8 +78,7 @@ __host__ void host_extend_radix_with_sign_msb(
      input_blocks - 1, input_blocks);

  host_apply_univariate_lut_kb(streams, mem_ptr->padding_block,
-                               mem_ptr->last_block, mem_ptr->lut, ksks,
-                               ms_noise_reduction_key, bsks);
+                               mem_ptr->last_block, mem_ptr->lut, ksks, bsks);

  for (uint32_t i = 0; i < num_additional_blocks; ++i) {
    uint32_t dst_block_idx = input_blocks + i;
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
@@ -29,13 +29,12 @@ void cuda_cmux_integer_radix_ciphertext_kb_64(
    CudaRadixCiphertextFFI const *lwe_condition,
    CudaRadixCiphertextFFI const *lwe_array_true,
    CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *bsks, void *const *ksks) {
  PUSH_RANGE("cmux")
  host_integer_radix_cmux_kb<uint64_t>(
      CudaStreams(streams), lwe_array_out, lwe_condition, lwe_array_true,
      lwe_array_false, (int_cmux_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)(ksks), ms_noise_reduction_key);
+      (uint64_t **)(ksks));
  POP_RANGE()
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
@@ -5,15 +5,13 @@
 #include "radix_ciphertext.cuh"

 template <typename Torus>
-__host__ void
-zero_out_if(CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
-            CudaRadixCiphertextFFI const *lwe_array_input,
-            CudaRadixCiphertextFFI const *lwe_condition,
-            int_zero_out_if_buffer<Torus> *mem_ptr,
-            int_radix_lut<Torus> *predicate, void *const *bsks,
-            Torus *const *ksks,
-            CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-            uint32_t num_radix_blocks) {
+__host__ void zero_out_if(CudaStreams streams,
+                          CudaRadixCiphertextFFI *lwe_array_out,
+                          CudaRadixCiphertextFFI const *lwe_array_input,
+                          CudaRadixCiphertextFFI const *lwe_condition,
+                          int_zero_out_if_buffer<Torus> *mem_ptr,
+                          int_radix_lut<Torus> *predicate, void *const *bsks,
+                          Torus *const *ksks, uint32_t num_radix_blocks) {
  PANIC_IF_FALSE(
      lwe_array_out->num_radix_blocks >= num_radix_blocks &&
          lwe_array_input->num_radix_blocks >= num_radix_blocks,
@@ -38,8 +36,8 @@ zero_out_if(CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
      num_radix_blocks);

  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, lwe_array_out, tmp_lwe_array_input, bsks, ksks,
-      ms_noise_reduction_key, predicate, num_radix_blocks);
+      streams, lwe_array_out, tmp_lwe_array_input, bsks, ksks, predicate,
+      num_radix_blocks);
 }

 template <typename Torus>
@@ -48,8 +46,7 @@ __host__ void host_integer_radix_cmux_kb(
    CudaRadixCiphertextFFI const *lwe_condition,
    CudaRadixCiphertextFFI const *lwe_array_true,
    CudaRadixCiphertextFFI const *lwe_array_false,
-    int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks) {

  if (lwe_array_out->num_radix_blocks != lwe_array_true->num_radix_blocks)
    PANIC("Cuda error: input and output num radix blocks must be the same")
@@ -73,8 +70,8 @@ __host__ void host_integer_radix_cmux_kb(
  }
  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
      streams, mem_ptr->buffer_out, mem_ptr->buffer_in,
-      mem_ptr->condition_array, bsks, ksks, ms_noise_reduction_key,
-      mem_ptr->predicate_lut, 2 * num_radix_blocks, params.message_modulus);
+      mem_ptr->condition_array, bsks, ksks, mem_ptr->predicate_lut,
+      2 * num_radix_blocks, params.message_modulus);

  // If the condition was true, true_ct will have kept its value and false_ct
  // will be 0 If the condition was false, true_ct will be 0 and false_ct will
@@ -91,7 +88,7 @@ __host__ void host_integer_radix_cmux_kb(
                       params.message_modulus, params.carry_modulus);

  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, lwe_array_out, &mem_true, bsks, ksks, ms_noise_reduction_key,
+      streams, lwe_array_out, &mem_true, bsks, ksks,
      mem_ptr->message_extract_lut, num_radix_blocks);
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
@@ -41,8 +41,7 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_1,
    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *bsks, void *const *ksks) {
  PUSH_RANGE("comparison")
  if (lwe_array_1->num_radix_blocks != lwe_array_2->num_radix_blocks)
    PANIC("Cuda error: input num radix blocks must be the same")
@@ -57,7 +56,7 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
  case NE:
    host_integer_radix_equality_check_kb<uint64_t>(
        CudaStreams(streams), lwe_array_out, lwe_array_1, lwe_array_2, buffer,
-        bsks, (uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
+        bsks, (uint64_t **)(ksks), num_radix_blocks);
    break;
  case GT:
  case GE:
@@ -69,7 +68,7 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
    host_integer_radix_difference_check_kb<uint64_t>(
        CudaStreams(streams), lwe_array_out, lwe_array_1, lwe_array_2, buffer,
        buffer->diff_buffer->operator_f, bsks, (uint64_t **)(ksks),
-        ms_noise_reduction_key, num_radix_blocks);
+        num_radix_blocks);
    break;
  case MAX:
  case MIN:
@@ -77,7 +76,7 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
      PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
    host_integer_radix_maxmin_kb<uint64_t>(
        CudaStreams(streams), lwe_array_out, lwe_array_1, lwe_array_2, buffer,
-        bsks, (uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
+        bsks, (uint64_t **)(ksks), num_radix_blocks);
    break;
  default:
    PANIC("Cuda error: integer operation not supported")
@@ -118,16 +117,14 @@ uint64_t scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
 void cuda_integer_are_all_comparisons_block_true_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {

  int_comparison_buffer<uint64_t> *buffer =
      (int_comparison_buffer<uint64_t> *)mem_ptr;

  host_integer_are_all_comparisons_block_true_kb<uint64_t>(
      CudaStreams(streams), lwe_array_out, lwe_array_in, buffer, bsks,
-      (uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
+      (uint64_t **)(ksks), num_radix_blocks);
 }

 void cleanup_cuda_integer_are_all_comparisons_block_true(
@@ -162,16 +159,14 @@ uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
 void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {

  int_comparison_buffer<uint64_t> *buffer =
      (int_comparison_buffer<uint64_t> *)mem_ptr;

  host_integer_is_at_least_one_comparisons_block_true_kb<uint64_t>(
      CudaStreams(streams), lwe_array_out, lwe_array_in, buffer, bsks,
-      (uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
+      (uint64_t **)(ksks), num_radix_blocks);
 }

 void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
--- a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh
@@ -61,9 +61,7 @@ __host__ void are_all_comparisons_block_true(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
    PANIC("Cuda error: input and output lwe dimensions must be the same")
@@ -158,8 +156,7 @@ __host__ void are_all_comparisons_block_true(
    if (remaining_blocks == 1) {
      // In the last iteration we copy the output to the final address
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
-          streams, lwe_array_out, accumulator, bsks, ksks,
-          ms_noise_reduction_key, lut, 1);
+          streams, lwe_array_out, accumulator, bsks, ksks, lut, 1);
      // Reset max_value_lut_indexes before returning, otherwise if the lut is
      // reused the lut indexes will be wrong
      memset(is_max_value_lut->h_lut_indexes, 0,
@@ -176,8 +173,7 @@ __host__ void are_all_comparisons_block_true(
      return;
    } else {
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
-          streams, tmp_out, accumulator, bsks, ksks, ms_noise_reduction_key,
-          lut, num_chunks);
+          streams, tmp_out, accumulator, bsks, ksks, lut, num_chunks);
    }
  }
 }
@@ -193,9 +189,7 @@ __host__ void is_at_least_one_comparisons_block_true(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
    PANIC("Cuda error: input lwe dimensions must be the same")
@@ -249,12 +243,12 @@ __host__ void is_at_least_one_comparisons_block_true(
      // In the last iteration we copy the output to the final address
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, lwe_array_out, buffer->tmp_block_accumulated, bsks, ksks,
-          ms_noise_reduction_key, lut, 1);
+          lut, 1);
      return;
    } else {
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, mem_ptr->tmp_lwe_array_out, buffer->tmp_block_accumulated,
-          bsks, ksks, ms_noise_reduction_key, lut, num_chunks);
+          bsks, ksks, lut, num_chunks);
    }
  }
 }
@@ -264,9 +258,8 @@ __host__ void host_compare_blocks_with_zero(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    int32_t num_radix_blocks, int_radix_lut<Torus> *zero_comparison) {
+    Torus *const *ksks, int32_t num_radix_blocks,
+    int_radix_lut<Torus> *zero_comparison) {

  if (num_radix_blocks == 0)
    return;
@@ -322,8 +315,7 @@ __host__ void host_compare_blocks_with_zero(
  }

  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, lwe_array_out, sum, bsks, ksks, ms_noise_reduction_key,
-      zero_comparison, num_sum_blocks);
+      streams, lwe_array_out, sum, bsks, ksks, zero_comparison, num_sum_blocks);

  reset_radix_ciphertext_blocks(lwe_array_out, num_sum_blocks);
 }
@@ -334,9 +326,7 @@ __host__ void host_integer_radix_equality_check_kb(
    CudaRadixCiphertextFFI const *lwe_array_1,
    CudaRadixCiphertextFFI const *lwe_array_2,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_1->lwe_dimension ||
      lwe_array_out->lwe_dimension != lwe_array_2->lwe_dimension)
@@ -347,16 +337,15 @@ __host__ void host_integer_radix_equality_check_kb(
  auto comparisons = mem_ptr->tmp_block_comparisons;
  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
      streams, comparisons, lwe_array_1, lwe_array_2, bsks, ksks,
-      ms_noise_reduction_key, eq_buffer->operator_lut, num_radix_blocks,
+      eq_buffer->operator_lut, num_radix_blocks,
      eq_buffer->operator_lut->params.message_modulus);

  // This takes a Vec of blocks, where each block is either 0 or 1.
  //
  // It returns a block encrypting 1 if all input blocks are 1
  // otherwise the block encrypts 0
-  are_all_comparisons_block_true<Torus>(
-      streams, lwe_array_out, comparisons, mem_ptr, bsks, ksks,
-      ms_noise_reduction_key, num_radix_blocks);
+  are_all_comparisons_block_true<Torus>(streams, lwe_array_out, comparisons,
+                                        mem_ptr, bsks, ksks, num_radix_blocks);
 }

 template <typename Torus>
@@ -365,9 +354,7 @@ __host__ void compare_radix_blocks_kb(
    CudaRadixCiphertextFFI const *lwe_array_left,
    CudaRadixCiphertextFFI const *lwe_array_right,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_left->lwe_dimension ||
      lwe_array_out->lwe_dimension != lwe_array_right->lwe_dimension)
@@ -400,8 +387,8 @@ __host__ void compare_radix_blocks_kb(
  // Apply LUT to compare to 0
  auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;
  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, lwe_array_out, lwe_array_out, bsks, ksks, ms_noise_reduction_key,
-      is_non_zero_lut, num_radix_blocks);
+      streams, lwe_array_out, lwe_array_out, bsks, ksks, is_non_zero_lut,
+      num_radix_blocks);

  // Add one
  // Here Lhs can have the following values: (-1) % (message modulus * carry
@@ -414,14 +401,13 @@ __host__ void compare_radix_blocks_kb(
 // (inferior, equal, superior) to one single shortint block containing the
 // final sign
 template <typename Torus>
-__host__ void tree_sign_reduction(
-    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI *lwe_block_comparisons,
-    int_tree_sign_reduction_buffer<Torus> *tree_buffer,
-    std::function<Torus(Torus)> sign_handler_f, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+__host__ void
+tree_sign_reduction(CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
+                    CudaRadixCiphertextFFI *lwe_block_comparisons,
+                    int_tree_sign_reduction_buffer<Torus> *tree_buffer,
+                    std::function<Torus(Torus)> sign_handler_f,
+                    void *const *bsks, Torus *const *ksks,
+                    uint32_t num_radix_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_block_comparisons->lwe_dimension)
    PANIC("Cuda error: input lwe dimensions must be the same")
@@ -454,8 +440,7 @@ __host__ void tree_sign_reduction(
                       partial_block_count, message_modulus);

    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, x, y, bsks, ksks, ms_noise_reduction_key, inner_tree_leaf,
-        partial_block_count >> 1);
+        streams, x, y, bsks, ksks, inner_tree_leaf, partial_block_count >> 1);

    if ((partial_block_count % 2) != 0) {
      partial_block_count >>= 1;
@@ -501,8 +486,7 @@ __host__ void tree_sign_reduction(

  // Last leaf
  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, lwe_array_out, y, bsks, ksks, ms_noise_reduction_key, last_lut,
-      1);
+      streams, lwe_array_out, y, bsks, ksks, last_lut, 1);
 }

 template <typename Torus>
@@ -512,9 +496,7 @@ __host__ void host_integer_radix_difference_check_kb(
    CudaRadixCiphertextFFI const *lwe_array_right,
    int_comparison_buffer<Torus> *mem_ptr,
    std::function<Torus(Torus)> reduction_lut_f, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_left->lwe_dimension ||
      lwe_array_out->lwe_dimension != lwe_array_right->lwe_dimension)
@@ -554,7 +536,7 @@ __host__ void host_integer_radix_difference_check_kb(
    auto identity_lut = mem_ptr->identity_lut;
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        streams, diff_buffer->tmp_packed, diff_buffer->tmp_packed, bsks, ksks,
-        ms_noise_reduction_key, identity_lut, 2 * packed_num_radix_blocks);
+        identity_lut, 2 * packed_num_radix_blocks);
  } else {
    as_radix_ciphertext_slice<Torus>(&lhs, lwe_array_left, 0,
                                     lwe_array_left->num_radix_blocks);
@@ -572,16 +554,14 @@ __host__ void host_integer_radix_difference_check_kb(
    // Compare packed blocks, or simply the total number of radix blocks in the
    // inputs
    compare_radix_blocks_kb<Torus>(streams, comparisons, &lhs, &rhs, mem_ptr,
-                                   bsks, ksks, ms_noise_reduction_key,
-                                   packed_num_radix_blocks);
+                                   bsks, ksks, packed_num_radix_blocks);
    num_comparisons = packed_num_radix_blocks;
  } else {
    // Packing is possible
    if (carry_modulus >= message_modulus) {
      // Compare (num_radix_blocks - 2) / 2 packed blocks
      compare_radix_blocks_kb<Torus>(streams, comparisons, &lhs, &rhs, mem_ptr,
-                                     bsks, ksks, ms_noise_reduction_key,
-                                     packed_num_radix_blocks);
+                                     bsks, ksks, packed_num_radix_blocks);

      // Compare the last block before the sign block separately
      auto identity_lut = mem_ptr->identity_lut;
@@ -595,7 +575,7 @@ __host__ void host_integer_radix_difference_check_kb(
                                       num_radix_blocks - 1);
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, &last_left_block_before_sign_block, &shifted_lwe_array_left,
-          bsks, ksks, ms_noise_reduction_key, identity_lut, 1);
+          bsks, ksks, identity_lut, 1);

      CudaRadixCiphertextFFI last_right_block_before_sign_block;
      as_radix_ciphertext_slice<Torus>(
@@ -608,8 +588,7 @@ __host__ void host_integer_radix_difference_check_kb(
                                       num_radix_blocks - 1);
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, &last_right_block_before_sign_block,
-          &shifted_lwe_array_right, bsks, ksks, ms_noise_reduction_key,
-          identity_lut, 1);
+          &shifted_lwe_array_right, bsks, ksks, identity_lut, 1);

      CudaRadixCiphertextFFI shifted_comparisons;
      as_radix_ciphertext_slice<Torus>(&shifted_comparisons, comparisons,
@@ -617,8 +596,7 @@ __host__ void host_integer_radix_difference_check_kb(
                                       packed_num_radix_blocks + 1);
      compare_radix_blocks_kb<Torus>(
          streams, &shifted_comparisons, &last_left_block_before_sign_block,
-          &last_right_block_before_sign_block, mem_ptr, bsks, ksks,
-          ms_noise_reduction_key, 1);
+          &last_right_block_before_sign_block, mem_ptr, bsks, ksks, 1);

      // Compare the sign block separately
      as_radix_ciphertext_slice<Torus>(&shifted_comparisons, comparisons,
@@ -632,14 +610,14 @@ __host__ void host_integer_radix_difference_check_kb(
                                       num_radix_blocks - 1, num_radix_blocks);
      integer_radix_apply_bivariate_lookup_table_kb<Torus>(
          streams, &shifted_comparisons, &last_left_block, &last_right_block,
-          bsks, ksks, ms_noise_reduction_key, mem_ptr->signed_lut, 1,
+          bsks, ksks, mem_ptr->signed_lut, 1,
          mem_ptr->signed_lut->params.message_modulus);
      num_comparisons = packed_num_radix_blocks + 2;

    } else {
-      compare_radix_blocks_kb<Torus>(
-          streams, comparisons, lwe_array_left, lwe_array_right, mem_ptr, bsks,
-          ksks, ms_noise_reduction_key, num_radix_blocks - 1);
+      compare_radix_blocks_kb<Torus>(streams, comparisons, lwe_array_left,
+                                     lwe_array_right, mem_ptr, bsks, ksks,
+                                     num_radix_blocks - 1);
      // Compare the sign block separately
      CudaRadixCiphertextFFI shifted_comparisons;
      as_radix_ciphertext_slice<Torus>(&shifted_comparisons, comparisons,
@@ -652,7 +630,7 @@ __host__ void host_integer_radix_difference_check_kb(
                                       num_radix_blocks - 1, num_radix_blocks);
      integer_radix_apply_bivariate_lookup_table_kb<Torus>(
          streams, &shifted_comparisons, &last_left_block, &last_right_block,
-          bsks, ksks, ms_noise_reduction_key, mem_ptr->signed_lut, 1,
+          bsks, ksks, mem_ptr->signed_lut, 1,
          mem_ptr->signed_lut->params.message_modulus);
      num_comparisons = num_radix_blocks;
    }
@@ -661,9 +639,9 @@ __host__ void host_integer_radix_difference_check_kb(
  // Reduces a vec containing radix blocks that encrypts a sign
  // (inferior, equal, superior) to one single radix block containing the
  // final sign
-  tree_sign_reduction<Torus>(
-      streams, lwe_array_out, comparisons, mem_ptr->diff_buffer->tree_buffer,
-      reduction_lut_f, bsks, ksks, ms_noise_reduction_key, num_comparisons);
+  tree_sign_reduction<Torus>(streams, lwe_array_out, comparisons,
+                             mem_ptr->diff_buffer->tree_buffer, reduction_lut_f,
+                             bsks, ksks, num_comparisons);
 }

 template <typename Torus>
@@ -685,9 +663,7 @@ __host__ void host_integer_radix_maxmin_kb(
    CudaRadixCiphertextFFI const *lwe_array_left,
    CudaRadixCiphertextFFI const *lwe_array_right,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_left->lwe_dimension ||
      lwe_array_out->lwe_dimension != lwe_array_right->lwe_dimension)
@@ -701,14 +677,12 @@ __host__ void host_integer_radix_maxmin_kb(
  // Compute the sign
  host_integer_radix_difference_check_kb<Torus>(
      streams, mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
-      mem_ptr, mem_ptr->identity_lut_f, bsks, ksks, ms_noise_reduction_key,
-      num_radix_blocks);
+      mem_ptr, mem_ptr->identity_lut_f, bsks, ksks, num_radix_blocks);

  // Selector
-  host_integer_radix_cmux_kb<Torus>(streams, lwe_array_out,
-                                    mem_ptr->tmp_lwe_array_out, lwe_array_left,
-                                    lwe_array_right, mem_ptr->cmux_buffer, bsks,
-                                    ksks, ms_noise_reduction_key);
+  host_integer_radix_cmux_kb<Torus>(
+      streams, lwe_array_out, mem_ptr->tmp_lwe_array_out, lwe_array_left,
+      lwe_array_right, mem_ptr->cmux_buffer, bsks, ksks);
 }

 template <typename Torus>
@@ -716,15 +690,12 @@ __host__ void host_integer_are_all_comparisons_block_true_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks) {

  // It returns a block encrypting 1 if all input blocks are 1
  // otherwise the block encrypts 0
-  are_all_comparisons_block_true<Torus>(
-      streams, lwe_array_out, lwe_array_in, mem_ptr, bsks, ksks,
-      ms_noise_reduction_key, num_radix_blocks);
+  are_all_comparisons_block_true<Torus>(streams, lwe_array_out, lwe_array_in,
+                                        mem_ptr, bsks, ksks, num_radix_blocks);
 }

 template <typename Torus>
@@ -732,14 +703,12 @@ __host__ void host_integer_is_at_least_one_comparisons_block_true_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks) {

  // It returns a block encrypting 1 if all input blocks are 1
  // otherwise the block encrypts 0
-  is_at_least_one_comparisons_block_true<Torus>(
-      streams, lwe_array_out, lwe_array_in, mem_ptr, bsks, ksks,
-      ms_noise_reduction_key, num_radix_blocks);
+  is_at_least_one_comparisons_block_true<Torus>(streams, lwe_array_out,
+                                                lwe_array_in, mem_ptr, bsks,
+                                                ksks, num_radix_blocks);
 }
 #endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh
@@ -344,7 +344,7 @@ host_integer_decompress(CudaStreams streams,
      execute_pbs_async<Torus, Torus>(
          active_streams, (Torus *)d_lwe_array_out->ptr, lut->lwe_indexes_out,
          lut->lut_vec, lut->lut_indexes_vec, extracted_lwe,
-          lut->lwe_indexes_in, d_bsks, nullptr, lut->buffer,
+          lut->lwe_indexes_in, d_bsks, lut->buffer,
          encryption_params.glwe_dimension,
          compression_params.small_lwe_dimension,
          encryption_params.polynomial_size, encryption_params.pbs_base_log,
@@ -378,7 +378,7 @@ host_integer_decompress(CudaStreams streams,
      execute_pbs_async<Torus, Torus>(
          active_streams, lwe_after_pbs_vec, lwe_trivial_indexes_vec,
          lut->lut_vec, lut->lut_indexes_vec, lwe_array_in_vec,
-          lwe_trivial_indexes_vec, d_bsks, nullptr, lut->buffer,
+          lwe_trivial_indexes_vec, d_bsks, lut->buffer,
          encryption_params.glwe_dimension,
          compression_params.small_lwe_dimension,
          encryption_params.polynomial_size, encryption_params.pbs_base_log,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cu
@@ -24,14 +24,13 @@ void cuda_integer_div_rem_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *quotient,
    CudaRadixCiphertextFFI *remainder, CudaRadixCiphertextFFI const *numerator,
    CudaRadixCiphertextFFI const *divisor, bool is_signed, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *bsks, void *const *ksks) {
  PUSH_RANGE("div")
  auto mem = (int_div_rem_memory<uint64_t> *)mem_ptr;

-  host_integer_div_rem_kb<uint64_t>(
-      CudaStreams(streams), quotient, remainder, numerator, divisor, is_signed,
-      bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem);
+  host_integer_div_rem_kb<uint64_t>(CudaStreams(streams), quotient, remainder,
+                                    numerator, divisor, is_signed, bsks,
+                                    (uint64_t **)(ksks), mem);
  POP_RANGE()
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cuh
@@ -37,9 +37,7 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
    CudaStreams streams, CudaRadixCiphertextFFI *quotient,
    CudaRadixCiphertextFFI *remainder, CudaRadixCiphertextFFI const *numerator,
    CudaRadixCiphertextFFI const *divisor, void *const *bsks,
-    uint64_t *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    unsigned_int_div_rem_2_2_memory<uint64_t> *mem_ptr) {
+    uint64_t *const *ksks, unsigned_int_div_rem_2_2_memory<uint64_t> *mem_ptr) {

  if (streams.count() < 4) {
    PANIC("GPU count should be greater than 4 when using div_rem_2_2");
@@ -61,31 +59,6 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
  auto divisor_gpu_1 = mem_ptr->divisor_gpu_1;
  auto divisor_gpu_2 = mem_ptr->divisor_gpu_2;

-  auto make_view = [](CudaModulusSwitchNoiseReductionKeyFFI const *src,
-                      size_t i) {
-    CudaModulusSwitchNoiseReductionKeyFFI v;
-    v.ptr = (src == nullptr)        ? nullptr
-            : (src->ptr == nullptr) ? nullptr
-                                    : src->ptr + i;
-    v.num_zeros = (src == nullptr) ? 0 : src->num_zeros;
-    v.ms_bound = (src == nullptr) ? 0. : src->ms_bound;
-    v.ms_r_sigma = (src == nullptr) ? 0. : src->ms_r_sigma;
-    v.ms_input_variance = (src == nullptr) ? 0. : src->ms_input_variance;
-    return v;
-  };
-
-  CudaModulusSwitchNoiseReductionKeyFFI nrk0 =
-      make_view(ms_noise_reduction_key, 0);
-  CudaModulusSwitchNoiseReductionKeyFFI nrk1 =
-      make_view(ms_noise_reduction_key, 1);
-  CudaModulusSwitchNoiseReductionKeyFFI nrk2 =
-      make_view(ms_noise_reduction_key, 2);
-  CudaModulusSwitchNoiseReductionKeyFFI nrk3 =
-      make_view(ms_noise_reduction_key, 3);
-
-  CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_keys[4] = {
-      &nrk0, &nrk1, &nrk2, &nrk3};
-
  // gpu[0] -> gpu[0]
  copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
                                     remainder_gpu_0, numerator);
@@ -121,7 +94,7 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
      mem_ptr->d2, divisor_gpu_1, streams.get_ith(1));
  host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
      streams.get_ith(1), mem_ptr->d2, 1, mem_ptr->shift_mem, &bsks[1],
-      &ksks[1], ms_noise_reduction_keys[1], mem_ptr->d2->num_radix_blocks);
+      &ksks[1], mem_ptr->d2->num_radix_blocks);

  // Computes 3*d = 4*d - d using block shift and subtraction on gpu[0]
  host_extend_radix_with_trivial_zero_blocks_msb<Torus>(
@@ -131,10 +104,10 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
                                        mem_ptr->tmp_gpu_0->num_radix_blocks);
  set_zero_radix_ciphertext_slice_async<Torus>(
      streams.stream(0), streams.gpu_index(0), mem_ptr->d3, 0, 1);
-  host_sub_and_propagate_single_carry(
-      streams.get_ith(0), mem_ptr->d3, mem_ptr->tmp_gpu_0, nullptr, nullptr,
-      mem_ptr->sub_and_propagate_mem, &bsks[0], &ksks[0],
-      ms_noise_reduction_keys[0], outputFlag::FLAG_NONE, 0);
+  host_sub_and_propagate_single_carry(streams.get_ith(0), mem_ptr->d3,
+                                      mem_ptr->tmp_gpu_0, nullptr, nullptr,
+                                      mem_ptr->sub_and_propagate_mem, &bsks[0],
+                                      &ksks[0], outputFlag::FLAG_NONE, 0);

  // +-----------------+-----------------+-----------------+-----------------+
  // |     GPU[0]      |     GPU[1]      |     GPU[2]      |     GPU[3]      |
@@ -194,8 +167,7 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
      host_integer_overflowing_sub<uint64_t>(
          streams.get_ith(gpu_index), sub_result, rem, low, sub_overflowed,
          (const CudaRadixCiphertextFFI *)nullptr, overflow_sub_mem,
-          &bsks[gpu_index], &ksks[gpu_index],
-          ms_noise_reduction_keys[gpu_index], compute_overflow,
+          &bsks[gpu_index], &ksks[gpu_index], compute_overflow,
          uses_input_borrow);
    };

@@ -218,12 +190,10 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
        host_compare_blocks_with_zero<Torus>(
            streams.get_ith(gpu_index), comparison_blocks, d_msb,
            comparison_buffer, &bsks[gpu_index], &ksks[gpu_index],
-            ms_noise_reduction_keys[gpu_index], d_msb->num_radix_blocks,
-            comparison_buffer->is_zero_lut);
+            d_msb->num_radix_blocks, comparison_buffer->is_zero_lut);
        are_all_comparisons_block_true(
            streams.get_ith(gpu_index), out_boolean_block, comparison_blocks,
            comparison_buffer, &bsks[gpu_index], &ksks[gpu_index],
-            ms_noise_reduction_keys[gpu_index],
            comparison_blocks->num_radix_blocks);

        host_negation<Torus>(
@@ -288,16 +258,13 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(

    // used as a bitor
    host_integer_radix_bitop_kb(streams.get_ith(0), o3, o3, mem_ptr->cmp_1,
-                                mem_ptr->bitor_mem_1, &bsks[0], &ksks[0],
-                                ms_noise_reduction_keys[0]);
+                                mem_ptr->bitor_mem_1, &bsks[0], &ksks[0]);
    // used as a bitor
    host_integer_radix_bitop_kb(streams.get_ith(1), o2, o2, mem_ptr->cmp_2,
-                                mem_ptr->bitor_mem_2, &bsks[1], &ksks[1],
-                                ms_noise_reduction_keys[1]);
+                                mem_ptr->bitor_mem_2, &bsks[1], &ksks[1]);
    // used as a bitor
    host_integer_radix_bitop_kb(streams.get_ith(2), o1, o1, mem_ptr->cmp_3,
-                                mem_ptr->bitor_mem_3, &bsks[2], &ksks[2],
-                                ms_noise_reduction_keys[2]);
+                                mem_ptr->bitor_mem_3, &bsks[2], &ksks[2]);

    // cmp_1, cmp_2, cmp_3 are not needed anymore, we can reuse them as c3,
    // c2, c1. c0 is allocated on gpu[3], we take it from mem_ptr.
@@ -379,8 +346,7 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
                                                   rx, rx, cx, 4, 4);
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams.get_ith(gpu_index), rx, rx, &bsks[gpu_index],
-          &ksks[gpu_index], ms_noise_reduction_keys[gpu_index], lut,
-          rx->num_radix_blocks);
+          &ksks[gpu_index], lut, rx->num_radix_blocks);
    };

    for (uint j = 0; j < 4; j++) {
@@ -398,15 +364,15 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
    // calculate quotient bits GPU[2]
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        mem_ptr->sub_streams_1.get_ith(2), mem_ptr->q1, c1, &bsks[2], &ksks[2],
-        ms_noise_reduction_keys[2], mem_ptr->quotient_lut_1, 1);
+        mem_ptr->quotient_lut_1, 1);
    // calculate quotient bits GPU[1]
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        mem_ptr->sub_streams_1.get_ith(1), mem_ptr->q2, c2, &bsks[1], &ksks[1],
-        ms_noise_reduction_keys[1], mem_ptr->quotient_lut_2, 1);
+        mem_ptr->quotient_lut_2, 1);
    // calculate quotient bits GPU[0]
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        mem_ptr->sub_streams_1.get_ith(0), mem_ptr->q3, c3, &bsks[0], &ksks[0],
-        ms_noise_reduction_keys[0], mem_ptr->quotient_lut_3, 1);
+        mem_ptr->quotient_lut_3, 1);

    for (uint j = 0; j < 4; j++) {
      cuda_synchronize_stream(streams.stream(j), streams.gpu_index(j));
@@ -469,11 +435,11 @@ __host__ void host_unsigned_integer_div_rem_kb_block_by_block_2_2(
    streams.synchronize();

    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, rem_gpu_0, rem_gpu_0, bsks, ksks, ms_noise_reduction_key,
+        streams, rem_gpu_0, rem_gpu_0, bsks, ksks,
        mem_ptr->message_extract_lut_1, rem_gpu_0->num_radix_blocks);
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        mem_ptr->sub_streams_1, q3_gpu_0, q3_gpu_0, bsks, ksks,
-        ms_noise_reduction_key, mem_ptr->message_extract_lut_2, 1);
+        mem_ptr->message_extract_lut_2, 1);
    streams.synchronize();
    mem_ptr->sub_streams_1.synchronize();

@@ -514,9 +480,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *quotient,
    CudaRadixCiphertextFFI *remainder, CudaRadixCiphertextFFI const *numerator,
    CudaRadixCiphertextFFI const *divisor, void *const *bsks,
-    uint64_t *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    unsigned_int_div_rem_memory<uint64_t> *mem_ptr) {
+    uint64_t *const *ksks, unsigned_int_div_rem_memory<uint64_t> *mem_ptr) {

  if (remainder->num_radix_blocks != numerator->num_radix_blocks ||
      remainder->num_radix_blocks != divisor->num_radix_blocks ||
@@ -531,7 +495,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
      mem_ptr->params.carry_modulus == 4 && streams.count() >= 4) {
    host_unsigned_integer_div_rem_kb_block_by_block_2_2<Torus>(
        streams, quotient, remainder, numerator, divisor, bsks, ksks,
-        ms_noise_reduction_key, mem_ptr->div_rem_2_2_mem);
+        mem_ptr->div_rem_2_2_mem);
    return;
  }
  auto radix_params = mem_ptr->params;
@@ -632,7 +596,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
          interesting_divisor->num_radix_blocks);
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, &last_interesting_divisor_block,
-          &last_interesting_divisor_block, bsks, ksks, ms_noise_reduction_key,
+          &last_interesting_divisor_block, bsks, ksks,
          mem_ptr->masking_luts_1[shifted_mask], 1);
    }; // trim_last_interesting_divisor_bits

@@ -659,7 +623,7 @@ __host__ void host_unsigned_integer_div_rem_kb(

      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, divisor_ms_blocks, divisor_ms_blocks, bsks, ksks,
-          ms_noise_reduction_key, mem_ptr->masking_luts_2[shifted_mask], 1);
+          mem_ptr->masking_luts_2[shifted_mask], 1);
    }; // trim_first_divisor_ms_bits

    // This does
@@ -681,7 +645,7 @@ __host__ void host_unsigned_integer_div_rem_kb(

      host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
          streams, interesting_remainder1, 1, mem_ptr->shift_mem_1, bsks, ksks,
-          ms_noise_reduction_key, interesting_remainder1->num_radix_blocks);
+          interesting_remainder1->num_radix_blocks);

      reset_radix_ciphertext_blocks(mem_ptr->tmp_radix,
                                    interesting_remainder1->num_radix_blocks);
@@ -710,7 +674,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
    auto left_shift_interesting_remainder2 = [&](CudaStreams streams) {
      host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
          streams, interesting_remainder2, 1, mem_ptr->shift_mem_2, bsks, ksks,
-          ms_noise_reduction_key, interesting_remainder2->num_radix_blocks);
+          interesting_remainder2->num_radix_blocks);
    }; // left_shift_interesting_remainder2

    streams.synchronize();
@@ -783,8 +747,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
          streams, new_remainder, merged_interesting_remainder,
          interesting_divisor, subtraction_overflowed,
          (const CudaRadixCiphertextFFI *)nullptr, mem_ptr->overflow_sub_mem,
-          bsks, ksks, ms_noise_reduction_key, compute_borrow,
-          uses_input_borrow);
+          bsks, ksks, compute_borrow, uses_input_borrow);
    };

    // fills:
@@ -802,13 +765,12 @@ __host__ void host_unsigned_integer_div_rem_kb(
        // So we can skip some stuff
        host_compare_blocks_with_zero<Torus>(
            streams, mem_ptr->tmp_1, trivial_blocks, mem_ptr->comparison_buffer,
-            bsks, ksks, ms_noise_reduction_key,
-            trivial_blocks->num_radix_blocks,
+            bsks, ksks, trivial_blocks->num_radix_blocks,
            mem_ptr->comparison_buffer->eq_buffer->is_non_zero_lut);

        is_at_least_one_comparisons_block_true<Torus>(
            streams, at_least_one_upper_block_is_non_zero, mem_ptr->tmp_1,
-            mem_ptr->comparison_buffer, bsks, ksks, ms_noise_reduction_key,
+            mem_ptr->comparison_buffer, bsks, ksks,
            mem_ptr->tmp_1->num_radix_blocks);
      }
    };
@@ -821,7 +783,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, cleaned_merged_interesting_remainder,
          cleaned_merged_interesting_remainder, bsks, ksks,
-          ms_noise_reduction_key, mem_ptr->message_extract_lut_1,
+          mem_ptr->message_extract_lut_1,
          cleaned_merged_interesting_remainder->num_radix_blocks);
    };

@@ -859,8 +821,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
          integer_radix_apply_bivariate_lookup_table_kb<Torus>(
              streams, cleaned_merged_interesting_remainder,
              cleaned_merged_interesting_remainder, overflow_sum_radix, bsks,
-              ksks, ms_noise_reduction_key,
-              mem_ptr->zero_out_if_overflow_did_not_happen[factor_lut_id],
+              ksks, mem_ptr->zero_out_if_overflow_did_not_happen[factor_lut_id],
              cleaned_merged_interesting_remainder->num_radix_blocks, factor);
        };

@@ -868,8 +829,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
        [&](CudaStreams streams) {
          integer_radix_apply_bivariate_lookup_table_kb<Torus>(
              streams, new_remainder, new_remainder, overflow_sum_radix, bsks,
-              ksks, ms_noise_reduction_key,
-              mem_ptr->zero_out_if_overflow_happened[factor_lut_id],
+              ksks, mem_ptr->zero_out_if_overflow_happened[factor_lut_id],
              new_remainder->num_radix_blocks, factor);
        };

@@ -878,7 +838,6 @@ __host__ void host_unsigned_integer_div_rem_kb(
      integer_radix_apply_bivariate_lookup_table_kb<Torus>(
          streams, mem_ptr->did_not_overflow, subtraction_overflowed,
          at_least_one_upper_block_is_non_zero, bsks, ksks,
-          ms_noise_reduction_key,
          mem_ptr->merge_overflow_flags_luts[pos_in_block], 1,
          mem_ptr->merge_overflow_flags_luts[pos_in_block]
              ->params.message_modulus);
@@ -937,10 +896,10 @@ __host__ void host_unsigned_integer_div_rem_kb(

  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      mem_ptr->sub_streams_1, remainder, remainder, bsks, ksks,
-      ms_noise_reduction_key, mem_ptr->message_extract_lut_1, num_blocks);
+      mem_ptr->message_extract_lut_1, num_blocks);
  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      mem_ptr->sub_streams_2, quotient, quotient, bsks, ksks,
-      ms_noise_reduction_key, mem_ptr->message_extract_lut_2, num_blocks);
+      mem_ptr->message_extract_lut_2, num_blocks);

  mem_ptr->sub_streams_1.synchronize();
  mem_ptr->sub_streams_2.synchronize();
@@ -951,9 +910,7 @@ __host__ void host_integer_div_rem_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *quotient,
    CudaRadixCiphertextFFI *remainder, CudaRadixCiphertextFFI const *numerator,
    CudaRadixCiphertextFFI const *divisor, bool is_signed, void *const *bsks,
-    uint64_t *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    int_div_rem_memory<uint64_t> *int_mem_ptr) {
+    uint64_t *const *ksks, int_div_rem_memory<uint64_t> *int_mem_ptr) {
  if (remainder->num_radix_blocks != numerator->num_radix_blocks ||
      remainder->num_radix_blocks != divisor->num_radix_blocks ||
      remainder->num_radix_blocks != quotient->num_radix_blocks)
@@ -978,19 +935,16 @@ __host__ void host_integer_div_rem_kb(
    streams.synchronize();

    host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_1, positive_numerator,
-                               bsks, ksks, ms_noise_reduction_key,
-                               int_mem_ptr->abs_mem_1, true);
+                               bsks, ksks, int_mem_ptr->abs_mem_1, true);
    host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_2, positive_divisor,
-                               bsks, ksks, ms_noise_reduction_key,
-                               int_mem_ptr->abs_mem_2, true);
+                               bsks, ksks, int_mem_ptr->abs_mem_2, true);

    int_mem_ptr->sub_streams_1.synchronize();
    int_mem_ptr->sub_streams_2.synchronize();

    host_unsigned_integer_div_rem_kb<Torus>(
        int_mem_ptr->sub_streams_1, quotient, remainder, positive_numerator,
-        positive_divisor, bsks, ksks, ms_noise_reduction_key,
-        int_mem_ptr->unsigned_mem);
+        positive_divisor, bsks, ksks, int_mem_ptr->unsigned_mem);

    CudaRadixCiphertextFFI numerator_sign;
    as_radix_ciphertext_slice<Torus>(&numerator_sign, numerator, num_blocks - 1,
@@ -1000,7 +954,7 @@ __host__ void host_integer_div_rem_kb(
                                     num_blocks);
    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
        int_mem_ptr->sub_streams_2, int_mem_ptr->sign_bits_are_different,
-        &numerator_sign, &divisor_sign, bsks, ksks, ms_noise_reduction_key,
+        &numerator_sign, &divisor_sign, bsks, ksks,
        int_mem_ptr->compare_signed_bits_lut, 1,
        int_mem_ptr->compare_signed_bits_lut->params.message_modulus);

@@ -1013,37 +967,36 @@ __host__ void host_integer_div_rem_kb(

    uint32_t requested_flag = outputFlag::FLAG_NONE;
    uint32_t uses_carry = 0;
-    host_propagate_single_carry<Torus>(
-        int_mem_ptr->sub_streams_1, int_mem_ptr->negated_quotient, nullptr,
-        nullptr, int_mem_ptr->scp_mem_1, bsks, ksks, ms_noise_reduction_key,
-        requested_flag, uses_carry);
+    host_propagate_single_carry<Torus>(int_mem_ptr->sub_streams_1,
+                                       int_mem_ptr->negated_quotient, nullptr,
+                                       nullptr, int_mem_ptr->scp_mem_1, bsks,
+                                       ksks, requested_flag, uses_carry);

    host_integer_radix_negation<Torus>(
        int_mem_ptr->sub_streams_2, int_mem_ptr->negated_remainder, remainder,
        radix_params.message_modulus, radix_params.carry_modulus, num_blocks);

-    host_propagate_single_carry<Torus>(
-        int_mem_ptr->sub_streams_2, int_mem_ptr->negated_remainder, nullptr,
-        nullptr, int_mem_ptr->scp_mem_2, bsks, ksks, ms_noise_reduction_key,
-        requested_flag, uses_carry);
+    host_propagate_single_carry<Torus>(int_mem_ptr->sub_streams_2,
+                                       int_mem_ptr->negated_remainder, nullptr,
+                                       nullptr, int_mem_ptr->scp_mem_2, bsks,
+                                       ksks, requested_flag, uses_carry);

-    host_integer_radix_cmux_kb<Torus>(int_mem_ptr->sub_streams_1, quotient,
-                                      int_mem_ptr->sign_bits_are_different,
-                                      int_mem_ptr->negated_quotient, quotient,
-                                      int_mem_ptr->cmux_quotient_mem, bsks,
-                                      ksks, ms_noise_reduction_key);
+    host_integer_radix_cmux_kb<Torus>(
+        int_mem_ptr->sub_streams_1, quotient,
+        int_mem_ptr->sign_bits_are_different, int_mem_ptr->negated_quotient,
+        quotient, int_mem_ptr->cmux_quotient_mem, bsks, ksks);

    host_integer_radix_cmux_kb<Torus>(
        int_mem_ptr->sub_streams_2, remainder, &numerator_sign,
        int_mem_ptr->negated_remainder, remainder,
-        int_mem_ptr->cmux_remainder_mem, bsks, ksks, ms_noise_reduction_key);
+        int_mem_ptr->cmux_remainder_mem, bsks, ksks);

    int_mem_ptr->sub_streams_1.synchronize();
    int_mem_ptr->sub_streams_2.synchronize();
  } else {
-    host_unsigned_integer_div_rem_kb<Torus>(
-        streams, quotient, remainder, numerator, divisor, bsks, ksks,
-        ms_noise_reduction_key, int_mem_ptr->unsigned_mem);
+    host_unsigned_integer_div_rem_kb<Torus>(streams, quotient, remainder,
+                                            numerator, divisor, bsks, ksks,
+                                            int_mem_ptr->unsigned_mem);
  }
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/ilog2.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/ilog2.cu
@@ -29,13 +29,12 @@ uint64_t scratch_integer_count_of_consecutive_bits_kb_64(
 void cuda_integer_count_of_consecutive_bits_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_ct,
    CudaRadixCiphertextFFI const *input_ct, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key) {
+    void *const *ksks) {

  host_integer_count_of_consecutive_bits<uint64_t>(
      CudaStreams(streams), output_ct, input_ct,
      (int_count_of_consecutive_bits_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)ksks, ms_noise_reduction_key);
+      (uint64_t **)ksks);
 }

 void cleanup_cuda_integer_count_of_consecutive_bits_kb_64(
@@ -81,13 +80,12 @@ void cuda_integer_ilog2_kb_64(
    CudaRadixCiphertextFFI const *trivial_ct_neg_n,
    CudaRadixCiphertextFFI const *trivial_ct_2,
    CudaRadixCiphertextFFI const *trivial_ct_m_minus_1_block, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key) {
+    void *const *bsks, void *const *ksks) {

  host_integer_ilog2<uint64_t>(
      CudaStreams(streams), output_ct, input_ct, trivial_ct_neg_n, trivial_ct_2,
      trivial_ct_m_minus_1_block, (int_ilog2_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)ksks, ms_noise_reduction_key);
+      (uint64_t **)ksks);
 }

 void cleanup_cuda_integer_ilog2_kb_64(CudaStreamsFFI streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/ilog2.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/ilog2.cuh
@@ -9,14 +9,12 @@ template <typename Torus>
 __host__ void host_integer_prepare_count_of_consecutive_bits(
    CudaStreams streams, CudaRadixCiphertextFFI *ciphertext,
    int_prepare_count_of_consecutive_bits_buffer<Torus> *mem_ptr,
-    void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *bsks, Torus *const *ksks) {

  auto tmp = mem_ptr->tmp_ct;

  host_apply_univariate_lut_kb<Torus>(streams, tmp, ciphertext,
-                                      mem_ptr->univ_lut_mem, ksks,
-                                      ms_noise_reduction_key, bsks);
+                                      mem_ptr->univ_lut_mem, ksks, bsks);

  if (mem_ptr->direction == Leading) {
    host_radix_blocks_reverse_inplace<Torus>(streams, tmp);
@@ -24,7 +22,7 @@ __host__ void host_integer_prepare_count_of_consecutive_bits(

  host_compute_prefix_sum_hillis_steele<uint64_t>(
      streams, ciphertext, tmp, mem_ptr->biv_lut_mem, bsks, ksks,
-      ms_noise_reduction_key, ciphertext->num_radix_blocks);
+      ciphertext->num_radix_blocks);
 }

 template <typename Torus>
@@ -48,8 +46,7 @@ __host__ void host_integer_count_of_consecutive_bits(
    CudaStreams streams, CudaRadixCiphertextFFI *output_ct,
    CudaRadixCiphertextFFI const *input_ct,
    int_count_of_consecutive_bits_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    Torus *const *ksks) {

  auto params = mem_ptr->params;
  auto ct_prepared = mem_ptr->ct_prepared;
@@ -60,9 +57,8 @@ __host__ void host_integer_count_of_consecutive_bits(

  // Prepare count of consecutive bits
  //
-  host_integer_prepare_count_of_consecutive_bits(streams, ct_prepared,
-                                                 mem_ptr->prepare_mem, bsks,
-                                                 ksks, ms_noise_reduction_key);
+  host_integer_prepare_count_of_consecutive_bits(
+      streams, ct_prepared, mem_ptr->prepare_mem, bsks, ksks);

  // Perform addition and propagation of prepared cts
  //
@@ -76,12 +72,11 @@ __host__ void host_integer_count_of_consecutive_bits(
  }

  host_integer_partial_sum_ciphertexts_vec_kb<Torus>(
-      streams, output_ct, cts, bsks, ksks, ms_noise_reduction_key,
-      mem_ptr->sum_mem, counter_num_blocks, ct_prepared->num_radix_blocks);
+      streams, output_ct, cts, bsks, ksks, mem_ptr->sum_mem, counter_num_blocks,
+      ct_prepared->num_radix_blocks);

  host_propagate_single_carry<Torus>(streams, output_ct, nullptr, nullptr,
-                                     mem_ptr->propagate_mem, bsks, ksks,
-                                     ms_noise_reduction_key, 0, 0);
+                                     mem_ptr->propagate_mem, bsks, ksks, 0, 0);
 }

 template <typename Torus>
@@ -103,14 +98,14 @@ __host__ uint64_t scratch_integer_ilog2(CudaStreams streams,
 }

 template <typename Torus>
-__host__ void host_integer_ilog2(
-    CudaStreams streams, CudaRadixCiphertextFFI *output_ct,
-    CudaRadixCiphertextFFI const *input_ct,
-    CudaRadixCiphertextFFI const *trivial_ct_neg_n,
-    CudaRadixCiphertextFFI const *trivial_ct_2,
-    CudaRadixCiphertextFFI const *trivial_ct_m_minus_1_block,
-    int_ilog2_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+__host__ void
+host_integer_ilog2(CudaStreams streams, CudaRadixCiphertextFFI *output_ct,
+                   CudaRadixCiphertextFFI const *input_ct,
+                   CudaRadixCiphertextFFI const *trivial_ct_neg_n,
+                   CudaRadixCiphertextFFI const *trivial_ct_2,
+                   CudaRadixCiphertextFFI const *trivial_ct_m_minus_1_block,
+                   int_ilog2_buffer<Torus> *mem_ptr, void *const *bsks,
+                   Torus *const *ksks) {

  // Prepare the input ciphertext by computing the number of consecutive
  // leading zeros for each of its blocks.
@@ -118,8 +113,7 @@ __host__ void host_integer_ilog2(
  copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
                                     mem_ptr->ct_in_buffer, input_ct);
  host_integer_prepare_count_of_consecutive_bits<Torus>(
-      streams, mem_ptr->ct_in_buffer, mem_ptr->prepare_mem, bsks, ksks,
-      ms_noise_reduction_key);
+      streams, mem_ptr->ct_in_buffer, mem_ptr->prepare_mem, bsks, ksks);

  // Build the input for the sum by taking each block's leading zero count
  // and placing it into a separate, zero-padded ct slot.
@@ -148,17 +142,17 @@ __host__ void host_integer_ilog2(
  //
  host_integer_partial_sum_ciphertexts_vec_kb<Torus>(
      streams, mem_ptr->sum_output_not_propagated, mem_ptr->sum_input_cts, bsks,
-      ksks, ms_noise_reduction_key, mem_ptr->sum_mem,
-      mem_ptr->counter_num_blocks, mem_ptr->input_num_blocks + 1);
+      ksks, mem_ptr->sum_mem, mem_ptr->counter_num_blocks,
+      mem_ptr->input_num_blocks + 1);

  // Apply luts to the partial sum.
  //
-  host_apply_univariate_lut_kb<Torus>(
-      streams, mem_ptr->message_blocks_not, mem_ptr->sum_output_not_propagated,
-      mem_ptr->lut_message_not, ksks, ms_noise_reduction_key, bsks);
-  host_apply_univariate_lut_kb<Torus>(
-      streams, mem_ptr->carry_blocks_not, mem_ptr->sum_output_not_propagated,
-      mem_ptr->lut_carry_not, ksks, ms_noise_reduction_key, bsks);
+  host_apply_univariate_lut_kb<Torus>(streams, mem_ptr->message_blocks_not,
+                                      mem_ptr->sum_output_not_propagated,
+                                      mem_ptr->lut_message_not, ksks, bsks);
+  host_apply_univariate_lut_kb<Torus>(streams, mem_ptr->carry_blocks_not,
+                                      mem_ptr->sum_output_not_propagated,
+                                      mem_ptr->lut_carry_not, ksks, bsks);

  // Left-shift the bitwise-negated carry blocks by one position.
  //
@@ -196,12 +190,12 @@ __host__ void host_integer_ilog2(
      trivial_ct_2, 0, mem_ptr->counter_num_blocks);

  host_integer_partial_sum_ciphertexts_vec_kb<Torus>(
-      streams, output_ct, mem_ptr->sum_input_cts, bsks, ksks,
-      ms_noise_reduction_key, mem_ptr->sum_mem, mem_ptr->counter_num_blocks, 3);
+      streams, output_ct, mem_ptr->sum_input_cts, bsks, ksks, mem_ptr->sum_mem,
+      mem_ptr->counter_num_blocks, 3);

-  host_full_propagate_inplace<Torus>(
-      streams, output_ct, mem_ptr->final_propagate_mem, ksks,
-      ms_noise_reduction_key, bsks, mem_ptr->counter_num_blocks);
+  host_full_propagate_inplace<Torus>(streams, output_ct,
+                                     mem_ptr->final_propagate_mem, ksks, bsks,
+                                     mem_ptr->counter_num_blocks);
 }

 #endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cu
@@ -2,18 +2,17 @@
 #include "integer/negation.cuh"
 #include <linear_algebra.h>

-void cuda_full_propagation_64_inplace(
-    CudaStreamsFFI streams, CudaRadixCiphertextFFI *input_blocks,
-    int8_t *mem_ptr, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_blocks) {
+void cuda_full_propagation_64_inplace(CudaStreamsFFI streams,
+                                      CudaRadixCiphertextFFI *input_blocks,
+                                      int8_t *mem_ptr, void *const *ksks,
+                                      void *const *bsks, uint32_t num_blocks) {

  int_fullprop_buffer<uint64_t> *buffer =
      (int_fullprop_buffer<uint64_t> *)mem_ptr;

-  host_full_propagate_inplace<uint64_t>(
-      CudaStreams(streams), input_blocks, buffer, (uint64_t **)(ksks),
-      ms_noise_reduction_key, bsks, num_blocks);
+  host_full_propagate_inplace<uint64_t>(CudaStreams(streams), input_blocks,
+                                        buffer, (uint64_t **)(ksks), bsks,
+                                        num_blocks);
 }

 uint64_t scratch_cuda_full_propagation_64(
@@ -103,27 +102,24 @@ void cuda_propagate_single_carry_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array,
    CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
    uint32_t requested_flag, uint32_t uses_carry) {

  host_propagate_single_carry<uint64_t>(
      CudaStreams(streams), lwe_array, carry_out, carry_in,
      (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
-      ms_noise_reduction_key, requested_flag, uses_carry);
+      requested_flag, uses_carry);
 }

 void cuda_add_and_propagate_single_carry_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lhs_array,
    const CudaRadixCiphertextFFI *rhs_array, CudaRadixCiphertextFFI *carry_out,
    const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t requested_flag, uint32_t uses_carry) {
+    void *const *ksks, uint32_t requested_flag, uint32_t uses_carry) {

  host_add_and_propagate_single_carry<uint64_t>(
      CudaStreams(streams), lhs_array, rhs_array, carry_out, carry_in,
      (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
-      ms_noise_reduction_key, requested_flag, uses_carry);
+      requested_flag, uses_carry);
 }

 void cuda_integer_overflowing_sub_kb_64_inplace(
@@ -131,15 +127,13 @@ void cuda_integer_overflowing_sub_kb_64_inplace(
    const CudaRadixCiphertextFFI *rhs_array,
    CudaRadixCiphertextFFI *overflow_block,
    const CudaRadixCiphertextFFI *input_borrow, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t compute_overflow, uint32_t uses_input_borrow) {
+    void *const *bsks, void *const *ksks, uint32_t compute_overflow,
+    uint32_t uses_input_borrow) {
  PUSH_RANGE("overflow sub")
  host_integer_overflowing_sub<uint64_t>(
      CudaStreams(streams), lhs_array, lhs_array, rhs_array, overflow_block,
      input_borrow, (int_borrow_prop_memory<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)ksks, ms_noise_reduction_key, compute_overflow,
-      uses_input_borrow);
+      (uint64_t **)ksks, compute_overflow, uses_input_borrow);
  POP_RANGE()
 }

@@ -218,14 +212,11 @@ uint64_t scratch_cuda_apply_many_univariate_lut_kb_64(
 void cuda_apply_univariate_lut_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks) {
+    void *const *ksks, void *const *bsks) {

  host_apply_univariate_lut_kb<uint64_t>(
      CudaStreams(streams), output_radix_lwe, input_radix_lwe,
-      (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks),
-      ms_noise_reduction_key, bsks);
+      (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks);
 }

 void cleanup_cuda_apply_univariate_lut_kb_64(CudaStreamsFFI streams,
@@ -241,14 +232,13 @@ void cleanup_cuda_apply_univariate_lut_kb_64(CudaStreamsFFI streams,
 void cuda_apply_many_univariate_lut_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_many_lut, uint32_t lut_stride) {
+    void *const *ksks, void *const *bsks, uint32_t num_many_lut,
+    uint32_t lut_stride) {

  host_apply_many_univariate_lut_kb<uint64_t>(
      CudaStreams(streams), output_radix_lwe, input_radix_lwe,
-      (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks),
-      ms_noise_reduction_key, bsks, num_many_lut, lut_stride);
+      (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks,
+      num_many_lut, lut_stride);
 }

 uint64_t scratch_cuda_apply_bivariate_lut_kb_64(
@@ -275,15 +265,13 @@ void cuda_apply_bivariate_lut_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI const *input_radix_lwe_1,
    CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_radix_blocks, uint32_t shift) {
+    void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
+    uint32_t shift) {

  host_apply_bivariate_lut_kb<uint64_t>(
      CudaStreams(streams), output_radix_lwe, input_radix_lwe_1,
      input_radix_lwe_2, (int_radix_lut<uint64_t> *)mem_ptr,
-      (uint64_t **)(ksks), ms_noise_reduction_key, bsks, num_radix_blocks,
-      shift);
+      (uint64_t **)(ksks), bsks, num_radix_blocks, shift);
 }

 void cleanup_cuda_apply_bivariate_lut_kb_64(CudaStreamsFFI streams,
@@ -320,14 +308,12 @@ uint64_t scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
 void cuda_integer_compute_prefix_sum_hillis_steele_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_radix_blocks) {
+    void *const *ksks, void *const *bsks, uint32_t num_radix_blocks) {

  host_compute_prefix_sum_hillis_steele<uint64_t>(
      CudaStreams(streams), output_radix_lwe, generates_or_propagates,
      (int_radix_lut<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
-      ms_noise_reduction_key, num_radix_blocks);
+      num_radix_blocks);
 }

 void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
@@ -399,15 +385,12 @@ uint64_t scratch_cuda_apply_noise_squashing_kb(
 void cuda_apply_noise_squashing_kb(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks) {
+    void *const *ksks, void *const *bsks) {

  PUSH_RANGE("apply noise squashing")
  integer_radix_apply_noise_squashing_kb<uint64_t>(
      CudaStreams(streams), output_radix_lwe, input_radix_lwe,
-      (int_noise_squashing_lut<uint64_t> *)mem_ptr, bsks, (uint64_t **)ksks,
-      ms_noise_reduction_key);
+      (int_noise_squashing_lut<uint64_t> *)mem_ptr, bsks, (uint64_t **)ksks);
  POP_RANGE()
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/integer.cuh
@@ -507,9 +507,7 @@ template <typename Torus>
 __host__ void integer_radix_apply_univariate_lookup_table_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    int_radix_lut<Torus> *lut, uint32_t num_radix_blocks) {
+    Torus *const *ksks, int_radix_lut<Torus> *lut, uint32_t num_radix_blocks) {
  PUSH_RANGE("apply lut")
  // apply_lookup_table
  auto params = lut->params;
@@ -557,10 +555,9 @@ __host__ void integer_radix_apply_univariate_lookup_table_kb(
    execute_pbs_async<Torus, Torus>(
        streams.get_ith(0), (Torus *)lwe_array_out->ptr, lut->lwe_indexes_out,
        lut->lut_vec, lut->lut_indexes_vec, lwe_after_ks_vec[0],
-        lwe_trivial_indexes_vec[0], bsks, ms_noise_reduction_key, lut->buffer,
-        glwe_dimension, small_lwe_dimension, polynomial_size, pbs_base_log,
-        pbs_level, grouping_factor, num_radix_blocks, pbs_type, num_many_lut,
-        lut_stride);
+        lwe_trivial_indexes_vec[0], bsks, lut->buffer, glwe_dimension,
+        small_lwe_dimension, polynomial_size, pbs_base_log, pbs_level,
+        grouping_factor, num_radix_blocks, pbs_type, num_many_lut, lut_stride);
  } else {
    /// Make sure all data that should be on GPU 0 is indeed there
    cuda_event_record(lut->event_scatter_in, streams.stream(0),
@@ -590,10 +587,9 @@ __host__ void integer_radix_apply_univariate_lookup_table_kb(
    execute_pbs_async<Torus, Torus>(
        active_streams, lwe_after_pbs_vec, lwe_trivial_indexes_vec,
        lut->lut_vec, lut->lut_indexes_vec, lwe_after_ks_vec,
-        lwe_trivial_indexes_vec, bsks, ms_noise_reduction_key, lut->buffer,
-        glwe_dimension, small_lwe_dimension, polynomial_size, pbs_base_log,
-        pbs_level, grouping_factor, num_radix_blocks, pbs_type, num_many_lut,
-        lut_stride);
+        lwe_trivial_indexes_vec, bsks, lut->buffer, glwe_dimension,
+        small_lwe_dimension, polynomial_size, pbs_base_log, pbs_level,
+        grouping_factor, num_radix_blocks, pbs_type, num_many_lut, lut_stride);

    /// Copy data back to GPU 0 and release vecs
    PUSH_RANGE("gather")
@@ -627,9 +623,8 @@ template <typename Torus>
 __host__ void integer_radix_apply_many_univariate_lookup_table_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    int_radix_lut<Torus> *lut, uint32_t num_many_lut, uint32_t lut_stride) {
+    Torus *const *ksks, int_radix_lut<Torus> *lut, uint32_t num_many_lut,
+    uint32_t lut_stride) {
  PUSH_RANGE("apply many lut")
  // apply_lookup_table
  auto params = lut->params;
@@ -674,10 +669,9 @@ __host__ void integer_radix_apply_many_univariate_lookup_table_kb(
    execute_pbs_async<Torus, Torus>(
        streams.get_ith(0), (Torus *)lwe_array_out->ptr, lut->lwe_indexes_out,
        lut->lut_vec, lut->lut_indexes_vec, lwe_after_ks_vec[0],
-        lwe_trivial_indexes_vec[0], bsks, ms_noise_reduction_key, lut->buffer,
-        glwe_dimension, small_lwe_dimension, polynomial_size, pbs_base_log,
-        pbs_level, grouping_factor, num_radix_blocks, pbs_type, num_many_lut,
-        lut_stride);
+        lwe_trivial_indexes_vec[0], bsks, lut->buffer, glwe_dimension,
+        small_lwe_dimension, polynomial_size, pbs_base_log, pbs_level,
+        grouping_factor, num_radix_blocks, pbs_type, num_many_lut, lut_stride);
  } else {
    /// Make sure all data that should be on GPU 0 is indeed there
    cuda_event_record(lut->event_scatter_in, streams.stream(0),
@@ -706,10 +700,9 @@ __host__ void integer_radix_apply_many_univariate_lookup_table_kb(
    execute_pbs_async<Torus, Torus>(
        active_streams, lwe_after_pbs_vec, lwe_trivial_indexes_vec,
        lut->lut_vec, lut->lut_indexes_vec, lwe_after_ks_vec,
-        lwe_trivial_indexes_vec, bsks, ms_noise_reduction_key, lut->buffer,
-        glwe_dimension, small_lwe_dimension, polynomial_size, pbs_base_log,
-        pbs_level, grouping_factor, num_radix_blocks, pbs_type, num_many_lut,
-        lut_stride);
+        lwe_trivial_indexes_vec, bsks, lut->buffer, glwe_dimension,
+        small_lwe_dimension, polynomial_size, pbs_base_log, pbs_level,
+        grouping_factor, num_radix_blocks, pbs_type, num_many_lut, lut_stride);

    /// Copy data back to GPU 0 and release vecs
    PUSH_RANGE("gather")
@@ -745,9 +738,8 @@ __host__ void integer_radix_apply_bivariate_lookup_table_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_1,
    CudaRadixCiphertextFFI const *lwe_array_2, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    int_radix_lut<Torus> *lut, uint32_t num_radix_blocks, uint32_t shift) {
+    Torus *const *ksks, int_radix_lut<Torus> *lut, uint32_t num_radix_blocks,
+    uint32_t shift) {
  PUSH_RANGE("apply bivar lut")
  if (lwe_array_out->lwe_dimension != lwe_array_1->lwe_dimension ||
      lwe_array_out->lwe_dimension != lwe_array_2->lwe_dimension)
@@ -806,10 +798,9 @@ __host__ void integer_radix_apply_bivariate_lookup_table_kb(
    execute_pbs_async<Torus, Torus>(
        streams.get_ith(0), (Torus *)(lwe_array_out->ptr), lut->lwe_indexes_out,
        lut->lut_vec, lut->lut_indexes_vec, lwe_after_ks_vec[0],
-        lwe_trivial_indexes_vec[0], bsks, ms_noise_reduction_key, lut->buffer,
-        glwe_dimension, small_lwe_dimension, polynomial_size, pbs_base_log,
-        pbs_level, grouping_factor, num_radix_blocks, pbs_type, num_many_lut,
-        lut_stride);
+        lwe_trivial_indexes_vec[0], bsks, lut->buffer, glwe_dimension,
+        small_lwe_dimension, polynomial_size, pbs_base_log, pbs_level,
+        grouping_factor, num_radix_blocks, pbs_type, num_many_lut, lut_stride);
  } else {
    cuda_event_record(lut->event_scatter_in, streams.stream(0),
                      streams.gpu_index(0));
@@ -835,10 +826,9 @@ __host__ void integer_radix_apply_bivariate_lookup_table_kb(
    execute_pbs_async<Torus, Torus>(
        active_streams, lwe_after_pbs_vec, lwe_trivial_indexes_vec,
        lut->lut_vec, lut->lut_indexes_vec, lwe_after_ks_vec,
-        lwe_trivial_indexes_vec, bsks, ms_noise_reduction_key, lut->buffer,
-        glwe_dimension, small_lwe_dimension, polynomial_size, pbs_base_log,
-        pbs_level, grouping_factor, num_radix_blocks, pbs_type, num_many_lut,
-        lut_stride);
+        lwe_trivial_indexes_vec, bsks, lut->buffer, glwe_dimension,
+        small_lwe_dimension, polynomial_size, pbs_base_log, pbs_level,
+        grouping_factor, num_radix_blocks, pbs_type, num_many_lut, lut_stride);

    /// Copy data back to GPU 0 and release vecs
    PUSH_RANGE("gather")
@@ -1317,9 +1307,7 @@ template <typename Torus>
 void host_compute_shifted_blocks_and_states(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array,
    int_shifted_blocks_and_states_memory<Torus> *mem, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t lut_stride, uint32_t num_many_lut) {
+    Torus *const *ksks, uint32_t lut_stride, uint32_t num_many_lut) {

  auto num_radix_blocks = lwe_array->num_radix_blocks;

@@ -1328,7 +1316,7 @@ void host_compute_shifted_blocks_and_states(

  integer_radix_apply_many_univariate_lookup_table_kb<Torus>(
      streams, shifted_blocks_and_states, lwe_array, bsks, ksks,
-      ms_noise_reduction_key, luts_array_first_step, num_many_lut, lut_stride);
+      luts_array_first_step, num_many_lut, lut_stride);

  auto shifted_blocks = mem->shifted_blocks;
  auto block_states = mem->block_states;
@@ -1347,9 +1335,7 @@ void host_resolve_group_carries_sequentially(
    CudaStreams streams, CudaRadixCiphertextFFI *resolved_carries,
    CudaRadixCiphertextFFI *grouping_pgns, int_radix_params params,
    int_seq_group_prop_memory<Torus> *mem, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_groups) {
+    Torus *const *ksks, uint32_t num_groups) {

  auto group_resolved_carries = mem->group_resolved_carries;
  if (num_groups > 1) {
@@ -1398,8 +1384,8 @@ void host_resolve_group_carries_sequentially(
                                       blocks_to_solve + 1);
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, &shifted_group_resolved_carries,
-          &shifted_group_resolved_carries, bsks, ksks, ms_noise_reduction_key,
-          luts_sequential, blocks_to_solve);
+          &shifted_group_resolved_carries, bsks, ksks, luts_sequential,
+          blocks_to_solve);

      // Copy the result to the resolved carries array
      copy_radix_ciphertext_slice_async<Torus>(
@@ -1416,9 +1402,7 @@ template <typename Torus>
 void host_compute_prefix_sum_hillis_steele(
    CudaStreams streams, CudaRadixCiphertextFFI *step_output,
    CudaRadixCiphertextFFI *generates_or_propagates, int_radix_lut<Torus> *luts,
-    void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    void *const *bsks, Torus *const *ksks, uint32_t num_radix_blocks) {

  if (step_output->lwe_dimension != generates_or_propagates->lwe_dimension)
    PANIC("Cuda error: input lwe dimensions must be the same")
@@ -1440,9 +1424,8 @@ void host_compute_prefix_sum_hillis_steele(
    int cur_total_blocks = num_radix_blocks - space;

    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
-        streams, &cur_blocks, &cur_blocks, prev_blocks, bsks, ksks,
-        ms_noise_reduction_key, luts, cur_total_blocks,
-        luts->params.message_modulus);
+        streams, &cur_blocks, &cur_blocks, prev_blocks, bsks, ksks, luts,
+        cur_total_blocks, luts->params.message_modulus);

    copy_radix_ciphertext_slice_async<Torus>(
        streams.stream(0), streams.gpu_index(0), generates_or_propagates, space,
@@ -1462,9 +1445,8 @@ template <typename Torus>
 void host_compute_propagation_simulators_and_group_carries(
    CudaStreams streams, CudaRadixCiphertextFFI *block_states,
    int_radix_params params, int_prop_simu_group_carries_memory<Torus> *mem,
-    void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks, uint32_t num_groups) {
+    void *const *bsks, Torus *const *ksks, uint32_t num_radix_blocks,
+    uint32_t num_groups) {

  if (num_radix_blocks > block_states->num_radix_blocks)
    PANIC("Cuda error: input does not have enough radix blocks")
@@ -1481,7 +1463,7 @@ void host_compute_propagation_simulators_and_group_carries(
  auto luts_array_second_step = mem->luts_array_second_step;
  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, propagation_cum_sums, propagation_cum_sums, bsks, ksks,
-      ms_noise_reduction_key, luts_array_second_step, num_radix_blocks);
+      luts_array_second_step, num_radix_blocks);

  host_integer_radix_scalar_addition_inplace<Torus>(
      streams, propagation_cum_sums, mem->scalar_array_cum_sum,
@@ -1500,10 +1482,9 @@ void host_compute_propagation_simulators_and_group_carries(
  auto resolved_carries = mem->resolved_carries;
  if (mem->use_sequential_algorithm_to_resolve_group_carries) {
    // Resolve group carries sequentially
-    host_resolve_group_carries_sequentially(streams, resolved_carries,
-                                            grouping_pgns, params,
-                                            mem->seq_group_prop_mem, bsks, ksks,
-                                            ms_noise_reduction_key, num_groups);
+    host_resolve_group_carries_sequentially(
+        streams, resolved_carries, grouping_pgns, params,
+        mem->seq_group_prop_mem, bsks, ksks, num_groups);
  } else {
    // Resolve group carries with hillis steele
    auto luts_carry_propagation_sum = mem->hs_group_prop_mem->lut_hillis_steele;
@@ -1512,8 +1493,7 @@ void host_compute_propagation_simulators_and_group_carries(
                                     resolved_carries, 1, num_groups);
    host_compute_prefix_sum_hillis_steele<Torus>(
        streams, &shifted_resolved_carries, grouping_pgns,
-        luts_carry_propagation_sum, bsks, ksks, ms_noise_reduction_key,
-        num_groups - 1);
+        luts_carry_propagation_sum, bsks, ksks, num_groups - 1);
  }
 }

@@ -1527,9 +1507,7 @@ template <typename Torus>
 void host_compute_shifted_blocks_and_borrow_states(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array,
    int_shifted_blocks_and_borrow_states_memory<Torus> *mem, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t lut_stride, uint32_t num_many_lut) {
+    Torus *const *ksks, uint32_t lut_stride, uint32_t num_many_lut) {
  auto num_radix_blocks = lwe_array->num_radix_blocks;

  auto shifted_blocks_and_borrow_states = mem->shifted_blocks_and_borrow_states;
@@ -1537,7 +1515,7 @@ void host_compute_shifted_blocks_and_borrow_states(

  integer_radix_apply_many_univariate_lookup_table_kb<Torus>(
      streams, shifted_blocks_and_borrow_states, lwe_array, bsks, ksks,
-      ms_noise_reduction_key, luts_array_first_step, num_many_lut, lut_stride);
+      luts_array_first_step, num_many_lut, lut_stride);

  auto shifted_blocks = mem->shifted_blocks;
  auto borrow_states = mem->borrow_states;
@@ -1559,11 +1537,11 @@ void host_compute_shifted_blocks_and_borrow_states(
 * have size = 2 * (glwe_dimension * polynomial_size + 1) * sizeof(Torus)
 */
 template <typename Torus>
-void host_full_propagate_inplace(
-    CudaStreams streams, CudaRadixCiphertextFFI *input_blocks,
-    int_fullprop_buffer<Torus> *mem_ptr, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_blocks) {
+void host_full_propagate_inplace(CudaStreams streams,
+                                 CudaRadixCiphertextFFI *input_blocks,
+                                 int_fullprop_buffer<Torus> *mem_ptr,
+                                 Torus *const *ksks, void *const *bsks,
+                                 uint32_t num_blocks) {
  auto params = mem_ptr->lut->params;

  // In the case of extracting a single LWE this parameters are dummy
@@ -1589,8 +1567,8 @@ void host_full_propagate_inplace(
        mem_ptr->lut->lwe_trivial_indexes, mem_ptr->lut->lut_vec,
        mem_ptr->lut->lut_indexes_vec,
        (Torus *)mem_ptr->tmp_small_lwe_vector->ptr,
-        mem_ptr->lut->lwe_trivial_indexes, bsks, ms_noise_reduction_key,
-        mem_ptr->lut->buffer, params.glwe_dimension, params.small_lwe_dimension,
+        mem_ptr->lut->lwe_trivial_indexes, bsks, mem_ptr->lut->buffer,
+        params.glwe_dimension, params.small_lwe_dimension,
        params.polynomial_size, params.pbs_base_log, params.pbs_level,
        params.grouping_factor, 2, params.pbs_type, num_many_lut, lut_stride);

@@ -1721,13 +1699,12 @@ __host__ void scalar_pack_blocks(cudaStream_t stream, uint32_t gpu_index,
 * * (lwe_dimension+1) * sizeeof(Torus) bytes
 */
 template <typename Torus>
-__host__ void extract_n_bits(
-    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
-    const CudaRadixCiphertextFFI *lwe_array_in, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t effective_num_radix_blocks, uint32_t num_radix_blocks,
-    int_bit_extract_luts_buffer<Torus> *bit_extract) {
+__host__ void
+extract_n_bits(CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
+               const CudaRadixCiphertextFFI *lwe_array_in, void *const *bsks,
+               Torus *const *ksks, uint32_t effective_num_radix_blocks,
+               uint32_t num_radix_blocks,
+               int_bit_extract_luts_buffer<Torus> *bit_extract) {

  copy_radix_ciphertext_slice_async<Torus>(
      streams.stream(0), streams.gpu_index(0), lwe_array_out, 0,
@@ -1741,19 +1718,17 @@ __host__ void extract_n_bits(
    }
  }
  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, lwe_array_out, lwe_array_out, bsks, ksks, ms_noise_reduction_key,
-      bit_extract->lut, effective_num_radix_blocks);
+      streams, lwe_array_out, lwe_array_out, bsks, ksks, bit_extract->lut,
+      effective_num_radix_blocks);
 }

 template <typename Torus>
-__host__ void reduce_signs(
-    CudaStreams streams, CudaRadixCiphertextFFI *signs_array_out,
-    CudaRadixCiphertextFFI *signs_array_in,
-    int_comparison_buffer<Torus> *mem_ptr,
-    std::function<Torus(Torus)> sign_handler_f, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_sign_blocks) {
+__host__ void
+reduce_signs(CudaStreams streams, CudaRadixCiphertextFFI *signs_array_out,
+             CudaRadixCiphertextFFI *signs_array_in,
+             int_comparison_buffer<Torus> *mem_ptr,
+             std::function<Torus(Torus)> sign_handler_f, void *const *bsks,
+             Torus *const *ksks, uint32_t num_sign_blocks) {

  if (signs_array_out->lwe_dimension != signs_array_in->lwe_dimension)
    PANIC("Cuda error: input lwe dimensions must be the same")
@@ -1799,8 +1774,7 @@ __host__ void reduce_signs(
      pack_blocks<Torus>(streams.stream(0), streams.gpu_index(0), signs_b,
                         signs_a, num_sign_blocks, message_modulus);
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
-          streams, signs_a, signs_b, bsks, ksks, ms_noise_reduction_key, lut,
-          num_sign_blocks / 2);
+          streams, signs_a, signs_b, bsks, ksks, lut, num_sign_blocks / 2);

      if (num_sign_blocks % 2 == 1)
        copy_radix_ciphertext_slice_async<Torus>(
@@ -1830,8 +1804,7 @@ __host__ void reduce_signs(
    pack_blocks<Torus>(streams.stream(0), streams.gpu_index(0), signs_b,
                       signs_a, num_sign_blocks, message_modulus);
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, signs_array_out, signs_b, bsks, ksks, ms_noise_reduction_key,
-        lut, 1);
+        streams, signs_array_out, signs_b, bsks, ksks, lut, 1);

  } else {

@@ -1849,8 +1822,7 @@ __host__ void reduce_signs(
    lut->broadcast_lut(lut->active_streams);

    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, signs_array_out, signs_a, bsks, ksks, ms_noise_reduction_key,
-        lut, 1);
+        streams, signs_array_out, signs_a, bsks, ksks, lut, 1);
  }
 }

@@ -1877,16 +1849,15 @@ uint64_t scratch_cuda_apply_univariate_lut_kb(
 }

 template <typename Torus>
-void host_apply_univariate_lut_kb(
-    CudaStreams streams, CudaRadixCiphertextFFI *radix_lwe_out,
-    CudaRadixCiphertextFFI const *radix_lwe_in, int_radix_lut<Torus> *mem,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks) {
+void host_apply_univariate_lut_kb(CudaStreams streams,
+                                  CudaRadixCiphertextFFI *radix_lwe_out,
+                                  CudaRadixCiphertextFFI const *radix_lwe_in,
+                                  int_radix_lut<Torus> *mem, Torus *const *ksks,
+                                  void *const *bsks) {

  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, radix_lwe_out, radix_lwe_in, bsks, ksks, ms_noise_reduction_key,
-      mem, radix_lwe_out->num_radix_blocks);
+      streams, radix_lwe_out, radix_lwe_in, bsks, ksks, mem,
+      radix_lwe_out->num_radix_blocks);
 }

 template <typename Torus>
@@ -1916,13 +1887,12 @@ template <typename Torus>
 void host_apply_many_univariate_lut_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *radix_lwe_out,
    CudaRadixCiphertextFFI const *radix_lwe_in, int_radix_lut<Torus> *mem,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_many_lut, uint32_t lut_stride) {
+    Torus *const *ksks, void *const *bsks, uint32_t num_many_lut,
+    uint32_t lut_stride) {

  integer_radix_apply_many_univariate_lookup_table_kb<Torus>(
-      streams, radix_lwe_out, radix_lwe_in, bsks, ksks, ms_noise_reduction_key,
-      mem, num_many_lut, lut_stride);
+      streams, radix_lwe_out, radix_lwe_in, bsks, ksks, mem, num_many_lut,
+      lut_stride);
 }

 template <typename Torus>
@@ -1948,17 +1918,17 @@ uint64_t scratch_cuda_apply_bivariate_lut_kb(
 }

 template <typename Torus>
-void host_apply_bivariate_lut_kb(
-    CudaStreams streams, CudaRadixCiphertextFFI *radix_lwe_out,
-    CudaRadixCiphertextFFI const *radix_lwe_in_1,
-    CudaRadixCiphertextFFI const *radix_lwe_in_2, int_radix_lut<Torus> *mem,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_radix_blocks, uint32_t shift) {
+void host_apply_bivariate_lut_kb(CudaStreams streams,
+                                 CudaRadixCiphertextFFI *radix_lwe_out,
+                                 CudaRadixCiphertextFFI const *radix_lwe_in_1,
+                                 CudaRadixCiphertextFFI const *radix_lwe_in_2,
+                                 int_radix_lut<Torus> *mem, Torus *const *ksks,
+                                 void *const *bsks, uint32_t num_radix_blocks,
+                                 uint32_t shift) {

  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
-      streams, radix_lwe_out, radix_lwe_in_1, radix_lwe_in_2, bsks, ksks,
-      ms_noise_reduction_key, mem, num_radix_blocks, shift);
+      streams, radix_lwe_out, radix_lwe_in_1, radix_lwe_in_2, bsks, ksks, mem,
+      num_radix_blocks, shift);
 }

 template <typename Torus>
@@ -1977,13 +1947,13 @@ uint64_t scratch_cuda_propagate_single_carry_kb_inplace(
 // This function perform the three steps of Thomas' new carry propagation
 // includes the logic to extract overflow when requested
 template <typename Torus>
-void host_propagate_single_carry(
-    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array,
-    CudaRadixCiphertextFFI *carry_out,
-    const CudaRadixCiphertextFFI *input_carries, int_sc_prop_memory<Torus> *mem,
-    void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t requested_flag, uint32_t uses_carry) {
+void host_propagate_single_carry(CudaStreams streams,
+                                 CudaRadixCiphertextFFI *lwe_array,
+                                 CudaRadixCiphertextFFI *carry_out,
+                                 const CudaRadixCiphertextFFI *input_carries,
+                                 int_sc_prop_memory<Torus> *mem,
+                                 void *const *bsks, Torus *const *ksks,
+                                 uint32_t requested_flag, uint32_t uses_carry) {
  PUSH_RANGE("propagate sc")
  auto num_radix_blocks = lwe_array->num_radix_blocks;
  auto params = mem->params;
@@ -2006,8 +1976,8 @@ void host_propagate_single_carry(

  // Step 1
  host_compute_shifted_blocks_and_states<Torus>(
-      streams, lwe_array, mem->shifted_blocks_state_mem, bsks, ksks,
-      ms_noise_reduction_key, lut_stride, num_many_lut);
+      streams, lwe_array, mem->shifted_blocks_state_mem, bsks, ksks, lut_stride,
+      num_many_lut);
  auto block_states = mem->shifted_blocks_state_mem->block_states;

  if (requested_flag == outputFlag::FLAG_CARRY) {
@@ -2018,7 +1988,7 @@ void host_propagate_single_carry(
  // Step 2
  host_compute_propagation_simulators_and_group_carries<Torus>(
      streams, block_states, params, mem->prop_simu_group_carries_mem, bsks,
-      ksks, ms_noise_reduction_key, num_radix_blocks, mem->num_groups);
+      ksks, num_radix_blocks, mem->num_groups);

  auto group_size = mem->prop_simu_group_carries_mem->group_size;

@@ -2059,7 +2029,7 @@ void host_propagate_single_carry(
        num_radix_blocks, num_radix_blocks + 1, &output_flag, 0, 1);
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        streams, mem->output_flag, prepared_blocks, bsks, ksks,
-        ms_noise_reduction_key, mem->lut_message_extract, num_radix_blocks + 1);
+        mem->lut_message_extract, num_radix_blocks + 1);

    copy_radix_ciphertext_slice_async<Torus>(
        streams.stream(0), streams.gpu_index(0), lwe_array, 0, num_radix_blocks,
@@ -2070,8 +2040,8 @@ void host_propagate_single_carry(
  } else {
    auto message_extract = mem->lut_message_extract;
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, lwe_array, prepared_blocks, bsks, ksks, ms_noise_reduction_key,
-        message_extract, num_radix_blocks);
+        streams, lwe_array, prepared_blocks, bsks, ksks, message_extract,
+        num_radix_blocks);
  }
  POP_RANGE()
 }
@@ -2083,9 +2053,8 @@ void host_add_and_propagate_single_carry(
    CudaStreams streams, CudaRadixCiphertextFFI *lhs_array,
    const CudaRadixCiphertextFFI *rhs_array, CudaRadixCiphertextFFI *carry_out,
    const CudaRadixCiphertextFFI *input_carries, int_sc_prop_memory<Torus> *mem,
-    void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t requested_flag, uint32_t uses_carry) {
+    void *const *bsks, Torus *const *ksks, uint32_t requested_flag,
+    uint32_t uses_carry) {
  PUSH_RANGE("add & propagate sc")
  if (lhs_array->num_radix_blocks != rhs_array->num_radix_blocks)
    PANIC("Cuda error: input and output num radix blocks must be the same")
@@ -2138,15 +2107,14 @@ void host_add_and_propagate_single_carry(
  }
  // Step 1
  host_compute_shifted_blocks_and_states<Torus>(
-      streams, lhs_array, mem->shifted_blocks_state_mem, bsks, ksks,
-      ms_noise_reduction_key, lut_stride, num_many_lut);
+      streams, lhs_array, mem->shifted_blocks_state_mem, bsks, ksks, lut_stride,
+      num_many_lut);
  auto block_states = mem->shifted_blocks_state_mem->block_states;
  if (requested_flag == outputFlag::FLAG_OVERFLOW) {
    auto lut_overflow_prep = mem->lut_overflow_flag_prep;
    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
        streams, &output_flag, mem->last_lhs, mem->last_rhs, bsks, ksks,
-        ms_noise_reduction_key, lut_overflow_prep, 1,
-        lut_overflow_prep->params.message_modulus);
+        lut_overflow_prep, 1, lut_overflow_prep->params.message_modulus);
  } else if (requested_flag == outputFlag::FLAG_CARRY) {
    copy_radix_ciphertext_slice_async<Torus>(
        streams.stream(0), streams.gpu_index(0), &output_flag, 0, 1,
@@ -2156,7 +2124,7 @@ void host_add_and_propagate_single_carry(
  // Step 2
  host_compute_propagation_simulators_and_group_carries<Torus>(
      streams, block_states, params, mem->prop_simu_group_carries_mem, bsks,
-      ksks, ms_noise_reduction_key, num_radix_blocks, mem->num_groups);
+      ksks, num_radix_blocks, mem->num_groups);

  auto group_size = mem->prop_simu_group_carries_mem->group_size;

@@ -2209,7 +2177,7 @@ void host_add_and_propagate_single_carry(
        num_radix_blocks, num_radix_blocks + 1, &output_flag, 0, 1);
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        streams, mem->output_flag, prepared_blocks, bsks, ksks,
-        ms_noise_reduction_key, mem->lut_message_extract, num_radix_blocks + 1);
+        mem->lut_message_extract, num_radix_blocks + 1);

    copy_radix_ciphertext_slice_async<Torus>(
        streams.stream(0), streams.gpu_index(0), lhs_array, 0, num_radix_blocks,
@@ -2220,7 +2188,7 @@ void host_add_and_propagate_single_carry(
        mem->output_flag, num_radix_blocks, num_radix_blocks + 1);
  } else {
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, lhs_array, prepared_blocks, bsks, ksks, ms_noise_reduction_key,
+        streams, lhs_array, prepared_blocks, bsks, ksks,
        mem->lut_message_extract, num_radix_blocks);
  }
  POP_RANGE()
@@ -2243,14 +2211,15 @@ uint64_t scratch_cuda_integer_overflowing_sub(
 // This function perform the three steps of Thomas' new borrow propagation
 // includes the logic to extract overflow when requested
 template <typename Torus>
-void host_single_borrow_propagate(
-    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array,
-    CudaRadixCiphertextFFI *overflow_block,
-    const CudaRadixCiphertextFFI *input_borrow,
-    int_borrow_prop_memory<Torus> *mem, void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_groups, uint32_t compute_overflow,
-    uint32_t uses_input_borrow) {
+void host_single_borrow_propagate(CudaStreams streams,
+                                  CudaRadixCiphertextFFI *lwe_array,
+                                  CudaRadixCiphertextFFI *overflow_block,
+                                  const CudaRadixCiphertextFFI *input_borrow,
+                                  int_borrow_prop_memory<Torus> *mem,
+                                  void *const *bsks, Torus *const *ksks,
+                                  uint32_t num_groups,
+                                  uint32_t compute_overflow,
+                                  uint32_t uses_input_borrow) {
  auto num_radix_blocks = lwe_array->num_radix_blocks;
  auto params = mem->params;
  auto glwe_dimension = params.glwe_dimension;
@@ -2272,7 +2241,7 @@ void host_single_borrow_propagate(
  // Step 1
  host_compute_shifted_blocks_and_borrow_states<Torus>(
      streams, lwe_array, mem->shifted_blocks_borrow_state_mem, bsks, ksks,
-      ms_noise_reduction_key, lut_stride, num_many_lut);
+      lut_stride, num_many_lut);

  auto borrow_states = mem->shifted_blocks_borrow_state_mem->borrow_states;
  copy_radix_ciphertext_slice_async<Torus>(
@@ -2282,7 +2251,7 @@ void host_single_borrow_propagate(
  // Step 2
  host_compute_propagation_simulators_and_group_carries<Torus>(
      streams, borrow_states, params, mem->prop_simu_group_carries_mem, bsks,
-      ksks, ms_noise_reduction_key, num_radix_blocks, num_groups);
+      ksks, num_radix_blocks, num_groups);

  auto shifted_blocks =
      (Torus *)mem->shifted_blocks_borrow_state_mem->shifted_blocks->ptr;
@@ -2336,7 +2305,7 @@ void host_single_borrow_propagate(
    auto borrow_flag = mem->lut_borrow_flag;
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        mem->sub_streams_1, overflow_block, mem->overflow_block, bsks, ksks,
-        ms_noise_reduction_key, borrow_flag, 1);
+        borrow_flag, 1);
  }
  for (int j = 0; j < mem->active_streams.count(); j++) {
    cuda_event_record(mem->outgoing_events1[j], mem->sub_streams_1.stream(j),
@@ -2358,7 +2327,7 @@ void host_single_borrow_propagate(
  auto message_extract = mem->lut_message_extract;
  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      mem->sub_streams_2, lwe_array, prepared_blocks, bsks, ksks,
-      ms_noise_reduction_key, message_extract, num_radix_blocks);
+      message_extract, num_radix_blocks);

  for (int j = 0; j < mem->active_streams.count(); j++) {
    cuda_event_record(mem->outgoing_events2[j], mem->sub_streams_2.stream(j),
@@ -2378,8 +2347,7 @@ __host__ void integer_radix_apply_noise_squashing_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in,
    int_noise_squashing_lut<InputTorus> *lut, void *const *bsks,
-    InputTorus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    InputTorus *const *ksks) {

  PUSH_RANGE("apply noise squashing")
  auto params = lut->params;
@@ -2431,11 +2399,10 @@ __host__ void integer_radix_apply_noise_squashing_kb(
    execute_pbs_async<uint64_t, __uint128_t>(
        streams.get_ith(0), (__uint128_t *)lwe_array_out->ptr,
        lwe_trivial_indexes_vec[0], lut->lut_vec, lwe_trivial_indexes_vec,
-        lwe_after_ks_vec[0], lwe_trivial_indexes_vec[0], bsks,
-        ms_noise_reduction_key, lut->pbs_buffer, glwe_dimension,
-        small_lwe_dimension, polynomial_size, pbs_base_log, pbs_level,
-        grouping_factor, lwe_array_out->num_radix_blocks, params.pbs_type, 0,
-        0);
+        lwe_after_ks_vec[0], lwe_trivial_indexes_vec[0], bsks, lut->pbs_buffer,
+        glwe_dimension, small_lwe_dimension, polynomial_size, pbs_base_log,
+        pbs_level, grouping_factor, lwe_array_out->num_radix_blocks,
+        params.pbs_type, 0, 0);
  } else {
    /// Make sure all data that should be on GPU 0 is indeed there
    cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
@@ -2459,10 +2426,10 @@ __host__ void integer_radix_apply_noise_squashing_kb(
    execute_pbs_async<uint64_t, __uint128_t>(
        active_streams, lwe_after_pbs_vec, lwe_trivial_indexes_vec,
        lut->lut_vec, lwe_trivial_indexes_vec, lwe_after_ks_vec,
-        lwe_trivial_indexes_vec, bsks, ms_noise_reduction_key, lut->pbs_buffer,
-        glwe_dimension, small_lwe_dimension, polynomial_size, pbs_base_log,
-        pbs_level, grouping_factor, lwe_array_out->num_radix_blocks,
-        params.pbs_type, 0, 0);
+        lwe_trivial_indexes_vec, bsks, lut->pbs_buffer, glwe_dimension,
+        small_lwe_dimension, polynomial_size, pbs_base_log, pbs_level,
+        grouping_factor, lwe_array_out->num_radix_blocks, params.pbs_type, 0,
+        0);

    /// Copy data back to GPU 0 and release vecs
    /// In apply noise squashing we always use trivial indexes
--- a/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cu
@@ -128,59 +128,51 @@ void cuda_integer_mult_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
    CudaRadixCiphertextFFI const *radix_lwe_left, bool const is_bool_left,
    CudaRadixCiphertextFFI const *radix_lwe_right, bool const is_bool_right,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    int8_t *mem_ptr, uint32_t polynomial_size, uint32_t num_blocks) {
+    void *const *bsks, void *const *ksks, int8_t *mem_ptr,
+    uint32_t polynomial_size, uint32_t num_blocks) {
  PUSH_RANGE("mul")
  switch (polynomial_size) {
  case 256:
    host_integer_mult_radix_kb<uint64_t, AmortizedDegree<256>>(
        CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
        radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
-        ms_noise_reduction_key, (int_mul_memory<uint64_t> *)mem_ptr,
-        num_blocks);
+        (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
    break;
  case 512:
    host_integer_mult_radix_kb<uint64_t, AmortizedDegree<512>>(
        CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
        radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
-        ms_noise_reduction_key, (int_mul_memory<uint64_t> *)mem_ptr,
-        num_blocks);
+        (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
    break;
  case 1024:
    host_integer_mult_radix_kb<uint64_t, AmortizedDegree<1024>>(
        CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
        radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
-        ms_noise_reduction_key, (int_mul_memory<uint64_t> *)mem_ptr,
-        num_blocks);
+        (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
    break;
  case 2048:
    host_integer_mult_radix_kb<uint64_t, AmortizedDegree<2048>>(
        CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
        radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
-        ms_noise_reduction_key, (int_mul_memory<uint64_t> *)mem_ptr,
-        num_blocks);
+        (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
    break;
  case 4096:
    host_integer_mult_radix_kb<uint64_t, AmortizedDegree<4096>>(
        CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
        radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
-        ms_noise_reduction_key, (int_mul_memory<uint64_t> *)mem_ptr,
-        num_blocks);
+        (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
    break;
  case 8192:
    host_integer_mult_radix_kb<uint64_t, AmortizedDegree<8192>>(
        CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
        radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
-        ms_noise_reduction_key, (int_mul_memory<uint64_t> *)mem_ptr,
-        num_blocks);
+        (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
    break;
  case 16384:
    host_integer_mult_radix_kb<uint64_t, AmortizedDegree<16384>>(
        CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
        radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
-        ms_noise_reduction_key, (int_mul_memory<uint64_t> *)mem_ptr,
-        num_blocks);
+        (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
    break;
  default:
    PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
@@ -225,8 +217,7 @@ uint64_t scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
 void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
    CudaRadixCiphertextFFI *radix_lwe_vec, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *ksks) {

  auto mem = (int_sum_ciphertexts_vec_memory<uint64_t> *)mem_ptr;
  if (radix_lwe_vec->num_radix_blocks % radix_lwe_out->num_radix_blocks != 0)
@@ -234,8 +225,7 @@ void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
          "output's number of radix blocks")
  host_integer_partial_sum_ciphertexts_vec_kb<uint64_t>(
      CudaStreams(streams), radix_lwe_out, radix_lwe_vec, bsks,
-      (uint64_t **)(ksks), ms_noise_reduction_key, mem,
-      radix_lwe_out->num_radix_blocks,
+      (uint64_t **)(ksks), mem, radix_lwe_out->num_radix_blocks,
      radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cuh
@@ -291,7 +291,6 @@ template <typename Torus>
 __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *radix_lwe_out,
    CudaRadixCiphertextFFI *terms, void *const *bsks, uint64_t *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
    int_sum_ciphertexts_vec_memory<uint64_t> *mem_ptr,
    uint32_t num_radix_blocks, uint32_t num_radix_in_vec) {
  auto big_lwe_dimension = mem_ptr->params.big_lwe_dimension;
@@ -407,8 +406,8 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
          streams.get_ith(0), (Torus *)current_blocks->ptr, d_pbs_indexes_out,
          luts_message_carry->lut_vec, luts_message_carry->lut_indexes_vec,
          (Torus *)small_lwe_vector->ptr, d_pbs_indexes_in, bsks,
-          ms_noise_reduction_key, luts_message_carry->buffer, glwe_dimension,
-          small_lwe_dimension, polynomial_size, mem_ptr->params.pbs_base_log,
+          luts_message_carry->buffer, glwe_dimension, small_lwe_dimension,
+          polynomial_size, mem_ptr->params.pbs_base_log,
          mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
          total_ciphertexts, mem_ptr->params.pbs_type, num_many_lut,
          lut_stride);
@@ -420,7 +419,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(

      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, current_blocks, current_blocks, bsks, ksks,
-          ms_noise_reduction_key, luts_message_carry, total_ciphertexts);
+          luts_message_carry, total_ciphertexts);
    }
    cuda_set_device(streams.gpu_index(0));
    std::swap(d_columns, d_new_columns);
@@ -458,8 +457,8 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
          streams.get_ith(0), (Torus *)current_blocks->ptr, d_pbs_indexes_out,
          luts_message_carry->lut_vec, luts_message_carry->lut_indexes_vec,
          (Torus *)small_lwe_vector->ptr, d_pbs_indexes_in, bsks,
-          ms_noise_reduction_key, luts_message_carry->buffer, glwe_dimension,
-          small_lwe_dimension, polynomial_size, mem_ptr->params.pbs_base_log,
+          luts_message_carry->buffer, glwe_dimension, small_lwe_dimension,
+          polynomial_size, mem_ptr->params.pbs_base_log,
          mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
          2 * num_radix_blocks, mem_ptr->params.pbs_type, num_many_lut,
          lut_stride);
@@ -471,7 +470,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(

      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          active_streams, current_blocks, radix_lwe_out, bsks, ksks,
-          ms_noise_reduction_key, luts_message_carry, num_blocks_in_apply_lut);
+          luts_message_carry, num_blocks_in_apply_lut);
    }
    calculate_final_degrees(radix_lwe_out->degrees, terms->degrees,
                            num_radix_blocks, num_radix_in_vec, chunk_size,
@@ -493,9 +492,8 @@ __host__ void host_integer_mult_radix_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *radix_lwe_out,
    CudaRadixCiphertextFFI const *radix_lwe_left, bool const is_bool_left,
    CudaRadixCiphertextFFI const *radix_lwe_right, bool const is_bool_right,
-    void *const *bsks, uint64_t *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    int_mul_memory<Torus> *mem_ptr, uint32_t num_blocks) {
+    void *const *bsks, uint64_t *const *ksks, int_mul_memory<Torus> *mem_ptr,
+    uint32_t num_blocks) {

  if (radix_lwe_out->lwe_dimension != radix_lwe_left->lwe_dimension ||
      radix_lwe_right->lwe_dimension != radix_lwe_left->lwe_dimension)
@@ -513,14 +511,14 @@ __host__ void host_integer_mult_radix_kb(
  if (is_bool_right) {
    zero_out_if<Torus>(streams, radix_lwe_out, radix_lwe_left, radix_lwe_right,
                       mem_ptr->zero_out_mem, mem_ptr->zero_out_predicate_lut,
-                       bsks, ksks, ms_noise_reduction_key, num_blocks);
+                       bsks, ksks, num_blocks);
    return;
  }

  if (is_bool_left) {
    zero_out_if<Torus>(streams, radix_lwe_out, radix_lwe_right, radix_lwe_left,
                       mem_ptr->zero_out_mem, mem_ptr->zero_out_predicate_lut,
-                       bsks, ksks, ms_noise_reduction_key, num_blocks);
+                       bsks, ksks, num_blocks);
    return;
  }

@@ -589,8 +587,7 @@ __host__ void host_integer_mult_radix_kb(

  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
      streams, block_mul_res, block_mul_res, vector_result_sb, bsks, ksks,
-      ms_noise_reduction_key, luts_array, total_block_count,
-      luts_array->params.message_modulus);
+      luts_array, total_block_count, luts_array->params.message_modulus);

  vector_result_lsb = block_mul_res;
  as_radix_ciphertext_slice<Torus>(&vector_result_msb, block_mul_res,
@@ -618,15 +615,14 @@ __host__ void host_integer_mult_radix_kb(
  }
  host_integer_partial_sum_ciphertexts_vec_kb<Torus>(
      streams, radix_lwe_out, vector_result_sb, bsks, ksks,
-      ms_noise_reduction_key, mem_ptr->sum_ciphertexts_mem, num_blocks,
-      2 * num_blocks);
+      mem_ptr->sum_ciphertexts_mem, num_blocks, 2 * num_blocks);

  auto scp_mem_ptr = mem_ptr->sc_prop_mem;
  uint32_t requested_flag = outputFlag::FLAG_NONE;
  uint32_t uses_carry = 0;
-  host_propagate_single_carry<Torus>(
-      streams, radix_lwe_out, nullptr, nullptr, scp_mem_ptr, bsks, ksks,
-      ms_noise_reduction_key, requested_flag, uses_carry);
+  host_propagate_single_carry<Torus>(streams, radix_lwe_out, nullptr, nullptr,
+                                     scp_mem_ptr, bsks, ksks, requested_flag,
+                                     uses_carry);
 }

 template <typename Torus>
--- a/backends/tfhe-cuda-backend/cuda/src/integer/negation.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/negation.cuh
@@ -134,9 +134,7 @@ __host__ void host_integer_overflowing_sub(
    CudaRadixCiphertextFFI *overflow_block,
    const CudaRadixCiphertextFFI *input_borrow,
    int_borrow_prop_memory<uint64_t> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t compute_overflow, uint32_t uses_input_borrow) {
+    Torus *const *ksks, uint32_t compute_overflow, uint32_t uses_input_borrow) {
  PUSH_RANGE("overflowing sub")
  if (output->num_radix_blocks != input_left->num_radix_blocks ||
      output->num_radix_blocks != input_right->num_radix_blocks)
@@ -166,7 +164,7 @@ __host__ void host_integer_overflowing_sub(
  host_single_borrow_propagate<Torus>(
      streams, output, overflow_block, input_borrow,
      (int_borrow_prop_memory<Torus> *)mem_ptr, bsks, (Torus **)(ksks),
-      ms_noise_reduction_key, num_groups, compute_overflow, uses_input_borrow);
+      num_groups, compute_overflow, uses_input_borrow);
  POP_RANGE()
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/oprf.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/oprf.cu
@@ -21,16 +21,15 @@ uint64_t scratch_cuda_integer_grouped_oprf_64(
      allocate_gpu_memory);
 }

-void cuda_integer_grouped_oprf_async_64(
-    CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
-    const void *seeded_lwe_input, uint32_t num_blocks_to_process, int8_t *mem,
-    void *const *bsks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+void cuda_integer_grouped_oprf_async_64(CudaStreamsFFI streams,
+                                        CudaRadixCiphertextFFI *radix_lwe_out,
+                                        const void *seeded_lwe_input,
+                                        uint32_t num_blocks_to_process,
+                                        int8_t *mem, void *const *bsks) {

  host_integer_grouped_oprf<uint64_t>(
      CudaStreams(streams), radix_lwe_out, (const uint64_t *)seeded_lwe_input,
-      num_blocks_to_process, (int_grouped_oprf_memory<uint64_t> *)mem, bsks,
-      ms_noise_reduction_key);
+      num_blocks_to_process, (int_grouped_oprf_memory<uint64_t> *)mem, bsks);
 }

 void cleanup_cuda_integer_grouped_oprf_64(CudaStreamsFFI streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/oprf.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/oprf.cuh
@@ -20,11 +20,12 @@ uint64_t scratch_cuda_integer_grouped_oprf(
 }

 template <typename Torus>
-void host_integer_grouped_oprf(
-    CudaStreams streams, CudaRadixCiphertextFFI *radix_lwe_out,
-    const Torus *seeded_lwe_input, uint32_t num_blocks_to_process,
-    int_grouped_oprf_memory<Torus> *mem_ptr, void *const *bsks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+void host_integer_grouped_oprf(CudaStreams streams,
+                               CudaRadixCiphertextFFI *radix_lwe_out,
+                               const Torus *seeded_lwe_input,
+                               uint32_t num_blocks_to_process,
+                               int_grouped_oprf_memory<Torus> *mem_ptr,
+                               void *const *bsks) {

  auto active_streams = streams.active_gpu_subset(num_blocks_to_process);
  auto lut = mem_ptr->luts;
@@ -34,7 +35,7 @@ void host_integer_grouped_oprf(
        streams.get_ith(0), (Torus *)(radix_lwe_out->ptr), lut->lwe_indexes_out,
        lut->lut_vec, lut->lut_indexes_vec,
        const_cast<Torus *>(seeded_lwe_input), lut->lwe_indexes_in, bsks,
-        ms_noise_reduction_key, lut->buffer, mem_ptr->params.glwe_dimension,
+        lut->buffer, mem_ptr->params.glwe_dimension,
        mem_ptr->params.small_lwe_dimension, mem_ptr->params.polynomial_size,
        mem_ptr->params.pbs_base_log, mem_ptr->params.pbs_level,
        mem_ptr->params.grouping_factor, num_blocks_to_process,
@@ -62,7 +63,7 @@ void host_integer_grouped_oprf(
    execute_pbs_async<Torus, Torus>(
        active_streams, lwe_after_pbs_vec, lwe_trivial_indexes_vec,
        lut->lut_vec, lut->lut_indexes_vec, lwe_array_in_vec,
-        lwe_trivial_indexes_vec, bsks, ms_noise_reduction_key, lut->buffer,
+        lwe_trivial_indexes_vec, bsks, lut->buffer,
        mem_ptr->params.glwe_dimension, mem_ptr->params.small_lwe_dimension,
        mem_ptr->params.polynomial_size, mem_ptr->params.pbs_base_log,
        mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cu
@@ -4,15 +4,13 @@ void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
    void const *h_clear_blocks, uint32_t num_clear_blocks, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *bsks, void *const *ksks) {

  host_integer_radix_scalar_bitop_kb<uint64_t>(
      CudaStreams(streams), lwe_array_out, lwe_array_input,
      static_cast<const uint64_t *>(clear_blocks),
      static_cast<const uint64_t *>(h_clear_blocks), num_clear_blocks,
-      (int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
-      ms_noise_reduction_key);
+      (int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
 }

 void update_degrees_after_scalar_bitand(uint64_t *output_degrees,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_bitops.cuh
@@ -9,8 +9,7 @@ __host__ void host_integer_radix_scalar_bitop_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *output,
    CudaRadixCiphertextFFI const *input, Torus const *clear_blocks,
    Torus const *h_clear_blocks, uint32_t num_clear_blocks,
-    int_bitop_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    int_bitop_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks) {

  if (output->num_radix_blocks != input->num_radix_blocks)
    PANIC("Cuda error: input and output num radix blocks must be equal")
@@ -50,8 +49,7 @@ __host__ void host_integer_radix_scalar_bitop_kb(
    lut->broadcast_lut(active_streams, false);

    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, output, input, bsks, ksks, ms_noise_reduction_key, lut,
-        num_clear_blocks);
+        streams, output, input, bsks, ksks, lut, num_clear_blocks);
    memcpy(output->degrees, degrees, num_clear_blocks * sizeof(uint64_t));

    if (op == SCALAR_BITAND && num_clear_blocks < num_radix_blocks) {
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_comparison.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_comparison.cu
@@ -35,9 +35,7 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, void const *scalar_blocks,
    void const *h_scalar_blocks, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_scalar_blocks) {
+    void *const *ksks, uint32_t num_scalar_blocks) {

  // The output ciphertext might be a boolean block or a radix ciphertext
  // depending on the case (eq/gt vs max/min) so the amount of blocks to
@@ -51,8 +49,7 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
    host_integer_radix_scalar_equality_check_kb<uint64_t>(
        CudaStreams(streams), lwe_array_out, lwe_array_in,
        static_cast<const uint64_t *>(scalar_blocks), buffer, bsks,
-        (uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks,
-        num_scalar_blocks);
+        (uint64_t **)(ksks), num_radix_blocks, num_scalar_blocks);
    break;
  case GT:
  case GE:
@@ -66,7 +63,7 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
        static_cast<const uint64_t *>(scalar_blocks),
        static_cast<const uint64_t *>(h_scalar_blocks), buffer,
        buffer->diff_buffer->operator_f, bsks, (uint64_t **)(ksks),
-        ms_noise_reduction_key, num_radix_blocks, num_scalar_blocks);
+        num_radix_blocks, num_scalar_blocks);
    break;
  case MAX:
  case MIN:
@@ -77,8 +74,7 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
        CudaStreams(streams), lwe_array_out, lwe_array_in,
        static_cast<const uint64_t *>(scalar_blocks),
        static_cast<const uint64_t *>(h_scalar_blocks), buffer, bsks,
-        (uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks,
-        num_scalar_blocks);
+        (uint64_t **)(ksks), num_radix_blocks, num_scalar_blocks);
    break;
  default:
    PANIC("Cuda error: integer operation not supported")
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_comparison.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_comparison.cuh
@@ -29,9 +29,7 @@ __host__ void scalar_compare_radix_blocks_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI *lwe_array_in, Torus *scalar_blocks,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks) {

  if (num_radix_blocks == 0)
    return;
@@ -71,8 +69,8 @@ __host__ void scalar_compare_radix_blocks_kb(
  // Apply LUT to compare to 0
  auto sign_lut = mem_ptr->eq_buffer->is_non_zero_lut;
  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, lwe_array_out, subtracted_blocks, bsks, ksks,
-      ms_noise_reduction_key, sign_lut, num_radix_blocks);
+      streams, lwe_array_out, subtracted_blocks, bsks, ksks, sign_lut,
+      num_radix_blocks);

  // FIXME: without this sync signed scalar eq tests fail, I don't understand
  // the reason
@@ -90,9 +88,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
    CudaRadixCiphertextFFI const *lwe_array_in, Torus const *scalar_blocks,
    Torus const *h_scalar_blocks, int_comparison_buffer<Torus> *mem_ptr,
    std::function<Torus(Torus)> sign_handler_f, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks, uint32_t num_scalar_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks, uint32_t num_scalar_blocks) {
  if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
    PANIC("Cuda error: input lwe dimensions must be the same")
  if (lwe_array_in->num_radix_blocks < num_radix_blocks)
@@ -132,11 +128,10 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
    // means scalar is zero
    host_compare_blocks_with_zero<Torus>(
        streams, mem_ptr->tmp_lwe_array_out, lwe_array_in, mem_ptr, bsks, ksks,
-        ms_noise_reduction_key, num_radix_blocks, mem_ptr->is_zero_lut);
+        num_radix_blocks, mem_ptr->is_zero_lut);
    are_all_comparisons_block_true<Torus>(
        streams, mem_ptr->tmp_lwe_array_out, mem_ptr->tmp_lwe_array_out,
-        mem_ptr, bsks, ksks, ms_noise_reduction_key,
-        mem_ptr->tmp_lwe_array_out->num_radix_blocks);
+        mem_ptr, bsks, ksks, mem_ptr->tmp_lwe_array_out->num_radix_blocks);

    auto scalar_last_leaf_lut_f = [sign_handler_f](Torus x) -> Torus {
      x = (x == 1 ? IS_EQUAL : IS_SUPERIOR);
@@ -154,8 +149,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
    lut->broadcast_lut(active_streams);

    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, lwe_array_out, mem_ptr->tmp_lwe_array_out, bsks, ksks,
-        ms_noise_reduction_key, lut, 1);
+        streams, lwe_array_out, mem_ptr->tmp_lwe_array_out, bsks, ksks, lut, 1);

  } else if (num_scalar_blocks < num_radix_blocks) {
    // We have to handle both part of the work described above
@@ -207,7 +201,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
    auto comparisons = mem_ptr->tmp_block_comparisons;
    scalar_compare_radix_blocks_kb<Torus>(
        lsb_streams, comparisons, diff_buffer->tmp_packed, (Torus *)rhs.ptr,
-        mem_ptr, bsks, ksks, ms_noise_reduction_key, num_lsb_radix_blocks);
+        mem_ptr, bsks, ksks, num_lsb_radix_blocks);

    // Reduces a vec containing radix blocks that encrypts a sign
    // (inferior, equal, superior) to one single radix block containing the
@@ -215,15 +209,15 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
    tree_sign_reduction<Torus>(lsb_streams, lwe_array_lsb_out, comparisons,
                               mem_ptr->diff_buffer->tree_buffer,
                               mem_ptr->identity_lut_f, bsks, ksks,
-                               ms_noise_reduction_key, num_lsb_radix_blocks);
+                               num_lsb_radix_blocks);
    //////////////
    // msb
    host_compare_blocks_with_zero<Torus>(
        msb_streams, &lwe_array_msb_out, &msb, mem_ptr, bsks, ksks,
-        ms_noise_reduction_key, num_msb_radix_blocks, mem_ptr->is_zero_lut);
+        num_msb_radix_blocks, mem_ptr->is_zero_lut);
    are_all_comparisons_block_true<Torus>(
        msb_streams, &lwe_array_msb_out, &lwe_array_msb_out, mem_ptr, bsks,
-        ksks, ms_noise_reduction_key, lwe_array_msb_out.num_radix_blocks);
+        ksks, lwe_array_msb_out.num_radix_blocks);
    lsb_streams.synchronize();
    msb_streams.synchronize();

@@ -250,7 +244,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(

    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
        streams, lwe_array_out, lwe_array_lsb_out, &lwe_array_msb_out, bsks,
-        ksks, ms_noise_reduction_key, lut, 1, lut->params.message_modulus);
+        ksks, lut, 1, lut->params.message_modulus);

  } else {
    if (num_radix_blocks == 1) {
@@ -283,8 +277,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
      one_block_lut->broadcast_lut(active_streams);

      integer_radix_apply_univariate_lookup_table_kb<Torus>(
-          streams, lwe_array_out, lwe_array_in, bsks, ksks,
-          ms_noise_reduction_key, one_block_lut, 1);
+          streams, lwe_array_out, lwe_array_in, bsks, ksks, one_block_lut, 1);
      one_block_lut->release(streams);
      delete one_block_lut;
    } else {
@@ -314,7 +307,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
      auto comparisons = mem_ptr->tmp_lwe_array_out;
      scalar_compare_radix_blocks_kb<Torus>(
          streams, comparisons, diff_buffer->tmp_packed, (Torus *)rhs.ptr,
-          mem_ptr, bsks, ksks, ms_noise_reduction_key, num_lsb_radix_blocks);
+          mem_ptr, bsks, ksks, num_lsb_radix_blocks);

      // Reduces a vec containing radix blocks that encrypts a sign
      // (inferior, equal, superior) to one single radix block containing the
@@ -322,7 +315,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
      tree_sign_reduction<Torus>(streams, lwe_array_out, comparisons,
                                 mem_ptr->diff_buffer->tree_buffer,
                                 sign_handler_f, bsks, ksks,
-                                 ms_noise_reduction_key, num_lsb_radix_blocks);
+                                 num_lsb_radix_blocks);
    }
  }
 }
@@ -333,9 +326,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
    CudaRadixCiphertextFFI const *lwe_array_in, Torus const *scalar_blocks,
    Torus const *h_scalar_blocks, int_comparison_buffer<Torus> *mem_ptr,
    std::function<Torus(Torus)> sign_handler_f, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks, uint32_t num_scalar_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks, uint32_t num_scalar_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
    PANIC("Cuda error: input lwe dimensions must be the same")
@@ -376,10 +367,10 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
    auto are_all_msb_zeros = mem_ptr->tmp_lwe_array_out;
    host_compare_blocks_with_zero<Torus>(
        streams, are_all_msb_zeros, lwe_array_in, mem_ptr, bsks, ksks,
-        ms_noise_reduction_key, num_radix_blocks, mem_ptr->is_zero_lut);
+        num_radix_blocks, mem_ptr->is_zero_lut);
    are_all_comparisons_block_true<Torus>(
        streams, are_all_msb_zeros, are_all_msb_zeros, mem_ptr, bsks, ksks,
-        ms_noise_reduction_key, are_all_msb_zeros->num_radix_blocks);
+        are_all_msb_zeros->num_radix_blocks);
    CudaRadixCiphertextFFI sign_block;
    as_radix_ciphertext_slice<Torus>(&sign_block, lwe_array_in,
                                     num_radix_blocks - 1, num_radix_blocks);
@@ -430,8 +421,8 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
    lut->broadcast_lut(active_streams);

    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
-        streams, lwe_array_out, are_all_msb_zeros, &sign_block, bsks, ksks,
-        ms_noise_reduction_key, lut, 1, lut->params.message_modulus);
+        streams, lwe_array_out, are_all_msb_zeros, &sign_block, bsks, ksks, lut,
+        1, lut->params.message_modulus);

  } else if (num_scalar_blocks < num_radix_blocks) {
    // We have to handle both part of the work described above
@@ -477,7 +468,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
    auto comparisons = mem_ptr->tmp_block_comparisons;
    scalar_compare_radix_blocks_kb<Torus>(
        lsb_streams, comparisons, diff_buffer->tmp_packed, (Torus *)rhs.ptr,
-        mem_ptr, bsks, ksks, ms_noise_reduction_key, num_lsb_radix_blocks);
+        mem_ptr, bsks, ksks, num_lsb_radix_blocks);

    // Reduces a vec containing radix blocks that encrypts a sign
    // (inferior, equal, superior) to one single radix block containing the
@@ -485,17 +476,17 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
    tree_sign_reduction<Torus>(lsb_streams, lwe_array_lsb_out, comparisons,
                               mem_ptr->diff_buffer->tree_buffer,
                               mem_ptr->identity_lut_f, bsks, ksks,
-                               ms_noise_reduction_key, num_lsb_radix_blocks);
+                               num_lsb_radix_blocks);
    //////////////
    // msb
    // We remove the last block (which is the sign)
    auto are_all_msb_zeros = lwe_array_msb_out;
    host_compare_blocks_with_zero<Torus>(
        msb_streams, &are_all_msb_zeros, &msb, mem_ptr, bsks, ksks,
-        ms_noise_reduction_key, num_msb_radix_blocks, mem_ptr->is_zero_lut);
+        num_msb_radix_blocks, mem_ptr->is_zero_lut);
    are_all_comparisons_block_true<Torus>(
        msb_streams, &are_all_msb_zeros, &are_all_msb_zeros, mem_ptr, bsks,
-        ksks, ms_noise_reduction_key, are_all_msb_zeros.num_radix_blocks);
+        ksks, are_all_msb_zeros.num_radix_blocks);

    auto sign_bit_pos = (int)log2(message_modulus) - 1;

@@ -536,15 +527,14 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
        &sign_block, &msb, num_msb_radix_blocks - 1, num_msb_radix_blocks);
    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
        msb_streams, &lwe_array_msb_out, &sign_block, &are_all_msb_zeros, bsks,
-        ksks, ms_noise_reduction_key, signed_msb_lut, 1,
-        signed_msb_lut->params.message_modulus);
+        ksks, signed_msb_lut, 1, signed_msb_lut->params.message_modulus);
    lsb_streams.synchronize();
    msb_streams.synchronize();

    //////////////
    // Reduce the two blocks into one final
    reduce_signs<Torus>(streams, lwe_array_out, lwe_array_lsb_out, mem_ptr,
-                        sign_handler_f, bsks, ksks, ms_noise_reduction_key, 2);
+                        sign_handler_f, bsks, ksks, 2);

  } else {
    if (num_radix_blocks == 1) {
@@ -579,8 +569,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
      one_block_lut->broadcast_lut(active_streams);

      integer_radix_apply_univariate_lookup_table_kb<Torus>(
-          streams, lwe_array_out, lwe_array_in, bsks, ksks,
-          ms_noise_reduction_key, one_block_lut, 1);
+          streams, lwe_array_out, lwe_array_in, bsks, ksks, one_block_lut, 1);
      one_block_lut->release(streams);
      delete one_block_lut;
    } else {
@@ -619,8 +608,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
      // - 2 if lhs > rhs
      scalar_compare_radix_blocks_kb<Torus>(
          lsb_streams, lwe_array_ct_out, diff_buffer->tmp_packed,
-          (Torus *)rhs.ptr, mem_ptr, bsks, ksks, ms_noise_reduction_key,
-          num_lsb_radix_blocks);
+          (Torus *)rhs.ptr, mem_ptr, bsks, ksks, num_lsb_radix_blocks);
      CudaRadixCiphertextFFI encrypted_sign_block;
      as_radix_ciphertext_slice<Torus>(&encrypted_sign_block, lwe_array_in,
                                       num_radix_blocks - 1, num_radix_blocks);
@@ -636,8 +624,8 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(

      integer_radix_apply_bivariate_lookup_table_kb<Torus>(
          msb_streams, &lwe_array_sign_out, &encrypted_sign_block,
-          trivial_sign_block, bsks, ksks, ms_noise_reduction_key,
-          mem_ptr->signed_lut, 1, mem_ptr->signed_lut->params.message_modulus);
+          trivial_sign_block, bsks, ksks, mem_ptr->signed_lut, 1,
+          mem_ptr->signed_lut->params.message_modulus);
      lsb_streams.synchronize();
      msb_streams.synchronize();

@@ -645,8 +633,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
      // (inferior, equal, superior) to one single radix block containing the
      // final sign
      reduce_signs<Torus>(streams, lwe_array_out, lwe_array_ct_out, mem_ptr,
-                          sign_handler_f, bsks, ksks, ms_noise_reduction_key,
-                          num_lsb_radix_blocks + 1);
+                          sign_handler_f, bsks, ksks, num_lsb_radix_blocks + 1);
    }
  }
 }
@@ -657,9 +644,7 @@ __host__ void host_integer_radix_scalar_difference_check_kb(
    CudaRadixCiphertextFFI const *lwe_array_in, Torus const *scalar_blocks,
    Torus const *h_scalar_blocks, int_comparison_buffer<Torus> *mem_ptr,
    std::function<Torus(Torus)> sign_handler_f, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks, uint32_t num_scalar_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks, uint32_t num_scalar_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
    PANIC("Cuda error: input lwe dimensions must be the same")
@@ -671,13 +656,13 @@ __host__ void host_integer_radix_scalar_difference_check_kb(
    // is signed and scalar is positive
    integer_radix_signed_scalar_difference_check_kb<Torus>(
        streams, lwe_array_out, lwe_array_in, scalar_blocks, h_scalar_blocks,
-        mem_ptr, sign_handler_f, bsks, ksks, ms_noise_reduction_key,
-        num_radix_blocks, num_scalar_blocks);
+        mem_ptr, sign_handler_f, bsks, ksks, num_radix_blocks,
+        num_scalar_blocks);
  } else {
    integer_radix_unsigned_scalar_difference_check_kb<Torus>(
        streams, lwe_array_out, lwe_array_in, scalar_blocks, h_scalar_blocks,
-        mem_ptr, sign_handler_f, bsks, ksks, ms_noise_reduction_key,
-        num_radix_blocks, num_scalar_blocks);
+        mem_ptr, sign_handler_f, bsks, ksks, num_radix_blocks,
+        num_scalar_blocks);
  }
 }

@@ -686,9 +671,8 @@ __host__ void host_integer_radix_scalar_maxmin_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, Torus const *scalar_blocks,
    Torus const *h_scalar_blocks, int_comparison_buffer<Torus> *mem_ptr,
-    void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks, uint32_t num_scalar_blocks) {
+    void *const *bsks, Torus *const *ksks, uint32_t num_radix_blocks,
+    uint32_t num_scalar_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
    PANIC("Cuda error: input and output lwe dimensions must be the same")
@@ -706,8 +690,7 @@ __host__ void host_integer_radix_scalar_maxmin_kb(
  auto sign = mem_ptr->tmp_lwe_array_out;
  host_integer_radix_scalar_difference_check_kb<Torus>(
      streams, sign, lwe_array_in, scalar_blocks, h_scalar_blocks, mem_ptr,
-      mem_ptr->identity_lut_f, bsks, ksks, ms_noise_reduction_key,
-      num_radix_blocks, num_scalar_blocks);
+      mem_ptr->identity_lut_f, bsks, ksks, num_radix_blocks, num_scalar_blocks);

  // There is no optimized CMUX for scalars, so we convert to a trivial
  // ciphertext
@@ -721,10 +704,9 @@ __host__ void host_integer_radix_scalar_maxmin_kb(

  // Selector
  // CMUX for Max or Min
-  host_integer_radix_cmux_kb<Torus>(streams, lwe_array_out,
-                                    mem_ptr->tmp_lwe_array_out, lwe_array_left,
-                                    lwe_array_right, mem_ptr->cmux_buffer, bsks,
-                                    ksks, ms_noise_reduction_key);
+  host_integer_radix_cmux_kb<Torus>(
+      streams, lwe_array_out, mem_ptr->tmp_lwe_array_out, lwe_array_left,
+      lwe_array_right, mem_ptr->cmux_buffer, bsks, ksks);
 }

 template <typename Torus>
@@ -732,9 +714,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
    CudaRadixCiphertextFFI const *lwe_array_in, Torus const *scalar_blocks,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_radix_blocks, uint32_t num_scalar_blocks) {
+    Torus *const *ksks, uint32_t num_radix_blocks, uint32_t num_scalar_blocks) {

  if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
    PANIC("Cuda error: input and output lwe dimensions must be the same")
@@ -807,8 +787,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(

    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        lsb_streams, mem_ptr->tmp_lwe_array_out, mem_ptr->tmp_packed_input,
-        bsks, ksks, ms_noise_reduction_key, scalar_comparison_luts,
-        num_halved_lsb_radix_blocks);
+        bsks, ksks, scalar_comparison_luts, num_halved_lsb_radix_blocks);
  }
  //////////////
  // msb_in
@@ -825,12 +804,12 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
      PANIC("Cuda error: integer operation not supported")
    }

-    host_compare_blocks_with_zero<Torus>(
-        msb_streams, &msb_out, &msb_in, mem_ptr, bsks, ksks,
-        ms_noise_reduction_key, num_msb_radix_blocks, msb_lut);
-    are_all_comparisons_block_true<Torus>(
-        msb_streams, &msb_out, &msb_out, mem_ptr, bsks, ksks,
-        ms_noise_reduction_key, msb_out.num_radix_blocks);
+    host_compare_blocks_with_zero<Torus>(msb_streams, &msb_out, &msb_in,
+                                         mem_ptr, bsks, ksks,
+                                         num_msb_radix_blocks, msb_lut);
+    are_all_comparisons_block_true<Torus>(msb_streams, &msb_out, &msb_out,
+                                          mem_ptr, bsks, ksks,
+                                          msb_out.num_radix_blocks);
  }

  lsb_streams.synchronize();
@@ -840,13 +819,11 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
  case COMPARISON_TYPE::EQ:
    are_all_comparisons_block_true<Torus>(
        streams, lwe_array_out, mem_ptr->tmp_lwe_array_out, mem_ptr, bsks, ksks,
-        ms_noise_reduction_key,
        num_halved_scalar_blocks + (num_msb_radix_blocks > 0));
    break;
  case COMPARISON_TYPE::NE:
    is_at_least_one_comparisons_block_true<Torus>(
        streams, lwe_array_out, mem_ptr->tmp_lwe_array_out, mem_ptr, bsks, ksks,
-        ms_noise_reduction_key,
        num_halved_scalar_blocks + (num_msb_radix_blocks > 0));
    break;
  default:
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_div.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_div.cu
@@ -24,13 +24,12 @@ uint64_t scratch_cuda_integer_unsigned_scalar_div_radix_kb_64(
 void cuda_integer_unsigned_scalar_div_radix_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *numerator_ct,
    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
    const CudaScalarDivisorFFI *scalar_divisor_ffi) {

  host_integer_unsigned_scalar_div_radix<uint64_t>(
      CudaStreams(streams), numerator_ct,
      (int_unsigned_scalar_div_mem<uint64_t> *)mem_ptr, bsks, (uint64_t **)ksks,
-      ms_noise_reduction_key, scalar_divisor_ffi);
+      scalar_divisor_ffi);
 }

 void cleanup_cuda_integer_unsigned_scalar_div_radix_kb_64(
@@ -69,13 +68,12 @@ uint64_t scratch_cuda_integer_signed_scalar_div_radix_kb_64(
 void cuda_integer_signed_scalar_div_radix_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *numerator_ct,
    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
    const CudaScalarDivisorFFI *scalar_divisor_ffi, uint32_t numerator_bits) {

  host_integer_signed_scalar_div_radix_kb<uint64_t>(
      CudaStreams(streams), numerator_ct,
      (int_signed_scalar_div_mem<uint64_t> *)mem_ptr, bsks, (uint64_t **)ksks,
-      ms_noise_reduction_key, scalar_divisor_ffi, numerator_bits);
+      scalar_divisor_ffi, numerator_bits);
 }

 void cleanup_cuda_integer_signed_scalar_div_radix_kb_64(CudaStreamsFFI streams,
@@ -115,9 +113,7 @@ uint64_t scratch_integer_unsigned_scalar_div_rem_radix_kb_64(
 void cuda_integer_unsigned_scalar_div_rem_radix_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *quotient_ct,
    CudaRadixCiphertextFFI *remainder_ct, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
-    const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    void *const *ksks, const CudaScalarDivisorFFI *scalar_divisor_ffi,
    uint64_t const *divisor_has_at_least_one_set,
    uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
    void const *clear_blocks, void const *h_clear_blocks,
@@ -126,9 +122,9 @@ void cuda_integer_unsigned_scalar_div_rem_radix_kb_64(
  host_integer_unsigned_scalar_div_rem_radix<uint64_t>(
      CudaStreams(streams), quotient_ct, remainder_ct,
      (int_unsigned_scalar_div_rem_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)ksks, ms_noise_reduction_key, scalar_divisor_ffi,
-      divisor_has_at_least_one_set, decomposed_divisor, num_scalars_divisor,
-      (uint64_t *)clear_blocks, (uint64_t *)h_clear_blocks, num_clear_blocks);
+      (uint64_t **)ksks, scalar_divisor_ffi, divisor_has_at_least_one_set,
+      decomposed_divisor, num_scalars_divisor, (uint64_t *)clear_blocks,
+      (uint64_t *)h_clear_blocks, num_clear_blocks);
 }

 void cleanup_cuda_integer_unsigned_scalar_div_rem_radix_kb_64(
@@ -168,9 +164,7 @@ uint64_t scratch_integer_signed_scalar_div_rem_radix_kb_64(
 void cuda_integer_signed_scalar_div_rem_radix_kb_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *quotient_ct,
    CudaRadixCiphertextFFI *remainder_ct, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    void *const *ksks, const CudaScalarDivisorFFI *scalar_divisor_ffi,
    uint64_t const *divisor_has_at_least_one_set,
    uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
    uint32_t numerator_bits) {
@@ -178,9 +172,8 @@ void cuda_integer_signed_scalar_div_rem_radix_kb_64(
  host_integer_signed_scalar_div_rem_radix<uint64_t>(
      CudaStreams(streams), quotient_ct, remainder_ct,
      (int_signed_scalar_div_rem_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)ksks, ms_noise_reduction_key, scalar_divisor_ffi,
-      divisor_has_at_least_one_set, decomposed_divisor, num_scalars_divisor,
-      numerator_bits);
+      (uint64_t **)ksks, scalar_divisor_ffi, divisor_has_at_least_one_set,
+      decomposed_divisor, num_scalars_divisor, numerator_bits);
 }

 void cleanup_cuda_integer_signed_scalar_div_rem_radix_kb_64(
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_div.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_div.cuh
@@ -27,9 +27,7 @@ template <typename Torus>
 __host__ void host_integer_unsigned_scalar_div_radix(
    CudaStreams streams, CudaRadixCiphertextFFI *numerator_ct,
    int_unsigned_scalar_div_mem<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    const CudaScalarDivisorFFI *scalar_divisor_ffi) {
+    Torus *const *ksks, const CudaScalarDivisorFFI *scalar_divisor_ffi) {

  if (scalar_divisor_ffi->is_abs_divisor_one) {
    return;
@@ -38,7 +36,7 @@ __host__ void host_integer_unsigned_scalar_div_radix(
  if (scalar_divisor_ffi->is_divisor_pow2) {
    host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
        streams, numerator_ct, scalar_divisor_ffi->ilog2_divisor,
-        mem_ptr->logical_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key,
+        mem_ptr->logical_scalar_shift_mem, bsks, ksks,
        numerator_ct->num_radix_blocks);
    return;
  }
@@ -65,26 +63,24 @@ __host__ void host_integer_unsigned_scalar_div_radix(
                                       numerator_cpy, numerator_ct);

    host_integer_radix_scalar_mul_high_kb<Torus>(
-        streams, numerator_cpy, mem_ptr->scalar_mul_high_mem, ksks,
-        ms_noise_reduction_key, bsks, scalar_divisor_ffi);
+        streams, numerator_cpy, mem_ptr->scalar_mul_high_mem, ksks, bsks,
+        scalar_divisor_ffi);

    host_sub_and_propagate_single_carry<Torus>(
        streams, numerator_ct, numerator_cpy, nullptr, nullptr,
-        mem_ptr->sub_and_propagate_mem, bsks, ksks, ms_noise_reduction_key,
-        FLAG_NONE, (uint32_t)0);
+        mem_ptr->sub_and_propagate_mem, bsks, ksks, FLAG_NONE, (uint32_t)0);

    host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
        streams, numerator_ct, (uint32_t)1, mem_ptr->logical_scalar_shift_mem,
-        bsks, ksks, ms_noise_reduction_key, numerator_ct->num_radix_blocks);
+        bsks, ksks, numerator_ct->num_radix_blocks);

    host_add_and_propagate_single_carry<Torus>(
        streams, numerator_ct, numerator_cpy, nullptr, nullptr,
-        mem_ptr->scp_mem, bsks, ksks, ms_noise_reduction_key, FLAG_NONE,
-        (uint32_t)0);
+        mem_ptr->scp_mem, bsks, ksks, FLAG_NONE, (uint32_t)0);

    host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
        streams, numerator_ct, scalar_divisor_ffi->shift_post - (uint32_t)1,
-        mem_ptr->logical_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key,
+        mem_ptr->logical_scalar_shift_mem, bsks, ksks,
        numerator_ct->num_radix_blocks);

    return;
@@ -92,16 +88,16 @@ __host__ void host_integer_unsigned_scalar_div_radix(

  host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
      streams, numerator_ct, scalar_divisor_ffi->shift_pre,
-      mem_ptr->logical_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key,
+      mem_ptr->logical_scalar_shift_mem, bsks, ksks,
      numerator_ct->num_radix_blocks);

-  host_integer_radix_scalar_mul_high_kb<Torus>(
-      streams, numerator_ct, mem_ptr->scalar_mul_high_mem, ksks,
-      ms_noise_reduction_key, bsks, scalar_divisor_ffi);
+  host_integer_radix_scalar_mul_high_kb<Torus>(streams, numerator_ct,
+                                               mem_ptr->scalar_mul_high_mem,
+                                               ksks, bsks, scalar_divisor_ffi);

  host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
      streams, numerator_ct, scalar_divisor_ffi->shift_post,
-      mem_ptr->logical_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key,
+      mem_ptr->logical_scalar_shift_mem, bsks, ksks,
      numerator_ct->num_radix_blocks);
 }

@@ -125,9 +121,8 @@ template <typename Torus>
 __host__ void host_integer_signed_scalar_div_radix_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *numerator_ct,
    int_signed_scalar_div_mem<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    const CudaScalarDivisorFFI *scalar_divisor_ffi, uint32_t numerator_bits) {
+    Torus *const *ksks, const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    uint32_t numerator_bits) {

  if (scalar_divisor_ffi->is_abs_divisor_one) {
    if (scalar_divisor_ffi->is_divisor_negative) {
@@ -158,23 +153,20 @@ __host__ void host_integer_signed_scalar_div_radix_kb(

    host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
        streams, tmp, scalar_divisor_ffi->chosen_multiplier_num_bits - 1,
-        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks,
-        ms_noise_reduction_key);
+        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);

    host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
        streams, tmp,
        numerator_bits - scalar_divisor_ffi->chosen_multiplier_num_bits,
-        mem_ptr->logical_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key,
-        tmp->num_radix_blocks);
+        mem_ptr->logical_scalar_shift_mem, bsks, ksks, tmp->num_radix_blocks);

    host_add_and_propagate_single_carry<Torus>(
        streams, tmp, numerator_ct, nullptr, nullptr, mem_ptr->scp_mem, bsks,
-        ksks, ms_noise_reduction_key, FLAG_NONE, (uint32_t)0);
+        ksks, FLAG_NONE, (uint32_t)0);

    host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
        streams, tmp, scalar_divisor_ffi->chosen_multiplier_num_bits,
-        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks,
-        ms_noise_reduction_key);
+        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);

  } else if (!scalar_divisor_ffi->is_chosen_multiplier_geq_two_pow_numerator) {
    copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
@@ -182,12 +174,11 @@ __host__ void host_integer_signed_scalar_div_radix_kb(

    host_integer_radix_signed_scalar_mul_high_kb<Torus>(
        streams, tmp, mem_ptr->scalar_mul_high_mem, ksks, scalar_divisor_ffi,
-        ms_noise_reduction_key, bsks);
+        bsks);

    host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
        streams, tmp, scalar_divisor_ffi->shift_post,
-        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks,
-        ms_noise_reduction_key);
+        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);

    CudaRadixCiphertextFFI *xsign = mem_ptr->xsign_ffi;
    copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
@@ -195,12 +186,11 @@ __host__ void host_integer_signed_scalar_div_radix_kb(

    host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
        streams, xsign, numerator_bits - 1,
-        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks,
-        ms_noise_reduction_key);
+        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);

    host_sub_and_propagate_single_carry<Torus>(
        streams, tmp, xsign, nullptr, nullptr, mem_ptr->sub_and_propagate_mem,
-        bsks, ksks, ms_noise_reduction_key, FLAG_NONE, (uint32_t)0);
+        bsks, ksks, FLAG_NONE, (uint32_t)0);

  } else {

@@ -209,16 +199,15 @@ __host__ void host_integer_signed_scalar_div_radix_kb(

    host_integer_radix_signed_scalar_mul_high_kb<Torus>(
        streams, tmp, mem_ptr->scalar_mul_high_mem, ksks, scalar_divisor_ffi,
-        ms_noise_reduction_key, bsks);
+        bsks);

    host_add_and_propagate_single_carry<Torus>(
        streams, tmp, numerator_ct, nullptr, nullptr, mem_ptr->scp_mem, bsks,
-        ksks, ms_noise_reduction_key, FLAG_NONE, (uint32_t)0);
+        ksks, FLAG_NONE, (uint32_t)0);

    host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
        streams, tmp, scalar_divisor_ffi->shift_post,
-        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks,
-        ms_noise_reduction_key);
+        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);

    CudaRadixCiphertextFFI *xsign = mem_ptr->xsign_ffi;
    copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
@@ -226,12 +215,11 @@ __host__ void host_integer_signed_scalar_div_radix_kb(

    host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
        streams, xsign, numerator_bits - 1,
-        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks,
-        ms_noise_reduction_key);
+        mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);

    host_sub_and_propagate_single_carry<Torus>(
        streams, tmp, xsign, nullptr, nullptr, mem_ptr->sub_and_propagate_mem,
-        bsks, ksks, ms_noise_reduction_key, FLAG_NONE, (uint32_t)0);
+        bsks, ksks, FLAG_NONE, (uint32_t)0);
  }

  if (scalar_divisor_ffi->is_divisor_negative) {
@@ -263,9 +251,7 @@ __host__ void host_integer_unsigned_scalar_div_rem_radix(
    CudaStreams streams, CudaRadixCiphertextFFI *quotient_ct,
    CudaRadixCiphertextFFI *remainder_ct,
    int_unsigned_scalar_div_rem_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    Torus *const *ksks, const CudaScalarDivisorFFI *scalar_divisor_ffi,
    uint64_t const *divisor_has_at_least_one_set,
    uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
    Torus const *clear_blocks, Torus const *h_clear_blocks,
@@ -275,18 +261,17 @@ __host__ void host_integer_unsigned_scalar_div_rem_radix(
  copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
                                     numerator_ct, quotient_ct);

-  host_integer_unsigned_scalar_div_radix(
-      streams, quotient_ct, mem_ptr->unsigned_div_mem, bsks, ksks,
-      ms_noise_reduction_key, scalar_divisor_ffi);
+  host_integer_unsigned_scalar_div_radix(streams, quotient_ct,
+                                         mem_ptr->unsigned_div_mem, bsks, ksks,
+                                         scalar_divisor_ffi);

  if (scalar_divisor_ffi->is_divisor_pow2) {

    copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
                                       remainder_ct, numerator_ct);
-    host_integer_radix_scalar_bitop_kb(streams, remainder_ct, remainder_ct,
-                                       clear_blocks, h_clear_blocks,
-                                       num_clear_blocks, mem_ptr->bitop_mem,
-                                       bsks, ksks, ms_noise_reduction_key);
+    host_integer_radix_scalar_bitop_kb(
+        streams, remainder_ct, remainder_ct, clear_blocks, h_clear_blocks,
+        num_clear_blocks, mem_ptr->bitop_mem, bsks, ksks);

  } else {
    if (!scalar_divisor_ffi->is_divisor_zero) {
@@ -299,15 +284,13 @@ __host__ void host_integer_unsigned_scalar_div_rem_radix(
        host_integer_scalar_mul_radix<Torus>(
            streams, remainder_ct, decomposed_divisor,
            divisor_has_at_least_one_set, mem_ptr->scalar_mul_mem, bsks, ksks,
-            ms_noise_reduction_key, mem_ptr->params.message_modulus,
-            num_scalars_divisor);
+            mem_ptr->params.message_modulus, num_scalars_divisor);
      }
    }

    host_sub_and_propagate_single_carry(
        streams, numerator_ct, remainder_ct, nullptr, nullptr,
-        mem_ptr->sub_and_propagate_mem, bsks, ksks, ms_noise_reduction_key,
-        FLAG_NONE, (uint32_t)0);
+        mem_ptr->sub_and_propagate_mem, bsks, ksks, FLAG_NONE, (uint32_t)0);

    copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
                                       remainder_ct, numerator_ct);
@@ -335,9 +318,7 @@ __host__ void host_integer_signed_scalar_div_rem_radix(
    CudaStreams streams, CudaRadixCiphertextFFI *quotient_ct,
    CudaRadixCiphertextFFI *remainder_ct,
    int_signed_scalar_div_rem_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    Torus *const *ksks, const CudaScalarDivisorFFI *scalar_divisor_ffi,
    uint64_t const *divisor_has_at_least_one_set,
    uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
    uint32_t numerator_bits) {
@@ -346,13 +327,13 @@ __host__ void host_integer_signed_scalar_div_rem_radix(
  copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
                                     numerator_ct, quotient_ct);

-  host_integer_signed_scalar_div_radix_kb(
-      streams, quotient_ct, mem_ptr->signed_div_mem, bsks, ksks,
-      ms_noise_reduction_key, scalar_divisor_ffi, numerator_bits);
+  host_integer_signed_scalar_div_radix_kb(streams, quotient_ct,
+                                          mem_ptr->signed_div_mem, bsks, ksks,
+                                          scalar_divisor_ffi, numerator_bits);

-  host_propagate_single_carry<Torus>(
-      streams, quotient_ct, nullptr, nullptr, mem_ptr->scp_mem, bsks, ksks,
-      ms_noise_reduction_key, FLAG_NONE, (uint32_t)0);
+  host_propagate_single_carry<Torus>(streams, quotient_ct, nullptr, nullptr,
+                                     mem_ptr->scp_mem, bsks, ksks, FLAG_NONE,
+                                     (uint32_t)0);

  if (!scalar_divisor_ffi->is_divisor_negative &&
      scalar_divisor_ffi->is_divisor_pow2) {
@@ -361,7 +342,7 @@ __host__ void host_integer_signed_scalar_div_rem_radix(

    host_integer_radix_logical_scalar_shift_kb_inplace(
        streams, remainder_ct, scalar_divisor_ffi->ilog2_divisor,
-        mem_ptr->logical_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key,
+        mem_ptr->logical_scalar_shift_mem, bsks, ksks,
        remainder_ct->num_radix_blocks);

  } else if (!scalar_divisor_ffi->is_divisor_zero) {
@@ -375,15 +356,13 @@ __host__ void host_integer_signed_scalar_div_rem_radix(
      host_integer_scalar_mul_radix<Torus>(
          streams, remainder_ct, decomposed_divisor,
          divisor_has_at_least_one_set, mem_ptr->scalar_mul_mem, bsks, ksks,
-          ms_noise_reduction_key, mem_ptr->params.message_modulus,
-          num_scalars_divisor);
+          mem_ptr->params.message_modulus, num_scalars_divisor);
    }
  }

  host_sub_and_propagate_single_carry(
      streams, numerator_ct, remainder_ct, nullptr, nullptr,
-      mem_ptr->sub_and_propagate_mem, bsks, ksks, ms_noise_reduction_key,
-      FLAG_NONE, (uint32_t)0);
+      mem_ptr->sub_and_propagate_mem, bsks, ksks, FLAG_NONE, (uint32_t)0);

  copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
                                     remainder_ct, numerator_ct);
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_mul.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_mul.cu
@@ -22,15 +22,13 @@ uint64_t scratch_cuda_integer_scalar_mul_kb_64(
 void cuda_scalar_multiplication_integer_radix_ciphertext_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array,
    uint64_t const *decomposed_scalar, uint64_t const *has_at_least_one_set,
-    int8_t *mem, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t polynomial_size, uint32_t message_modulus, uint32_t num_scalars) {
+    int8_t *mem, void *const *bsks, void *const *ksks, uint32_t polynomial_size,
+    uint32_t message_modulus, uint32_t num_scalars) {

  host_integer_scalar_mul_radix<uint64_t>(
      CudaStreams(streams), lwe_array, decomposed_scalar, has_at_least_one_set,
      reinterpret_cast<int_scalar_mul_buffer<uint64_t> *>(mem), bsks,
-      (uint64_t **)(ksks), ms_noise_reduction_key, message_modulus,
-      num_scalars);
+      (uint64_t **)(ksks), message_modulus, num_scalars);
 }

 void cleanup_cuda_integer_radix_scalar_mul(CudaStreamsFFI streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_mul.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_mul.cuh
@@ -46,7 +46,6 @@ __host__ void host_integer_scalar_mul_radix(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array,
    T const *decomposed_scalar, T const *has_at_least_one_set,
    int_scalar_mul_buffer<T> *mem, void *const *bsks, T *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
    uint32_t message_modulus, uint32_t num_scalars) {

  auto num_radix_blocks = lwe_array->num_radix_blocks;
@@ -69,7 +68,7 @@ __host__ void host_integer_scalar_mul_radix(
          num_radix_blocks, lwe_array, 0, num_radix_blocks);
      host_integer_radix_logical_scalar_shift_kb_inplace<T>(
          streams, &shift_input, shift_amount, mem->logical_scalar_shift_buffer,
-          bsks, ksks, ms_noise_reduction_key, num_radix_blocks);
+          bsks, ksks, num_radix_blocks);
    } else {
      // create trivial assign for value = 0
      set_zero_radix_ciphertext_slice_async<T>(
@@ -113,15 +112,14 @@ __host__ void host_integer_scalar_mul_radix(
  } else {
    host_integer_partial_sum_ciphertexts_vec_kb<T>(
        streams, lwe_array, all_shifted_buffer, bsks, ksks,
-        ms_noise_reduction_key, mem->sum_ciphertexts_vec_mem, num_radix_blocks,
-        j);
+        mem->sum_ciphertexts_vec_mem, num_radix_blocks, j);

    auto scp_mem_ptr = mem->sc_prop_mem;
    uint32_t requested_flag = outputFlag::FLAG_NONE;
    uint32_t uses_carry = 0;
-    host_propagate_single_carry<T>(
-        streams, lwe_array, nullptr, nullptr, scp_mem_ptr, bsks, ksks,
-        ms_noise_reduction_key, requested_flag, uses_carry);
+    host_propagate_single_carry<T>(streams, lwe_array, nullptr, nullptr,
+                                   scp_mem_ptr, bsks, ksks, requested_flag,
+                                   uses_carry);
  }
 }

@@ -170,7 +168,6 @@ template <typename Torus>
 __host__ void host_integer_radix_scalar_mul_high_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *ct,
    int_scalar_mul_high_buffer<Torus> *mem_ptr, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
    void *const *bsks, const CudaScalarDivisorFFI *scalar_divisor_ffi) {

  if (scalar_divisor_ffi->is_chosen_multiplier_zero) {
@@ -191,7 +188,7 @@ __host__ void host_integer_radix_scalar_mul_high_kb(
      host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
          streams, tmp_ffi, scalar_divisor_ffi->ilog2_chosen_multiplier,
          mem_ptr->logical_scalar_shift_mem, bsks, (uint64_t **)ksks,
-          ms_noise_reduction_key, tmp_ffi->num_radix_blocks);
+          tmp_ffi->num_radix_blocks);

    } else {

@@ -199,8 +196,7 @@ __host__ void host_integer_radix_scalar_mul_high_kb(
          streams, tmp_ffi, scalar_divisor_ffi->decomposed_chosen_multiplier,
          scalar_divisor_ffi->chosen_multiplier_has_at_least_one_set,
          mem_ptr->scalar_mul_mem, bsks, (uint64_t **)ksks,
-          ms_noise_reduction_key, mem_ptr->params.message_modulus,
-          scalar_divisor_ffi->num_scalars);
+          mem_ptr->params.message_modulus, scalar_divisor_ffi->num_scalars);
    }
  }

@@ -211,9 +207,7 @@ template <typename Torus>
 __host__ void host_integer_radix_signed_scalar_mul_high_kb(
    CudaStreams streams, CudaRadixCiphertextFFI *ct,
    int_signed_scalar_mul_high_buffer<Torus> *mem_ptr, Torus *const *ksks,
-    const CudaScalarDivisorFFI *scalar_divisor_ffi,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *const *bsks) {
+    const CudaScalarDivisorFFI *scalar_divisor_ffi, void *const *bsks) {

  if (scalar_divisor_ffi->is_chosen_multiplier_zero) {
    set_zero_radix_ciphertext_slice_async<Torus>(
@@ -225,7 +219,7 @@ __host__ void host_integer_radix_signed_scalar_mul_high_kb(

  host_extend_radix_with_sign_msb<Torus>(
      streams, tmp_ffi, ct, mem_ptr->extend_radix_mem, ct->num_radix_blocks,
-      bsks, (uint64_t **)ksks, ms_noise_reduction_key);
+      bsks, (uint64_t **)ksks);

  if (scalar_divisor_ffi->active_bits != (uint32_t)0 &&
      !scalar_divisor_ffi->is_abs_chosen_multiplier_one &&
@@ -235,14 +229,13 @@ __host__ void host_integer_radix_signed_scalar_mul_high_kb(
      host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
          streams, tmp_ffi, scalar_divisor_ffi->ilog2_chosen_multiplier,
          mem_ptr->logical_scalar_shift_mem, bsks, (uint64_t **)ksks,
-          ms_noise_reduction_key, tmp_ffi->num_radix_blocks);
+          tmp_ffi->num_radix_blocks);
    } else {
      host_integer_scalar_mul_radix<Torus>(
          streams, tmp_ffi, scalar_divisor_ffi->decomposed_chosen_multiplier,
          scalar_divisor_ffi->chosen_multiplier_has_at_least_one_set,
          mem_ptr->scalar_mul_mem, bsks, (uint64_t **)ksks,
-          ms_noise_reduction_key, mem_ptr->params.message_modulus,
-          scalar_divisor_ffi->num_scalars);
+          mem_ptr->params.message_modulus, scalar_divisor_ffi->num_scalars);
    }
  }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_rotate.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_rotate.cu
@@ -22,13 +22,12 @@ uint64_t scratch_cuda_integer_radix_scalar_rotate_kb_64(

 void cuda_integer_radix_scalar_rotate_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array, uint32_t n,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks) {

  host_integer_radix_scalar_rotate_kb_inplace<uint64_t>(
      CudaStreams(streams), lwe_array, n,
      (int_logical_scalar_shift_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)(ksks), ms_noise_reduction_key);
+      (uint64_t **)(ksks));
 }

 void cleanup_cuda_integer_radix_scalar_rotate(CudaStreamsFFI streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_rotate.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_rotate.cuh
@@ -28,8 +28,7 @@ template <typename Torus>
 __host__ void host_integer_radix_scalar_rotate_kb_inplace(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array, uint32_t n,
    int_logical_scalar_shift_buffer<Torus> *mem, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    Torus *const *ksks) {

  auto num_blocks = lwe_array->num_radix_blocks;
  auto params = mem->params;
@@ -74,8 +73,7 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(

    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
        streams, lwe_array, receiver_blocks, giver_blocks, bsks, ksks,
-        ms_noise_reduction_key, lut_bivariate, num_blocks,
-        lut_bivariate->params.message_modulus);
+        lut_bivariate, num_blocks, lut_bivariate->params.message_modulus);

  } else {
    // rotate left as the blocks are from LSB to MSB
@@ -99,8 +97,7 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(

    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
        streams, lwe_array, receiver_blocks, giver_blocks, bsks, ksks,
-        ms_noise_reduction_key, lut_bivariate, num_blocks,
-        lut_bivariate->params.message_modulus);
+        lut_bivariate, num_blocks, lut_bivariate->params.message_modulus);
  }
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_shifts.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_shifts.cu
@@ -26,13 +26,12 @@ uint64_t scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
 /// rotations - 1 The remaining blocks are padded with zeros
 void cuda_integer_radix_logical_scalar_shift_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array, uint32_t shift,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks) {

  host_integer_radix_logical_scalar_shift_kb_inplace<uint64_t>(
      CudaStreams(streams), lwe_array, shift,
      (int_logical_scalar_shift_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)(ksks), ms_noise_reduction_key, lwe_array->num_radix_blocks);
+      (uint64_t **)(ksks), lwe_array->num_radix_blocks);
 }

 uint64_t scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
@@ -64,13 +63,12 @@ uint64_t scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
 /// zeros as would be done in the logical shift.
 void cuda_integer_radix_arithmetic_scalar_shift_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array, uint32_t shift,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks) {

  host_integer_radix_arithmetic_scalar_shift_kb_inplace<uint64_t>(
      CudaStreams(streams), lwe_array, shift,
      (int_arithmetic_scalar_shift_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)(ksks), ms_noise_reduction_key);
+      (uint64_t **)(ksks));
 }

 void cleanup_cuda_integer_radix_logical_scalar_shift(CudaStreamsFFI streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/scalar_shifts.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/scalar_shifts.cuh
@@ -28,9 +28,7 @@ template <typename Torus>
 __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array, uint32_t shift,
    int_logical_scalar_shift_buffer<Torus> *mem, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t num_blocks) {
+    Torus *const *ksks, uint32_t num_blocks) {

  if (lwe_array->num_radix_blocks < num_blocks)
    PANIC("Cuda error: input does not have enough blocks")
@@ -81,9 +79,8 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(

    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
        streams, &partial_current_blocks, &partial_current_blocks,
-        &partial_previous_blocks, bsks, ksks, ms_noise_reduction_key,
-        lut_bivariate, partial_block_count,
-        lut_bivariate->params.message_modulus);
+        &partial_previous_blocks, bsks, ksks, lut_bivariate,
+        partial_block_count, lut_bivariate->params.message_modulus);

  } else {
    // right shift
@@ -113,8 +110,8 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(

    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
        streams, partial_current_blocks, partial_current_blocks,
-        &partial_next_blocks, bsks, ksks, ms_noise_reduction_key, lut_bivariate,
-        partial_block_count, lut_bivariate->params.message_modulus);
+        &partial_next_blocks, bsks, ksks, lut_bivariate, partial_block_count,
+        lut_bivariate->params.message_modulus);
  }
 }

@@ -135,8 +132,7 @@ template <typename Torus>
 __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array, uint32_t shift,
    int_arithmetic_scalar_shift_buffer<Torus> *mem, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    Torus *const *ksks) {

  auto num_blocks = lwe_array->num_radix_blocks;
  auto params = mem->params;
@@ -205,9 +201,8 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(

        integer_radix_apply_bivariate_lookup_table_kb<Torus>(
            streams, partial_current_blocks, partial_current_blocks,
-            &partial_next_blocks, bsks, ksks, ms_noise_reduction_key,
-            lut_bivariate, partial_block_count,
-            lut_bivariate->params.message_modulus);
+            &partial_next_blocks, bsks, ksks, lut_bivariate,
+            partial_block_count, lut_bivariate->params.message_modulus);
      }
      // Since our CPU threads will be working on different streams we shall
      // Ensure the work in the main stream is completed
@@ -216,7 +211,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
          mem->lut_buffers_univariate[num_bits_in_block - 1];
      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          mem->local_streams_1, &padding_block, &last_block_copy, bsks, ksks,
-          ms_noise_reduction_key, lut_univariate_padding_block, 1);
+          lut_univariate_padding_block, 1);
      // Replace blocks 'pulled' from the left with the correct padding
      // block
      for (uint i = 0; i < rotations; i++) {
@@ -230,7 +225,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
            mem->lut_buffers_univariate[shift_within_block - 1];
        integer_radix_apply_univariate_lookup_table_kb<Torus>(
            mem->local_streams_2, &last_block, &last_block_copy, bsks, ksks,
-            ms_noise_reduction_key, lut_univariate_shift_last_block, 1);
+            lut_univariate_shift_last_block, 1);
      }

      mem->local_streams_1.synchronize();
--- a/backends/tfhe-cuda-backend/cuda/src/integer/shift_and_rotate.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/shift_and_rotate.cu
@@ -22,13 +22,12 @@ uint64_t scratch_cuda_integer_radix_shift_and_rotate_kb_64(
 void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array,
    CudaRadixCiphertextFFI const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *ksks) {

  host_integer_radix_shift_and_rotate_kb_inplace<uint64_t>(
      CudaStreams(streams), lwe_array, lwe_shift,
      (int_shift_and_rotate_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)(ksks), ms_noise_reduction_key);
+      (uint64_t **)(ksks));
 }

 void cleanup_cuda_integer_radix_shift_and_rotate(CudaStreamsFFI streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/shift_and_rotate.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/shift_and_rotate.cuh
@@ -29,8 +29,7 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
    CudaStreams streams, CudaRadixCiphertextFFI *lwe_array,
    CudaRadixCiphertextFFI const *lwe_shift,
    int_shift_and_rotate_buffer<Torus> *mem, void *const *bsks,
-    Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    Torus *const *ksks) {
  cuda_set_device(streams.gpu_index(0));

  if (lwe_array->num_radix_blocks != lwe_shift->num_radix_blocks)
@@ -57,7 +56,6 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
  // Extract all bits
  auto bits = mem->tmp_bits;
  extract_n_bits<Torus>(streams, bits, lwe_array, bsks, ksks,
-                        ms_noise_reduction_key,
                        num_radix_blocks * bits_per_block, num_radix_blocks,
                        mem->bit_extract_luts);

@@ -79,8 +77,8 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
  // so that it is already aligned to the correct position of the cmux input
  // and we reduce noise growth
  extract_n_bits<Torus>(streams, shift_bits, lwe_shift, bsks, ksks,
-                        ms_noise_reduction_key, max_num_bits_that_tell_shift,
-                        num_radix_blocks, mem->bit_extract_luts_with_offset_2);
+                        max_num_bits_that_tell_shift, num_radix_blocks,
+                        mem->bit_extract_luts_with_offset_2);

  // If signed, do an "arithmetic shift" by padding with the sign bit
  CudaRadixCiphertextFFI last_bit;
@@ -163,8 +161,7 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
    // we have
    // control_bit|b|a
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, input_bits_a, mux_inputs, bsks, ksks, ms_noise_reduction_key,
-        mux_lut, total_nb_bits);
+        streams, input_bits_a, mux_inputs, bsks, ksks, mux_lut, total_nb_bits);
  }

  // Initializes the output
@@ -196,8 +193,8 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
    // To give back a clean ciphertext
    auto cleaning_lut = mem->cleaning_lut;
    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, lwe_array, lwe_array, bsks, ksks, ms_noise_reduction_key,
-        cleaning_lut, num_radix_blocks);
+        streams, lwe_array, lwe_array, bsks, ksks, cleaning_lut,
+        num_radix_blocks);
  }
 }
 #endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/subtraction.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/subtraction.cu
@@ -23,14 +23,12 @@ void cuda_sub_and_propagate_single_carry_kb_64_inplace(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lhs_array,
    const CudaRadixCiphertextFFI *rhs_array, CudaRadixCiphertextFFI *carry_out,
    const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    uint32_t requested_flag, uint32_t uses_carry) {
+    void *const *ksks, uint32_t requested_flag, uint32_t uses_carry) {
  PUSH_RANGE("sub")
  host_sub_and_propagate_single_carry<uint64_t>(
      CudaStreams(streams), lhs_array, rhs_array, carry_out, carry_in,
      (int_sub_and_propagate<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
-      ms_noise_reduction_key, requested_flag, uses_carry);
+      requested_flag, uses_carry);
  POP_RANGE()
 }

--- a/backends/tfhe-cuda-backend/cuda/src/integer/subtraction.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/subtraction.cuh
@@ -33,7 +33,6 @@ void host_sub_and_propagate_single_carry(
    const CudaRadixCiphertextFFI *rhs_array, CudaRadixCiphertextFFI *carry_out,
    const CudaRadixCiphertextFFI *input_carries,
    int_sub_and_propagate<Torus> *mem, void *const *bsks, Torus *const *ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
    uint32_t requested_flag, uint32_t uses_carry) {

  host_integer_radix_negation<Torus>(
@@ -42,8 +41,7 @@ void host_sub_and_propagate_single_carry(

  host_add_and_propagate_single_carry<Torus>(
      streams, lhs_array, mem->neg_rhs_array, carry_out, input_carries,
-      mem->sc_prop_mem, bsks, ksks, ms_noise_reduction_key, requested_flag,
-      uses_carry);
+      mem->sc_prop_mem, bsks, ksks, requested_flag, uses_carry);
 }

 template <typename Torus>
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap.cuh
@@ -204,20 +204,20 @@ __device__ void mul_ggsw_glwe_in_fourier_domain_2_2_params(
 }

 template <typename InputTorus, typename OutputTorus>
-void execute_pbs_async(
-    CudaStreams streams, const LweArrayVariant<OutputTorus> &lwe_array_out,
-    const LweArrayVariant<InputTorus> &lwe_output_indexes,
-    const std::vector<OutputTorus *> lut_vec,
-    const std::vector<InputTorus *> lut_indexes_vec,
-    const LweArrayVariant<InputTorus> &lwe_array_in,
-    const LweArrayVariant<InputTorus> &lwe_input_indexes,
-    void *const *bootstrapping_keys,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    std::vector<int8_t *> pbs_buffer, uint32_t glwe_dimension,
-    uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
-    uint32_t level_count, uint32_t grouping_factor,
-    uint32_t input_lwe_ciphertext_count, PBS_TYPE pbs_type,
-    uint32_t num_many_lut, uint32_t lut_stride) {
+void execute_pbs_async(CudaStreams streams,
+                       const LweArrayVariant<OutputTorus> &lwe_array_out,
+                       const LweArrayVariant<InputTorus> &lwe_output_indexes,
+                       const std::vector<OutputTorus *> lut_vec,
+                       const std::vector<InputTorus *> lut_indexes_vec,
+                       const LweArrayVariant<InputTorus> &lwe_array_in,
+                       const LweArrayVariant<InputTorus> &lwe_input_indexes,
+                       void *const *bootstrapping_keys,
+                       std::vector<int8_t *> pbs_buffer,
+                       uint32_t glwe_dimension, uint32_t lwe_dimension,
+                       uint32_t polynomial_size, uint32_t base_log,
+                       uint32_t level_count, uint32_t grouping_factor,
+                       uint32_t input_lwe_ciphertext_count, PBS_TYPE pbs_type,
+                       uint32_t num_many_lut, uint32_t lut_stride) {

  if constexpr (std::is_same_v<OutputTorus, uint32_t>) {
    // 32 bits
@@ -310,17 +310,13 @@ void execute_pbs_async(
        auto d_lut_vector_indexes =
            lut_indexes_vec[i] + (ptrdiff_t)(gpu_offset);

-        void *zeros = nullptr;
-        if (ms_noise_reduction_key != nullptr &&
-            ms_noise_reduction_key->ptr != nullptr)
-          zeros = ms_noise_reduction_key->ptr[i];
        cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
            streams.stream(i), streams.gpu_index(i), current_lwe_array_out,
            current_lwe_output_indexes, lut_vec[i], d_lut_vector_indexes,
            current_lwe_array_in, current_lwe_input_indexes,
-            bootstrapping_keys[i], ms_noise_reduction_key, zeros, pbs_buffer[i],
-            lwe_dimension, glwe_dimension, polynomial_size, base_log,
-            level_count, num_inputs_on_gpu, num_many_lut, lut_stride);
+            bootstrapping_keys[i], pbs_buffer[i], lwe_dimension, glwe_dimension,
+            polynomial_size, base_log, level_count, num_inputs_on_gpu,
+            num_many_lut, lut_stride);
      }
      break;
    default:
@@ -374,16 +370,11 @@ void execute_pbs_async(
        auto d_lut_vector_indexes =
            lut_indexes_vec[i] + (ptrdiff_t)(gpu_offset);

-        void *zeros = nullptr;
-        if (ms_noise_reduction_key != nullptr &&
-            ms_noise_reduction_key->ptr != nullptr)
-          zeros = ms_noise_reduction_key->ptr[i];
        cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
            streams.stream(i), streams.gpu_index(i), current_lwe_array_out,
            lut_vec[i], current_lwe_array_in, bootstrapping_keys[i],
-            ms_noise_reduction_key, zeros, pbs_buffer[i], lwe_dimension,
-            glwe_dimension, polynomial_size, base_log, level_count,
-            num_inputs_on_gpu);
+            pbs_buffer[i], lwe_dimension, glwe_dimension, polynomial_size,
+            base_log, level_count, num_inputs_on_gpu);
      }
      break;
    default:
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_128.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_128.cuh
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_classic.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_cg_classic.cuh
@@ -80,9 +80,7 @@ __global__ void device_programmable_bootstrap_cg(
  // The third dimension of the block is used to determine on which ciphertext
  // this block is operating, in the case of batch bootstraps
  const Torus *block_lwe_array_in =
-      (noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT)
-          ? &lwe_array_in[blockIdx.x * (lwe_dimension + 1)]
-          : &lwe_array_in[lwe_input_indexes[blockIdx.x] * (lwe_dimension + 1)];
+      &lwe_array_in[lwe_input_indexes[blockIdx.x] * (lwe_dimension + 1)];

  const Torus *block_lut_vector =
      &lut_vector[lut_vector_indexes[blockIdx.x] * params::degree *
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic.cu
@@ -650,33 +650,15 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
    void const *lwe_output_indexes, void const *lut_vector,
    void const *lut_vector_indexes, void const *lwe_array_in,
    void const *lwe_input_indexes, void const *bootstrapping_key,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void *ms_drift_noise_reduction_ptr, int8_t *mem_ptr, uint32_t lwe_dimension,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
-    uint32_t lut_stride) {
+    int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
+    uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride) {
  if (base_log > 64)
    PANIC("Cuda error (classical PBS): base log should be <= 64")

  pbs_buffer<uint64_t, CLASSICAL> *buffer =
      (pbs_buffer<uint64_t, CLASSICAL> *)mem_ptr;

-  // If the parameters contain drift noise reduction key, then apply it
-  if (buffer->noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT) {
-    uint32_t log_modulus = log2(polynomial_size) + 1;
-    host_drift_modulus_switch<uint64_t>(
-        static_cast<cudaStream_t>(stream), gpu_index, buffer->temp_lwe_array_in,
-        static_cast<uint64_t const *>(lwe_array_in),
-        static_cast<uint64_t const *>(lwe_input_indexes),
-        static_cast<uint64_t *>(ms_drift_noise_reduction_ptr),
-        lwe_dimension + 1, num_samples, ms_noise_reduction_key->num_zeros,
-        ms_noise_reduction_key->ms_input_variance,
-        ms_noise_reduction_key->ms_r_sigma, ms_noise_reduction_key->ms_bound,
-        log_modulus);
-  } else {
-    buffer->temp_lwe_array_in =
-        const_cast<uint64_t *>(static_cast<const uint64_t *>(lwe_array_in));
-  }
  check_cuda_error(cudaGetLastError());

  switch (buffer->pbs_variant) {
@@ -687,7 +669,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
        static_cast<const uint64_t *>(lwe_output_indexes),
        static_cast<const uint64_t *>(lut_vector),
        static_cast<const uint64_t *>(lut_vector_indexes),
-        static_cast<const uint64_t *>(buffer->temp_lwe_array_in),
+        static_cast<const uint64_t *>(lwe_array_in),
        static_cast<const uint64_t *>(lwe_input_indexes),
        static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
        glwe_dimension, polynomial_size, base_log, level_count, num_samples,
@@ -702,7 +684,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
        static_cast<const uint64_t *>(lwe_output_indexes),
        static_cast<const uint64_t *>(lut_vector),
        static_cast<const uint64_t *>(lut_vector_indexes),
-        static_cast<const uint64_t *>(buffer->temp_lwe_array_in),
+        static_cast<const uint64_t *>(lwe_array_in),
        static_cast<const uint64_t *>(lwe_input_indexes),
        static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
        glwe_dimension, polynomial_size, base_log, level_count, num_samples,
@@ -714,7 +696,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
        static_cast<const uint64_t *>(lwe_output_indexes),
        static_cast<const uint64_t *>(lut_vector),
        static_cast<const uint64_t *>(lut_vector_indexes),
-        static_cast<const uint64_t *>(buffer->temp_lwe_array_in),
+        static_cast<const uint64_t *>(lwe_array_in),
        static_cast<const uint64_t *>(lwe_input_indexes),
        static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
        glwe_dimension, polynomial_size, base_log, level_count, num_samples,
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic.cuh
@@ -56,9 +56,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
  // The third dimension of the block is used to determine on which ciphertext
  // this block is operating, in the case of batch bootstraps
  const Torus *block_lwe_array_in =
-      (noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT)
-          ? &lwe_array_in[blockIdx.x * (lwe_dimension + 1)]
-          : &lwe_array_in[lwe_input_indexes[blockIdx.x] * (lwe_dimension + 1)];
+      &lwe_array_in[lwe_input_indexes[blockIdx.x] * (lwe_dimension + 1)];

  const Torus *block_lut_vector =
      &lut_vector[lut_vector_indexes[blockIdx.x] * params::degree *
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic_128.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic_128.cu
@@ -36,7 +36,7 @@ uint64_t scratch_cuda_programmable_bootstrap_128(
 template <typename InputTorus>
 void executor_cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
    void *stream, uint32_t gpu_index, __uint128_t *lwe_array_out,
-    __uint128_t const *lut_vector, InputTorus *lwe_array_in,
+    __uint128_t const *lut_vector, InputTorus const *lwe_array_in,
    double const *bootstrapping_key,
    pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> *buffer,
    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
@@ -83,7 +83,7 @@ void executor_cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
 template <typename InputTorus>
 void executor_cuda_programmable_bootstrap_cg_lwe_ciphertext_vector_128(
    void *stream, uint32_t gpu_index, __uint128_t *lwe_array_out,
-    __uint128_t const *lut_vector, InputTorus *lwe_array_in,
+    __uint128_t const *lut_vector, InputTorus const *lwe_array_in,
    double const *bootstrapping_key,
    pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> *buffer,
    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
@@ -132,36 +132,17 @@ void host_programmable_bootstrap_lwe_ciphertext_vector_128(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    __uint128_t const *lut_vector, void const *lwe_array_in,
    void const *bootstrapping_key,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_drift_noise_reduction_ptr,
-    void const *ms_noise_reduction_ptr,
    pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> *buffer,
    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
    uint32_t base_log, uint32_t level_count, uint32_t num_samples) {
  if (base_log > 64)
    PANIC("Cuda error (classical PBS): base log should be <= 64")

-  // If the parameters contain drift noise reduction key, then apply it
-  if (buffer->noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT) {
-    uint32_t log_modulus = log2(polynomial_size) + 1;
-    host_drift_modulus_switch<InputTorus>(
-        static_cast<cudaStream_t>(stream), gpu_index,
-        static_cast<InputTorus *>(buffer->temp_lwe_array_in),
-        static_cast<InputTorus const *>(lwe_array_in),
-        static_cast<uint64_t const *>(buffer->trivial_indexes),
-        static_cast<const InputTorus *>(ms_noise_reduction_ptr),
-        lwe_dimension + 1, num_samples, ms_drift_noise_reduction_ptr->num_zeros,
-        ms_drift_noise_reduction_ptr->ms_input_variance,
-        ms_drift_noise_reduction_ptr->ms_r_sigma,
-        ms_drift_noise_reduction_ptr->ms_bound, log_modulus);
-  } else {
-    buffer->temp_lwe_array_in =
-        const_cast<InputTorus *>(static_cast<const InputTorus *>(lwe_array_in));
-  }
  switch (buffer->pbs_variant) {
  case DEFAULT:
    executor_cuda_programmable_bootstrap_lwe_ciphertext_vector_128<InputTorus>(
        stream, gpu_index, static_cast<__uint128_t *>(lwe_array_out),
-        lut_vector, static_cast<InputTorus *>(buffer->temp_lwe_array_in),
+        lut_vector, static_cast<InputTorus const *>(lwe_array_in),
        static_cast<const double *>(bootstrapping_key), buffer, lwe_dimension,
        glwe_dimension, polynomial_size, base_log, level_count, num_samples);
    break;
@@ -169,7 +150,7 @@ void host_programmable_bootstrap_lwe_ciphertext_vector_128(
    executor_cuda_programmable_bootstrap_cg_lwe_ciphertext_vector_128<
        InputTorus>(
        stream, gpu_index, static_cast<__uint128_t *>(lwe_array_out),
-        lut_vector, static_cast<InputTorus *>(buffer->temp_lwe_array_in),
+        lut_vector, static_cast<InputTorus const *>(lwe_array_in),
        static_cast<const double *>(bootstrapping_key), buffer, lwe_dimension,
        glwe_dimension, polynomial_size, base_log, level_count, num_samples);
    break;
@@ -234,9 +215,7 @@ void host_programmable_bootstrap_lwe_ciphertext_vector_128(
 void cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
    void *streams, uint32_t gpu_index, void *lwe_array_out,
    void const *lut_vector, void const *lwe_array_in,
-    void const *bootstrapping_key,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    void const *ms_noise_reduction_ptr, int8_t *mem_ptr, uint32_t lwe_dimension,
+    void const *bootstrapping_key, int8_t *mem_ptr, uint32_t lwe_dimension,
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
    uint32_t level_count, uint32_t num_samples) {
  pbs_buffer_128<uint64_t, PBS_TYPE::CLASSICAL> *buffer =
@@ -245,9 +224,8 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
  host_programmable_bootstrap_lwe_ciphertext_vector_128<uint64_t>(
      streams, gpu_index, lwe_array_out,
      static_cast<const __uint128_t *>(lut_vector), lwe_array_in,
-      bootstrapping_key, ms_noise_reduction_key, ms_noise_reduction_ptr, buffer,
-      lwe_dimension, glwe_dimension, polynomial_size, base_log, level_count,
-      num_samples);
+      bootstrapping_key, buffer, lwe_dimension, glwe_dimension, polynomial_size,
+      base_log, level_count, num_samples);
 }

 /*
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic_128.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_classic_128.cuh
@@ -668,7 +668,7 @@ uint64_t scratch_cuda_programmable_bootstrap_128_vector(
 template <typename InputTorus, class params, bool first_iter>
 __host__ void execute_step_one_128(
    cudaStream_t stream, uint32_t gpu_index, __uint128_t const *lut_vector,
-    InputTorus *lwe_array_in, double const *bootstrapping_key,
+    InputTorus const *lwe_array_in, double const *bootstrapping_key,
    __uint128_t *global_accumulator, double *global_join_buffer,
    PBS_MS_REDUCTION_T noise_reduction_type,
    uint32_t input_lwe_ciphertext_count, uint32_t lwe_dimension,
@@ -752,7 +752,7 @@ __host__ void execute_step_two_128(
 template <typename InputTorus, class params>
 __host__ void host_programmable_bootstrap_128(
    cudaStream_t stream, uint32_t gpu_index, __uint128_t *lwe_array_out,
-    __uint128_t const *lut_vector, InputTorus *lwe_array_in,
+    __uint128_t const *lut_vector, InputTorus const *lwe_array_in,
    double const *bootstrapping_key,
    pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> *pbs_buffer,
    uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size,
--- a/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_tbc_classic.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_tbc_classic.cuh
@@ -84,9 +84,7 @@ __global__ void device_programmable_bootstrap_tbc(
  // The third dimension of the block is used to determine on which ciphertext
  // this block is operating, in the case of batch bootstraps
  const Torus *block_lwe_array_in =
-      (noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT)
-          ? &lwe_array_in[blockIdx.x * (lwe_dimension + 1)]
-          : &lwe_array_in[lwe_input_indexes[blockIdx.x] * (lwe_dimension + 1)];
+      &lwe_array_in[lwe_input_indexes[blockIdx.x] * (lwe_dimension + 1)];

  const Torus *block_lut_vector =
      &lut_vector[lut_vector_indexes[blockIdx.x] * params::degree *
--- a/backends/tfhe-cuda-backend/cuda/src/zk/zk.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/zk/zk.cu
@@ -43,8 +43,7 @@ uint64_t scratch_cuda_expand_without_verification_64(
 void cuda_expand_without_verification_64(
    CudaStreamsFFI streams, void *lwe_array_out,
    const void *lwe_flattened_compact_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *computing_ksks, void *const *casting_keys,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    void *const *bsks, void *const *computing_ksks, void *const *casting_keys) {

  auto expand_buffer = reinterpret_cast<zk_expand_mem<uint64_t> *>(mem_ptr);

@@ -54,49 +53,49 @@ void cuda_expand_without_verification_64(
        streams, static_cast<uint64_t *>(lwe_array_out),
        static_cast<const uint64_t *>(lwe_flattened_compact_array_in),
        expand_buffer, (uint64_t **)casting_keys, bsks,
-        (uint64_t **)(computing_ksks), ms_noise_reduction_key);
+        (uint64_t **)(computing_ksks));
    break;
  case 512:
    host_expand_without_verification<uint64_t, AmortizedDegree<512>>(
        streams, static_cast<uint64_t *>(lwe_array_out),
        static_cast<const uint64_t *>(lwe_flattened_compact_array_in),
        expand_buffer, (uint64_t **)casting_keys, bsks,
-        (uint64_t **)(computing_ksks), ms_noise_reduction_key);
+        (uint64_t **)(computing_ksks));
    break;
  case 1024:
    host_expand_without_verification<uint64_t, AmortizedDegree<1024>>(
        streams, static_cast<uint64_t *>(lwe_array_out),
        static_cast<const uint64_t *>(lwe_flattened_compact_array_in),
        expand_buffer, (uint64_t **)casting_keys, bsks,
-        (uint64_t **)(computing_ksks), ms_noise_reduction_key);
+        (uint64_t **)(computing_ksks));
    break;
  case 2048:
    host_expand_without_verification<uint64_t, AmortizedDegree<2048>>(
        streams, static_cast<uint64_t *>(lwe_array_out),
        static_cast<const uint64_t *>(lwe_flattened_compact_array_in),
        expand_buffer, (uint64_t **)casting_keys, bsks,
-        (uint64_t **)(computing_ksks), ms_noise_reduction_key);
+        (uint64_t **)(computing_ksks));
    break;
  case 4096:
    host_expand_without_verification<uint64_t, AmortizedDegree<4096>>(
        streams, static_cast<uint64_t *>(lwe_array_out),
        static_cast<const uint64_t *>(lwe_flattened_compact_array_in),
        expand_buffer, (uint64_t **)casting_keys, bsks,
-        (uint64_t **)(computing_ksks), ms_noise_reduction_key);
+        (uint64_t **)(computing_ksks));
    break;
  case 8192:
    host_expand_without_verification<uint64_t, AmortizedDegree<8192>>(
        streams, static_cast<uint64_t *>(lwe_array_out),
        static_cast<const uint64_t *>(lwe_flattened_compact_array_in),
        expand_buffer, (uint64_t **)casting_keys, bsks,
-        (uint64_t **)(computing_ksks), ms_noise_reduction_key);
+        (uint64_t **)(computing_ksks));
    break;
  case 16384:
    host_expand_without_verification<uint64_t, AmortizedDegree<16384>>(
        streams, static_cast<uint64_t *>(lwe_array_out),
        static_cast<const uint64_t *>(lwe_flattened_compact_array_in),
        expand_buffer, (uint64_t **)casting_keys, bsks,
-        (uint64_t **)(computing_ksks), ms_noise_reduction_key);
+        (uint64_t **)(computing_ksks));
    break;
  default:
    PANIC("CUDA error: lwe_dimension not supported."
--- a/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh
@@ -19,8 +19,7 @@ template <typename Torus, class params>
 __host__ void host_expand_without_verification(
    CudaStreams streams, Torus *lwe_array_out,
    const Torus *lwe_flattened_compact_array_in, zk_expand_mem<Torus> *mem_ptr,
-    Torus *const *casting_keys, void *const *bsks, Torus *const *compute_ksks,
-    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
+    Torus *const *casting_keys, void *const *bsks, Torus *const *compute_ksks) {
  // Expand
  auto casting_key_type = mem_ptr->casting_key_type;
  auto expanded_lwes = mem_ptr->tmp_expanded_lwes;
@@ -96,8 +95,8 @@ __host__ void host_expand_without_verification(
  auto input = new CudaRadixCiphertextFFI;
  into_radix_ciphertext(input, lwe_array_input, 2 * num_lwes, lwe_dimension);
  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, output, input, bsks, ksks, ms_noise_reduction_key,
-      message_and_carry_extract_luts, 2 * num_lwes);
+      streams, output, input, bsks, ksks, message_and_carry_extract_luts,
+      2 * num_lwes);
 }

 template <typename Torus>
--- a/backends/tfhe-cuda-backend/cuda/tests_and_benchmarks/tests/test_classical_pbs.cpp
+++ b/backends/tfhe-cuda-backend/cuda/tests_and_benchmarks/tests/test_classical_pbs.cpp
@@ -191,9 +191,9 @@ TEST_P(ClassicalProgrammableBootstrapTestPrimitives_u64, bootstrap) {
          stream, gpu_index, (void *)d_lwe_ct_out_array,
          (void *)d_lwe_output_indexes, (void *)d_lut_pbs_identity,
          (void *)d_lut_pbs_indexes, (void *)d_lwe_ct_in,
-          (void *)d_lwe_input_indexes, (void *)d_fourier_bsk, nullptr, nullptr,
-          pbs_buffer, lwe_dimension, glwe_dimension, polynomial_size,
-          pbs_base_log, pbs_level, number_of_inputs, num_many_lut, lut_stride);
+          (void *)d_lwe_input_indexes, (void *)d_fourier_bsk, pbs_buffer,
+          lwe_dimension, glwe_dimension, polynomial_size, pbs_base_log,
+          pbs_level, number_of_inputs, num_many_lut, lut_stride);
      // Copy result back
      cuda_memcpy_async_to_cpu(lwe_ct_out_array, d_lwe_ct_out_array,
                               (glwe_dimension * polynomial_size + 1) *
--- a/backends/tfhe-cuda-backend/src/bindings.rs
+++ b/backends/tfhe-cuda-backend/src/bindings.rs
@@ -98,37 +98,8 @@ pub const PBS_TYPE_MULTI_BIT: PBS_TYPE = 0;
 pub const PBS_TYPE_CLASSICAL: PBS_TYPE = 1;
 pub type PBS_TYPE = ffi::c_uint;
 pub const PBS_MS_REDUCTION_T_NO_REDUCTION: PBS_MS_REDUCTION_T = 0;
-pub const PBS_MS_REDUCTION_T_DRIFT: PBS_MS_REDUCTION_T = 1;
-pub const PBS_MS_REDUCTION_T_CENTERED: PBS_MS_REDUCTION_T = 2;
+pub const PBS_MS_REDUCTION_T_CENTERED: PBS_MS_REDUCTION_T = 1;
 pub type PBS_MS_REDUCTION_T = ffi::c_uint;
-#[repr(C)]
-#[derive(Debug, Copy, Clone)]
-pub struct CudaModulusSwitchNoiseReductionKeyFFI {
-    pub ptr: *const *mut ffi::c_void,
-    pub num_zeros: u32,
-    pub ms_bound: f64,
-    pub ms_r_sigma: f64,
-    pub ms_input_variance: f64,
-}
-#[allow(clippy::unnecessary_operation, clippy::identity_op)]
-const _: () = {
-    ["Size of CudaModulusSwitchNoiseReductionKeyFFI"]
-        [::std::mem::size_of::<CudaModulusSwitchNoiseReductionKeyFFI>() - 40usize];
-    ["Alignment of CudaModulusSwitchNoiseReductionKeyFFI"]
-        [::std::mem::align_of::<CudaModulusSwitchNoiseReductionKeyFFI>() - 8usize];
-    ["Offset of field: CudaModulusSwitchNoiseReductionKeyFFI::ptr"]
-        [::std::mem::offset_of!(CudaModulusSwitchNoiseReductionKeyFFI, ptr) - 0usize];
-    ["Offset of field: CudaModulusSwitchNoiseReductionKeyFFI::num_zeros"]
-        [::std::mem::offset_of!(CudaModulusSwitchNoiseReductionKeyFFI, num_zeros) - 8usize];
-    ["Offset of field: CudaModulusSwitchNoiseReductionKeyFFI::ms_bound"]
-        [::std::mem::offset_of!(CudaModulusSwitchNoiseReductionKeyFFI, ms_bound) - 16usize];
-    ["Offset of field: CudaModulusSwitchNoiseReductionKeyFFI::ms_r_sigma"]
-        [::std::mem::offset_of!(CudaModulusSwitchNoiseReductionKeyFFI, ms_r_sigma) - 24usize];
-    ["Offset of field: CudaModulusSwitchNoiseReductionKeyFFI::ms_input_variance"][::std::mem::offset_of!(
-        CudaModulusSwitchNoiseReductionKeyFFI,
-        ms_input_variance
-    ) - 32usize];
-};
 pub const SHIFT_OR_ROTATE_TYPE_LEFT_SHIFT: SHIFT_OR_ROTATE_TYPE = 0;
 pub const SHIFT_OR_ROTATE_TYPE_RIGHT_SHIFT: SHIFT_OR_ROTATE_TYPE = 1;
 pub const SHIFT_OR_ROTATE_TYPE_LEFT_ROTATE: SHIFT_OR_ROTATE_TYPE = 2;
@@ -382,7 +353,6 @@ unsafe extern "C" {
        input_radix_lwe: *const CudaRadixCiphertextFFI,
        mem_ptr: *mut i8,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        bsks: *const *mut ffi::c_void,
    );
 }
@@ -422,7 +392,6 @@ unsafe extern "C" {
        input_radix_lwe_2: *const CudaRadixCiphertextFFI,
        mem_ptr: *mut i8,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        bsks: *const *mut ffi::c_void,
        num_radix_blocks: u32,
        shift: u32,
@@ -441,7 +410,6 @@ unsafe extern "C" {
        input_radix_lwe: *const CudaRadixCiphertextFFI,
        mem_ptr: *mut i8,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        bsks: *const *mut ffi::c_void,
        num_luts: u32,
        lut_stride: u32,
@@ -472,7 +440,6 @@ unsafe extern "C" {
        input_blocks: *mut CudaRadixCiphertextFFI,
        mem_ptr: *mut i8,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        bsks: *const *mut ffi::c_void,
        num_blocks: u32,
    );
@@ -512,7 +479,6 @@ unsafe extern "C" {
        is_bool_right: bool,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        mem_ptr: *mut i8,
        polynomial_size: u32,
        num_blocks: u32,
@@ -572,7 +538,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -605,7 +570,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -651,7 +615,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -692,7 +655,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -705,7 +667,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        num_scalar_blocks: u32,
    );
 }
@@ -743,7 +704,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -757,7 +717,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -794,7 +753,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -830,7 +788,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -892,7 +849,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        requested_flag: u32,
        uses_carry: u32,
    );
@@ -907,7 +863,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        requested_flag: u32,
        uses_carry: u32,
    );
@@ -953,7 +908,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        compute_overflow: u32,
        uses_input_borrow: u32,
    );
@@ -994,7 +948,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -1033,7 +986,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        polynomial_size: u32,
        message_modulus: u32,
        num_scalars: u32,
@@ -1078,7 +1030,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -1113,7 +1064,6 @@ unsafe extern "C" {
        generates_or_propagates: *mut CudaRadixCiphertextFFI,
        mem_ptr: *mut i8,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        bsks: *const *mut ffi::c_void,
        num_blocks: u32,
    );
@@ -1160,7 +1110,6 @@ unsafe extern "C" {
        is_signed: bool,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -1195,7 +1144,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        num_radix_blocks: u32,
    );
 }
@@ -1234,7 +1182,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        num_radix_blocks: u32,
    );
 }
@@ -1288,7 +1235,6 @@ unsafe extern "C" {
        input_radix_lwe: *const CudaRadixCiphertextFFI,
        mem_ptr: *mut i8,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        bsks: *const *mut ffi::c_void,
    );
 }
@@ -1330,7 +1276,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        requested_flag: u32,
        uses_carry: u32,
    );
@@ -1369,7 +1314,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        scalar_divisor_ffi: *const CudaScalarDivisorFFI,
    );
 }
@@ -1409,7 +1353,6 @@ unsafe extern "C" {
        num_additional_blocks: u32,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -1446,7 +1389,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        scalar_divisor_ffi: *const CudaScalarDivisorFFI,
        numerator_bits: u32,
    );
@@ -1487,7 +1429,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        scalar_divisor_ffi: *const CudaScalarDivisorFFI,
        divisor_has_at_least_one_set: *const u64,
        decomposed_divisor: *const u64,
@@ -1533,7 +1474,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
        scalar_divisor_ffi: *const CudaScalarDivisorFFI,
        divisor_has_at_least_one_set: *const u64,
        decomposed_divisor: *const u64,
@@ -1578,7 +1518,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -1617,7 +1556,6 @@ unsafe extern "C" {
        num_blocks_to_process: u32,
        mem: *mut i8,
        bsks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -1659,7 +1597,6 @@ unsafe extern "C" {
        mem_ptr: *mut i8,
        bsks: *const *mut ffi::c_void,
        ksks: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -1832,7 +1769,6 @@ unsafe extern "C" {
        bsks: *const *mut ffi::c_void,
        computing_ksks: *const *mut ffi::c_void,
        casting_keys: *const *mut ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
    );
 }
 unsafe extern "C" {
@@ -2300,8 +2236,6 @@ unsafe extern "C" {
        lwe_array_in: *const ffi::c_void,
        lwe_input_indexes: *const ffi::c_void,
        bootstrapping_key: *const ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
-        ms_noise_reduction_ptr: *mut ffi::c_void,
        buffer: *mut i8,
        lwe_dimension: u32,
        glwe_dimension: u32,
@@ -2321,8 +2255,6 @@ unsafe extern "C" {
        lut_vector: *const ffi::c_void,
        lwe_array_in: *const ffi::c_void,
        bootstrapping_key: *const ffi::c_void,
-        ms_noise_reduction_key: *const CudaModulusSwitchNoiseReductionKeyFFI,
-        ms_noise_reduction_ptr: *const ffi::c_void,
        buffer: *mut i8,
        lwe_dimension: u32,
        glwe_dimension: u32,
--- a/tfhe-benchmark/benches/core_crypto/pbs128_bench.rs
+++ b/tfhe-benchmark/benches/core_crypto/pbs128_bench.rs
@@ -178,14 +178,12 @@ mod cuda {
        cuda_programmable_bootstrap_128_lwe_ciphertext, get_number_of_gpus, CudaStreams,
    };
    use tfhe::core_crypto::prelude::*;
-    use tfhe::shortint::engine::ShortintEngine;
    use tfhe::shortint::parameters::{
        ModulusSwitchType, NoiseSquashingParameters,
        NOISE_SQUASHING_PARAM_GPU_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
        NOISE_SQUASHING_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
        PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
    };
-    use tfhe::shortint::server_key::ModulusSwitchNoiseReductionKey;

    fn cuda_pbs_128(c: &mut Criterion) {
        let bench_name = "core_crypto::cuda::pbs128";
@@ -237,29 +235,20 @@ mod cuda {
            squash_params.ciphertext_modulus,
        );

-        let mut engine = ShortintEngine::new();
        let streams = CudaStreams::new_multi_gpu();

-        let modulus_switch_noise_reduction_configuration = match squash_params
-            .modulus_switch_noise_reduction_params
-        {
-            ModulusSwitchType::Standard => None,
-            ModulusSwitchType::DriftTechniqueNoiseReduction(
-                modulus_switch_noise_reduction_params,
-            ) => {
-                let mod_redkey = ModulusSwitchNoiseReductionKey::new(
-                    modulus_switch_noise_reduction_params,
-                    &input_lwe_secret_key,
-                    &mut engine,
-                    input_params.ciphertext_modulus,
-                    input_params.lwe_noise_distribution,
-                );
-                Some(CudaModulusSwitchNoiseReductionConfiguration::from_modulus_switch_noise_reduction_key(&mod_redkey, &streams))
-            }
-            ModulusSwitchType::CenteredMeanNoiseReduction => {
-                Some(CudaModulusSwitchNoiseReductionConfiguration::Centered)
-            }
-        };
+        let modulus_switch_noise_reduction_configuration =
+            match squash_params.modulus_switch_noise_reduction_params {
+                ModulusSwitchType::Standard => None,
+                ModulusSwitchType::DriftTechniqueNoiseReduction(
+                    _modulus_switch_noise_reduction_params,
+                ) => {
+                    panic!("Drift noise reduction is not supported on GPU")
+                }
+                ModulusSwitchType::CenteredMeanNoiseReduction => {
+                    Some(CudaModulusSwitchNoiseReductionConfiguration::Centered)
+                }
+            };

        let cpu_keys: CpuKeys<_> = CpuKeysBuilder::new().bootstrap_key(bsk).build();

--- a/tfhe/src/core_crypto/gpu/algorithms/lwe_programmable_bootstrapping.rs
+++ b/tfhe/src/core_crypto/gpu/algorithms/lwe_programmable_bootstrapping.rs
@@ -127,7 +127,6 @@ pub unsafe fn cuda_programmable_bootstrap_lwe_ciphertext_async<Scalar>(
    );

    let lwe_dimension = input.lwe_dimension();
-    let ct_modulus = input.ciphertext_modulus().raw_modulus_float();
    let num_samples = input.lwe_ciphertext_count();
    programmable_bootstrap_async(
        streams,
@@ -145,7 +144,6 @@ pub unsafe fn cuda_programmable_bootstrap_lwe_ciphertext_async<Scalar>(
        bsk.decomp_level_count(),
        num_samples.0 as u32,
        bsk.ms_noise_reduction_configuration.as_ref(),
-        ct_modulus,
    );
 }

@@ -252,7 +250,6 @@ pub unsafe fn cuda_programmable_bootstrap_128_lwe_ciphertext_async<Scalar>(
        accumulator.0.d_vec.gpu_index(0).get(),
    );
    let lwe_dimension = input.lwe_dimension();
-    let ct_modulus = input.ciphertext_modulus().raw_modulus_float();
    let num_samples = input.lwe_ciphertext_count();
    programmable_bootstrap_128_async(
        streams,
@@ -267,7 +264,6 @@ pub unsafe fn cuda_programmable_bootstrap_128_lwe_ciphertext_async<Scalar>(
        bsk.decomp_level_count(),
        num_samples.0 as u32,
        bsk.ms_noise_reduction_configuration.as_ref(),
-        ct_modulus,
    );
 }

--- a/tfhe/src/core_crypto/gpu/algorithms/test/lwe_programmable_bootstrapping_128.rs
+++ b/tfhe/src/core_crypto/gpu/algorithms/test/lwe_programmable_bootstrapping_128.rs
@@ -22,9 +22,7 @@ use crate::core_crypto::prelude::{
    GlweCiphertextOwned, GlweSecretKey, LweCiphertextCount, LweCiphertextOwned, LweSecretKey,
    Plaintext, SignedDecomposer, UnsignedTorus,
 };
-use crate::shortint::engine::ShortintEngine;
 use crate::shortint::parameters::{ModulusSwitchType, NoiseSquashingParameters};
-use crate::shortint::server_key::ModulusSwitchNoiseReductionKey;
 use crate::shortint::MultiBitPBSParameters;
 use serde::de::DeserializeOwned;
 use serde::Serialize;
@@ -114,7 +112,6 @@ pub fn execute_bootstrap_u128(
            .collect::<Vec<_>>(),
    );

-    let mut engine = ShortintEngine::new();
    let gpu_index = 0;
    let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));

@@ -122,15 +119,8 @@ pub fn execute_bootstrap_u128(
        .modulus_switch_noise_reduction_params
    {
        ModulusSwitchType::Standard => None,
-        ModulusSwitchType::DriftTechniqueNoiseReduction(modulus_switch_noise_reduction_params) => {
-            let ms_red_key = ModulusSwitchNoiseReductionKey::new(
-                modulus_switch_noise_reduction_params,
-                &input_lwe_secret_key,
-                &mut engine,
-                input_params.ciphertext_modulus,
-                input_params.lwe_noise_distribution,
-            );
-            Some(CudaModulusSwitchNoiseReductionConfiguration::from_modulus_switch_noise_reduction_key(&ms_red_key,&stream))
+        ModulusSwitchType::DriftTechniqueNoiseReduction(_modulus_switch_noise_reduction_params) => {
+            panic!("Drift noise reduction is not supported on GPU")
        }
        ModulusSwitchType::CenteredMeanNoiseReduction => {
            Some(CudaModulusSwitchNoiseReductionConfiguration::Centered)
--- a/tfhe/src/core_crypto/gpu/algorithms/test/mod.rs
+++ b/tfhe/src/core_crypto/gpu/algorithms/test/mod.rs
@@ -12,7 +12,6 @@ mod lwe_packing_keyswitch;
 mod lwe_programmable_bootstrapping;
 mod lwe_programmable_bootstrapping_128;
 mod modulus_switch;
-mod modulus_switch_noise_reduction;
 mod noise_distribution;

 pub struct CudaPackingKeySwitchKeys<Scalar: UnsignedInteger> {
--- a/tfhe/src/core_crypto/gpu/algorithms/test/modulus_switch_noise_reduction.rs
+++ b/tfhe/src/core_crypto/gpu/algorithms/test/modulus_switch_noise_reduction.rs
@@ -1,273 +0,0 @@
-use super::super::test::TestResources;
-use crate::core_crypto::commons::test_tools::{arithmetic_mean, check_both_ratio_under, variance};
-use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
-use crate::core_crypto::gpu::{CudaStreams, CudaVec};
-use crate::core_crypto::prelude::*;
-
-use crate::core_crypto::gpu::GpuIndex;
-use rayon::iter::{IntoParallelIterator, ParallelIterator};
-use std::cell::RefCell;
-use tfhe_cuda_backend::bindings::{
-    cuda_improve_noise_modulus_switch_64, cuda_modulus_switch_inplace_64,
-};
-
-#[derive(Copy, Clone)]
-struct MsNoiseReductionTestParams {
-    pub lwe_dimension: LweDimension,
-    pub lwe_noise_distribution: DynamicDistribution<u64>,
-    pub ciphertext_modulus: CiphertextModulus<u64>,
-    pub modulus_switch_zeros_count: LweCiphertextCount,
-    pub bound: NoiseEstimationMeasureBound,
-    pub r_sigma_factor: RSigmaFactor,
-    pub input_variance: Variance,
-    pub log_modulus: CiphertextModulusLog,
-    pub expected_variance_improved: Variance,
-}
-
-const TEST_PARAM: MsNoiseReductionTestParams = MsNoiseReductionTestParams {
-    lwe_dimension: LweDimension(918),
-    lwe_noise_distribution: DynamicDistribution::new_t_uniform(45),
-    ciphertext_modulus: CiphertextModulus::new_native(),
-    modulus_switch_zeros_count: LweCiphertextCount(1449),
-    bound: NoiseEstimationMeasureBound(288230376151711744_f64),
-    r_sigma_factor: RSigmaFactor(13.179852282053789f64),
-    log_modulus: PolynomialSize(2048).to_blind_rotation_input_modulus_log(),
-    expected_variance_improved: Variance(1.40546154228955e-6),
-    input_variance: Variance(2.63039184094559e-7f64),
-};
-
-thread_local! {
-    static TEST_RESOURCES: RefCell<TestResources> = {
-        RefCell::new(TestResources::new())
-    }
-}
-
-fn round_mask_gpu(
-    ct: &mut LweCiphertext<Vec<u64>>,
-    d_ct: &mut CudaLweCiphertextList<u64>,
-    log_modulus: CiphertextModulusLog,
-    lwe_dimension: LweDimension,
-
-    streams: &CudaStreams,
-) {
-    let shift_to_map_to_native = u64::BITS - log_modulus.0 as u32;
-
-    unsafe {
-        //Here i call it with lwe_dimension cause i don't want to change the body
-        cuda_modulus_switch_inplace_64(
-            streams.ptr[0],
-            streams.gpu_indexes[0].get(),
-            d_ct.0.d_vec.as_mut_c_ptr(0),
-            lwe_dimension.0 as u32,
-            log_modulus.0 as u32,
-        );
-    }
-    streams.synchronize();
-    let cpu_lwe_list = d_ct.to_lwe_ciphertext_list(streams);
-
-    let mut ct_after_ms =
-        LweCiphertext::from_container(cpu_lwe_list.into_container(), ct.ciphertext_modulus());
-
-    for val in ct_after_ms.get_mut_mask().as_mut() {
-        *val <<= shift_to_map_to_native;
-    }
-
-    *ct = ct_after_ms;
-}
-
-fn measure_noise_added_by_message_preserving_operation<C1, C2>(
-    sk: &LweSecretKey<C1>,
-    mut ct: LweCiphertext<C2>,
-    message_preserving_operation: impl Fn(&mut LweCiphertext<C2>),
-) -> f64
-where
-    C1: Container<Element = u64>,
-    C2: ContainerMut<Element = u64>,
-{
-    let decrypted_before = decrypt_lwe_ciphertext(sk, &ct);
-
-    message_preserving_operation(&mut ct);
-
-    let decrypted_after = decrypt_lwe_ciphertext(sk, &ct);
-
-    decrypted_after.0.wrapping_sub(decrypted_before.0) as i64 as f64
-}
-
-#[test]
-fn check_noise_improve_modulus_switch_noise_test_param() {
-    check_noise_improve_modulus_switch_noise(TEST_PARAM);
-}
-
-fn check_noise_improve_modulus_switch_noise(
-    ms_noise_reduction_test_params: MsNoiseReductionTestParams,
-) {
-    let MsNoiseReductionTestParams {
-        lwe_dimension,
-        lwe_noise_distribution,
-        ciphertext_modulus,
-        modulus_switch_zeros_count,
-        bound,
-        r_sigma_factor,
-        log_modulus,
-        expected_variance_improved,
-        input_variance,
-    } = ms_noise_reduction_test_params;
-
-    let number_loops = 100_000;
-
-    let mut rsc = TestResources::new();
-
-    let mut sk = LweSecretKeyOwned::new_empty_key(0, lwe_dimension);
-
-    for sk_bit in sk.as_mut().iter_mut().step_by(2) {
-        *sk_bit = 1;
-    }
-
-    let sk_average_bit: f64 =
-        sk.as_view().into_container().iter().sum::<u64>() as f64 / sk.lwe_dimension().0 as f64;
-
-    println!("sk_average_bit {sk_average_bit:.3}");
-
-    let plaintext_list = PlaintextList::new(0, PlaintextCount(modulus_switch_zeros_count.0));
-
-    let mut encryptions_of_zero = LweCiphertextList::new(
-        0,
-        lwe_dimension.to_lwe_size(),
-        modulus_switch_zeros_count,
-        ciphertext_modulus,
-    );
-
-    encrypt_lwe_ciphertext_list(
-        &sk,
-        &mut encryptions_of_zero,
-        &plaintext_list,
-        lwe_noise_distribution,
-        &mut rsc.encryption_random_generator,
-    );
-
-    let gpu_index = 0;
-    let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
-    let num_blocks = 1;
-    let lwe_indexes: Vec<u64> = (0..num_blocks).map(|x| x as u64).collect();
-    let mut d_input_indexes = unsafe { CudaVec::<u64>::new_async(num_blocks, &streams, 0) };
-    unsafe { d_input_indexes.copy_from_cpu_async(&lwe_indexes, &streams, 0) };
-
-    let d_encryptions_of_zero = CudaLweCiphertextList::from_lwe_ciphertext_list(
-        &encryptions_of_zero,
-        &CudaStreams::new_single_gpu(GpuIndex::new(0)),
-    );
-    let num_streams = 16;
-    let vec_streams = (0..num_streams)
-        .map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
-        .collect::<Vec<_>>();
-    let (ms_errors, ms_errors_improved): (Vec<_>, Vec<_>) = (0..number_loops)
-        .into_par_iter()
-        .map(|index| {
-            let stream_index = index % num_streams as usize;
-            let local_stream = &vec_streams[stream_index];
-            let lwe = TEST_RESOURCES.with(|rsc| {
-                allocate_and_encrypt_new_lwe_ciphertext(
-                    &sk,
-                    Plaintext(0),
-                    lwe_noise_distribution,
-                    ciphertext_modulus,
-                    &mut rsc.borrow_mut().encryption_random_generator,
-                )
-            });
-
-            (
-                measure_noise_added_by_message_preserving_operation(&sk, lwe.clone(), |ct| {
-                    let mut d_ct = CudaLweCiphertextList::from_lwe_ciphertext(ct, local_stream);
-                    round_mask_gpu(ct, &mut d_ct, log_modulus, lwe_dimension, local_stream);
-                }),
-                measure_noise_added_by_message_preserving_operation(&sk, lwe.clone(), |ct| {
-                    let mut d_ct = CudaLweCiphertextList::from_lwe_ciphertext(ct, local_stream);
-                    let d_ct_in = CudaLweCiphertextList::from_lwe_ciphertext(ct, local_stream);
-                    let modulus = lwe.ciphertext_modulus().raw_modulus_float();
-                    unsafe {
-                        cuda_improve_noise_modulus_switch_64(
-                            local_stream.ptr[0],
-                            streams.gpu_indexes[0].get(),
-                            d_ct.0.d_vec.as_mut_c_ptr(0),
-                            d_ct_in.0.d_vec.as_c_ptr(0),
-                            d_input_indexes.as_c_ptr(0),
-                            d_encryptions_of_zero.0.d_vec.as_c_ptr(0),
-                            lwe_dimension.to_lwe_size().0 as u32,
-                            d_ct.lwe_ciphertext_count().0 as u32,
-                            d_encryptions_of_zero.lwe_ciphertext_count().0 as u32,
-                            input_variance.get_modular_variance(modulus).value,
-                            r_sigma_factor.0,
-                            bound.0,
-                            log_modulus.0 as u32,
-                        );
-                    }
-
-                    round_mask_gpu(ct, &mut d_ct, log_modulus, lwe_dimension, local_stream);
-                }),
-            )
-        })
-        .unzip();
-
-    let ms_error_arithmetic_mean = arithmetic_mean(&ms_errors);
-
-    println!(
-        "arithmetic_mean(&ms_errors)                     {}2^{:.2}",
-        if ms_error_arithmetic_mean > 0_f64 {
-            "+"
-        } else {
-            "-"
-        },
-        ms_error_arithmetic_mean.abs().log2()
-    );
-
-    let ms_error_improved_arithmetic_mean = arithmetic_mean(&ms_errors_improved);
-
-    println!(
-        "arithmetic_mean(&ms_errors_improved)            {}2^{:.2}",
-        if ms_error_improved_arithmetic_mean > 0_f64 {
-            "+"
-        } else {
-            "-"
-        },
-        ms_error_improved_arithmetic_mean.abs().log2()
-    );
-
-    let base_variance = variance(&ms_errors).0;
-
-    println!(
-        "variance(&ms_errors),                    2^{:.2}",
-        base_variance.log2(),
-    );
-
-    let variance_improved = variance(&ms_errors_improved).0;
-
-    println!(
-        "variance(&ms_errors_improved)            2^{:.2}, ratio: {:.3}",
-        variance_improved.log2(),
-        variance_improved / base_variance,
-    );
-
-    let modulus = ciphertext_modulus.raw_modulus_float();
-
-    let expected_base_variance = {
-        let lwe_dim = lwe_dimension.0 as f64;
-
-        let poly_size = 2_f64.powi((log_modulus.0 - 1) as i32);
-
-        (lwe_dim + 2.) * modulus * modulus / (96. * poly_size * poly_size) + (lwe_dim - 4.) / 48.
-    };
-
-    assert!(
-        check_both_ratio_under(base_variance, expected_base_variance, 1.03_f64),
-        "Expected {expected_base_variance}, got {base_variance}",
-    );
-
-    let expected_variance_improved = Variance(expected_variance_improved.0 - input_variance.0)
-        .get_modular_variance(modulus)
-        .value;
-
-    assert!(
-        check_both_ratio_under(variance_improved, expected_variance_improved, 1.03_f64),
-        "Expected {expected_variance_improved}, got {variance_improved}",
-    );
-}
--- a/tfhe/src/core_crypto/gpu/entities/lwe_bootstrap_key.rs
+++ b/tfhe/src/core_crypto/gpu/entities/lwe_bootstrap_key.rs
@@ -1,85 +1,14 @@
-use crate::core_crypto::commons::dispersion::DispersionParameter;
 use crate::core_crypto::gpu::vec::CudaVec;
-use crate::core_crypto::gpu::{
-    convert_lwe_programmable_bootstrap_key_async, CudaModulusSwitchNoiseReductionKeyFFI,
-    CudaStreams,
-};
+use crate::core_crypto::gpu::{convert_lwe_programmable_bootstrap_key_async, CudaStreams};
 use crate::core_crypto::prelude::{
    lwe_bootstrap_key_size, Container, DecompositionBaseLog, DecompositionLevelCount,
-    GlweDimension, LweBootstrapKey, LweDimension, NoiseEstimationMeasureBound, PolynomialSize,
-    RSigmaFactor, UnsignedInteger, Variance,
+    GlweDimension, LweBootstrapKey, LweDimension, PolynomialSize, UnsignedInteger,
 };
-use crate::shortint::server_key::ModulusSwitchNoiseReductionKey;
-#[derive(Clone, Debug)]
-#[allow(dead_code)]
-pub struct CudaModulusSwitchDriftNoiseReductionKey {
-    pub modulus_switch_zeros: CudaVec<u64>,
-    pub ms_bound: NoiseEstimationMeasureBound,
-    pub ms_r_sigma_factor: RSigmaFactor,
-    pub ms_input_variance: Variance,
-    pub num_zeros: u32,
-}
 #[derive(Clone, Debug)]
 pub enum CudaModulusSwitchNoiseReductionConfiguration {
-    Drift(CudaModulusSwitchDriftNoiseReductionKey),
    Centered,
 }

-impl CudaModulusSwitchNoiseReductionConfiguration {
-    pub fn from_modulus_switch_noise_reduction_key(
-        ms_noise_red_key: &ModulusSwitchNoiseReductionKey<u64>,
-        streams: &CudaStreams,
-    ) -> Self {
-        let h_input = ms_noise_red_key
-            .modulus_switch_zeros
-            .as_view()
-            .into_container();
-        let lwe_ciphertext_count = ms_noise_red_key.modulus_switch_zeros.lwe_ciphertext_count();
-
-        let mut d_zeros_vec = CudaVec::new_multi_gpu(
-            ms_noise_red_key.modulus_switch_zeros.lwe_size().0 * lwe_ciphertext_count.0,
-            streams,
-        );
-
-        unsafe {
-            d_zeros_vec.copy_from_cpu_multi_gpu_async(h_input, streams);
-        }
-
-        streams.synchronize();
-        Self::Drift(CudaModulusSwitchDriftNoiseReductionKey {
-            modulus_switch_zeros: d_zeros_vec,
-            num_zeros: ms_noise_red_key
-                .modulus_switch_zeros
-                .lwe_ciphertext_count()
-                .0 as u32,
-            ms_bound: ms_noise_red_key.ms_bound,
-            ms_r_sigma_factor: ms_noise_red_key.ms_r_sigma_factor,
-            ms_input_variance: ms_noise_red_key.ms_input_variance,
-        })
-    }
-}
-
-pub fn prepare_cuda_ms_noise_reduction_key_ffi(
-    input_ms_key: Option<&CudaModulusSwitchDriftNoiseReductionKey>,
-    modulus: f64,
-) -> CudaModulusSwitchNoiseReductionKeyFFI {
-    input_ms_key.map_or(
-        CudaModulusSwitchNoiseReductionKeyFFI {
-            ptr: std::ptr::null_mut(),
-            num_zeros: 0,
-            ms_bound: 0.0,
-            ms_r_sigma: 0.0,
-            ms_input_variance: 0.0,
-        },
-        |ms_key| CudaModulusSwitchNoiseReductionKeyFFI {
-            ptr: ms_key.modulus_switch_zeros.ptr.as_ptr(),
-            num_zeros: ms_key.num_zeros,
-            ms_bound: ms_key.ms_bound.0,
-            ms_r_sigma: ms_key.ms_r_sigma_factor.0,
-            ms_input_variance: ms_key.ms_input_variance.get_modular_variance(modulus).value,
-        },
-    )
-}
 /// A structure representing a vector of GLWE ciphertexts with 64 bits of precision on the GPU.
 #[derive(Debug)]
 #[allow(dead_code)]
--- a/tfhe/src/core_crypto/gpu/mod.rs
+++ b/tfhe/src/core_crypto/gpu/mod.rs
@@ -3,9 +3,7 @@ pub mod entities;
 pub mod slice;
 pub mod vec;

-use crate::core_crypto::gpu::lwe_bootstrap_key::{
-    prepare_cuda_ms_noise_reduction_key_ffi, CudaModulusSwitchNoiseReductionConfiguration,
-};
+use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
 use crate::core_crypto::gpu::vec::{CudaVec, GpuIndex};
 use crate::core_crypto::prelude::{
    CiphertextModulus, DecompositionBaseLog, DecompositionLevelCount, GlweCiphertextCount,
@@ -30,7 +28,6 @@ unsafe impl Sync for CudaStreams {}

 pub enum PBSMSNoiseReductionType {
    NoReduction = PBS_MS_REDUCTION_T_NO_REDUCTION as isize,
-    Drift = PBS_MS_REDUCTION_T_DRIFT as isize,
    Centered = PBS_MS_REDUCTION_T_CENTERED as isize,
 }

@@ -160,32 +157,16 @@ pub unsafe fn programmable_bootstrap_async<T: UnsignedInteger>(
    level: DecompositionLevelCount,
    num_samples: u32,
    ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
-    ct_modulus: f64,
 ) {
    let num_many_lut = 1u32;
    let lut_stride = 0u32;
    let mut pbs_buffer: *mut i8 = std::ptr::null_mut();

-    // Initializes as NoReduction and change variables later if otherwise
+    // No configuration means NoReduction; any provided configuration selects Centered
-    let mut noise_reduction_type = PBSMSNoiseReductionType::NoReduction;
-    let mut ms_noise_reduction_key_ffi = prepare_cuda_ms_noise_reduction_key_ffi(None, ct_modulus);
-    let mut ms_noise_reduction_ptr = std::ptr::null_mut();
-
-    match ms_noise_reduction_configuration {
-        None => {
-            noise_reduction_type = PBSMSNoiseReductionType::NoReduction;
-        }
-        Some(config) => match config {
-            CudaModulusSwitchNoiseReductionConfiguration::Drift(noise_reduction_key) => {
-                ms_noise_reduction_key_ffi =
-                    prepare_cuda_ms_noise_reduction_key_ffi(Some(noise_reduction_key), ct_modulus);
-                ms_noise_reduction_ptr = noise_reduction_key.modulus_switch_zeros.ptr[0];
-            }
-            CudaModulusSwitchNoiseReductionConfiguration::Centered => {
-                noise_reduction_type = PBSMSNoiseReductionType::Centered;
-            }
-        },
-    }
+    let noise_reduction_type = ms_noise_reduction_configuration
+        .map_or(PBSMSNoiseReductionType::NoReduction, |_config| {
+            PBSMSNoiseReductionType::Centered
+        });

    scratch_cuda_programmable_bootstrap_64(
        streams.ptr[0],
@@ -210,8 +191,6 @@ pub unsafe fn programmable_bootstrap_async<T: UnsignedInteger>(
        lwe_array_in.as_c_ptr(0),
        lwe_in_indexes.as_c_ptr(0),
        bootstrapping_key.as_c_ptr(0),
-        &raw const ms_noise_reduction_key_ffi,
-        ms_noise_reduction_ptr,
        pbs_buffer,
        lwe_dimension.0 as u32,
        glwe_dimension.0 as u32,
@@ -241,16 +220,9 @@ pub fn get_programmable_bootstrap_size_on_gpu(
    ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
 ) -> u64 {
    let mut pbs_buffer: *mut i8 = std::ptr::null_mut();
-    let noise_reduction_type =
-        ms_noise_reduction_configuration.map_or(PBSMSNoiseReductionType::NoReduction, |config| {
-            match config {
-                CudaModulusSwitchNoiseReductionConfiguration::Drift(_) => {
-                    PBSMSNoiseReductionType::Drift
-                }
-                CudaModulusSwitchNoiseReductionConfiguration::Centered => {
-                    PBSMSNoiseReductionType::Centered
-                }
-            }
+    let noise_reduction_type = ms_noise_reduction_configuration
+        .map_or(PBSMSNoiseReductionType::NoReduction, |_config| {
+            PBSMSNoiseReductionType::Centered
        });
    let size_tracker = unsafe {
        scratch_cuda_programmable_bootstrap_64(
@@ -297,30 +269,14 @@ pub unsafe fn programmable_bootstrap_128_async<T: UnsignedInteger>(
    level: DecompositionLevelCount,
    num_samples: u32,
    ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
-    ct_modulus: f64,
 ) {
    let mut pbs_buffer: *mut i8 = std::ptr::null_mut();

-    // Initializes as NoReduction and change variables later if otherwise
+    // No configuration means NoReduction; any provided configuration selects Centered
-    let mut noise_reduction_type = PBSMSNoiseReductionType::NoReduction;
-    let mut ms_noise_reduction_key_ffi = prepare_cuda_ms_noise_reduction_key_ffi(None, ct_modulus);
-    let mut ms_noise_reduction_ptr = std::ptr::null_mut();
-
-    match ms_noise_reduction_configuration {
-        None => {
-            noise_reduction_type = PBSMSNoiseReductionType::NoReduction;
-        }
-        Some(config) => match config {
-            CudaModulusSwitchNoiseReductionConfiguration::Drift(noise_reduction_key) => {
-                ms_noise_reduction_key_ffi =
-                    prepare_cuda_ms_noise_reduction_key_ffi(Some(noise_reduction_key), ct_modulus);
-                ms_noise_reduction_ptr = noise_reduction_key.modulus_switch_zeros.ptr[0];
-            }
-            CudaModulusSwitchNoiseReductionConfiguration::Centered => {
-                noise_reduction_type = PBSMSNoiseReductionType::Centered;
-            }
-        },
-    }
+    let noise_reduction_type = ms_noise_reduction_configuration
+        .map_or(PBSMSNoiseReductionType::NoReduction, |_config| {
+            PBSMSNoiseReductionType::Centered
+        });

    scratch_cuda_programmable_bootstrap_128(
        streams.ptr[0],
@@ -342,8 +298,6 @@ pub unsafe fn programmable_bootstrap_128_async<T: UnsignedInteger>(
        test_vector.as_c_ptr(0),
        lwe_array_in.as_c_ptr(0),
        bootstrapping_key.as_c_ptr(0),
-        &raw const ms_noise_reduction_key_ffi,
-        ms_noise_reduction_ptr,
        pbs_buffer,
        lwe_dimension.0 as u32,
        glwe_dimension.0 as u32,
--- a/tfhe/src/integer/gpu/mod.rs
+++ b/tfhe/src/integer/gpu/mod.rs
@@ -7,9 +7,7 @@ pub mod server_key;
 #[cfg(feature = "zk-pok")]
 pub mod zk;

-use crate::core_crypto::gpu::lwe_bootstrap_key::{
-    prepare_cuda_ms_noise_reduction_key_ffi, CudaModulusSwitchNoiseReductionConfiguration,
-};
+use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
 use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
 use crate::core_crypto::gpu::slice::{CudaSlice, CudaSliceMut};
 use crate::core_crypto::gpu::vec::CudaVec;
@@ -82,9 +80,6 @@ fn resolve_noise_reduction_type(
 ) -> PBSMSNoiseReductionType {
    ms_noise_reduction_configuration.map_or(PBSMSNoiseReductionType::NoReduction, |config| {
        match config {
-            CudaModulusSwitchNoiseReductionConfiguration::Drift(_) => {
-                PBSMSNoiseReductionType::Drift
-            }
            CudaModulusSwitchNoiseReductionConfiguration::Centered => {
                PBSMSNoiseReductionType::Centered
            }
@@ -94,27 +89,13 @@ fn resolve_noise_reduction_type(

 fn resolve_ms_noise_reduction_config(
    ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
-    ct_modulus: f64,
-) -> (
-    PBSMSNoiseReductionType,
-    CudaModulusSwitchNoiseReductionKeyFFI,
-) {
+) -> PBSMSNoiseReductionType {
    ms_noise_reduction_configuration.map_or_else(
-        || {
-            (
-                PBSMSNoiseReductionType::NoReduction,
-                prepare_cuda_ms_noise_reduction_key_ffi(None, ct_modulus),
-            )
-        },
+        || PBSMSNoiseReductionType::NoReduction,
        |config| match config {
-            CudaModulusSwitchNoiseReductionConfiguration::Drift(noise_reduction_key) => (
-                PBSMSNoiseReductionType::Drift,
-                prepare_cuda_ms_noise_reduction_key_ffi(Some(noise_reduction_key), ct_modulus),
-            ),
-            CudaModulusSwitchNoiseReductionConfiguration::Centered => (
-                PBSMSNoiseReductionType::Centered,
-                prepare_cuda_ms_noise_reduction_key_ffi(None, ct_modulus),
-            ),
+            CudaModulusSwitchNoiseReductionConfiguration::Centered => {
+                PBSMSNoiseReductionType::Centered
+            }
        },
    )
 }
@@ -432,9 +413,7 @@ pub unsafe fn unchecked_scalar_mul_integer_radix_kb_async<T: UnsignedInteger, B:
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = lwe_array.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut lwe_array_degrees = lwe_array.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -486,7 +465,6 @@ pub unsafe fn unchecked_scalar_mul_integer_radix_kb_async<T: UnsignedInteger, B:
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        polynomial_size.0 as u32,
        message_modulus.0 as u32,
        num_scalars,
@@ -1220,12 +1198,7 @@ pub unsafe fn unchecked_mul_integer_radix_kb_assign_async<T: UnsignedInteger, B:
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_lwe_left
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_left_degrees = radix_lwe_left
@@ -1297,7 +1270,6 @@ pub unsafe fn unchecked_mul_integer_radix_kb_assign_async<T: UnsignedInteger, B:
        is_boolean_right,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        mem_ptr,
        polynomial_size.0 as u32,
        num_blocks,
@@ -1411,12 +1383,7 @@ pub unsafe fn unchecked_bitop_integer_radix_kb_assign_async<T: UnsignedInteger,
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_lwe_left
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_left_degrees = radix_lwe_left
@@ -1487,7 +1454,6 @@ pub unsafe fn unchecked_bitop_integer_radix_kb_assign_async<T: UnsignedInteger,
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_bitop(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(radix_lwe_left, &cuda_ffi_radix_lwe_left);
@@ -1602,9 +1568,7 @@ pub unsafe fn unchecked_scalar_bitop_integer_radix_kb_assign_async<
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_lwe.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_degrees = radix_lwe.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -1649,7 +1613,6 @@ pub unsafe fn unchecked_scalar_bitop_integer_radix_kb_assign_async<
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_bitop(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(radix_lwe, &cuda_ffi_radix_lwe);
@@ -1769,12 +1732,7 @@ pub unsafe fn unchecked_comparison_integer_radix_kb_async<T: UnsignedInteger, B:
        keyswitch_key.gpu_index(0).get(),
    );

-    let ct_modulus = radix_lwe_left
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_out_degrees = radix_lwe_out
@@ -1860,7 +1818,6 @@ pub unsafe fn unchecked_comparison_integer_radix_kb_async<T: UnsignedInteger, B:
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );

    cleanup_cuda_integer_comparison(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
@@ -1984,12 +1941,7 @@ pub unsafe fn unchecked_scalar_comparison_integer_radix_kb_async<T: UnsignedInte
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_lwe_in
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_out_degrees = radix_lwe_out
@@ -2057,7 +2009,6 @@ pub unsafe fn unchecked_scalar_comparison_integer_radix_kb_async<T: UnsignedInte
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        num_scalar_blocks,
    );

@@ -2110,12 +2061,7 @@ pub unsafe fn full_propagate_assign_async<T: UnsignedInteger, B: Numeric>(
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_lwe_input
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_input_degrees = radix_lwe_input
@@ -2157,7 +2103,6 @@ pub unsafe fn full_propagate_assign_async<T: UnsignedInteger, B: Numeric>(
        &raw mut cuda_ffi_radix_lwe_input,
        mem_ptr,
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        bootstrapping_key.ptr.as_ptr(),
        num_blocks,
    );
@@ -2259,12 +2204,7 @@ pub(crate) unsafe fn propagate_single_carry_assign_async<T: UnsignedInteger, B:
        keyswitch_key.gpu_index(0).get(),
    );

-    let ct_modulus = radix_lwe_input
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let big_lwe_dimension: u32 = glwe_dimension.0 as u32 * polynomial_size.0 as u32;
@@ -2334,7 +2274,6 @@ pub(crate) unsafe fn propagate_single_carry_assign_async<T: UnsignedInteger, B:
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        requested_flag as u32,
        uses_carry,
    );
@@ -2515,10 +2454,7 @@ pub(crate) unsafe fn sub_and_propagate_single_carry_assign_async<T: UnsignedInte
        keyswitch_key.gpu_index(0).get(),
    );

-    let ct_modulus = lhs_input.d_blocks.ciphertext_modulus().raw_modulus_float();
-
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();

@@ -2603,7 +2539,6 @@ pub(crate) unsafe fn sub_and_propagate_single_carry_assign_async<T: UnsignedInte
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        requested_flag as u32,
        uses_carry,
    );
@@ -2686,9 +2621,7 @@ pub(crate) unsafe fn add_and_propagate_single_carry_assign_async<T: UnsignedInte
        keyswitch_key.gpu_index(0).get(),
    );

-    let ct_modulus = lhs_input.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let big_lwe_dimension: u32 = glwe_dimension.0 as u32 * polynomial_size.0 as u32;
@@ -2766,7 +2699,6 @@ pub(crate) unsafe fn add_and_propagate_single_carry_assign_async<T: UnsignedInte
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        requested_flag as u32,
        uses_carry,
    );
@@ -2808,12 +2740,7 @@ pub(crate) unsafe fn grouped_oprf_async<B: Numeric>(
    assert_eq!(streams.gpu_indexes[0], seeded_lwe_input.gpu_index(0));
    assert_eq!(streams.gpu_indexes[0], bootstrapping_key.gpu_index(0),);

-    let ct_modulus = radix_lwe_out
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();

@@ -2860,7 +2787,6 @@ pub(crate) unsafe fn grouped_oprf_async<B: Numeric>(
        num_blocks_to_process,
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );

    cleanup_cuda_integer_grouped_oprf_64(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
@@ -3043,9 +2969,7 @@ pub unsafe fn unchecked_unsigned_scalar_div_rem_integer_radix_kb_assign_async<
            .collect::<Vec<_>>();
    let clear_blocks = CudaVec::from_cpu_async(&h_clear_blocks, streams, 0);

-    let ct_modulus = quotient.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();

@@ -3096,7 +3020,6 @@ pub unsafe fn unchecked_unsigned_scalar_div_rem_integer_radix_kb_assign_async<
        mem_ptr,
        bsks.ptr.as_ptr(),
        ksks.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        &raw const scalar_divisor_ffi,
        divisor_has_at_least_one_set.as_ptr(),
        decomposed_divisor.as_ptr(),
@@ -3223,9 +3146,7 @@ pub unsafe fn unchecked_signed_scalar_div_rem_integer_radix_kb_assign_async<
        0u32
    };

-    let ct_modulus = quotient.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();

@@ -3277,7 +3198,6 @@ pub unsafe fn unchecked_signed_scalar_div_rem_integer_radix_kb_assign_async<
        mem_ptr,
        bsks.ptr.as_ptr(),
        ksks.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        &raw const scalar_divisor_ffi,
        divisor_has_at_least_one_set.as_ptr(),
        decomposed_divisor.as_ptr(),
@@ -3628,9 +3548,7 @@ pub unsafe fn unchecked_unsigned_scalar_div_integer_radix_kb_assign_async<
        0u32
    };

-    let ct_modulus = numerator.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();

@@ -3673,7 +3591,6 @@ pub unsafe fn unchecked_unsigned_scalar_div_integer_radix_kb_assign_async<
        mem_ptr,
        bsks.ptr.as_ptr(),
        ksks.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        &raw const scalar_divisor_ffi,
    );

@@ -3774,9 +3691,7 @@ pub unsafe fn unchecked_signed_scalar_div_integer_radix_kb_assign_async<
        0u32
    };

-    let ct_modulus = numerator.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();

@@ -3819,7 +3734,6 @@ pub unsafe fn unchecked_signed_scalar_div_integer_radix_kb_assign_async<
        mem_ptr,
        bsks.ptr.as_ptr(),
        ksks.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        &raw const scalar_divisor_ffi,
        numerator_bits,
    );
@@ -3883,9 +3797,7 @@ pub unsafe fn unchecked_scalar_left_shift_integer_radix_kb_assign_async<
        keyswitch_key.gpu_index(0).get(),
    );

-    let ct_modulus = input.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_left_degrees = input.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -3924,7 +3836,6 @@ pub unsafe fn unchecked_scalar_left_shift_integer_radix_kb_assign_async<
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_logical_scalar_shift(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(input, &cuda_ffi_radix_lwe_left);
@@ -3981,9 +3892,7 @@ pub unsafe fn unchecked_scalar_logical_right_shift_integer_radix_kb_assign_async
        keyswitch_key.gpu_index(0).get(),
    );

-    let ct_modulus = input.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_left_degrees = input.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -4022,7 +3931,6 @@ pub unsafe fn unchecked_scalar_logical_right_shift_integer_radix_kb_assign_async
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_logical_scalar_shift(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(input, &cuda_ffi_radix_lwe_left);
@@ -4078,9 +3986,7 @@ pub unsafe fn unchecked_scalar_arithmetic_right_shift_integer_radix_kb_assign_as
        keyswitch_key.gpu_index(0).get(),
    );

-    let ct_modulus = input.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_left_degrees = input.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -4119,7 +4025,6 @@ pub unsafe fn unchecked_scalar_arithmetic_right_shift_integer_radix_kb_assign_as
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_arithmetic_scalar_shift(
        streams.ffi(),
@@ -4186,12 +4091,7 @@ pub unsafe fn unchecked_right_shift_integer_radix_kb_assign_async<
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_input
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut radix_lwe_left_degrees = radix_input.info.blocks.iter().map(|b| b.degree.0).collect();
    let mut radix_lwe_left_noise_levels = radix_input
@@ -4248,7 +4148,6 @@ pub unsafe fn unchecked_right_shift_integer_radix_kb_assign_async<
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_shift_and_rotate(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(radix_input, &cuda_ffi_radix_lwe_left);
@@ -4309,12 +4208,7 @@ pub unsafe fn unchecked_left_shift_integer_radix_kb_assign_async<T: UnsignedInte
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_input
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut radix_lwe_left_degrees = radix_input.info.blocks.iter().map(|b| b.degree.0).collect();
    let mut radix_lwe_left_noise_levels = radix_input
@@ -4371,7 +4265,6 @@ pub unsafe fn unchecked_left_shift_integer_radix_kb_assign_async<T: UnsignedInte
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_shift_and_rotate(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(radix_input, &cuda_ffi_radix_lwe_left);
@@ -4435,12 +4328,7 @@ pub unsafe fn unchecked_rotate_right_integer_radix_kb_assign_async<
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_input
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut radix_lwe_left_degrees = radix_input.info.blocks.iter().map(|b| b.degree.0).collect();
    let mut radix_lwe_left_noise_levels = radix_input
@@ -4502,7 +4390,6 @@ pub unsafe fn unchecked_rotate_right_integer_radix_kb_assign_async<
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_shift_and_rotate(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(radix_input, &cuda_ffi_radix_lwe_left);
@@ -4566,12 +4453,7 @@ pub unsafe fn unchecked_rotate_left_integer_radix_kb_assign_async<
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_input
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut radix_lwe_left_degrees = radix_input.info.blocks.iter().map(|b| b.degree.0).collect();
    let mut radix_lwe_left_noise_levels = radix_input
@@ -4633,7 +4515,6 @@ pub unsafe fn unchecked_rotate_left_integer_radix_kb_assign_async<
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_shift_and_rotate(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(radix_input, &cuda_ffi_radix_lwe_left);
@@ -5082,12 +4963,7 @@ pub unsafe fn unchecked_cmux_integer_radix_kb_async<T: UnsignedInteger, B: Numer
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_lwe_out
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut radix_lwe_out_degrees = radix_lwe_out
        .info
@@ -5190,7 +5066,6 @@ pub unsafe fn unchecked_cmux_integer_radix_kb_async<T: UnsignedInteger, B: Numer
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_cmux(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(radix_lwe_out, &cuda_ffi_radix_lwe_out);
@@ -5294,12 +5169,7 @@ pub unsafe fn unchecked_scalar_rotate_left_integer_radix_kb_assign_async<
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_input
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_left_degrees = radix_input.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -5341,7 +5211,6 @@ pub unsafe fn unchecked_scalar_rotate_left_integer_radix_kb_assign_async<
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_scalar_rotate(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(radix_input, &cuda_ffi_radix_lwe_left);
@@ -5397,12 +5266,7 @@ pub unsafe fn unchecked_scalar_rotate_right_integer_radix_kb_assign_async<
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_input
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_left_degrees = radix_input.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -5444,7 +5308,6 @@ pub unsafe fn unchecked_scalar_rotate_right_integer_radix_kb_assign_async<
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_scalar_rotate(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(radix_input, &cuda_ffi_radix_lwe_left);
@@ -5606,9 +5469,7 @@ pub unsafe fn unchecked_partial_sum_ciphertexts_integer_radix_kb_assign_async<
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_list.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut result_degrees = result.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -5654,7 +5515,6 @@ pub unsafe fn unchecked_partial_sum_ciphertexts_integer_radix_kb_assign_async<
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_radix_partial_sum_ciphertexts_vec(
        streams.ffi(),
@@ -5688,10 +5548,8 @@ pub unsafe fn extend_radix_with_sign_msb_async<T: UnsignedInteger, B: Numeric>(
 ) {
    let message_modulus = ct.info.blocks.first().unwrap().message_modulus;
    let carry_modulus = ct.info.blocks.first().unwrap().carry_modulus;
-    let ct_modulus = ct.d_blocks.ciphertext_modulus().raw_modulus_float();

-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();

@@ -5733,7 +5591,6 @@ pub unsafe fn extend_radix_with_sign_msb_async<T: UnsignedInteger, B: Numeric>(
        num_additional_blocks,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );

    cleanup_cuda_extend_radix_with_sign_msb_64(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
@@ -5769,7 +5626,6 @@ pub unsafe fn apply_univariate_lut_kb_async<T: UnsignedInteger, B: Numeric>(
    pbs_type: PBSType,
    grouping_factor: LweBskGroupingFactor,
    ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
-    ct_modulus: f64,
 ) {
    assert_eq!(
        streams.gpu_indexes[0],
@@ -5800,8 +5656,7 @@ pub unsafe fn apply_univariate_lut_kb_async<T: UnsignedInteger, B: Numeric>(
        keyswitch_key.gpu_index(0).get(),
    );

-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut cuda_ffi_output = prepare_cuda_radix_ffi_from_slice_mut(
@@ -5844,7 +5699,6 @@ pub unsafe fn apply_univariate_lut_kb_async<T: UnsignedInteger, B: Numeric>(
        &raw const cuda_ffi_input,
        mem_ptr,
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        bootstrapping_key.ptr.as_ptr(),
    );
    cleanup_cuda_apply_univariate_lut_kb_64(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
@@ -5880,7 +5734,6 @@ pub unsafe fn apply_many_univariate_lut_kb_async<T: UnsignedInteger, B: Numeric>
    num_many_lut: u32,
    lut_stride: u32,
    ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
-    ct_modulus: f64,
 ) {
    assert_eq!(
        streams.gpu_indexes[0],
@@ -5910,8 +5763,7 @@ pub unsafe fn apply_many_univariate_lut_kb_async<T: UnsignedInteger, B: Numeric>
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut cuda_ffi_output = prepare_cuda_radix_ffi_from_slice_mut(
@@ -5955,7 +5807,6 @@ pub unsafe fn apply_many_univariate_lut_kb_async<T: UnsignedInteger, B: Numeric>
        &raw const cuda_ffi_input,
        mem_ptr,
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        bootstrapping_key.ptr.as_ptr(),
        num_many_lut,
        lut_stride,
@@ -5993,7 +5844,6 @@ pub unsafe fn apply_bivariate_lut_kb_async<T: UnsignedInteger, B: Numeric>(
    grouping_factor: LweBskGroupingFactor,
    shift: u32,
    ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
-    ct_modulus: f64,
 ) {
    assert_eq!(
        streams.gpu_indexes[0],
@@ -6031,8 +5881,7 @@ pub unsafe fn apply_bivariate_lut_kb_async<T: UnsignedInteger, B: Numeric>(
        keyswitch_key.gpu_index(0).get(),
    );

-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut cuda_ffi_output = prepare_cuda_radix_ffi_from_slice_mut(
@@ -6083,7 +5932,6 @@ pub unsafe fn apply_bivariate_lut_kb_async<T: UnsignedInteger, B: Numeric>(
        &raw const cuda_ffi_input_2,
        mem_ptr,
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        bootstrapping_key.ptr.as_ptr(),
        num_blocks,
        shift,
@@ -6162,9 +6010,7 @@ pub unsafe fn unchecked_div_rem_integer_radix_kb_assign_async<T: UnsignedInteger
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = numerator.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut quotient_degrees = quotient.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -6239,7 +6085,6 @@ pub unsafe fn unchecked_div_rem_integer_radix_kb_assign_async<T: UnsignedInteger
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_div_rem(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(quotient, &cuda_ffi_quotient);
@@ -6346,10 +6191,8 @@ pub unsafe fn count_of_consecutive_bits_async<T: UnsignedInteger, B: Numeric>(

    let num_blocks = input_ct.d_blocks.lwe_ciphertext_count().0 as u32;
    let counter_num_blocks = output_ct.d_blocks.lwe_ciphertext_count().0 as u32;
-    let ct_modulus = input_ct.d_blocks.ciphertext_modulus().raw_modulus_float();

-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();

@@ -6402,7 +6245,6 @@ pub unsafe fn count_of_consecutive_bits_async<T: UnsignedInteger, B: Numeric>(
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );

    cleanup_cuda_integer_count_of_consecutive_bits_kb_64(
@@ -6459,9 +6301,7 @@ pub(crate) unsafe fn ilog2_async<T: UnsignedInteger, B: Numeric>(
        trivial_ct_m_minus_1_block.d_blocks.0.d_vec.gpu_index(0)
    );

-    let ct_modulus = input.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();

@@ -6559,7 +6399,6 @@ pub(crate) unsafe fn ilog2_async<T: UnsignedInteger, B: Numeric>(
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );

    cleanup_cuda_integer_ilog2_kb_64(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
@@ -6597,7 +6436,6 @@ pub unsafe fn compute_prefix_sum_hillis_steele_async<T: UnsignedInteger, B: Nume
    pbs_type: PBSType,
    grouping_factor: LweBskGroupingFactor,
    ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
-    ct_modulus: f64,
 ) {
    assert_eq!(
        streams.gpu_indexes[0],
@@ -6628,8 +6466,7 @@ pub unsafe fn compute_prefix_sum_hillis_steele_async<T: UnsignedInteger, B: Nume
        keyswitch_key.gpu_index(0).get(),
    );

-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut cuda_ffi_output = prepare_cuda_radix_ffi_from_slice_mut(
@@ -6673,7 +6510,6 @@ pub unsafe fn compute_prefix_sum_hillis_steele_async<T: UnsignedInteger, B: Nume
        &raw mut cuda_ffi_generates_or_propagates,
        mem_ptr,
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        bootstrapping_key.ptr.as_ptr(),
        num_blocks,
    );
@@ -6782,12 +6618,7 @@ pub(crate) unsafe fn unchecked_unsigned_overflowing_sub_integer_radix_kb_assign_
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_lwe_left
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let big_lwe_dimension: u32 = glwe_dimension.0 as u32 * polynomial_size.0 as u32;
@@ -6875,7 +6706,6 @@ pub(crate) unsafe fn unchecked_unsigned_overflowing_sub_integer_radix_kb_assign_
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        compute_overflow as u32,
        uses_input_borrow,
    );
@@ -6931,9 +6761,7 @@ pub unsafe fn unchecked_signed_abs_radix_kb_assign_async<T: UnsignedInteger, B:
        keyswitch_key.gpu_index(0).get(),
    );

-    let ct_modulus = ct.d_blocks.ciphertext_modulus().raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut ct_degrees = ct.info.blocks.iter().map(|b| b.degree.0).collect();
@@ -6966,7 +6794,6 @@ pub unsafe fn unchecked_signed_abs_radix_kb_assign_async<T: UnsignedInteger, B:
        true,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_cuda_integer_abs_inplace(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
    update_noise_degree(ct, &cuda_ffi_ct);
@@ -7028,12 +6855,7 @@ pub unsafe fn unchecked_is_at_least_one_comparisons_block_true_integer_radix_kb_
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_lwe_in
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut radix_lwe_out_degrees = radix_lwe_out
@@ -7097,7 +6919,6 @@ pub unsafe fn unchecked_is_at_least_one_comparisons_block_true_integer_radix_kb_
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        radix_lwe_in.d_blocks.lwe_ciphertext_count().0 as u32,
    );

@@ -7164,12 +6985,7 @@ pub unsafe fn unchecked_are_all_comparisons_block_true_integer_radix_kb_async<
        streams.gpu_indexes[0].get(),
        keyswitch_key.gpu_index(0).get(),
    );
-    let ct_modulus = radix_lwe_in
-        .d_blocks
-        .ciphertext_modulus()
-        .raw_modulus_float();
-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut radix_lwe_out_degrees = radix_lwe_out
        .info
@@ -7233,7 +7049,6 @@ pub unsafe fn unchecked_are_all_comparisons_block_true_integer_radix_kb_async<
        mem_ptr,
        bootstrapping_key.ptr.as_ptr(),
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        radix_lwe_in.d_blocks.lwe_ciphertext_count().0 as u32,
    );

@@ -7388,7 +7203,6 @@ pub unsafe fn noise_squashing_async<T: UnsignedInteger, B: Numeric>(
    pbs_type: PBSType,
    grouping_factor: LweBskGroupingFactor,
    ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
-    ct_modulus: f64,
 ) {
    assert_eq!(
        streams.gpu_indexes[0],
@@ -7419,8 +7233,7 @@ pub unsafe fn noise_squashing_async<T: UnsignedInteger, B: Numeric>(
        keyswitch_key.gpu_index(0).get(),
    );

-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let mut cuda_ffi_output = prepare_cuda_radix_ffi_from_slice_mut(
@@ -7466,7 +7279,6 @@ pub unsafe fn noise_squashing_async<T: UnsignedInteger, B: Numeric>(
        &raw const cuda_ffi_input,
        mem_ptr,
        keyswitch_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
        bootstrapping_key.ptr.as_ptr(),
    );

@@ -7547,12 +7359,10 @@ pub unsafe fn expand_async<T: UnsignedInteger, B: Numeric>(
        streams.gpu_indexes[0].get(),
        casting_key.gpu_indexes[0].get(),
    );
-    let ct_modulus = lwe_array_out.ciphertext_modulus().raw_modulus_float();
    let mut mem_ptr: *mut i8 = std::ptr::null_mut();
    let num_compact_lists = num_lwes_per_compact_list.len();

-    let (noise_reduction_type, ms_noise_reduction_key_ffi) =
-        resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
+    let noise_reduction_type = resolve_ms_noise_reduction_config(ms_noise_reduction_configuration);

    scratch_cuda_expand_without_verification_64(
        streams.ffi(),
@@ -7590,7 +7400,6 @@ pub unsafe fn expand_async<T: UnsignedInteger, B: Numeric>(
        bootstrapping_key.ptr.as_ptr(),
        computing_ks_key.ptr.as_ptr(),
        casting_key.ptr.as_ptr(),
-        &raw const ms_noise_reduction_key_ffi,
    );
    cleanup_expand_without_verification_64(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
 }
--- a/tfhe/src/integer/gpu/noise_squashing/noise_squashing_keys.rs
+++ b/tfhe/src/integer/gpu/noise_squashing/noise_squashing_keys.rs
@@ -27,15 +27,16 @@ impl CompressedNoiseSquashingKey {
            } => {
                let std_bsk = seeded_bsk.as_view().par_decompress_into_lwe_bootstrap_key();

-                let modulus_switch_noise_reduction_configuration = match modulus_switch_noise_reduction_key {
-            CompressedModulusSwitchConfiguration::Standard => None,
-            CompressedModulusSwitchConfiguration::DriftTechniqueNoiseReduction(
-                modulus_switch_noise_reduction_key,
-            ) => Some(CudaModulusSwitchNoiseReductionConfiguration::from_modulus_switch_noise_reduction_key(&modulus_switch_noise_reduction_key.decompress(), streams)),
-            CompressedModulusSwitchConfiguration::CenteredMeanNoiseReduction => {
-            Some(CudaModulusSwitchNoiseReductionConfiguration::Centered)
-            }
-        };
+                let modulus_switch_noise_reduction_configuration =
+                    match modulus_switch_noise_reduction_key {
+                        CompressedModulusSwitchConfiguration::Standard => None,
+                        CompressedModulusSwitchConfiguration::DriftTechniqueNoiseReduction(
+                            _modulus_switch_noise_reduction_key,
+                        ) => panic!("Drift noise reduction is not supported on GPU"),
+                        CompressedModulusSwitchConfiguration::CenteredMeanNoiseReduction => {
+                            Some(CudaModulusSwitchNoiseReductionConfiguration::Centered)
+                        }
+                    };

                let bsk = CudaLweBootstrapKey::from_lwe_bootstrap_key(
                    &std_bsk,
--- a/tfhe/src/integer/gpu/server_key/mod.rs
+++ b/tfhe/src/integer/gpu/server_key/mod.rs
@@ -17,9 +17,7 @@ use crate::shortint::ciphertext::{MaxDegree, MaxNoiseLevel};
 use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
 use crate::shortint::engine::ShortintEngine;
 use crate::shortint::parameters::ModulusSwitchType;
-use crate::shortint::server_key::{
-    CompressedModulusSwitchConfiguration, ModulusSwitchNoiseReductionKey,
-};
+use crate::shortint::server_key::CompressedModulusSwitchConfiguration;
 use crate::shortint::{CarryModulus, CiphertextModulus, MessageModulus, PBSOrder};

 mod radix;
@@ -118,26 +116,18 @@ impl CudaServerKey {
                        pbs_params.ciphertext_modulus,
                        &mut engine.encryption_generator,
                    );
-                let modulus_switch_noise_reduction_configuration = match pbs_params
-                    .modulus_switch_noise_reduction_params
-                {
-                    ModulusSwitchType::Standard => None,
-                    ModulusSwitchType::DriftTechniqueNoiseReduction(
-                        modulus_switch_noise_reduction_params,
-                    ) => {
-                        let ms_red_key = ModulusSwitchNoiseReductionKey::new(
-                            modulus_switch_noise_reduction_params,
-                            &std_cks.lwe_secret_key,
-                            &mut engine,
-                            pbs_params.ciphertext_modulus,
-                            pbs_params.lwe_noise_distribution,
-                        );
-                        Some(CudaModulusSwitchNoiseReductionConfiguration::from_modulus_switch_noise_reduction_key(&ms_red_key,streams))
-                    }
-                    ModulusSwitchType::CenteredMeanNoiseReduction => {
-                        Some(CudaModulusSwitchNoiseReductionConfiguration::Centered)
-                    }
-                };
+                let modulus_switch_noise_reduction_configuration =
+                    match pbs_params.modulus_switch_noise_reduction_params {
+                        ModulusSwitchType::Standard => None,
+                        ModulusSwitchType::DriftTechniqueNoiseReduction(
+                            _modulus_switch_noise_reduction_params,
+                        ) => {
+                            panic!("Drift noise reduction is not supported on GPU")
+                        }
+                        ModulusSwitchType::CenteredMeanNoiseReduction => {
+                            Some(CudaModulusSwitchNoiseReductionConfiguration::Centered)
+                        }
+                    };

                let d_bootstrap_key = CudaLweBootstrapKey::from_lwe_bootstrap_key(
                    &h_bootstrap_key,
@@ -265,7 +255,7 @@ impl CudaServerKey {

                let  modulus_switch_noise_reduction_configuration = match modulus_switch_noise_reduction_key {
                    CompressedModulusSwitchConfiguration::Standard => None,
-                    CompressedModulusSwitchConfiguration::DriftTechniqueNoiseReduction(modulus_switch_noise_reduction_key) => Some(CudaModulusSwitchNoiseReductionConfiguration::from_modulus_switch_noise_reduction_key(&modulus_switch_noise_reduction_key.decompress(), streams)),
+                    CompressedModulusSwitchConfiguration::DriftTechniqueNoiseReduction(_modulus_switch_noise_reduction_key) => panic!("Drift noise reduction is not supported on GPU"),
                    CompressedModulusSwitchConfiguration::CenteredMeanNoiseReduction => Some(CudaModulusSwitchNoiseReductionConfiguration::Centered),
                };

--- a/tfhe/src/integer/gpu/server_key/radix/mod.rs
+++ b/tfhe/src/integer/gpu/server_key/radix/mod.rs
@@ -787,7 +787,6 @@ impl CudaServerKey {
        let mut output_noise_levels = vec![0_u64; num_output_blocks];

        let num_ct_blocks = block_range.len() as u32;
-        let ct_modulus = input.d_blocks.ciphertext_modulus().raw_modulus_float();
        unsafe {
            match &self.bootstrapping_key {
                CudaBootstrappingKey::Classic(d_bsk) => {
@@ -816,7 +815,6 @@ impl CudaServerKey {
                        PBSType::Classical,
                        LweBskGroupingFactor(0),
                        d_bsk.ms_noise_reduction_configuration.as_ref(),
-                        ct_modulus,
                    );
                }
                CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
@@ -845,7 +843,6 @@ impl CudaServerKey {
                        PBSType::MultiBit,
                        d_multibit_bsk.grouping_factor,
                        None,
-                        ct_modulus,
                    );
                }
            }
@@ -908,7 +905,6 @@ impl CudaServerKey {
        let mut output_noise_levels = vec![0_u64; num_output_blocks];

        let num_ct_blocks = block_range.len() as u32;
-        let ct_modulus = input_1.d_blocks.ciphertext_modulus().raw_modulus_float();
        unsafe {
            match &self.bootstrapping_key {
                CudaBootstrappingKey::Classic(d_bsk) => {
@@ -939,7 +935,6 @@ impl CudaServerKey {
                        LweBskGroupingFactor(0),
                        self.message_modulus.0 as u32,
                        d_bsk.ms_noise_reduction_configuration.as_ref(),
-                        ct_modulus,
                    );
                }
                CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
@@ -970,7 +965,6 @@ impl CudaServerKey {
                        d_multibit_bsk.grouping_factor,
                        self.message_modulus.0 as u32,
                        None,
-                        ct_modulus,
                    );
                }
            }
@@ -1090,7 +1084,6 @@ impl CudaServerKey {
            .unwrap();
        let mut output_degrees = vec![0_u64; num_ct_blocks * function_count];
        let mut output_noise_levels = vec![0_u64; num_ct_blocks * function_count];
-        let ct_modulus = input.d_blocks.ciphertext_modulus().raw_modulus_float();

        match &self.bootstrapping_key {
            CudaBootstrappingKey::Classic(d_bsk) => {
@@ -1121,7 +1114,6 @@ impl CudaServerKey {
                    function_count as u32,
                    lut.sample_extraction_stride as u32,
                    d_bsk.ms_noise_reduction_configuration.as_ref(),
-                    ct_modulus,
                );
            }
            CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
@@ -1152,7 +1144,6 @@ impl CudaServerKey {
                    function_count as u32,
                    lut.sample_extraction_stride as u32,
                    None,
-                    ct_modulus,
                );
            }
        }
@@ -1226,7 +1217,6 @@ impl CudaServerKey {
                .noise_level
                .0;
        }
-        let ct_modulus = output.d_blocks.ciphertext_modulus().raw_modulus_float();
        let mut output_slice = output
            .d_blocks
            .0
@@ -1265,7 +1255,6 @@ impl CudaServerKey {
                        PBSType::Classical,
                        LweBskGroupingFactor(0),
                        d_bsk.ms_noise_reduction_configuration.as_ref(),
-                        ct_modulus,
                    );
                }
                CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
@@ -1296,7 +1285,6 @@ impl CudaServerKey {
                        PBSType::MultiBit,
                        d_multibit_bsk.grouping_factor,
                        None,
-                        ct_modulus,
                    );
                }
            }
@@ -1636,7 +1624,6 @@ impl CudaServerKey {
        let mut output_noise_levels = vec![0_u64; num_output_blocks];
        let input_slice = input.d_blocks.0.d_vec.as_slice(.., 0).unwrap();
        let mut output_slice = output.packed_d_blocks.0.d_vec.as_mut_slice(.., 0).unwrap();
-        let ct_modulus = input.d_blocks.ciphertext_modulus().raw_modulus_float();
        let d_bootstrapping_key = &squashing_key.bootstrapping_key;
        let (input_glwe_dimension, input_polynomial_size) = match &self.bootstrapping_key {
            CudaBootstrappingKey::Classic(d_bsk) => {
@@ -1676,7 +1663,6 @@ impl CudaServerKey {
                        PBSType::Classical,
                        LweBskGroupingFactor(0),
                        bsk.ms_noise_reduction_configuration.as_ref(),
-                        ct_modulus,
                    );
                }
                CudaBootstrappingKey::MultiBit(mb_bsk) => {
@@ -1706,7 +1692,6 @@ impl CudaServerKey {
                        PBSType::MultiBit,
                        mb_bsk.grouping_factor,
                        None,
-                        ct_modulus,
                    );
                }
            }