From 1f0a83e4bbac65f9c4e75f66c9cbe39b4c3c2ffa Mon Sep 17 00:00:00 2001 From: Agnes Leroy Date: Wed, 10 Dec 2025 17:13:13 +0100 Subject: [PATCH] fix(gpu): fix some CPU memory leaks due to the use of new without delete --- .../cuda/src/integer/bitwise_ops.cuh | 46 +++++++++---------- .../cuda/src/integer/div_rem.cuh | 13 +++--- .../cuda/src/integer/rerand.cuh | 14 +++--- backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh | 10 ++-- 4 files changed, 41 insertions(+), 42 deletions(-) diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh index ca6b4fe4f..df38d2527 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh @@ -102,21 +102,21 @@ __host__ void host_boolean_bitop(CudaStreams streams, return false; }; - CudaRadixCiphertextFFI *lwe_array_left = new CudaRadixCiphertextFFI; - CudaRadixCiphertextFFI *lwe_array_right = new CudaRadixCiphertextFFI; + CudaRadixCiphertextFFI lwe_array_left; + CudaRadixCiphertextFFI lwe_array_right; if (needs_noise_reduction(lwe_array_1)) { copy_radix_ciphertext_slice_async( streams.stream(0), streams.gpu_index(0), mem_ptr->tmp_lwe_left, 0, lwe_array_1->num_radix_blocks, lwe_array_1, 0, lwe_array_1->num_radix_blocks); - as_radix_ciphertext_slice(lwe_array_left, mem_ptr->tmp_lwe_left, 0, + as_radix_ciphertext_slice(&lwe_array_left, mem_ptr->tmp_lwe_left, 0, lwe_array_1->num_radix_blocks); integer_radix_apply_univariate_lookup_table( - streams, lwe_array_left, lwe_array_left, bsks, ksks, - mem_ptr->message_extract_lut, lwe_array_left->num_radix_blocks); + streams, &lwe_array_left, &lwe_array_left, bsks, ksks, + mem_ptr->message_extract_lut, lwe_array_left.num_radix_blocks); } else { - as_radix_ciphertext_slice(lwe_array_left, lwe_array_1, 0, + as_radix_ciphertext_slice(&lwe_array_left, lwe_array_1, 0, lwe_array_1->num_radix_blocks); } @@ -125,37 +125,37 @@ __host__ void host_boolean_bitop(CudaStreams streams, streams.stream(0), streams.gpu_index(0), mem_ptr->tmp_lwe_right, 0, lwe_array_2->num_radix_blocks, lwe_array_2, 0, lwe_array_2->num_radix_blocks); - as_radix_ciphertext_slice(lwe_array_right, mem_ptr->tmp_lwe_right, 0, - lwe_array_2->num_radix_blocks); + as_radix_ciphertext_slice(&lwe_array_right, mem_ptr->tmp_lwe_right, + 0, lwe_array_2->num_radix_blocks); integer_radix_apply_univariate_lookup_table( - streams, lwe_array_right, lwe_array_right, bsks, ksks, - mem_ptr->message_extract_lut, lwe_array_right->num_radix_blocks); + streams, &lwe_array_right, &lwe_array_right, bsks, ksks, + mem_ptr->message_extract_lut, lwe_array_right.num_radix_blocks); } else { - as_radix_ciphertext_slice(lwe_array_right, lwe_array_2, 0, + as_radix_ciphertext_slice(&lwe_array_right, lwe_array_2, 0, lwe_array_2->num_radix_blocks); } auto lut = mem_ptr->lut; - uint64_t degrees[lwe_array_left->num_radix_blocks]; + uint64_t degrees[lwe_array_left.num_radix_blocks]; if (mem_ptr->op == BITOP_TYPE::BITAND) { - update_degrees_after_bitand(degrees, lwe_array_left->degrees, - lwe_array_right->degrees, - lwe_array_left->num_radix_blocks); + update_degrees_after_bitand(degrees, lwe_array_left.degrees, + lwe_array_right.degrees, + lwe_array_left.num_radix_blocks); } else if (mem_ptr->op == BITOP_TYPE::BITOR) { - update_degrees_after_bitor(degrees, lwe_array_left->degrees, - lwe_array_right->degrees, - lwe_array_left->num_radix_blocks); + update_degrees_after_bitor(degrees, lwe_array_left.degrees, + lwe_array_right.degrees, + lwe_array_left.num_radix_blocks); } else if (mem_ptr->op == BITOP_TYPE::BITXOR) { - update_degrees_after_bitxor(degrees, lwe_array_left->degrees, - lwe_array_right->degrees, - lwe_array_left->num_radix_blocks); + update_degrees_after_bitxor(degrees, lwe_array_left.degrees, + lwe_array_right.degrees, + lwe_array_left.num_radix_blocks); } // shift argument is hardcoded as 2 here, because natively message modulus for // boolean block should be 2. lookup table is generated with same factor. integer_radix_apply_bivariate_lookup_table( - streams, lwe_array_out, lwe_array_left, lwe_array_right, bsks, ksks, lut, - lwe_array_out->num_radix_blocks, 2); + streams, lwe_array_out, &lwe_array_left, &lwe_array_right, bsks, ksks, + lut, lwe_array_out->num_radix_blocks, 2); memcpy(lwe_array_out->degrees, degrees, lwe_array_out->num_radix_blocks * sizeof(uint64_t)); diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cuh index c85155392..29d62dc54 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cuh @@ -169,21 +169,21 @@ __host__ void host_unsigned_integer_div_rem_block_by_block_2_2( CudaRadixCiphertextFFI *comparison_blocks, CudaRadixCiphertextFFI *d, int_comparison_buffer *comparison_buffer) { - CudaRadixCiphertextFFI *d_msb = new CudaRadixCiphertextFFI; + CudaRadixCiphertextFFI d_msb; uint32_t slice_start = num_blocks - block_index; uint32_t slice_end = d->num_radix_blocks; - as_radix_ciphertext_slice(d_msb, d, slice_start, slice_end); - comparison_blocks->num_radix_blocks = d_msb->num_radix_blocks; - if (d_msb->num_radix_blocks == 0) { + as_radix_ciphertext_slice(&d_msb, d, slice_start, slice_end); + comparison_blocks->num_radix_blocks = d_msb.num_radix_blocks; + if (d_msb.num_radix_blocks == 0) { cuda_memset_async( (Torus *)out_boolean_block->ptr, 0, sizeof(Torus) * (out_boolean_block->lwe_dimension + 1), streams.stream(gpu_index), streams.gpu_index(gpu_index)); } else { host_compare_blocks_with_zero( - streams.get_ith(gpu_index), comparison_blocks, d_msb, + streams.get_ith(gpu_index), comparison_blocks, &d_msb, comparison_buffer, &bsks[gpu_index], &ksks[gpu_index], - d_msb->num_radix_blocks, comparison_buffer->is_zero_lut); + d_msb.num_radix_blocks, comparison_buffer->is_zero_lut); are_all_comparisons_block_true( streams.get_ith(gpu_index), out_boolean_block, comparison_blocks, comparison_buffer, &bsks[gpu_index], &ksks[gpu_index], @@ -202,7 +202,6 @@ __host__ void host_unsigned_integer_div_rem_block_by_block_2_2( (Torus *)out_boolean_block->ptr, (Torus *)out_boolean_block->ptr, encoded_scalar, radix_params.big_lwe_dimension, 1); } - delete d_msb; }; for (uint j = 0; j < 3; j++) { diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/rerand.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/rerand.cuh index dedb87195..7d2d189c5 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/rerand.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/integer/rerand.cuh @@ -63,14 +63,14 @@ void rerand_inplace( // Add ks output to ct // Check sizes - auto lwes_ffi = new CudaRadixCiphertextFFI; - into_radix_ciphertext(lwes_ffi, lwe_array, num_lwes, output_dimension); - auto ksed_zero_lwes_ffi = new CudaRadixCiphertextFFI; - into_radix_ciphertext(ksed_zero_lwes_ffi, ksed_zero_lwes, num_lwes, + CudaRadixCiphertextFFI lwes_ffi; + into_radix_ciphertext(&lwes_ffi, lwe_array, num_lwes, output_dimension); + CudaRadixCiphertextFFI ksed_zero_lwes_ffi; + into_radix_ciphertext(&ksed_zero_lwes_ffi, ksed_zero_lwes, num_lwes, output_dimension); - host_addition(streams.stream(0), streams.gpu_index(0), lwes_ffi, - lwes_ffi, ksed_zero_lwes_ffi, num_lwes, message_modulus, - carry_modulus); + host_addition(streams.stream(0), streams.gpu_index(0), &lwes_ffi, + &lwes_ffi, &ksed_zero_lwes_ffi, num_lwes, + message_modulus, carry_modulus); } template diff --git a/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh b/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh index 912387fd2..114588124 100644 --- a/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/zk/zk.cuh @@ -91,12 +91,12 @@ __host__ void host_expand_without_verification( cuda_memset_async(lwe_array_out, 0, (lwe_dimension + 1) * num_lwes * 2 * sizeof(Torus), streams.stream(0), streams.gpu_index(0)); - auto output = new CudaRadixCiphertextFFI; - into_radix_ciphertext(output, lwe_array_out, 2 * num_lwes, lwe_dimension); - auto input = new CudaRadixCiphertextFFI; - into_radix_ciphertext(input, lwe_array_input, 2 * num_lwes, lwe_dimension); + CudaRadixCiphertextFFI output; + into_radix_ciphertext(&output, lwe_array_out, 2 * num_lwes, lwe_dimension); + CudaRadixCiphertextFFI input; + into_radix_ciphertext(&input, lwe_array_input, 2 * num_lwes, lwe_dimension); integer_radix_apply_univariate_lookup_table( - streams, output, input, bsks, ksks, message_and_carry_extract_luts, + streams, &output, &input, bsks, ksks, message_and_carry_extract_luts, 2 * num_lwes); }