From f60bd16e2be08d553a00f0277b2b6d3709bc5a53 Mon Sep 17 00:00:00 2001 From: Antoniu Pop Date: Mon, 30 Jan 2023 10:30:55 +0000 Subject: [PATCH] fix(GPU): remove transfers of uninitialized data to GPU. --- compiler/lib/Runtime/wrappers.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/compiler/lib/Runtime/wrappers.cpp b/compiler/lib/Runtime/wrappers.cpp index 4318241e8..46a3d2e6e 100644 --- a/compiler/lib/Runtime/wrappers.cpp +++ b/compiler/lib/Runtime/wrappers.cpp @@ -136,8 +136,7 @@ void memref_batched_keyswitch_lwe_cuda_u64( // TODO: The allocation should be done by the compiler codegen void *ct0_gpu = alloc_and_memcpy_async_to_gpu( ct0_aligned, ct0_offset, ct0_batch_size, gpu_idx, stream); - void *out_gpu = alloc_and_memcpy_async_to_gpu( - out_aligned, out_offset, out_batch_size, gpu_idx, stream); + void *out_gpu = cuda_malloc(out_batch_size * sizeof(uint64_t), gpu_idx); // Run the keyswitch kernel on the GPU cuda_keyswitch_lwe_ciphertext_vector_64( stream, gpu_idx, out_gpu, ct0_gpu, ksk_gpu, input_lwe_dim, output_lwe_dim, @@ -179,9 +178,7 @@ void memref_batched_bootstrap_lwe_cuda_u64( // TODO: The allocation should be done by the compiler codegen void *ct0_gpu = alloc_and_memcpy_async_to_gpu( ct0_aligned, ct0_offset, ct0_batch_size, gpu_idx, stream); - void *out_gpu = alloc_and_memcpy_async_to_gpu( - out_aligned, out_offset, out_batch_size, gpu_idx, stream); - + void *out_gpu = cuda_malloc(out_batch_size * sizeof(uint64_t), gpu_idx); // Construct the glwe accumulator (on CPU) // TODO: Should be done outside of the bootstrap call, compile time if // possible. Refactor in progress