mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-09 03:55:04 -05:00
fix(cuda): Include cuda_runtime.h in device.h to include the definition of cudaStream_t
This commit is contained in:
@@ -78,6 +78,7 @@ if(CONCRETELANG_CUDA_SUPPORT)
|
||||
link_directories(${CUDAToolkit_LIBRARY_DIR})
|
||||
add_subdirectory(${CONCRETE_CORE_PATH}/concrete-cuda/cuda)
|
||||
include_directories(${CONCRETE_CORE_PATH}/concrete-cuda/cuda/include)
|
||||
include_directories(${CUDAToolkit_INCLUDE_DIRS})
|
||||
add_compile_options(-DCONCRETELANG_CUDA_SUPPORT)
|
||||
endif()
|
||||
|
||||
|
||||
Submodule compiler/concrete-core updated: f1246ac4f3...bf79f5db63
@@ -18,13 +18,6 @@
|
||||
#include "concretelang/Common/Error.h"
|
||||
|
||||
#ifdef CONCRETELANG_CUDA_SUPPORT
|
||||
// We need to define the double2 struct from the CUDA backend header files
|
||||
// This shouldn't be defined here, but included along with concrete-cuda header
|
||||
// files
|
||||
typedef struct double2 {
|
||||
double x, y;
|
||||
} double2;
|
||||
// From concrete-cuda
|
||||
#include "bootstrap.h"
|
||||
#include "device.h"
|
||||
#include "keyswitch.h"
|
||||
|
||||
@@ -146,8 +146,8 @@ void memref_batched_keyswitch_lwe_cuda_u64(
|
||||
uint32_t base_log, uint32_t input_lwe_dim, uint32_t output_lwe_dim,
|
||||
mlir::concretelang::RuntimeContext *context) {
|
||||
assert(out_size0 == ct0_size0);
|
||||
assert(out_size1 == output_lwe_dim+1);
|
||||
assert(ct0_size1 == input_lwe_dim+1);
|
||||
assert(out_size1 == output_lwe_dim + 1);
|
||||
assert(ct0_size1 == input_lwe_dim + 1);
|
||||
// TODO: Multi GPU
|
||||
uint32_t gpu_idx = 0;
|
||||
uint32_t num_samples = out_size0;
|
||||
@@ -167,9 +167,9 @@ void memref_batched_keyswitch_lwe_cuda_u64(
|
||||
void *out_gpu = alloc_and_memcpy_async_to_gpu(
|
||||
out_aligned, out_offset, out_batch_size, gpu_idx, stream);
|
||||
// Run the keyswitch kernel on the GPU
|
||||
cuda_keyswitch_lwe_ciphertext_vector_64(stream, out_gpu, ct0_gpu, ksk_gpu,
|
||||
input_lwe_dim, output_lwe_dim,
|
||||
base_log, level, num_samples);
|
||||
cuda_keyswitch_lwe_ciphertext_vector_64(
|
||||
stream, gpu_idx, out_gpu, ct0_gpu, ksk_gpu, input_lwe_dim, output_lwe_dim,
|
||||
base_log, level, num_samples);
|
||||
// Copy the output batch of ciphertext back to CPU
|
||||
memcpy_async_to_cpu(out_aligned, out_offset, out_batch_size, out_gpu, gpu_idx,
|
||||
stream);
|
||||
@@ -242,12 +242,13 @@ void memref_batched_bootstrap_lwe_cuda_u64(
|
||||
test_vector_idxes_size, stream, gpu_idx);
|
||||
// Run the bootstrap kernel on the GPU
|
||||
cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
stream, out_gpu, glwe_ct_gpu, test_vector_idxes_gpu, ct0_gpu, fbsk_gpu,
|
||||
input_lwe_dim, glwe_dim, poly_size, base_log, level, num_samples,
|
||||
num_test_vectors, lwe_idx, cuda_get_max_shared_memory(gpu_idx));
|
||||
stream, gpu_idx, out_gpu, glwe_ct_gpu, test_vector_idxes_gpu, ct0_gpu,
|
||||
fbsk_gpu, input_lwe_dim, glwe_dim, poly_size, base_log, level,
|
||||
num_samples, num_test_vectors, lwe_idx,
|
||||
cuda_get_max_shared_memory(gpu_idx));
|
||||
// Copy the output batch of ciphertext back to CPU
|
||||
memcpy_async_to_cpu(out_aligned, out_offset, out_batch_size, out_gpu,
|
||||
gpu_idx, stream);
|
||||
memcpy_async_to_cpu(out_aligned, out_offset, out_batch_size, out_gpu, gpu_idx,
|
||||
stream);
|
||||
cuda_synchronize_device(gpu_idx);
|
||||
// free memory that we allocated on gpu
|
||||
cuda_drop(ct0_gpu, gpu_idx);
|
||||
|
||||
Reference in New Issue
Block a user