feat(compiler): add multi-GPU scheduler for batched ops. The scheduler splits op batches into chunks to fit GPU memory and balance load across GPUs.

This commit is contained in:
Antoniu Pop
2023-04-18 17:12:24 +01:00
committed by Antoniu Pop
parent 38e14446d6
commit 81eaaa7560
3 changed files with 886 additions and 209 deletions

View File

@@ -36,12 +36,12 @@ void stream_emulator_make_memref_negate_lwe_ciphertext_u64_process(void *dfg,
void *sout);
void stream_emulator_make_memref_keyswitch_lwe_u64_process(
void *dfg, void *sin1, void *sout, uint32_t level, uint32_t base_log,
uint32_t input_lwe_dim, uint32_t output_lwe_dim, uint32_t ksk_index,
uint32_t output_size, void *context);
uint32_t input_lwe_dim, uint32_t output_lwe_dim, uint32_t output_size,
uint32_t ksk_index, void *context);
void stream_emulator_make_memref_bootstrap_lwe_u64_process(
void *dfg, void *sin1, void *sin2, void *sout, uint32_t input_lwe_dim,
uint32_t poly_size, uint32_t level, uint32_t base_log, uint32_t glwe_dim,
uint32_t bsk_index, uint32_t output_size, void *context);
uint32_t output_size, uint32_t bsk_index, void *context);
void stream_emulator_make_memref_batched_add_lwe_ciphertexts_u64_process(
void *dfg, void *sin1, void *sin2, void *sout);
@@ -57,16 +57,16 @@ void stream_emulator_make_memref_batched_negate_lwe_ciphertext_u64_process(
void *dfg, void *sin1, void *sout);
void stream_emulator_make_memref_batched_keyswitch_lwe_u64_process(
void *dfg, void *sin1, void *sout, uint32_t level, uint32_t base_log,
uint32_t input_lwe_dim, uint32_t output_lwe_dim, uint32_t ksk_index,
uint32_t output_size, void *context);
uint32_t input_lwe_dim, uint32_t output_lwe_dim, uint32_t output_size,
uint32_t ksk_index, void *context);
void stream_emulator_make_memref_batched_bootstrap_lwe_u64_process(
void *dfg, void *sin1, void *sin2, void *sout, uint32_t input_lwe_dim,
uint32_t poly_size, uint32_t level, uint32_t base_log, uint32_t glwe_dim,
uint32_t bsk_index, uint32_t output_size, void *context);
uint32_t output_size, uint32_t bsk_index, void *context);
void stream_emulator_make_memref_batched_mapped_bootstrap_lwe_u64_process(
void *dfg, void *sin1, void *sin2, void *sout, uint32_t input_lwe_dim,
uint32_t poly_size, uint32_t level, uint32_t base_log, uint32_t glwe_dim,
uint32_t bsk_index, uint32_t output_size, void *context);
uint32_t output_size, uint32_t bsk_index, void *context);
void *stream_emulator_make_uint64_stream(const char *name, stream_type stype);
void stream_emulator_put_uint64(void *stream, uint64_t e);

View File

@@ -1,5 +1,6 @@
if(CONCRETELANG_CUDA_SUPPORT)
add_library(ConcretelangRuntime SHARED context.cpp wrappers.cpp DFRuntime.cpp GPUDFG.cpp)
target_link_libraries(ConcretelangRuntime PRIVATE hwloc)
else()
add_library(ConcretelangRuntime SHARED context.cpp wrappers.cpp DFRuntime.cpp StreamEmulator.cpp)
endif()

File diff suppressed because it is too large Load Diff