mirror of
https://github.com/zama-ai/concrete.git
synced 2026-04-17 03:00:54 -04:00
- added --compress-input compiler option which forces the use of seeded bootstrap keys and keyswitch keys - replaced the concrete-cpu FHE implementation with tfhe-rs Co-authored-by: Nikita Frolov <nf@mkmks.org>
151 lines
4.3 KiB
C++
151 lines
4.3 KiB
C++
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
|
// Exceptions. See
|
|
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
|
|
// for license information.
|
|
|
|
#ifndef CONCRETELANG_RUNTIME_CONTEXT_H
|
|
#define CONCRETELANG_RUNTIME_CONTEXT_H
|
|
|
|
#include "concrete-cpu.h"
|
|
#include "concretelang/Common/Error.h"
|
|
#include "concretelang/Common/Keysets.h"
|
|
#include <assert.h>
|
|
#include <complex>
|
|
#include <map>
|
|
#include <mutex>
|
|
#include <pthread.h>
|
|
#include <vector>
|
|
|
|
using ::concretelang::keysets::ServerKeyset;
|
|
|
|
#ifdef CONCRETELANG_CUDA_SUPPORT
|
|
#include "bootstrap.h"
|
|
#include "device.h"
|
|
#include "keyswitch.h"
|
|
#endif
|
|
|
|
namespace mlir {
|
|
namespace concretelang {
|
|
|
|
typedef struct FFT {
|
|
FFT() = delete;
|
|
FFT(size_t polynomial_size);
|
|
FFT(FFT &other) = delete;
|
|
FFT(FFT &&other);
|
|
~FFT();
|
|
|
|
struct Fft *fft;
|
|
size_t polynomial_size;
|
|
} FFT;
|
|
|
|
typedef struct RuntimeContext {
|
|
|
|
RuntimeContext() = delete;
|
|
RuntimeContext(ServerKeyset serverKeyset);
|
|
~RuntimeContext() {
|
|
#ifdef CONCRETELANG_CUDA_SUPPORT
|
|
for (int i = 0; i < num_devices; ++i) {
|
|
if (bsk_gpu[i] != nullptr)
|
|
cuda_drop(bsk_gpu[i], i);
|
|
if (ksk_gpu[i] != nullptr)
|
|
cuda_drop(ksk_gpu[i], i);
|
|
}
|
|
#endif
|
|
};
|
|
|
|
const uint64_t *keyswitch_key_buffer(size_t keyId) {
|
|
return serverKeyset.lweKeyswitchKeys[keyId].getBuffer().data();
|
|
}
|
|
|
|
const std::complex<double> *fourier_bootstrap_key_buffer(size_t keyId) {
|
|
return fourier_bootstrap_keys[keyId]->data();
|
|
}
|
|
|
|
const uint64_t *fp_keyswitch_key_buffer(size_t keyId) {
|
|
return serverKeyset.packingKeyswitchKeys[keyId].getRawPtr();
|
|
}
|
|
|
|
const struct Fft *fft(size_t keyId) { return ffts[keyId].fft; }
|
|
|
|
const ServerKeyset getKeys() const { return serverKeyset; }
|
|
|
|
private:
|
|
ServerKeyset serverKeyset;
|
|
std::vector<std::shared_ptr<std::vector<std::complex<double>>>>
|
|
fourier_bootstrap_keys;
|
|
std::vector<FFT> ffts;
|
|
|
|
#ifdef CONCRETELANG_CUDA_SUPPORT
|
|
public:
|
|
void *get_bsk_gpu(uint32_t input_lwe_dim, uint32_t poly_size, uint32_t level,
|
|
uint32_t glwe_dim, uint32_t gpu_idx, void *stream) {
|
|
|
|
if (bsk_gpu[gpu_idx] != nullptr) {
|
|
return bsk_gpu[gpu_idx];
|
|
}
|
|
const std::lock_guard<std::mutex> guard(*bsk_gpu_mutex[gpu_idx]);
|
|
|
|
if (bsk_gpu[gpu_idx] != nullptr) {
|
|
return bsk_gpu[gpu_idx];
|
|
}
|
|
|
|
auto bsk = serverKeyset.lweBootstrapKeys[0];
|
|
|
|
size_t bsk_buffer_len = bsk.getBuffer().size();
|
|
size_t bsk_gpu_buffer_size = bsk_buffer_len * sizeof(double);
|
|
|
|
void *bsk_gpu_tmp =
|
|
cuda_malloc_async(bsk_gpu_buffer_size, (cudaStream_t *)stream, gpu_idx);
|
|
cuda_convert_lwe_bootstrap_key_64(
|
|
bsk_gpu_tmp, const_cast<uint64_t *>(bsk.getBuffer().data()),
|
|
(cudaStream_t *)stream, gpu_idx, input_lwe_dim, glwe_dim, level,
|
|
poly_size);
|
|
// Synchronization here is not optional as it works with mutex to
|
|
// prevent other GPU streams from reading partially copied keys.
|
|
cudaStreamSynchronize(*(cudaStream_t *)stream);
|
|
bsk_gpu[gpu_idx] = bsk_gpu_tmp;
|
|
return bsk_gpu[gpu_idx];
|
|
}
|
|
|
|
void *get_ksk_gpu(uint32_t level, uint32_t input_lwe_dim,
|
|
uint32_t output_lwe_dim, uint32_t gpu_idx, void *stream) {
|
|
|
|
if (ksk_gpu[gpu_idx] != nullptr) {
|
|
return ksk_gpu[gpu_idx];
|
|
}
|
|
|
|
const std::lock_guard<std::mutex> guard(*ksk_gpu_mutex[gpu_idx]);
|
|
if (ksk_gpu[gpu_idx] != nullptr) {
|
|
return ksk_gpu[gpu_idx];
|
|
}
|
|
auto ksk = serverKeyset.lweKeyswitchKeys[0];
|
|
|
|
size_t ksk_buffer_size = sizeof(uint64_t) * ksk.getBuffer().size();
|
|
|
|
void *ksk_gpu_tmp =
|
|
cuda_malloc_async(ksk_buffer_size, (cudaStream_t *)stream, gpu_idx);
|
|
|
|
cuda_memcpy_async_to_gpu(ksk_gpu_tmp,
|
|
const_cast<uint64_t *>(ksk.getBuffer().data()),
|
|
ksk_buffer_size, (cudaStream_t *)stream, gpu_idx);
|
|
// Synchronization here is not optional as it works with mutex to
|
|
// prevent other GPU streams from reading partially copied keys.
|
|
cudaStreamSynchronize(*(cudaStream_t *)stream);
|
|
ksk_gpu[gpu_idx] = ksk_gpu_tmp;
|
|
return ksk_gpu[gpu_idx];
|
|
}
|
|
|
|
private:
|
|
std::vector<std::unique_ptr<std::mutex>> bsk_gpu_mutex;
|
|
std::vector<void *> bsk_gpu;
|
|
std::vector<std::unique_ptr<std::mutex>> ksk_gpu_mutex;
|
|
std::vector<void *> ksk_gpu;
|
|
int num_devices;
|
|
#endif
|
|
} RuntimeContext;
|
|
|
|
} // namespace concretelang
|
|
} // namespace mlir
|
|
|
|
#endif
|