mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-09 03:55:04 -05:00
feat(cuda): implement negation of an LWE ciphertext vector
This commit is contained in:
20
include/negation.h
Normal file
20
include/negation.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef CUDA_NEGATE_H_
|
||||
#define CUDA_NEGATE_H_
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
extern "C" {
|
||||
|
||||
void cuda_negate_lwe_ciphertext_vector_32(void *v_stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_negate_lwe_ciphertext_vector_64(void *v_stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
#endif // CUDA_NEGATE_H_
|
||||
@@ -1,5 +1,6 @@
|
||||
set(SOURCES ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap.h
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/keyswitch.h)
|
||||
set(SOURCES ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap.h
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/keyswitch.h
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/negation.h)
|
||||
file(GLOB SOURCES
|
||||
"*.cu"
|
||||
"*.h"
|
||||
|
||||
23
src/negation.cu
Normal file
23
src/negation.cu
Normal file
@@ -0,0 +1,23 @@
|
||||
#include "negation.cuh"
|
||||
#include "negation.h"
|
||||
|
||||
void cuda_negate_lwe_ciphertext_vector_32(void *v_stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_negation(v_stream, gpu_index, static_cast<uint32_t *>(lwe_array_out),
|
||||
static_cast<uint32_t *>(lwe_array_in), input_lwe_dimension,
|
||||
input_lwe_ciphertext_count);
|
||||
}
|
||||
void cuda_negate_lwe_ciphertext_vector_64(void *v_stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_negation(v_stream, gpu_index, static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t *>(lwe_array_in), input_lwe_dimension,
|
||||
input_lwe_ciphertext_count);
|
||||
}
|
||||
46
src/negation.cuh
Normal file
46
src/negation.cuh
Normal file
@@ -0,0 +1,46 @@
|
||||
#ifdef __CDT_PARSER__
|
||||
#undef __CUDA_RUNTIME_H__
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#endif
|
||||
|
||||
#ifndef CUDA_NEGATE
|
||||
#define CUDA_NEGATE
|
||||
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <thread>
|
||||
|
||||
template <typename T>
|
||||
__global__ void negation(T *output, T *input, uint32_t num_entries) {
|
||||
|
||||
int tid = threadIdx.x;
|
||||
if (tid < num_entries) {
|
||||
int index = blockIdx.x * blockDim.x + tid;
|
||||
// Here we take advantage of the wrapping behaviour of uint
|
||||
output[index] = -input[index];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_negation(void *v_stream, uint32_t gpu_index, T *output,
|
||||
T *input, uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = input_lwe_ciphertext_count * lwe_size;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
auto stream = static_cast<cudaStream_t *>(v_stream);
|
||||
negation<<<grid, thds, 0, *stream>>>(output, input, num_entries);
|
||||
|
||||
cudaStreamSynchronize(*stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_NEGATE
|
||||
@@ -1,4 +1,6 @@
|
||||
int nextPow2(int x) {
|
||||
#include "math_functions.h"
|
||||
|
||||
__host__ __device__ int nextPow2(int x) {
|
||||
--x;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
@@ -8,8 +10,10 @@ int nextPow2(int x) {
|
||||
return ++x;
|
||||
}
|
||||
|
||||
void getNumBlocksAndThreads(const int n, const int maxBlockSize, int &blocks,
|
||||
int &threads) {
|
||||
threads = (n < maxBlockSize * 2) ? nextPow2((n + 1) / 2) : maxBlockSize;
|
||||
__host__ __device__ void getNumBlocksAndThreads(const int n,
|
||||
const int maxBlockSize,
|
||||
int &blocks, int &threads) {
|
||||
threads =
|
||||
(n < maxBlockSize * 2) ? max(128, nextPow2((n + 1) / 2)) : maxBlockSize;
|
||||
blocks = (n + threads - 1) / threads;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user