Files
concrete/src/negation.cuh
2022-10-28 13:59:53 +02:00

47 lines
1.4 KiB
Plaintext

#ifndef CUDA_NEGATE_H
#define CUDA_NEGATE_H
#ifdef __CDT_PARSER__
#undef __CUDA_RUNTIME_H__
#include <cuda_runtime.h>
#include <helper_cuda.h>
#endif
#include "linear_algebra.h"
#include "utils/kernel_dimensions.cuh"
/*
 * Element-wise negation kernel: output[i] = -input[i] for every
 * i in [0, num_entries).
 *
 * Launch layout: only the x components of blockIdx / blockDim / threadIdx are
 * used, so a 1-dimensional grid of 1-dimensional blocks is expected. The grid
 * may contain more threads than num_entries; surplus threads do nothing.
 *
 * Each thread reads and writes the same index only.
 *
 * output      - destination buffer, at least num_entries elements
 * input       - source buffer, at least num_entries elements
 * num_entries - number of elements to negate
 */
template <typename T>
__global__ void negation(T *output, T *input, uint32_t num_entries) {
  // Flat global index of this thread. It is computed in size_t so that the
  // product cannot wrap when the rounded-up grid covers more than 2^32 slots.
  size_t index = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  // The guard must use the global index, not threadIdx.x: the last block may
  // extend past the end of the buffers.
  if (index < num_entries) {
    // Here we take advantage of the wrapping behaviour of uint
    // (assumes T is an unsigned integer type, so that -x is the modular
    // opposite of x -- TODO confirm against callers)
    output[index] = -input[index];
  }
}
/*
 * Host-side launcher for the negation kernel.
 *
 * Selects device gpu_index, treats input as input_lwe_ciphertext_count
 * contiguous LWE ciphertexts of (input_lwe_dimension + 1) elements each, and
 * negates every element into output on the stream pointed to by v_stream.
 * The call waits for that stream to finish before returning.
 *
 * v_stream                   - pointer to a cudaStream_t, passed as void *
 * gpu_index                  - device on which to run
 * output                     - destination buffer handed to the kernel
 * input                      - source buffer handed to the kernel
 * input_lwe_dimension        - number of mask elements per ciphertext
 * input_lwe_ciphertext_count - number of ciphertexts in the batch
 *
 * NOTE(review): return codes of the CUDA runtime calls are not inspected.
 */
template <typename T>
__host__ void host_negation(void *v_stream, uint32_t gpu_index, T *output,
                            T *input, uint32_t input_lwe_dimension,
                            uint32_t input_lwe_ciphertext_count) {
  cudaSetDevice(gpu_index);

  // A ciphertext holds the mask (input_lwe_dimension elements) plus the body,
  // hence the extra element.
  int elements_per_ciphertext = input_lwe_dimension + 1;
  int total_elements = input_lwe_ciphertext_count * elements_per_ciphertext;

  // Ask the shared helper for a 1-dimensional launch configuration, capped at
  // 512 threads per block.
  int block_count = 0;
  int threads_per_block = 0;
  getNumBlocksAndThreads(total_elements, 512, block_count, threads_per_block);

  cudaStream_t *stream_ptr = static_cast<cudaStream_t *>(v_stream);
  dim3 launch_grid(block_count, 1, 1);
  dim3 launch_block(threads_per_block, 1, 1);

  negation<<<launch_grid, launch_block, 0, *stream_ptr>>>(output, input,
                                                          total_elements);

  // Block the host until the negation has completed on the stream.
  cudaStreamSynchronize(*stream_ptr);
}
#endif // CUDA_NEGATE_H