mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-09 03:55:04 -05:00
47 lines
1.4 KiB
Plaintext
47 lines
1.4 KiB
Plaintext
#ifndef CUDA_NEGATE_H
|
|
#define CUDA_NEGATE_H
|
|
|
|
#ifdef __CDT_PARSER__
|
|
#undef __CUDA_RUNTIME_H__
|
|
#include <cuda_runtime.h>
|
|
#include <helper_cuda.h>
|
|
#endif
|
|
|
|
#include "linear_algebra.h"
|
|
#include "utils/kernel_dimensions.cuh"
|
|
|
|
template <typename T>
|
|
__global__ void negation(T *output, T *input, uint32_t num_entries) {
|
|
|
|
int tid = threadIdx.x;
|
|
if (tid < num_entries) {
|
|
int index = blockIdx.x * blockDim.x + tid;
|
|
// Here we take advantage of the wrapping behaviour of uint
|
|
output[index] = -input[index];
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
__host__ void host_negation(void *v_stream, uint32_t gpu_index, T *output,
|
|
T *input, uint32_t input_lwe_dimension,
|
|
uint32_t input_lwe_ciphertext_count) {
|
|
|
|
cudaSetDevice(gpu_index);
|
|
// lwe_size includes the presence of the body
|
|
// whereas lwe_dimension is the number of elements in the mask
|
|
int lwe_size = input_lwe_dimension + 1;
|
|
// Create a 1-dimensional grid of threads
|
|
int num_blocks = 0, num_threads = 0;
|
|
int num_entries = input_lwe_ciphertext_count * lwe_size;
|
|
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
|
dim3 grid(num_blocks, 1, 1);
|
|
dim3 thds(num_threads, 1, 1);
|
|
|
|
auto stream = static_cast<cudaStream_t *>(v_stream);
|
|
negation<<<grid, thds, 0, *stream>>>(output, input, num_entries);
|
|
|
|
cudaStreamSynchronize(*stream);
|
|
}
|
|
|
|
#endif // CUDA_NEGATE_H
|