mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-10 07:08:03 -05:00
feat(gpu): implement fhe rand on gpu
This commit is contained in:
@@ -5,45 +5,50 @@
|
||||
|
||||
extern "C" {
|
||||
|
||||
void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void const *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void const *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void const *lwe_array_in_1,
|
||||
void const *lwe_array_in_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void const *lwe_array_in_1,
|
||||
void const *lwe_array_in_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_negate_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_negate_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *plaintext_array_in,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *plaintext_array_in,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *cleartext_array_in,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *cleartext_array_in,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint64_t plaintext_in,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
#endif // CUDA_LINALG_H_
|
||||
|
||||
@@ -4,12 +4,11 @@
|
||||
* Perform the addition of two u32 input LWE ciphertext vectors.
|
||||
* See the equivalent operation on u64 ciphertexts for more details.
|
||||
*/
|
||||
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void const *lwe_array_in_1,
|
||||
void const *lwe_array_in_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
void cuda_add_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_addition<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint32_t *>(lwe_array_out),
|
||||
@@ -44,12 +43,11 @@ void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
* vectors are left unchanged. This function is a wrapper to a device function
|
||||
* that performs the operation on the GPU.
|
||||
*/
|
||||
void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void const *lwe_array_in_1,
|
||||
void const *lwe_array_in_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
void cuda_add_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_addition<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -65,7 +63,8 @@ void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *plaintext_array_in,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_addition_plaintext<uint32_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
@@ -105,7 +104,8 @@ void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *plaintext_array_in,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_addition_plaintext<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
@@ -114,3 +114,41 @@ void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
|
||||
static_cast<const uint64_t *>(plaintext_array_in), input_lwe_dimension,
|
||||
input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform the addition of a u64 input LWE ciphertext vector with a u64 input
|
||||
* plaintext scalar.
|
||||
* - `stream` is a void pointer to the Cuda stream to be used in the kernel
|
||||
* launch
|
||||
* - `gpu_index` is the index of the GPU to be used in the kernel launch
|
||||
* - `lwe_array_out` is an array of size
|
||||
* `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have
|
||||
* been allocated on the GPU before calling this function, and that will hold
|
||||
* the result of the computation.
|
||||
* - `lwe_array_in` is the LWE ciphertext vector used as input, it should have
|
||||
* been allocated and initialized before calling this function. It has the same
|
||||
* size as the output array.
|
||||
* - `plaintext_in` is the plaintext used as input.
|
||||
* - `input_lwe_dimension` is the number of mask elements in the input and
|
||||
* output LWE ciphertext vectors
|
||||
* - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the
|
||||
* input LWE ciphertext vector, as well as in the output.
|
||||
*
|
||||
* The same input plaintext is added to the body of the
|
||||
* LWE ciphertexts in the LWE ciphertext vector. The result of the
|
||||
* operation is stored in the output LWE ciphertext vector. The two input
|
||||
* vectors are unchanged. This function is a wrapper to a device function that
|
||||
* performs the operation on the GPU.
|
||||
*/
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint64_t plaintext_in,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_addition_plaintext_scalar<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), plaintext_in,
|
||||
input_lwe_dimension, input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
@@ -13,9 +13,10 @@
|
||||
#include <stdio.h>
|
||||
|
||||
template <typename T>
|
||||
__global__ void
|
||||
plaintext_addition(T *output, T const *lwe_input, T const *plaintext_input,
|
||||
uint32_t input_lwe_dimension, uint32_t num_entries) {
|
||||
__global__ void plaintext_addition(T *output, T const *lwe_input,
|
||||
T const *plaintext_input,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t num_entries) {
|
||||
|
||||
int tid = threadIdx.x;
|
||||
int plaintext_index = blockIdx.x * blockDim.x + tid;
|
||||
@@ -28,10 +29,26 @@ plaintext_addition(T *output, T const *lwe_input, T const *plaintext_input,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void
|
||||
host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
T const *lwe_input, T const *plaintext_input,
|
||||
uint32_t lwe_dimension, uint32_t lwe_ciphertext_count) {
|
||||
__global__ void plaintext_addition_scalar(T *output, T const *lwe_input,
|
||||
const T plaintext_input,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t num_entries) {
|
||||
|
||||
int tid = threadIdx.x;
|
||||
int lwe_index = blockIdx.x * blockDim.x + tid;
|
||||
if (lwe_index < num_entries) {
|
||||
int index = lwe_index * (input_lwe_dimension + 1) + input_lwe_dimension;
|
||||
// Here we take advantage of the wrapping behaviour of uint
|
||||
output[index] = lwe_input[index] + plaintext_input;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index,
|
||||
T *output, T const *lwe_input,
|
||||
T const *plaintext_input,
|
||||
const uint32_t lwe_dimension,
|
||||
const uint32_t lwe_ciphertext_count) {
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
@@ -48,6 +65,27 @@ host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_addition_plaintext_scalar(
|
||||
cudaStream_t stream, uint32_t gpu_index, T *output, T const *lwe_input,
|
||||
const T plaintext_input, const uint32_t lwe_dimension,
|
||||
const uint32_t lwe_ciphertext_count) {
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = lwe_ciphertext_count;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
output, lwe_input, (lwe_dimension + 1) * lwe_ciphertext_count * sizeof(T),
|
||||
stream, gpu_index);
|
||||
plaintext_addition_scalar<T><<<grid, thds, 0, stream>>>(
|
||||
output, lwe_input, plaintext_input, lwe_dimension, num_entries);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void addition(T *output, T const *input_1, T const *input_2,
|
||||
uint32_t num_entries) {
|
||||
@@ -64,8 +102,8 @@ __global__ void addition(T *output, T const *input_1, T const *input_2,
|
||||
template <typename T>
|
||||
__host__ void host_addition(cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
T const *input_1, T const *input_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *cleartext_array_in,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_cleartext_vec_multiplication<uint32_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
@@ -47,7 +48,8 @@ void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
|
||||
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *cleartext_array_in,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_cleartext_vec_multiplication<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
template <typename T>
|
||||
__global__ void cleartext_vec_multiplication(T *output, T const *lwe_input,
|
||||
T const *cleartext_input,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t num_entries) {
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t num_entries) {
|
||||
|
||||
int tid = threadIdx.x;
|
||||
int index = blockIdx.x * blockDim.x + tid;
|
||||
@@ -31,8 +31,8 @@ __global__ void cleartext_vec_multiplication(T *output, T const *lwe_input,
|
||||
template <typename T>
|
||||
__host__ void host_cleartext_vec_multiplication(
|
||||
cudaStream_t stream, uint32_t gpu_index, T *output, T const *lwe_input,
|
||||
T const *cleartext_input, uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
T const *cleartext_input, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
|
||||
@@ -4,11 +4,10 @@
|
||||
* Perform the negation of a u32 input LWE ciphertext vector.
|
||||
* See the equivalent operation on u64 ciphertexts for more details.
|
||||
*/
|
||||
void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void const *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
void cuda_negate_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_negation<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint32_t *>(lwe_array_out),
|
||||
@@ -38,11 +37,10 @@ void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
* LWE ciphertext vector is left unchanged. This function is a wrapper to a
|
||||
* device function that performs the operation on the GPU.
|
||||
*/
|
||||
void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void const *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
void cuda_negate_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
host_negation<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
|
||||
@@ -23,8 +23,8 @@ __global__ void negation(T *output, T const *input, uint32_t num_entries) {
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_negation(cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
T const *input, uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
T const *input, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
|
||||
@@ -1345,6 +1345,17 @@ extern "C" {
|
||||
input_lwe_ciphertext_count: u32,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_add_lwe_ciphertext_vector_plaintext_64(
|
||||
stream: *mut ffi::c_void,
|
||||
gpu_index: u32,
|
||||
lwe_array_out: *mut ffi::c_void,
|
||||
lwe_array_in: *const ffi::c_void,
|
||||
plaintext_in: u64,
|
||||
input_lwe_dimension: u32,
|
||||
input_lwe_ciphertext_count: u32,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_fourier_polynomial_mul(
|
||||
stream: *mut ffi::c_void,
|
||||
|
||||
@@ -1307,11 +1307,12 @@ define_server_key_bench_default_fn!(
|
||||
#[cfg(feature = "gpu")]
|
||||
mod cuda {
|
||||
use super::*;
|
||||
use criterion::criterion_group;
|
||||
use criterion::{black_box, criterion_group};
|
||||
use tfhe::core_crypto::gpu::CudaStreams;
|
||||
use tfhe::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
|
||||
use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
|
||||
use tfhe::integer::gpu::server_key::CudaServerKey;
|
||||
use tfhe_csprng::seeders::Seed;
|
||||
|
||||
fn bench_cuda_server_key_unary_function_clean_inputs<F>(
|
||||
c: &mut Criterion,
|
||||
@@ -1731,6 +1732,84 @@ mod cuda {
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
pub fn cuda_unsigned_oprf(c: &mut Criterion) {
|
||||
let bench_name = "integer::cuda::unsigned_oprf";
|
||||
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
|
||||
let streams = CudaStreams::new_multi_gpu();
|
||||
|
||||
for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() {
|
||||
let param_name = param.name();
|
||||
|
||||
let bench_id;
|
||||
|
||||
match BENCH_TYPE.get().unwrap() {
|
||||
BenchmarkType::Latency => {
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &streams);
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(
|
||||
gpu_sks
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&streams,
|
||||
),
|
||||
);
|
||||
})
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
let elements = throughput_num_threads(num_block);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) =
|
||||
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &streams);
|
||||
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|i| {
|
||||
let selected_gpu =
|
||||
streams.gpu_indexes[i as usize % streams.gpu_indexes.len()];
|
||||
let stream = CudaStreams::new_single_gpu(selected_gpu);
|
||||
gpu_sks
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
&stream,
|
||||
);
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
param,
|
||||
param.name(),
|
||||
"oprf",
|
||||
&OperatorType::Atomic,
|
||||
bit_size as u32,
|
||||
vec![param.message_modulus().0.ilog2(); num_block],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
macro_rules! define_cuda_server_key_bench_clean_input_unary_fn (
|
||||
(method_name: $server_key_method:ident, display_name:$name:ident) => {
|
||||
::paste::paste!{
|
||||
@@ -2376,6 +2455,7 @@ mod cuda {
|
||||
cuda_trailing_zeros,
|
||||
cuda_trailing_ones,
|
||||
cuda_ilog2,
|
||||
cuda_unsigned_oprf,
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
@@ -2395,6 +2475,7 @@ mod cuda {
|
||||
cuda_scalar_mul,
|
||||
cuda_scalar_div,
|
||||
cuda_scalar_rem,
|
||||
cuda_unsigned_oprf,
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
|
||||
@@ -481,6 +481,31 @@ pub unsafe fn add_lwe_ciphertext_vector_plaintext_vector_async<T: UnsignedIntege
|
||||
);
|
||||
}
|
||||
|
||||
/// Addition of a vector of LWE ciphertexts with a plaintext scalar
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// [CudaStreams::synchronize] __must__ be called as soon as synchronization is
|
||||
/// required
|
||||
pub unsafe fn add_lwe_ciphertext_vector_plaintext_scalar_async<T: UnsignedInteger>(
|
||||
streams: &CudaStreams,
|
||||
lwe_array_out: &mut CudaVec<T>,
|
||||
lwe_array_in: &CudaVec<T>,
|
||||
plaintext_in: u64,
|
||||
lwe_dimension: LweDimension,
|
||||
num_samples: u32,
|
||||
) {
|
||||
cuda_add_lwe_ciphertext_vector_plaintext_64(
|
||||
streams.ptr[0],
|
||||
streams.gpu_indexes[0].0,
|
||||
lwe_array_out.as_mut_c_ptr(0),
|
||||
lwe_array_in.as_c_ptr(0),
|
||||
plaintext_in,
|
||||
lwe_dimension.0 as u32,
|
||||
num_samples,
|
||||
);
|
||||
}
|
||||
|
||||
/// Assigned addition of a vector of LWE ciphertexts with a vector of plaintexts
|
||||
///
|
||||
/// # Safety
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
use super::FheBool;
|
||||
use super::{FheBool, InnerBoolean};
|
||||
use crate::high_level_api::global_state;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::high_level_api::global_state::with_thread_local_cuda_streams;
|
||||
use crate::high_level_api::keys::InternalServerKey;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
|
||||
use crate::integer::BooleanBlock;
|
||||
use tfhe_csprng::seeders::Seed;
|
||||
|
||||
@@ -24,16 +30,28 @@ impl FheBool {
|
||||
/// let dec_result: bool = ct_res.decrypt(&client_key);
|
||||
/// ```
|
||||
pub fn generate_oblivious_pseudo_random(seed: Seed) -> Self {
|
||||
global_state::with_internal_keys(|key| match key {
|
||||
let (ciphertext, tag) = global_state::with_internal_keys(|key| match key {
|
||||
InternalServerKey::Cpu(key) => {
|
||||
let ct = key.pbs_key().key.generate_oblivious_pseudo_random(seed, 1);
|
||||
|
||||
Self::new(BooleanBlock(ct), key.tag.clone())
|
||||
(
|
||||
InnerBoolean::Cpu(BooleanBlock::new_unchecked(ct)),
|
||||
key.tag.clone(),
|
||||
)
|
||||
}
|
||||
#[cfg(feature = "gpu")]
|
||||
InternalServerKey::Cuda(_) => {
|
||||
todo!("Cuda devices do not yet support oblivious pseudo random generation")
|
||||
}
|
||||
})
|
||||
InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| {
|
||||
let d_ct: CudaUnsignedRadixCiphertext = cuda_key
|
||||
.key
|
||||
.key
|
||||
.generate_oblivious_pseudo_random(seed, 1, streams);
|
||||
(
|
||||
InnerBoolean::Cuda(CudaBooleanBlock::from_cuda_radix_ciphertext(
|
||||
d_ct.ciphertext,
|
||||
)),
|
||||
cuda_key.tag.clone(),
|
||||
)
|
||||
}),
|
||||
});
|
||||
Self::new(ciphertext, tag)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
use super::{FheIntId, FheUintId};
|
||||
use super::{FheIntId, FheUint, FheUintId};
|
||||
use crate::high_level_api::global_state;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::high_level_api::global_state::with_thread_local_cuda_streams;
|
||||
use crate::high_level_api::keys::InternalServerKey;
|
||||
use crate::{FheInt, FheUint, Seed};
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::integer::gpu::ciphertext::{CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext};
|
||||
use crate::{FheInt, Seed};
|
||||
impl<Id: FheUintId> FheUint<Id> {
|
||||
/// Generates an encrypted unsigned integer
|
||||
/// taken uniformly in its full range using the given seed.
|
||||
@@ -35,9 +38,18 @@ impl<Id: FheUintId> FheUint<Id> {
|
||||
Self::new(ct, key.tag.clone())
|
||||
}
|
||||
#[cfg(feature = "gpu")]
|
||||
InternalServerKey::Cuda(_) => {
|
||||
todo!("Cuda devices do not yet support oblivious pseudo random generation")
|
||||
}
|
||||
InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| {
|
||||
let d_ct: CudaUnsignedRadixCiphertext = cuda_key
|
||||
.key
|
||||
.key
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
seed,
|
||||
Id::num_blocks(cuda_key.message_modulus()) as u64,
|
||||
streams,
|
||||
);
|
||||
|
||||
Self::new(d_ct, cuda_key.tag.clone())
|
||||
}),
|
||||
})
|
||||
}
|
||||
/// Generates an encrypted `num_block` blocks unsigned integer
|
||||
@@ -75,9 +87,18 @@ impl<Id: FheUintId> FheUint<Id> {
|
||||
Self::new(ct, key.tag.clone())
|
||||
}
|
||||
#[cfg(feature = "gpu")]
|
||||
InternalServerKey::Cuda(_) => {
|
||||
todo!("Cuda devices do not yet support oblivious pseudo random generation")
|
||||
}
|
||||
InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| {
|
||||
let d_ct: CudaUnsignedRadixCiphertext = cuda_key
|
||||
.key
|
||||
.key
|
||||
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
seed,
|
||||
random_bits_count,
|
||||
Id::num_blocks(cuda_key.message_modulus()) as u64,
|
||||
streams,
|
||||
);
|
||||
Self::new(d_ct, cuda_key.tag.clone())
|
||||
}),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -115,9 +136,18 @@ impl<Id: FheIntId> FheInt<Id> {
|
||||
Self::new(ct, key.tag.clone())
|
||||
}
|
||||
#[cfg(feature = "gpu")]
|
||||
InternalServerKey::Cuda(_) => {
|
||||
todo!("Cuda devices do not yet support oblivious pseudo random generation")
|
||||
}
|
||||
InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| {
|
||||
let d_ct: CudaSignedRadixCiphertext = cuda_key
|
||||
.key
|
||||
.key
|
||||
.par_generate_oblivious_pseudo_random_signed_integer(
|
||||
seed,
|
||||
Id::num_blocks(cuda_key.message_modulus()) as u64,
|
||||
streams,
|
||||
);
|
||||
|
||||
Self::new(d_ct, cuda_key.tag.clone())
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -157,9 +187,18 @@ impl<Id: FheIntId> FheInt<Id> {
|
||||
Self::new(ct, key.tag.clone())
|
||||
}
|
||||
#[cfg(feature = "gpu")]
|
||||
InternalServerKey::Cuda(_) => {
|
||||
todo!("Cuda devices do not yet support oblivious pseudo random generation")
|
||||
}
|
||||
InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| {
|
||||
let d_ct: CudaSignedRadixCiphertext = cuda_key
|
||||
.key
|
||||
.key
|
||||
.par_generate_oblivious_pseudo_random_signed_integer_bounded(
|
||||
seed,
|
||||
random_bits_count,
|
||||
Id::num_blocks(cuda_key.message_modulus()) as u64,
|
||||
streams,
|
||||
);
|
||||
Self::new(d_ct, cuda_key.tag.clone())
|
||||
}),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,7 +20,9 @@ use crate::integer::gpu::{
|
||||
};
|
||||
use crate::integer::server_key::radix_parallel::OutputFlag;
|
||||
use crate::shortint::ciphertext::{Degree, NoiseLevel};
|
||||
use crate::shortint::engine::{fill_accumulator, fill_many_lut_accumulator};
|
||||
use crate::shortint::engine::{
|
||||
fill_accumulator, fill_accumulator_no_encoding, fill_many_lut_accumulator,
|
||||
};
|
||||
use crate::shortint::server_key::{
|
||||
BivariateLookupTableOwned, LookupTableOwned, ManyLookupTableOwned,
|
||||
};
|
||||
@@ -36,6 +38,7 @@ mod even_odd;
|
||||
mod ilog2;
|
||||
mod mul;
|
||||
mod neg;
|
||||
mod oprf;
|
||||
mod rotate;
|
||||
mod scalar_add;
|
||||
mod scalar_bitwise_op;
|
||||
@@ -805,6 +808,29 @@ impl CudaServerKey {
|
||||
degree: Degree::new(max_value),
|
||||
}
|
||||
}
|
||||
pub(crate) fn generate_lookup_table_no_encode<F>(&self, f: F) -> LookupTableOwned
|
||||
where
|
||||
F: Fn(u64) -> u64,
|
||||
{
|
||||
let (glwe_size, polynomial_size) = match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
(d_bsk.glwe_dimension.to_glwe_size(), d_bsk.polynomial_size)
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_bsk) => {
|
||||
(d_bsk.glwe_dimension.to_glwe_size(), d_bsk.polynomial_size)
|
||||
}
|
||||
};
|
||||
let mut acc = GlweCiphertext::new(0, glwe_size, polynomial_size, self.ciphertext_modulus);
|
||||
|
||||
fill_accumulator_no_encoding(&mut acc, polynomial_size, glwe_size, f);
|
||||
|
||||
LookupTableOwned {
|
||||
acc,
|
||||
// We should not rely on the degree in this case
|
||||
// The degree should be set manually on the outputs of PBS by this LUT
|
||||
degree: Degree::new(self.message_modulus.0 * self.carry_modulus.0 * 2),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_many_lookup_table(
|
||||
&self,
|
||||
|
||||
736
tfhe/src/integer/gpu/server_key/radix/oprf.rs
Normal file
736
tfhe/src/integer/gpu/server_key/radix/oprf.rs
Normal file
@@ -0,0 +1,736 @@
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::integer::gpu::ciphertext::{
|
||||
CudaIntegerRadixCiphertext, CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext,
|
||||
};
|
||||
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey};
|
||||
|
||||
use crate::core_crypto::commons::generators::DeterministicSeeder;
|
||||
use crate::core_crypto::prelude::DefaultRandomGenerator;
|
||||
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
||||
|
||||
use crate::shortint::oprf::create_random_from_seed_modulus_switched;
|
||||
use crate::shortint::server_key::LookupTableOwned;
|
||||
|
||||
pub use tfhe_csprng::seeders::{Seed, Seeder};
|
||||
|
||||
use crate::core_crypto::gpu::{
|
||||
cuda_multi_bit_programmable_bootstrap_lwe_ciphertext,
|
||||
cuda_programmable_bootstrap_lwe_ciphertext,
|
||||
};
|
||||
|
||||
use crate::core_crypto::commons::numeric::Numeric;
|
||||
use crate::core_crypto::gpu::add_lwe_ciphertext_vector_plaintext_scalar_async;
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::prelude::CastInto;
|
||||
use crate::integer::gpu::server_key::radix::{CudaLweCiphertextList, LweCiphertextCount};
|
||||
use crate::integer::gpu::CudaVec;
|
||||
use itertools::Itertools;
|
||||
impl CudaServerKey {
|
||||
/// Generates an encrypted `num_block` blocks unsigned integer
|
||||
/// taken uniformly in its full range using the given seed.
|
||||
/// The encryted value is oblivious to the server.
|
||||
/// It can be useful to make server random generation deterministic.
|
||||
///
|
||||
/// ```rust
|
||||
/// use tfhe::core_crypto::gpu::CudaStreams;
|
||||
/// use tfhe::core_crypto::gpu::vec::GpuIndex;
|
||||
/// use tfhe::integer::gpu::gen_keys_gpu;
|
||||
/// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64;
|
||||
/// use tfhe::Seed;
|
||||
///
|
||||
/// let size = 4;
|
||||
/// let gpu_index = 0;
|
||||
/// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index));
|
||||
///
|
||||
/// // Generate the client key and the server key:
|
||||
/// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams);
|
||||
///
|
||||
/// let d_ct_res = sks.par_generate_oblivious_pseudo_random_unsigned_integer(Seed(0), size as u64, &streams);
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&streams);
|
||||
/// // Decrypt:
|
||||
/// let dec_result: u64 = cks.decrypt_radix(&ct_res);
|
||||
///
|
||||
/// assert!(dec_result < 1 << (2 * size));
|
||||
/// ```
|
||||
pub fn par_generate_oblivious_pseudo_random_unsigned_integer(
|
||||
&self,
|
||||
seed: Seed,
|
||||
num_blocks: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaUnsignedRadixCiphertext {
|
||||
assert!(self.message_modulus.0.is_power_of_two());
|
||||
let range_log_size = self.message_modulus.0.ilog2() as u64 * num_blocks;
|
||||
|
||||
let random_bits_count = range_log_size;
|
||||
|
||||
assert!(self.message_modulus.0.is_power_of_two());
|
||||
let mut streams_vector = Vec::<CudaStreams>::with_capacity(num_blocks as usize);
|
||||
for _ in 0..num_blocks {
|
||||
streams_vector.push(CudaStreams::new_single_gpu(streams.gpu_indexes[0]));
|
||||
}
|
||||
|
||||
let message_bits_count = self.message_modulus.0.ilog2() as u64;
|
||||
|
||||
let mut deterministic_seeder = DeterministicSeeder::<DefaultRandomGenerator>::new(seed);
|
||||
|
||||
let seeds: Vec<Seed> = (0..num_blocks)
|
||||
.map(|_| deterministic_seeder.seed())
|
||||
.collect();
|
||||
|
||||
let blocks = seeds
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(i, seed)| {
|
||||
let stream_index = i;
|
||||
let i = i as u64;
|
||||
if i * message_bits_count < random_bits_count {
|
||||
// if we generate 5 bits of noise in n blocks of 2 bits, the third (i=2) block
|
||||
// must have only one bit of random
|
||||
if random_bits_count < (i + 1) * message_bits_count {
|
||||
let top_message_bits_count = random_bits_count - i * message_bits_count;
|
||||
|
||||
assert!(top_message_bits_count <= message_bits_count);
|
||||
let ct: CudaUnsignedRadixCiphertext = self
|
||||
.generate_oblivious_pseudo_random(
|
||||
seed,
|
||||
top_message_bits_count,
|
||||
&streams_vector[stream_index],
|
||||
);
|
||||
ct.ciphertext
|
||||
} else {
|
||||
let ct: CudaUnsignedRadixCiphertext = self
|
||||
.generate_oblivious_pseudo_random(
|
||||
seed,
|
||||
message_bits_count,
|
||||
&streams_vector[stream_index],
|
||||
);
|
||||
ct.ciphertext
|
||||
}
|
||||
} else {
|
||||
let ct: CudaUnsignedRadixCiphertext =
|
||||
self.create_trivial_zero_radix(1, &streams_vector[stream_index]);
|
||||
ct.ciphertext
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
self.convert_radixes_vec_to_single_radix_ciphertext(&blocks, streams)
|
||||
}
|
||||
|
||||
/// Generates an encrypted `num_block` blocks unsigned integer
|
||||
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
|
||||
/// The encryted value is oblivious to the server.
|
||||
/// It can be useful to make server random generation deterministic.
|
||||
///
|
||||
/// ```rust
|
||||
/// use tfhe::core_crypto::gpu::CudaStreams;
|
||||
/// use tfhe::core_crypto::gpu::vec::GpuIndex;
|
||||
/// use tfhe::integer::gpu::gen_keys_gpu;
|
||||
/// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64;
|
||||
/// use tfhe::Seed;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index));
|
||||
/// let size = 4;
|
||||
///
|
||||
/// // Generate the client key and the server key:
|
||||
/// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams);
|
||||
///
|
||||
/// let random_bits_count = 3;
|
||||
///
|
||||
/// let d_ct_res = sks.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
/// Seed(0),
|
||||
/// random_bits_count,
|
||||
/// size as u64,
|
||||
/// &streams,
|
||||
/// );
|
||||
/// let ct_res = d_ct_res.to_radix_ciphertext(&streams);
|
||||
/// // Decrypt:
|
||||
/// let dec_result: u64 = cks.decrypt_radix(&ct_res);
|
||||
/// assert!(dec_result < (1 << random_bits_count));
|
||||
/// ```
|
||||
pub fn par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
&self,
|
||||
seed: Seed,
|
||||
random_bits_count: u64,
|
||||
num_blocks: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaUnsignedRadixCiphertext {
|
||||
assert!(self.message_modulus.0.is_power_of_two());
|
||||
let range_log_size = self.message_modulus.0.ilog2() as u64 * num_blocks;
|
||||
|
||||
assert!(
|
||||
random_bits_count <= range_log_size,
|
||||
"The range asked for a random value (=[0, 2^{random_bits_count}[) does not fit in the available range [0, 2^{range_log_size}[",
|
||||
);
|
||||
|
||||
assert!(self.message_modulus.0.is_power_of_two());
|
||||
let mut streams_vector = Vec::<CudaStreams>::with_capacity(num_blocks as usize);
|
||||
for _ in 0..num_blocks {
|
||||
streams_vector.push(CudaStreams::new_single_gpu(streams.gpu_indexes[0]));
|
||||
}
|
||||
let message_bits_count = self.message_modulus.0.ilog2() as u64;
|
||||
|
||||
let mut deterministic_seeder = DeterministicSeeder::<DefaultRandomGenerator>::new(seed);
|
||||
|
||||
let seeds: Vec<Seed> = (0..num_blocks)
|
||||
.map(|_| deterministic_seeder.seed())
|
||||
.collect();
|
||||
|
||||
let blocks = seeds
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(i, seed)| {
|
||||
let stream_index = i;
|
||||
let i = i as u64;
|
||||
|
||||
if i * message_bits_count < random_bits_count {
|
||||
// if we generate 5 bits of noise in n blocks of 2 bits, the third (i=2) block
|
||||
// must have only one bit of random
|
||||
if random_bits_count < (i + 1) * message_bits_count {
|
||||
let top_message_bits_count = random_bits_count - i * message_bits_count;
|
||||
|
||||
assert!(top_message_bits_count <= message_bits_count);
|
||||
|
||||
let ct: CudaUnsignedRadixCiphertext = self
|
||||
.generate_oblivious_pseudo_random(
|
||||
seed,
|
||||
top_message_bits_count,
|
||||
&streams_vector[stream_index],
|
||||
);
|
||||
ct.ciphertext
|
||||
} else {
|
||||
let ct: CudaUnsignedRadixCiphertext = self
|
||||
.generate_oblivious_pseudo_random(
|
||||
seed,
|
||||
message_bits_count,
|
||||
&streams_vector[stream_index],
|
||||
);
|
||||
ct.ciphertext
|
||||
}
|
||||
} else {
|
||||
let ct: CudaUnsignedRadixCiphertext =
|
||||
self.create_trivial_zero_radix(1, &streams_vector[stream_index]);
|
||||
ct.ciphertext
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
self.convert_radixes_vec_to_single_radix_ciphertext(&blocks, streams)
|
||||
}
|
||||
|
||||
/// Generates an encrypted `num_block` blocks signed integer
|
||||
/// taken uniformly in its full range using the given seed.
|
||||
/// The encryted value is oblivious to the server.
|
||||
/// It can be useful to make server random generation deterministic.
|
||||
///
|
||||
/// ```rust
|
||||
/// use tfhe::core_crypto::gpu::CudaStreams;
|
||||
/// use tfhe::core_crypto::gpu::vec::GpuIndex;
|
||||
/// use tfhe::integer::gpu::gen_keys_gpu;
|
||||
/// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64;
|
||||
/// use tfhe::Seed;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index));
|
||||
/// let size = 4;
|
||||
///
|
||||
/// // Generate the client key and the server key:
|
||||
/// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams);
|
||||
///
|
||||
/// let d_ct_res = sks.par_generate_oblivious_pseudo_random_signed_integer(Seed(0), size as u64, &streams);
|
||||
/// let ct_res = d_ct_res.to_signed_radix_ciphertext(&streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec_result: i64 = cks.decrypt_signed_radix(&ct_res);
|
||||
/// assert!(dec_result < 1 << (2 * size - 1));
|
||||
/// assert!(dec_result >= -(1 << (2 * size - 1)));
|
||||
/// ```
|
||||
pub fn par_generate_oblivious_pseudo_random_signed_integer(
|
||||
&self,
|
||||
seed: Seed,
|
||||
num_blocks: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaSignedRadixCiphertext {
|
||||
assert!(self.message_modulus.0.is_power_of_two());
|
||||
let message_bits_count = self.message_modulus.0.ilog2() as u64;
|
||||
let mut streams_vector = Vec::<CudaStreams>::with_capacity(num_blocks as usize);
|
||||
for _ in 0..num_blocks {
|
||||
streams_vector.push(CudaStreams::new_single_gpu(streams.gpu_indexes[0]));
|
||||
}
|
||||
let mut deterministic_seeder = DeterministicSeeder::<DefaultRandomGenerator>::new(seed);
|
||||
|
||||
let seeds: Vec<Seed> = (0..num_blocks)
|
||||
.map(|_| deterministic_seeder.seed())
|
||||
.collect();
|
||||
|
||||
let blocks = seeds
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(i, seed)| {
|
||||
let stream_index = i;
|
||||
let ct: CudaSignedRadixCiphertext = self.generate_oblivious_pseudo_random(
|
||||
seed,
|
||||
message_bits_count,
|
||||
&streams_vector[stream_index],
|
||||
);
|
||||
ct.ciphertext
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
self.convert_radixes_vec_to_single_radix_ciphertext(&blocks, streams)
|
||||
}
|
||||
|
||||
/// Generates an encrypted `num_block` blocks signed integer
|
||||
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
|
||||
/// The encryted value is oblivious to the server.
|
||||
/// It can be useful to make server random generation deterministic.
|
||||
///
|
||||
/// ```rust
|
||||
/// use tfhe::core_crypto::gpu::CudaStreams;
|
||||
/// use tfhe::core_crypto::gpu::vec::GpuIndex;
|
||||
/// use tfhe::integer::gpu::gen_keys_gpu;
|
||||
/// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64;
|
||||
/// use tfhe::Seed;
|
||||
///
|
||||
/// let gpu_index = 0;
|
||||
/// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index));
|
||||
/// let size = 4;
|
||||
///
|
||||
/// // Generate the client key and the server key:
|
||||
/// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams);
|
||||
///
|
||||
/// let random_bits_count = 3;
|
||||
///
|
||||
/// let d_ct_res = sks.par_generate_oblivious_pseudo_random_signed_integer_bounded(
|
||||
/// Seed(0),
|
||||
/// random_bits_count,
|
||||
/// size as u64,
|
||||
/// &streams,
|
||||
/// );
|
||||
/// let ct_res = d_ct_res.to_signed_radix_ciphertext(&streams);
|
||||
///
|
||||
/// // Decrypt:
|
||||
/// let dec_result: i64 = cks.decrypt_signed_radix(&ct_res);
|
||||
/// assert!(dec_result >= 0);
|
||||
/// assert!(dec_result < (1 << random_bits_count));
|
||||
/// ```
|
||||
pub fn par_generate_oblivious_pseudo_random_signed_integer_bounded(
|
||||
&self,
|
||||
seed: Seed,
|
||||
random_bits_count: u64,
|
||||
num_blocks: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaSignedRadixCiphertext {
|
||||
assert!(self.message_modulus.0.is_power_of_two());
|
||||
let range_log_size = self.message_modulus.0.ilog2() as u64 * num_blocks;
|
||||
|
||||
#[allow(clippy::int_plus_one)]
|
||||
{
|
||||
assert!(
|
||||
random_bits_count + 1 <= range_log_size,
|
||||
"The range asked for a random value (=[0, 2^{}[) does not fit in the available range [-2^{}, 2^{}[",
|
||||
random_bits_count, range_log_size-1, range_log_size-1,
|
||||
);
|
||||
}
|
||||
|
||||
assert!(self.message_modulus.0.is_power_of_two());
|
||||
let mut streams_vector = Vec::<CudaStreams>::with_capacity(num_blocks as usize);
|
||||
for _ in 0..num_blocks {
|
||||
streams_vector.push(CudaStreams::new_single_gpu(streams.gpu_indexes[0]));
|
||||
}
|
||||
let message_bits_count = self.message_modulus.0.ilog2() as u64;
|
||||
|
||||
let mut deterministic_seeder = DeterministicSeeder::<DefaultRandomGenerator>::new(seed);
|
||||
|
||||
let seeds = (0..num_blocks).map(|_| deterministic_seeder.seed());
|
||||
|
||||
let blocks = seeds
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(i, seed)| {
|
||||
let stream_index = i;
|
||||
let i = i as u64;
|
||||
if i * message_bits_count < random_bits_count {
|
||||
// if we generate 5 bits of noise in n blocks of 2 bits, the third (i=2)
|
||||
// block must have only one bit of random
|
||||
if random_bits_count < (i + 1) * message_bits_count {
|
||||
let top_message_bits_count = random_bits_count - i * message_bits_count;
|
||||
|
||||
assert!(top_message_bits_count <= message_bits_count);
|
||||
let ct: CudaUnsignedRadixCiphertext = self
|
||||
.generate_oblivious_pseudo_random(
|
||||
seed,
|
||||
top_message_bits_count,
|
||||
&streams_vector[stream_index],
|
||||
);
|
||||
ct.ciphertext
|
||||
} else {
|
||||
let ct: CudaUnsignedRadixCiphertext = self
|
||||
.generate_oblivious_pseudo_random(
|
||||
seed,
|
||||
message_bits_count,
|
||||
&streams_vector[stream_index],
|
||||
);
|
||||
ct.ciphertext
|
||||
}
|
||||
} else {
|
||||
let ct: CudaUnsignedRadixCiphertext =
|
||||
self.create_trivial_zero_radix(1, &streams_vector[stream_index]);
|
||||
ct.ciphertext
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
self.convert_radixes_vec_to_single_radix_ciphertext(&blocks, streams)
|
||||
}
|
||||
|
||||
/// Uniformly generates a random encrypted value in `[0, 2^random_bits_count[`
|
||||
/// `2^random_bits_count` must be smaller than the message modulus
|
||||
/// The encryted value is oblivious to the server
|
||||
pub fn generate_oblivious_pseudo_random<T>(
|
||||
&self,
|
||||
seed: Seed,
|
||||
random_bits_count: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
assert!(
|
||||
1 << random_bits_count <= self.message_modulus.0,
|
||||
"The range asked for a random value (=[0, 2^{}[) does not fit in the available range [0, {}[",
|
||||
random_bits_count, self.message_modulus.0
|
||||
);
|
||||
self.generate_oblivious_pseudo_random_message_and_carry(seed, random_bits_count, streams)
|
||||
}
|
||||
|
||||
/// Uniformly generates a random value in `[0, 2^random_bits_count[`
|
||||
/// The encryted value is oblivious to the server
|
||||
pub(crate) fn generate_oblivious_pseudo_random_message_and_carry<T>(
|
||||
&self,
|
||||
seed: Seed,
|
||||
random_bits_count: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
assert!(
|
||||
self.message_modulus.0.is_power_of_two(),
|
||||
"The message modulus(={}), must be a power of 2 to use the OPRF",
|
||||
self.message_modulus.0
|
||||
);
|
||||
let message_bits_count = self.message_modulus.0.ilog2() as u64;
|
||||
|
||||
assert!(
|
||||
self.carry_modulus.0.is_power_of_two(),
|
||||
"The carry modulus(={}), must be a power of 2 to use the OPRF",
|
||||
self.carry_modulus.0
|
||||
);
|
||||
let carry_bits_count = self.carry_modulus.0.ilog2() as u64;
|
||||
|
||||
assert!(
|
||||
random_bits_count <= carry_bits_count + message_bits_count,
|
||||
"The number of random bits asked for (={random_bits_count}) is bigger than carry_bits_count (={carry_bits_count}) + message_bits_count(={message_bits_count})",
|
||||
);
|
||||
self.generate_oblivious_pseudo_random_custom_encoding(
|
||||
seed,
|
||||
random_bits_count,
|
||||
1 + carry_bits_count + message_bits_count,
|
||||
streams,
|
||||
)
|
||||
}
|
||||
|
||||
/// Uniformly generates a random encrypted value in `[0, 2^random_bits_count[`
|
||||
/// The output in in the form 0000rrr000noise (rbc=3, fbc=7)
|
||||
/// The encryted value is oblivious to the server
|
||||
pub(crate) fn generate_oblivious_pseudo_random_custom_encoding<T>(
|
||||
&self,
|
||||
seed: Seed,
|
||||
random_bits_count: u64,
|
||||
full_bits_count: u64,
|
||||
streams: &CudaStreams,
|
||||
) -> T
|
||||
where
|
||||
T: CudaIntegerRadixCiphertext,
|
||||
{
|
||||
assert!(
|
||||
random_bits_count <= full_bits_count,
|
||||
"The number of random bits asked for (={random_bits_count}) is bigger than full_bits_count (={full_bits_count})"
|
||||
);
|
||||
|
||||
let (in_lwe_size, out_lwe_dimension, polynomial_size) = match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => (
|
||||
d_bsk.input_lwe_dimension().to_lwe_size(),
|
||||
d_bsk.output_lwe_dimension(),
|
||||
d_bsk.polynomial_size(),
|
||||
),
|
||||
CudaBootstrappingKey::MultiBit(d_bsk) => (
|
||||
d_bsk.input_lwe_dimension().to_lwe_size(),
|
||||
d_bsk.output_lwe_dimension(),
|
||||
d_bsk.polynomial_size(),
|
||||
),
|
||||
};
|
||||
|
||||
let seeded = create_random_from_seed_modulus_switched(
|
||||
seed,
|
||||
in_lwe_size,
|
||||
polynomial_size.to_blind_rotation_input_modulus_log(),
|
||||
self.ciphertext_modulus,
|
||||
);
|
||||
|
||||
let p = 1 << random_bits_count;
|
||||
|
||||
let delta = 1_u64 << (64 - full_bits_count);
|
||||
|
||||
let poly_delta = 2 * polynomial_size.0 as u64 / p;
|
||||
|
||||
let lut_no_encode: LookupTableOwned =
|
||||
self.generate_lookup_table_no_encode(|x| (2 * (x / poly_delta) + 1) * delta / 2);
|
||||
|
||||
let num_ct_blocks = 1;
|
||||
let ct_seeded = CudaLweCiphertextList::from_lwe_ciphertext(&seeded, streams);
|
||||
|
||||
let mut ct_out: T = self.create_trivial_zero_radix(num_ct_blocks, streams);
|
||||
|
||||
let number_of_messages = 1;
|
||||
let d_accumulator =
|
||||
CudaGlweCiphertextList::from_glwe_ciphertext(&lut_no_encode.acc, streams);
|
||||
let mut lut_vector_indexes: Vec<u64> = vec![u64::ZERO; number_of_messages];
|
||||
for (i, ind) in lut_vector_indexes.iter_mut().enumerate() {
|
||||
*ind = <usize as CastInto<u64>>::cast_into(i);
|
||||
}
|
||||
|
||||
let mut d_lut_vector_indexes =
|
||||
unsafe { CudaVec::<u64>::new_async(number_of_messages, streams, 0) };
|
||||
unsafe { d_lut_vector_indexes.copy_from_cpu_async(&lut_vector_indexes, streams, 0) };
|
||||
let lwe_indexes_usize: Vec<usize> = (0..num_ct_blocks).collect_vec();
|
||||
let lwe_indexes = lwe_indexes_usize
|
||||
.iter()
|
||||
.map(|&x| <usize as CastInto<u64>>::cast_into(x))
|
||||
.collect_vec();
|
||||
let mut d_output_indexes = unsafe { CudaVec::<u64>::new_async(num_ct_blocks, streams, 0) };
|
||||
let mut d_input_indexes = unsafe { CudaVec::<u64>::new_async(num_ct_blocks, streams, 0) };
|
||||
unsafe {
|
||||
d_input_indexes.copy_from_cpu_async(&lwe_indexes, streams, 0);
|
||||
d_output_indexes.copy_from_cpu_async(&lwe_indexes, streams, 0);
|
||||
}
|
||||
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
cuda_programmable_bootstrap_lwe_ciphertext(
|
||||
&ct_seeded,
|
||||
&mut ct_out.as_mut().d_blocks,
|
||||
&d_accumulator,
|
||||
&d_lut_vector_indexes,
|
||||
&d_output_indexes,
|
||||
&d_input_indexes,
|
||||
LweCiphertextCount(num_ct_blocks),
|
||||
d_bsk,
|
||||
streams,
|
||||
);
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
cuda_multi_bit_programmable_bootstrap_lwe_ciphertext(
|
||||
&ct_seeded,
|
||||
&mut ct_out.as_mut().d_blocks,
|
||||
&d_accumulator,
|
||||
&d_lut_vector_indexes,
|
||||
&d_output_indexes,
|
||||
&d_input_indexes,
|
||||
d_multibit_bsk,
|
||||
streams,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let plaintext_to_add = (p - 1) * delta / 2;
|
||||
let ct_cloned = ct_out.duplicate(streams);
|
||||
unsafe {
|
||||
add_lwe_ciphertext_vector_plaintext_scalar_async(
|
||||
streams,
|
||||
&mut ct_out.as_mut().d_blocks.0.d_vec,
|
||||
&ct_cloned.as_ref().d_blocks.0.d_vec,
|
||||
plaintext_to_add,
|
||||
out_lwe_dimension,
|
||||
num_ct_blocks as u32,
|
||||
);
|
||||
}
|
||||
streams.synchronize();
|
||||
ct_out
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
use crate::core_crypto::gpu::vec::GpuIndex;
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::decrypt_lwe_ciphertext;
|
||||
use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext;
|
||||
use crate::integer::gpu::server_key::CudaBootstrappingKey;
|
||||
use crate::integer::gpu::{gen_keys_gpu, CudaServerKey};
|
||||
use crate::integer::{ClientKey, RadixCiphertext};
|
||||
use crate::shortint::oprf::create_random_from_seed_modulus_switched;
|
||||
use crate::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64;
|
||||
use rayon::prelude::*;
|
||||
use statrs::distribution::ContinuousCDF;
|
||||
use std::collections::HashMap;
|
||||
use tfhe_csprng::seeders::Seed;
|
||||
|
||||
fn square(a: f64) -> f64 {
|
||||
a * a
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn oprf_compare_plain_ci_run_filter() {
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index));
|
||||
let (ck, gpu_sk) = gen_keys_gpu(
|
||||
PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
|
||||
&streams,
|
||||
);
|
||||
|
||||
for seed in 0..1000 {
|
||||
oprf_compare_plain_from_seed(Seed(seed), &ck, &gpu_sk, &streams);
|
||||
}
|
||||
}
|
||||
|
||||
fn oprf_compare_plain_from_seed(
|
||||
seed: Seed,
|
||||
ck: &ClientKey,
|
||||
sk: &CudaServerKey,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
let params = ck.parameters();
|
||||
|
||||
let random_bits_count = 2;
|
||||
|
||||
let input_p = 2 * params.polynomial_size().0 as u64;
|
||||
|
||||
let log_input_p = input_p.ilog2();
|
||||
|
||||
let p_prime = 1 << random_bits_count;
|
||||
|
||||
let output_p = 2 * params.carry_modulus().0 * params.message_modulus().0;
|
||||
|
||||
let poly_delta = 2 * params.polynomial_size().0 as u64 / p_prime;
|
||||
|
||||
let d_img: CudaUnsignedRadixCiphertext =
|
||||
sk.generate_oblivious_pseudo_random(seed, random_bits_count, streams);
|
||||
let img: RadixCiphertext = d_img.to_radix_ciphertext(streams);
|
||||
|
||||
let (lwe_size, polynomial_size) = match &sk.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => (
|
||||
d_bsk.input_lwe_dimension().to_lwe_size(),
|
||||
d_bsk.polynomial_size(),
|
||||
),
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => (
|
||||
d_multibit_bsk.input_lwe_dimension().to_lwe_size(),
|
||||
d_multibit_bsk.polynomial_size(),
|
||||
),
|
||||
};
|
||||
|
||||
let ct = create_random_from_seed_modulus_switched(
|
||||
seed,
|
||||
lwe_size,
|
||||
polynomial_size.to_blind_rotation_input_modulus_log(),
|
||||
sk.ciphertext_modulus,
|
||||
);
|
||||
|
||||
let sk = ck.key.small_lwe_secret_key();
|
||||
let plain_prf_input = decrypt_lwe_ciphertext(&sk, &ct)
|
||||
.0
|
||||
.wrapping_add(1 << (64 - log_input_p - 1))
|
||||
>> (64 - log_input_p);
|
||||
|
||||
let half_negacyclic_part = |x| 2 * (x / poly_delta) + 1;
|
||||
|
||||
let negacyclic_part = |x| {
|
||||
assert!(x < input_p);
|
||||
if x < input_p / 2 {
|
||||
half_negacyclic_part(x)
|
||||
} else {
|
||||
2 * output_p - half_negacyclic_part(x - (input_p / 2))
|
||||
}
|
||||
};
|
||||
|
||||
let prf = |x| {
|
||||
let a = (negacyclic_part(x) + p_prime - 1) % (2 * output_p);
|
||||
assert!(a % 2 == 0);
|
||||
a / 2
|
||||
};
|
||||
|
||||
let expected_output = prf(plain_prf_input);
|
||||
|
||||
let output = ck.key.decrypt_message_and_carry(&img.blocks[0]);
|
||||
|
||||
assert!(output < p_prime);
|
||||
assert_eq!(output, expected_output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn oprf_test_uniformity_ci_run_filter() {
|
||||
let sample_count: usize = 100_000;
|
||||
|
||||
let p_value_limit: f64 = 0.000_01;
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index));
|
||||
let (ck, gpu_sk) = gen_keys_gpu(
|
||||
PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
|
||||
&streams,
|
||||
);
|
||||
|
||||
let test_uniformity = |distinct_values: u64, f: &(dyn Fn(usize) -> u64 + Sync)| {
|
||||
test_uniformity(sample_count, p_value_limit, distinct_values, f)
|
||||
};
|
||||
|
||||
let random_bits_count = 2;
|
||||
|
||||
test_uniformity(1 << random_bits_count, &|seed| {
|
||||
let d_img: CudaUnsignedRadixCiphertext = gpu_sk.generate_oblivious_pseudo_random(
|
||||
Seed(seed as u128),
|
||||
random_bits_count,
|
||||
&streams,
|
||||
);
|
||||
let img: RadixCiphertext = d_img.to_radix_ciphertext(&streams);
|
||||
ck.key.decrypt_message_and_carry(&img.blocks[0])
|
||||
});
|
||||
}
|
||||
|
||||
pub fn test_uniformity<F>(sample_count: usize, p_value_limit: f64, distinct_values: u64, f: F)
|
||||
where
|
||||
F: Sync + Fn(usize) -> u64,
|
||||
{
|
||||
let p_value = uniformity_p_value(f, sample_count, distinct_values);
|
||||
|
||||
assert!(
|
||||
p_value_limit < p_value,
|
||||
"p_value (={p_value}) expected to be bigger than {p_value_limit}"
|
||||
);
|
||||
}
|
||||
|
||||
fn uniformity_p_value<F>(f: F, sample_count: usize, distinct_values: u64) -> f64
|
||||
where
|
||||
F: Sync + Fn(usize) -> u64,
|
||||
{
|
||||
let values: Vec<_> = (0..sample_count).into_par_iter().map(&f).collect();
|
||||
|
||||
let mut values_count = HashMap::new();
|
||||
|
||||
for i in &values {
|
||||
assert!(*i < distinct_values, "i {} dv{}", *i, distinct_values);
|
||||
|
||||
*values_count.entry(i).or_insert(0) += 1;
|
||||
}
|
||||
|
||||
let single_expected_count = sample_count as f64 / distinct_values as f64;
|
||||
|
||||
// https://en.wikipedia.org/wiki/Pearson's_chi-squared_test
|
||||
let distance: f64 = (0..distinct_values)
|
||||
.map(|value| *values_count.get(&value).unwrap_or(&0))
|
||||
.map(|count| square(count as f64 - single_expected_count) / single_expected_count)
|
||||
.sum();
|
||||
|
||||
statrs::distribution::ChiSquared::new((distinct_values - 1) as f64)
|
||||
.unwrap()
|
||||
.sf(distance)
|
||||
}
|
||||
}
|
||||
@@ -3,7 +3,9 @@ use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::LweBskGroupingFactor;
|
||||
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
|
||||
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
|
||||
use crate::integer::gpu::server_key::radix::CudaRadixCiphertext;
|
||||
use crate::integer::gpu::server_key::radix::{
|
||||
CudaBlockInfo, CudaRadixCiphertext, CudaRadixCiphertextInfo,
|
||||
};
|
||||
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey};
|
||||
use crate::integer::gpu::{apply_bivariate_lut_kb_async, PBSType};
|
||||
|
||||
@@ -23,9 +25,16 @@ impl CudaServerKey {
|
||||
.map(|ciphertext| &ciphertext.as_ref().d_blocks),
|
||||
streams,
|
||||
);
|
||||
let vec_block_info: Vec<CudaBlockInfo> = radixes
|
||||
.iter()
|
||||
.flat_map(|ct| ct.as_ref().info.blocks.clone())
|
||||
.collect();
|
||||
let radix_info = CudaRadixCiphertextInfo {
|
||||
blocks: vec_block_info,
|
||||
};
|
||||
CudaIntegerRadixCiphertext::from(CudaRadixCiphertext {
|
||||
d_blocks: packed_list,
|
||||
info: radixes[0].as_ref().info.clone(),
|
||||
info: radix_info,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::{LweBskGroupingFactor, UnsignedInteger};
|
||||
use crate::integer::block_decomposition::{BlockDecomposer, Decomposable, DecomposableInto};
|
||||
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
|
||||
use crate::integer::gpu::ciphertext::info::{CudaBlockInfo, CudaRadixCiphertextInfo};
|
||||
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext};
|
||||
use crate::integer::gpu::server_key::radix::CudaRadixCiphertext;
|
||||
use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey};
|
||||
@@ -31,14 +32,17 @@ impl CudaServerKey {
|
||||
.map(|ciphertext| &ciphertext.0.ciphertext.d_blocks),
|
||||
streams,
|
||||
);
|
||||
|
||||
let blocks_ct: CudaUnsignedRadixCiphertext = CudaUnsignedRadixCiphertext {
|
||||
ciphertext: CudaRadixCiphertext {
|
||||
d_blocks: packed_list,
|
||||
info: selectors[0].0.ciphertext.info.clone(),
|
||||
},
|
||||
let vec_block_info: Vec<CudaBlockInfo> = selectors
|
||||
.iter()
|
||||
.flat_map(|ct| ct.0.ciphertext.info.blocks.clone())
|
||||
.collect();
|
||||
let radix_info = CudaRadixCiphertextInfo {
|
||||
blocks: vec_block_info,
|
||||
};
|
||||
blocks_ct
|
||||
CudaIntegerRadixCiphertext::from(CudaRadixCiphertext {
|
||||
d_blocks: packed_list,
|
||||
info: radix_info,
|
||||
})
|
||||
}
|
||||
#[allow(clippy::unused_self)]
|
||||
pub(crate) fn convert_radixes_vec_to_single_radix_ciphertext<T>(
|
||||
@@ -56,10 +60,16 @@ impl CudaServerKey {
|
||||
radixes.iter().map(|ciphertext| &ciphertext.d_blocks),
|
||||
streams,
|
||||
);
|
||||
|
||||
let vec_block_info: Vec<CudaBlockInfo> = radixes
|
||||
.iter()
|
||||
.flat_map(|ct| ct.info.blocks.clone())
|
||||
.collect();
|
||||
let radix_info = CudaRadixCiphertextInfo {
|
||||
blocks: vec_block_info,
|
||||
};
|
||||
CudaIntegerRadixCiphertext::from(CudaRadixCiphertext {
|
||||
d_blocks: packed_list,
|
||||
info: radixes[0].info.clone(),
|
||||
info: radix_info,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -160,20 +160,15 @@ where
|
||||
|
||||
pub(crate) fn fill_accumulator_no_encoding<F, C>(
|
||||
accumulator: &mut GlweCiphertext<C>,
|
||||
server_key: &ServerKey,
|
||||
polynomial_size: PolynomialSize,
|
||||
glwe_size: GlweSize,
|
||||
f: F,
|
||||
) where
|
||||
C: ContainerMut<Element = u64>,
|
||||
F: Fn(u64) -> u64,
|
||||
{
|
||||
assert_eq!(
|
||||
accumulator.polynomial_size(),
|
||||
server_key.bootstrapping_key.polynomial_size()
|
||||
);
|
||||
assert_eq!(
|
||||
accumulator.glwe_size(),
|
||||
server_key.bootstrapping_key.glwe_size()
|
||||
);
|
||||
assert_eq!(accumulator.polynomial_size(), polynomial_size);
|
||||
assert_eq!(accumulator.glwe_size(), glwe_size);
|
||||
|
||||
let mut accumulator_view = accumulator.as_mut_view();
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use super::Ciphertext;
|
||||
use crate::core_crypto::fft_impl::common::modulus_switch;
|
||||
use crate::core_crypto::prelude::{
|
||||
keyswitch_lwe_ciphertext, lwe_ciphertext_plaintext_add_assign, CiphertextModulusLog,
|
||||
LweCiphertext, LweSize, Plaintext,
|
||||
keyswitch_lwe_ciphertext, lwe_ciphertext_plaintext_add_assign, CiphertextModulus,
|
||||
CiphertextModulusLog, LweCiphertext, LweSize, Plaintext,
|
||||
};
|
||||
use crate::shortint::ciphertext::Degree;
|
||||
use crate::shortint::engine::ShortintEngine;
|
||||
@@ -28,35 +28,33 @@ pub fn sha3_hash(values: &mut [u64], seed: Seed) {
|
||||
*value = u64::from_le_bytes(bytes);
|
||||
}
|
||||
}
|
||||
pub fn create_random_from_seed(
|
||||
seed: Seed,
|
||||
lwe_size: LweSize,
|
||||
ciphertext_modulus: CiphertextModulus<u64>,
|
||||
) -> LweCiphertext<Vec<u64>> {
|
||||
let mut ct = LweCiphertext::new(0, lwe_size, ciphertext_modulus);
|
||||
|
||||
sha3_hash(ct.get_mut_mask().as_mut(), seed);
|
||||
|
||||
ct
|
||||
}
|
||||
|
||||
pub fn create_random_from_seed_modulus_switched(
|
||||
seed: Seed,
|
||||
lwe_size: LweSize,
|
||||
log_modulus: CiphertextModulusLog,
|
||||
ciphertext_modulus: CiphertextModulus<u64>,
|
||||
) -> LweCiphertext<Vec<u64>> {
|
||||
let mut ct = create_random_from_seed(seed, lwe_size, ciphertext_modulus);
|
||||
|
||||
for i in ct.as_mut() {
|
||||
*i = modulus_switch(*i, log_modulus) << (64 - log_modulus.0);
|
||||
}
|
||||
|
||||
ct
|
||||
}
|
||||
impl ServerKey {
|
||||
pub(crate) fn create_random_from_seed(
|
||||
&self,
|
||||
seed: Seed,
|
||||
lwe_size: LweSize,
|
||||
) -> LweCiphertext<Vec<u64>> {
|
||||
let mut ct = LweCiphertext::new(0, lwe_size, self.ciphertext_modulus);
|
||||
|
||||
sha3_hash(ct.get_mut_mask().as_mut(), seed);
|
||||
|
||||
ct
|
||||
}
|
||||
|
||||
pub(crate) fn create_random_from_seed_modulus_switched(
|
||||
&self,
|
||||
seed: Seed,
|
||||
lwe_size: LweSize,
|
||||
log_modulus: CiphertextModulusLog,
|
||||
) -> LweCiphertext<Vec<u64>> {
|
||||
let mut ct = self.create_random_from_seed(seed, lwe_size);
|
||||
|
||||
for i in ct.as_mut() {
|
||||
*i = modulus_switch(*i, log_modulus) << (64 - log_modulus.0);
|
||||
}
|
||||
|
||||
ct
|
||||
}
|
||||
|
||||
/// Uniformly generates a random encrypted value in `[0, 2^random_bits_count[`
|
||||
/// `2^random_bits_count` must be smaller than the message modulus
|
||||
/// The encryted value is oblivious to the server
|
||||
@@ -123,12 +121,13 @@ impl ServerKey {
|
||||
|
||||
let in_lwe_size = self.bootstrapping_key.input_lwe_dimension().to_lwe_size();
|
||||
|
||||
let seeded = self.create_random_from_seed_modulus_switched(
|
||||
let seeded = create_random_from_seed_modulus_switched(
|
||||
seed,
|
||||
in_lwe_size,
|
||||
self.bootstrapping_key
|
||||
.polynomial_size()
|
||||
.to_blind_rotation_input_modulus_log(),
|
||||
self.ciphertext_modulus,
|
||||
);
|
||||
|
||||
let p = 1 << random_bits_count;
|
||||
@@ -183,6 +182,7 @@ impl ServerKey {
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
use crate::core_crypto::prelude::decrypt_lwe_ciphertext;
|
||||
use crate::shortint::oprf::create_random_from_seed_modulus_switched;
|
||||
use crate::shortint::{ClientKey, ServerKey};
|
||||
use rayon::prelude::*;
|
||||
use statrs::distribution::ContinuousCDF;
|
||||
@@ -223,12 +223,13 @@ pub(crate) mod test {
|
||||
|
||||
let lwe_size = sk.bootstrapping_key.input_lwe_dimension().to_lwe_size();
|
||||
|
||||
let ct = sk.create_random_from_seed_modulus_switched(
|
||||
let ct = create_random_from_seed_modulus_switched(
|
||||
seed,
|
||||
lwe_size,
|
||||
sk.bootstrapping_key
|
||||
.polynomial_size()
|
||||
.to_blind_rotation_input_modulus_log(),
|
||||
sk.ciphertext_modulus,
|
||||
);
|
||||
|
||||
let sk = ck.small_lwe_secret_key();
|
||||
|
||||
@@ -640,7 +640,12 @@ impl ServerKey {
|
||||
self.bootstrapping_key.polynomial_size(),
|
||||
self.ciphertext_modulus,
|
||||
);
|
||||
fill_accumulator_no_encoding(&mut acc, self, f);
|
||||
fill_accumulator_no_encoding(
|
||||
&mut acc,
|
||||
self.bootstrapping_key.polynomial_size(),
|
||||
self.bootstrapping_key.glwe_size(),
|
||||
f,
|
||||
);
|
||||
|
||||
LookupTableOwned {
|
||||
acc,
|
||||
|
||||
Reference in New Issue
Block a user