mirror of https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-13 16:47:59 -05:00

Compare commits: 19 commits (release/1.)
Commits (SHA1):

- cb4d62b40a
- 7a0c054095
- ddb7d56f56
- cbe39c8e98
- 27364857f1
- 7043246c17
- 51735fb8ed
- 23a348c9ae
- 61b616b784
- df48e176f3
- dd2345df6b
- 933800ea6f
- 3e4cee3a75
- 00ea9b8e07
- 23ce85f6a2
- 126a95e929
- 23fffb1443
- 6d58a54266
- 9b8d5f5a43
@@ -2,6 +2,8 @@
|
||||
ignore = [
|
||||
# Ignoring unmaintained 'paste' advisory as it is a widely used, low-risk build dependency.
|
||||
"RUSTSEC-2024-0436",
|
||||
# Ignoring unmaintained 'bincode' crate. Getting rid of it would be too complex in the short term.
|
||||
"RUSTSEC-2025-0141",
|
||||
]
|
||||
|
||||
[output]
|
||||
|
||||
2  .github/actions/gpu_setup/action.yml (vendored)
@@ -23,6 +23,8 @@ runs:
|
||||
echo "${CMAKE_SCRIPT_SHA} cmake-${CMAKE_VERSION}-linux-x86_64.sh" > checksum
|
||||
sha256sum -c checksum
|
||||
sudo bash cmake-"${CMAKE_VERSION}"-linux-x86_64.sh --skip-license --prefix=/usr/ --exclude-subdir
|
||||
sudo apt-get clean
|
||||
sudo rm -rf /var/lib/apt/lists/*
|
||||
sudo apt update
|
||||
sudo apt remove -y unattended-upgrades
|
||||
sudo apt install -y cmake-format libclang-dev
|
||||
|
||||
1  .gitignore (vendored)
@@ -10,6 +10,7 @@ target/
|
||||
**/*.rmeta
|
||||
**/Cargo.lock
|
||||
**/*.bin
|
||||
**/.DS_Store
|
||||
|
||||
# Some of our bench outputs
|
||||
/tfhe/benchmarks_parameters
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
/tfhe/src/core_crypto/gpu @agnesLeroy
|
||||
/tfhe/src/core_crypto/hpu @zama-ai/hardware
|
||||
|
||||
/tfhe/src/shortint/ @mayeul-zama
|
||||
/tfhe/src/shortint/ @mayeul-zama @nsarlin-zama
|
||||
|
||||
/tfhe/src/integer/ @tmontaigu
|
||||
/tfhe/src/integer/gpu @agnesLeroy
|
||||
@@ -19,8 +19,12 @@
|
||||
|
||||
/tfhe/src/high_level_api/ @tmontaigu
|
||||
|
||||
/tfhe-zk-pok/ @nsarlin-zama
|
||||
|
||||
/tfhe-benchmark/ @soonum
|
||||
|
||||
/utils/ @nsarlin-zama
|
||||
|
||||
/Makefile @IceTDrinker @soonum
|
||||
|
||||
/mockups/tfhe-hpu-mockup @zama-ai/hardware
|
||||
|
||||
@@ -36,6 +36,7 @@ rayon = "1.11"
|
||||
serde = { version = "1.0", default-features = false }
|
||||
wasm-bindgen = "0.2.101"
|
||||
getrandom = "0.2.8"
|
||||
# The project maintainers consider that this is the last version of the 1.3 branch, any newer version should not be trusted
|
||||
bincode = "=1.3.3"
|
||||
|
||||
[profile.bench]
|
||||
|
||||
@@ -65,6 +65,16 @@ void cleanup_cuda_integer_compress_radix_ciphertext_128(CudaStreamsFFI streams,
|
||||
|
||||
void cleanup_cuda_integer_decompress_radix_ciphertext_128(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr_void);
|
||||
|
||||
void cuda_integer_extract_glwe_128(
|
||||
CudaStreamsFFI streams, void *glwe_array_out,
|
||||
CudaPackedGlweCiphertextListFFI const *glwe_list,
|
||||
uint32_t const glwe_index);
|
||||
|
||||
void cuda_integer_extract_glwe_64(
|
||||
CudaStreamsFFI streams, void *glwe_array_out,
|
||||
CudaPackedGlweCiphertextListFFI const *glwe_list,
|
||||
uint32_t const glwe_index);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -155,3 +155,24 @@ void cleanup_cuda_integer_decompress_radix_ciphertext_128(
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
void cuda_integer_extract_glwe_128(
|
||||
CudaStreamsFFI streams, void *glwe_array_out,
|
||||
CudaPackedGlweCiphertextListFFI const *glwe_list,
|
||||
uint32_t const glwe_index) {
|
||||
|
||||
CudaStreams _streams = CudaStreams(streams);
|
||||
host_extract<__uint128_t>(_streams.stream(0), _streams.gpu_index(0),
|
||||
(__uint128_t *)glwe_array_out, glwe_list,
|
||||
glwe_index);
|
||||
}
|
||||
|
||||
void cuda_integer_extract_glwe_64(
|
||||
CudaStreamsFFI streams, void *glwe_array_out,
|
||||
CudaPackedGlweCiphertextListFFI const *glwe_list,
|
||||
uint32_t const glwe_index) {
|
||||
|
||||
CudaStreams _streams = CudaStreams(streams);
|
||||
host_extract<__uint64_t>(_streams.stream(0), _streams.gpu_index(0),
|
||||
(__uint64_t *)glwe_array_out, glwe_list, glwe_index);
|
||||
}
|
||||
|
||||
@@ -2349,6 +2349,22 @@ unsafe extern "C" {
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
pub fn cuda_integer_extract_glwe_128(
|
||||
streams: CudaStreamsFFI,
|
||||
glwe_array_out: *mut ffi::c_void,
|
||||
glwe_list: *const CudaPackedGlweCiphertextListFFI,
|
||||
glwe_index: u32,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
pub fn cuda_integer_extract_glwe_64(
|
||||
streams: CudaStreamsFFI,
|
||||
glwe_array_out: *mut ffi::c_void,
|
||||
glwe_list: *const CudaPackedGlweCiphertextListFFI,
|
||||
glwe_index: u32,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
pub fn scratch_cuda_rerand_64(
|
||||
streams: CudaStreamsFFI,
|
||||
|
||||
@@ -40,7 +40,7 @@ rand = "0.8.5"
|
||||
regex = "1.10.4"
|
||||
bitflags = { version = "2.5.0", features = ["serde"] }
|
||||
itertools = "0.11.0"
|
||||
lru = "0.12.3"
|
||||
lru = "0.16.3"
|
||||
bitfield-struct = "0.10.0"
|
||||
crossbeam = { version = "0.8.4", features = ["crossbeam-queue"] }
|
||||
rayon = { workspace = true }
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:35cc06547a23b862ab9829351d74d944e60ea9dad3ecf593d15f0ce8445d145e
|
||||
size 81710610
|
||||
oid sha256:934c8131c12010dc837f6a2af5111b83f8f5d42f10485e9b3b971edb24c467f8
|
||||
size 82201876
|
||||
|
||||
@@ -160,9 +160,9 @@ impl ProgramInner {
|
||||
.filter(|(_, var)| var.is_none())
|
||||
.map(|(rid, _)| *rid)
|
||||
.collect::<Vec<_>>();
|
||||
demote_order
|
||||
.into_iter()
|
||||
.for_each(|rid| self.regs.demote(&rid));
|
||||
demote_order.into_iter().for_each(|rid| {
|
||||
self.regs.demote(&rid);
|
||||
});
|
||||
}
|
||||
|
||||
/// Release register entry
|
||||
@@ -179,7 +179,7 @@ impl ProgramInner {
|
||||
|
||||
/// Notify register access to update LRU state
|
||||
pub(crate) fn reg_access(&mut self, rid: asm::RegId) {
|
||||
self.regs.promote(&rid)
|
||||
self.regs.promote(&rid);
|
||||
}
|
||||
|
||||
/// Retrieve the least-recently-used heap entry
|
||||
@@ -220,9 +220,9 @@ impl ProgramInner {
|
||||
.filter(|(_mid, var)| var.is_none())
|
||||
.map(|(mid, _)| *mid)
|
||||
.collect::<Vec<_>>();
|
||||
demote_order
|
||||
.into_iter()
|
||||
.for_each(|mid| self.heap.demote(&mid));
|
||||
demote_order.into_iter().for_each(|mid| {
|
||||
self.heap.demote(&mid);
|
||||
});
|
||||
}
|
||||
_ => { /*Only release Heap slot*/ }
|
||||
}
|
||||
@@ -231,7 +231,9 @@ impl ProgramInner {
|
||||
/// Notify heap access to update LRU state
|
||||
pub(crate) fn heap_access(&mut self, mid: asm::MemId) {
|
||||
match mid {
|
||||
asm::MemId::Heap { .. } => self.heap.promote(&mid),
|
||||
asm::MemId::Heap { .. } => {
|
||||
self.heap.promote(&mid);
|
||||
}
|
||||
_ => { /* Do nothing: the slot does not belong to the heap */ }
|
||||
}
|
||||
}
|
||||
|
||||
1  tfhe-benchmark/.gitignore (vendored, new file)
@@ -0,0 +1 @@
|
||||
benchmarks_parameters/*
|
||||
@@ -2,7 +2,9 @@ use benchmark::utilities::{
|
||||
hlapi_throughput_num_ops, write_to_json, BenchmarkType, BitSizesSet, EnvConfig, OperatorType,
|
||||
};
|
||||
use criterion::{black_box, Criterion, Throughput};
|
||||
use oprf::oprf_any_range2;
|
||||
use rand::prelude::*;
|
||||
use rayon::prelude::*;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::*;
|
||||
use tfhe::core_crypto::prelude::Numeric;
|
||||
@@ -11,34 +13,42 @@ use tfhe::keycache::NamedParam;
|
||||
use tfhe::named::Named;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::{
|
||||
ClientKey, CompressedServerKey, FheIntegerType, FheUint10, FheUint12, FheUint128, FheUint14,
|
||||
FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8, FheUintId, IntegerId,
|
||||
KVStore,
|
||||
ClientKey, CompressedServerKey, FheIntegerType, FheUint, FheUint10, FheUint12, FheUint128,
|
||||
FheUint14, FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8, FheUintId,
|
||||
IntegerId, KVStore,
|
||||
};
|
||||
|
||||
use rayon::prelude::*;
|
||||
mod oprf;
|
||||
|
||||
fn bench_fhe_type<FheType>(
|
||||
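// Lets a benchmark wait on an operation's output regardless of its shape:
// either a single ciphertext, or a (ciphertext, overflow flag) pair as
// returned by the overflowing operations.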
trait BenchWait {
|
||||
fn wait_bench(&self);
|
||||
}
|
||||
|
||||
impl<Id: FheUintId> BenchWait for FheUint<Id> {
|
||||
fn wait_bench(&self) {
|
||||
self.wait()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T1: FheWait, T2> BenchWait for (T1, T2) {
|
||||
fn wait_bench(&self) {
|
||||
self.0.wait()
|
||||
}
|
||||
}
|
||||
|
||||
fn bench_fhe_type_op<FheType, F, R>(
|
||||
c: &mut Criterion,
|
||||
client_key: &ClientKey,
|
||||
type_name: &str,
|
||||
bit_size: usize,
|
||||
display_name: &str,
|
||||
func_name: &str,
|
||||
func: F,
|
||||
) where
|
||||
F: Fn(&FheType, &FheType) -> R,
|
||||
R: BenchWait,
|
||||
FheType: FheEncrypt<u128, ClientKey>,
|
||||
FheType: FheWait,
|
||||
for<'a> &'a FheType: Add<&'a FheType, Output = FheType>
|
||||
+ Sub<&'a FheType, Output = FheType>
|
||||
+ Mul<&'a FheType, Output = FheType>
|
||||
+ BitAnd<&'a FheType, Output = FheType>
|
||||
+ BitOr<&'a FheType, Output = FheType>
|
||||
+ BitXor<&'a FheType, Output = FheType>
|
||||
+ Shl<&'a FheType, Output = FheType>
|
||||
+ Shr<&'a FheType, Output = FheType>
|
||||
+ RotateLeft<&'a FheType, Output = FheType>
|
||||
+ RotateRight<&'a FheType, Output = FheType>
|
||||
+ OverflowingAdd<&'a FheType, Output = FheType>
|
||||
+ OverflowingSub<&'a FheType, Output = FheType>,
|
||||
for<'a> FheType: FheMin<&'a FheType, Output = FheType> + FheMax<&'a FheType, Output = FheType>,
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(type_name);
|
||||
let mut bench_prefix = "hlapi".to_string();
|
||||
@@ -71,170 +81,90 @@ fn bench_fhe_type<FheType>(
|
||||
let lhs = FheType::encrypt(rng.gen(), client_key);
|
||||
let rhs = FheType::encrypt(rng.gen(), client_key);
|
||||
|
||||
let mut bench_id;
|
||||
let bench_id = format!("{bench_prefix}::{func_name}::{param_name}::{type_name}");
|
||||
|
||||
bench_id = format!("{bench_prefix}::add::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = &lhs + &rhs;
|
||||
res.wait();
|
||||
let res = func(&lhs, &rhs);
|
||||
res.wait_bench();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "add");
|
||||
|
||||
bench_id = format!("{bench_prefix}::overflowing_add::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let (res, flag) = lhs.overflowing_add(&rhs);
|
||||
res.wait();
|
||||
black_box((res, flag))
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "overflowing_add");
|
||||
|
||||
bench_id = format!("{bench_prefix}::overflowing_sub::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let (res, flag) = lhs.overflowing_sub(&rhs);
|
||||
res.wait();
|
||||
black_box((res, flag))
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "overflowing_sub");
|
||||
|
||||
bench_id = format!("{bench_prefix}::sub::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = &lhs - &rhs;
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "sub");
|
||||
|
||||
bench_id = format!("{bench_prefix}::mul::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = &lhs * &rhs;
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "mul");
|
||||
|
||||
bench_id = format!("{bench_prefix}::bitand::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = &lhs & &rhs;
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "bitand");
|
||||
|
||||
bench_id = format!("{bench_prefix}::bitor::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = &lhs | &rhs;
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "bitor");
|
||||
|
||||
bench_id = format!("{bench_prefix}::bitxor::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = &lhs ^ &rhs;
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "bitxor");
|
||||
|
||||
bench_id = format!("{bench_prefix}::left_shift::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = &lhs << &rhs;
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "left_shift");
|
||||
|
||||
bench_id = format!("{bench_prefix}::right_shift::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = &lhs >> &rhs;
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "right_shift");
|
||||
|
||||
bench_id = format!("{bench_prefix}::left_rotate::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = (&lhs).rotate_left(&rhs);
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "left_rotate");
|
||||
|
||||
bench_id = format!("{bench_prefix}::right_rotate::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = (&lhs).rotate_right(&rhs);
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "right_rotate");
|
||||
|
||||
bench_id = format!("{bench_prefix}::min::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = lhs.min(&rhs);
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "min");
|
||||
|
||||
bench_id = format!("{bench_prefix}::max::{param_name}::{type_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let res = lhs.max(&rhs);
|
||||
res.wait();
|
||||
black_box(res)
|
||||
})
|
||||
});
|
||||
write_record(bench_id, "max");
|
||||
write_record(bench_id, display_name);
|
||||
}
|
||||
|
||||
macro_rules! bench_type {
|
||||
($fhe_type:ident) => {
|
||||
macro_rules! bench_type_op (
|
||||
(type_name: $fhe_type:ident, display_name: $display_name:literal, operation: $op:ident) => {
|
||||
::paste::paste! {
|
||||
fn [<bench_ $fhe_type:snake>](c: &mut Criterion, cks: &ClientKey) {
|
||||
bench_fhe_type::<$fhe_type>(c, cks, stringify!($fhe_type), $fhe_type::num_bits());
|
||||
fn [<bench_ $fhe_type:snake _ $op>](c: &mut Criterion, cks: &ClientKey) {
|
||||
bench_fhe_type_op::<$fhe_type, _, _>(
|
||||
c,
|
||||
cks,
|
||||
stringify!($fhe_type),
|
||||
$fhe_type::num_bits(),
|
||||
$display_name,
|
||||
stringify!($op),
|
||||
|lhs, rhs| lhs.$op(rhs)
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
);
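// For reference, an invocation such as
// `bench_type_op!(type_name: FheUint64, display_name: "add", operation: add);`
// expands (via paste's snake-case concatenation) to roughly:
//
// fn bench_fhe_uint64_add(c: &mut Criterion, cks: &ClientKey) {
//     bench_fhe_type_op::<FheUint64, _, _>(
//         c, cks, "FheUint64", FheUint64::num_bits(), "add", "add",
//         |lhs, rhs| lhs.add(rhs),
//     );
// }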
|
||||
|
||||
macro_rules! generate_typed_benches {
|
||||
($fhe_type:ident) => {
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "add", operation: add);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "overflowing_add", operation: overflowing_add);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "sub", operation: sub);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "overflowing_sub", operation: overflowing_sub);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "mul", operation: mul);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "bitand", operation: bitand);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "bitor", operation: bitor);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "bitxor", operation: bitxor);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "left_shift", operation: shl);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "right_shift", operation: shr);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "left_rotate", operation: rotate_left);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "right_rotate", operation: rotate_right);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "min", operation: min);
|
||||
bench_type_op!(type_name: $fhe_type, display_name: "max", operation: max);
|
||||
};
|
||||
}
|
||||
|
||||
bench_type!(FheUint2);
|
||||
bench_type!(FheUint4);
|
||||
bench_type!(FheUint6);
|
||||
bench_type!(FheUint8);
|
||||
bench_type!(FheUint10);
|
||||
bench_type!(FheUint12);
|
||||
bench_type!(FheUint14);
|
||||
bench_type!(FheUint16);
|
||||
bench_type!(FheUint32);
|
||||
bench_type!(FheUint64);
|
||||
bench_type!(FheUint128);
|
||||
// Generate benches for all FheUint types
|
||||
generate_typed_benches!(FheUint2);
|
||||
generate_typed_benches!(FheUint4);
|
||||
generate_typed_benches!(FheUint6);
|
||||
generate_typed_benches!(FheUint8);
|
||||
generate_typed_benches!(FheUint10);
|
||||
generate_typed_benches!(FheUint12);
|
||||
generate_typed_benches!(FheUint14);
|
||||
generate_typed_benches!(FheUint16);
|
||||
generate_typed_benches!(FheUint32);
|
||||
generate_typed_benches!(FheUint64);
|
||||
generate_typed_benches!(FheUint128);
|
||||
|
||||
macro_rules! run_benches {
|
||||
($c:expr, $cks:expr, $($fhe_type:ident),+ $(,)?) => {
|
||||
$(
|
||||
::paste::paste! {
|
||||
[<bench_ $fhe_type:snake _add>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _overflowing_add>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _sub>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _overflowing_sub>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _mul>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _bitand>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _bitor>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _bitxor>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _shl>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _shr>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _rotate_left>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _rotate_right>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _min>]($c, $cks);
|
||||
[<bench_ $fhe_type:snake _max>]($c, $cks);
|
||||
}
|
||||
)+
|
||||
};
|
||||
}
|
||||
|
||||
trait TypeDisplay {
|
||||
fn fmt(f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
@@ -444,7 +374,7 @@ fn main() {
|
||||
|
||||
match env_config.bit_sizes_set {
|
||||
BitSizesSet::Fast => {
|
||||
bench_fhe_uint64(&mut c, &cks);
|
||||
run_benches!(&mut c, &cks, FheUint64);
|
||||
|
||||
// KVStore Benches
|
||||
if benched_device == tfhe::Device::Cpu {
|
||||
@@ -452,17 +382,11 @@ fn main() {
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
bench_fhe_uint2(&mut c, &cks);
|
||||
bench_fhe_uint4(&mut c, &cks);
|
||||
bench_fhe_uint6(&mut c, &cks);
|
||||
bench_fhe_uint8(&mut c, &cks);
|
||||
bench_fhe_uint10(&mut c, &cks);
|
||||
bench_fhe_uint12(&mut c, &cks);
|
||||
bench_fhe_uint14(&mut c, &cks);
|
||||
bench_fhe_uint16(&mut c, &cks);
|
||||
bench_fhe_uint32(&mut c, &cks);
|
||||
bench_fhe_uint64(&mut c, &cks);
|
||||
bench_fhe_uint128(&mut c, &cks);
|
||||
// Call all benchmarks for all types
|
||||
run_benches!(
|
||||
&mut c, &cks, FheUint2, FheUint4, FheUint6, FheUint8, FheUint10, FheUint12,
|
||||
FheUint14, FheUint16, FheUint32, FheUint64, FheUint128
|
||||
);
|
||||
|
||||
// KVStore Benches
|
||||
if benched_device == tfhe::Device::Cpu {
|
||||
@@ -481,5 +405,8 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "hpu"))]
|
||||
oprf_any_range2();
|
||||
|
||||
c.final_summary();
|
||||
}
|
||||
|
||||
44  tfhe-benchmark/benches/high_level_api/oprf.rs (new file)
@@ -0,0 +1,44 @@
|
||||
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
use criterion::{black_box, criterion_group, Criterion};
|
||||
use std::num::NonZeroU64;
|
||||
use tfhe::{set_server_key, ClientKey, ConfigBuilder, FheUint64, RangeForRandom, Seed, ServerKey};
|
||||
|
||||
pub fn oprf_any_range(c: &mut Criterion) {
|
||||
let bench_name = "hlapi::oprf_any_range";
|
||||
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
|
||||
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let config = ConfigBuilder::with_custom_parameters(param).build();
|
||||
let cks = ClientKey::generate(config);
|
||||
let sks = ServerKey::new(&cks);
|
||||
|
||||
rayon::broadcast(|_| set_server_key(sks.clone()));
|
||||
set_server_key(sks);
|
||||
|
||||
for excluded_upper_bound in [3, 52] {
|
||||
let range = RangeForRandom::new_from_excluded_upper_bound(
|
||||
NonZeroU64::new(excluded_upper_bound).unwrap(),
|
||||
);
|
||||
|
||||
let bench_id_oprf = format!("{bench_name}::bound_{excluded_upper_bound}");
|
||||
|
||||
bench_group.bench_function(&bench_id_oprf, |b| {
|
||||
b.iter(|| {
|
||||
_ = black_box(FheUint64::generate_oblivious_pseudo_random_custom_range(
|
||||
Seed(0),
|
||||
&range,
|
||||
None,
|
||||
));
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
criterion_group!(oprf_any_range2, oprf_any_range);
|
||||
@@ -2809,6 +2809,7 @@ mod cuda {
|
||||
criterion_group!(
|
||||
default_cuda_dedup_ops,
|
||||
cuda_add,
|
||||
cuda_neg,
|
||||
cuda_mul,
|
||||
cuda_div_rem,
|
||||
cuda_bitand,
|
||||
|
||||
@@ -629,7 +629,9 @@ mod integer_params {
|
||||
// operations.
|
||||
#[cfg(feature = "hpu")]
|
||||
let params = vec![BENCH_HPU_PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128.into()];
|
||||
#[cfg(not(feature = "hpu"))]
|
||||
#[cfg(feature = "gpu")]
|
||||
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS.into()];
|
||||
#[cfg(not(any(feature = "gpu", feature = "hpu")))]
|
||||
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS32_PBS.into()];
|
||||
|
||||
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
|
||||
|
||||
@@ -27,6 +27,7 @@ rand_distr = "0.4.3"
|
||||
criterion = "0.5.1"
|
||||
doc-comment = "0.3.3"
|
||||
serde_json = "1.0.94"
|
||||
num-bigint = "0.4.6"
|
||||
# clap has to be pinned as its minimum supported rust version
|
||||
# changes often between minor releases, which breaks our CI
|
||||
clap = { version = "=4.5.30", features = ["derive"] }
|
||||
|
||||
@@ -2,14 +2,30 @@
|
||||
|
||||
This document explains the mechanism and steps to generate an oblivious encrypted random value using only server keys.
|
||||
|
||||
The goal is to give to the server the possibility to generate a random value, which will be obtained in an encrypted format and will remain unknown to the server. The implementation is based on [this article](https://eprint.iacr.org/2024/665).
|
||||
The goal is to give the server the ability to generate a random value, which will be obtained in encrypted form and will remain unknown to the server.
|
||||
|
||||
This is possible through two methods on `FheUint` and `FheInt`:
|
||||
The main method for this is `FheUint::generate_oblivious_pseudo_random_custom_range` which returns an integer in the given range.
|
||||
Currently the range can only be in the form `[0, excluded_upper_bound[` with any `excluded_upper_bound` in `[1, 2^64[`.
|
||||
It follows a distribution close to uniform.
|
||||
|
||||
This function guarantees that the norm-1 distance (defined as ∆(P,Q) := (1/2) Σ_{ω∈Ω} |P(ω) − Q(ω)|)
|
||||
between the actual distribution and the target uniform distribution will be below the `max_distance` argument (which must be in ]0, 1[).
|
||||
The higher the distance, the more dissimilar the actual distribution is from the target uniform distribution.
|
||||
|
||||
The default value for `max_distance` is `2^-128` if `None` is provided.
|
||||
|
||||
Higher values allow better performance but must be considered carefully in the context of their target application, as they may have serious unintended consequences.
|
||||
|
||||
If the range is a power of 2, the distribution is uniform (for any `max_distance`) and the cost is smaller.
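For intuition, the distance reached for a bound `B` and `k` input random bits (with `r = 2^k mod B`, as computed by the distance helper added in this changeset) is:

```latex
\Delta(k, B) = \frac{r\,(B - r)}{B \cdot 2^{k}}, \qquad r = 2^{k} \bmod B .
```

For example, with `B = 3`: `k = 2` gives Δ = 2/12 ≈ 0.17 and `k = 4` gives Δ = 2/48 ≈ 0.042; the implementation keeps adding blocks of random bits until Δ drops below `max_distance`.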
|
||||
|
||||
|
||||
For powers of 2 specifically there are two methods on `FheUint` and `FheInt` (based on [this article](https://eprint.iacr.org/2024/665)):
|
||||
- `generate_oblivious_pseudo_random` which returns an integer taken uniformly in the full integer range (`[0; 2^N[` for a `FheUintN` and `[-2^(N-1); 2^(N-1)[` for a `FheIntN`).
|
||||
- `generate_oblivious_pseudo_random_bounded` which returns an integer taken uniformly in `[0; 2^random_bits_count[`. For a `FheUintN`, we must have `random_bits_count <= N`. For a `FheIntN`, we must have `random_bits_count <= N - 1`.
|
||||
|
||||
Both methods functions take a seed `Seed` as input, which could be any `u128` value.
|
||||
They both rely on the use of the usual server key.
|
||||
|
||||
These methods take a seed `Seed` as input, which can be any `u128` value.
|
||||
They rely on the use of the usual server key.
|
||||
The output is reproducible, i.e., the function is deterministic from the inputs: assuming the same hardware, seed and server key, this function outputs the same random encrypted value.
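As a minimal sketch of this reproducibility property (same setup as the example below), two calls with the same seed and server key decrypt to the same value:

```rust
use tfhe::prelude::FheDecrypt;
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, Seed};

pub fn main() {
    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);
    set_server_key(server_key);

    // Same seed, same server key: both outputs decrypt to the same value.
    let a = FheUint8::generate_oblivious_pseudo_random(Seed(42));
    let b = FheUint8::generate_oblivious_pseudo_random(Seed(42));

    let dec_a: u8 = a.decrypt(&client_key);
    let dec_b: u8 = b.decrypt(&client_key);
    assert_eq!(dec_a, dec_b);
}
```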
|
||||
|
||||
|
||||
@@ -18,7 +34,8 @@ Here is an example of the usage:
|
||||
|
||||
```rust
|
||||
use tfhe::prelude::FheDecrypt;
|
||||
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, FheInt8, Seed};
|
||||
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, FheInt8, RangeForRandom, Seed};
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
pub fn main() {
|
||||
let config = ConfigBuilder::default().build();
|
||||
@@ -26,23 +43,30 @@ pub fn main() {
|
||||
|
||||
set_server_key(server_key);
|
||||
|
||||
let random_bits_count = 3;
|
||||
|
||||
let ct_res = FheUint8::generate_oblivious_pseudo_random(Seed(0));
|
||||
let excluded_upper_bound = NonZeroU64::new(3).unwrap();
|
||||
let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
|
||||
|
||||
// in [0, excluded_upper_bound[ = {0, 1, 2}
|
||||
let ct_res = FheUint8::generate_oblivious_pseudo_random_custom_range(Seed(0), &range, None);
|
||||
let dec_result: u8 = ct_res.decrypt(&client_key);
|
||||
|
||||
let ct_res = FheUint8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
|
||||
let random_bits_count = 3;
|
||||
|
||||
// in [0, 2^8[
|
||||
let ct_res = FheUint8::generate_oblivious_pseudo_random(Seed(0));
|
||||
let dec_result: u8 = ct_res.decrypt(&client_key);
|
||||
|
||||
// in [0, 2^random_bits_count[ = [0, 8[
|
||||
let ct_res = FheUint8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
|
||||
let dec_result: u8 = ct_res.decrypt(&client_key);
|
||||
assert!(dec_result < (1 << random_bits_count));
|
||||
|
||||
// in [-2^7, 2^7[
|
||||
let ct_res = FheInt8::generate_oblivious_pseudo_random(Seed(0));
|
||||
|
||||
let dec_result: i8 = ct_res.decrypt(&client_key);
|
||||
|
||||
// in [0, 2^random_bits_count[ = [0, 8[
|
||||
let ct_res = FheInt8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
|
||||
|
||||
let dec_result: i8 = ct_res.decrypt(&client_key);
|
||||
assert!(dec_result < (1 << random_bits_count));
|
||||
}
|
||||
|
||||
@@ -141,7 +141,7 @@ Some parameter sets lead to the FHE keys exceeding the 2GB memory limit of WASM,
|
||||
|
||||
### Setting up TFHE-rs JS on WASM API for Node.js programs.
|
||||
|
||||
To build the JS on WASM bindings for **TFHE-rs**, install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/) and the necessary [`rust toolchain`](https://rustup.rs/). Clone the **TFHE-rs** repository and build using the following commands (this will build using the default branch, you can check out a specific tag depending on your requirements):
|
||||
To build the JS on WASM bindings for **TFHE-rs**, install [`wasm-pack`](https://drager.github.io/wasm-pack/) and the necessary [`rust toolchain`](https://rustup.rs/). Clone the **TFHE-rs** repository and build using the following commands (this will build using the default branch, you can check out a specific tag depending on your requirements):
|
||||
|
||||
```shell
|
||||
$ git clone https://github.com/zama-ai/tfhe-rs.git
|
||||
@@ -150,7 +150,7 @@ Cloning into 'tfhe-rs'...
|
||||
Resolving deltas: 100% (3866/3866), done.
|
||||
$ cd tfhe-rs
|
||||
$ cd tfhe
|
||||
$ rustup run wasm-pack build --release --target=nodejs --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
|
||||
$ wasm-pack build --release --target=nodejs --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
|
||||
[INFO]: Compiling to Wasm...
|
||||
...
|
||||
[INFO]: :-) Your wasm pkg is ready to publish at ...
|
||||
@@ -164,7 +164,7 @@ After the build, a new directory **pkg** is available in the `tfhe` directory.
|
||||
|
||||
```shell
|
||||
$ ls pkg
|
||||
LICENSE index.html package.json tfhe.d.ts tfhe.js tfhe_bg.txt tfhe_bg.wasm tfhe_bg.wasm.d.ts
|
||||
LICENSE README.md package.json tfhe.d.ts tfhe.js tfhe_bg.wasm tfhe_bg.wasm.d.ts
|
||||
$
|
||||
```
|
||||
|
||||
|
||||
@@ -540,10 +540,12 @@ pub fn sup_diff(cumulative_bins: &[u64], theoretical_cdf: &[f64]) -> f64 {
|
||||
.iter()
|
||||
.copied()
|
||||
.zip_eq(theoretical_cdf.iter().copied())
|
||||
.map(|(x, theoretical_cdf)| {
|
||||
.enumerate()
|
||||
.map(|(i, (x, theoretical_cdf))| {
|
||||
let empirical_cdf = x as f64 / number_of_samples as f64;
|
||||
|
||||
if theoretical_cdf == 1.0 {
|
||||
if i == cumulative_bins.len() - 1 {
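// At the final bin both the theoretical and empirical CDFs must equal exactly 1.0.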
|
||||
assert_eq!(theoretical_cdf, 1.0);
|
||||
assert_eq!(empirical_cdf, 1.0);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,9 @@ use crate::high_level_api::keys::InternalServerKey;
|
||||
use crate::high_level_api::re_randomization::ReRandomizationMetadata;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::integer::gpu::ciphertext::{CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext};
|
||||
use crate::shortint::MessageModulus;
|
||||
use crate::{FheInt, Seed};
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
impl<Id: FheUintId> FheUint<Id> {
|
||||
/// Generates an encrypted unsigned integer
|
||||
@@ -92,7 +94,7 @@ impl<Id: FheUintId> FheUint<Id> {
|
||||
}
|
||||
})
|
||||
}
|
||||
/// Generates an encrypted `num_block` blocks unsigned integer
|
||||
/// Generates an encrypted unsigned integer
|
||||
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
|
||||
/// The encrypted value is oblivious to the server.
|
||||
/// It can be useful to make server random generation deterministic.
|
||||
@@ -150,6 +152,103 @@ impl<Id: FheUintId> FheUint<Id> {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Generates an encrypted unsigned integer
|
||||
/// taken almost uniformly in the given range using the given seed.
|
||||
/// Currently the range can only be in the form `[0, excluded_upper_bound[`
|
||||
/// with any `excluded_upper_bound` in `[1, 2^64[`.
|
||||
///
|
||||
/// The encrypted value is oblivious to the server.
|
||||
/// It can be useful to make server random generation deterministic.
|
||||
///
|
||||
/// This function guarantees that the norm-1 distance
|
||||
/// (defined as ∆(P,Q) := 1/2 Sum[ω∈Ω] |P(ω) − Q(ω)|)
|
||||
/// between the actual distribution and the target uniform distribution
|
||||
/// will be below the `max_distance` argument (which must be in ]0, 1[).
|
||||
/// The higher the distance, the more dissimilar the actual distribution is
|
||||
/// from the target uniform distribution.
|
||||
///
|
||||
/// The default value for `max_distance` is `2^-128` if `None` is provided.
|
||||
///
|
||||
/// Higher values allow better performance but must be considered carefully in the context of
|
||||
/// their target application, as they may have serious unintended consequences.
|
||||
///
|
||||
/// If the range is a power of 2, the distribution is uniform (for any `max_distance`) and
|
||||
/// the cost is smaller.
|
||||
///
|
||||
/// ```rust
|
||||
/// use std::num::NonZeroU64;
|
||||
/// use tfhe::prelude::FheDecrypt;
|
||||
/// use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, RangeForRandom, Seed};
|
||||
///
|
||||
/// let config = ConfigBuilder::default().build();
|
||||
/// let (client_key, server_key) = generate_keys(config);
|
||||
///
|
||||
/// set_server_key(server_key);
|
||||
///
|
||||
/// let excluded_upper_bound = NonZeroU64::new(3).unwrap();
|
||||
///
|
||||
/// let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
|
||||
///
|
||||
/// let ct_res = FheUint8::generate_oblivious_pseudo_random_custom_range(Seed(0), &range, None);
|
||||
///
|
||||
/// let dec_result: u16 = ct_res.decrypt(&client_key);
|
||||
/// assert!(dec_result < excluded_upper_bound.get() as u16);
|
||||
/// ```
|
||||
pub fn generate_oblivious_pseudo_random_custom_range(
|
||||
seed: Seed,
|
||||
range: &RangeForRandom,
|
||||
max_distance: Option<f64>,
|
||||
) -> Self {
|
||||
let excluded_upper_bound = range.excluded_upper_bound;
|
||||
|
||||
if excluded_upper_bound.is_power_of_two() {
|
||||
let random_bits_count = excluded_upper_bound.ilog2() as u64;
|
||||
|
||||
Self::generate_oblivious_pseudo_random_bounded(seed, random_bits_count)
|
||||
} else {
|
||||
let max_distance = max_distance.unwrap_or_else(|| 2_f64.powi(-128));
|
||||
|
||||
assert!(
|
||||
0_f64 < max_distance && max_distance < 1_f64,
|
||||
"max_distance (={max_distance}) should be in ]0, 1["
|
||||
);
|
||||
|
||||
global_state::with_internal_keys(|key| match key {
|
||||
InternalServerKey::Cpu(key) => {
|
||||
let message_modulus = key.message_modulus();
|
||||
|
||||
let num_input_random_bits = num_input_random_bits_for_max_distance(
|
||||
excluded_upper_bound,
|
||||
max_distance,
|
||||
message_modulus,
|
||||
);
|
||||
|
||||
let num_blocks_output = Id::num_blocks(key.message_modulus()) as u64;
|
||||
|
||||
let ct = key
|
||||
.pbs_key()
|
||||
.par_generate_oblivious_pseudo_random_unsigned_custom_range(
|
||||
seed,
|
||||
num_input_random_bits,
|
||||
excluded_upper_bound,
|
||||
num_blocks_output,
|
||||
);
|
||||
|
||||
Self::new(ct, key.tag.clone(), ReRandomizationMetadata::default())
|
||||
}
|
||||
#[cfg(feature = "gpu")]
|
||||
InternalServerKey::Cuda(_cuda_key) => {
|
||||
panic!("Gpu does not support this operation yet.")
|
||||
}
|
||||
#[cfg(feature = "hpu")]
|
||||
InternalServerKey::Hpu(_device) => {
|
||||
panic!("Hpu does not support this operation yet.")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
/// Returns the amount of memory required to execute generate_oblivious_pseudo_random_bounded
|
||||
///
|
||||
@@ -273,7 +372,7 @@ impl<Id: FheIntId> FheInt<Id> {
|
||||
}
|
||||
})
|
||||
}
|
||||
/// Generates an encrypted `num_block` blocks signed integer
|
||||
/// Generates an encrypted signed integer
|
||||
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
|
||||
/// The encrypted value is oblivious to the server.
|
||||
/// It can be useful to make server random generation deterministic.
|
||||
@@ -367,10 +466,350 @@ impl<Id: FheIntId> FheInt<Id> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RangeForRandom {
|
||||
excluded_upper_bound: NonZeroU64,
|
||||
}
|
||||
|
||||
impl RangeForRandom {
|
||||
pub fn new_from_excluded_upper_bound(excluded_upper_bound: NonZeroU64) -> Self {
|
||||
Self {
|
||||
excluded_upper_bound,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn num_input_random_bits_for_max_distance(
|
||||
excluded_upper_bound: NonZeroU64,
|
||||
max_distance: f64,
|
||||
message_modulus: MessageModulus,
|
||||
) -> u64 {
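// Grow the number of random blocks (each contributing log2(message_modulus)
// bits) until the distance to the uniform distribution drops below
// `max_distance`, then return the corresponding total bit count.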
|
||||
assert!(message_modulus.0.is_power_of_two());
|
||||
let log_message_modulus = message_modulus.0.ilog2() as u64;
|
||||
|
||||
let mut random_block_count = 1;
|
||||
|
||||
let random_block_count = loop {
|
||||
let random_bit_count = random_block_count * log_message_modulus;
|
||||
|
||||
let distance = distance(excluded_upper_bound.get(), random_bit_count);
|
||||
|
||||
if distance < max_distance {
|
||||
break random_block_count;
|
||||
}
|
||||
|
||||
random_block_count += 1;
|
||||
};
|
||||
|
||||
random_block_count * log_message_modulus
|
||||
}
|
||||
|
||||
fn distance(excluded_upper_bound: u64, random_bit_count: u64) -> f64 {
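// With r = 2^k mod B (k = random_bit_count, B = excluded_upper_bound),
// r of the B possible outputs receive one extra preimage among the 2^k
// inputs, which gives a norm-1 distance to uniform of r * (B - r) / (B * 2^k).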
|
||||
let remainder = mod_pow_2(random_bit_count, excluded_upper_bound);
|
||||
|
||||
remainder as f64 * (excluded_upper_bound - remainder) as f64
|
||||
/ (2_f64.powi(random_bit_count as i32) * excluded_upper_bound as f64)
|
||||
}
|
||||
|
||||
// Computes 2^exponent % modulus
|
||||
fn mod_pow_2(exponent: u64, modulus: u64) -> u64 {
|
||||
assert_ne!(modulus, 0);
|
||||
|
||||
if modulus == 1 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let mut result: u128 = 1;
|
||||
let mut base: u128 = 2; // We are calculating 2^i
|
||||
|
||||
// The exponent itself stays as u64; only the base, result and modulus are widened to u128 to avoid overflow
|
||||
let mut exp = exponent;
|
||||
let mod_val = modulus as u128;
|
||||
|
||||
while exp > 0 {
|
||||
// If exponent is odd, multiply result with base
|
||||
if exp % 2 == 1 {
|
||||
result = (result * base) % mod_val;
|
||||
}
|
||||
|
||||
// Square the base
|
||||
base = (base * base) % mod_val;
|
||||
|
||||
// Divide exponent by 2
|
||||
exp /= 2;
|
||||
}
|
||||
|
||||
result as u64
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
use super::*;
|
||||
use crate::integer::server_key::radix_parallel::tests_unsigned::test_oprf::{
|
||||
oprf_density_function, p_value_upper_bound_oprf_almost_uniformity_from_values,
|
||||
probability_density_function_from_density,
|
||||
};
|
||||
use crate::prelude::FheDecrypt;
|
||||
use crate::shortint::oprf::test::test_uniformity;
|
||||
use crate::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128;
|
||||
use crate::{generate_keys, set_server_key, ClientKey, ConfigBuilder, FheUint8, Seed};
|
||||
use num_bigint::BigUint;
|
||||
use rand::{thread_rng, Rng};
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
|
||||
// Helper: The "Oracle" implementation using BigInt
|
||||
// This is slow but mathematically guaranteed to be correct.
|
||||
fn oracle_mod_pow_2(exponent: u64, modulus: u64) -> u64 {
|
||||
assert_ne!(modulus, 0);
|
||||
|
||||
if modulus == 1 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let base = BigUint::from(2u32);
|
||||
let exp = BigUint::from(exponent);
|
||||
let modu = BigUint::from(modulus);
|
||||
|
||||
let res = base.modpow(&exp, &modu);
|
||||
res.iter_u64_digits().next().unwrap_or(0)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_edge_cases() {
|
||||
// 2^0 % 10 = 1
|
||||
assert_eq!(mod_pow_2(0, 10), 1, "Failed exponent 0");
|
||||
|
||||
// 2^10 % 1 = 0
|
||||
assert_eq!(mod_pow_2(10, 1), 0, "Failed modulus 1");
|
||||
|
||||
// 2^1 % 10 = 2
|
||||
assert_eq!(mod_pow_2(1, 10), 2, "Failed exponent 1");
|
||||
|
||||
// 2^3 % 5 = 8 % 5 = 3
|
||||
assert_eq!(mod_pow_2(3, 5), 3, "Failed small calc");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boundaries_and_overflow() {
|
||||
assert_eq!(mod_pow_2(2, u64::MAX), 4);
|
||||
|
||||
assert_eq!(mod_pow_2(u64::MAX, 3), 2);
|
||||
|
||||
assert_eq!(mod_pow_2(5, 32), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_against_oracle() {
|
||||
let mut rng = thread_rng();
|
||||
for _ in 0..1_000_000 {
|
||||
let exp: u64 = rng.gen();
|
||||
let mod_val: u64 = rng.gen();
|
||||
|
||||
let mod_val = if mod_val == 0 { 1 } else { mod_val };
|
||||
|
||||
let expected = oracle_mod_pow_2(exp, mod_val);
|
||||
let actual = mod_pow_2(exp, mod_val);
|
||||
|
||||
assert_eq!(
|
||||
actual, expected,
|
||||
"Mismatch! 2^{exp} % {mod_val} => Ours: {actual}, Oracle: {expected}",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_distance_with_uniform() {
|
||||
for excluded_upper_bound in 1..20 {
|
||||
for num_input_random_bits in 0..20 {
|
||||
let density = oprf_density_function(excluded_upper_bound, num_input_random_bits);
|
||||
|
||||
let theoretical_pdf = probability_density_function_from_density(&density);
|
||||
|
||||
let p_uniform = 1. / excluded_upper_bound as f64;
|
||||
|
||||
let actual_distance: f64 = 1. / 2.
|
||||
* theoretical_pdf
|
||||
.iter()
|
||||
.map(|p| (*p - p_uniform).abs())
|
||||
.sum::<f64>();
|
||||
|
||||
let theoretical_distance = distance(excluded_upper_bound, num_input_random_bits);
|
||||
|
||||
assert!(
|
||||
(theoretical_distance - actual_distance).abs()
|
||||
<= theoretical_distance / 1_000_000.,
|
||||
"{theoretical_distance} != {actual_distance}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_uniformity_scalar_mul_shift() {
|
||||
let max_distance = 2_f64.powi(-20);
|
||||
|
||||
let message_modulus = MessageModulus(4);
|
||||
|
||||
let excluded_upper_bound = 3;
|
||||
|
||||
let num_input_random_bits = num_input_random_bits_for_max_distance(
|
||||
NonZeroU64::new(excluded_upper_bound).unwrap(),
|
||||
max_distance,
|
||||
message_modulus,
|
||||
);
|
||||
|
||||
let sample_count: usize = 10_000_000;
|
||||
|
||||
let p_value_limit: f64 = 0.001;
|
||||
|
||||
// The distribution is not exactly uniform
|
||||
// This check ensures that, with the given low max_distance,
|
||||
// the distribution is indistinguishable from uniform at the given sample count
|
||||
test_uniformity(sample_count, p_value_limit, excluded_upper_bound, |_seed| {
|
||||
oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits)
|
||||
});
|
||||
}
|
||||
|
||||
fn oprf_clear_equivalent(excluded_upper_bound: u64, num_input_random_bits: u64) -> u64 {
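// Clear-text model of the encrypted OPRF: draw `num_input_random_bits`
// uniform bits, multiply by the bound, then shift right by the same bit
// count; the result lies in [0, excluded_upper_bound) and follows the same
// almost-uniform distribution as the homomorphic computation.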
|
||||
let random_input_upper_bound = 1 << num_input_random_bits;
|
||||
|
||||
let random_input = thread_rng().gen_range(0..random_input_upper_bound);
|
||||
|
||||
(random_input * excluded_upper_bound) >> num_input_random_bits
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_uniformity_generate_oblivious_pseudo_random_custom_range() {
|
||||
let base_sample_count: usize = 10_000;
|
||||
|
||||
let p_value_limit: f64 = 0.001;
|
||||
|
||||
let params = PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128;
|
||||
let config = ConfigBuilder::with_custom_parameters(params).build();
|
||||
|
||||
let (cks, sks) = generate_keys(config);
|
||||
rayon::broadcast(|_| set_server_key(sks.clone()));
|
||||
|
||||
let message_modulus = params.message_modulus;
|
||||
|
||||
// [0.7, 0.1] for `max_distance` chosen to have `num_input_random_bits` be [2, 4]
|
||||
// for any of the listed `excluded_upper_bound`
|
||||
for (expected_num_input_random_bits, max_distance, excluded_upper_bounds) in
|
||||
[(2, 0.7, [3, 5, 6, 7]), (4, 0.1, [3, 5, 6, 7])]
|
||||
{
|
||||
for excluded_upper_bound in excluded_upper_bounds {
|
||||
let sample_count = base_sample_count * excluded_upper_bound as usize;
|
||||
|
||||
let excluded_upper_bound = NonZeroU64::new(excluded_upper_bound).unwrap();
|
||||
|
||||
let num_input_random_bits = num_input_random_bits_for_max_distance(
|
||||
excluded_upper_bound,
|
||||
max_distance,
|
||||
message_modulus,
|
||||
);
|
||||
|
||||
assert_eq!(num_input_random_bits, expected_num_input_random_bits);
|
||||
|
||||
test_uniformity_generate_oblivious_pseudo_random_custom_range2(
|
||||
sample_count,
|
||||
p_value_limit,
|
||||
message_modulus,
|
||||
&cks,
|
||||
excluded_upper_bound,
|
||||
max_distance,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn test_uniformity_generate_oblivious_pseudo_random_custom_range2(
|
||||
sample_count: usize,
|
||||
p_value_limit: f64,
|
||||
message_modulus: MessageModulus,
|
||||
cks: &ClientKey,
|
||||
excluded_upper_bound: NonZeroU64,
|
||||
max_distance: f64,
|
||||
) {
|
||||
let num_input_random_bits = num_input_random_bits_for_max_distance(
|
||||
excluded_upper_bound,
|
||||
max_distance,
|
||||
message_modulus,
|
||||
);
|
||||
|
||||
let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
|
||||
|
||||
let real_values: Vec<u64> = (0..sample_count)
|
||||
.into_par_iter()
|
||||
.map(|_| {
|
||||
let img = FheUint8::generate_oblivious_pseudo_random_custom_range(
|
||||
Seed(rand::thread_rng().gen::<u128>()),
|
||||
&range,
|
||||
Some(max_distance),
|
||||
);
|
||||
|
||||
img.decrypt(cks)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let excluded_upper_bound = excluded_upper_bound.get();
|
||||
|
||||
let uniform_values: Vec<u64> = (0..sample_count)
|
||||
.into_par_iter()
|
||||
.map(|_| thread_rng().gen_range(0..excluded_upper_bound))
|
||||
.collect();
|
||||
|
||||
let clear_oprf_value_lower_num_input_random_bits = (0..sample_count)
|
||||
.into_par_iter()
|
||||
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits - 1))
|
||||
.collect();
|
||||
|
||||
let clear_oprf_value_same_num_input_random_bits = (0..sample_count)
|
||||
.into_par_iter()
|
||||
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits))
|
||||
.collect();
|
||||
|
||||
let clear_oprf_value_higher_num_input_random_bits = (0..sample_count)
|
||||
.into_par_iter()
|
||||
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits + 1))
|
||||
.collect();
|
||||
|
||||
for (values, should_have_low_p_value) in [
|
||||
(&real_values, false),
|
||||
// to test that the same distribution passes
|
||||
(&clear_oprf_value_same_num_input_random_bits, false),
|
||||
// to test that other distributions don't pass
|
||||
// (makes sure the test is statistically powerful)
|
||||
(&uniform_values, true),
|
||||
(&clear_oprf_value_lower_num_input_random_bits, true),
|
||||
(&clear_oprf_value_higher_num_input_random_bits, true),
|
||||
] {
|
||||
let p_value_upper_bound = p_value_upper_bound_oprf_almost_uniformity_from_values(
|
||||
values,
|
||||
num_input_random_bits,
|
||||
excluded_upper_bound,
|
||||
);
|
||||
|
||||
println!("p_value_upper_bound: {p_value_upper_bound}");
|
||||
|
||||
if should_have_low_p_value {
|
||||
assert!(
|
||||
p_value_upper_bound < p_value_limit,
|
||||
"p_value_upper_bound (={p_value_upper_bound}) expected to be smaller than {p_value_limit}"
|
||||
);
|
||||
} else {
|
||||
assert!(
|
||||
p_value_limit < p_value_upper_bound,
|
||||
"p_value_upper_bound (={p_value_upper_bound}) expected to be bigger than {p_value_limit}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(feature = "gpu")]
|
||||
#[allow(unused_imports)]
|
||||
mod test {
|
||||
mod test_gpu {
|
||||
use crate::prelude::*;
|
||||
use crate::{
|
||||
generate_keys, set_server_key, ConfigBuilder, FheInt128, FheUint32, FheUint64, GpuIndex,
|
||||
|
||||
@@ -48,6 +48,7 @@ macro_rules! export_concrete_array_types {
|
||||
}
|
||||
|
||||
pub use crate::core_crypto::commons::math::random::{Seed, XofSeed};
|
||||
pub use crate::high_level_api::integers::oprf::RangeForRandom;
|
||||
pub use crate::integer::server_key::MatchValues;
|
||||
use crate::{error, Error, Versionize};
|
||||
use backward_compatibility::compressed_ciphertext_list::SquashedNoiseCiphertextStateVersions;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::core_crypto::gpu::entities::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::vec::CudaVec;
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
@@ -16,7 +17,8 @@ use crate::integer::gpu::ciphertext::CudaRadixCiphertext;
|
||||
use crate::integer::gpu::server_key::CudaBootstrappingKey;
|
||||
use crate::integer::gpu::{
|
||||
cuda_backend_compress, cuda_backend_decompress, cuda_backend_get_compression_size_on_gpu,
|
||||
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, PBSType,
|
||||
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, extract_glwe_async,
|
||||
PBSType,
|
||||
};
|
||||
use crate::prelude::CastInto;
|
||||
use crate::shortint::ciphertext::{
|
||||
@@ -197,6 +199,30 @@ impl<T: UnsignedInteger> CudaPackedGlweCiphertextList<T> {
|
||||
meta: self.meta,
|
||||
}
|
||||
}
|
||||
pub fn extract_glwe(
|
||||
&self,
|
||||
glwe_index: usize,
|
||||
streams: &CudaStreams,
|
||||
) -> CudaGlweCiphertextList<T> {
|
||||
let meta = self
|
||||
.meta
|
||||
.as_ref()
|
||||
.expect("CudaPackedGlweCiphertextList meta must be set to extract GLWE");
|
||||
|
||||
let mut output_cuda_glwe_list = CudaGlweCiphertextList::new(
|
||||
meta.glwe_dimension,
|
||||
meta.polynomial_size,
|
||||
GlweCiphertextCount(1),
|
||||
meta.ciphertext_modulus,
|
||||
streams,
|
||||
);
|
||||
|
||||
unsafe {
|
||||
extract_glwe_async(streams, &mut output_cuda_glwe_list, self, glwe_index as u32);
|
||||
}
|
||||
streams.synchronize();
|
||||
output_cuda_glwe_list
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: UnsignedInteger> Clone for CudaPackedGlweCiphertextList<T> {
|
||||
|
||||
@@ -7,6 +7,7 @@ pub mod server_key;
|
||||
#[cfg(feature = "zk-pok")]
|
||||
pub mod zk;
|
||||
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_compact_ciphertext_list::CudaLweCompactCiphertextList;
|
||||
@@ -10423,3 +10424,44 @@ pub unsafe fn unchecked_small_scalar_mul_integer_async(
|
||||
carry_modulus.0 as u32,
|
||||
);
|
||||
}
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// # Safety
|
||||
///
|
||||
/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
|
||||
/// is required
|
||||
pub unsafe fn extract_glwe_async<T: UnsignedInteger>(
|
||||
streams: &CudaStreams,
|
||||
glwe_array_out: &mut CudaGlweCiphertextList<T>,
|
||||
glwe_list: &CudaPackedGlweCiphertextList<T>,
|
||||
glwe_index: u32,
|
||||
) {
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
glwe_array_out.0.d_vec.gpu_index(0),
|
||||
"GPU error: all data should reside on the same GPU."
|
||||
);
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
glwe_list.data.gpu_index(0),
|
||||
"GPU error: all data should reside on the same GPU."
|
||||
);
|
||||
let packed_glwe_list_ffi = prepare_cuda_packed_glwe_ct_ffi(glwe_list);
|
||||
|
||||
if T::BITS == 128 {
|
||||
cuda_integer_extract_glwe_128(
|
||||
streams.ffi(),
|
||||
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
|
||||
&raw const packed_glwe_list_ffi,
|
||||
glwe_index,
|
||||
);
|
||||
} else if T::BITS == 64 {
|
||||
cuda_integer_extract_glwe_64(
|
||||
streams.ffi(),
|
||||
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
|
||||
&raw const packed_glwe_list_ffi,
|
||||
glwe_index,
|
||||
);
|
||||
} else {
|
||||
panic!("Unsupported integer size for CUDA GLWE extraction");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,757 @@
|
||||
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
|
||||
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertext};
|
||||
use crate::integer::compression_keys::CompressionPrivateKeys;
|
||||
use crate::integer::gpu::list_compression::server_keys::CudaCompressionKey;
|
||||
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::noise_simulation::cuda_glwe_list_to_glwe_ciphertext;
|
||||
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
|
||||
use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext;
|
||||
use crate::integer::gpu::CudaServerKey;
|
||||
use crate::integer::{ClientKey, CompressedServerKey, IntegerCiphertext};
|
||||
use crate::shortint::ciphertext::{Ciphertext, Degree, NoiseLevel};
|
||||
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
|
||||
use crate::shortint::engine::ShortintEngine;
|
||||
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
|
||||
use crate::shortint::parameters::{CompressionParameters, MetaParameters, Variance};
|
||||
use crate::shortint::server_key::tests::noise_distribution::br_dp_packingks_ms::br_dp_packing_ks_ms;
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
|
||||
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
|
||||
NoiseSimulationLwePackingKeyswitchKey, NoiseSimulationModulus,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::utils::{
|
||||
expected_pfail_for_precision, mean_and_variance_check, normality_check, pfail_check,
|
||||
precision_with_padding, update_ap_params_msg_and_carry_moduli, DecryptionAndNoiseResult,
|
||||
NoiseSample, PfailAndPrecision, PfailTestMeta, PfailTestResult,
|
||||
};
|
||||
use crate::shortint::server_key::tests::noise_distribution::{
|
||||
should_run_short_pfail_tests_debug, should_use_single_key_debug,
|
||||
};
|
||||
use crate::shortint::{
|
||||
AtomicPatternParameters, CarryModulus, MessageModulus, ShortintEncoding, ShortintParameterSet,
|
||||
};
|
||||
use crate::GpuIndex;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
|
||||
pub const SAMPLES_PER_MSG_PACKING_KS_NOISE: usize = 1000;
|
||||
|
||||
fn sanity_check_encrypt_br_dp_packing_ks_ms(meta_params: MetaParameters) {
|
||||
let (params, comp_params) = (
|
||||
meta_params.compute_parameters,
|
||||
meta_params.compression_parameters.unwrap(),
|
||||
);
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
|
||||
let private_compression_key = cks.new_compression_private_key(comp_params);
|
||||
let (compressed_compression_key, _compressed_decompression_key) =
|
||||
cks.new_compressed_compression_decompression_keys(&private_compression_key);
|
||||
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
|
||||
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
|
||||
// The multiplication done in the compression moves the message up to the top of the
|
||||
// carry space, multiplying by the carry modulus achieves that
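// (e.g. with MessageModulus(4) and CarryModulus(4), dp_scalar = 4 shifts the
// 2-bit message up by 2 bits, to the top of the message+carry space)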
|
||||
let dp_scalar = params.carry_modulus().0;
    let br_input_modulus_log = cuda_sks.br_input_modulus_log();
    let storage_modulus_log = cuda_compression_key.storage_log_modulus;

    let id_lut = cuda_sks.generate_lookup_table(|x| x);
    let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);

    let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
        .map(|_| {
            cks.key
                .encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
        })
        .collect();
    let d_input_zeros: Vec<_> = input_zeros
        .iter()
        .map(|ct| {
            let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
            CudaDynLwe::U64(d_ct_input)
        })
        .collect();

    let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
        degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
        message_modulus: params.message_modulus(),
        carry_modulus: params.carry_modulus(),
        atomic_pattern: params.atomic_pattern(),
        noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
    };
    let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
        .map(|_| CudaSideResources::new(&streams, cuda_block_info))
        .collect();

    let (d_before_packing, _after_packing, d_after_ms) = br_dp_packing_ks_ms(
        d_input_zeros,
        &cuda_sks,
        &d_accumulator,
        dp_scalar,
        &cuda_compression_key.packing_key_switching_key,
        storage_modulus_log,
        &mut cuda_side_resources,
    );

    let compression_inputs: Vec<_> = d_before_packing
        .into_iter()
        .map(|(_input, pbs_result, _dp_result)| {
            let pbs_result_list_cpu = pbs_result.as_lwe_64().to_lwe_ciphertext_list(&streams);
            let pbs_result_cpu = LweCiphertext::from_container(
                pbs_result_list_cpu.clone().into_container(),
                pbs_result_list_cpu.ciphertext_modulus(),
            );
            let cpu_ct = Ciphertext::new(
                pbs_result_cpu,
                Degree::new(params.message_modulus().0 - 1),
                NoiseLevel::NOMINAL,
                params.message_modulus(),
                params.carry_modulus(),
                params.atomic_pattern(),
            );
            let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cpu_ct]);
            let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
            d_ct.ciphertext
        })
        .collect();

    let gpu_compressed =
        cuda_compression_key.compress_ciphertexts_into_list(&compression_inputs, &streams);

    let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
    let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
    let extracted_glwe = GlweCiphertext::from_container(
        extracted_list.clone().into_container(),
        extracted_list.polynomial_size(),
        extracted_list.ciphertext_modulus(),
    );
    let after_ms_list = d_after_ms.to_glwe_ciphertext_list(&streams);
    let mut after_ms = GlweCiphertext::from_container(
        after_ms_list.clone().into_container(),
        after_ms_list.polynomial_size(),
        after_ms_list.ciphertext_modulus(),
    );
    // Bodies that were not filled are discarded
    after_ms.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
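    // Only the first `lwe_per_glwe` body coefficients of the packed GLWE carry data; the
    // remaining slots are never written by the compression, so they are zeroed in the
    // reference ciphertext before comparing it against the extracted GLWE below.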

    assert_eq!(after_ms.as_view(), extracted_glwe.as_view());
}

create_gpu_parameterized_test!(sanity_check_encrypt_br_dp_packing_ks_ms {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
    params: AtomicPatternParameters,
    comp_params: CompressionParameters,
    single_cks: &ClientKey,
    single_cuda_sks: &CudaServerKey,
    single_compression_private_key: &CompressionPrivateKeys,
    single_cuda_compression_key: &CudaCompressionKey,
    msg: u64,
    streams: &CudaStreams,
) -> (
    Vec<(
        DecryptionAndNoiseResult,
        DecryptionAndNoiseResult,
        DecryptionAndNoiseResult,
    )>,
    Vec<DecryptionAndNoiseResult>,
    Vec<DecryptionAndNoiseResult>,
) {
    let mut engine = ShortintEngine::new();
    let thread_cks: crate::integer::ClientKey;
    let thread_cuda_sks: CudaServerKey;
    let thread_compression_private_key;
    let thread_cuda_compression_key;
    let (cks, cuda_sks, compression_private_key, cuda_compression_key) =
        if should_use_single_key_debug() {
            (
                single_cks,
                single_cuda_sks,
                single_compression_private_key,
                single_cuda_compression_key,
            )
        } else {
            let block_params: ShortintParameterSet = params.into();
            thread_cks = crate::integer::ClientKey::new(block_params);
            let compressed_server_key =
                CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
            thread_cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, streams);

            thread_compression_private_key = thread_cks.new_compression_private_key(comp_params);
            let (compressed_compression_key, _compressed_decompression_key) = thread_cks
                .new_compressed_compression_decompression_keys(&thread_compression_private_key);
            thread_cuda_compression_key = compressed_compression_key.decompress_to_cuda(streams);

            (
                &thread_cks,
                &thread_cuda_sks,
                &thread_compression_private_key,
                &thread_cuda_compression_key,
            )
        };
    let br_input_modulus_log = cuda_sks.br_input_modulus_log();
    let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;

    let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
        .map(|_| {
            cks.key.encrypt_noiseless_pbs_input_dyn_lwe_with_engine(
                br_input_modulus_log,
                msg,
                &mut engine,
            )
        })
        .collect();

    let d_input_zeros: Vec<_> = input_zeros
        .iter()
        .map(|ct| {
            let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), streams);
            CudaDynLwe::U64(d_ct_input)
        })
        .collect();

    let id_lut = cuda_sks.generate_lookup_table(|x| x);
    let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, streams);

    let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
        degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
        message_modulus: params.message_modulus(),
        carry_modulus: params.carry_modulus(),
        atomic_pattern: params.atomic_pattern(),
        noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
    };
    let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
        .map(|_| CudaSideResources::new(streams, cuda_block_info))
        .collect();

    let dp_scalar = params.carry_modulus().0;
    let storage_modulus_log = cuda_compression_key.storage_log_modulus;

    let (d_before_packing, d_after_packing, d_after_ms) = br_dp_packing_ks_ms(
        d_input_zeros,
        cuda_sks,
        &d_accumulator,
        dp_scalar,
        &cuda_compression_key.packing_key_switching_key,
        storage_modulus_log,
        &mut cuda_side_resources,
    );

    let compute_large_lwe_secret_key = cks.key.encryption_key();
    let compression_glwe_secret_key = &compression_private_key.key.post_packing_ks_key;

    let compute_encoding = cuda_sks.encoding();
    let compression_encoding = ShortintEncoding {
        carry_modulus: CarryModulus(1),
        ..compute_encoding
    };
    let after_packing = cuda_glwe_list_to_glwe_ciphertext(&d_after_packing, streams);
    let after_ms = cuda_glwe_list_to_glwe_ciphertext(&d_after_ms, streams);
    (
        d_before_packing
            .into_iter()
            .map(|(d_input, d_pbs_result, d_dp_result)| {
                let input = d_input.as_ct_64_cpu(streams);
                let pbs_result = d_pbs_result.as_ct_64_cpu(streams);
                let dp_result = d_dp_result.as_ct_64_cpu(streams);
                (
                    match &cks.key.atomic_pattern {
                        AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
                            DecryptionAndNoiseResult::new_from_lwe(
                                &input,
                                &standard_atomic_pattern_client_key.lwe_secret_key,
                                msg,
                                &compute_encoding,
                            )
                        }
                        AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
                            panic!("KS32 Atomic Pattern not supported on GPU tests yet");
                        }
                    },
                    DecryptionAndNoiseResult::new_from_lwe(
                        &pbs_result,
                        &compute_large_lwe_secret_key,
                        msg,
                        &compute_encoding,
                    ),
                    DecryptionAndNoiseResult::new_from_lwe(
                        &dp_result,
                        &compute_large_lwe_secret_key,
                        msg,
                        &compression_encoding,
                    ),
                )
            })
            .collect(),
        DecryptionAndNoiseResult::new_from_glwe(
            &after_packing,
            compression_glwe_secret_key,
            compression_private_key.key.params.lwe_per_glwe(),
            msg,
            &compression_encoding,
        ),
        DecryptionAndNoiseResult::new_from_glwe(
            &after_ms,
            compression_glwe_secret_key,
            compression_private_key.key.params.lwe_per_glwe(),
            msg,
            &compression_encoding,
        ),
    )
}

#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
    params: AtomicPatternParameters,
    comp_params: CompressionParameters,
    single_cks: &ClientKey,
    single_cuda_sks: &CudaServerKey,
    single_compression_private_key: &CompressionPrivateKeys,
    single_cuda_compression_key: &CudaCompressionKey,
    msg: u64,
    streams: &CudaStreams,
) -> (
    Vec<(NoiseSample, NoiseSample, NoiseSample)>,
    Vec<NoiseSample>,
    Vec<NoiseSample>,
) {
    let (before_packing, after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
        params,
        comp_params,
        single_cks,
        single_cuda_sks,
        single_compression_private_key,
        single_cuda_compression_key,
        msg,
        streams,
    );

    (
        before_packing
            .into_iter()
            .map(|(input, after_pbs, after_dp)| {
                (
                    input
                        .get_noise_if_decryption_was_correct()
                        .expect("Decryption Failed"),
                    after_pbs
                        .get_noise_if_decryption_was_correct()
                        .expect("Decryption Failed"),
                    after_dp
                        .get_noise_if_decryption_was_correct()
                        .expect("Decryption Failed"),
                )
            })
            .collect(),
        after_packing
            .into_iter()
            .map(|x| {
                x.get_noise_if_decryption_was_correct()
                    .expect("Decryption Failed")
            })
            .collect(),
        after_ms
            .into_iter()
            .map(|x| {
                x.get_noise_if_decryption_was_correct()
                    .expect("Decryption Failed")
            })
            .collect(),
    )
}
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
    params: AtomicPatternParameters,
    comp_params: CompressionParameters,
    single_cks: &ClientKey,
    single_cuda_sks: &CudaServerKey,
    single_compression_private_key: &CompressionPrivateKeys,
    single_cuda_compression_key: &CudaCompressionKey,
    msg: u64,
    streams: &CudaStreams,
) -> Vec<DecryptionAndNoiseResult> {
    let (_before_packing, _after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
        params,
        comp_params,
        single_cks,
        single_cuda_sks,
        single_compression_private_key,
        single_cuda_compression_key,
        msg,
        streams,
    );

    after_ms
}

fn noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu(meta_params: MetaParameters) {
    let (params, comp_params) = (
        meta_params.compute_parameters,
        meta_params.compression_parameters.unwrap(),
    );
    let gpu_index = 0;
    let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));

    let block_params: ShortintParameterSet = params.into();
    let cks = crate::integer::ClientKey::new(block_params);
    let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
    let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);

    let private_compression_key = cks.new_compression_private_key(comp_params);
    let (compressed_compression_key, _compressed_decompression_key) =
        cks.new_compressed_compression_decompression_keys(&private_compression_key);
    let compression_key = compressed_compression_key.decompress();
    let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);

    let noise_simulation_bsk =
        NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
    let noise_simulation_packing_key =
        NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);

    assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
    assert!(noise_simulation_packing_key.matches_actual_shortint_comp_key(&compression_key.key));

    // The multiplication done in the compression is made to move the message up at the top of the
    // carry space, multiplying by the carry modulus achieves that
    let dp_scalar = params.carry_modulus().0;

    let noise_simulation_accumulator = NoiseSimulationGlwe::new(
        noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
        noise_simulation_bsk.output_polynomial_size(),
        Variance(0.0),
        noise_simulation_bsk.modulus(),
    );

    let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
    let storage_modulus_log = cuda_compression_key.storage_log_modulus;
    let br_input_modulus_log = cuda_sks.br_input_modulus_log();
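    // Note: `storage_modulus_log` is the 2^log modulus the packed GLWE is switched down to for
    // storage, while `br_input_modulus_log` is the modulus expected at the blind rotation input;
    // both are read from the decompressed GPU keys rather than hard-coded so the noise
    // simulation below stays in sync with the actual keys used on device.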

    let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
        let noise_simulation = NoiseSimulationLwe::new(
            cks.parameters().lwe_dimension(),
            Variance(0.0),
            NoiseSimulationModulus::from_ciphertext_modulus(cks.parameters().ciphertext_modulus()),
        );
        br_dp_packing_ks_ms(
            vec![noise_simulation; lwe_per_glwe.0],
            &noise_simulation_bsk,
            &noise_simulation_accumulator,
            dp_scalar,
            &noise_simulation_packing_key,
            storage_modulus_log,
            &mut vec![(); lwe_per_glwe.0],
        )
    };

    let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
        .map(|_| {
            cks.key
                .encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
        })
        .collect();

    let d_input_zeros: Vec<_> = input_zeros
        .iter()
        .map(|ct| {
            let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
            CudaDynLwe::U64(d_ct_input)
        })
        .collect();

    let id_lut = cuda_sks.generate_lookup_table(|x| x);
    let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);

    let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
        degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
        message_modulus: params.message_modulus(),
        carry_modulus: params.carry_modulus(),
        atomic_pattern: params.atomic_pattern(),
        noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
    };
    let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
        .map(|_| CudaSideResources::new(&streams, cuda_block_info))
        .collect();

    // Check that the circuit is correct with respect to core implementation, i.e. does not crash on
    // dimension checks
    let (expected_glwe_size_out, expected_polynomial_size_out, expected_modulus_f64_out) = {
        let (_before_packing_sim, _after_packing, after_ms) = br_dp_packing_ks_ms(
            d_input_zeros,
            &cuda_sks,
            &d_accumulator,
            dp_scalar,
            &cuda_compression_key.packing_key_switching_key,
            storage_modulus_log,
            &mut cuda_side_resources,
        );

        (
            after_ms.glwe_dimension().to_glwe_size(),
            after_ms.polynomial_size(),
            after_ms.ciphertext_modulus().raw_modulus_float(),
        )
    };

    assert_eq!(after_ms_sim.glwe_size(), expected_glwe_size_out);
    assert_eq!(after_ms_sim.polynomial_size(), expected_polynomial_size_out);
    assert_eq!(after_ms_sim.modulus().as_f64(), expected_modulus_f64_out);

    let cleartext_modulus = params.message_modulus().0 * params.carry_modulus().0;
    let mut noise_samples_before_ms = vec![];
    let mut noise_samples_after_ms = vec![];

    let chunk_size = 8;
    let vec_local_streams = (0..chunk_size)
        .map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
        .collect::<Vec<_>>();
    for _ in 0..cleartext_modulus {
        let (current_noise_samples_before_ms, current_noise_samples_after_ms): (Vec<_>, Vec<_>) =
            (0..SAMPLES_PER_MSG_PACKING_KS_NOISE)
                .collect::<Vec<_>>()
                .chunks(chunk_size)
                .flat_map(|chunk| {
                    chunk
                        .into_par_iter()
                        .map(|i| {
                            let local_stream = &vec_local_streams[*i % chunk_size];
                            let (_before_packing, after_packing, after_ms) =
                                encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
                                    params,
                                    comp_params,
                                    &cks,
                                    &cuda_sks,
                                    &private_compression_key,
                                    &cuda_compression_key,
                                    0,
                                    local_stream,
                                );
                            (after_packing, after_ms)
                        })
                        .collect::<Vec<_>>()
                })
                .unzip();

        noise_samples_before_ms.extend(current_noise_samples_before_ms);
        noise_samples_after_ms.extend(current_noise_samples_after_ms);
    }

    let noise_samples_before_ms_flattened: Vec<_> = noise_samples_before_ms
        .into_iter()
        .flatten()
        .map(|x| x.value)
        .collect();

    let noise_samples_after_ms_flattened: Vec<_> = noise_samples_after_ms
        .into_iter()
        .flatten()
        .map(|x| x.value)
        .collect();

    let before_ms_normality =
        normality_check(&noise_samples_before_ms_flattened, "before ms", 0.01);

    let after_ms_is_ok = mean_and_variance_check(
        &noise_samples_after_ms_flattened,
        "after_ms",
        0.0,
        after_ms_sim.variance_per_occupied_slot(),
        comp_params.packing_ks_key_noise_distribution(),
        after_ms_sim
            .glwe_dimension()
            .to_equivalent_lwe_dimension(after_ms_sim.polynomial_size()),
        after_ms_sim.modulus().as_f64(),
    );
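    // `mean_and_variance_check` compares the empirical mean (expected 0 here, since only zeros
    // are encrypted) and the empirical variance of the samples gathered after the modulus switch
    // against the variance predicted by the noise simulation, expressed per occupied GLWE slot.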

    assert!(before_ms_normality.null_hypothesis_is_valid && after_ms_is_ok);
}
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

fn noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu(meta_params: MetaParameters) {
    let (pfail_test_meta, params, comp_params) = {
        let (mut params, comp_params) = (
            meta_params.compute_parameters,
            meta_params.compression_parameters.unwrap(),
        );

        let original_message_modulus = params.message_modulus();
        let original_carry_modulus = params.carry_modulus();

        // For now only allow 2_2 parameters, and see later for heuristics to use
        assert_eq!(original_message_modulus.0, 4);
        assert_eq!(original_carry_modulus.0, 4);

        let noise_simulation_bsk =
            NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
        let noise_simulation_packing_key =
            NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);

        // The multiplication done in the compression is made to move the message up at the top of
        // the carry space, multiplying by the carry modulus achieves that
        let dp_scalar = params.carry_modulus().0;

        let noise_simulation_accumulator = NoiseSimulationGlwe::new(
            noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
            noise_simulation_bsk.output_polynomial_size(),
            Variance(0.0),
            noise_simulation_bsk.modulus(),
        );

        let lwe_per_glwe = comp_params.lwe_per_glwe();
        let storage_modulus_log = comp_params.storage_log_modulus();

        let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
            let noise_simulation = NoiseSimulationLwe::new(
                params.lwe_dimension(),
                Variance(0.0),
                NoiseSimulationModulus::from_ciphertext_modulus(params.ciphertext_modulus()),
            );
            br_dp_packing_ks_ms(
                vec![noise_simulation; lwe_per_glwe.0],
                &noise_simulation_bsk,
                &noise_simulation_accumulator,
                dp_scalar,
                &noise_simulation_packing_key,
                storage_modulus_log,
                &mut vec![(); lwe_per_glwe.0],
            )
        };

        let expected_variance_after_storage = after_ms_sim.variance_per_occupied_slot();

        let compression_carry_mod = CarryModulus(1);
        let compression_message_mod = original_message_modulus;
        let compression_precision_with_padding =
            precision_with_padding(compression_message_mod, compression_carry_mod);
        let expected_pfail_for_storage = expected_pfail_for_precision(
            compression_precision_with_padding,
            expected_variance_after_storage,
        );

        let original_pfail_and_precision = PfailAndPrecision::new(
            expected_pfail_for_storage,
            compression_message_mod,
            compression_carry_mod,
        );

        // Here we update the message modulus only:
        // - because the message modulus matches for the compression encoding and compute encoding
        // - so that the carry modulus stays the same and we apply the same dot product as normal
        //   for 2_2
        // - so that the effective encoding after the storage is the one we used to evaluate the
        //   pfail
        let updated_message_mod = MessageModulus(1 << 6);
        let updated_carry_mod = compression_carry_mod;

        update_ap_params_msg_and_carry_moduli(&mut params, updated_message_mod, updated_carry_mod);
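        // Pfail-measurement setup: with the nominal parameters the failure probability after
        // storage is too small to observe directly, so the message modulus is inflated to 2^6
        // (with carry modulus 1). The noise is unchanged, but each sample now needs
        // log2(64) + 1 = 7 bits (with padding) to decrypt correctly, which raises the per-sample
        // failure rate to a measurable level; `expected_pfail_for_precision` above gives the
        // matching prediction that `pfail_check` validates at the end of the test.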

        assert!(
            (params.message_modulus().0 * params.carry_modulus().0).ilog2()
                <= comp_params.storage_log_modulus().0 as u32,
            "Compression storage modulus cannot store enough bits for pfail estimation"
        );

        let updated_precision_with_padding =
            precision_with_padding(updated_message_mod, updated_carry_mod);

        let new_expected_pfail_for_storage = expected_pfail_for_precision(
            updated_precision_with_padding,
            expected_variance_after_storage,
        );

        let new_expected_pfail_and_precision = PfailAndPrecision::new(
            new_expected_pfail_for_storage,
            updated_message_mod,
            updated_carry_mod,
        );

        let pfail_test_meta = if should_run_short_pfail_tests_debug() {
            // To have the same amount of keys generated as the case where a single run is a single
            // sample
            let expected_fails = 200 * lwe_per_glwe.0 as u32;
            PfailTestMeta::new_with_desired_expected_fails(
                original_pfail_and_precision,
                new_expected_pfail_and_precision,
                expected_fails,
            )
        } else {
            // To guarantee 1_000_000 keysets are generated
            let total_runs = 1_000_000 * lwe_per_glwe.0 as u32;
            PfailTestMeta::new_with_total_runs(
                original_pfail_and_precision,
                new_expected_pfail_and_precision,
                total_runs,
            )
        };

        (pfail_test_meta, params, comp_params)
    };
    let gpu_index = 0;
    let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));

    let block_params: ShortintParameterSet = params.into();
    let cks = crate::integer::ClientKey::new(block_params);
    let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
    let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);

    let private_compression_key = cks.new_compression_private_key(comp_params);
    let (compressed_compression_key, _compressed_decompression_key) =
        cks.new_compressed_compression_decompression_keys(&private_compression_key);

    let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);

    let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;

    let total_runs_for_expected_fails = pfail_test_meta
        .total_runs_for_expected_fails()
        .div_ceil(lwe_per_glwe.0.try_into().unwrap());
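    // Each helper invocation produces `lwe_per_glwe` samples (one packed GLWE), so the number of
    // runs is the requested sample count divided by `lwe_per_glwe`, rounded up.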

    let chunk_size = 8;
    let vec_local_streams = (0..chunk_size)
        .map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
        .collect::<Vec<_>>();

    let measured_fails: f64 = (0..total_runs_for_expected_fails)
        .collect::<Vec<_>>()
        .chunks(chunk_size)
        .flat_map(|chunk| {
            chunk
                .into_par_iter()
                .map(|i| {
                    let local_streams = &vec_local_streams[*i as usize % chunk_size];
                    let after_ms_decryption_result = encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
                        params,
                        comp_params,
                        &cks,
                        &cuda_sks,
                        &private_compression_key,
                        &cuda_compression_key,
                        0,
                        local_streams,
                    );
                    after_ms_decryption_result
                        .into_iter()
                        .map(|result| result.failure_as_f64())
                        .sum::<f64>()
                })
                .collect::<Vec<_>>()
        })
        .sum();

    let test_result = PfailTestResult { measured_fails };

    pfail_check(&pfail_test_meta, test_result);
}

create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
@@ -0,0 +1,869 @@
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
use crate::core_crypto::commons::noise_formulas::noise_simulation::{
    NoiseSimulationLweFourier128Bsk, NoiseSimulationLwePackingKeyswitchKey,
};
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertextCount};
use crate::integer::gpu::CudaServerKey;
use crate::integer::noise_squashing::NoiseSquashingPrivateKey;
use crate::integer::CompressedServerKey;

use crate::core_crypto::commons::parameters::CiphertextModulusLog;
use crate::core_crypto::prelude::generate_programmable_bootstrap_glwe_lut;
use crate::integer::ciphertext::NoiseSquashingCompressionPrivateKey;
use crate::integer::gpu::list_compression::server_keys::CudaNoiseSquashingCompressionKey;
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
use crate::integer::gpu::server_key::radix::{CudaNoiseSquashingKey, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::unchecked_small_scalar_mul_integer_async;
use crate::integer::IntegerCiphertext;
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::parameters::noise_squashing::NoiseSquashingParameters;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{
    AtomicPatternParameters, MetaParameters, NoiseSquashingCompressionParameters, Variance,
};
use crate::shortint::server_key::tests::noise_distribution::dp_ks_pbs128_packingks::{
    dp_ks_any_ms_standard_pbs128, dp_ks_any_ms_standard_pbs128_packing_ks,
};
use crate::shortint::server_key::tests::noise_distribution::should_use_single_key_debug;
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
    NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
    NoiseSimulationLweKeyswitchKey, NoiseSimulationModulusSwitchConfig,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
    mean_and_variance_check, DecryptionAndNoiseResult, NoiseSample,
};
use crate::shortint::{PaddingBit, ShortintEncoding, ShortintParameterSet};
use crate::GpuIndex;
use rayon::prelude::*;

/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
fn sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu(meta_params: MetaParameters) {
    let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
        let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
        (
            meta_params.compute_parameters,
            meta_noise_squashing_params.parameters,
            meta_noise_squashing_params.compression_parameters.unwrap(),
        )
    };
    let gpu_index = 0;
    let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));

    let block_params: ShortintParameterSet = atomic_params.into();
    let cks = crate::integer::ClientKey::new(block_params);
    let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
    let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);

    let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
    let compressed_noise_squashing_compression_key =
        cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
    let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
    let cuda_noise_squashing_key =
        compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
    let noise_squashing_compression_private_key =
        NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
    let noise_squashing_compression_key = noise_squashing_private_key
        .new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
    let cuda_noise_squashing_compression_key =
        CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
            &noise_squashing_compression_key,
            &streams,
        );

    let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;

    let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();

    let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();

    let u128_encoding = ShortintEncoding {
        ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
        message_modulus: noise_squashing_params.message_modulus(),
        carry_modulus: noise_squashing_params.carry_modulus(),
        padding_bit: PaddingBit::Yes,
    };
    let max_scalar_mul = cuda_sks.max_noise_level.get();

    let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
        noise_squashing_key.key.polynomial_size(),
        noise_squashing_key.key.glwe_size(),
        u128_encoding
            .cleartext_space_without_padding()
            .try_into()
            .unwrap(),
        u128_encoding.ciphertext_modulus,
        u128_encoding.delta(),
        |x| x,
    );
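    // The identity LUT is generated against the 128-bit noise-squashing encoding: the cleartext
    // space (without the padding bit) and delta are taken from `u128_encoding`, so the LUT
    // matches the message/carry layout the PBS128 output is decrypted with later in the test.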

    let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);

    let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();

    let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
        degree: crate::shortint::ciphertext::Degree::new(atomic_params.message_modulus().0 - 1),
        message_modulus: atomic_params.message_modulus(),
        carry_modulus: atomic_params.carry_modulus(),
        atomic_pattern: atomic_params.atomic_pattern(),
        noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
    };
    let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
        .map(|_| CudaSideResources::new(&streams, cuda_block_info))
        .collect();

    let input_zero_as_lwe: Vec<_> = input_zeros
        .iter()
        .map(|ct| {
            let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
                &crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
                &streams,
            );
            CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
        })
        .collect();

    let (_before_packing, d_after_packing) = dp_ks_any_ms_standard_pbs128_packing_ks(
        input_zero_as_lwe,
        max_scalar_mul,
        &cuda_sks,
        modulus_switch_config,
        &cuda_noise_squashing_key,
        br_input_modulus_log,
        &id_lut_gpu,
        &cuda_noise_squashing_compression_key.packing_key_switching_key,
        &mut cuda_side_resources,
    );

    let cuda_noise_squashed_cts: Vec<_> = input_zeros
        .into_par_iter()
        .map(|ct| {
            let cloned_ct = ct;
            let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cloned_ct]);
            let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
            unsafe {
                unchecked_small_scalar_mul_integer_async(
                    &streams,
                    &mut d_ct.ciphertext,
                    max_scalar_mul,
                    atomic_params.message_modulus(),
                    atomic_params.carry_modulus(),
                );
            }
            streams.synchronize();
            cuda_noise_squashing_key.unchecked_squash_ciphertext_noise(
                &d_ct.ciphertext,
                &cuda_sks,
                &streams,
            )
        })
        .collect();

    let gpu_compressed = cuda_noise_squashing_compression_key
        .compress_noise_squashed_ciphertexts_into_list(&cuda_noise_squashed_cts, &streams);

    let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
    let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
    let extracted_glwe = GlweCiphertext::from_container(
        extracted_list.clone().into_container(),
        extracted_list.polynomial_size(),
        extracted_list.ciphertext_modulus(),
    );

    let after_packing_list = d_after_packing.to_glwe_ciphertext_list(&streams);
    let mut after_packing = GlweCiphertext::from_container(
        after_packing_list.clone().into_container(),
        after_packing_list.polynomial_size(),
        after_packing_list.ciphertext_modulus(),
    );
    // Bodies that were not filled are discarded
    after_packing.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);

    assert_eq!(after_packing.as_view(), extracted_glwe.as_view());
}

/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
fn sanity_check_encrypt_dp_ks_standard_pbs128_gpu(meta_params: MetaParameters) {
    let (params, noise_squashing_params) = {
        let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
        (
            meta_params.compute_parameters,
            meta_noise_squashing_params.parameters,
        )
    };
    let gpu_index = 0;
    let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));

    let block_params: ShortintParameterSet = params.into();
    let cks = crate::integer::ClientKey::new(block_params);
    let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
    let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);

    let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
    let compressed_noise_squashing_compression_key =
        cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
    let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
    let cuda_noise_squashing_key =
        compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);

    let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();

    let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();

    let u128_encoding = ShortintEncoding {
        ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
        message_modulus: noise_squashing_params.message_modulus(),
        carry_modulus: noise_squashing_params.carry_modulus(),
        padding_bit: PaddingBit::Yes,
    };
    let max_scalar_mul = cuda_sks.max_noise_level.get();

    let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
        noise_squashing_key.key.polynomial_size(),
        noise_squashing_key.key.glwe_size(),
        u128_encoding
            .cleartext_space_without_padding()
            .try_into()
            .unwrap(),
        u128_encoding.ciphertext_modulus,
        u128_encoding.delta(),
        |x| x,
    );

    let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);

    let lwe_per_glwe = LweCiphertextCount(128);
    let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();

    let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
        degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
        message_modulus: params.message_modulus(),
        carry_modulus: params.carry_modulus(),
        atomic_pattern: params.atomic_pattern(),
        noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
    };
    let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
        .map(|_| CudaSideResources::new(&streams, cuda_block_info))
        .collect();

    let input_zero_as_lwe: Vec<_> = input_zeros
        .iter()
        .map(|ct| {
            let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
                &crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
                &streams,
            );
            CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
        })
        .collect();

    let res: Vec<_> = input_zero_as_lwe
        .into_par_iter()
        .zip(cuda_side_resources.par_iter_mut())
        .map(|(input, side_resources)| {
            let (input, after_dp, ks_result, drift_technique_result, ms_result, pbs_result) =
                dp_ks_any_ms_standard_pbs128(
                    input,
                    max_scalar_mul,
                    &cuda_sks,
                    modulus_switch_config,
                    &cuda_noise_squashing_key,
                    br_input_modulus_log,
                    &id_lut_gpu,
                    side_resources,
                );

            (
                input,
                after_dp,
                ks_result,
                drift_technique_result,
                ms_result,
                pbs_result,
            )
        })
        .collect();

    let input_zeros_non_pattern: Vec<_> = input_zeros
        .iter()
        .map(|ct| {
            CudaUnsignedRadixCiphertext::from_radix_ciphertext(
                &crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
                &streams,
            )
        })
        .collect();

    let vector_non_pattern: Vec<_> = input_zeros_non_pattern
        .into_par_iter()
        .map(|mut d_ct_input2| {
            unsafe {
                unchecked_small_scalar_mul_integer_async(
                    &streams,
                    &mut d_ct_input2.ciphertext,
                    max_scalar_mul,
                    params.message_modulus(),
                    params.carry_modulus(),
                );
            }

            streams.synchronize();

            cuda_noise_squashing_key
                .squash_radix_ciphertext_noise(&cuda_sks, &d_ct_input2.ciphertext, &streams)
                .unwrap()
        })
        .collect();

    let vector_pattern_cpu: Vec<_> = res
        .into_iter()
        .map(
            |(_input, _after_dp, _ks_result, _drift_technique_result, _ms_result, pbs_result)| {
                pbs_result.as_ct_128_cpu(&streams)
            },
        )
        .collect();

    let vector_non_pattern_cpu: Vec<_> = vector_non_pattern
        .into_par_iter()
        .map(|cuda_squashed_radix_ct| {
            let squashed_noise_ct_cpu =
                cuda_squashed_radix_ct.to_squashed_noise_radix_ciphertext(&streams);
            squashed_noise_ct_cpu.packed_blocks()[0]
                .lwe_ciphertext()
                .clone()
        })
        .collect();

    // Compare that all the results are equivalent
    assert_eq!(vector_pattern_cpu, vector_non_pattern_cpu);
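    // This sanity check compares, value for value, the output of the decomposed
    // dp -> ks -> (drift/ms) -> pbs128 pipeline used by the noise tooling against the production
    // `squash_radix_ciphertext_noise` entry point, so any divergence between the two code paths
    // shows up as a hard failure rather than as a subtle statistical bias.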
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
noise_squashing_params: NoiseSquashingParameters,
|
||||
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
|
||||
single_cks: &crate::integer::ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
|
||||
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
|
||||
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
|
||||
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
|
||||
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
|
||||
msg: u64,
|
||||
scalar_for_multiplication: u64,
|
||||
br_input_modulus_log: CiphertextModulusLog,
|
||||
streams: &CudaStreams,
|
||||
) -> (
|
||||
Vec<(
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
DecryptionAndNoiseResult,
|
||||
)>,
|
||||
Vec<DecryptionAndNoiseResult>,
|
||||
) {
|
||||
let thread_cks: crate::integer::ClientKey;
|
||||
let thread_cuda_sks: CudaServerKey;
|
||||
let thread_noise_squashing_private_key: NoiseSquashingPrivateKey;
|
||||
let thread_noise_squashing_key: crate::integer::noise_squashing::NoiseSquashingKey;
|
||||
let thread_cuda_noise_squashing_key: CudaNoiseSquashingKey;
|
||||
let thread_noise_squashing_compression_private_key: NoiseSquashingCompressionPrivateKey;
|
||||
let thread_cuda_noise_squashing_compression_key: CudaNoiseSquashingCompressionKey;
|
||||
let (
|
||||
cks,
|
||||
cuda_sks,
|
||||
noise_squashing_private_key,
|
||||
noise_squashing_key,
|
||||
cuda_noise_squashing_key,
|
||||
noise_squashing_compression_private_key,
|
||||
cuda_noise_squashing_compression_key,
|
||||
) = if should_use_single_key_debug() {
|
||||
(
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
single_noise_squashing_private_key,
|
||||
single_noise_squashing_key,
|
||||
single_cuda_noise_squashing_key,
|
||||
single_noise_squashing_compression_private_key,
|
||||
single_cuda_noise_squashing_compression_key,
|
||||
)
|
||||
} else {
|
||||
let block_params: ShortintParameterSet = params.into();
|
||||
thread_cks = crate::integer::ClientKey::new(block_params);
|
||||
let thread_compressed_server_key =
|
||||
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
|
||||
thread_cuda_sks =
|
||||
CudaServerKey::decompress_from_cpu(&thread_compressed_server_key, streams);
|
||||
|
||||
thread_noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
|
||||
let thread_compressed_noise_squashing_compression_key =
|
||||
thread_cks.new_compressed_noise_squashing_key(&thread_noise_squashing_private_key);
|
||||
thread_noise_squashing_key = thread_compressed_noise_squashing_compression_key.decompress();
|
||||
thread_cuda_noise_squashing_key =
|
||||
thread_compressed_noise_squashing_compression_key.decompress_to_cuda(streams);
|
||||
thread_noise_squashing_compression_private_key =
|
||||
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
|
||||
let thread_noise_squashing_compression_key = thread_noise_squashing_private_key
|
||||
.new_noise_squashing_compression_key(&thread_noise_squashing_compression_private_key);
|
||||
thread_cuda_noise_squashing_compression_key =
|
||||
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
|
||||
&thread_noise_squashing_compression_key,
|
||||
streams,
|
||||
);
|
||||
(
|
||||
&thread_cks,
|
||||
&thread_cuda_sks,
|
||||
&thread_noise_squashing_private_key,
|
||||
&thread_noise_squashing_key,
|
||||
&thread_cuda_noise_squashing_key,
|
||||
&thread_noise_squashing_compression_private_key,
|
||||
&thread_cuda_noise_squashing_compression_key,
|
||||
)
|
||||
};
|
||||
|
||||
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
|
||||
|
||||
let bsk_polynomial_size = noise_squashing_key.key.polynomial_size();
|
||||
let bsk_glwe_size = noise_squashing_key.key.glwe_size();
|
||||
|
||||
let u128_encoding = ShortintEncoding {
|
||||
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
|
||||
message_modulus: noise_squashing_params.message_modulus(),
|
||||
carry_modulus: noise_squashing_params.carry_modulus(),
|
||||
padding_bit: PaddingBit::Yes,
|
||||
};
|
||||
|
||||
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
|
||||
bsk_polynomial_size,
|
||||
bsk_glwe_size,
|
||||
u128_encoding
|
||||
.cleartext_space_without_padding()
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
u128_encoding.ciphertext_modulus,
|
||||
u128_encoding.delta(),
|
||||
|x| x,
|
||||
);
|
||||
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, streams);
|
||||
|
||||
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
|
||||
|
||||
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(msg)).collect();
|
||||
|
||||
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
|
||||
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
atomic_pattern: params.atomic_pattern(),
|
||||
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
|
||||
};
|
||||
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
|
||||
.map(|_| CudaSideResources::new(streams, cuda_block_info))
|
||||
.collect();
|
||||
|
||||
let input_zero_as_lwe: Vec<_> = input_zeros
|
||||
.iter()
|
||||
.map(|ct| {
|
||||
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
|
||||
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
|
||||
streams,
|
||||
);
|
||||
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let (before_packing_gpu, after_packing_gpu) = dp_ks_any_ms_standard_pbs128_packing_ks(
|
||||
input_zero_as_lwe,
|
||||
scalar_for_multiplication,
|
||||
cuda_sks,
|
||||
modulus_switch_config,
|
||||
cuda_noise_squashing_key,
|
||||
br_input_modulus_log,
|
||||
&id_lut_gpu,
|
||||
&cuda_noise_squashing_compression_key.packing_key_switching_key,
|
||||
&mut cuda_side_resources,
|
||||
);
|
||||
|
||||
let before_packing: Vec<_> = before_packing_gpu
|
||||
.into_iter()
|
||||
.map(
|
||||
|(
|
||||
input_gpu,
|
||||
after_dp_gpu,
|
||||
after_ks_gpu,
|
||||
after_drift_gpu,
|
||||
after_ms_gpu,
|
||||
after_pbs128_gpu,
|
||||
)| {
|
||||
match &cks.key.atomic_pattern {
|
||||
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
|
||||
let params = standard_atomic_pattern_client_key.parameters;
|
||||
let u64_encoding = ShortintEncoding {
|
||||
ciphertext_modulus: params.ciphertext_modulus(),
|
||||
message_modulus: params.message_modulus(),
|
||||
carry_modulus: params.carry_modulus(),
|
||||
padding_bit: PaddingBit::Yes,
|
||||
};
|
||||
let large_lwe_secret_key =
|
||||
standard_atomic_pattern_client_key.large_lwe_secret_key();
|
||||
let small_lwe_secret_key =
|
||||
standard_atomic_pattern_client_key.small_lwe_secret_key();
|
||||
|
||||
let input_ct = input_gpu.as_ct_64_cpu(streams);
|
||||
let after_dp_ct = after_dp_gpu.as_ct_64_cpu(streams);
|
||||
let after_ks_ct = after_ks_gpu.as_ct_64_cpu(streams);
|
||||
let before_ms_gpu: &CudaDynLwe =
|
||||
after_drift_gpu.as_ref().unwrap_or(&after_ks_gpu);
|
||||
let before_ms_ct = before_ms_gpu.as_ct_64_cpu(streams);
|
||||
let after_ms_ct = after_ms_gpu.as_ct_64_cpu(streams);
|
||||
let after_pbs128_ct = after_pbs128_gpu.as_ct_128_cpu(streams);
|
||||
(
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&input_ct,
|
||||
&large_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_dp_ct,
|
||||
&large_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_ks_ct,
|
||||
&small_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&before_ms_ct,
|
||||
&small_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_ms_ct,
|
||||
&small_lwe_secret_key,
|
||||
msg,
|
||||
&u64_encoding,
|
||||
),
|
||||
DecryptionAndNoiseResult::new_from_lwe(
|
||||
&after_pbs128_ct,
|
||||
&noise_squashing_private_key
|
||||
.key
|
||||
.post_noise_squashing_lwe_secret_key(),
|
||||
msg.into(),
|
||||
&u128_encoding,
|
||||
),
|
||||
)
|
||||
}
|
||||
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
|
||||
panic!("KS32 atomic pattern not supported for GPU yet");
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
let after_packing_list = after_packing_gpu.to_glwe_ciphertext_list(streams);
|
||||
let after_packing = GlweCiphertext::from_container(
|
||||
after_packing_list.clone().into_container(),
|
||||
after_packing_list.polynomial_size(),
|
||||
after_packing_list.ciphertext_modulus(),
|
||||
);
|
||||
let after_packing = DecryptionAndNoiseResult::new_from_glwe(
|
||||
&after_packing,
|
||||
noise_squashing_compression_private_key
|
||||
.key
|
||||
.post_packing_ks_key(),
|
||||
lwe_per_glwe,
|
||||
msg.into(),
|
||||
&u128_encoding,
|
||||
);
|
||||
|
||||
assert_eq!(after_packing.len(), lwe_per_glwe.0);
|
||||
|
||||
(before_packing, after_packing)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
|
||||
params: AtomicPatternParameters,
|
||||
noise_squashing_params: NoiseSquashingParameters,
|
||||
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
|
||||
single_cks: &crate::integer::ClientKey,
|
||||
single_cuda_sks: &CudaServerKey,
|
||||
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
|
||||
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
|
||||
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
|
||||
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
|
||||
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
|
||||
msg: u64,
|
||||
scalar_for_multiplication: u64,
|
||||
br_input_modulus_log: CiphertextModulusLog,
|
||||
streams: &CudaStreams,
|
||||
) -> (
|
||||
Vec<(
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
NoiseSample,
|
||||
)>,
|
||||
Vec<NoiseSample>,
|
||||
) {
|
||||
let (before_compression, after_compression) =
|
||||
encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
|
||||
params,
|
||||
noise_squashing_params,
|
||||
noise_squashing_compression_params,
|
||||
single_cks,
|
||||
single_cuda_sks,
|
||||
single_noise_squashing_private_key,
|
||||
single_noise_squashing_key,
|
||||
single_cuda_noise_squashing_key,
|
||||
single_noise_squashing_compression_private_key,
|
||||
single_cuda_noise_squashing_compression_key,
|
||||
msg,
|
||||
scalar_for_multiplication,
|
||||
br_input_modulus_log,
|
||||
streams,
|
||||
);
|
||||
|
||||
(
|
||||
before_compression
|
||||
.into_iter()
|
||||
.map(
|
||||
|(input, after_dp, after_ks, after_drift, after_ms, after_pbs)| {
|
||||
(
|
||||
input
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_dp
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_ks
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_drift
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_ms
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
after_pbs
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed"),
|
||||
)
|
||||
},
|
||||
)
|
||||
.collect(),
|
||||
after_compression
|
||||
.into_iter()
|
||||
.map(|after_compression| {
|
||||
after_compression
|
||||
.get_noise_if_decryption_was_correct()
|
||||
.expect("Decryption Failed")
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu(meta_params: MetaParameters) {
|
||||
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
|
||||
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
|
||||
(
|
||||
meta_params.compute_parameters,
|
||||
meta_noise_squashing_params.parameters,
|
||||
meta_noise_squashing_params.compression_parameters.unwrap(),
|
||||
)
|
||||
};
|
||||
let gpu_index = 0;
|
||||
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
|
||||
|
||||
let block_params: ShortintParameterSet = atomic_params.into();
|
||||
let cks = crate::integer::ClientKey::new(block_params);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
|
||||
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
|
||||
let compressed_noise_squashing_compression_key =
|
||||
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
|
||||
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
|
||||
let cuda_noise_squashing_key =
|
||||
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
|
||||
let noise_squashing_compression_private_key =
|
||||
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
|
||||
let noise_squashing_compression_key = noise_squashing_private_key
|
||||
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
|
||||
let cuda_noise_squashing_compression_key =
|
||||
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
|
||||
&noise_squashing_compression_key,
|
||||
&streams,
|
||||
);
|
||||
|
||||
let noise_simulation_ksk =
|
||||
NoiseSimulationLweKeyswitchKey::new_from_atomic_pattern_parameters(atomic_params);
|
||||
let noise_simulation_bsk =
|
||||
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(atomic_params);
|
||||
let noise_simulation_modulus_switch_config =
|
||||
NoiseSimulationModulusSwitchConfig::new_from_atomic_pattern_parameters(atomic_params);
|
||||
let noise_simulation_bsk128 =
|
||||
        NoiseSimulationLweFourier128Bsk::new_from_parameters(atomic_params, noise_squashing_params);
    let noise_simulation_packing_key =
        NoiseSimulationLwePackingKeyswitchKey::new_from_noise_squashing_parameters(
            noise_squashing_params,
            noise_squashing_compression_params,
        );

    assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));

    assert!(noise_simulation_bsk128
        .matches_actual_shortint_noise_squashing_key(&noise_squashing_key.key));
    assert!(noise_simulation_packing_key.matches_actual_pksk(
        noise_squashing_compression_key
            .key
            .packing_key_switching_key()
    ));

    let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();

    let max_scalar_mul = cuda_sks.max_noise_level.get();

    let noise_simulation_accumulator = NoiseSimulationGlwe::new(
        noise_simulation_bsk128
            .output_glwe_size()
            .to_glwe_dimension(),
        noise_simulation_bsk128.output_polynomial_size(),
        Variance(0.0),
        noise_simulation_bsk128.modulus(),
    );

    let (_before_packing_sim, after_packing_sim) = {
        let noise_simulation = NoiseSimulationLwe::encrypt(&cks.key, 0);
        dp_ks_any_ms_standard_pbs128_packing_ks(
            vec![noise_simulation; cuda_noise_squashing_compression_key.lwe_per_glwe.0],
            max_scalar_mul,
            &noise_simulation_ksk,
            noise_simulation_modulus_switch_config.as_ref(),
            &noise_simulation_bsk128,
            br_input_modulus_log,
            &noise_simulation_accumulator,
            &noise_simulation_packing_key,
            &mut vec![(); cuda_noise_squashing_compression_key.lwe_per_glwe.0],
        )
    };

    let after_packing_sim = after_packing_sim.into_lwe();

    // Check that the circuit is correct with respect to core implementation, i.e. does not crash on
    // dimension checks
    let (expected_lwe_dimension_out, expected_modulus_f64_out) = {
        let pksk = noise_squashing_compression_key
            .key
            .packing_key_switching_key();

        let out_glwe_dim = pksk.output_key_glwe_dimension();
        let out_poly_size = pksk.output_key_polynomial_size();

        (
            out_glwe_dim.to_equivalent_lwe_dimension(out_poly_size),
            pksk.ciphertext_modulus().raw_modulus_float(),
        )
    };

    assert_eq!(
        after_packing_sim.lwe_dimension(),
        expected_lwe_dimension_out
    );
    assert_eq!(
        after_packing_sim.modulus().as_f64(),
        expected_modulus_f64_out
    );

    let cleartext_modulus = atomic_params.message_modulus().0 * atomic_params.carry_modulus().0;
    let mut noise_samples_after_packing = vec![];

    let sample_count_per_msg =
        1000usize.div_ceil(cuda_noise_squashing_compression_key.lwe_per_glwe.0);
    let chunk_size = 4;
    let vec_local_streams = (0..chunk_size)
        .map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
        .collect::<Vec<_>>();
    for _i in 0..cleartext_modulus {
        let current_noise_samples_after_packing: Vec<_> = (0..sample_count_per_msg)
            .collect::<Vec<_>>()
            .chunks(chunk_size)
            .flat_map(|chunk| {
                chunk
                    .into_par_iter()
                    .map(|i| {
                        let local_stream = &vec_local_streams[*i % chunk_size];
                        let (_before_packing, after_packing) =
                            encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
                                atomic_params,
                                noise_squashing_params,
                                noise_squashing_compression_params,
                                &cks,
                                &cuda_sks,
                                &noise_squashing_private_key,
                                &noise_squashing_key,
                                &cuda_noise_squashing_key,
                                &noise_squashing_compression_private_key,
                                &cuda_noise_squashing_compression_key,
                                0,
                                max_scalar_mul,
                                br_input_modulus_log,
                                local_stream,
                            );
                        after_packing
                    })
                    .collect::<Vec<_>>()
            })
            .collect();

        noise_samples_after_packing.extend(current_noise_samples_after_packing);
    }

    let noise_samples_after_packing_flattened: Vec<_> = noise_samples_after_packing
        .into_iter()
        .flatten()
        .map(|x| x.value)
        .collect();

    let after_packing_is_ok = mean_and_variance_check(
        &noise_samples_after_packing_flattened,
        "after_packing",
        0.0,
        after_packing_sim.variance(),
        noise_squashing_compression_params.packing_ks_key_noise_distribution,
        after_packing_sim.lwe_dimension(),
        after_packing_sim.modulus().as_f64(),
    );

    assert!(after_packing_is_ok);
}

create_gpu_parameterized_test!(
    noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu {
        TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
    }
);

create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_gpu {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
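// Illustrative sketch (not part of the diff above): the kind of check `mean_and_variance_check`
// performs on the collected noise samples. The helper and tolerance names below are hypothetical;
// the real helper also takes the key noise distribution and derives its bounds from the sample
// count, but the idea is to compare empirical moments against the simulated variance.
fn empirical_mean_and_variance(samples: &[f64]) -> (f64, f64) {
    let n = samples.len() as f64;
    let mean = samples.iter().sum::<f64>() / n;
    // Unbiased sample variance.
    let variance = samples.iter().map(|s| (s - mean).powi(2)).sum::<f64>() / (n - 1.0);
    (mean, variance)
}

fn noise_check_sketch(samples: &[f64], expected_mean: f64, expected_variance: f64) -> bool {
    let (mean, variance) = empirical_mean_and_variance(samples);
    // Loose, illustrative tolerances only.
    let mean_tolerance = 3.0 * (expected_variance / samples.len() as f64).sqrt();
    (mean - expected_mean).abs() <= mean_tolerance && variance <= 1.1 * expected_variance
}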
@@ -1,3 +1,5 @@
pub mod br_dp_ks_ms;
pub mod br_dp_packingks_ms;
pub mod dp_ks_ms;
pub mod dp_ks_pbs_128_packingks;
pub mod utils;

@@ -1,7 +1,7 @@
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::{
    AllocateCenteredBinaryShiftedStandardModSwitchResult,
    AllocateDriftTechniqueStandardModSwitchResult, AllocateLweBootstrapResult,
    AllocateLweKeyswitchResult, AllocateStandardModSwitchResult,
    AllocateLweKeyswitchResult, AllocateLwePackingKeyswitchResult, AllocateStandardModSwitchResult,
    CenteredBinaryShiftedStandardModSwitch, DriftTechniqueStandardModSwitch,
    LweClassicFftBootstrap, LweKeyswitch, ScalarMul, StandardModSwitch,
};
@@ -13,6 +13,7 @@ use crate::core_crypto::gpu::cuda_modulus_switch_ciphertext;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::prelude::*;
use crate::integer::gpu::ciphertext::info::CudaBlockInfo;
@@ -25,7 +26,7 @@ use crate::integer::gpu::{
    cuda_centered_modulus_switch_64, unchecked_small_scalar_mul_integer_async, CudaStreams,
};
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::NoiseSimulationModulusSwitchConfig;

use crate::shortint::server_key::tests::noise_distribution::utils::traits::LwePackingKeyswitch;
/// Side resources for CUDA operations in noise simulation
#[derive(Clone)]
pub struct CudaSideResources {
@@ -128,6 +129,19 @@ impl CudaDynLwe {
        }
    }

    pub fn as_ct_128_cpu(&self, streams: &CudaStreams) -> LweCiphertext<Vec<u128>> {
        match self {
            Self::U32(_) => panic!("Tried getting a u32 CudaLweCiphertextList as u128."),
            Self::U64(_) => panic!("Tried getting a u64 CudaLweCiphertextList as u128."),
            Self::U128(_cuda_lwe) => {
                let cpu_lwe_list = self.as_lwe_128().to_lwe_ciphertext_list(streams);
                LweCiphertext::from_container(
                    cpu_lwe_list.clone().into_container(),
                    cpu_lwe_list.ciphertext_modulus(),
                )
            }
        }
    }
    pub fn from_lwe_32(cuda_lwe: CudaLweCiphertextList<u32>) -> Self {
        Self::U32(cuda_lwe)
    }
@@ -141,6 +155,19 @@ impl CudaDynLwe {
    }
}

/// Converts a CudaGlweCiphertextList<u64> to a GlweCiphertext<Vec<u64>>
pub fn cuda_glwe_list_to_glwe_ciphertext(
    cuda_glwe_list: &CudaGlweCiphertextList<u64>,
    streams: &CudaStreams,
) -> GlweCiphertext<Vec<u64>> {
    let cpu_glwe_list = cuda_glwe_list.to_glwe_ciphertext_list(streams);
    GlweCiphertext::from_container(
        cpu_glwe_list.clone().into_container(),
        cpu_glwe_list.polynomial_size(),
        cpu_glwe_list.ciphertext_modulus(),
    )
}

impl ScalarMul<u64> for CudaDynLwe {
    type Output = Self;
    type SideResources = CudaSideResources;
@@ -313,13 +340,14 @@ impl StandardModSwitch<Self> for CudaDynLwe {
                panic!("U32 modulus switch not implemented for CudaDynLwe - only U64 is supported");
            }
            (Self::U64(input), Self::U64(output_cuda_lwe)) => {
                let internal_output = input.duplicate(&side_resources.streams);
                let mut internal_output = input.duplicate(&side_resources.streams);
                cuda_modulus_switch_ciphertext(
                    &mut output_cuda_lwe.0.d_vec,
                    &mut internal_output.0.d_vec,
                    output_modulus_log.0 as u32,
                    &side_resources.streams,
                );
                let mut cpu_lwe = internal_output.to_lwe_ciphertext_list(&side_resources.streams);

                let shift_to_map_to_native = u64::BITS - output_modulus_log.0 as u32;
                for val in cpu_lwe.as_mut_view().into_container().iter_mut() {
                    *val <<= shift_to_map_to_native;
@@ -713,3 +741,193 @@ impl AllocateLweBootstrapResult for CudaGlweCiphertextList<u128> {
        CudaDynLwe::U128(cuda_lwe)
    }
}

// Implement LweClassicFft128Bootstrap for CudaNoiseSquashingKey using 128-bit PBS CUDA function
impl
    crate::core_crypto::commons::noise_formulas::noise_simulation::traits::LweClassicFft128Bootstrap<
        CudaDynLwe,
        CudaDynLwe,
        CudaGlweCiphertextList<u128>,
    > for crate::integer::gpu::noise_squashing::keys::CudaNoiseSquashingKey
{
    type SideResources = CudaSideResources;

    fn lwe_classic_fft_128_pbs(
        &self,
        input: &CudaDynLwe,
        output: &mut CudaDynLwe,
        accumulator: &CudaGlweCiphertextList<u128>,
        side_resources: &mut Self::SideResources,
    ) {
        use crate::core_crypto::gpu::algorithms::lwe_programmable_bootstrapping::cuda_programmable_bootstrap_128_lwe_ciphertext_async;
        use crate::integer::gpu::server_key::CudaBootstrappingKey;

        match (input, output) {
            (CudaDynLwe::U64(input_cuda_lwe), CudaDynLwe::U128(output_cuda_lwe)) => {
                // Get the bootstrap key from self - it's already u128 type
                let bsk = match &self.bootstrapping_key {
                    CudaBootstrappingKey::Classic(d_bsk) => d_bsk,
                    CudaBootstrappingKey::MultiBit(_) => {
                        panic!("MultiBit bootstrapping keys are not supported for 128-bit PBS");
                    }
                };

                unsafe {
                    cuda_programmable_bootstrap_128_lwe_ciphertext_async(
                        input_cuda_lwe,
                        output_cuda_lwe,
                        accumulator,
                        bsk,
                        &side_resources.streams,
                    );
                    side_resources.streams.synchronize();
                }
            }
            _ => panic!("128-bit PBS expects U64 input and U128 output for CudaDynLwe"),
        }
    }
}

impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u64> {
    type Output = CudaGlweCiphertextList<u64>;
    type SideResources = CudaSideResources;

    fn allocate_lwe_packing_keyswitch_result(
        &self,
        side_resources: &mut Self::SideResources,
    ) -> Self::Output {
        let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
        let polynomial_size = self.output_polynomial_size();
        let ciphertext_modulus = self.ciphertext_modulus();

        CudaGlweCiphertextList::new(
            glwe_dimension,
            polynomial_size,
            GlweCiphertextCount(1),
            ciphertext_modulus,
            &side_resources.streams,
        )
    }
}

impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u64>>
    for CudaLwePackingKeyswitchKey<u64>
{
    type SideResources = CudaSideResources;

    fn keyswitch_lwes_and_pack_in_glwe(
        &self,
        input: &[&CudaDynLwe],
        output: &mut CudaGlweCiphertextList<u64>,
        side_resources: &mut CudaSideResources,
    ) {
        use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64;
        let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
            input.iter().map(|ciphertext| ciphertext.as_lwe_64()),
            &side_resources.streams,
        );

        cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
            self,
            &input_lwe_ciphertext_list,
            output,
            &side_resources.streams,
        );
    }
}

// Implement StandardModSwitch traits for CudaGlweCiphertextList<u64>
impl AllocateStandardModSwitchResult for CudaGlweCiphertextList<u64> {
    type Output = Self;
    type SideResources = CudaSideResources;

    fn allocate_standard_mod_switch_result(
        &self,
        side_resources: &mut Self::SideResources,
    ) -> Self::Output {
        Self::new(
            self.glwe_dimension(),
            self.polynomial_size(),
            self.glwe_ciphertext_count(),
            self.ciphertext_modulus(),
            &side_resources.streams,
        )
    }
}

impl StandardModSwitch<Self> for CudaGlweCiphertextList<u64> {
    type SideResources = CudaSideResources;

    fn standard_mod_switch(
        &self,
        storage_log_modulus: CiphertextModulusLog,
        output: &mut Self,
        side_resources: &mut CudaSideResources,
    ) {
        let mut internal_output = self.duplicate(&side_resources.streams);

        cuda_modulus_switch_ciphertext(
            &mut internal_output.0.d_vec,
            storage_log_modulus.0 as u32,
            &side_resources.streams,
        );
        side_resources.streams.synchronize();
        let mut cpu_glwe = internal_output.to_glwe_ciphertext_list(&side_resources.streams);

        let shift_to_map_to_native = u64::BITS - storage_log_modulus.0 as u32;
        for val in cpu_glwe.as_mut_view().into_container().iter_mut() {
            *val <<= shift_to_map_to_native;
        }
        let d_after_ms = Self::from_glwe_ciphertext_list(&cpu_glwe, &side_resources.streams);

        *output = d_after_ms;
    }
}

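// Illustrative sketch (not part of the diff): the scaling used by the modulus switch impls above.
// After `cuda_modulus_switch_ciphertext`, each coefficient appears to live in the low
// `storage_log_modulus` bits, so shifting left by `u64::BITS - storage_log_modulus` re-embeds it
// in the native 2^64 representation before the data is moved back to the GPU.
fn map_switched_value_to_native(val: u64, storage_log_modulus: u32) -> u64 {
    debug_assert!(storage_log_modulus < u64::BITS);
    debug_assert!(val < (1u64 << storage_log_modulus));
    val << (u64::BITS - storage_log_modulus)
}
// Example: with a 12-bit storage modulus, the switched value 3 becomes 3 * 2^52.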
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u128> {
    type Output = CudaGlweCiphertextList<u128>;
    type SideResources = CudaSideResources;

    fn allocate_lwe_packing_keyswitch_result(
        &self,
        side_resources: &mut Self::SideResources,
    ) -> Self::Output {
        let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
        let polynomial_size = self.output_polynomial_size();
        let ciphertext_modulus = self.ciphertext_modulus();

        CudaGlweCiphertextList::new(
            glwe_dimension,
            polynomial_size,
            GlweCiphertextCount(1),
            ciphertext_modulus,
            &side_resources.streams,
        )
    }
}

impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u128>>
    for CudaLwePackingKeyswitchKey<u128>
{
    type SideResources = CudaSideResources;

    fn keyswitch_lwes_and_pack_in_glwe(
        &self,
        input: &[&CudaDynLwe],
        output: &mut CudaGlweCiphertextList<u128>,
        side_resources: &mut CudaSideResources,
    ) {
        use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128;
        let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
            input.iter().map(|ciphertext| ciphertext.as_lwe_128()),
            &side_resources.streams,
        );

        cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128(
            self,
            &input_lwe_ciphertext_list,
            output,
            &side_resources.streams,
        );
    }
}

@@ -2,6 +2,7 @@ use super::{RadixCiphertext, ServerKey, SignedRadixCiphertext};
use crate::core_crypto::commons::generators::DeterministicSeeder;
use crate::core_crypto::prelude::DefaultRandomGenerator;
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use std::num::NonZeroU64;

pub use tfhe_csprng::seeders::{Seed, Seeder};

@@ -163,6 +164,7 @@ impl ServerKey {
    /// as `num_input_random_bits`
    ///
    /// ```rust
    /// use std::num::NonZeroU64;
    /// use tfhe::integer::gen_keys_radix;
    /// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
    /// use tfhe::Seed;
@@ -173,7 +175,7 @@ impl ServerKey {
    /// let (cks, sks) = gen_keys_radix(PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128, size);
    ///
    /// let num_input_random_bits = 5;
    /// let excluded_upper_bound = 3;
    /// let excluded_upper_bound = NonZeroU64::new(3).unwrap();
    /// let num_blocks_output = 8;
    ///
    /// let ct_res = sks.par_generate_oblivious_pseudo_random_unsigned_custom_range(
@@ -186,15 +188,17 @@ impl ServerKey {
    /// // Decrypt:
    /// let dec_result: u64 = cks.decrypt(&ct_res);
    ///
    /// assert!(dec_result < excluded_upper_bound);
    /// assert!(dec_result < excluded_upper_bound.get());
    /// ```
    pub fn par_generate_oblivious_pseudo_random_unsigned_custom_range(
        &self,
        seed: Seed,
        num_input_random_bits: u64,
        excluded_upper_bound: u64,
        excluded_upper_bound: NonZeroU64,
        num_blocks_output: u64,
    ) -> RadixCiphertext {
        let excluded_upper_bound = excluded_upper_bound.get();

        assert!(self.message_modulus().0.is_power_of_two());
        let message_bits_count = self.message_modulus().0.ilog2() as u64;

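// Illustrative sketch (not part of the diff): the cleartext counterpart of the range reduction
// performed by `par_generate_oblivious_pseudo_random_unsigned_custom_range`, matching the
// `oprf_density_function` helper used by the tests further down. A value uniform over
// [0, 2^num_input_random_bits) is mapped to [0, excluded_upper_bound) with a multiply-then-shift,
// which is only *almost* uniform when the bound does not divide the input range.
fn map_to_custom_range(x: u64, num_input_random_bits: u32, excluded_upper_bound: u64) -> u64 {
    debug_assert!(num_input_random_bits < u64::BITS && x < (1u64 << num_input_random_bits));
    // Widen to u128 so the intermediate product cannot overflow.
    ((x as u128 * excluded_upper_bound as u128) >> num_input_random_bits) as u64
}
// Example: with 4 input bits and an excluded upper bound of 3, the 16 possible inputs map to
// 0 six times and to 1 and 2 five times each, hence the "almost uniformity" test below.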
@@ -10,6 +10,7 @@ use crate::integer::{BooleanBlock, IntegerKeyKind, RadixCiphertext, RadixClientK
use crate::shortint::parameters::*;
use crate::{ClientKey, CompressedServerKey, MatchValues, Seed, Tag};
use std::cmp::{max, min};
use std::num::NonZeroU64;
use std::sync::Arc;

create_parameterized_test!(random_op_sequence {
@@ -498,7 +499,18 @@ where
        &ServerKey::par_generate_oblivious_pseudo_random_unsigned_integer_bounded,
    );
    let oprf_custom_range_executor = OpSequenceCpuFunctionExecutor::new(
        &ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
        &|sk: &ServerKey,
          seed: Seed,
          num_input_random_bits: u64,
          excluded_upper_bound: u64,
          num_blocks_output: u64| {
            sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
                seed,
                num_input_random_bits,
                NonZeroU64::new(excluded_upper_bound).unwrap_or(NonZeroU64::new(1).unwrap()),
                num_blocks_output,
            )
        },
    );

    let mut oprf_ops: Vec<(OprfExecutor, String)> = vec![(

@@ -9,6 +9,7 @@ use crate::integer::{IntegerKeyKind, RadixCiphertext, RadixClientKey, ServerKey}
use crate::shortint::parameters::*;
use statrs::distribution::ContinuousCDF;
use std::collections::HashMap;
use std::num::NonZeroU64;
use std::sync::Arc;
use tfhe_csprng::seeders::Seed;

@@ -36,9 +37,19 @@ fn oprf_any_range_unsigned<P>(param: P)
where
    P: Into<TestParameters>,
{
    let executor = CpuFunctionExecutor::new(
        &ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
    );
    let executor =
        CpuFunctionExecutor::new(&|sk: &ServerKey,
                                   seed: Seed,
                                   num_input_random_bits: u64,
                                   excluded_upper_bound: u64,
                                   num_blocks_output: u64| {
            sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
                seed,
                num_input_random_bits,
                NonZeroU64::new(excluded_upper_bound).unwrap(),
                num_blocks_output,
            )
        });
    oprf_any_range_test(param, executor);
}

@@ -46,9 +57,19 @@ fn oprf_almost_uniformity_unsigned<P>(param: P)
where
    P: Into<TestParameters>,
{
    let executor = CpuFunctionExecutor::new(
        &ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
    );
    let executor =
        CpuFunctionExecutor::new(&|sk: &ServerKey,
                                   seed: Seed,
                                   num_input_random_bits: u64,
                                   excluded_upper_bound: u64,
                                   num_blocks_output: u64| {
            sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
                seed,
                num_input_random_bits,
                NonZeroU64::new(excluded_upper_bound).unwrap(),
                num_blocks_output,
            )
        });
    oprf_almost_uniformity_test(param, executor);
}

@@ -89,7 +110,7 @@ where
    );
}

pub fn oprf_uniformity_test<P, E>(param: P, mut executor: E)
pub(crate) fn oprf_uniformity_test<P, E>(param: P, mut executor: E)
where
    P: Into<TestParameters>,
    E: for<'a> FunctionExecutor<(Seed, u64, u64), RadixCiphertext>,
@@ -113,7 +134,7 @@ where
    });
}

pub fn oprf_any_range_test<P, E>(param: P, mut executor: E)
pub(crate) fn oprf_any_range_test<P, E>(param: P, mut executor: E)
where
    P: Into<TestParameters>,
    E: for<'a> FunctionExecutor<(Seed, u64, u64, u64), RadixCiphertext>,
@@ -149,7 +170,7 @@ where
    }
}

pub fn oprf_almost_uniformity_test<P, E>(param: P, mut executor: E)
pub(crate) fn oprf_almost_uniformity_test<P, E>(param: P, mut executor: E)
where
    P: Into<TestParameters>,
    E: for<'a> FunctionExecutor<(Seed, u64, u64, u64), RadixCiphertext>,
@@ -165,40 +186,70 @@ where
    let num_input_random_bits: u64 = 4;
    let num_blocks_output = 64;
    let excluded_upper_bound = 10;
    let random_input_upper_bound = 1 << num_input_random_bits;

    let mut density = vec![0_usize; excluded_upper_bound as usize];
    for i in 0..random_input_upper_bound {
        let index = ((i * excluded_upper_bound) as f64 / random_input_upper_bound as f64) as usize;
        density[index] += 1;
    }

    let theoretical_pdf: Vec<f64> = density
        .iter()
        .map(|count| *count as f64 / random_input_upper_bound as f64)
        .collect();

    let values: Vec<u64> = (0..sample_count)
        .map(|seed| {
            let img = executor.execute((
                Seed(seed as u128),
                num_input_random_bits,
                excluded_upper_bound as u64,
                excluded_upper_bound,
                num_blocks_output,
            ));
            cks.decrypt(&img)
        })
        .collect();

    let p_value_upper_bound = p_value_upper_bound_oprf_almost_uniformity_from_values(
        &values,
        num_input_random_bits,
        excluded_upper_bound,
    );

    assert!(p_value_limit < p_value_upper_bound);
}

pub(crate) fn p_value_upper_bound_oprf_almost_uniformity_from_values(
    values: &[u64],
    num_input_random_bits: u64,
    excluded_upper_bound: u64,
) -> f64 {
    let density = oprf_density_function(excluded_upper_bound, num_input_random_bits);

    let theoretical_pdf = probability_density_function_from_density(&density);

    let mut bins = vec![0_u64; excluded_upper_bound as usize];
    for value in values {
    for value in values.iter().copied() {
        bins[value as usize] += 1;
    }

    let cumulative_bins = cumulate(&bins);
    let theoretical_cdf = cumulate(&theoretical_pdf);
    let sup_diff = sup_diff(&cumulative_bins, &theoretical_cdf);
    let p_value_upper_bound = dkw_alpha_from_epsilon(sample_count as f64, sup_diff);

    assert!(p_value_limit < p_value_upper_bound);
    dkw_alpha_from_epsilon(values.len() as f64, sup_diff)
}

pub(crate) fn oprf_density_function(
    excluded_upper_bound: u64,
    num_input_random_bits: u64,
) -> Vec<usize> {
    let random_input_upper_bound = 1 << num_input_random_bits;

    let mut density = vec![0_usize; excluded_upper_bound as usize];

    for i in 0..random_input_upper_bound {
        let output = ((i * excluded_upper_bound) >> num_input_random_bits) as usize;

        density[output] += 1;
    }
    density
}

pub(crate) fn probability_density_function_from_density(density: &[usize]) -> Vec<f64> {
    let total_count: usize = density.iter().copied().sum();

    density
        .iter()
        .map(|count| *count as f64 / total_count as f64)
        .collect()
}

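// Illustrative sketch (not part of the diff): what the helpers referenced above are assumed to
// compute. `dkw_alpha_from_epsilon` matches the Dvoretzky-Kiefer-Wolfowitz bound
// P(sup |F_n - F| > eps) <= 2 * exp(-2 * n * eps^2), which gives the p-value upper bound compared
// against `p_value_limit`; `sup_diff` is the sup-distance between two CDFs. The exact bodies in
// the repository may differ.
fn dkw_alpha_from_epsilon_sketch(sample_count: f64, epsilon: f64) -> f64 {
    2.0 * (-2.0 * sample_count * epsilon * epsilon).exp()
}

fn sup_diff_sketch(empirical_cdf: &[f64], theoretical_cdf: &[f64]) -> f64 {
    empirical_cdf
        .iter()
        .zip(theoretical_cdf)
        .map(|(a, b)| (a - b).abs())
        .fold(0.0, f64::max)
}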
@@ -475,8 +475,12 @@ pub(crate) mod test {
        }
    }

    pub fn test_uniformity<F>(sample_count: usize, p_value_limit: f64, distinct_values: u64, f: F)
    where
    pub(crate) fn test_uniformity<F>(
        sample_count: usize,
        p_value_limit: f64,
        distinct_values: u64,
        f: F,
    ) where
        F: Sync + Fn(usize) -> u64,
    {
        let p_value = uniformity_p_value(f, sample_count, distinct_values);
@@ -487,7 +491,7 @@ pub(crate) mod test {
        );
    }

    fn uniformity_p_value<F>(f: F, sample_count: usize, distinct_values: u64) -> f64
    pub(crate) fn uniformity_p_value<F>(f: F, sample_count: usize, distinct_values: u64) -> f64
    where
        F: Sync + Fn(usize) -> u64,
    {
@@ -495,8 +499,11 @@ pub(crate) mod test {

        let mut values_count = HashMap::new();

        for i in &values {
            assert!(*i < distinct_values, "i {} dv{}", *i, distinct_values);
        for i in values.iter().copied() {
            assert!(
                i < distinct_values,
                "i (={i}) is supposed to be smaller than distinct_values (={distinct_values})",
            );

            *values_count.entry(i).or_insert(0) += 1;
        }

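// Illustrative sketch (not part of the diff): the goodness-of-fit statistic a uniformity test like
// `uniformity_p_value` can build from the per-value counts; the p-value itself would then come
// from a chi-squared CDF (e.g. via the `statrs` distribution types imported earlier in this diff).
// This is an assumption about the shape of the check, not the repository's exact implementation.
fn chi_squared_statistic(counts: &[u64], sample_count: usize, distinct_values: u64) -> f64 {
    let expected = sample_count as f64 / distinct_values as f64;
    counts
        .iter()
        .map(|&observed| {
            let diff = observed as f64 - expected;
            diff * diff / expected
        })
        .sum()
}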
@@ -27,7 +27,7 @@ use crate::shortint::server_key::ServerKey;
use rayon::prelude::*;

#[allow(clippy::too_many_arguments)]
fn dp_ks_any_ms_standard_pbs128<
pub fn dp_ks_any_ms_standard_pbs128<
    InputCt,
    ScalarMulResult,
    KsResult,
@@ -111,7 +111,7 @@ where

#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn dp_ks_any_ms_standard_pbs128_packing_ks<
pub fn dp_ks_any_ms_standard_pbs128_packing_ks<
    InputCt,
    ScalarMulResult,
    KsResult,

@@ -727,8 +727,15 @@ async function compactPublicKeyZeroKnowledgeBench() {
    serialized_size = list.safe_serialize(BigInt(10000000)).length;
  }
  const mean = timing / bench_loops;

  let base_bench_str = "compact_fhe_uint_proven_encryption_";
  let supportsThreads = await threads();
  if (!supportsThreads) {
    base_bench_str += "unsafe_coop_";
  }

  const common_bench_str =
    "compact_fhe_uint_proven_encryption_" +
    base_bench_str +
    params.zk_scheme +
    "_" +
    bits_to_encrypt +
