Compare commits

...

20 Commits

Author SHA1 Message Date
Theo Souchon
5f6e1fe648 chore(bench): add missing operation in hlapi 2026-01-12 14:58:08 +01:00
Arthur Meyre
cb4d62b40a chore: fix wasm-pack URL and update build output listing
Corrected the URL for 'wasm-pack' and updated the file listing after the build.

co-authored-by: d4wae89d498 <faussurier.marc@icloud.com>
2026-01-12 12:51:04 +01:00
David Testé
7a0c054095 chore(bench): use ks32 parameters set as default only for cpu 2026-01-12 11:00:52 +01:00
Agnes Leroy
ddb7d56f56 chore(gpu): add neg to dedup ops 2026-01-12 11:00:52 +01:00
Guillermo Oyarzun
cbe39c8e98 feat(gpu): create noise and pfail tests pbs128 and packingks 2026-01-12 10:46:41 +01:00
pgardratzama
27364857f1 fix(hpu): prf is not available yet on HPU 2026-01-12 09:55:18 +01:00
Arthur Meyre
7043246c17 chore: update CODEOWNERS file 2026-01-09 16:12:50 +01:00
Theo Souchon
51735fb8ed chore(bench): code refactor and automation for hlapi 2026-01-09 16:09:27 +01:00
pgardratzama
23a348c9ae feat(hpu): new HPU bitstream RTL v2.2 2026-01-09 15:25:35 +01:00
Mayeul@Zama
61b616b784 chore(hlapi): add bench of oprf over any range 2026-01-09 15:19:08 +01:00
Mayeul@Zama
df48e176f3 feat(hlapi): add oprf over any range 2026-01-09 15:19:08 +01:00
Mayeul@Zama
dd2345df6b refactor(integer): use NonZeroU64 for excluded_upper_bound 2026-01-09 15:19:08 +01:00
Mayeul@Zama
933800ea6f doc(hlapi): fix documentation 2026-01-09 15:19:08 +01:00
Mayeul@Zama
3e4cee3a75 refactor(integer): split oprf_almost_uniformity_test 2026-01-09 15:19:08 +01:00
Mayeul@Zama
00ea9b8e07 refactor(shortint): improve error in uniformity_p_value 2026-01-09 15:19:08 +01:00
Mayeul@Zama
23ce85f6a2 fix(core): make sup_diff more permissive 2026-01-09 15:19:08 +01:00
Nicolas Sarlin
126a95e929 fix(js): unsafe coop bench was overwriting mt one 2026-01-08 16:48:18 +01:00
Nicolas Sarlin
23fffb1443 chore(deps): ignore unmaintained bincode cargo audit warning 2026-01-08 15:16:37 +01:00
Agnes Leroy
6d58a54266 chore(gpu): attempt to fix apt in ci 2026-01-08 14:54:03 +01:00
Baptiste Roux
9b8d5f5a43 chore(hpu): bump version of lru
Lru required a version update following cargo audit

Signed-off-by: Baptiste Roux <baptiste.roux@zama.ai>
2026-01-08 14:08:31 +01:00
36 changed files with 2960 additions and 254 deletions

View File

@@ -2,6 +2,8 @@
ignore = [
# Ignoring unmaintained 'paste' advisory as it is a widely used, low-risk build dependency.
"RUSTSEC-2024-0436",
# Ignoring unmaintained 'bincode' crate. Getting rid of it would be too complex in the short term.
"RUSTSEC-2025-0141",
]
[output]

View File

@@ -23,6 +23,8 @@ runs:
echo "${CMAKE_SCRIPT_SHA} cmake-${CMAKE_VERSION}-linux-x86_64.sh" > checksum
sha256sum -c checksum
sudo bash cmake-"${CMAKE_VERSION}"-linux-x86_64.sh --skip-license --prefix=/usr/ --exclude-subdir
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo apt update
sudo apt remove -y unattended-upgrades
sudo apt install -y cmake-format libclang-dev

.gitignore vendored
View File

@@ -10,6 +10,7 @@ target/
**/*.rmeta
**/Cargo.lock
**/*.bin
**/.DS_Store
# Some of our bench outputs
/tfhe/benchmarks_parameters

View File

@@ -11,7 +11,7 @@
/tfhe/src/core_crypto/gpu @agnesLeroy
/tfhe/src/core_crypto/hpu @zama-ai/hardware
/tfhe/src/shortint/ @mayeul-zama
/tfhe/src/shortint/ @mayeul-zama @nsarlin-zama
/tfhe/src/integer/ @tmontaigu
/tfhe/src/integer/gpu @agnesLeroy
@@ -19,8 +19,12 @@
/tfhe/src/high_level_api/ @tmontaigu
/tfhe-zk-pok/ @nsarlin-zama
/tfhe-benchmark/ @soonum
/utils/ @nsarlin-zama
/Makefile @IceTDrinker @soonum
/mockups/tfhe-hpu-mockup @zama-ai/hardware

View File

@@ -36,6 +36,7 @@ rayon = "1.11"
serde = { version = "1.0", default-features = false }
wasm-bindgen = "0.2.101"
getrandom = "0.2.8"
# The project maintainers consider that this is the last version of the 1.3 branch; any newer version should not be trusted
bincode = "=1.3.3"
[profile.bench]

View File

@@ -65,6 +65,16 @@ void cleanup_cuda_integer_compress_radix_ciphertext_128(CudaStreamsFFI streams,
void cleanup_cuda_integer_decompress_radix_ciphertext_128(
CudaStreamsFFI streams, int8_t **mem_ptr_void);
void cuda_integer_extract_glwe_128(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);
void cuda_integer_extract_glwe_64(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);
}
#endif

View File

@@ -155,3 +155,24 @@ void cleanup_cuda_integer_decompress_radix_ciphertext_128(
delete mem_ptr;
*mem_ptr_void = nullptr;
}
void cuda_integer_extract_glwe_128(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index) {
CudaStreams _streams = CudaStreams(streams);
host_extract<__uint128_t>(_streams.stream(0), _streams.gpu_index(0),
(__uint128_t *)glwe_array_out, glwe_list,
glwe_index);
}
void cuda_integer_extract_glwe_64(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index) {
CudaStreams _streams = CudaStreams(streams);
host_extract<__uint64_t>(_streams.stream(0), _streams.gpu_index(0),
(__uint64_t *)glwe_array_out, glwe_list, glwe_index);
}

View File

@@ -2349,6 +2349,22 @@ unsafe extern "C" {
mem_ptr_void: *mut *mut i8,
);
}
unsafe extern "C" {
pub fn cuda_integer_extract_glwe_128(
streams: CudaStreamsFFI,
glwe_array_out: *mut ffi::c_void,
glwe_list: *const CudaPackedGlweCiphertextListFFI,
glwe_index: u32,
);
}
unsafe extern "C" {
pub fn cuda_integer_extract_glwe_64(
streams: CudaStreamsFFI,
glwe_array_out: *mut ffi::c_void,
glwe_list: *const CudaPackedGlweCiphertextListFFI,
glwe_index: u32,
);
}
unsafe extern "C" {
pub fn scratch_cuda_rerand_64(
streams: CudaStreamsFFI,

View File

@@ -40,7 +40,7 @@ rand = "0.8.5"
regex = "1.10.4"
bitflags = { version = "2.5.0", features = ["serde"] }
itertools = "0.11.0"
lru = "0.12.3"
lru = "0.16.3"
bitfield-struct = "0.10.0"
crossbeam = { version = "0.8.4", features = ["crossbeam-queue"] }
rayon = { workspace = true }

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:35cc06547a23b862ab9829351d74d944e60ea9dad3ecf593d15f0ce8445d145e
size 81710610
oid sha256:934c8131c12010dc837f6a2af5111b83f8f5d42f10485e9b3b971edb24c467f8
size 82201876

View File

@@ -160,9 +160,9 @@ impl ProgramInner {
.filter(|(_, var)| var.is_none())
.map(|(rid, _)| *rid)
.collect::<Vec<_>>();
demote_order
.into_iter()
.for_each(|rid| self.regs.demote(&rid));
demote_order.into_iter().for_each(|rid| {
self.regs.demote(&rid);
});
}
/// Release register entry
@@ -179,7 +179,7 @@ impl ProgramInner {
/// Notify register access to update LRU state
pub(crate) fn reg_access(&mut self, rid: asm::RegId) {
self.regs.promote(&rid)
self.regs.promote(&rid);
}
/// Retrieved least-recent-used heap entry
@@ -220,9 +220,9 @@ impl ProgramInner {
.filter(|(_mid, var)| var.is_none())
.map(|(mid, _)| *mid)
.collect::<Vec<_>>();
demote_order
.into_iter()
.for_each(|mid| self.heap.demote(&mid));
demote_order.into_iter().for_each(|mid| {
self.heap.demote(&mid);
});
}
_ => { /*Only release Heap slot*/ }
}
@@ -231,7 +231,9 @@ impl ProgramInner {
/// Notify heap access to update LRU state
pub(crate) fn heap_access(&mut self, mid: asm::MemId) {
match mid {
asm::MemId::Heap { .. } => self.heap.promote(&mid),
asm::MemId::Heap { .. } => {
self.heap.promote(&mid);
}
_ => { /* Do nothing: slot does not belong to heap */ }
}
}
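A plausible reading of this hunk, given the `lru` bump from 0.12 to 0.16 in the Cargo.toml change below: `promote`/`demote` appear to now return a value, so the bare calls no longer satisfy the `()`-returning closures that `for_each` expects, and the fix wraps each call in a block that discards the result. A minimal generic sketch of the pattern (the `Cache` type and its `bool` return are stand-ins, not the real `lru` API):

```rust
struct Cache;

impl Cache {
    // Stand-in for a method that used to return `()` and now returns a value.
    fn demote(&mut self, _key: &u32) -> bool {
        true
    }
}

fn main() {
    let mut cache = Cache;
    let demote_order = vec![1_u32, 2, 3];
    // `for_each` wants a `()`-returning closure, so the call is wrapped in a
    // block and its result is discarded by the trailing `;`.
    demote_order.into_iter().for_each(|rid| {
        cache.demote(&rid);
    });
}
```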

tfhe-benchmark/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
benchmarks_parameters/*

View File

@@ -2,7 +2,9 @@ use benchmark::utilities::{
hlapi_throughput_num_ops, write_to_json, BenchmarkType, BitSizesSet, EnvConfig, OperatorType,
};
use criterion::{black_box, Criterion, Throughput};
use oprf::oprf_any_range2;
use rand::prelude::*;
use rayon::prelude::*;
use std::marker::PhantomData;
use std::ops::*;
use tfhe::core_crypto::prelude::Numeric;
@@ -11,34 +13,104 @@ use tfhe::keycache::NamedParam;
use tfhe::named::Named;
use tfhe::prelude::*;
use tfhe::{
ClientKey, CompressedServerKey, FheIntegerType, FheUint10, FheUint12, FheUint128, FheUint14,
FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8, FheUintId, IntegerId,
KVStore,
ClientKey, CompressedServerKey, FheBool, FheIntegerType, FheUint, FheUint10, FheUint12,
FheUint128, FheUint14, FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8,
FheUintId, IntegerId, KVStore,
};
use rayon::prelude::*;
mod oprf;
fn bench_fhe_type<FheType>(
trait BenchWait {
fn wait_bench(&self);
}
impl<Id: FheUintId> BenchWait for FheUint<Id> {
fn wait_bench(&self) {
self.wait()
}
}
impl BenchWait for FheBool {
fn wait_bench(&self) {
self.wait()
}
}
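// Pair results (e.g. the overflowing ops return `(FheType, FheBool)`): waiting
// on the first element is enough for benchmarking purposes.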
impl<T1: FheWait, T2> BenchWait for (T1, T2) {
fn wait_bench(&self) {
self.0.wait()
}
}
fn bench_fhe_type_unary_op<FheType, F, R>(
c: &mut Criterion,
client_key: &ClientKey,
type_name: &str,
bit_size: usize,
display_name: &str,
func_name: &str,
func: F,
) where
F: Fn(&FheType) -> R,
R: BenchWait,
FheType: FheEncrypt<u128, ClientKey>,
FheType: FheWait,
{
let mut bench_group = c.benchmark_group(type_name);
let mut bench_prefix = "hlapi".to_string();
if cfg!(feature = "gpu") {
bench_prefix = format!("{}::cuda", bench_prefix);
} else if cfg!(feature = "hpu") {
bench_prefix = format!("{}::hpu", bench_prefix);
}
bench_prefix = format!("{}::ops", bench_prefix);
let mut rng = thread_rng();
let param = client_key.computation_parameters();
let param_name = param.name();
let bit_size = bit_size as u32;
let write_record = |bench_id: String, display_name| {
write_to_json::<u64, _>(
&bench_id,
param,
&param_name,
display_name,
&OperatorType::Atomic,
bit_size,
vec![],
);
};
let lhs = FheType::encrypt(rng.gen(), client_key);
let bench_id = format!("{bench_prefix}::{func_name}::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = func(&lhs);
res.wait_bench();
black_box(res)
})
});
write_record(bench_id, display_name);
}
fn bench_fhe_type_binary_op<FheType, F, R>(
c: &mut Criterion,
client_key: &ClientKey,
type_name: &str,
bit_size: usize,
display_name: &str,
func_name: &str,
func: F,
) where
F: Fn(&FheType, &FheType) -> R,
R: BenchWait,
FheType: FheEncrypt<u128, ClientKey>,
FheType: FheWait,
for<'a> &'a FheType: Add<&'a FheType, Output = FheType>
+ Sub<&'a FheType, Output = FheType>
+ Mul<&'a FheType, Output = FheType>
+ BitAnd<&'a FheType, Output = FheType>
+ BitOr<&'a FheType, Output = FheType>
+ BitXor<&'a FheType, Output = FheType>
+ Shl<&'a FheType, Output = FheType>
+ Shr<&'a FheType, Output = FheType>
+ RotateLeft<&'a FheType, Output = FheType>
+ RotateRight<&'a FheType, Output = FheType>
+ OverflowingAdd<&'a FheType, Output = FheType>
+ OverflowingSub<&'a FheType, Output = FheType>,
for<'a> FheType: FheMin<&'a FheType, Output = FheType> + FheMax<&'a FheType, Output = FheType>,
{
let mut bench_group = c.benchmark_group(type_name);
let mut bench_prefix = "hlapi".to_string();
@@ -71,170 +143,221 @@ fn bench_fhe_type<FheType>(
let lhs = FheType::encrypt(rng.gen(), client_key);
let rhs = FheType::encrypt(rng.gen(), client_key);
let mut bench_id;
let bench_id = format!("{bench_prefix}::{func_name}::{param_name}::{type_name}");
bench_id = format!("{bench_prefix}::add::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs + &rhs;
res.wait();
let res = func(&lhs, &rhs);
res.wait_bench();
black_box(res)
})
});
write_record(bench_id, "add");
bench_id = format!("{bench_prefix}::overflowing_add::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let (res, flag) = lhs.overflowing_add(&rhs);
res.wait();
black_box((res, flag))
})
});
write_record(bench_id, "overflowing_add");
bench_id = format!("{bench_prefix}::overflowing_sub::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let (res, flag) = lhs.overflowing_sub(&rhs);
res.wait();
black_box((res, flag))
})
});
write_record(bench_id, "overflowing_sub");
bench_id = format!("{bench_prefix}::sub::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs - &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "sub");
bench_id = format!("{bench_prefix}::mul::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs * &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "mul");
bench_id = format!("{bench_prefix}::bitand::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs & &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "bitand");
bench_id = format!("{bench_prefix}::bitor::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs | &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "bitor");
bench_id = format!("{bench_prefix}::bitxor::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs ^ &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "bitxor");
bench_id = format!("{bench_prefix}::left_shift::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs << &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "left_shift");
bench_id = format!("{bench_prefix}::right_shift::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs >> &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "right_shift");
bench_id = format!("{bench_prefix}::left_rotate::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = (&lhs).rotate_left(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "left_rotate");
bench_id = format!("{bench_prefix}::right_rotate::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = (&lhs).rotate_right(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "right_rotate");
bench_id = format!("{bench_prefix}::min::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = lhs.min(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "min");
bench_id = format!("{bench_prefix}::max::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = lhs.max(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "max");
write_record(bench_id, display_name);
}
macro_rules! bench_type {
($fhe_type:ident) => {
fn bench_fhe_type_ternary_op<FheType, F, R>(
c: &mut Criterion,
client_key: &ClientKey,
type_name: &str,
bit_size: usize,
display_name: &str,
func_name: &str,
func: F,
) where
F: Fn(&FheBool, &FheType, &FheType) -> R,
R: BenchWait,
FheType: FheEncrypt<u128, ClientKey>,
FheType: FheWait,
{
let mut bench_group = c.benchmark_group(type_name);
let mut bench_prefix = "hlapi".to_string();
if cfg!(feature = "gpu") {
bench_prefix = format!("{}::cuda", bench_prefix);
} else if cfg!(feature = "hpu") {
bench_prefix = format!("{}::hpu", bench_prefix);
}
bench_prefix = format!("{}::ops", bench_prefix);
let mut rng = thread_rng();
let param = client_key.computation_parameters();
let param_name = param.name();
let bit_size = bit_size as u32;
let write_record = |bench_id: String, display_name| {
write_to_json::<u64, _>(
&bench_id,
param,
&param_name,
display_name,
&OperatorType::Atomic,
bit_size,
vec![],
);
};
let bool = FheBool::encrypt(rng.gen(), client_key);
let lhs = FheType::encrypt(rng.gen(), client_key);
let rhs = FheType::encrypt(rng.gen(), client_key);
let bench_id = format!("{bench_prefix}::{func_name}::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = func(&bool, &lhs, &rhs);
res.wait_bench();
black_box(res)
})
});
write_record(bench_id, display_name);
}
macro_rules! bench_type_binary_op (
(type_name: $fhe_type:ident, display_name: $display_name:literal, operation: $op:ident) => {
::paste::paste! {
fn [<bench_ $fhe_type:snake>](c: &mut Criterion, cks: &ClientKey) {
bench_fhe_type::<$fhe_type>(c, cks, stringify!($fhe_type), $fhe_type::num_bits());
fn [<bench_ $fhe_type:snake _ $op>](c: &mut Criterion, cks: &ClientKey) {
bench_fhe_type_binary_op::<$fhe_type, _, _>(
c,
cks,
stringify!($fhe_type),
$fhe_type::num_bits(),
$display_name,
stringify!($op),
|lhs, rhs| lhs.$op(rhs)
);
}
}
};
);
macro_rules! bench_type_unary_op (
(type_name: $fhe_type:ident, display_name: $display_name:literal, operation: $op:ident) => {
::paste::paste! {
fn [<bench_ $fhe_type:snake _ $op>](c: &mut Criterion, cks: &ClientKey) {
bench_fhe_type_unary_op::<$fhe_type, _, _>(
c,
cks,
stringify!($fhe_type),
$fhe_type::num_bits(),
$display_name,
stringify!($op),
|lhs| lhs.$op()
);
}
}
};
);
macro_rules! bench_type_ternary_op (
(type_name: $fhe_type:ident, display_name: $display_name:literal, operation: $op:ident) => {
::paste::paste! {
fn [<bench_ $fhe_type:snake _ $op>](c: &mut Criterion, cks: &ClientKey) {
bench_fhe_type_ternary_op::<$fhe_type, _, _>(
c,
cks,
stringify!($fhe_type),
$fhe_type::num_bits(),
$display_name,
stringify!($op),
|cond, lhs, rhs| cond.$op(lhs, rhs)
);
}
}
};
);
macro_rules! generate_typed_benches {
($fhe_type:ident) => {
// bench_type_binary_op!(type_name: $fhe_type, display_name: "sum", operation: sum);
// bench_type_unary_op!(type_name: $fhe_type, display_name: "bitnot", operation: bitnot);
bench_type_binary_op!(type_name: $fhe_type, display_name: "add", operation: add);
bench_type_binary_op!(type_name: $fhe_type, display_name: "bitand", operation: bitand);
bench_type_binary_op!(type_name: $fhe_type, display_name: "bitor", operation: bitor);
bench_type_binary_op!(type_name: $fhe_type, display_name: "bitxor", operation: bitxor);
bench_type_binary_op!(type_name: $fhe_type, display_name: "div", operation: div);
bench_type_binary_op!(type_name: $fhe_type, display_name: "div_rem", operation: div_rem);
bench_type_binary_op!(type_name: $fhe_type, display_name: "eq", operation: eq);
bench_type_binary_op!(type_name: $fhe_type, display_name: "ge", operation: ge);
bench_type_binary_op!(type_name: $fhe_type, display_name: "gt", operation: gt);
bench_type_binary_op!(type_name: $fhe_type, display_name: "le", operation: le);
bench_type_binary_op!(type_name: $fhe_type, display_name: "left_rotate", operation: rotate_left);
bench_type_binary_op!(type_name: $fhe_type, display_name: "left_shift", operation: shl);
bench_type_binary_op!(type_name: $fhe_type, display_name: "lt", operation: lt);
bench_type_binary_op!(type_name: $fhe_type, display_name: "max", operation: max);
bench_type_binary_op!(type_name: $fhe_type, display_name: "min", operation: min);
bench_type_binary_op!(type_name: $fhe_type, display_name: "mul", operation: mul);
bench_type_binary_op!(type_name: $fhe_type, display_name: "ne", operation: ne);
bench_type_binary_op!(type_name: $fhe_type, display_name: "overflowing_add", operation: overflowing_add);
bench_type_binary_op!(type_name: $fhe_type, display_name: "overflowing_sub", operation: overflowing_sub);
bench_type_binary_op!(type_name: $fhe_type, display_name: "rem", operation: rem);
bench_type_binary_op!(type_name: $fhe_type, display_name: "right_rotate", operation: rotate_right);
bench_type_binary_op!(type_name: $fhe_type, display_name: "right_shift", operation: shr);
bench_type_binary_op!(type_name: $fhe_type, display_name: "sub", operation: sub);
bench_type_ternary_op!(type_name: $fhe_type, display_name: "flip", operation: flip);
bench_type_ternary_op!(type_name: $fhe_type, display_name: "if_then_else", operation: if_then_else);
bench_type_unary_op!(type_name: $fhe_type, display_name: "leading_ones", operation: leading_ones);
bench_type_unary_op!(type_name: $fhe_type, display_name: "leading_zeros", operation: leading_zeros);
bench_type_unary_op!(type_name: $fhe_type, display_name: "neg", operation: neg);
bench_type_unary_op!(type_name: $fhe_type, display_name: "not", operation: not);
bench_type_unary_op!(type_name: $fhe_type, display_name: "trailing_ones", operation: trailing_ones);
bench_type_unary_op!(type_name: $fhe_type, display_name: "trailing_zeros", operation: trailing_zeros);
};
}
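For readability, here is roughly what one binary-op instantiation expands to; a sketch of the `paste`-generated item for the `add` line emitted by `generate_typed_benches!(FheUint64)`:

```rust
// `[<bench_ $fhe_type:snake _ $op>]` concatenates and snake-cases the pieces
// into a single function name, so the invocation yields:
fn bench_fhe_uint64_add(c: &mut Criterion, cks: &ClientKey) {
    bench_fhe_type_binary_op::<FheUint64, _, _>(
        c,
        cks,
        "FheUint64",           // stringify!($fhe_type)
        FheUint64::num_bits(), // bit width of the type
        "add",                 // $display_name
        "add",                 // stringify!($op)
        |lhs, rhs| lhs.add(rhs),
    );
}
```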
bench_type!(FheUint2);
bench_type!(FheUint4);
bench_type!(FheUint6);
bench_type!(FheUint8);
bench_type!(FheUint10);
bench_type!(FheUint12);
bench_type!(FheUint14);
bench_type!(FheUint16);
bench_type!(FheUint32);
bench_type!(FheUint64);
bench_type!(FheUint128);
// Generate benches for all FheUint types
generate_typed_benches!(FheUint2);
generate_typed_benches!(FheUint4);
generate_typed_benches!(FheUint6);
generate_typed_benches!(FheUint8);
generate_typed_benches!(FheUint10);
generate_typed_benches!(FheUint12);
generate_typed_benches!(FheUint14);
generate_typed_benches!(FheUint16);
generate_typed_benches!(FheUint32);
generate_typed_benches!(FheUint64);
generate_typed_benches!(FheUint128);
macro_rules! run_benches {
($c:expr, $cks:expr, $($fhe_type:ident),+ $(,)?) => {
$(
::paste::paste! {
[<bench_ $fhe_type:snake _add>]($c, $cks);
[<bench_ $fhe_type:snake _bitand>]($c, $cks);
[<bench_ $fhe_type:snake _bitor>]($c, $cks);
[<bench_ $fhe_type:snake _bitxor>]($c, $cks);
[<bench_ $fhe_type:snake _div>]($c, $cks);
[<bench_ $fhe_type:snake _div_rem>]($c, $cks);
[<bench_ $fhe_type:snake _eq>]($c, $cks);
[<bench_ $fhe_type:snake _flip>]($c, $cks);
[<bench_ $fhe_type:snake _ge>]($c, $cks);
[<bench_ $fhe_type:snake _gt>]($c, $cks);
[<bench_ $fhe_type:snake _if_then_else>]($c, $cks);
[<bench_ $fhe_type:snake _le>]($c, $cks);
[<bench_ $fhe_type:snake _leading_ones>]($c, $cks);
[<bench_ $fhe_type:snake _leading_zeros>]($c, $cks);
[<bench_ $fhe_type:snake _lt>]($c, $cks);
[<bench_ $fhe_type:snake _max>]($c, $cks);
[<bench_ $fhe_type:snake _min>]($c, $cks);
[<bench_ $fhe_type:snake _mul>]($c, $cks);
[<bench_ $fhe_type:snake _ne>]($c, $cks);
[<bench_ $fhe_type:snake _neg>]($c, $cks);
[<bench_ $fhe_type:snake _not>]($c, $cks);
[<bench_ $fhe_type:snake _overflowing_add>]($c, $cks);
[<bench_ $fhe_type:snake _overflowing_sub>]($c, $cks);
[<bench_ $fhe_type:snake _rem>]($c, $cks);
[<bench_ $fhe_type:snake _rotate_left>]($c, $cks);
[<bench_ $fhe_type:snake _rotate_right>]($c, $cks);
[<bench_ $fhe_type:snake _shl>]($c, $cks);
[<bench_ $fhe_type:snake _shr>]($c, $cks);
[<bench_ $fhe_type:snake _sub>]($c, $cks);
[<bench_ $fhe_type:snake _trailing_ones>]($c, $cks);
[<bench_ $fhe_type:snake _trailing_zeros>]($c, $cks);
}
)+
};
}
trait TypeDisplay {
fn fmt(f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -444,7 +567,7 @@ fn main() {
match env_config.bit_sizes_set {
BitSizesSet::Fast => {
bench_fhe_uint64(&mut c, &cks);
run_benches!(&mut c, &cks, FheUint64);
// KVStore Benches
if benched_device == tfhe::Device::Cpu {
@@ -452,17 +575,11 @@ fn main() {
}
}
_ => {
bench_fhe_uint2(&mut c, &cks);
bench_fhe_uint4(&mut c, &cks);
bench_fhe_uint6(&mut c, &cks);
bench_fhe_uint8(&mut c, &cks);
bench_fhe_uint10(&mut c, &cks);
bench_fhe_uint12(&mut c, &cks);
bench_fhe_uint14(&mut c, &cks);
bench_fhe_uint16(&mut c, &cks);
bench_fhe_uint32(&mut c, &cks);
bench_fhe_uint64(&mut c, &cks);
bench_fhe_uint128(&mut c, &cks);
// Call all benchmarks for all types
run_benches!(
&mut c, &cks, FheUint2, FheUint4, FheUint6, FheUint8, FheUint10, FheUint12,
FheUint14, FheUint16, FheUint32, FheUint64, FheUint128
);
// KVStore Benches
if benched_device == tfhe::Device::Cpu {
@@ -481,5 +598,8 @@ fn main() {
}
}
#[cfg(not(feature = "hpu"))]
oprf_any_range2();
c.final_summary();
}

View File

@@ -0,0 +1,44 @@
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
use criterion::{black_box, criterion_group, Criterion};
use std::num::NonZeroU64;
use tfhe::{set_server_key, ClientKey, ConfigBuilder, FheUint64, RangeForRandom, Seed, ServerKey};
pub fn oprf_any_range(c: &mut Criterion) {
let bench_name = "hlapi::oprf_any_range";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(30));
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(param).build();
let cks = ClientKey::generate(config);
let sks = ServerKey::new(&cks);
rayon::broadcast(|_| set_server_key(sks.clone()));
set_server_key(sks);
for excluded_upper_bound in [3, 52] {
let range = RangeForRandom::new_from_excluded_upper_bound(
NonZeroU64::new(excluded_upper_bound).unwrap(),
);
let bench_id_oprf = format!("{bench_name}::bound_{excluded_upper_bound}");
bench_group.bench_function(&bench_id_oprf, |b| {
b.iter(|| {
_ = black_box(FheUint64::generate_oblivious_pseudo_random_custom_range(
Seed(0),
&range,
None,
));
})
});
}
bench_group.finish()
}
criterion_group!(oprf_any_range2, oprf_any_range);
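One detail worth flagging in this bench: `set_server_key` stores the key in thread-local storage, so the `rayon::broadcast` call is what makes the key visible to every worker thread of the global rayon pool, and the final `set_server_key(sks)` covers the calling thread itself. A minimal sketch of the pattern in isolation (key generation included only for self-containment):

```rust
use tfhe::{set_server_key, ClientKey, ConfigBuilder, ServerKey};

fn install_server_key_on_all_threads() {
    let config = ConfigBuilder::default().build();
    let cks = ClientKey::generate(config);
    let sks = ServerKey::new(&cks);
    // Install a clone on every thread of the global rayon pool...
    rayon::broadcast(|_| set_server_key(sks.clone()));
    // ...and on the calling thread as well.
    set_server_key(sks);
}
```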

View File

@@ -2809,6 +2809,7 @@ mod cuda {
criterion_group!(
default_cuda_dedup_ops,
cuda_add,
cuda_neg,
cuda_mul,
cuda_div_rem,
cuda_bitand,

View File

@@ -629,7 +629,9 @@ mod integer_params {
// operations.
#[cfg(feature = "hpu")]
let params = vec![BENCH_HPU_PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128.into()];
#[cfg(not(feature = "hpu"))]
#[cfg(feature = "gpu")]
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS.into()];
#[cfg(not(any(feature = "gpu", feature = "hpu")))]
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS32_PBS.into()];
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());

View File

@@ -27,6 +27,7 @@ rand_distr = "0.4.3"
criterion = "0.5.1"
doc-comment = "0.3.3"
serde_json = "1.0.94"
num-bigint = "0.4.6"
# clap has to be pinned as its minimum supported rust version
# changes often between minor releases, which breaks our CI
clap = { version = "=4.5.30", features = ["derive"] }

View File

@@ -2,14 +2,30 @@
This document explains the mechanism and steps to generate an oblivious encrypted random value using only server keys.
The goal is to give to the server the possibility to generate a random value, which will be obtained in an encrypted format and will remain unknown to the server. The implementation is based on [this article](https://eprint.iacr.org/2024/665).
The goal is to give the server the ability to generate a random value, which will be obtained in an encrypted format and will remain unknown to the server.
This is possible through two methods on `FheUint` and `FheInt`:
The main method for this is `FheUint::generate_oblivious_pseudo_random_custom_range`, which returns an integer in the given range.
Currently the range can only be in the form `[0, excluded_upper_bound[` with any `excluded_upper_bound` in `[1, 2^64[`.
It follows a distribution close to uniform.
This function guarantees that the norm-1 distance (defined as ∆(P,Q) := 1/2 Sum[ω∈Ω] |P(ω) − Q(ω)|)
between the actual distribution and the target uniform distribution will be below the `max_distance` argument (which must be in ]0, 1[).
The higher the distance, the more dissimilar the actual distribution is from the target uniform distribution.
The default value for `max_distance` is `2^-128` if `None` is provided.
Higher values allow better performance but must be considered carefully in the context of the target application, as they may have serious unintended consequences.
If the range is a power of 2, the distribution is uniform (for any `max_distance`) and the cost is smaller.
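For intuition, a small worked case consistent with the `distance` helper and the clear model `(random_input * excluded_upper_bound) >> num_input_random_bits` added later in this PR: with `excluded_upper_bound = 3` and 2 input random bits, the map x ↦ (3x) >> 2 sends {0, 1, 2, 3} to {0, 0, 1, 2}, so the output probabilities are (1/2, 1/4, 1/4) against a uniform target of (1/3, 1/3, 1/3). The norm-1 distance is 1/2 (|1/2 − 1/3| + |1/4 − 1/3| + |1/4 − 1/3|) = 1/6, matching the closed form remainder · (excluded_upper_bound − remainder) / (2^bits · excluded_upper_bound) = 1 · 2 / (4 · 3) with remainder = 2^2 mod 3 = 1.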
For powers of 2 specifically, there are two methods on `FheUint` and `FheInt` (based on [this article](https://eprint.iacr.org/2024/665)):
- `generate_oblivious_pseudo_random` which returns an integer taken uniformly in the full integer range (`[0; 2^N[` for a `FheUintN` and `[-2^(N-1); 2^(N-1)[` for a `FheIntN`).
- `generate_oblivious_pseudo_random_bounded` which returns an integer taken uniformly in `[0; 2^random_bits_count[`. For a `FheUintN`, we must have `random_bits_count <= N`. For a `FheIntN`, we must have `random_bits_count <= N - 1`.
Both methods take a seed `Seed` as input, which could be any `u128` value.
They both rely on the use of the usual server key.
These methods take a seed `Seed` as input, which could be any `u128` value.
They rely on the use of the usual server key.
The output is reproducible, i.e., the function is deterministic from the inputs: assuming the same hardware, seed and server key, this function outputs the same random encrypted value.
@@ -18,7 +34,8 @@ Here is an example of the usage:
```rust
use tfhe::prelude::FheDecrypt;
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, FheInt8, Seed};
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, FheInt8, RangeForRandom, Seed};
use std::num::NonZeroU64;
pub fn main() {
let config = ConfigBuilder::default().build();
@@ -26,23 +43,30 @@ pub fn main() {
set_server_key(server_key);
let random_bits_count = 3;
let ct_res = FheUint8::generate_oblivious_pseudo_random(Seed(0));
let excluded_upper_bound = NonZeroU64::new(3).unwrap();
let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
// in [0, excluded_upper_bound[ = {0, 1, 2}
let ct_res = FheUint8::generate_oblivious_pseudo_random_custom_range(Seed(0), &range, None);
let dec_result: u8 = ct_res.decrypt(&client_key);
let ct_res = FheUint8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
let random_bits_count = 3;
// in [0, 2^8[
let ct_res = FheUint8::generate_oblivious_pseudo_random(Seed(0));
let dec_result: u8 = ct_res.decrypt(&client_key);
// in [0, 2^random_bits_count[ = [0, 8[
let ct_res = FheUint8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
let dec_result: u8 = ct_res.decrypt(&client_key);
assert!(dec_result < (1 << random_bits_count));
// in [-2^7, 2^7[
let ct_res = FheInt8::generate_oblivious_pseudo_random(Seed(0));
let dec_result: i8 = ct_res.decrypt(&client_key);
// in [0, 2^random_bits_count[ = [0, 8[
let ct_res = FheInt8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
let dec_result: i8 = ct_res.decrypt(&client_key);
assert!(dec_result < (1 << random_bits_count));
}

View File

@@ -141,7 +141,7 @@ Some parameter sets lead to the FHE keys exceeding the 2GB memory limit of WASM,
### Setting up TFHE-rs JS on WASM API for Node.js programs.
To build the JS on WASM bindings for **TFHE-rs**, install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/) and the necessary [`rust toolchain`](https://rustup.rs/). Clone the **TFHE-rs** repository and build using the following commands (this will build using the default branch, you can check out a specific tag depending on your requirements):
To build the JS on WASM bindings for **TFHE-rs**, install [`wasm-pack`](https://drager.github.io/wasm-pack/) and the necessary [`rust toolchain`](https://rustup.rs/). Clone the **TFHE-rs** repository and build using the following commands (this will build using the default branch, you can check out a specific tag depending on your requirements):
```shell
$ git clone https://github.com/zama-ai/tfhe-rs.git
@@ -150,7 +150,7 @@ Cloning into 'tfhe-rs'...
Resolving deltas: 100% (3866/3866), done.
$ cd tfhe-rs
$ cd tfhe
$ rustup run wasm-pack build --release --target=nodejs --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
$ wasm-pack build --release --target=nodejs --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
[INFO]: Compiling to Wasm...
...
[INFO]: :-) Your wasm pkg is ready to publish at ...
@@ -164,7 +164,7 @@ After the build, a new directory **pkg** is available in the `tfhe` directory.
```shell
$ ls pkg
LICENSE index.html package.json tfhe.d.ts tfhe.js tfhe_bg.txt tfhe_bg.wasm tfhe_bg.wasm.d.ts
LICENSE README.md package.json tfhe.d.ts tfhe.js tfhe_bg.wasm tfhe_bg.wasm.d.ts
$
```

View File

@@ -540,10 +540,12 @@ pub fn sup_diff(cumulative_bins: &[u64], theoretical_cdf: &[f64]) -> f64 {
.iter()
.copied()
.zip_eq(theoretical_cdf.iter().copied())
.map(|(x, theoretical_cdf)| {
.enumerate()
.map(|(i, (x, theoretical_cdf))| {
let empirical_cdf = x as f64 / number_of_samples as f64;
if theoretical_cdf == 1.0 {
if i == cumulative_bins.len() - 1 {
assert_eq!(theoretical_cdf, 1.0);
assert_eq!(empirical_cdf, 1.0);
}

View File

@@ -19,6 +19,7 @@ use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
use crate::integer::prelude::*;
use crate::integer::BooleanBlock;
use crate::named::Named;
use crate::prelude::FheWait;
use crate::shortint::ciphertext::NotTrivialCiphertextError;
use crate::shortint::parameters::CiphertextConformanceParams;
use crate::shortint::AtomicPatternParameters;
@@ -73,6 +74,12 @@ impl Named for FheBool {
const NAME: &'static str = "high_level_api::FheBool";
}
impl FheWait for FheBool {
fn wait(&self) {
self.ciphertext.wait()
}
}
#[derive(Copy, Clone)]
pub struct FheBoolConformanceParams(pub(crate) CiphertextConformanceParams);

View File

@@ -139,6 +139,16 @@ impl InnerBoolean {
}
}
pub(crate) fn wait(&self) {
match self {
Self::Cpu(_) => {}
#[cfg(feature = "gpu")]
Self::Cuda(_) => {}
#[cfg(feature = "hpu")]
Self::Hpu(ct) => ct.wait(),
}
}
/// Returns the inner cpu ciphertext if self is on the CPU, otherwise, returns a copy
/// that is on the CPU
pub(crate) fn on_cpu(&self) -> MaybeCloned<'_, BooleanBlock> {

View File

@@ -4,7 +4,9 @@ use crate::high_level_api::keys::InternalServerKey;
use crate::high_level_api::re_randomization::ReRandomizationMetadata;
#[cfg(feature = "gpu")]
use crate::integer::gpu::ciphertext::{CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext};
use crate::shortint::MessageModulus;
use crate::{FheInt, Seed};
use std::num::NonZeroU64;
impl<Id: FheUintId> FheUint<Id> {
/// Generates an encrypted unsigned integer
@@ -92,7 +94,7 @@ impl<Id: FheUintId> FheUint<Id> {
}
})
}
/// Generates an encrypted `num_block` blocks unsigned integer
/// Generates an encrypted unsigned integer
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
/// The encrypted value is oblivious to the server.
/// It can be useful to make server random generation deterministic.
@@ -150,6 +152,103 @@ impl<Id: FheUintId> FheUint<Id> {
}
})
}
/// Generates an encrypted unsigned integer
/// taken almost uniformly in the given range using the given seed.
/// Currently the range can only be in the form `[0, excluded_upper_bound[`
/// with any `excluded_upper_bound` in `[1, 2^64[`.
///
/// The encrypted value is oblivious to the server.
/// It can be useful to make server random generation deterministic.
///
/// This function guarantees that the norm-1 distance
/// (defined as ∆(P,Q) := 1/2 Sum[ω∈Ω] |P(ω) − Q(ω)|)
/// between the actual distribution and the target uniform distribution
/// will be below the `max_distance` argument (which must be in ]0, 1[).
/// The higher the distance, the more dissimilar the actual distribution is
/// from the target uniform distribution.
///
/// The default value for `max_distance` is `2^-128` if `None` is provided.
///
/// Higher values allow better performance but must be considered carefully in the context of
/// the target application, as they may have serious unintended consequences.
///
/// If the range is a power of 2, the distribution is uniform (for any `max_distance`) and
/// the cost is smaller.
///
/// ```rust
/// use std::num::NonZeroU64;
/// use tfhe::prelude::FheDecrypt;
/// use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, RangeForRandom, Seed};
///
/// let config = ConfigBuilder::default().build();
/// let (client_key, server_key) = generate_keys(config);
///
/// set_server_key(server_key);
///
/// let excluded_upper_bound = NonZeroU64::new(3).unwrap();
///
/// let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
///
/// let ct_res = FheUint8::generate_oblivious_pseudo_random_custom_range(Seed(0), &range, None);
///
/// let dec_result: u16 = ct_res.decrypt(&client_key);
/// assert!(dec_result < excluded_upper_bound.get() as u16);
/// ```
pub fn generate_oblivious_pseudo_random_custom_range(
seed: Seed,
range: &RangeForRandom,
max_distance: Option<f64>,
) -> Self {
let excluded_upper_bound = range.excluded_upper_bound;
if excluded_upper_bound.is_power_of_two() {
let random_bits_count = excluded_upper_bound.ilog2() as u64;
Self::generate_oblivious_pseudo_random_bounded(seed, random_bits_count)
} else {
let max_distance = max_distance.unwrap_or_else(|| 2_f64.powi(-128));
assert!(
0_f64 < max_distance && max_distance < 1_f64,
"max_distance (={max_distance}) should be in ]0, 1["
);
global_state::with_internal_keys(|key| match key {
InternalServerKey::Cpu(key) => {
let message_modulus = key.message_modulus();
let num_input_random_bits = num_input_random_bits_for_max_distance(
excluded_upper_bound,
max_distance,
message_modulus,
);
let num_blocks_output = Id::num_blocks(key.message_modulus()) as u64;
let ct = key
.pbs_key()
.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
excluded_upper_bound,
num_blocks_output,
);
Self::new(ct, key.tag.clone(), ReRandomizationMetadata::default())
}
#[cfg(feature = "gpu")]
InternalServerKey::Cuda(_cuda_key) => {
panic!("Gpu does not support this operation yet.")
}
#[cfg(feature = "hpu")]
InternalServerKey::Hpu(_device) => {
panic!("Hpu does not support this operation yet.")
}
})
}
}
#[cfg(feature = "gpu")]
/// Returns the amount of memory required to execute generate_oblivious_pseudo_random_bounded
///
@@ -273,7 +372,7 @@ impl<Id: FheIntId> FheInt<Id> {
}
})
}
/// Generates an encrypted `num_block` blocks signed integer
/// Generates an encrypted signed integer
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
/// The encrypted value is oblivious to the server.
/// It can be useful to make server random generation deterministic.
@@ -367,10 +466,350 @@ impl<Id: FheIntId> FheInt<Id> {
}
}
pub struct RangeForRandom {
excluded_upper_bound: NonZeroU64,
}
impl RangeForRandom {
pub fn new_from_excluded_upper_bound(excluded_upper_bound: NonZeroU64) -> Self {
Self {
excluded_upper_bound,
}
}
}
fn num_input_random_bits_for_max_distance(
excluded_upper_bound: NonZeroU64,
max_distance: f64,
message_modulus: MessageModulus,
) -> u64 {
assert!(message_modulus.0.is_power_of_two());
let log_message_modulus = message_modulus.0.ilog2() as u64;
let mut random_block_count = 1;
let random_block_count = loop {
let random_bit_count = random_block_count * log_message_modulus;
let distance = distance(excluded_upper_bound.get(), random_bit_count);
if distance < max_distance {
break random_block_count;
}
random_block_count += 1;
};
random_block_count * log_message_modulus
}
fn distance(excluded_upper_bound: u64, random_bit_count: u64) -> f64 {
let remainder = mod_pow_2(random_bit_count, excluded_upper_bound);
remainder as f64 * (excluded_upper_bound - remainder) as f64
/ (2_f64.powi(random_bit_count as i32) * excluded_upper_bound as f64)
}
// Computes 2^exponent % modulus
fn mod_pow_2(exponent: u64, modulus: u64) -> u64 {
assert_ne!(modulus, 0);
if modulus == 1 {
return 0;
}
let mut result: u128 = 1;
let mut base: u128 = 2; // We are computing powers of 2 mod `modulus`
// u128 intermediates keep `result * base` and `base * base` from overflowing,
// since `modulus` can be as large as u64::MAX
let mut exp = exponent;
let mod_val = modulus as u128;
while exp > 0 {
// If exponent is odd, multiply result with base
if exp % 2 == 1 {
result = (result * base) % mod_val;
}
// Square the base
base = (base * base) % mod_val;
// Divide exponent by 2
exp /= 2;
}
result as u64
}
#[cfg(test)]
mod test {
use super::*;
use crate::integer::server_key::radix_parallel::tests_unsigned::test_oprf::{
oprf_density_function, p_value_upper_bound_oprf_almost_uniformity_from_values,
probability_density_function_from_density,
};
use crate::prelude::FheDecrypt;
use crate::shortint::oprf::test::test_uniformity;
use crate::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128;
use crate::{generate_keys, set_server_key, ClientKey, ConfigBuilder, FheUint8, Seed};
use num_bigint::BigUint;
use rand::{thread_rng, Rng};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
// Helper: The "Oracle" implementation using BigInt
// This is slow but mathematically guaranteed to be correct.
fn oracle_mod_pow_2(exponent: u64, modulus: u64) -> u64 {
assert_ne!(modulus, 0);
if modulus == 1 {
return 0;
}
let base = BigUint::from(2u32);
let exp = BigUint::from(exponent);
let modu = BigUint::from(modulus);
let res = base.modpow(&exp, &modu);
res.iter_u64_digits().next().unwrap_or(0)
}
#[test]
fn test_edge_cases() {
// 2^0 % 10 = 1
assert_eq!(mod_pow_2(0, 10), 1, "Failed exponent 0");
// 2^10 % 1 = 0
assert_eq!(mod_pow_2(10, 1), 0, "Failed modulus 1");
// 2^1 % 10 = 2
assert_eq!(mod_pow_2(1, 10), 2, "Failed exponent 1");
// 2^3 % 5 = 8 % 5 = 3
assert_eq!(mod_pow_2(3, 5), 3, "Failed small calc");
}
#[test]
fn test_boundaries_and_overflow() {
assert_eq!(mod_pow_2(2, u64::MAX), 4);
assert_eq!(mod_pow_2(u64::MAX, 3), 2);
assert_eq!(mod_pow_2(5, 32), 0);
}
#[test]
fn test_against_oracle() {
let mut rng = thread_rng();
for _ in 0..1_000_000 {
let exp: u64 = rng.gen();
let mod_val: u64 = rng.gen();
let mod_val = if mod_val == 0 { 1 } else { mod_val };
let expected = oracle_mod_pow_2(exp, mod_val);
let actual = mod_pow_2(exp, mod_val);
assert_eq!(
actual, expected,
"Mismatch! 2^{exp} % {mod_val} => Ours: {actual}, Oracle: {expected}",
);
}
}
#[test]
fn test_distance_with_uniform() {
for excluded_upper_bound in 1..20 {
for num_input_random_bits in 0..20 {
let density = oprf_density_function(excluded_upper_bound, num_input_random_bits);
let theoretical_pdf = probability_density_function_from_density(&density);
let p_uniform = 1. / excluded_upper_bound as f64;
let actual_distance: f64 = 1. / 2.
* theoretical_pdf
.iter()
.map(|p| (*p - p_uniform).abs())
.sum::<f64>();
let theoretical_distance = distance(excluded_upper_bound, num_input_random_bits);
assert!(
(theoretical_distance - actual_distance).abs()
<= theoretical_distance / 1_000_000.,
"{theoretical_distance} != {actual_distance}"
);
}
}
}
#[test]
fn test_uniformity_scalar_mul_shift() {
let max_distance = 2_f64.powi(-20);
let message_modulus = MessageModulus(4);
let excluded_upper_bound = 3;
let num_input_random_bits = num_input_random_bits_for_max_distance(
NonZeroU64::new(excluded_upper_bound).unwrap(),
max_distance,
message_modulus,
);
let sample_count: usize = 10_000_000;
let p_value_limit: f64 = 0.001;
// The distribution is not exactly uniform.
// This check ensures that, with the given low max_distance,
// the distribution is indistinguishable from uniform at the given sample count
test_uniformity(sample_count, p_value_limit, excluded_upper_bound, |_seed| {
oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits)
});
}
fn oprf_clear_equivalent(excluded_upper_bound: u64, num_input_random_bits: u64) -> u64 {
let random_input_upper_bound = 1 << num_input_random_bits;
let random_input = thread_rng().gen_range(0..random_input_upper_bound);
(random_input * excluded_upper_bound) >> num_input_random_bits
}
#[test]
fn test_uniformity_generate_oblivious_pseudo_random_custom_range() {
let base_sample_count: usize = 10_000;
let p_value_limit: f64 = 0.001;
let params = PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(params).build();
let (cks, sks) = generate_keys(config);
rayon::broadcast(|_| set_server_key(sks.clone()));
let message_modulus = params.message_modulus;
// [0.7, 0.1] for `max_distance` chosen to have `num_input_random_bits` be [2, 4]
// for any of the listed `excluded_upper_bound`
for (expected_num_input_random_bits, max_distance, excluded_upper_bounds) in
[(2, 0.7, [3, 5, 6, 7]), (4, 0.1, [3, 5, 6, 7])]
{
for excluded_upper_bound in excluded_upper_bounds {
let sample_count = base_sample_count * excluded_upper_bound as usize;
let excluded_upper_bound = NonZeroU64::new(excluded_upper_bound).unwrap();
let num_input_random_bits = num_input_random_bits_for_max_distance(
excluded_upper_bound,
max_distance,
message_modulus,
);
assert_eq!(num_input_random_bits, expected_num_input_random_bits);
test_uniformity_generate_oblivious_pseudo_random_custom_range2(
sample_count,
p_value_limit,
message_modulus,
&cks,
excluded_upper_bound,
max_distance,
);
}
}
}
fn test_uniformity_generate_oblivious_pseudo_random_custom_range2(
sample_count: usize,
p_value_limit: f64,
message_modulus: MessageModulus,
cks: &ClientKey,
excluded_upper_bound: NonZeroU64,
max_distance: f64,
) {
let num_input_random_bits = num_input_random_bits_for_max_distance(
excluded_upper_bound,
max_distance,
message_modulus,
);
let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
let real_values: Vec<u64> = (0..sample_count)
.into_par_iter()
.map(|_| {
let img = FheUint8::generate_oblivious_pseudo_random_custom_range(
Seed(rand::thread_rng().gen::<u128>()),
&range,
Some(max_distance),
);
img.decrypt(cks)
})
.collect();
let excluded_upper_bound = excluded_upper_bound.get();
let uniform_values: Vec<u64> = (0..sample_count)
.into_par_iter()
.map(|_| thread_rng().gen_range(0..excluded_upper_bound))
.collect();
let clear_oprf_value_lower_num_input_random_bits = (0..sample_count)
.into_par_iter()
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits - 1))
.collect();
let clear_oprf_value_same_num_input_random_bits = (0..sample_count)
.into_par_iter()
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits))
.collect();
let clear_oprf_value_higher_num_input_random_bits = (0..sample_count)
.into_par_iter()
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits + 1))
.collect();
for (values, should_have_low_p_value) in [
(&real_values, false),
// to test that the same distribution passes
(&clear_oprf_value_same_num_input_random_bits, false),
// to test that other distributions don't pass
// (makes sure the test is statistically powerful)
(&uniform_values, true),
(&clear_oprf_value_lower_num_input_random_bits, true),
(&clear_oprf_value_higher_num_input_random_bits, true),
] {
let p_value_upper_bound = p_value_upper_bound_oprf_almost_uniformity_from_values(
values,
num_input_random_bits,
excluded_upper_bound,
);
println!("p_value_upper_bound: {p_value_upper_bound}");
if should_have_low_p_value {
assert!(
p_value_upper_bound < p_value_limit,
"p_value_upper_bound (={p_value_upper_bound}) expected to be smaller than {p_value_limit}"
);
} else {
assert!(
p_value_limit < p_value_upper_bound,
"p_value_upper_bound (={p_value_upper_bound}) expected to be bigger than {p_value_limit}"
);
}
}
}
}
#[cfg(test)]
#[cfg(feature = "gpu")]
#[allow(unused_imports)]
mod test {
mod test_gpu {
use crate::prelude::*;
use crate::{
generate_keys, set_server_key, ConfigBuilder, FheInt128, FheUint32, FheUint64, GpuIndex,

View File

@@ -48,6 +48,7 @@ macro_rules! export_concrete_array_types {
}
pub use crate::core_crypto::commons::math::random::{Seed, XofSeed};
pub use crate::high_level_api::integers::oprf::RangeForRandom;
pub use crate::integer::server_key::MatchValues;
use crate::{error, Error, Versionize};
use backward_compatibility::compressed_ciphertext_list::SquashedNoiseCiphertextStateVersions;

View File

@@ -1,4 +1,5 @@
use crate::core_crypto::gpu::entities::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::CudaStreams;
@@ -16,7 +17,8 @@ use crate::integer::gpu::ciphertext::CudaRadixCiphertext;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
use crate::integer::gpu::{
cuda_backend_compress, cuda_backend_decompress, cuda_backend_get_compression_size_on_gpu,
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, PBSType,
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, extract_glwe_async,
PBSType,
};
use crate::prelude::CastInto;
use crate::shortint::ciphertext::{
@@ -197,6 +199,30 @@ impl<T: UnsignedInteger> CudaPackedGlweCiphertextList<T> {
meta: self.meta,
}
}
pub fn extract_glwe(
&self,
glwe_index: usize,
streams: &CudaStreams,
) -> CudaGlweCiphertextList<T> {
let meta = self
.meta
.as_ref()
.expect("CudaPackedGlweCiphertextList meta must be set to extract GLWE");
let mut output_cuda_glwe_list = CudaGlweCiphertextList::new(
meta.glwe_dimension,
meta.polynomial_size,
GlweCiphertextCount(1),
meta.ciphertext_modulus,
streams,
);
unsafe {
extract_glwe_async(streams, &mut output_cuda_glwe_list, self, glwe_index as u32);
}
streams.synchronize();
output_cuda_glwe_list
}
}
impl<T: UnsignedInteger> Clone for CudaPackedGlweCiphertextList<T> {

View File

@@ -7,6 +7,7 @@ pub mod server_key;
#[cfg(feature = "zk-pok")]
pub mod zk;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_compact_ciphertext_list::CudaLweCompactCiphertextList;
@@ -10423,3 +10424,44 @@ pub unsafe fn unchecked_small_scalar_mul_integer_async(
carry_modulus.0 as u32,
);
}
#[allow(clippy::too_many_arguments)]
/// # Safety
///
/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
/// is required
pub unsafe fn extract_glwe_async<T: UnsignedInteger>(
streams: &CudaStreams,
glwe_array_out: &mut CudaGlweCiphertextList<T>,
glwe_list: &CudaPackedGlweCiphertextList<T>,
glwe_index: u32,
) {
assert_eq!(
streams.gpu_indexes[0],
glwe_array_out.0.d_vec.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
assert_eq!(
streams.gpu_indexes[0],
glwe_list.data.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
let packed_glwe_list_ffi = prepare_cuda_packed_glwe_ct_ffi(glwe_list);
if T::BITS == 128 {
cuda_integer_extract_glwe_128(
streams.ffi(),
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
&raw const packed_glwe_list_ffi,
glwe_index,
);
} else if T::BITS == 64 {
cuda_integer_extract_glwe_64(
streams.ffi(),
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
&raw const packed_glwe_list_ffi,
glwe_index,
);
} else {
panic!("Unsupported integer size for CUDA GLWE extraction");
}
}

View File

@@ -0,0 +1,757 @@
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertext};
use crate::integer::compression_keys::CompressionPrivateKeys;
use crate::integer::gpu::list_compression::server_keys::CudaCompressionKey;
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::noise_simulation::cuda_glwe_list_to_glwe_ciphertext;
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext;
use crate::integer::gpu::CudaServerKey;
use crate::integer::{ClientKey, CompressedServerKey, IntegerCiphertext};
use crate::shortint::ciphertext::{Ciphertext, Degree, NoiseLevel};
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::engine::ShortintEngine;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{CompressionParameters, MetaParameters, Variance};
use crate::shortint::server_key::tests::noise_distribution::br_dp_packingks_ms::br_dp_packing_ks_ms;
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
NoiseSimulationLwePackingKeyswitchKey, NoiseSimulationModulus,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
expected_pfail_for_precision, mean_and_variance_check, normality_check, pfail_check,
precision_with_padding, update_ap_params_msg_and_carry_moduli, DecryptionAndNoiseResult,
NoiseSample, PfailAndPrecision, PfailTestMeta, PfailTestResult,
};
use crate::shortint::server_key::tests::noise_distribution::{
should_run_short_pfail_tests_debug, should_use_single_key_debug,
};
use crate::shortint::{
AtomicPatternParameters, CarryModulus, MessageModulus, ShortintEncoding, ShortintParameterSet,
};
use crate::GpuIndex;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
pub const SAMPLES_PER_MSG_PACKING_KS_NOISE: usize = 1000;
fn sanity_check_encrypt_br_dp_packing_ks_ms(meta_params: MetaParameters) {
let (params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
// The multiplication done in the compression moves the message up to the top of the
// carry space; multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let (d_before_packing, _after_packing, d_after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
&cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
let compression_inputs: Vec<_> = d_before_packing
.into_iter()
.map(|(_input, pbs_result, _dp_result)| {
let pbs_result_list_cpu = pbs_result.as_lwe_64().to_lwe_ciphertext_list(&streams);
let pbs_result_cpu = LweCiphertext::from_container(
pbs_result_list_cpu.clone().into_container(),
pbs_result_list_cpu.ciphertext_modulus(),
);
let cpu_ct = Ciphertext::new(
pbs_result_cpu,
Degree::new(params.message_modulus().0 - 1),
NoiseLevel::NOMINAL,
params.message_modulus(),
params.carry_modulus(),
params.atomic_pattern(),
);
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cpu_ct]);
let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
d_ct.ciphertext
})
.collect();
let gpu_compressed =
cuda_compression_key.compress_ciphertexts_into_list(&compression_inputs, &streams);
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
let extracted_glwe = GlweCiphertext::from_container(
extracted_list.clone().into_container(),
extracted_list.polynomial_size(),
extracted_list.ciphertext_modulus(),
);
let after_ms_list = d_after_ms.to_glwe_ciphertext_list(&streams);
let mut after_ms = GlweCiphertext::from_container(
after_ms_list.clone().into_container(),
after_ms_list.polynomial_size(),
after_ms_list.ciphertext_modulus(),
);
// Bodies that were not filled are discarded
after_ms.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
assert_eq!(after_ms.as_view(), extracted_glwe.as_view());
}
create_gpu_parameterized_test!(sanity_check_encrypt_br_dp_packing_ks_ms {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> (
Vec<(
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
)>,
Vec<DecryptionAndNoiseResult>,
Vec<DecryptionAndNoiseResult>,
) {
let mut engine = ShortintEngine::new();
let thread_cks: crate::integer::ClientKey;
let thread_cuda_sks: CudaServerKey;
let thread_compression_private_key;
let thread_cuda_compression_key;
let (cks, cuda_sks, compression_private_key, cuda_compression_key) =
if should_use_single_key_debug() {
(
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
)
} else {
let block_params: ShortintParameterSet = params.into();
thread_cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key =
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
thread_cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, streams);
thread_compression_private_key = thread_cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) = thread_cks
.new_compressed_compression_decompression_keys(&thread_compression_private_key);
thread_cuda_compression_key = compressed_compression_key.decompress_to_cuda(streams);
(
&thread_cks,
&thread_cuda_sks,
&thread_compression_private_key,
&thread_cuda_compression_key,
)
};
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key.encrypt_noiseless_pbs_input_dyn_lwe_with_engine(
br_input_modulus_log,
msg,
&mut engine,
)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, streams);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(streams, cuda_block_info))
.collect();
let dp_scalar = params.carry_modulus().0;
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let (d_before_packing, d_after_packing, d_after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
let compute_large_lwe_secret_key = cks.key.encryption_key();
let compression_glwe_secret_key = &compression_private_key.key.post_packing_ks_key;
let compute_encoding = cuda_sks.encoding();
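// After the dot product the message occupies the carry space, so ciphertexts on the
// compression path are decoded with a carry-free encoding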
let compression_encoding = ShortintEncoding {
carry_modulus: CarryModulus(1),
..compute_encoding
};
let after_packing = cuda_glwe_list_to_glwe_ciphertext(&d_after_packing, streams);
let after_ms = cuda_glwe_list_to_glwe_ciphertext(&d_after_ms, streams);
(
d_before_packing
.into_iter()
.map(|(d_input, d_pbs_result, d_dp_result)| {
let input = d_input.as_ct_64_cpu(streams);
let pbs_result = d_pbs_result.as_ct_64_cpu(streams);
let dp_result = d_dp_result.as_ct_64_cpu(streams);
(
match &cks.key.atomic_pattern {
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
DecryptionAndNoiseResult::new_from_lwe(
&input,
&standard_atomic_pattern_client_key.lwe_secret_key,
msg,
&compute_encoding,
)
}
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
panic!("KS32 Atomic Pattern not supported on GPU tests yet");
}
},
DecryptionAndNoiseResult::new_from_lwe(
&pbs_result,
&compute_large_lwe_secret_key,
msg,
&compute_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&dp_result,
&compute_large_lwe_secret_key,
msg,
&compression_encoding,
),
)
})
.collect(),
DecryptionAndNoiseResult::new_from_glwe(
&after_packing,
compression_glwe_secret_key,
compression_private_key.key.params.lwe_per_glwe(),
msg,
&compression_encoding,
),
DecryptionAndNoiseResult::new_from_glwe(
&after_ms,
compression_glwe_secret_key,
compression_private_key.key.params.lwe_per_glwe(),
msg,
&compression_encoding,
),
)
}
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> (
Vec<(NoiseSample, NoiseSample, NoiseSample)>,
Vec<NoiseSample>,
Vec<NoiseSample>,
) {
let (before_packing, after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params,
comp_params,
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
msg,
streams,
);
(
before_packing
.into_iter()
.map(|(input, after_pbs, after_dp)| {
(
input
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_pbs
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_dp
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
)
})
.collect(),
after_packing
.into_iter()
.map(|x| {
x.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
after_ms
.into_iter()
.map(|x| {
x.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
)
}
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> Vec<DecryptionAndNoiseResult> {
let (_before_packing, _after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params,
comp_params,
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
msg,
streams,
);
after_ms
}
fn noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu(meta_params: MetaParameters) {
let (params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let compression_key = compressed_compression_key.decompress();
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
assert!(noise_simulation_packing_key.matches_actual_shortint_comp_key(&compression_key.key));
// The multiplication done in the compression moves the message up to the top of the carry
// space; multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
noise_simulation_bsk.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk.modulus(),
);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
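// Run the noise simulation from noiseless (zero-variance) inputs so that only the noise added
// by the circuit itself is tracked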
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
let noise_simulation = NoiseSimulationLwe::new(
cks.parameters().lwe_dimension(),
Variance(0.0),
NoiseSimulationModulus::from_ciphertext_modulus(cks.parameters().ciphertext_modulus()),
);
br_dp_packing_ks_ms(
vec![noise_simulation; lwe_per_glwe.0],
&noise_simulation_bsk,
&noise_simulation_accumulator,
dp_scalar,
&noise_simulation_packing_key,
storage_modulus_log,
&mut vec![(); lwe_per_glwe.0],
)
};
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
// Check that the circuit is correct with respect to the core implementation, i.e. that it
// does not crash on dimension checks
let (expected_glwe_size_out, expected_polynomial_size_out, expected_modulus_f64_out) = {
let (_before_packing_sim, _after_packing, after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
&cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
(
after_ms.glwe_dimension().to_glwe_size(),
after_ms.polynomial_size(),
after_ms.ciphertext_modulus().raw_modulus_float(),
)
};
assert_eq!(after_ms_sim.glwe_size(), expected_glwe_size_out);
assert_eq!(after_ms_sim.polynomial_size(), expected_polynomial_size_out);
assert_eq!(after_ms_sim.modulus().as_f64(), expected_modulus_f64_out);
let cleartext_modulus = params.message_modulus().0 * params.carry_modulus().0;
let mut noise_samples_before_ms = vec![];
let mut noise_samples_after_ms = vec![];
let chunk_size = 8;
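// Use one CUDA stream per chunk slot so that the samples of a chunk can be processed in
// parallel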
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
for _ in 0..cleartext_modulus {
let (current_noise_samples_before_ms, current_noise_samples_after_ms): (Vec<_>, Vec<_>) =
(0..SAMPLES_PER_MSG_PACKING_KS_NOISE)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_stream = &vec_local_streams[*i % chunk_size];
let (_before_packing, after_packing, after_ms) =
encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
params,
comp_params,
&cks,
&cuda_sks,
&private_compression_key,
&cuda_compression_key,
0,
local_stream,
);
(after_packing, after_ms)
})
.collect::<Vec<_>>()
})
.unzip();
noise_samples_before_ms.extend(current_noise_samples_before_ms);
noise_samples_after_ms.extend(current_noise_samples_after_ms);
}
let noise_samples_before_ms_flattened: Vec<_> = noise_samples_before_ms
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let noise_samples_after_ms_flattened: Vec<_> = noise_samples_after_ms
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let before_ms_normality =
normality_check(&noise_samples_before_ms_flattened, "before ms", 0.01);
let after_ms_is_ok = mean_and_variance_check(
&noise_samples_after_ms_flattened,
"after_ms",
0.0,
after_ms_sim.variance_per_occupied_slot(),
comp_params.packing_ks_key_noise_distribution(),
after_ms_sim
.glwe_dimension()
.to_equivalent_lwe_dimension(after_ms_sim.polynomial_size()),
after_ms_sim.modulus().as_f64(),
);
assert!(before_ms_normality.null_hypothesis_is_valid && after_ms_is_ok);
}
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
fn noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu(meta_params: MetaParameters) {
let (pfail_test_meta, params, comp_params) = {
let (mut params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let original_message_modulus = params.message_modulus();
let original_carry_modulus = params.carry_modulus();
// For now, only 2_2 parameters are allowed; heuristics for other parameter sets are left for later
assert_eq!(original_message_modulus.0, 4);
assert_eq!(original_carry_modulus.0, 4);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
// The multiplication done in the compression moves the message up to the top of the carry
// space; multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
noise_simulation_bsk.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk.modulus(),
);
let lwe_per_glwe = comp_params.lwe_per_glwe();
let storage_modulus_log = comp_params.storage_log_modulus();
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
let noise_simulation = NoiseSimulationLwe::new(
params.lwe_dimension(),
Variance(0.0),
NoiseSimulationModulus::from_ciphertext_modulus(params.ciphertext_modulus()),
);
br_dp_packing_ks_ms(
vec![noise_simulation; lwe_per_glwe.0],
&noise_simulation_bsk,
&noise_simulation_accumulator,
dp_scalar,
&noise_simulation_packing_key,
storage_modulus_log,
&mut vec![(); lwe_per_glwe.0],
)
};
let expected_variance_after_storage = after_ms_sim.variance_per_occupied_slot();
let compression_carry_mod = CarryModulus(1);
let compression_message_mod = original_message_modulus;
let compression_precision_with_padding =
precision_with_padding(compression_message_mod, compression_carry_mod);
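// The pfail is estimated from the noise variance after storage and the number of plaintext
// bits (padding bit included) that must decode correctly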
let expected_pfail_for_storage = expected_pfail_for_precision(
compression_precision_with_padding,
expected_variance_after_storage,
);
let original_pfail_and_precision = PfailAndPrecision::new(
expected_pfail_for_storage,
compression_message_mod,
compression_carry_mod,
);
// Here we update the message modulus only:
// - the message modulus matches between the compression encoding and the compute encoding
// - the carry modulus stays the same, so we apply the same dot product as for standard 2_2
// - the effective encoding after the storage is the one used to evaluate the pfail
let updated_message_mod = MessageModulus(1 << 6);
let updated_carry_mod = compression_carry_mod;
update_ap_params_msg_and_carry_moduli(&mut params, updated_message_mod, updated_carry_mod);
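// Widening the message modulus increases the precision that must decode correctly, raising the
// expected pfail to a level that is measurable within the number of runs below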
assert!(
(params.message_modulus().0 * params.carry_modulus().0).ilog2()
<= comp_params.storage_log_modulus().0 as u32,
"Compression storage modulus cannot store enough bits for pfail estimation"
);
let updated_precision_with_padding =
precision_with_padding(updated_message_mod, updated_carry_mod);
let new_expected_pfail_for_storage = expected_pfail_for_precision(
updated_precision_with_padding,
expected_variance_after_storage,
);
let new_expected_pfail_and_precision = PfailAndPrecision::new(
new_expected_pfail_for_storage,
updated_message_mod,
updated_carry_mod,
);
let pfail_test_meta = if should_run_short_pfail_tests_debug() {
// To generate the same number of keys as in the case where a single run produces a single
// sample
let expected_fails = 200 * lwe_per_glwe.0 as u32;
PfailTestMeta::new_with_desired_expected_fails(
original_pfail_and_precision,
new_expected_pfail_and_precision,
expected_fails,
)
} else {
// To guarantee 1_000_000 keysets are generated
let total_runs = 1_000_000 * lwe_per_glwe.0 as u32;
PfailTestMeta::new_with_total_runs(
original_pfail_and_precision,
new_expected_pfail_and_precision,
total_runs,
)
};
(pfail_test_meta, params, comp_params)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let total_runs_for_expected_fails = pfail_test_meta
.total_runs_for_expected_fails()
.div_ceil(lwe_per_glwe.0.try_into().unwrap());
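// Each helper run packs lwe_per_glwe ciphertexts and thus yields lwe_per_glwe samples, hence
// the division (rounded up) of the number of runs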
let chunk_size = 8;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
let measured_fails: f64 = (0..total_runs_for_expected_fails)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_streams = &vec_local_streams[*i as usize % chunk_size];
let after_ms_decryption_result = encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
params,
comp_params,
&cks,
&cuda_sks,
&private_compression_key,
&cuda_compression_key,
0,
local_streams,
);
after_ms_decryption_result
.into_iter()
.map(|result| result.failure_as_f64())
.sum::<f64>()
})
.collect::<Vec<_>>()
})
.sum();
let test_result = PfailTestResult { measured_fails };
pfail_check(&pfail_test_meta, test_result);
}
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

View File

@@ -0,0 +1,869 @@
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
use crate::core_crypto::commons::noise_formulas::noise_simulation::{
NoiseSimulationLweFourier128Bsk, NoiseSimulationLwePackingKeyswitchKey,
};
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertextCount};
use crate::integer::gpu::CudaServerKey;
use crate::integer::noise_squashing::NoiseSquashingPrivateKey;
use crate::integer::CompressedServerKey;
use crate::core_crypto::commons::parameters::CiphertextModulusLog;
use crate::core_crypto::prelude::generate_programmable_bootstrap_glwe_lut;
use crate::integer::ciphertext::NoiseSquashingCompressionPrivateKey;
use crate::integer::gpu::list_compression::server_keys::CudaNoiseSquashingCompressionKey;
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
use crate::integer::gpu::server_key::radix::{CudaNoiseSquashingKey, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::unchecked_small_scalar_mul_integer_async;
use crate::integer::IntegerCiphertext;
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::parameters::noise_squashing::NoiseSquashingParameters;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{
AtomicPatternParameters, MetaParameters, NoiseSquashingCompressionParameters, Variance,
};
use crate::shortint::server_key::tests::noise_distribution::dp_ks_pbs128_packingks::{
dp_ks_any_ms_standard_pbs128, dp_ks_any_ms_standard_pbs128_packing_ks,
};
use crate::shortint::server_key::tests::noise_distribution::should_use_single_key_debug;
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
NoiseSimulationLweKeyswitchKey, NoiseSimulationModulusSwitchConfig,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
mean_and_variance_check, DecryptionAndNoiseResult, NoiseSample,
};
use crate::shortint::{PaddingBit, ShortintEncoding, ShortintParameterSet};
use crate::GpuIndex;
use rayon::prelude::*;
/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
fn sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu(meta_params: MetaParameters) {
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
meta_noise_squashing_params.compression_parameters.unwrap(),
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = atomic_params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let noise_squashing_compression_key = noise_squashing_private_key
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
let cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&noise_squashing_compression_key,
&streams,
);
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let max_scalar_mul = cuda_sks.max_noise_level.get();
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
noise_squashing_key.key.polynomial_size(),
noise_squashing_key.key.glwe_size(),
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(atomic_params.message_modulus().0 - 1),
message_modulus: atomic_params.message_modulus(),
carry_modulus: atomic_params.carry_modulus(),
atomic_pattern: atomic_params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
let (_before_packing, d_after_packing) = dp_ks_any_ms_standard_pbs128_packing_ks(
input_zero_as_lwe,
max_scalar_mul,
&cuda_sks,
modulus_switch_config,
&cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
&cuda_noise_squashing_compression_key.packing_key_switching_key,
&mut cuda_side_resources,
);
let cuda_noise_squashed_cts: Vec<_> = input_zeros
.into_par_iter()
.map(|ct| {
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![ct]);
let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
unsafe {
unchecked_small_scalar_mul_integer_async(
&streams,
&mut d_ct.ciphertext,
max_scalar_mul,
atomic_params.message_modulus(),
atomic_params.carry_modulus(),
);
}
streams.synchronize();
cuda_noise_squashing_key.unchecked_squash_ciphertext_noise(
&d_ct.ciphertext,
&cuda_sks,
&streams,
)
})
.collect();
let gpu_compressed = cuda_noise_squashing_compression_key
.compress_noise_squashed_ciphertexts_into_list(&cuda_noise_squashed_cts, &streams);
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
let extracted_glwe = GlweCiphertext::from_container(
extracted_list.clone().into_container(),
extracted_list.polynomial_size(),
extracted_list.ciphertext_modulus(),
);
let after_packing_list = d_after_packing.to_glwe_ciphertext_list(&streams);
let mut after_packing = GlweCiphertext::from_container(
after_packing_list.clone().into_container(),
after_packing_list.polynomial_size(),
after_packing_list.ciphertext_modulus(),
);
// Bodies that were not filled during the packing are zeroed out so that the comparison only
// covers the occupied slots
after_packing.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
assert_eq!(after_packing.as_view(), extracted_glwe.as_view());
}
/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
fn sanity_check_encrypt_dp_ks_standard_pbs128_gpu(meta_params: MetaParameters) {
let (params, noise_squashing_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let max_scalar_mul = cuda_sks.max_noise_level.get();
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
noise_squashing_key.key.polynomial_size(),
noise_squashing_key.key.glwe_size(),
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
let lwe_per_glwe = LweCiphertextCount(128);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
let res: Vec<_> = input_zero_as_lwe
.into_par_iter()
.zip(cuda_side_resources.par_iter_mut())
.map(|(input, side_resources)| {
let (input, after_dp, ks_result, drift_technique_result, ms_result, pbs_result) =
dp_ks_any_ms_standard_pbs128(
input,
max_scalar_mul,
&cuda_sks,
modulus_switch_config,
&cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
side_resources,
);
(
input,
after_dp,
ks_result,
drift_technique_result,
ms_result,
pbs_result,
)
})
.collect();
let input_zeros_non_pattern: Vec<_> = input_zeros
.iter()
.map(|ct| {
CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
)
})
.collect();
let vector_non_pattern: Vec<_> = input_zeros_non_pattern
.into_par_iter()
.map(|mut d_ct_input2| {
unsafe {
unchecked_small_scalar_mul_integer_async(
&streams,
&mut d_ct_input2.ciphertext,
max_scalar_mul,
params.message_modulus(),
params.carry_modulus(),
);
}
streams.synchronize();
cuda_noise_squashing_key
.squash_radix_ciphertext_noise(&cuda_sks, &d_ct_input2.ciphertext, &streams)
.unwrap()
})
.collect();
let vector_pattern_cpu: Vec<_> = res
.into_iter()
.map(
|(_input, _after_dp, _ks_result, _drift_technique_result, _ms_result, pbs_result)| {
pbs_result.as_ct_128_cpu(&streams)
},
)
.collect();
let vector_non_pattern_cpu: Vec<_> = vector_non_pattern
.into_par_iter()
.map(|cuda_squashed_radix_ct| {
let squashed_noise_ct_cpu =
cuda_squashed_radix_ct.to_squashed_noise_radix_ciphertext(&streams);
squashed_noise_ct_cpu.packed_blocks()[0]
.lwe_ciphertext()
.clone()
})
.collect();
// Check that the results of the pattern and non-pattern paths are equivalent
assert_eq!(vector_pattern_cpu, vector_non_pattern_cpu);
}
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
params: AtomicPatternParameters,
noise_squashing_params: NoiseSquashingParameters,
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
single_cks: &crate::integer::ClientKey,
single_cuda_sks: &CudaServerKey,
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
msg: u64,
scalar_for_multiplication: u64,
br_input_modulus_log: CiphertextModulusLog,
streams: &CudaStreams,
) -> (
Vec<(
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
)>,
Vec<DecryptionAndNoiseResult>,
) {
let thread_cks: crate::integer::ClientKey;
let thread_cuda_sks: CudaServerKey;
let thread_noise_squashing_private_key: NoiseSquashingPrivateKey;
let thread_noise_squashing_key: crate::integer::noise_squashing::NoiseSquashingKey;
let thread_cuda_noise_squashing_key: CudaNoiseSquashingKey;
let thread_noise_squashing_compression_private_key: NoiseSquashingCompressionPrivateKey;
let thread_cuda_noise_squashing_compression_key: CudaNoiseSquashingCompressionKey;
let (
cks,
cuda_sks,
noise_squashing_private_key,
noise_squashing_key,
cuda_noise_squashing_key,
noise_squashing_compression_private_key,
cuda_noise_squashing_compression_key,
) = if should_use_single_key_debug() {
(
single_cks,
single_cuda_sks,
single_noise_squashing_private_key,
single_noise_squashing_key,
single_cuda_noise_squashing_key,
single_noise_squashing_compression_private_key,
single_cuda_noise_squashing_compression_key,
)
} else {
let block_params: ShortintParameterSet = params.into();
thread_cks = crate::integer::ClientKey::new(block_params);
let thread_compressed_server_key =
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
thread_cuda_sks =
CudaServerKey::decompress_from_cpu(&thread_compressed_server_key, streams);
thread_noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let thread_compressed_noise_squashing_compression_key =
thread_cks.new_compressed_noise_squashing_key(&thread_noise_squashing_private_key);
thread_noise_squashing_key = thread_compressed_noise_squashing_compression_key.decompress();
thread_cuda_noise_squashing_key =
thread_compressed_noise_squashing_compression_key.decompress_to_cuda(streams);
thread_noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let thread_noise_squashing_compression_key = thread_noise_squashing_private_key
.new_noise_squashing_compression_key(&thread_noise_squashing_compression_private_key);
thread_cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&thread_noise_squashing_compression_key,
streams,
);
(
&thread_cks,
&thread_cuda_sks,
&thread_noise_squashing_private_key,
&thread_noise_squashing_key,
&thread_cuda_noise_squashing_key,
&thread_noise_squashing_compression_private_key,
&thread_cuda_noise_squashing_compression_key,
)
};
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let bsk_polynomial_size = noise_squashing_key.key.polynomial_size();
let bsk_glwe_size = noise_squashing_key.key.glwe_size();
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
bsk_polynomial_size,
bsk_glwe_size,
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, streams);
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(msg)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
let (before_packing_gpu, after_packing_gpu) = dp_ks_any_ms_standard_pbs128_packing_ks(
input_zero_as_lwe,
scalar_for_multiplication,
cuda_sks,
modulus_switch_config,
cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
&cuda_noise_squashing_compression_key.packing_key_switching_key,
&mut cuda_side_resources,
);
let before_packing: Vec<_> = before_packing_gpu
.into_iter()
.map(
|(
input_gpu,
after_dp_gpu,
after_ks_gpu,
after_drift_gpu,
after_ms_gpu,
after_pbs128_gpu,
)| {
match &cks.key.atomic_pattern {
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
let params = standard_atomic_pattern_client_key.parameters;
let u64_encoding = ShortintEncoding {
ciphertext_modulus: params.ciphertext_modulus(),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let large_lwe_secret_key =
standard_atomic_pattern_client_key.large_lwe_secret_key();
let small_lwe_secret_key =
standard_atomic_pattern_client_key.small_lwe_secret_key();
let input_ct = input_gpu.as_ct_64_cpu(streams);
let after_dp_ct = after_dp_gpu.as_ct_64_cpu(streams);
let after_ks_ct = after_ks_gpu.as_ct_64_cpu(streams);
let before_ms_gpu: &CudaDynLwe =
after_drift_gpu.as_ref().unwrap_or(&after_ks_gpu);
let before_ms_ct = before_ms_gpu.as_ct_64_cpu(streams);
let after_ms_ct = after_ms_gpu.as_ct_64_cpu(streams);
let after_pbs128_ct = after_pbs128_gpu.as_ct_128_cpu(streams);
(
DecryptionAndNoiseResult::new_from_lwe(
&input_ct,
&large_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_dp_ct,
&large_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_ks_ct,
&small_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&before_ms_ct,
&small_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_ms_ct,
&small_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_pbs128_ct,
&noise_squashing_private_key
.key
.post_noise_squashing_lwe_secret_key(),
msg.into(),
&u128_encoding,
),
)
}
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
panic!("KS32 atomic pattern not supported for GPU yet");
}
}
},
)
.collect();
let after_packing_list = after_packing_gpu.to_glwe_ciphertext_list(streams);
let after_packing = GlweCiphertext::from_container(
after_packing_list.clone().into_container(),
after_packing_list.polynomial_size(),
after_packing_list.ciphertext_modulus(),
);
let after_packing = DecryptionAndNoiseResult::new_from_glwe(
&after_packing,
noise_squashing_compression_private_key
.key
.post_packing_ks_key(),
lwe_per_glwe,
msg.into(),
&u128_encoding,
);
assert_eq!(after_packing.len(), lwe_per_glwe.0);
(before_packing, after_packing)
}
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
params: AtomicPatternParameters,
noise_squashing_params: NoiseSquashingParameters,
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
single_cks: &crate::integer::ClientKey,
single_cuda_sks: &CudaServerKey,
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
msg: u64,
scalar_for_multiplication: u64,
br_input_modulus_log: CiphertextModulusLog,
streams: &CudaStreams,
) -> (
Vec<(
NoiseSample,
NoiseSample,
NoiseSample,
NoiseSample,
NoiseSample,
NoiseSample,
)>,
Vec<NoiseSample>,
) {
let (before_compression, after_compression) =
encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
params,
noise_squashing_params,
noise_squashing_compression_params,
single_cks,
single_cuda_sks,
single_noise_squashing_private_key,
single_noise_squashing_key,
single_cuda_noise_squashing_key,
single_noise_squashing_compression_private_key,
single_cuda_noise_squashing_compression_key,
msg,
scalar_for_multiplication,
br_input_modulus_log,
streams,
);
(
before_compression
.into_iter()
.map(
|(input, after_dp, after_ks, after_drift, after_ms, after_pbs)| {
(
input
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_dp
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_ks
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_drift
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_ms
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_pbs
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
)
},
)
.collect(),
after_compression
.into_iter()
.map(|after_compression| {
after_compression
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
)
}
fn noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu(meta_params: MetaParameters) {
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
meta_noise_squashing_params.compression_parameters.unwrap(),
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = atomic_params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let noise_squashing_compression_key = noise_squashing_private_key
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
let cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&noise_squashing_compression_key,
&streams,
);
let noise_simulation_ksk =
NoiseSimulationLweKeyswitchKey::new_from_atomic_pattern_parameters(atomic_params);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(atomic_params);
let noise_simulation_modulus_switch_config =
NoiseSimulationModulusSwitchConfig::new_from_atomic_pattern_parameters(atomic_params);
let noise_simulation_bsk128 =
NoiseSimulationLweFourier128Bsk::new_from_parameters(atomic_params, noise_squashing_params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_noise_squashing_parameters(
noise_squashing_params,
noise_squashing_compression_params,
);
assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
assert!(noise_simulation_bsk128
.matches_actual_shortint_noise_squashing_key(&noise_squashing_key.key));
assert!(noise_simulation_packing_key.matches_actual_pksk(
noise_squashing_compression_key
.key
.packing_key_switching_key()
));
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
let max_scalar_mul = cuda_sks.max_noise_level.get();
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk128
.output_glwe_size()
.to_glwe_dimension(),
noise_simulation_bsk128.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk128.modulus(),
);
let (_before_packing_sim, after_packing_sim) = {
let noise_simulation = NoiseSimulationLwe::encrypt(&cks.key, 0);
dp_ks_any_ms_standard_pbs128_packing_ks(
vec![noise_simulation; cuda_noise_squashing_compression_key.lwe_per_glwe.0],
max_scalar_mul,
&noise_simulation_ksk,
noise_simulation_modulus_switch_config.as_ref(),
&noise_simulation_bsk128,
br_input_modulus_log,
&noise_simulation_accumulator,
&noise_simulation_packing_key,
&mut vec![(); cuda_noise_squashing_compression_key.lwe_per_glwe.0],
)
};
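// Convert the GLWE simulation to its equivalent LWE form for the dimension, modulus and
// variance checks below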
let after_packing_sim = after_packing_sim.into_lwe();
// Check that the circuit is correct with respect to the core implementation, i.e. that it
// does not crash on dimension checks
let (expected_lwe_dimension_out, expected_modulus_f64_out) = {
let pksk = noise_squashing_compression_key
.key
.packing_key_switching_key();
let out_glwe_dim = pksk.output_key_glwe_dimension();
let out_poly_size = pksk.output_key_polynomial_size();
(
out_glwe_dim.to_equivalent_lwe_dimension(out_poly_size),
pksk.ciphertext_modulus().raw_modulus_float(),
)
};
assert_eq!(
after_packing_sim.lwe_dimension(),
expected_lwe_dimension_out
);
assert_eq!(
after_packing_sim.modulus().as_f64(),
expected_modulus_f64_out
);
let cleartext_modulus = atomic_params.message_modulus().0 * atomic_params.carry_modulus().0;
let mut noise_samples_after_packing = vec![];
let sample_count_per_msg =
1000usize.div_ceil(cuda_noise_squashing_compression_key.lwe_per_glwe.0);
let chunk_size = 4;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
for _i in 0..cleartext_modulus {
let current_noise_samples_after_packing: Vec<_> = (0..sample_count_per_msg)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_stream = &vec_local_streams[*i % chunk_size];
let (_before_packing, after_packing) =
encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
atomic_params,
noise_squashing_params,
noise_squashing_compression_params,
&cks,
&cuda_sks,
&noise_squashing_private_key,
&noise_squashing_key,
&cuda_noise_squashing_key,
&noise_squashing_compression_private_key,
&cuda_noise_squashing_compression_key,
0,
max_scalar_mul,
br_input_modulus_log,
local_stream,
);
after_packing
})
.collect::<Vec<_>>()
})
.collect();
noise_samples_after_packing.extend(current_noise_samples_after_packing);
}
let noise_samples_after_packing_flattened: Vec<_> = noise_samples_after_packing
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let after_packing_is_ok = mean_and_variance_check(
&noise_samples_after_packing_flattened,
"after_packing",
0.0,
after_packing_sim.variance(),
noise_squashing_compression_params.packing_ks_key_noise_distribution,
after_packing_sim.lwe_dimension(),
after_packing_sim.modulus().as_f64(),
);
assert!(after_packing_is_ok);
}
create_gpu_parameterized_test!(
noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
}
);
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

View File

@@ -1,3 +1,5 @@
pub mod br_dp_ks_ms;
pub mod br_dp_packingks_ms;
pub mod dp_ks_ms;
pub mod dp_ks_pbs_128_packingks;
pub mod utils;

View File

@@ -1,7 +1,7 @@
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::{
AllocateCenteredBinaryShiftedStandardModSwitchResult,
AllocateDriftTechniqueStandardModSwitchResult, AllocateLweBootstrapResult,
AllocateLweKeyswitchResult, AllocateStandardModSwitchResult,
AllocateLweKeyswitchResult, AllocateLwePackingKeyswitchResult, AllocateStandardModSwitchResult,
CenteredBinaryShiftedStandardModSwitch, DriftTechniqueStandardModSwitch,
LweClassicFftBootstrap, LweKeyswitch, ScalarMul, StandardModSwitch,
};
@@ -13,6 +13,7 @@ use crate::core_crypto::gpu::cuda_modulus_switch_ciphertext;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::prelude::*;
use crate::integer::gpu::ciphertext::info::CudaBlockInfo;
@@ -25,7 +26,7 @@ use crate::integer::gpu::{
cuda_centered_modulus_switch_64, unchecked_small_scalar_mul_integer_async, CudaStreams,
};
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::NoiseSimulationModulusSwitchConfig;
use crate::shortint::server_key::tests::noise_distribution::utils::traits::LwePackingKeyswitch;
/// Side resources for CUDA operations in noise simulation
#[derive(Clone)]
pub struct CudaSideResources {
@@ -128,6 +129,19 @@ impl CudaDynLwe {
}
}
pub fn as_ct_128_cpu(&self, streams: &CudaStreams) -> LweCiphertext<Vec<u128>> {
match self {
Self::U32(_) => panic!("Tried getting a u32 CudaLweCiphertextList as u128."),
Self::U64(_) => panic!("Tried getting a u64 CudaLweCiphertextList as u128."),
Self::U128(_cuda_lwe) => {
let cpu_lwe_list = self.as_lwe_128().to_lwe_ciphertext_list(streams);
LweCiphertext::from_container(
cpu_lwe_list.clone().into_container(),
cpu_lwe_list.ciphertext_modulus(),
)
}
}
}
pub fn from_lwe_32(cuda_lwe: CudaLweCiphertextList<u32>) -> Self {
Self::U32(cuda_lwe)
}
@@ -141,6 +155,19 @@ impl CudaDynLwe {
}
}
/// Converts a CudaGlweCiphertextList<u64> to a GlweCiphertext<Vec<u64>>
pub fn cuda_glwe_list_to_glwe_ciphertext(
cuda_glwe_list: &CudaGlweCiphertextList<u64>,
streams: &CudaStreams,
) -> GlweCiphertext<Vec<u64>> {
let cpu_glwe_list = cuda_glwe_list.to_glwe_ciphertext_list(streams);
GlweCiphertext::from_container(
cpu_glwe_list.clone().into_container(),
cpu_glwe_list.polynomial_size(),
cpu_glwe_list.ciphertext_modulus(),
)
}
impl ScalarMul<u64> for CudaDynLwe {
type Output = Self;
type SideResources = CudaSideResources;
@@ -313,13 +340,14 @@ impl StandardModSwitch<Self> for CudaDynLwe {
panic!("U32 modulus switch not implemented for CudaDynLwe - only U64 is supported");
}
(Self::U64(input), Self::U64(output_cuda_lwe)) => {
let internal_output = input.duplicate(&side_resources.streams);
let mut internal_output = input.duplicate(&side_resources.streams);
cuda_modulus_switch_ciphertext(
&mut output_cuda_lwe.0.d_vec,
&mut internal_output.0.d_vec,
output_modulus_log.0 as u32,
&side_resources.streams,
);
let mut cpu_lwe = internal_output.to_lwe_ciphertext_list(&side_resources.streams);
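// The switched values live in the low output_modulus_log bits; shift them back to the most
// significant bits so they are expressed under the native 64-bit modulus again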
let shift_to_map_to_native = u64::BITS - output_modulus_log.0 as u32;
for val in cpu_lwe.as_mut_view().into_container().iter_mut() {
*val <<= shift_to_map_to_native;
@@ -713,3 +741,193 @@ impl AllocateLweBootstrapResult for CudaGlweCiphertextList<u128> {
CudaDynLwe::U128(cuda_lwe)
}
}
// Implement LweClassicFft128Bootstrap for CudaNoiseSquashingKey using the 128-bit PBS CUDA function
impl
crate::core_crypto::commons::noise_formulas::noise_simulation::traits::LweClassicFft128Bootstrap<
CudaDynLwe,
CudaDynLwe,
CudaGlweCiphertextList<u128>,
> for crate::integer::gpu::noise_squashing::keys::CudaNoiseSquashingKey
{
type SideResources = CudaSideResources;
fn lwe_classic_fft_128_pbs(
&self,
input: &CudaDynLwe,
output: &mut CudaDynLwe,
accumulator: &CudaGlweCiphertextList<u128>,
side_resources: &mut Self::SideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_programmable_bootstrapping::cuda_programmable_bootstrap_128_lwe_ciphertext_async;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
match (input, output) {
(CudaDynLwe::U64(input_cuda_lwe), CudaDynLwe::U128(output_cuda_lwe)) => {
// Get the bootstrapping key from self; it is already of the u128 type
let bsk = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => d_bsk,
CudaBootstrappingKey::MultiBit(_) => {
panic!("MultiBit bootstrapping keys are not supported for 128-bit PBS");
}
};
unsafe {
cuda_programmable_bootstrap_128_lwe_ciphertext_async(
input_cuda_lwe,
output_cuda_lwe,
accumulator,
bsk,
&side_resources.streams,
);
side_resources.streams.synchronize();
}
}
_ => panic!("128-bit PBS expects U64 input and U128 output for CudaDynLwe"),
}
}
}
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u64> {
type Output = CudaGlweCiphertextList<u64>;
type SideResources = CudaSideResources;
fn allocate_lwe_packing_keyswitch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
let polynomial_size = self.output_polynomial_size();
let ciphertext_modulus = self.ciphertext_modulus();
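// The packing keyswitch writes all input LWEs into a single output GLWE, hence a list of one
// ciphertext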
CudaGlweCiphertextList::new(
glwe_dimension,
polynomial_size,
GlweCiphertextCount(1),
ciphertext_modulus,
&side_resources.streams,
)
}
}
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u64>>
for CudaLwePackingKeyswitchKey<u64>
{
type SideResources = CudaSideResources;
fn keyswitch_lwes_and_pack_in_glwe(
&self,
input: &[&CudaDynLwe],
output: &mut CudaGlweCiphertextList<u64>,
side_resources: &mut CudaSideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64;
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
input.iter().map(|ciphertext| ciphertext.as_lwe_64()),
&side_resources.streams,
);
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
self,
&input_lwe_ciphertext_list,
output,
&side_resources.streams,
);
}
}
// Implement StandardModSwitch traits for CudaGlweCiphertextList<u64>
impl AllocateStandardModSwitchResult for CudaGlweCiphertextList<u64> {
type Output = Self;
type SideResources = CudaSideResources;
fn allocate_standard_mod_switch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
Self::new(
self.glwe_dimension(),
self.polynomial_size(),
self.glwe_ciphertext_count(),
self.ciphertext_modulus(),
&side_resources.streams,
)
}
}
impl StandardModSwitch<Self> for CudaGlweCiphertextList<u64> {
type SideResources = CudaSideResources;
fn standard_mod_switch(
&self,
storage_log_modulus: CiphertextModulusLog,
output: &mut Self,
side_resources: &mut CudaSideResources,
) {
let mut internal_output = self.duplicate(&side_resources.streams);
cuda_modulus_switch_ciphertext(
&mut internal_output.0.d_vec,
storage_log_modulus.0 as u32,
&side_resources.streams,
);
side_resources.streams.synchronize();
let mut cpu_glwe = internal_output.to_glwe_ciphertext_list(&side_resources.streams);
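// As for LWEs, shift the switched values back up to the most significant bits of the native
// 64-bit modulus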
let shift_to_map_to_native = u64::BITS - storage_log_modulus.0 as u32;
for val in cpu_glwe.as_mut_view().into_container().iter_mut() {
*val <<= shift_to_map_to_native;
}
let d_after_ms = Self::from_glwe_ciphertext_list(&cpu_glwe, &side_resources.streams);
*output = d_after_ms;
}
}
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u128> {
type Output = CudaGlweCiphertextList<u128>;
type SideResources = CudaSideResources;
fn allocate_lwe_packing_keyswitch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
let polynomial_size = self.output_polynomial_size();
let ciphertext_modulus = self.ciphertext_modulus();
CudaGlweCiphertextList::new(
glwe_dimension,
polynomial_size,
GlweCiphertextCount(1),
ciphertext_modulus,
&side_resources.streams,
)
}
}
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u128>>
for CudaLwePackingKeyswitchKey<u128>
{
type SideResources = CudaSideResources;
fn keyswitch_lwes_and_pack_in_glwe(
&self,
input: &[&CudaDynLwe],
output: &mut CudaGlweCiphertextList<u128>,
side_resources: &mut CudaSideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128;
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
input.iter().map(|ciphertext| ciphertext.as_lwe_128()),
&side_resources.streams,
);
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128(
self,
&input_lwe_ciphertext_list,
output,
&side_resources.streams,
);
}
}

View File

@@ -2,6 +2,7 @@ use super::{RadixCiphertext, ServerKey, SignedRadixCiphertext};
use crate::core_crypto::commons::generators::DeterministicSeeder;
use crate::core_crypto::prelude::DefaultRandomGenerator;
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use std::num::NonZeroU64;
pub use tfhe_csprng::seeders::{Seed, Seeder};
@@ -163,6 +164,7 @@ impl ServerKey {
/// as `num_input_random_bits`
///
/// ```rust
/// use std::num::NonZeroU64;
/// use tfhe::integer::gen_keys_radix;
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
/// use tfhe::Seed;
@@ -173,7 +175,7 @@ impl ServerKey {
/// let (cks, sks) = gen_keys_radix(PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128, size);
///
/// let num_input_random_bits = 5;
/// let excluded_upper_bound = 3;
/// let excluded_upper_bound = NonZeroU64::new(3).unwrap();
/// let num_blocks_output = 8;
///
/// let ct_res = sks.par_generate_oblivious_pseudo_random_unsigned_custom_range(
@@ -186,15 +188,17 @@ impl ServerKey {
/// // Decrypt:
/// let dec_result: u64 = cks.decrypt(&ct_res);
///
/// assert!(dec_result < excluded_upper_bound);
/// assert!(dec_result < excluded_upper_bound.get());
/// ```
pub fn par_generate_oblivious_pseudo_random_unsigned_custom_range(
&self,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
excluded_upper_bound: NonZeroU64,
num_blocks_output: u64,
) -> RadixCiphertext {
let excluded_upper_bound = excluded_upper_bound.get();
assert!(self.message_modulus().0.is_power_of_two());
let message_bits_count = self.message_modulus().0.ilog2() as u64;
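The custom-range generation ultimately realizes the same multiply-shift mapping that the tests model further down in `oprf_density_function`. A cleartext model of that mapping (hedged: this stands in for the encrypted code path, it is not the server-key implementation):

use std::num::NonZeroU64;

// `random_bits` is assumed uniform over [0, 2^num_input_random_bits); the
// multiply-shift scales it into [0, excluded_upper_bound).
fn cleartext_custom_range(
    random_bits: u64,
    num_input_random_bits: u64,
    excluded_upper_bound: NonZeroU64,
) -> u64 {
    debug_assert!(random_bits < (1 << num_input_random_bits));
    (random_bits * excluded_upper_bound.get()) >> num_input_random_bits
}

fn main() {
    // With 5 input bits and bound 3, every output lands in {0, 1, 2}.
    for r in 0..(1u64 << 5) {
        assert!(cleartext_custom_range(r, 5, NonZeroU64::new(3).unwrap()) < 3);
    }
}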

View File

@@ -10,6 +10,7 @@ use crate::integer::{BooleanBlock, IntegerKeyKind, RadixCiphertext, RadixClientK
use crate::shortint::parameters::*;
use crate::{ClientKey, CompressedServerKey, MatchValues, Seed, Tag};
use std::cmp::{max, min};
use std::num::NonZeroU64;
use std::sync::Arc;
create_parameterized_test!(random_op_sequence {
@@ -498,7 +499,18 @@ where
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_integer_bounded,
);
let oprf_custom_range_executor = OpSequenceCpuFunctionExecutor::new(
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
&|sk: &ServerKey,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
num_blocks_output: u64| {
sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
NonZeroU64::new(excluded_upper_bound).unwrap_or(NonZeroU64::new(1).unwrap()),
num_blocks_output,
)
},
);
let mut oprf_ops: Vec<(OprfExecutor, String)> = vec![(
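As a side note on the adapter closure above: since Rust 1.70 the zero-clamping fallback can also be spelled with `NonZeroU64::MIN`. A hedged stylistic alternative, not what this diff uses:

use std::num::NonZeroU64;

// NonZeroU64::MIN is 1, so a zero bound is clamped to 1, exactly as
// `unwrap_or(NonZeroU64::new(1).unwrap())` does above.
fn clamp_bound(excluded_upper_bound: u64) -> NonZeroU64 {
    NonZeroU64::new(excluded_upper_bound).unwrap_or(NonZeroU64::MIN)
}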

View File

@@ -9,6 +9,7 @@ use crate::integer::{IntegerKeyKind, RadixCiphertext, RadixClientKey, ServerKey}
use crate::shortint::parameters::*;
use statrs::distribution::ContinuousCDF;
use std::collections::HashMap;
use std::num::NonZeroU64;
use std::sync::Arc;
use tfhe_csprng::seeders::Seed;
@@ -36,9 +37,19 @@ fn oprf_any_range_unsigned<P>(param: P)
where
P: Into<TestParameters>,
{
let executor = CpuFunctionExecutor::new(
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
);
let executor =
CpuFunctionExecutor::new(&|sk: &ServerKey,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
num_blocks_output: u64| {
sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
NonZeroU64::new(excluded_upper_bound).unwrap(),
num_blocks_output,
)
});
oprf_any_range_test(param, executor);
}
@@ -46,9 +57,19 @@ fn oprf_almost_uniformity_unsigned<P>(param: P)
where
P: Into<TestParameters>,
{
let executor = CpuFunctionExecutor::new(
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
);
let executor =
CpuFunctionExecutor::new(&|sk: &ServerKey,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
num_blocks_output: u64| {
sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
NonZeroU64::new(excluded_upper_bound).unwrap(),
num_blocks_output,
)
});
oprf_almost_uniformity_test(param, executor);
}
@@ -89,7 +110,7 @@ where
);
}
pub fn oprf_uniformity_test<P, E>(param: P, mut executor: E)
pub(crate) fn oprf_uniformity_test<P, E>(param: P, mut executor: E)
where
P: Into<TestParameters>,
E: for<'a> FunctionExecutor<(Seed, u64, u64), RadixCiphertext>,
@@ -113,7 +134,7 @@ where
});
}
pub fn oprf_any_range_test<P, E>(param: P, mut executor: E)
pub(crate) fn oprf_any_range_test<P, E>(param: P, mut executor: E)
where
P: Into<TestParameters>,
E: for<'a> FunctionExecutor<(Seed, u64, u64, u64), RadixCiphertext>,
@@ -149,7 +170,7 @@ where
}
}
pub fn oprf_almost_uniformity_test<P, E>(param: P, mut executor: E)
pub(crate) fn oprf_almost_uniformity_test<P, E>(param: P, mut executor: E)
where
P: Into<TestParameters>,
E: for<'a> FunctionExecutor<(Seed, u64, u64, u64), RadixCiphertext>,
@@ -165,40 +186,70 @@ where
let num_input_random_bits: u64 = 4;
let num_blocks_output = 64;
let excluded_upper_bound = 10;
let random_input_upper_bound = 1 << num_input_random_bits;
let mut density = vec![0_usize; excluded_upper_bound as usize];
for i in 0..random_input_upper_bound {
let index = ((i * excluded_upper_bound) as f64 / random_input_upper_bound as f64) as usize;
density[index] += 1;
}
let theoretical_pdf: Vec<f64> = density
.iter()
.map(|count| *count as f64 / random_input_upper_bound as f64)
.collect();
let values: Vec<u64> = (0..sample_count)
.map(|seed| {
let img = executor.execute((
Seed(seed as u128),
num_input_random_bits,
excluded_upper_bound as u64,
excluded_upper_bound,
num_blocks_output,
));
cks.decrypt(&img)
})
.collect();
let p_value_upper_bound = p_value_upper_bound_oprf_almost_uniformity_from_values(
&values,
num_input_random_bits,
excluded_upper_bound,
);
assert!(p_value_limit < p_value_upper_bound);
}
pub(crate) fn p_value_upper_bound_oprf_almost_uniformity_from_values(
values: &[u64],
num_input_random_bits: u64,
excluded_upper_bound: u64,
) -> f64 {
let density = oprf_density_function(excluded_upper_bound, num_input_random_bits);
let theoretical_pdf = probability_density_function_from_density(&density);
let mut bins = vec![0_u64; excluded_upper_bound as usize];
for value in values {
for value in values.iter().copied() {
bins[value as usize] += 1;
}
let cumulative_bins = cumulate(&bins);
let theoretical_cdf = cumulate(&theoretical_pdf);
let sup_diff = sup_diff(&cumulative_bins, &theoretical_cdf);
let p_value_upper_bound = dkw_alpha_from_epsilon(sample_count as f64, sup_diff);
assert!(p_value_limit < p_value_upper_bound);
dkw_alpha_from_epsilon(values.len() as f64, sup_diff)
}
pub(crate) fn oprf_density_function(
excluded_upper_bound: u64,
num_input_random_bits: u64,
) -> Vec<usize> {
let random_input_upper_bound = 1 << num_input_random_bits;
let mut density = vec![0_usize; excluded_upper_bound as usize];
for i in 0..random_input_upper_bound {
let output = ((i * excluded_upper_bound) >> num_input_random_bits) as usize;
density[output] += 1;
}
density
}
pub(crate) fn probability_density_function_from_density(density: &[usize]) -> Vec<f64> {
let total_count: usize = density.iter().copied().sum();
density
.iter()
.map(|count| *count as f64 / total_count as f64)
.collect()
}
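The helpers above feed a Dvoretzky-Kiefer-Wolfowitz bound. Since `cumulate`, `sup_diff` and `dkw_alpha_from_epsilon` are outside this diff, the self-contained sketch below reconstructs the whole pipeline under the assumption that they behave as their names suggest: CDF accumulation, sup-norm gap, and alpha = 2 * exp(-2 * n * eps^2). Only `density` mirrors code visible in the hunk.

fn density(excluded_upper_bound: u64, num_input_random_bits: u64) -> Vec<usize> {
    let random_input_upper_bound = 1u64 << num_input_random_bits;
    let mut density = vec![0usize; excluded_upper_bound as usize];
    for i in 0..random_input_upper_bound {
        density[((i * excluded_upper_bound) >> num_input_random_bits) as usize] += 1;
    }
    density
}

// Normalize counts and accumulate them into a CDF.
fn cdf_from_counts(counts: &[f64]) -> Vec<f64> {
    let total: f64 = counts.iter().sum();
    counts
        .iter()
        .scan(0.0, |acc, c| {
            *acc += c / total;
            Some(*acc)
        })
        .collect()
}

// DKW inequality: P(sup |F_n - F| > eps) <= 2 * exp(-2 * n * eps^2).
fn dkw_alpha(n: f64, eps: f64) -> f64 {
    2.0 * (-2.0 * n * eps * eps).exp()
}

fn main() {
    let (bound, bits) = (10u64, 4u64);
    let theoretical: Vec<f64> = density(bound, bits).iter().map(|&c| c as f64).collect();
    // Pretend the empirical histogram matched the theory exactly: the sup gap
    // is 0 and the DKW bound degenerates to the trivial alpha = 2.
    let sup_gap = cdf_from_counts(&theoretical)
        .iter()
        .zip(cdf_from_counts(&theoretical).iter())
        .map(|(a, b)| (a - b).abs())
        .fold(0.0_f64, f64::max);
    println!("sup gap = {sup_gap}, alpha <= {}", dkw_alpha(10_000.0, sup_gap));
}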

View File

@@ -475,8 +475,12 @@ pub(crate) mod test {
}
}
pub fn test_uniformity<F>(sample_count: usize, p_value_limit: f64, distinct_values: u64, f: F)
where
pub(crate) fn test_uniformity<F>(
sample_count: usize,
p_value_limit: f64,
distinct_values: u64,
f: F,
) where
F: Sync + Fn(usize) -> u64,
{
let p_value = uniformity_p_value(f, sample_count, distinct_values);
@@ -487,7 +491,7 @@ pub(crate) mod test {
);
}
fn uniformity_p_value<F>(f: F, sample_count: usize, distinct_values: u64) -> f64
pub(crate) fn uniformity_p_value<F>(f: F, sample_count: usize, distinct_values: u64) -> f64
where
F: Sync + Fn(usize) -> u64,
{
@@ -495,8 +499,11 @@ pub(crate) mod test {
let mut values_count = HashMap::new();
for i in &values {
assert!(*i < distinct_values, "i {} dv{}", *i, distinct_values);
for i in values.iter().copied() {
assert!(
i < distinct_values,
"i (={i}) is supposed to be smaller than distinct_values (={distinct_values})",
);
*values_count.entry(i).or_insert(0) += 1;
}
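The body of `uniformity_p_value` is mostly outside this diff window. Assuming it performs a Pearson chi-squared test against the uniform law (a plausible reading given the counting loop above, but an assumption), a standalone sketch using `statrs`:

use statrs::distribution::{ChiSquared, ContinuousCDF};
use std::collections::HashMap;

// Hedged reconstruction, not the crate's helper: Pearson chi-squared test of
// `values` against the uniform distribution over [0, distinct_values).
fn chi_squared_uniformity_p_value(values: &[u64], distinct_values: u64) -> f64 {
    let mut values_count = HashMap::new();
    for &v in values {
        assert!(v < distinct_values);
        *values_count.entry(v).or_insert(0u64) += 1;
    }
    let expected = values.len() as f64 / distinct_values as f64;
    // Pearson statistic: sum over bins of (observed - expected)^2 / expected.
    let stat: f64 = (0..distinct_values)
        .map(|v| {
            let observed = *values_count.get(&v).unwrap_or(&0) as f64;
            (observed - expected).powi(2) / expected
        })
        .sum();
    // Under uniformity the statistic is approximately chi-squared with
    // (distinct_values - 1) degrees of freedom; the p-value is the upper tail.
    let chi2 = ChiSquared::new((distinct_values - 1) as f64).unwrap();
    1.0 - chi2.cdf(stat)
}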

View File

@@ -27,7 +27,7 @@ use crate::shortint::server_key::ServerKey;
use rayon::prelude::*;
#[allow(clippy::too_many_arguments)]
fn dp_ks_any_ms_standard_pbs128<
pub fn dp_ks_any_ms_standard_pbs128<
InputCt,
ScalarMulResult,
KsResult,
@@ -111,7 +111,7 @@ where
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn dp_ks_any_ms_standard_pbs128_packing_ks<
pub fn dp_ks_any_ms_standard_pbs128_packing_ks<
InputCt,
ScalarMulResult,
KsResult,

View File

@@ -727,8 +727,15 @@ async function compactPublicKeyZeroKnowledgeBench() {
serialized_size = list.safe_serialize(BigInt(10000000)).length;
}
const mean = timing / bench_loops;
let base_bench_str = "compact_fhe_uint_proven_encryption_";
let supportsThreads = await threads();
if (!supportsThreads) {
base_bench_str += "unsafe_coop_";
}
const common_bench_str =
"compact_fhe_uint_proven_encryption_" +
base_bench_str +
params.zk_scheme +
"_" +
bits_to_encrypt +