mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-09 14:47:56 -05:00
feat: add KVStore to the high level api
* Added Value type name to crate::integer::KVStore impl of Named trait
as well as a bool to check we deserialize the correct value type
(Radix vs SignedRadix)
* Add KVStore to high_level_api
* Add KVStore hlapi benches
* Remove specialized `[add,mul,sub]_to_slot` as `map` is now the
intended API.
- mul_to_slot was way slower than using `map`
- add/sub_to_slot were a bit faster (~5% latency-wise), but returned
less information (no old_value, no new_value, no boolean to check
if the key matched)
- Some known improvements can be made to `map`, which should result in
it being better than add/sub_to_slot
* Add FheIntegerType trait to make the KVStore generic over
FheUint/FheInt, and should make GPU integration "easy"
This commit is contained in:
committed by
tmontaigu
parent
33dee7673c
commit
e523fd2cb6
@@ -70,7 +70,7 @@ required-features = ["shortint", "internal-keycache"]
|
||||
name = "hlapi"
|
||||
path = "benches/high_level_api/bench.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "internal-keycache"]
|
||||
required-features = ["integer", "internal-keycache", "pbs-stats"]
|
||||
|
||||
[[bench]]
|
||||
name = "hlapi-erc20"
|
||||
|
||||
@@ -1,14 +1,22 @@
|
||||
use benchmark::utilities::{write_to_json, OperatorType};
|
||||
use criterion::{black_box, Criterion};
|
||||
use benchmark::utilities::{hlapi_throughput_num_ops, write_to_json, BenchmarkType, OperatorType};
|
||||
use criterion::{black_box, Criterion, Throughput};
|
||||
use rand::prelude::*;
|
||||
use std::hash::Hash;
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::*;
|
||||
use tfhe::core_crypto::prelude::Numeric;
|
||||
use tfhe::integer::block_decomposition::DecomposableInto;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::named::Named;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::{
|
||||
ClientKey, CompressedServerKey, FheUint10, FheUint12, FheUint128, FheUint14, FheUint16,
|
||||
FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8,
|
||||
ClientKey, CompressedServerKey, FheIntegerType, FheUint10, FheUint12, FheUint128, FheUint14,
|
||||
FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8, FheUintId, IntegerId,
|
||||
KVStore,
|
||||
};
|
||||
|
||||
use rayon::prelude::*;
|
||||
|
||||
fn bench_fhe_type<FheType>(
|
||||
c: &mut Criterion,
|
||||
client_key: &ClientKey,
|
||||
@@ -225,6 +233,170 @@ bench_type!(FheUint32);
|
||||
bench_type!(FheUint64);
|
||||
bench_type!(FheUint128);
|
||||
|
||||
/// Type-level counterpart of [`std::fmt::Display`]: writes a short, human-readable
/// name for the implementing type without needing a value of that type.
trait TypeDisplay {
    /// Writes the last `:`-separated segment of `std::any::type_name::<Self>()`
    /// (the whole name when it contains no `:`).
    fn fmt(f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let full_name = std::any::type_name::<Self>();
        // `rsplit` always yields at least one item, so the fallback is never taken.
        let short_name = full_name.rsplit(':').next().unwrap_or(full_name);
        f.write_str(short_name)
    }
}

// Clear integer types rely on the default implementation.
impl TypeDisplay for u8 {}
impl TypeDisplay for u16 {}
impl TypeDisplay for u32 {}
impl TypeDisplay for u64 {}
impl TypeDisplay for u128 {}
|
||||
|
||||
impl<Id: FheUintId> TypeDisplay for tfhe::FheUint<Id> {
|
||||
fn fmt(f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write_fhe_type_name::<Self>(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Id: tfhe::FheIntId> TypeDisplay for tfhe::FheInt<Id> {
|
||||
fn fmt(f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write_fhe_type_name::<Self>(f)
|
||||
}
|
||||
}
|
||||
|
||||
struct TypeDisplayer<T: TypeDisplay>(PhantomData<T>);
|
||||
|
||||
impl<T: TypeDisplay> Default for TypeDisplayer<T> {
|
||||
fn default() -> Self {
|
||||
Self(PhantomData)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: TypeDisplay> std::fmt::Display for TypeDisplayer<T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
T::fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_fhe_type_name<'a, FheType>(f: &mut std::fmt::Formatter<'a>) -> std::fmt::Result
|
||||
where
|
||||
FheType: FheIntegerType + Named,
|
||||
{
|
||||
let full_name = FheType::NAME;
|
||||
let i = full_name.rfind(":").map_or(0, |p| p + 1);
|
||||
|
||||
write!(f, "{}{}", &full_name[i..], FheType::Id::num_bits())
|
||||
}
|
||||
|
||||
fn bench_kv_store<Key, FheKey, Value>(c: &mut Criterion, cks: &ClientKey, num_elements: usize)
|
||||
where
|
||||
rand::distributions::Standard: rand::distributions::Distribution<Key>,
|
||||
Key: Numeric + DecomposableInto<u64> + Eq + Hash + CastInto<usize> + TypeDisplay,
|
||||
Value: FheEncrypt<u128, ClientKey> + FheIntegerType + Clone + Send + Sync + TypeDisplay,
|
||||
Value::Id: FheUintId,
|
||||
FheKey: FheEncrypt<Key, ClientKey> + FheIntegerType + Send + Sync,
|
||||
FheKey::Id: FheUintId,
|
||||
{
|
||||
let mut kv_store = KVStore::new();
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let format_id_bench = |op_name: &str| -> String {
|
||||
format!(
|
||||
"KVStore::<{}, {}>::{op_name}/{num_elements}",
|
||||
TypeDisplayer::<Key>::default(),
|
||||
TypeDisplayer::<Value>::default(),
|
||||
)
|
||||
};
|
||||
|
||||
match BenchmarkType::from_env().unwrap() {
|
||||
BenchmarkType::Latency => {
|
||||
while kv_store.len() != num_elements {
|
||||
let key = rng.gen::<Key>();
|
||||
let value = rng.gen::<u128>();
|
||||
|
||||
let encrypted_value = Value::encrypt(value, cks);
|
||||
kv_store.insert_with_clear_key(key, encrypted_value);
|
||||
}
|
||||
|
||||
let key = rng.gen::<Key>();
|
||||
let encrypted_key = FheKey::encrypt(key, cks);
|
||||
|
||||
let value = rng.gen::<u128>();
|
||||
let value_to_add = Value::encrypt(value, cks);
|
||||
|
||||
c.bench_function(&format_id_bench("Get"), |b| {
|
||||
b.iter(|| {
|
||||
let _ = kv_store.get(&encrypted_key);
|
||||
})
|
||||
});
|
||||
|
||||
c.bench_function(&format_id_bench("Update"), |b| {
|
||||
b.iter(|| {
|
||||
let _ = kv_store.update(&encrypted_key, &value_to_add);
|
||||
})
|
||||
});
|
||||
|
||||
c.bench_function(&format_id_bench("Map"), |b| {
|
||||
b.iter(|| {
|
||||
kv_store.map(&encrypted_key, |v| v);
|
||||
})
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
while kv_store.len() != num_elements {
|
||||
let key = rng.gen::<Key>();
|
||||
let value = rng.gen::<u128>();
|
||||
|
||||
let encrypted_value = Value::encrypt(value, cks);
|
||||
kv_store.insert_with_clear_key(key, encrypted_value);
|
||||
}
|
||||
|
||||
let key = rng.gen::<Key>();
|
||||
let encrypted_key = FheKey::encrypt(key, cks);
|
||||
|
||||
let value = rng.gen::<u128>();
|
||||
let value_to_add = Value::encrypt(value, cks);
|
||||
|
||||
let factor = hlapi_throughput_num_ops(
|
||||
|| {
|
||||
kv_store.map(&encrypted_key, |v| v);
|
||||
},
|
||||
cks,
|
||||
);
|
||||
|
||||
let mut kv_stores = vec![];
|
||||
for _ in 0..factor.saturating_sub(1) {
|
||||
kv_stores.push(kv_store.clone());
|
||||
}
|
||||
kv_stores.push(kv_store);
|
||||
|
||||
let mut group = c.benchmark_group("KVStore Throughput");
|
||||
group.throughput(Throughput::Elements(kv_stores.len() as u64));
|
||||
|
||||
group.bench_function(format_id_bench("Map"), |b| {
|
||||
b.iter(|| {
|
||||
kv_stores.par_iter_mut().for_each(|kv_store| {
|
||||
kv_store.map(&encrypted_key, |v| v);
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function(format_id_bench("Update"), |b| {
|
||||
b.iter(|| {
|
||||
kv_stores.par_iter_mut().for_each(|kv_store| {
|
||||
kv_store.update(&encrypted_key, &value_to_add);
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function(format_id_bench("Get"), |b| {
|
||||
b.iter(|| {
|
||||
kv_stores.par_iter_mut().for_each(|kv_store| {
|
||||
kv_store.get(&encrypted_key);
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
#[cfg(feature = "hpu")]
|
||||
let cks = {
|
||||
@@ -256,7 +428,9 @@ fn main() {
|
||||
let cks = ClientKey::generate(config);
|
||||
let compressed_sks = CompressedServerKey::new(&cks);
|
||||
|
||||
set_server_key(compressed_sks.decompress());
|
||||
let sks = compressed_sks.decompress();
|
||||
rayon::broadcast(|_| set_server_key(sks.clone()));
|
||||
set_server_key(sks);
|
||||
cks
|
||||
};
|
||||
|
||||
@@ -274,5 +448,17 @@ fn main() {
|
||||
bench_fhe_uint64(&mut c, &cks);
|
||||
bench_fhe_uint128(&mut c, &cks);
|
||||
|
||||
for pow in 1..=10 {
|
||||
bench_kv_store::<u64, FheUint64, FheUint32>(&mut c, &cks, 1 << pow);
|
||||
}
|
||||
|
||||
for pow in 1..=10 {
|
||||
bench_kv_store::<u64, FheUint64, FheUint64>(&mut c, &cks, 1 << pow);
|
||||
}
|
||||
|
||||
for pow in 1..=10 {
|
||||
bench_kv_store::<u128, FheUint128, FheUint64>(&mut c, &cks, 1 << pow);
|
||||
}
|
||||
|
||||
c.final_summary();
|
||||
}
|
||||
|
||||
@@ -5,6 +5,8 @@ use std::{env, fs};
|
||||
#[cfg(feature = "gpu")]
|
||||
use tfhe::core_crypto::gpu::{get_number_of_gpus, get_number_of_sms};
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
#[cfg(feature = "integer")]
|
||||
use tfhe::prelude::*;
|
||||
|
||||
#[cfg(feature = "boolean")]
|
||||
pub mod boolean_utils {
|
||||
@@ -466,6 +468,39 @@ pub fn throughput_num_threads(num_block: usize, op_pbs_count: u64) -> u64 {
|
||||
}
|
||||
}
|
||||
|
||||
/// Given an `Op`, returns how many instances of it should be run in parallel
/// to saturate the CPU and get a better throughput measurement.
///
/// `op` is executed once while its duration and PBS count are recorded. The
/// duration of a single `FheBool & FheBool` is then measured and used as the
/// cost of one PBS. From these, the time the op would take if all of its PBS
/// were spread evenly over the rayon threads is estimated, and the returned
/// factor is the measured duration divided by that estimate (rounded up).
///
/// The result is always at least 1, including when the op performs no PBS or
/// completes in less than a millisecond.
#[cfg(feature = "integer")]
pub fn hlapi_throughput_num_ops<Op>(op: Op, cks: &tfhe::ClientKey) -> usize
where
    Op: FnOnce(),
{
    tfhe::reset_pbs_count();
    let t = std::time::Instant::now();
    op();
    let time_for_op = t.elapsed();
    let pbs_count_for_op = tfhe::get_pbs_count();

    let a = tfhe::FheBool::encrypt(true, cks);
    let b = tfhe::FheBool::encrypt(true, cks);
    let t = std::time::Instant::now();
    let _ = a & b;
    let time_for_single_pbs = t.elapsed();

    // Round up to whole milliseconds; never 0 so the estimate below stays
    // meaningful even if the timer reports a zero duration.
    let pbs_time_in_ms = time_for_single_pbs.as_nanos().div_ceil(1_000_000).max(1);

    // Theoretical time if the op was just 1 layer of PBS all in parallel
    let num_threads = rayon::current_num_threads() as u64;
    let time_if_full_occupancy =
        u128::from(pbs_count_for_op.div_ceil(num_threads)) * pbs_time_in_ms;

    // Then find how many ops we should do to have full occupancy.
    // An op that performed no PBS gives a zero estimate: dividing by it would
    // panic, so fall back to a single op in that case.
    let factor = if time_if_full_occupancy == 0 {
        1
    } else {
        time_for_op
            .as_millis()
            .div_ceil(time_if_full_occupancy)
            .max(1)
    };

    usize::try_from(factor).unwrap_or(usize::MAX)
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
mod cuda_utils {
|
||||
use tfhe::core_crypto::entities::{
|
||||
|
||||
Reference in New Issue
Block a user