mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-09 14:47:56 -05:00
WIP: add cpu noise squash benchmarks latency and throughput on fheuint64
This commit is contained in:
7
Makefile
7
Makefile
@@ -1291,6 +1291,13 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench -p tfhe-zk-pok --
|
||||
|
||||
.PHONY: bench_hlapi_noise_squash # Run benchmarks for noise squash operation
|
||||
bench_hlapi_noise_squash: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-noise-squash \
|
||||
--features=shortint,integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
#
|
||||
# Utility tools
|
||||
#
|
||||
|
||||
@@ -252,6 +252,12 @@ path = "benches/high_level_api/erc20.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "hlapi-noise-squash"
|
||||
path = "benches/high_level_api/noise_squash.rs"
|
||||
harness = false
|
||||
required-features = ["shortint", "integer", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "keygen"
|
||||
path = "benches/keygen/bench.rs"
|
||||
|
||||
156
tfhe/benches/high_level_api/noise_squash.rs
Normal file
156
tfhe/benches/high_level_api/noise_squash.rs
Normal file
@@ -0,0 +1,156 @@
|
||||
#[path = "../utilities.rs"]
|
||||
mod utilities;
|
||||
|
||||
use criterion::{Criterion, Throughput};
|
||||
use rand::prelude::*;
|
||||
use rand::thread_rng;
|
||||
use rayon::prelude::*;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::prelude::*;
|
||||
|
||||
use crate::utilities::{
|
||||
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
|
||||
};
|
||||
#[cfg(feature = "gpu")]
|
||||
use tfhe::core_crypto::gpu::get_number_of_gpus;
|
||||
use tfhe::shortint::parameters::v1_1::V1_3_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
#[cfg(feature = "gpu")]
|
||||
use tfhe::{set_server_key, GpuIndex};
|
||||
use tfhe::{
|
||||
ClientKey, CompressedServerKey, FheUint10, FheUint12, FheUint128, FheUint14, FheUint16,
|
||||
FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8,
|
||||
};
|
||||
|
||||
fn bench_fhe_type<FheType>(
|
||||
c: &mut Criterion,
|
||||
client_key: &ClientKey,
|
||||
type_name: &str,
|
||||
num_bits: usize,
|
||||
) where
|
||||
FheType: FheEncrypt<u128, ClientKey> + Send + Sync,
|
||||
FheType: SquashNoise,
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(type_name);
|
||||
let bench_id_prefix = if cfg!(feature = "gpu") {
|
||||
"hlapi::cuda"
|
||||
} else {
|
||||
"hlapi"
|
||||
};
|
||||
let bench_id_suffix = format!("noise_squash::{type_name}");
|
||||
|
||||
let mut rng = thread_rng();
|
||||
|
||||
let bench_id;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
bench_id = format!("{bench_id_prefix}::{bench_id_suffix}");
|
||||
|
||||
let input = FheType::encrypt(rng.gen(), client_key);
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let _ = input.squash_noise();
|
||||
})
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
bench_id = format!("{bench_id_prefix}::throughput::{bench_id_suffix}");
|
||||
let params = client_key.computation_parameters();
|
||||
let num_blocks = num_bits
|
||||
.div_ceil((params.message_modulus().0 * params.carry_modulus().0).ilog2() as usize);
|
||||
|
||||
let elements = throughput_num_threads(num_blocks, 1);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
println!("elements: {elements}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let encrypt_values = || {
|
||||
(0..elements)
|
||||
.map(|_| FheType::encrypt(rng.gen(), client_key))
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
b.iter_batched(
|
||||
encrypt_values,
|
||||
|inputs| {
|
||||
inputs.par_iter().for_each(|input| {
|
||||
let _ = input.squash_noise();
|
||||
})
|
||||
},
|
||||
criterion::BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
}
|
||||
}
|
||||
let params = client_key.computation_parameters();
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
params,
|
||||
params.name(),
|
||||
"noise_squash",
|
||||
&OperatorType::Atomic,
|
||||
64,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
|
||||
macro_rules! bench_type {
|
||||
($fhe_type:ident) => {
|
||||
::paste::paste! {
|
||||
fn [<bench_ $fhe_type:snake>](c: &mut Criterion, cks: &ClientKey) {
|
||||
bench_fhe_type::<$fhe_type>(c, cks, stringify!($fhe_type), $fhe_type::num_bits());
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
bench_type!(FheUint2);
|
||||
bench_type!(FheUint4);
|
||||
bench_type!(FheUint6);
|
||||
bench_type!(FheUint8);
|
||||
bench_type!(FheUint10);
|
||||
bench_type!(FheUint12);
|
||||
bench_type!(FheUint14);
|
||||
bench_type!(FheUint16);
|
||||
bench_type!(FheUint32);
|
||||
bench_type!(FheUint64);
|
||||
bench_type!(FheUint128);
|
||||
|
||||
fn main() {
|
||||
#[cfg(feature = "hpu")]
|
||||
panic!("Noise squashing is not supported on HPU");
|
||||
let cks = {
|
||||
use tfhe::{set_server_key, ConfigBuilder};
|
||||
let config =
|
||||
ConfigBuilder::with_custom_parameters(PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128)
|
||||
.enable_noise_squashing(
|
||||
V1_3_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
)
|
||||
.build();
|
||||
let cks = ClientKey::generate(config);
|
||||
let compressed_sks = CompressedServerKey::new(&cks);
|
||||
|
||||
let decompressed_sks = compressed_sks.decompress();
|
||||
rayon::broadcast(|_| set_server_key(decompressed_sks.clone()));
|
||||
set_server_key(decompressed_sks);
|
||||
cks
|
||||
};
|
||||
|
||||
let mut c = Criterion::default().configure_from_args();
|
||||
|
||||
// bench_fhe_uint2(&mut c, &cks);
|
||||
// bench_fhe_uint4(&mut c, &cks);
|
||||
// bench_fhe_uint6(&mut c, &cks);
|
||||
// bench_fhe_uint8(&mut c, &cks);
|
||||
// bench_fhe_uint10(&mut c, &cks);
|
||||
// bench_fhe_uint12(&mut c, &cks);
|
||||
// bench_fhe_uint14(&mut c, &cks);
|
||||
// bench_fhe_uint16(&mut c, &cks);
|
||||
// bench_fhe_uint32(&mut c, &cks);
|
||||
bench_fhe_uint64(&mut c, &cks);
|
||||
// bench_fhe_uint128(&mut c, &cks);
|
||||
|
||||
c.final_summary();
|
||||
}
|
||||
@@ -628,8 +628,8 @@ pub fn throughput_num_threads(num_block: usize, op_pbs_count: u64) -> u64 {
|
||||
let total_num_sm = H100_PCIE_SM_COUNT * get_number_of_gpus();
|
||||
let operation_loading = ((total_num_sm as u64 / op_pbs_count) as f64).max(minimum_loading);
|
||||
let elements = (total_num_sm as f64 * block_multiplicator * operation_loading) as u64;
|
||||
elements.min(1500) // This threshold is useful for operation with both a small number of
|
||||
// block and low PBs count.
|
||||
elements.min(200) // This threshold is useful for operation with both a small number of
|
||||
// block and low PBs count.
|
||||
}
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
{
|
||||
|
||||
@@ -22,3 +22,23 @@ pub const V1_1_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
|
||||
carry_modulus: CarryModulus(4),
|
||||
ciphertext_modulus: CoreCiphertextModulus::<u128>::new_native(),
|
||||
};
|
||||
|
||||
/// Noise squashing parameter set tagged `V1_3`, with message and carry moduli of 4 and a
/// native `u128` ciphertext modulus.
///
/// NOTE(review): the constant is named `V1_3_…` but the noise squashing benchmark imports it
/// from the `v1_1` parameters module, and it is declared right after the `V1_1_…` set —
/// confirm this is the intended module for it.
pub const V1_3_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
    NoiseSquashingParameters = NoiseSquashingParameters {
    glwe_dimension: GlweDimension(2),
    polynomial_size: PolynomialSize(2048),
    glwe_noise_distribution: DynamicDistribution::new_t_uniform(30),
    decomp_base_log: DecompositionBaseLog(24),
    decomp_level_count: DecompositionLevelCount(3),
    modulus_switch_noise_reduction_params: Some(
        ModulusSwitchNoiseReductionParams {
            modulus_switch_zeros_count: LweCiphertextCount(1449),
            // 288230376151711744 == 2^58.
            ms_bound: NoiseEstimationMeasureBound(288230376151711744f64),
            ms_r_sigma_factor: RSigmaFactor(13.179852282053789f64),
            ms_input_variance: Variance(2.63039184094559E-7f64),
        },
    ),
    message_modulus: MessageModulus(4),
    carry_modulus: CarryModulus(4),
    ciphertext_modulus: CoreCiphertextModulus::<u128>::new_native(),
};
|
||||
|
||||
Reference in New Issue
Block a user