chore(bench): move benchmarks to their own crate

This is done to speed up compilation by avoiding
recompiling tfhe each time a modification is made in a benchmark
file.
David Testé
2025-05-06 15:08:24 +02:00
committed by David Testé
parent d197a2aa73
commit 67ec4a28c1
33 changed files with 1166 additions and 1776 deletions

tfhe-benchmark/Cargo.toml

@@ -0,0 +1,157 @@
[package]
name = "tfhe-benchmark"
version = "0.1.0"
edition = "2021"
homepage = "https://zama.ai/"
documentation = "https://docs.zama.ai/tfhe-rs"
repository = "https://github.com/zama-ai/tfhe-rs"
license = "BSD-3-Clause-Clear"
description = "tfhe-benchmark: Performances measurements facility for tfhe-rs."
rust-version = "1.84"
publish = false
[lib]
name = "benchmark"
path = "src/lib.rs"
[dependencies]
bincode = "1.3.3"
# clap has to be pinned as its minimum supported rust version
# changes often between minor releases, which breaks our CI
clap = { version = "=4.4.4", features = ["derive"] }
criterion = "0.5.1"
dyn-stack = { workspace = true, features = ["default"] }
itertools = "0.14"
serde = { version = "1.0", default-features = false }
serde_json = "1.0.94"
paste = "1.0.7"
rand = { workspace = true }
rayon = { workspace = true }
tfhe = { path = "../tfhe" }
tfhe-csprng = { path = "../tfhe-csprng" }
[features]
boolean = ["tfhe/boolean"]
shortint = ["tfhe/shortint"]
integer = ["shortint", "tfhe/integer"]
gpu = ["tfhe/gpu"]
internal-keycache = ["tfhe/internal-keycache"]
nightly-avx512 = ["tfhe/nightly-avx512"]
pbs-stats = ["tfhe/pbs-stats"]
zk-pok = ["tfhe/zk-pok"]
[[bench]]
name = "boolean-bench"
path = "benches/boolean/bench.rs"
harness = false
required-features = ["boolean", "internal-keycache"]
[[bench]]
name = "shortint-bench"
path = "benches/shortint/bench.rs"
harness = false
required-features = ["shortint", "internal-keycache"]
[[bench]]
name = "oprf-shortint-bench"
path = "benches/shortint/oprf.rs"
harness = false
required-features = ["shortint", "internal-keycache"]
[[bench]]
name = "glwe_packing_compression-shortint-bench"
path = "benches/shortint/glwe_packing_compression.rs"
harness = false
required-features = ["shortint", "internal-keycache"]
[[bench]]
name = "hlapi"
path = "benches/high_level_api/bench.rs"
harness = false
required-features = ["integer", "internal-keycache"]
[[bench]]
name = "hlapi-erc20"
path = "benches/high_level_api/erc20.rs"
harness = false
required-features = ["integer", "internal-keycache"]
[[bench]]
name = "hlapi-dex"
path = "benches/high_level_api/dex.rs"
harness = false
required-features = ["integer", "internal-keycache"]
[[bench]]
name = "glwe_packing_compression-integer-bench"
path = "benches/integer/glwe_packing_compression.rs"
harness = false
required-features = ["integer", "pbs-stats", "internal-keycache"]
[[bench]]
name = "integer-bench"
path = "benches/integer/bench.rs"
harness = false
required-features = ["integer", "pbs-stats", "internal-keycache"]
[[bench]]
name = "integer-signed-bench"
path = "benches/integer/signed_bench.rs"
harness = false
required-features = ["integer", "pbs-stats", "internal-keycache"]
[[bench]]
name = "zk-pke-bench"
path = "benches/integer/zk_pke.rs"
harness = false
required-features = ["integer", "zk-pok", "pbs-stats", "internal-keycache"]
[[bench]]
name = "ks-bench"
path = "benches/core_crypto/ks_bench.rs"
harness = false
required-features = ["shortint", "internal-keycache"]
[[bench]]
name = "pbs-bench"
path = "benches/core_crypto/pbs_bench.rs"
harness = false
required-features = ["boolean", "shortint", "internal-keycache"]
[[bench]]
name = "ks-pbs-bench"
path = "benches/core_crypto/ks_pbs_bench.rs"
harness = false
required-features = ["shortint", "internal-keycache"]
[[bench]]
name = "modulus_switch_noise_reduction"
path = "benches/core_crypto/modulus_switch_noise_reduction.rs"
harness = false
required-features = ["shortint"]
[[bench]]
name = "pbs128-bench"
path = "benches/core_crypto/pbs128_bench.rs"
harness = false
required-features = ["shortint"]
[[bin]]
name = "boolean_key_sizes"
path = "src/bin/boolean_key_sizes.rs"
required-features = ["boolean", "internal-keycache"]
[[bin]]
name = "shortint_key_sizes"
path = "src/bin/shortint_key_sizes.rs"
required-features = ["shortint", "internal-keycache"]
[[bin]]
name = "hlapi_compact_pk_ct_sizes"
path = "src/bin/hlapi_compact_pk_ct_sizes.rs"
required-features = ["integer", "internal-keycache"]
[[bin]]
name = "wasm_benchmarks_parser"
path = "src/bin/wasm_benchmarks_parser.rs"
required-features = ["shortint", "internal-keycache"]

tfhe-benchmark/LICENSE

@@ -0,0 +1,28 @@
BSD 3-Clause Clear License
Copyright © 2025 ZAMA.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or other
materials provided with the distribution.
3. Neither the name of ZAMA nor the names of its contributors may be used to endorse
or promote products derived from this software without specific prior written permission.
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
THIS SOFTWARE IS PROVIDED BY THE ZAMA AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
ZAMA OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

tfhe-benchmark/benches/boolean/bench.rs

@@ -0,0 +1,108 @@
use benchmark::utilities::{write_to_json, CryptoParametersRecord, OperatorType};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use tfhe::boolean::client_key::ClientKey;
use tfhe::boolean::parameters::{
BooleanParameters, DEFAULT_PARAMETERS, DEFAULT_PARAMETERS_KS_PBS,
PARAMETERS_ERROR_PROB_2_POW_MINUS_165, PARAMETERS_ERROR_PROB_2_POW_MINUS_165_KS_PBS,
TFHE_LIB_PARAMETERS,
};
use tfhe::boolean::prelude::BinaryBooleanGates;
use tfhe::boolean::server_key::ServerKey;
criterion_group!(
gates_benches,
bench_default_parameters,
bench_default_parameters_ks_pbs,
bench_low_prob_parameters,
bench_low_prob_parameters_ks_pbs,
bench_tfhe_lib_parameters,
);
criterion_main!(gates_benches);
/// Helper function to write boolean benchmark parameters to disk in JSON format.
pub fn write_to_json_boolean<T: Into<CryptoParametersRecord<u32>>>(
bench_id: &str,
params: T,
params_alias: impl Into<String>,
display_name: impl Into<String>,
) {
write_to_json(
bench_id,
params,
params_alias,
display_name,
&OperatorType::Atomic,
1,
vec![1],
);
}
// Put all `bench_function` calls in one place
// so that key generation only runs once per parameter set, saving time.
fn benches(c: &mut Criterion, params: BooleanParameters, parameter_name: &str) {
let mut bench_group = c.benchmark_group("gates_benches");
let cks = ClientKey::new(&params);
let sks = ServerKey::new(&cks);
let ct1 = cks.encrypt(true);
let ct2 = cks.encrypt(false);
let ct3 = cks.encrypt(true);
let id = format!("AND::{parameter_name}");
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.and(&ct1, &ct2))));
write_to_json_boolean(&id, params, parameter_name, "and");
let id = format!("NAND::{parameter_name}");
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.nand(&ct1, &ct2))));
write_to_json_boolean(&id, params, parameter_name, "nand");
let id = format!("OR::{parameter_name}");
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.or(&ct1, &ct2))));
write_to_json_boolean(&id, params, parameter_name, "or");
let id = format!("XOR::{parameter_name}");
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.xor(&ct1, &ct2))));
write_to_json_boolean(&id, params, parameter_name, "xor");
let id = format!("XNOR::{parameter_name}");
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.xnor(&ct1, &ct2))));
write_to_json_boolean(&id, params, parameter_name, "xnor");
let id = format!("NOT::{parameter_name}");
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.not(&ct1))));
write_to_json_boolean(&id, params, parameter_name, "not");
let id = format!("MUX::{parameter_name}");
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.mux(&ct1, &ct2, &ct3))));
write_to_json_boolean(&id, params, parameter_name, "mux");
}
fn bench_default_parameters(c: &mut Criterion) {
benches(c, DEFAULT_PARAMETERS, "DEFAULT_PARAMETERS");
}
fn bench_default_parameters_ks_pbs(c: &mut Criterion) {
benches(c, DEFAULT_PARAMETERS_KS_PBS, "DEFAULT_PARAMETERS_KS_PBS");
}
fn bench_low_prob_parameters(c: &mut Criterion) {
benches(
c,
PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
"PARAMETERS_ERROR_PROB_2_POW_MINUS_165",
);
}
fn bench_low_prob_parameters_ks_pbs(c: &mut Criterion) {
benches(
c,
PARAMETERS_ERROR_PROB_2_POW_MINUS_165_KS_PBS,
"PARAMETERS_ERROR_PROB_2_POW_MINUS_165_KS_PBS",
);
}
fn bench_tfhe_lib_parameters(c: &mut Criterion) {
benches(c, TFHE_LIB_PARAMETERS, "TFHE_LIB_PARAMETERS");
}

tfhe-benchmark/benches/core_crypto/ks_bench.rs

@@ -0,0 +1,834 @@
#[cfg(feature = "boolean")]
use benchmark::params::benchmark_32bits_parameters;
use benchmark::params::{
benchmark_compression_parameters, benchmark_parameters, multi_bit_benchmark_parameters,
};
use benchmark::utilities::{
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, CryptoParametersRecord,
OperatorType,
};
use criterion::{black_box, Criterion, Throughput};
use rayon::prelude::*;
use serde::Serialize;
use std::env;
use tfhe::core_crypto::prelude::*;
// TODO Refactor KS, PBS and KS-PBS benchmarks into a single generic function.
fn keyswitch<Scalar: UnsignedTorus + CastInto<usize> + Serialize>(
criterion: &mut Criterion,
parameters: &[(String, CryptoParametersRecord<Scalar>)],
) {
let bench_name = "core_crypto::keyswitch";
let mut bench_group = criterion.benchmark_group(bench_name);
// Create the PRNG
let mut seeder = new_seeder();
let seeder = seeder.as_mut();
let mut encryption_generator =
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
let mut secret_generator = SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
for (name, params) in parameters.iter() {
let lwe_dimension = params.lwe_dimension.unwrap();
let glwe_dimension = params.glwe_dimension.unwrap();
let polynomial_size = params.polynomial_size.unwrap();
let ks_decomp_base_log = params.ks_base_log.unwrap();
let ks_decomp_level_count = params.ks_level.unwrap();
let lwe_sk =
allocate_and_generate_new_binary_lwe_secret_key(lwe_dimension, &mut secret_generator);
let glwe_sk = allocate_and_generate_new_binary_glwe_secret_key(
glwe_dimension,
polynomial_size,
&mut secret_generator,
);
let big_lwe_sk = glwe_sk.into_lwe_secret_key();
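// Key switching key from the large LWE secret key (derived from the GLWE key) down to the small LWE key.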
let ksk_big_to_small = allocate_and_generate_new_lwe_keyswitch_key(
&big_lwe_sk,
&lwe_sk,
ks_decomp_base_log,
ks_decomp_level_count,
params.lwe_noise_distribution.unwrap(),
params.ciphertext_modulus.unwrap(),
&mut encryption_generator,
);
let bench_id;
match get_bench_type() {
BenchmarkType::Latency => {
let ct = allocate_and_encrypt_new_lwe_ciphertext(
&big_lwe_sk,
Plaintext(Scalar::ONE),
params.lwe_noise_distribution.unwrap(),
params.ciphertext_modulus.unwrap(),
&mut encryption_generator,
);
let mut output_ct = LweCiphertext::new(
Scalar::ZERO,
lwe_sk.lwe_dimension().to_lwe_size(),
params.ciphertext_modulus.unwrap(),
);
bench_id = format!("{bench_name}::{name}");
{
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
keyswitch_lwe_ciphertext(&ksk_big_to_small, &ct, &mut output_ct);
black_box(&mut output_ct);
})
});
}
}
BenchmarkType::Throughput => {
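// Throughput mode: encrypt a fresh batch of independent ciphertexts per measurement and key switch them in parallel with rayon.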
bench_id = format!("{bench_name}::throughput::{name}");
let blocks: usize = 1;
let elements = throughput_num_threads(blocks, 1); // FIXME This number of elements does not saturate the target machine
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let setup_encrypted_values = || {
let input_cts = (0..elements)
.map(|_| {
allocate_and_encrypt_new_lwe_ciphertext(
&big_lwe_sk,
Plaintext(Scalar::ONE),
params.lwe_noise_distribution.unwrap(),
params.ciphertext_modulus.unwrap(),
&mut encryption_generator,
)
})
.collect::<Vec<_>>();
let output_cts = (0..elements)
.map(|_| {
LweCiphertext::new(
Scalar::ZERO,
lwe_sk.lwe_dimension().to_lwe_size(),
params.ciphertext_modulus.unwrap(),
)
})
.collect::<Vec<_>>();
(input_cts, output_cts)
};
b.iter_batched(
setup_encrypted_values,
|(input_cts, mut output_cts)| {
input_cts
.par_iter()
.zip(output_cts.par_iter_mut())
.for_each(|(input_ct, output_ct)| {
keyswitch_lwe_ciphertext(
&ksk_big_to_small,
input_ct,
output_ct,
);
})
},
criterion::BatchSize::SmallInput,
)
});
}
};
let bit_size = (params.message_modulus.unwrap_or(2) as u32).ilog2();
write_to_json(
&bench_id,
*params,
name,
"ks",
&OperatorType::Atomic,
bit_size,
vec![bit_size],
);
}
}
fn packing_keyswitch<Scalar, F>(
criterion: &mut Criterion,
bench_name: &str,
parameters: &[(String, CryptoParametersRecord<Scalar>)],
ks_op: F,
) where
Scalar: UnsignedTorus + CastInto<usize> + Serialize,
F: Fn(
&LwePackingKeyswitchKey<Vec<Scalar>>,
&LweCiphertextList<Vec<Scalar>>,
&mut GlweCiphertext<Vec<Scalar>>,
) + Sync
+ Send,
{
let bench_name = format!("core_crypto::{bench_name}");
let mut bench_group = criterion.benchmark_group(&bench_name);
// Create the PRNG
let mut seeder = new_seeder();
let seeder = seeder.as_mut();
let mut encryption_generator =
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
let mut secret_generator = SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
for (name, params) in parameters.iter() {
let lwe_dimension = params.lwe_dimension.unwrap();
let packing_glwe_dimension = params.packing_ks_glwe_dimension.unwrap();
let packing_polynomial_size = params.packing_ks_polynomial_size.unwrap();
let packing_ks_decomp_base_log = params.packing_ks_base_log.unwrap();
let packing_ks_decomp_level_count = params.packing_ks_level.unwrap();
let ciphertext_modulus = params.ciphertext_modulus.unwrap();
let count = params.lwe_per_glwe.unwrap();
let lwe_sk =
allocate_and_generate_new_binary_lwe_secret_key(lwe_dimension, &mut secret_generator);
let glwe_sk = allocate_and_generate_new_binary_glwe_secret_key(
packing_glwe_dimension,
packing_polynomial_size,
&mut secret_generator,
);
let pksk = allocate_and_generate_new_lwe_packing_keyswitch_key(
&lwe_sk,
&glwe_sk,
packing_ks_decomp_base_log,
packing_ks_decomp_level_count,
params.packing_ks_key_noise_distribution.unwrap(),
ciphertext_modulus,
&mut encryption_generator,
);
let bench_id;
match get_bench_type() {
BenchmarkType::Latency => {
let mut input_lwe_list = LweCiphertextList::new(
Scalar::ZERO,
lwe_sk.lwe_dimension().to_lwe_size(),
count,
ciphertext_modulus,
);
let plaintext_list = PlaintextList::new(
Scalar::ZERO,
PlaintextCount(input_lwe_list.lwe_ciphertext_count().0),
);
encrypt_lwe_ciphertext_list(
&lwe_sk,
&mut input_lwe_list,
&plaintext_list,
params.lwe_noise_distribution.unwrap(),
&mut encryption_generator,
);
let mut output_glwe = GlweCiphertext::new(
Scalar::ZERO,
glwe_sk.glwe_dimension().to_glwe_size(),
glwe_sk.polynomial_size(),
ciphertext_modulus,
);
bench_id = format!("{bench_name}::{name}");
{
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
ks_op(&pksk, &input_lwe_list, &mut output_glwe);
black_box(&mut output_glwe);
})
});
}
}
BenchmarkType::Throughput => {
bench_id = format!("{bench_name}::throughput::{name}");
let blocks: usize = 1;
let elements = throughput_num_threads(blocks, 1);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let setup_encrypted_values = || {
let input_lwe_lists = (0..elements)
.map(|_| {
let mut input_lwe_list = LweCiphertextList::new(
Scalar::ZERO,
lwe_sk.lwe_dimension().to_lwe_size(),
count,
ciphertext_modulus,
);
let plaintext_list = PlaintextList::new(
Scalar::ZERO,
PlaintextCount(input_lwe_list.lwe_ciphertext_count().0),
);
encrypt_lwe_ciphertext_list(
&lwe_sk,
&mut input_lwe_list,
&plaintext_list,
params.lwe_noise_distribution.unwrap(),
&mut encryption_generator,
);
input_lwe_list
})
.collect::<Vec<_>>();
let output_glwes = (0..elements)
.map(|_| {
GlweCiphertext::new(
Scalar::ZERO,
glwe_sk.glwe_dimension().to_glwe_size(),
glwe_sk.polynomial_size(),
ciphertext_modulus,
)
})
.collect::<Vec<_>>();
(input_lwe_lists, output_glwes)
};
b.iter_batched(
setup_encrypted_values,
|(input_lwe_lists, mut output_glwes)| {
input_lwe_lists
.par_iter()
.zip(output_glwes.par_iter_mut())
.for_each(|(input_lwe_list, output_glwe)| {
ks_op(&pksk, input_lwe_list, output_glwe);
})
},
criterion::BatchSize::SmallInput,
)
});
}
};
let bit_size = (params.message_modulus.unwrap_or(2) as u32).ilog2();
write_to_json(
&bench_id,
*params,
name,
"packing_ks",
&OperatorType::Atomic,
bit_size,
vec![bit_size],
);
}
}
#[cfg(feature = "gpu")]
mod cuda {
use benchmark::params::{benchmark_parameters, multi_bit_benchmark_parameters};
use benchmark::utilities::{
cuda_local_keys_core, cuda_local_streams_core, get_bench_type, throughput_num_threads,
write_to_json, BenchmarkType, CpuKeys, CpuKeysBuilder, CryptoParametersRecord, CudaIndexes,
CudaLocalKeys, OperatorType,
};
use criterion::{black_box, Criterion, Throughput};
use rayon::prelude::*;
use serde::Serialize;
use tfhe::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use tfhe::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use tfhe::core_crypto::gpu::{
cuda_keyswitch_lwe_ciphertext, cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64,
get_number_of_gpus, CudaStreams,
};
use tfhe::core_crypto::prelude::*;
fn cuda_keyswitch<Scalar: UnsignedTorus + CastInto<usize> + CastFrom<u64> + Serialize>(
criterion: &mut Criterion,
parameters: &[(String, CryptoParametersRecord<Scalar>)],
) {
let bench_name = "core_crypto::cuda::keyswitch";
let mut bench_group = criterion.benchmark_group(bench_name);
// Create the PRNG
let mut seeder = new_seeder();
let seeder = seeder.as_mut();
let mut encryption_generator =
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
let mut secret_generator =
SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
for (name, params) in parameters.iter() {
let lwe_dimension = params.lwe_dimension.unwrap();
let glwe_dimension = params.glwe_dimension.unwrap();
let polynomial_size = params.polynomial_size.unwrap();
let ks_decomp_base_log = params.ks_base_log.unwrap();
let ks_decomp_level_count = params.ks_level.unwrap();
let lwe_sk = allocate_and_generate_new_binary_lwe_secret_key(
lwe_dimension,
&mut secret_generator,
);
let glwe_sk = allocate_and_generate_new_binary_glwe_secret_key(
glwe_dimension,
polynomial_size,
&mut secret_generator,
);
let big_lwe_sk = glwe_sk.into_lwe_secret_key();
let ksk_big_to_small = allocate_and_generate_new_lwe_keyswitch_key(
&big_lwe_sk,
&lwe_sk,
ks_decomp_base_log,
ks_decomp_level_count,
params.lwe_noise_distribution.unwrap(),
CiphertextModulus::new_native(),
&mut encryption_generator,
);
let cpu_keys: CpuKeys<_> = CpuKeysBuilder::new()
.keyswitch_key(ksk_big_to_small)
.build();
let bench_id;
match get_bench_type() {
BenchmarkType::Latency => {
let streams = CudaStreams::new_multi_gpu();
let gpu_keys = CudaLocalKeys::from_cpu_keys(&cpu_keys, None, &streams);
let ct = allocate_and_encrypt_new_lwe_ciphertext(
&big_lwe_sk,
Plaintext(Scalar::ONE),
params.lwe_noise_distribution.unwrap(),
CiphertextModulus::new_native(),
&mut encryption_generator,
);
let mut ct_gpu = CudaLweCiphertextList::from_lwe_ciphertext(&ct, &streams);
let output_ct = LweCiphertext::new(
Scalar::ZERO,
lwe_sk.lwe_dimension().to_lwe_size(),
CiphertextModulus::new_native(),
);
let mut output_ct_gpu =
CudaLweCiphertextList::from_lwe_ciphertext(&output_ct, &streams);
let h_indexes = [Scalar::ZERO];
let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0);
bench_id = format!("{bench_name}::{name}");
{
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
cuda_keyswitch_lwe_ciphertext(
gpu_keys.ksk.as_ref().unwrap(),
&ct_gpu,
&mut output_ct_gpu,
&cuda_indexes.d_input,
&cuda_indexes.d_output,
&streams,
);
black_box(&mut ct_gpu);
})
});
}
}
BenchmarkType::Throughput => {
let gpu_keys_vec = cuda_local_keys_core(&cpu_keys, None);
let gpu_count = get_number_of_gpus() as usize;
bench_id = format!("{bench_name}::throughput::{name}");
let blocks: usize = 1;
let elements = throughput_num_threads(blocks, 1);
let elements_per_stream = elements as usize / gpu_count;
bench_group.throughput(Throughput::Elements(elements));
bench_group.sample_size(50);
bench_group.bench_function(&bench_id, |b| {
let setup_encrypted_values = || {
let local_streams = cuda_local_streams_core();
let plaintext_list = PlaintextList::new(
Scalar::ZERO,
PlaintextCount(elements_per_stream),
);
let input_cts = (0..gpu_count)
.map(|i| {
let mut input_ct_list = LweCiphertextList::new(
Scalar::ZERO,
big_lwe_sk.lwe_dimension().to_lwe_size(),
LweCiphertextCount(elements_per_stream),
params.ciphertext_modulus.unwrap(),
);
encrypt_lwe_ciphertext_list(
&big_lwe_sk,
&mut input_ct_list,
&plaintext_list,
params.lwe_noise_distribution.unwrap(),
&mut encryption_generator,
);
let input_ks_list = LweCiphertextList::from_container(
input_ct_list.into_container(),
big_lwe_sk.lwe_dimension().to_lwe_size(),
params.ciphertext_modulus.unwrap(),
);
CudaLweCiphertextList::from_lwe_ciphertext_list(
&input_ks_list,
&local_streams[i],
)
})
.collect::<Vec<_>>();
let output_cts = (0..gpu_count)
.map(|i| {
let output_ct_list = LweCiphertextList::new(
Scalar::ZERO,
lwe_sk.lwe_dimension().to_lwe_size(),
LweCiphertextCount(elements_per_stream),
params.ciphertext_modulus.unwrap(),
);
CudaLweCiphertextList::from_lwe_ciphertext_list(
&output_ct_list,
&local_streams[i],
)
})
.collect::<Vec<_>>();
let h_indexes = (0..(elements / gpu_count as u64))
.map(CastFrom::cast_from)
.collect::<Vec<_>>();
let cuda_indexes_vec = (0..gpu_count)
.map(|i| CudaIndexes::new(&h_indexes, &local_streams[i], 0))
.collect::<Vec<_>>();
local_streams.iter().for_each(|stream| stream.synchronize());
(input_cts, output_cts, cuda_indexes_vec, local_streams)
};
b.iter_batched(
setup_encrypted_values,
|(input_cts, mut output_cts, cuda_indexes_vec, local_streams)| {
(0..gpu_count)
.into_par_iter()
.zip(input_cts.par_iter())
.zip(output_cts.par_iter_mut())
.zip(local_streams.par_iter())
.for_each(|(((i, input_ct), output_ct), local_stream)| {
cuda_keyswitch_lwe_ciphertext(
gpu_keys_vec[i].ksk.as_ref().unwrap(),
input_ct,
output_ct,
&cuda_indexes_vec[i].d_input,
&cuda_indexes_vec[i].d_output,
local_stream,
);
})
},
criterion::BatchSize::SmallInput,
)
});
}
};
let bit_size = (params.message_modulus.unwrap_or(2) as u32).ilog2();
write_to_json(
&bench_id,
*params,
name,
"ks",
&OperatorType::Atomic,
bit_size,
vec![bit_size],
);
}
}
fn cuda_packing_keyswitch<
Scalar: UnsignedTorus + CastInto<usize> + CastFrom<u64> + Serialize,
>(
criterion: &mut Criterion,
parameters: &[(String, CryptoParametersRecord<Scalar>)],
) {
let bench_name = "core_crypto::cuda::packing_keyswitch";
let mut bench_group = criterion.benchmark_group(bench_name);
// Create the PRNG
let mut seeder = new_seeder();
let seeder = seeder.as_mut();
let mut encryption_generator =
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
let mut secret_generator =
SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
for (name, params) in parameters.iter() {
let lwe_dimension = params.lwe_dimension.unwrap();
let glwe_dimension = params.glwe_dimension.unwrap();
let polynomial_size = params.polynomial_size.unwrap();
let ks_decomp_base_log = params.ks_base_log.unwrap();
let ks_decomp_level_count = params.ks_level.unwrap();
let glwe_noise_distribution = params.glwe_noise_distribution.unwrap();
let ciphertext_modulus = params.ciphertext_modulus.unwrap();
let lwe_sk = allocate_and_generate_new_binary_lwe_secret_key(
lwe_dimension,
&mut secret_generator,
);
let glwe_sk = allocate_and_generate_new_binary_glwe_secret_key(
glwe_dimension,
polynomial_size,
&mut secret_generator,
);
let pksk = allocate_and_generate_new_lwe_packing_keyswitch_key(
&lwe_sk,
&glwe_sk,
ks_decomp_base_log,
ks_decomp_level_count,
glwe_noise_distribution,
ciphertext_modulus,
&mut encryption_generator,
);
let cpu_keys: CpuKeys<_> = CpuKeysBuilder::new().packing_keyswitch_key(pksk).build();
let bench_id;
match get_bench_type() {
BenchmarkType::Latency => {
let streams = CudaStreams::new_multi_gpu();
let gpu_keys = CudaLocalKeys::from_cpu_keys(&cpu_keys, None, &streams);
let mut input_ct_list = LweCiphertextList::new(
Scalar::ZERO,
lwe_sk.lwe_dimension().to_lwe_size(),
LweCiphertextCount(glwe_sk.polynomial_size().0),
ciphertext_modulus,
);
let plaintext_list = PlaintextList::new(
Scalar::ZERO,
PlaintextCount(input_ct_list.lwe_ciphertext_count().0),
);
encrypt_lwe_ciphertext_list(
&lwe_sk,
&mut input_ct_list,
&plaintext_list,
params.lwe_noise_distribution.unwrap(),
&mut encryption_generator,
);
let mut d_input_lwe_list =
CudaLweCiphertextList::from_lwe_ciphertext_list(&input_ct_list, &streams);
let mut d_output_glwe = CudaGlweCiphertextList::new(
glwe_sk.glwe_dimension(),
glwe_sk.polynomial_size(),
GlweCiphertextCount(1),
ciphertext_modulus,
&streams,
);
streams.synchronize();
bench_id = format!("{bench_name}::{name}");
{
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
gpu_keys.pksk.as_ref().unwrap(),
&d_input_lwe_list,
&mut d_output_glwe,
&streams,
);
black_box(&mut d_input_lwe_list);
})
});
}
}
BenchmarkType::Throughput => {
let gpu_keys_vec = cuda_local_keys_core(&cpu_keys, None);
let gpu_count = get_number_of_gpus() as usize;
bench_id = format!("{bench_name}::throughput::{name}");
let blocks: usize = 1;
let elements = throughput_num_threads(blocks, 1);
let elements_per_stream = elements as usize / gpu_count;
bench_group.throughput(Throughput::Elements(elements));
bench_group.sample_size(50);
bench_group.bench_function(&bench_id, |b| {
let setup_encrypted_values = || {
let local_streams = cuda_local_streams_core();
let plaintext_list = PlaintextList::new(
Scalar::ZERO,
PlaintextCount(elements_per_stream),
);
let input_lwe_lists = (0..gpu_count)
.map(|i| {
let mut input_ct_list = LweCiphertextList::new(
Scalar::ZERO,
lwe_sk.lwe_dimension().to_lwe_size(),
LweCiphertextCount(glwe_sk.polynomial_size().0),
ciphertext_modulus,
);
encrypt_lwe_ciphertext_list(
&lwe_sk,
&mut input_ct_list,
&plaintext_list,
params.lwe_noise_distribution.unwrap(),
&mut encryption_generator,
);
CudaLweCiphertextList::from_lwe_ciphertext_list(
&input_ct_list,
&local_streams[i],
)
})
.collect::<Vec<_>>();
let output_glwe_list = (0..gpu_count)
.map(|i| {
CudaGlweCiphertextList::new(
glwe_sk.glwe_dimension(),
glwe_sk.polynomial_size(),
GlweCiphertextCount(1),
ciphertext_modulus,
&local_streams[i],
)
})
.collect::<Vec<_>>();
local_streams.iter().for_each(|stream| stream.synchronize());
(input_lwe_lists, output_glwe_list, local_streams)
};
b.iter_batched(
setup_encrypted_values,
|(input_lwe_lists, mut output_glwe_lists, local_streams)| {
(0..gpu_count)
.into_par_iter()
.zip(input_lwe_lists.par_iter())
.zip(output_glwe_lists.par_iter_mut())
.zip(local_streams.par_iter())
.for_each(
|(
((i, input_lwe_list), output_glwe_list),
local_stream,
)| {
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
gpu_keys_vec[i].pksk.as_ref().unwrap(),
input_lwe_list,
output_glwe_list,
local_stream,
);
},
)
},
criterion::BatchSize::SmallInput,
)
});
}
};
let bit_size = (params.message_modulus.unwrap_or(2) as u32).ilog2();
write_to_json(
&bench_id,
*params,
name,
"packing_ks",
&OperatorType::Atomic,
bit_size,
vec![bit_size],
);
}
}
pub fn cuda_ks_group() {
let mut criterion: Criterion<_> =
(Criterion::default().sample_size(2000)).configure_from_args();
cuda_keyswitch(&mut criterion, &benchmark_parameters());
cuda_packing_keyswitch(&mut criterion, &benchmark_parameters());
}
pub fn cuda_multi_bit_ks_group() {
let mut criterion: Criterion<_> =
(Criterion::default().sample_size(2000)).configure_from_args();
cuda_keyswitch(&mut criterion, &multi_bit_benchmark_parameters());
cuda_packing_keyswitch(&mut criterion, &multi_bit_benchmark_parameters());
}
}
#[cfg(feature = "gpu")]
use cuda::{cuda_ks_group, cuda_multi_bit_ks_group};
pub fn ks_group() {
let mut criterion: Criterion<_> = (Criterion::default()
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(60)))
.configure_from_args();
keyswitch(&mut criterion, &benchmark_parameters());
#[cfg(feature = "boolean")]
keyswitch(&mut criterion, &benchmark_32bits_parameters());
}
pub fn multi_bit_ks_group() {
let mut criterion: Criterion<_> = (Criterion::default()
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(60)))
.configure_from_args();
keyswitch(&mut criterion, &multi_bit_benchmark_parameters());
}
pub fn packing_ks_group() {
let mut criterion: Criterion<_> = (Criterion::default()
.sample_size(10)
.measurement_time(std::time::Duration::from_secs(30)))
.configure_from_args();
packing_keyswitch(
&mut criterion,
"packing_keyswitch",
&benchmark_compression_parameters(),
keyswitch_lwe_ciphertext_list_and_pack_in_glwe_ciphertext,
);
packing_keyswitch(
&mut criterion,
"par_packing_keyswitch",
&benchmark_compression_parameters(),
par_keyswitch_lwe_ciphertext_list_and_pack_in_glwe_ciphertext,
);
}
#[cfg(feature = "gpu")]
fn go_through_gpu_bench_groups(val: &str) {
match val.to_lowercase().as_str() {
"classical" => cuda_ks_group(),
"multi_bit" => cuda_multi_bit_ks_group(),
_ => panic!("unknown benchmark operations flavor"),
};
}
#[cfg(not(feature = "gpu"))]
fn go_through_cpu_bench_groups(val: &str) {
match val.to_lowercase().as_str() {
"classical" => {
ks_group();
packing_ks_group()
}
"multi_bit" => multi_bit_ks_group(),
_ => panic!("unknown benchmark operations flavor"),
}
}
fn main() {
match env::var("__TFHE_RS_PARAM_TYPE") {
Ok(val) => {
#[cfg(feature = "gpu")]
go_through_gpu_bench_groups(&val);
#[cfg(not(feature = "gpu"))]
go_through_cpu_bench_groups(&val);
}
Err(_) => {
ks_group();
packing_ks_group()
}
};
Criterion::default().configure_from_args().final_summary();
}
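Aside on the structure of the benches above: every CPU benchmark in this file switches on `get_bench_type()` between a latency mode, which times a single operation with `Bencher::iter`, and a throughput mode, which rebuilds a batch of independent inputs in `iter_batched` and processes the whole batch in parallel with rayon while reporting `Throughput::Elements`. A minimal sketch of that pattern on plain integers follows; `heavy_op`, the group name and the element count are illustrative placeholders, not part of this diff.

use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion, Throughput};
use rayon::prelude::*;

// Stand-in for an expensive operation such as a key switch (illustrative only).
fn heavy_op(x: u64) -> u64 {
    (0..1_000u64).fold(x, |acc, i| acc.wrapping_mul(6364136223846793005).wrapping_add(i))
}

fn latency_and_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("sketch");

    // Latency: one operation per timed iteration.
    group.bench_function("latency", |b| b.iter(|| black_box(heavy_op(black_box(3)))));

    // Throughput: rebuild a batch of independent inputs outside the timed section,
    // then process the whole batch in parallel.
    let elements: u64 = 256;
    group.throughput(Throughput::Elements(elements));
    group.bench_function("throughput", |b| {
        b.iter_batched(
            || (0..elements).collect::<Vec<u64>>(),
            |inputs| inputs.par_iter().map(|&x| heavy_op(x)).collect::<Vec<_>>(),
            BatchSize::SmallInput,
        )
    });
    group.finish();
}

criterion_group!(sketch_group, latency_and_throughput);
criterion_main!(sketch_group);

Using `iter_batched` with `BatchSize::SmallInput` keeps the per-batch setup out of the measured time, which is the same reason the benches above rebuild their ciphertext batches inside `setup_encrypted_values` rather than in the timed closure.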

File diff suppressed because it is too large

tfhe-benchmark/benches/core_crypto/modulus_switch_noise_reduction.rs

@@ -0,0 +1,89 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use modulus_switch_noise_reduction::improve_lwe_ciphertext_modulus_switch_noise_for_binary_key;
use tfhe::core_crypto::commons::parameters::{NoiseEstimationMeasureBound, RSigmaFactor};
use tfhe::core_crypto::prelude::*;
fn modulus_switch_noise_reduction(c: &mut Criterion) {
// TODO: use shortint params
let lwe_dimension = LweDimension(918);
let noise_distribution = DynamicDistribution::new_t_uniform(46);
let ciphertext_modulus = CiphertextModulus::new_native();
let bound = NoiseEstimationMeasureBound((1_u64 << (64 - 1 - 4 - 1)) as f64);
let r_sigma_factor = RSigmaFactor(14.658999256586121);
let log_modulus = PolynomialSize(2048).to_blind_rotation_input_modulus_log();
let input_variance = Variance(0.);
for count in [10, 50, 100, 1_000, 10_000, 100_000] {
let mut boxed_seeder = new_seeder();
let seeder = boxed_seeder.as_mut();
let mut secret_generator =
SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
let mut encryption_generator =
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
let sk =
allocate_and_generate_new_binary_lwe_secret_key(lwe_dimension, &mut secret_generator);
let clean_lwe = allocate_and_encrypt_new_lwe_ciphertext(
&sk,
Plaintext(0),
noise_distribution,
ciphertext_modulus,
&mut encryption_generator,
);
let mut encryptions_of_zero = LweCiphertextList::new(
0,
lwe_dimension.to_lwe_size(),
LweCiphertextCount(count),
ciphertext_modulus,
);
let plaintext_list = PlaintextList::new(0, PlaintextCount(count));
encrypt_lwe_ciphertext_list(
&sk,
&mut encryptions_of_zero,
&plaintext_list,
noise_distribution,
&mut encryption_generator,
);
let mut lwe =
LweCiphertext::new(0_u64, sk.lwe_dimension().to_lwe_size(), ciphertext_modulus);
let bench_name = "modulus_switch_noise_reduction";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(5));
let bench_name = format!("modulus_switch_noise_reduction_{count}");
bench_group.bench_function(&bench_name, |b| {
b.iter(|| {
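// Reset the working ciphertext from the clean copy so every iteration starts from the same input.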
lwe.as_mut().copy_from_slice(clean_lwe.as_ref());
improve_lwe_ciphertext_modulus_switch_noise_for_binary_key(
&mut lwe,
&encryptions_of_zero,
r_sigma_factor,
bound,
input_variance,
log_modulus,
);
black_box(&lwe);
});
});
}
}
criterion_group!(
modulus_switch_noise_reduction2,
modulus_switch_noise_reduction
);
criterion_main!(modulus_switch_noise_reduction2);

tfhe-benchmark/benches/core_crypto/pbs128_bench.rs

@@ -0,0 +1,484 @@
use benchmark::params_aliases::{
BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
};
use benchmark::utilities::{write_to_json, CryptoParametersRecord, OperatorType};
use criterion::{black_box, Criterion};
use dyn_stack::PodStack;
use tfhe::core_crypto::fft_impl::fft128::crypto::bootstrap::bootstrap_scratch;
use tfhe::core_crypto::prelude::*;
use tfhe::keycache::NamedParam;
fn pbs_128(c: &mut Criterion) {
let bench_name = "core_crypto::pbs128";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(10)
.measurement_time(std::time::Duration::from_secs(30));
type InputScalar = u64;
type OutputScalar = u128;
let noise_params = BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let base_params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let lwe_dimension = base_params.lwe_dimension; // From PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128
let glwe_dimension = noise_params.glwe_dimension;
let polynomial_size = noise_params.polynomial_size;
let lwe_noise_distribution = base_params.lwe_noise_distribution;
let glwe_noise_distribution = noise_params.glwe_noise_distribution;
let pbs_base_log = noise_params.decomp_base_log;
let pbs_level = noise_params.decomp_level_count;
let input_ciphertext_modulus = base_params.ciphertext_modulus;
let output_ciphertext_modulus = noise_params.ciphertext_modulus;
let mut boxed_seeder = new_seeder();
let seeder = boxed_seeder.as_mut();
let mut secret_generator = SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
let mut encryption_generator =
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
let input_lwe_secret_key =
LweSecretKey::generate_new_binary(lwe_dimension, &mut secret_generator);
let output_glwe_secret_key = GlweSecretKey::<Vec<OutputScalar>>::generate_new_binary(
glwe_dimension,
polynomial_size,
&mut secret_generator,
);
let output_lwe_secret_key = output_glwe_secret_key.clone().into_lwe_secret_key();
let mut bsk = LweBootstrapKey::new(
OutputScalar::ZERO,
glwe_dimension.to_glwe_size(),
polynomial_size,
pbs_base_log,
pbs_level,
lwe_dimension,
output_ciphertext_modulus,
);
par_generate_lwe_bootstrap_key(
&input_lwe_secret_key,
&output_glwe_secret_key,
&mut bsk,
glwe_noise_distribution,
&mut encryption_generator,
);
let mut fourier_bsk = Fourier128LweBootstrapKey::new(
lwe_dimension,
glwe_dimension.to_glwe_size(),
polynomial_size,
pbs_base_log,
pbs_level,
);
convert_standard_lwe_bootstrap_key_to_fourier_128(&bsk, &mut fourier_bsk);
let message_modulus: InputScalar = 1 << 4;
let input_message: InputScalar = 3;
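// Scale the message into the most significant bits, keeping one bit of padding.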
let delta: InputScalar = (1 << (InputScalar::BITS - 1)) / message_modulus;
let plaintext = Plaintext(input_message * delta);
let lwe_ciphertext_in: LweCiphertextOwned<InputScalar> =
allocate_and_encrypt_new_lwe_ciphertext(
&input_lwe_secret_key,
plaintext,
lwe_noise_distribution,
input_ciphertext_modulus,
&mut encryption_generator,
);
let accumulator: GlweCiphertextOwned<OutputScalar> = GlweCiphertextOwned::new(
OutputScalar::ONE,
glwe_dimension.to_glwe_size(),
polynomial_size,
output_ciphertext_modulus,
);
let mut out_pbs_ct: LweCiphertext<Vec<OutputScalar>> = LweCiphertext::new(
OutputScalar::ZERO,
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
output_ciphertext_modulus,
);
let fft = Fft128::new(polynomial_size);
let fft = fft.as_view();
let mut buffers = vec![
0u8;
bootstrap_scratch::<OutputScalar>(
fourier_bsk.glwe_size(),
fourier_bsk.polynomial_size(),
fft
)
.unwrap()
.unaligned_bytes_required()
];
let id = format!("{bench_name}::{}", noise_params.name());
bench_group.bench_function(&id, |b| {
b.iter(|| {
fourier_bsk.bootstrap(
&mut out_pbs_ct,
&lwe_ciphertext_in,
&accumulator,
fft,
PodStack::new(&mut buffers),
);
black_box(&mut out_pbs_ct);
});
});
// TODO Add throughput benchmark case
let params_record = CryptoParametersRecord {
lwe_dimension: Some(lwe_dimension),
glwe_dimension: Some(glwe_dimension),
polynomial_size: Some(polynomial_size),
lwe_noise_distribution: Some(lwe_noise_distribution),
glwe_noise_distribution: Some(base_params.glwe_noise_distribution),
pbs_base_log: Some(pbs_base_log),
pbs_level: Some(pbs_level),
ciphertext_modulus: Some(input_ciphertext_modulus),
..Default::default()
};
let bit_size = (message_modulus as u32).ilog2();
write_to_json(
&id,
params_record,
noise_params.name(),
"pbs",
&OperatorType::Atomic,
bit_size,
vec![bit_size],
);
}
#[cfg(feature = "gpu")]
mod cuda {
use benchmark::utilities::{
cuda_local_keys_core, cuda_local_streams_core, get_bench_type, throughput_num_threads,
write_to_json, BenchmarkType, CpuKeys, CpuKeysBuilder, CryptoParametersRecord,
CudaLocalKeys, OperatorType,
};
use criterion::{black_box, Criterion, Throughput};
use rayon::prelude::*;
use tfhe::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use tfhe::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use tfhe::core_crypto::gpu::{
cuda_programmable_bootstrap_128_lwe_ciphertext, get_number_of_gpus, CudaStreams,
};
use tfhe::core_crypto::prelude::*;
use tfhe::shortint::engine::ShortintEngine;
use tfhe::shortint::parameters::ModulusSwitchNoiseReductionParams;
use tfhe::shortint::server_key::ModulusSwitchNoiseReductionKey;
fn cuda_pbs_128(c: &mut Criterion) {
let bench_name = "core_crypto::cuda::pbs128";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(10)
.measurement_time(std::time::Duration::from_secs(30));
type Scalar = u128;
let lwe_dimension = LweDimension(879);
let glwe_dimension = GlweDimension(2);
let polynomial_size = PolynomialSize(2048);
let lwe_noise_distribution = DynamicDistribution::new_t_uniform(46);
let lwe_noise_distribution_u128: DynamicDistribution<u128> =
DynamicDistribution::new_t_uniform(46);
let glwe_noise_distribution = DynamicDistribution::new_t_uniform(30);
let pbs_base_log = DecompositionBaseLog(24);
let pbs_level = DecompositionLevelCount(3);
let ciphertext_modulus = CiphertextModulus::new_native();
let ct_modulus_u64: CiphertextModulus<u64> = CiphertextModulus::new_native();
let modulus_switch_noise_reduction_params = ModulusSwitchNoiseReductionParams {
modulus_switch_zeros_count: LweCiphertextCount(1449),
ms_bound: NoiseEstimationMeasureBound(288230376151711744f64),
ms_r_sigma_factor: RSigmaFactor(13.179852282053789f64),
ms_input_variance: Variance(2.63039184094559E-7f64),
};
let params_name = "PARAMS_SWITCH_SQUASH";
let mut boxed_seeder = new_seeder();
let seeder = boxed_seeder.as_mut();
let mut secret_generator =
SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
let mut encryption_generator =
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
let input_lwe_secret_key =
LweSecretKey::generate_new_binary(lwe_dimension, &mut secret_generator);
let input_lwe_secret_key_u128 = LweSecretKey::from_container(
input_lwe_secret_key
.as_ref()
.iter()
.copied()
.map(|x| x as u128)
.collect::<Vec<_>>(),
);
let output_glwe_secret_key = GlweSecretKey::<Vec<Scalar>>::generate_new_binary(
glwe_dimension,
polynomial_size,
&mut secret_generator,
);
let output_lwe_secret_key = output_glwe_secret_key.clone().into_lwe_secret_key();
let bsk = LweBootstrapKey::new(
Scalar::ZERO,
glwe_dimension.to_glwe_size(),
polynomial_size,
pbs_base_log,
pbs_level,
lwe_dimension,
ciphertext_modulus,
);
let mut engine = ShortintEngine::new();
let modulus_switch_noise_reduction_key = Some(ModulusSwitchNoiseReductionKey::new(
modulus_switch_noise_reduction_params,
&input_lwe_secret_key,
&mut engine,
CiphertextModulus::new_native(),
lwe_noise_distribution,
));
let cpu_keys: CpuKeys<_> = CpuKeysBuilder::new().bootstrap_key(bsk).build();
let message_modulus: Scalar = 1 << 4;
let input_message: Scalar = 3;
let delta: Scalar = (1 << (Scalar::BITS - 1)) / message_modulus;
let plaintext = Plaintext(input_message * delta);
let bench_id;
match get_bench_type() {
BenchmarkType::Latency => {
let streams = CudaStreams::new_multi_gpu();
let gpu_keys = CudaLocalKeys::from_cpu_keys(
&cpu_keys,
modulus_switch_noise_reduction_key.as_ref(),
&streams,
);
let lwe_ciphertext_in: LweCiphertextOwned<Scalar> =
allocate_and_encrypt_new_lwe_ciphertext(
&input_lwe_secret_key_u128,
plaintext,
lwe_noise_distribution_u128,
ciphertext_modulus,
&mut encryption_generator,
);
let lwe_ciphertext_in_gpu =
CudaLweCiphertextList::from_lwe_ciphertext(&lwe_ciphertext_in, &streams);
let accumulator: GlweCiphertextOwned<Scalar> = GlweCiphertextOwned::new(
Scalar::ONE,
glwe_dimension.to_glwe_size(),
polynomial_size,
ciphertext_modulus,
);
let accumulator_gpu =
CudaGlweCiphertextList::from_glwe_ciphertext(&accumulator, &streams);
let out_pbs_ct = LweCiphertext::new(
Scalar::ZERO,
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
ciphertext_modulus,
);
let mut out_pbs_ct_gpu =
CudaLweCiphertextList::from_lwe_ciphertext(&out_pbs_ct, &streams);
bench_id = format!("{bench_name}::{params_name}");
{
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
cuda_programmable_bootstrap_128_lwe_ciphertext(
&lwe_ciphertext_in_gpu,
&mut out_pbs_ct_gpu,
&accumulator_gpu,
LweCiphertextCount(1),
gpu_keys.bsk.as_ref().unwrap(),
&streams,
);
black_box(&mut out_pbs_ct_gpu);
})
});
}
}
BenchmarkType::Throughput => {
let gpu_keys_vec =
cuda_local_keys_core(&cpu_keys, modulus_switch_noise_reduction_key.as_ref());
let gpu_count = get_number_of_gpus() as usize;
bench_id = format!("{bench_name}::throughput::{params_name}");
let blocks: usize = 1;
let elements = throughput_num_threads(blocks, 1);
let elements_per_stream = elements as usize / gpu_count;
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let setup_encrypted_values = || {
let local_streams = cuda_local_streams_core();
let plaintext_list =
PlaintextList::new(Scalar::ZERO, PlaintextCount(elements_per_stream));
let input_cts = (0..gpu_count)
.map(|i| {
let mut input_ct_list = LweCiphertextList::new(
Scalar::ZERO,
input_lwe_secret_key.lwe_dimension().to_lwe_size(),
LweCiphertextCount(elements_per_stream),
ciphertext_modulus,
);
encrypt_lwe_ciphertext_list(
&input_lwe_secret_key_u128,
&mut input_ct_list,
&plaintext_list,
lwe_noise_distribution_u128,
&mut encryption_generator,
);
CudaLweCiphertextList::from_lwe_ciphertext_list(
&input_ct_list,
&local_streams[i],
)
})
.collect::<Vec<_>>();
let accumulators = (0..gpu_count)
.map(|i| {
let accumulator = GlweCiphertextOwned::new(
Scalar::ONE,
glwe_dimension.to_glwe_size(),
polynomial_size,
ciphertext_modulus,
);
CudaGlweCiphertextList::from_glwe_ciphertext(
&accumulator,
&local_streams[i],
)
})
.collect::<Vec<_>>();
// Allocate the LweCiphertext to store the result of the PBS
let output_cts = (0..gpu_count)
.map(|i| {
let output_ct_list = LweCiphertextList::new(
Scalar::ZERO,
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
LweCiphertextCount(elements_per_stream),
ciphertext_modulus,
);
CudaLweCiphertextList::from_lwe_ciphertext_list(
&output_ct_list,
&local_streams[i],
)
})
.collect::<Vec<_>>();
local_streams.iter().for_each(|stream| stream.synchronize());
(input_cts, output_cts, accumulators, local_streams)
};
b.iter_batched(
setup_encrypted_values,
|(input_cts, mut output_cts, accumulators, local_streams)| {
(0..gpu_count)
.into_par_iter()
.zip(input_cts.par_iter())
.zip(output_cts.par_iter_mut())
.zip(accumulators.par_iter())
.zip(local_streams.par_iter())
.for_each(
|((((i, input_ct), output_ct), accumulator), local_stream)| {
cuda_programmable_bootstrap_128_lwe_ciphertext(
input_ct,
output_ct,
accumulator,
LweCiphertextCount(1),
gpu_keys_vec[i].bsk.as_ref().unwrap(),
local_stream,
);
},
)
},
criterion::BatchSize::SmallInput,
);
});
}
};
let params_record = CryptoParametersRecord {
lwe_dimension: Some(lwe_dimension),
glwe_dimension: Some(glwe_dimension),
polynomial_size: Some(polynomial_size),
lwe_noise_distribution: Some(lwe_noise_distribution),
glwe_noise_distribution: Some(glwe_noise_distribution),
pbs_base_log: Some(pbs_base_log),
pbs_level: Some(pbs_level),
ciphertext_modulus: Some(ct_modulus_u64),
..Default::default()
};
let bit_size = (message_modulus as u32).ilog2();
write_to_json(
&bench_id,
params_record,
params_name,
"pbs",
&OperatorType::Atomic,
bit_size,
vec![bit_size],
);
}
pub fn cuda_pbs128_group() {
let mut criterion: Criterion<_> = Criterion::default().configure_from_args();
cuda_pbs_128(&mut criterion);
}
}
#[cfg(feature = "gpu")]
use cuda::cuda_pbs128_group;
pub fn pbs128_group() {
let mut criterion: Criterion<_> = Criterion::default().configure_from_args();
pbs_128(&mut criterion);
}
#[cfg(feature = "gpu")]
fn go_through_gpu_bench_groups() {
cuda_pbs128_group();
}
#[cfg(not(feature = "gpu"))]
fn go_through_cpu_bench_groups() {
pbs128_group();
}
fn main() {
#[cfg(feature = "gpu")]
go_through_gpu_bench_groups();
#[cfg(not(feature = "gpu"))]
go_through_cpu_bench_groups();
Criterion::default().configure_from_args().final_summary();
}

File diff suppressed because it is too large

tfhe-benchmark/benches/high_level_api/bench.rs

@@ -0,0 +1,134 @@
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
use criterion::{black_box, Criterion};
use rand::prelude::*;
use std::fmt::Write;
use std::ops::*;
use tfhe::prelude::*;
use tfhe::{
set_server_key, ClientKey, CompressedServerKey, ConfigBuilder, FheUint10, FheUint12,
FheUint128, FheUint14, FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8,
};
fn bench_fhe_type<FheType>(c: &mut Criterion, client_key: &ClientKey, type_name: &str)
where
FheType: FheEncrypt<u128, ClientKey>,
for<'a> &'a FheType: Add<&'a FheType, Output = FheType>
+ Sub<&'a FheType, Output = FheType>
+ Mul<&'a FheType, Output = FheType>
+ BitAnd<&'a FheType, Output = FheType>
+ BitOr<&'a FheType, Output = FheType>
+ BitXor<&'a FheType, Output = FheType>
+ Shl<&'a FheType, Output = FheType>
+ Shr<&'a FheType, Output = FheType>
+ RotateLeft<&'a FheType, Output = FheType>
+ RotateRight<&'a FheType, Output = FheType>
+ OverflowingAdd<&'a FheType, Output = FheType>
+ OverflowingSub<&'a FheType, Output = FheType>,
{
let mut bench_group = c.benchmark_group(type_name);
let mut rng = thread_rng();
let lhs = FheType::encrypt(rng.gen(), client_key);
let rhs = FheType::encrypt(rng.gen(), client_key);
let mut name = String::with_capacity(255);
write!(name, "add({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs + &rhs)));
name.clear();
write!(name, "overflowing_add({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| {
b.iter(|| black_box((&lhs).overflowing_add(&rhs)))
});
name.clear();
write!(name, "overflowing_sub({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(lhs.overflowing_sub(&rhs))));
name.clear();
write!(name, "sub({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs - &rhs)));
name.clear();
write!(name, "mul({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs * &rhs)));
name.clear();
write!(name, "bitand({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs & &rhs)));
name.clear();
write!(name, "bitor({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs | &rhs)));
name.clear();
write!(name, "bitxor({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs ^ &rhs)));
name.clear();
write!(name, "shl({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs << &rhs)));
name.clear();
write!(name, "shr({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs >> &rhs)));
name.clear();
write!(name, "rotl({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box((&lhs).rotate_left(&rhs))));
name.clear();
write!(name, "rotr({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box((&lhs).rotate_right(&rhs))));
name.clear();
}
macro_rules! bench_type {
($fhe_type:ident) => {
::paste::paste! {
fn [<bench_ $fhe_type:snake>](c: &mut Criterion, cks: &ClientKey) {
bench_fhe_type::<$fhe_type>(c, cks, stringify!($fhe_type));
}
}
};
}
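// For reference (not part of this diff): with paste's `:snake` case conversion,
// `bench_type!(FheUint64)` is expected to expand to roughly
//
//     fn bench_fhe_uint64(c: &mut Criterion, cks: &ClientKey) {
//         bench_fhe_type::<FheUint64>(c, cks, "FheUint64");
//     }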
bench_type!(FheUint2);
bench_type!(FheUint4);
bench_type!(FheUint6);
bench_type!(FheUint8);
bench_type!(FheUint10);
bench_type!(FheUint12);
bench_type!(FheUint14);
bench_type!(FheUint16);
bench_type!(FheUint32);
bench_type!(FheUint64);
bench_type!(FheUint128);
fn main() {
let config =
ConfigBuilder::with_custom_parameters(BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128)
.build();
let cks = ClientKey::generate(config);
let compressed_sks = CompressedServerKey::new(&cks);
set_server_key(compressed_sks.decompress());
let mut c = Criterion::default().configure_from_args();
bench_fhe_uint2(&mut c, &cks);
bench_fhe_uint4(&mut c, &cks);
bench_fhe_uint6(&mut c, &cks);
bench_fhe_uint8(&mut c, &cks);
bench_fhe_uint10(&mut c, &cks);
bench_fhe_uint12(&mut c, &cks);
bench_fhe_uint14(&mut c, &cks);
bench_fhe_uint16(&mut c, &cks);
bench_fhe_uint32(&mut c, &cks);
bench_fhe_uint64(&mut c, &cks);
bench_fhe_uint128(&mut c, &cks);
c.final_summary();
}

tfhe-benchmark/benches/high_level_api/dex.rs

@@ -0,0 +1,539 @@
#[cfg(feature = "gpu")]
use benchmark::params_aliases::BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
#[cfg(not(feature = "gpu"))]
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
#[cfg(feature = "gpu")]
use benchmark::utilities::configure_gpu;
use benchmark::utilities::{write_to_json, OperatorType};
use criterion::measurement::WallTime;
use criterion::{BenchmarkGroup, Criterion};
use rand::prelude::*;
use rand::thread_rng;
use std::ops::{Add, Div, Mul, Sub};
use tfhe::keycache::NamedParam;
use tfhe::prelude::*;
#[cfg(not(feature = "gpu"))]
use tfhe::{set_server_key, CompressedServerKey};
use tfhe::{ClientKey, ConfigBuilder, FheBool, FheUint128, FheUint64};
pub(crate) fn transfer_whitepaper<FheType>(
from_amount: &FheType,
to_amount: &FheType,
amount: &FheType,
) -> (FheType, FheType)
where
FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType>,
FheBool: IfThenElse<FheType>,
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
{
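// Compute both updated balances, then keep them only if the sender has enough funds;
// otherwise the original balances are returned unchanged.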
let has_enough_funds = (from_amount).ge(amount);
let mut new_to_amount = to_amount + amount;
new_to_amount = has_enough_funds.if_then_else(&new_to_amount, to_amount);
let mut new_from_amount = from_amount - amount;
new_from_amount = has_enough_funds.if_then_else(&new_from_amount, from_amount);
(new_from_amount, new_to_amount)
}
#[allow(clippy::too_many_arguments)]
fn swap_request<FheType>(
from_balance_0: &FheType,
from_balance_1: &FheType,
current_dex_balance_0: &FheType,
current_dex_balance_1: &FheType,
to_balance_0: &FheType,
to_balance_1: &FheType,
total_dex_token_0_in: &FheType,
total_dex_token_1_in: &FheType,
amount0: &FheType,
amount1: &FheType,
) -> (FheType, FheType, FheType, FheType)
where
FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType> + Clone,
FheBool: IfThenElse<FheType>,
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
{
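// Run the two transfers from the `from_balance_*` accounts into the DEX balances (each one is a
// no-op when funds are insufficient), then credit whatever was actually sent to the pending
// amounts and the running per-token totals.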
let (_, new_current_balance_0) =
transfer_whitepaper(from_balance_0, current_dex_balance_0, amount0);
let (_, new_current_balance_1) =
transfer_whitepaper(from_balance_1, current_dex_balance_1, amount1);
let sent0 = &new_current_balance_0 - current_dex_balance_0;
let sent1 = &new_current_balance_1 - current_dex_balance_1;
let pending_0_in = to_balance_0 + &sent0;
let pending_total_token_0_in = total_dex_token_0_in + &sent0;
let pending_1_in = to_balance_1 + &sent1;
let pending_total_token_1_in = total_dex_token_1_in + &sent1;
(
pending_0_in,
pending_total_token_0_in,
pending_1_in,
pending_total_token_1_in,
)
}
#[allow(clippy::too_many_arguments)]
fn swap_claim<FheType, BigFheType>(
pending_0_in: &FheType,
pending_1_in: &FheType,
total_dex_token_0_in: u64,
total_dex_token_1_in: u64,
total_dex_token_0_out: u64,
total_dex_token_1_out: u64,
old_balance_0: &FheType,
old_balance_1: &FheType,
current_dex_balance_0: &FheType,
current_dex_balance_1: &FheType,
) -> (FheType, FheType)
where
FheType: CastFrom<FheBool>
+ for<'a> FheOrd<&'a FheType>
+ CastFrom<BigFheType>
+ Clone
+ Add<Output = FheType>,
BigFheType: CastFrom<FheType> + Mul<u128, Output = BigFheType> + Div<u128, Output = BigFheType>,
FheBool: IfThenElse<FheType>,
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
{
let mut new_balance_0 = old_balance_0.clone();
let mut new_balance_1 = old_balance_1.clone();
if total_dex_token_1_in != 0 {
let big_pending_1_in = BigFheType::cast_from(pending_1_in.clone());
let big_amount_0_out =
(big_pending_1_in * total_dex_token_0_out as u128) / total_dex_token_1_in as u128;
let amount_0_out = FheType::cast_from(big_amount_0_out);
let (_, new_balance_0_tmp) =
transfer_whitepaper(current_dex_balance_0, old_balance_0, &amount_0_out);
new_balance_0 = new_balance_0_tmp;
}
if total_dex_token_0_in != 0 {
let big_pending_0_in = BigFheType::cast_from(pending_0_in.clone());
let big_amount_1_out =
(big_pending_0_in * total_dex_token_1_out as u128) / total_dex_token_0_in as u128;
let amount_1_out = FheType::cast_from(big_amount_1_out);
let (_, new_balance_1_tmp) =
transfer_whitepaper(current_dex_balance_1, old_balance_1, &amount_1_out);
new_balance_1 = new_balance_1_tmp;
}
(new_balance_0, new_balance_1)
}
#[cfg(feature = "pbs-stats")]
mod pbs_stats {
use super::*;
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::Path;
fn write_result(file: &mut File, name: &str, value: usize) {
let line = format!("{name},{value}\n");
let error_message = format!("cannot write {name} result into file");
file.write_all(line.as_bytes()).expect(&error_message);
}
pub fn print_swap_request_pbs_counts<FheType, F>(
client_key: &ClientKey,
type_name: &str,
swap_request_func: F,
) where
FheType: FheEncrypt<u64, ClientKey>,
F: for<'a> Fn(
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
) -> (FheType, FheType, FheType, FheType),
{
let mut rng = thread_rng();
let from_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let from_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let current_dex_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let current_dex_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let to_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let to_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let total_dex_token_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let total_dex_token_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let amount_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let amount_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
#[cfg(feature = "gpu")]
configure_gpu(client_key);
tfhe::reset_pbs_count();
let (_, _, _, _) = swap_request_func(
&from_balance_0,
&from_balance_1,
&current_dex_balance_0,
&current_dex_balance_1,
&to_balance_0,
&to_balance_1,
&total_dex_token_0,
&total_dex_token_1,
&amount_0,
&amount_1,
);
let count = tfhe::get_pbs_count();
println!("ERC20 swap request/::{type_name}: {count} PBS");
let params = client_key.computation_parameters();
let test_name = if cfg!(feature = "gpu") {
format!("hlapi::cuda::dex::swap_request::pbs_count::{type_name}")
} else {
format!("hlapi::dex::swap_request::pbs_count::{type_name}")
};
let results_file = Path::new("dex_swap_request_pbs_count.csv");
if !results_file.exists() {
File::create(results_file).expect("create results file failed");
}
let mut file = OpenOptions::new()
.append(true)
.open(results_file)
.expect("cannot open results file");
write_result(&mut file, &test_name, count as usize);
write_to_json::<u64, _>(
&test_name,
params,
params.name(),
"pbs-count",
&OperatorType::Atomic,
0,
vec![],
);
}
pub fn print_swap_claim_pbs_counts<FheType, F>(
client_key: &ClientKey,
type_name: &str,
swap_claim_func: F,
) where
FheType: FheEncrypt<u64, ClientKey>,
F: for<'a> Fn(
&'a FheType,
&'a FheType,
u64,
u64,
u64,
u64,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
) -> (FheType, FheType),
{
let mut rng = thread_rng();
let pending_0_in = FheType::encrypt(rng.gen::<u64>(), client_key);
let pending_1_in = FheType::encrypt(rng.gen::<u64>(), client_key);
let total_dex_token_0_in = rng.gen::<u64>();
let total_dex_token_1_in = rng.gen::<u64>();
let total_dex_token_0_out = rng.gen::<u64>();
let total_dex_token_1_out = rng.gen::<u64>();
let old_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let old_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let current_dex_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let current_dex_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
#[cfg(feature = "gpu")]
configure_gpu(client_key);
tfhe::reset_pbs_count();
let (_, _) = swap_claim_func(
&pending_0_in,
&pending_1_in,
total_dex_token_0_in,
total_dex_token_1_in,
total_dex_token_0_out,
total_dex_token_1_out,
&old_balance_0,
&old_balance_1,
&current_dex_balance_0,
&current_dex_balance_1,
);
let count = tfhe::get_pbs_count();
println!("ERC20 swap claim/::{type_name}: {count} PBS");
let params = client_key.computation_parameters();
let test_name = if cfg!(feature = "gpu") {
format!("hlapi::cuda::dex::swap_claim::pbs_count::{type_name}")
} else {
format!("hlapi::dex::swap_claim::pbs_count::{type_name}")
};
let results_file = Path::new("dex_swap_claim_pbs_count.csv");
if !results_file.exists() {
File::create(results_file).expect("create results file failed");
}
let mut file = OpenOptions::new()
.append(true)
.open(results_file)
.expect("cannot open results file");
write_result(&mut file, &test_name, count as usize);
write_to_json::<u64, _>(
&test_name,
params,
params.name(),
"pbs-count",
&OperatorType::Atomic,
0,
vec![],
);
}
}
fn bench_swap_request_latency<FheType, F>(
c: &mut BenchmarkGroup<'_, WallTime>,
client_key: &ClientKey,
bench_name: &str,
type_name: &str,
fn_name: &str,
swap_request_func: F,
) where
FheType: FheEncrypt<u64, ClientKey>,
F: for<'a> Fn(
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
) -> (FheType, FheType, FheType, FheType),
{
#[cfg(feature = "gpu")]
configure_gpu(client_key);
let bench_id = format!("{bench_name}::{fn_name}::{type_name}");
c.bench_function(&bench_id, |b| {
let mut rng = thread_rng();
let from_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let from_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let current_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let current_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let to_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let to_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let total_token_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let total_token_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let amount_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let amount_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
b.iter(|| {
let (_, _, _, _) = swap_request_func(
&from_balance_0,
&from_balance_1,
&current_balance_0,
&current_balance_1,
&to_balance_0,
&to_balance_1,
&total_token_0,
&total_token_1,
&amount_0,
&amount_1,
);
})
});
let params = client_key.computation_parameters();
write_to_json::<u64, _>(
&bench_id,
params,
params.name(),
"dex-swap-request",
&OperatorType::Atomic,
64,
vec![],
);
}
fn bench_swap_claim_latency<FheType, F>(
c: &mut BenchmarkGroup<'_, WallTime>,
client_key: &ClientKey,
bench_name: &str,
type_name: &str,
fn_name: &str,
swap_claim_func: F,
) where
FheType: FheEncrypt<u64, ClientKey>,
F: for<'a> Fn(
&'a FheType,
&'a FheType,
u64,
u64,
u64,
u64,
&'a FheType,
&'a FheType,
&'a FheType,
&'a FheType,
) -> (FheType, FheType),
{
#[cfg(feature = "gpu")]
configure_gpu(client_key);
let bench_id = format!("{bench_name}::{fn_name}::{type_name}");
c.bench_function(&bench_id, |b| {
let mut rng = thread_rng();
let pending_0_in = FheType::encrypt(rng.gen::<u64>(), client_key);
let pending_1_in = FheType::encrypt(rng.gen::<u64>(), client_key);
let total_token_0_in = rng.gen::<u64>();
let total_token_1_in = rng.gen::<u64>();
let total_token_0_out = rng.gen::<u64>();
let total_token_1_out = rng.gen::<u64>();
let old_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let old_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
let current_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
let current_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
b.iter(|| {
let (_, _) = swap_claim_func(
&pending_0_in,
&pending_1_in,
total_token_0_in,
total_token_1_in,
total_token_0_out,
total_token_1_out,
&old_balance_0,
&old_balance_1,
&current_balance_0,
&current_balance_1,
);
})
});
let params = client_key.computation_parameters();
write_to_json::<u64, _>(
&bench_id,
params,
params.name(),
"dex-swap-claim",
&OperatorType::Atomic,
64,
vec![],
);
}
#[cfg(feature = "pbs-stats")]
use crate::pbs_stats::print_swap_claim_pbs_counts;
#[cfg(feature = "pbs-stats")]
use crate::pbs_stats::print_swap_request_pbs_counts;
#[cfg(not(feature = "gpu"))]
fn main() {
let params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(params).build();
let cks = ClientKey::generate(config);
let compressed_sks = CompressedServerKey::new(&cks);
let sks = compressed_sks.decompress();
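    // Install the server key on every rayon worker thread, then on the main thread.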
rayon::broadcast(|_| set_server_key(sks.clone()));
set_server_key(sks);
let mut c = Criterion::default().sample_size(10).configure_from_args();
let bench_name = "hlapi::dex";
    // FheUint64 PBS counts
    // We only run each operation once: since every input is encrypted,
    // the PBS count is always the same.
#[cfg(feature = "pbs-stats")]
{
print_swap_request_pbs_counts(&cks, "FheUint64", swap_request::<FheUint64>);
print_swap_claim_pbs_counts(&cks, "FheUint64", swap_claim::<FheUint64, FheUint128>);
}
// FheUint64 latency
{
let mut group = c.benchmark_group(bench_name);
bench_swap_request_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"swap_request",
swap_request::<FheUint64>,
);
bench_swap_claim_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"swap_claim",
swap_claim::<FheUint64, FheUint128>,
);
group.finish();
}
c.final_summary();
}
#[cfg(feature = "gpu")]
fn main() {
let params = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(params).build();
let cks = ClientKey::generate(config);
let mut c = Criterion::default().sample_size(10).configure_from_args();
let bench_name = "hlapi::cuda::dex";
    // FheUint64 PBS counts
    // We only run each operation once: since every input is encrypted,
    // the PBS count is always the same.
#[cfg(feature = "pbs-stats")]
{
print_swap_request_pbs_counts(&cks, "FheUint64", swap_request::<FheUint64>);
print_swap_claim_pbs_counts(&cks, "FheUint64", swap_claim::<FheUint64, FheUint128>);
}
// FheUint64 latency
{
let mut group = c.benchmark_group(bench_name);
bench_swap_request_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"swap_request",
swap_request::<FheUint64>,
);
bench_swap_claim_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"swap_claim",
swap_claim::<FheUint64, FheUint128>,
);
group.finish();
}
c.final_summary();
}

View File

@@ -0,0 +1,595 @@
#[cfg(feature = "gpu")]
use benchmark::params_aliases::BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
#[cfg(not(feature = "gpu"))]
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
#[cfg(feature = "gpu")]
use benchmark::utilities::configure_gpu;
use benchmark::utilities::{write_to_json, OperatorType};
use criterion::measurement::WallTime;
use criterion::{BenchmarkGroup, Criterion, Throughput};
use rand::prelude::*;
use rand::thread_rng;
use rayon::prelude::*;
use std::ops::{Add, Mul, Sub};
use tfhe::keycache::NamedParam;
use tfhe::prelude::*;
#[cfg(feature = "gpu")]
use tfhe::GpuIndex;
use tfhe::{set_server_key, ClientKey, CompressedServerKey, ConfigBuilder, FheBool, FheUint64};
/// Transfer as written in the original FHEvm white-paper:
/// it uses a comparison to check that the sender has enough funds,
/// and cmuxes based on the comparison result
pub fn transfer_whitepaper<FheType>(
from_amount: &FheType,
to_amount: &FheType,
amount: &FheType,
) -> (FheType, FheType)
where
FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType>,
FheBool: IfThenElse<FheType>,
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
{
let has_enough_funds = (from_amount).ge(amount);
let mut new_to_amount = to_amount + amount;
new_to_amount = has_enough_funds.if_then_else(&new_to_amount, to_amount);
let mut new_from_amount = from_amount - amount;
new_from_amount = has_enough_funds.if_then_else(&new_from_amount, from_amount);
(new_from_amount, new_to_amount)
}
/// This one also uses a comparison, but it leverages the 'boolean' multiplication
/// instead of cmuxes, so it is faster
fn transfer_no_cmux<FheType>(
from_amount: &FheType,
to_amount: &FheType,
amount: &FheType,
) -> (FheType, FheType)
where
FheType: Add<Output = FheType> + CastFrom<FheBool> + for<'a> FheOrd<&'a FheType>,
FheBool: IfThenElse<FheType>,
for<'a> &'a FheType:
Add<Output = FheType> + Sub<Output = FheType> + Mul<FheType, Output = FheType>,
{
let has_enough_funds = (from_amount).ge(amount);
let amount = amount * FheType::cast_from(has_enough_funds);
let new_to_amount = to_amount + &amount;
let new_from_amount = from_amount - &amount;
(new_from_amount, new_to_amount)
}
/// This one uses an overflowing sub to remove the need for a comparison;
/// it also uses the 'boolean' multiplication
fn transfer_overflow<FheType>(
from_amount: &FheType,
to_amount: &FheType,
amount: &FheType,
) -> (FheType, FheType)
where
FheType: CastFrom<FheBool> + for<'a> FheOrd<&'a FheType>,
FheBool: IfThenElse<FheType>,
for<'a> &'a FheType: Add<FheType, Output = FheType>
+ OverflowingSub<&'a FheType, Output = FheType>
+ Mul<FheType, Output = FheType>,
{
let (new_from, did_not_have_enough) = (from_amount).overflowing_sub(amount);
let new_from_amount = did_not_have_enough.if_then_else(from_amount, &new_from);
let had_enough_funds = !did_not_have_enough;
let new_to_amount = to_amount + (amount * FheType::cast_from(had_enough_funds));
(new_from_amount, new_to_amount)
}
/// This one uses both overflowing_add/sub to check that
/// the sender has enough funds and that the receiver will not overflow its balance
fn transfer_safe<FheType>(
from_amount: &FheType,
to_amount: &FheType,
amount: &FheType,
) -> (FheType, FheType)
where
for<'a> &'a FheType: OverflowingSub<&'a FheType, Output = FheType>
+ OverflowingAdd<&'a FheType, Output = FheType>,
FheBool: IfThenElse<FheType>,
{
let (new_from, did_not_have_enough_funds) = (from_amount).overflowing_sub(amount);
let (new_to, did_not_have_enough_space) = (to_amount).overflowing_add(amount);
let something_not_ok = did_not_have_enough_funds | did_not_have_enough_space;
let new_from_amount = something_not_ok.if_then_else(from_amount, &new_from);
let new_to_amount = something_not_ok.if_then_else(to_amount, &new_to);
(new_from_amount, new_to_amount)
}
#[cfg(feature = "pbs-stats")]
mod pbs_stats {
use super::*;
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::Path;
fn write_result(file: &mut File, name: &str, value: usize) {
let line = format!("{name},{value}\n");
let error_message = format!("cannot write {name} result into file");
file.write_all(line.as_bytes()).expect(&error_message);
}
pub fn print_transfer_pbs_counts<FheType, F>(
client_key: &ClientKey,
type_name: &str,
fn_name: &str,
transfer_func: F,
) where
FheType: FheEncrypt<u64, ClientKey>,
F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType),
{
let mut rng = thread_rng();
let from_amount = FheType::encrypt(rng.gen::<u64>(), client_key);
let to_amount = FheType::encrypt(rng.gen::<u64>(), client_key);
let amount = FheType::encrypt(rng.gen::<u64>(), client_key);
#[cfg(feature = "gpu")]
configure_gpu(client_key);
tfhe::reset_pbs_count();
let (_, _) = transfer_func(&from_amount, &to_amount, &amount);
let count = tfhe::get_pbs_count();
println!("ERC20 transfer/{fn_name}::{type_name}: {count} PBS");
let params = client_key.computation_parameters();
let test_name = if cfg!(feature = "gpu") {
format!("hlapi::cuda::erc20::pbs_count::{fn_name}::{type_name}")
} else {
format!("hlapi::erc20::pbs_count::{fn_name}::{type_name}")
};
let results_file = Path::new("erc20_pbs_count.csv");
if !results_file.exists() {
File::create(results_file).expect("create results file failed");
}
let mut file = OpenOptions::new()
.append(true)
.open(results_file)
.expect("cannot open results file");
write_result(&mut file, &test_name, count as usize);
write_to_json::<u64, _>(
&test_name,
params,
params.name(),
"pbs-count",
&OperatorType::Atomic,
0,
vec![],
);
}
}
fn bench_transfer_latency<FheType, F>(
c: &mut BenchmarkGroup<'_, WallTime>,
client_key: &ClientKey,
bench_name: &str,
type_name: &str,
fn_name: &str,
transfer_func: F,
) where
FheType: FheEncrypt<u64, ClientKey>,
F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType),
{
#[cfg(feature = "gpu")]
configure_gpu(client_key);
let bench_id = format!("{bench_name}::{fn_name}::{type_name}");
c.bench_function(&bench_id, |b| {
let mut rng = thread_rng();
let from_amount = FheType::encrypt(rng.gen::<u64>(), client_key);
let to_amount = FheType::encrypt(rng.gen::<u64>(), client_key);
let amount = FheType::encrypt(rng.gen::<u64>(), client_key);
b.iter(|| {
let (_, _) = transfer_func(&from_amount, &to_amount, &amount);
})
});
let params = client_key.computation_parameters();
write_to_json::<u64, _>(
&bench_id,
params,
params.name(),
"erc20-transfer",
&OperatorType::Atomic,
64,
vec![],
);
}
#[cfg(not(feature = "gpu"))]
fn bench_transfer_throughput<FheType, F>(
group: &mut BenchmarkGroup<'_, WallTime>,
client_key: &ClientKey,
bench_name: &str,
type_name: &str,
fn_name: &str,
transfer_func: F,
) where
FheType: FheEncrypt<u64, ClientKey> + Send + Sync,
F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType) + Sync,
{
let mut rng = thread_rng();
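    // Batches of 10, 100 and 500 independent transfers are executed in parallel with rayon.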
for num_elems in [10, 100, 500] {
group.throughput(Throughput::Elements(num_elems));
let bench_id =
format!("{bench_name}::throughput::{fn_name}::{type_name}::{num_elems}_elems");
group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| {
let from_amounts = (0..num_elems)
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
.collect::<Vec<_>>();
let to_amounts = (0..num_elems)
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
.collect::<Vec<_>>();
let amounts = (0..num_elems)
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
.collect::<Vec<_>>();
b.iter(|| {
from_amounts
.par_iter()
.zip(to_amounts.par_iter().zip(amounts.par_iter()))
.for_each(|(from_amount, (to_amount, amount))| {
let (_, _) = transfer_func(from_amount, to_amount, amount);
})
})
});
let params = client_key.computation_parameters();
write_to_json::<u64, _>(
&bench_id,
params,
params.name(),
"erc20-transfer",
&OperatorType::Atomic,
64,
vec![],
);
}
}
#[cfg(feature = "gpu")]
fn cuda_bench_transfer_throughput<FheType, F>(
group: &mut BenchmarkGroup<'_, WallTime>,
client_key: &ClientKey,
bench_name: &str,
type_name: &str,
fn_name: &str,
transfer_func: F,
) where
FheType: FheEncrypt<u64, ClientKey> + Send + Sync,
F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType) + Sync,
{
let mut rng = thread_rng();
let num_gpus = get_number_of_gpus() as u64;
let compressed_server_key = CompressedServerKey::new(client_key);
let sks_vec = (0..num_gpus)
.map(|i| compressed_server_key.decompress_to_specific_gpu(GpuIndex::new(i as u32)))
.collect::<Vec<_>>();
for num_elems in [10 * num_gpus, 100 * num_gpus, 500 * num_gpus] {
group.throughput(Throughput::Elements(num_elems));
let bench_id =
format!("{bench_name}::throughput::{fn_name}::{type_name}::{num_elems}_elems");
group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| {
let from_amounts = (0..num_elems)
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
.collect::<Vec<_>>();
let to_amounts = (0..num_elems)
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
.collect::<Vec<_>>();
let amounts = (0..num_elems)
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
.collect::<Vec<_>>();
let num_streams_per_gpu = 8; // Hard coded stream value for FheUint64
let chunk_size = (num_elems / num_gpus) as usize;
b.iter(|| {
from_amounts
                    .par_chunks(chunk_size) // Split the data into one chunk per GPU
.zip(
to_amounts
.par_chunks(chunk_size)
.zip(amounts.par_chunks(chunk_size)),
) // Zip with the other data
.enumerate() // Get the index for GPU
.for_each(
|(i, (from_amount_gpu_i, (to_amount_gpu_i, amount_gpu_i)))| {
// Process chunks within each GPU
let stream_chunk_size = from_amount_gpu_i.len() / num_streams_per_gpu;
from_amount_gpu_i
.par_chunks(stream_chunk_size)
.zip(to_amount_gpu_i.par_chunks(stream_chunk_size))
.zip(amount_gpu_i.par_chunks(stream_chunk_size))
.for_each(
|((from_amount_chunk, to_amount_chunk), amount_chunk)| {
// Set the server key for the current GPU
set_server_key(sks_vec[i].clone());
// Parallel iteration over the chunks of data
from_amount_chunk
.iter()
.zip(to_amount_chunk.iter().zip(amount_chunk.iter()))
.for_each(|(from_amount, (to_amount, amount))| {
transfer_func(from_amount, to_amount, amount);
});
},
);
},
);
});
});
let params = client_key.computation_parameters();
write_to_json::<u64, _>(
&bench_id,
params,
params.name(),
"erc20-transfer",
&OperatorType::Atomic,
64,
vec![],
);
}
}
#[cfg(feature = "pbs-stats")]
use pbs_stats::print_transfer_pbs_counts;
#[cfg(feature = "gpu")]
use tfhe::core_crypto::gpu::get_number_of_gpus;
#[cfg(not(feature = "gpu"))]
fn main() {
let params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(params).build();
let cks = ClientKey::generate(config);
let compressed_sks = CompressedServerKey::new(&cks);
let sks = compressed_sks.decompress();
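    // Install the server key on every rayon worker thread, then on the main thread.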
rayon::broadcast(|_| set_server_key(sks.clone()));
set_server_key(sks);
let mut c = Criterion::default().sample_size(10).configure_from_args();
let bench_name = "hlapi::erc20";
    // FheUint64 PBS counts
    // We only run each operation once: since every input is encrypted,
    // the PBS count is always the same.
#[cfg(feature = "pbs-stats")]
{
print_transfer_pbs_counts(
&cks,
"FheUint64",
"transfer::whitepaper",
transfer_whitepaper::<FheUint64>,
);
print_transfer_pbs_counts(&cks, "FheUint64", "no_cmux", transfer_no_cmux::<FheUint64>);
print_transfer_pbs_counts(
&cks,
"FheUint64",
"transfer::overflow",
transfer_overflow::<FheUint64>,
);
print_transfer_pbs_counts(&cks, "FheUint64", "safe", transfer_safe::<FheUint64>);
}
// FheUint64 latency
{
let mut group = c.benchmark_group(bench_name);
bench_transfer_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::whitepaper",
transfer_whitepaper::<FheUint64>,
);
bench_transfer_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::no_cmux",
transfer_no_cmux::<FheUint64>,
);
bench_transfer_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::overflow",
transfer_overflow::<FheUint64>,
);
bench_transfer_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::safe",
transfer_safe::<FheUint64>,
);
group.finish();
}
// FheUint64 Throughput
{
let mut group = c.benchmark_group(bench_name);
bench_transfer_throughput(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::whitepaper",
transfer_whitepaper::<FheUint64>,
);
bench_transfer_throughput(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::no_cmux",
transfer_no_cmux::<FheUint64>,
);
bench_transfer_throughput(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::overflow",
transfer_overflow::<FheUint64>,
);
bench_transfer_throughput(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::safe",
transfer_safe::<FheUint64>,
);
group.finish();
}
c.final_summary();
}
#[cfg(feature = "gpu")]
fn main() {
let params = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(params).build();
let cks = ClientKey::generate(config);
let mut c = Criterion::default().sample_size(10).configure_from_args();
let bench_name = "hlapi::cuda::erc20";
    // FheUint64 PBS counts
    // We only run each operation once: since every input is encrypted,
    // the PBS count is always the same.
#[cfg(feature = "pbs-stats")]
{
print_transfer_pbs_counts(
&cks,
"FheUint64",
"transfer::whitepaper",
transfer_whitepaper::<FheUint64>,
);
print_transfer_pbs_counts(&cks, "FheUint64", "no_cmux", transfer_no_cmux::<FheUint64>);
print_transfer_pbs_counts(
&cks,
"FheUint64",
"transfer::overflow",
transfer_overflow::<FheUint64>,
);
print_transfer_pbs_counts(&cks, "FheUint64", "safe", transfer_safe::<FheUint64>);
}
// FheUint64 latency
{
let mut group = c.benchmark_group(bench_name);
bench_transfer_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::whitepaper",
transfer_whitepaper::<FheUint64>,
);
bench_transfer_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::no_cmux",
transfer_no_cmux::<FheUint64>,
);
bench_transfer_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::overflow",
transfer_overflow::<FheUint64>,
);
bench_transfer_latency(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::safe",
transfer_safe::<FheUint64>,
);
group.finish();
}
// FheUint64 Throughput
{
let mut group = c.benchmark_group(bench_name);
cuda_bench_transfer_throughput(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::whitepaper",
transfer_whitepaper::<FheUint64>,
);
cuda_bench_transfer_throughput(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::no_cmux",
transfer_no_cmux::<FheUint64>,
);
cuda_bench_transfer_throughput(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::overflow",
transfer_overflow::<FheUint64>,
);
cuda_bench_transfer_throughput(
&mut group,
&cks,
bench_name,
"FheUint64",
"transfer::safe",
transfer_safe::<FheUint64>,
);
group.finish();
}
c.final_summary();
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,368 @@
use benchmark::params_aliases::*;
use benchmark::utilities::{
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
};
use criterion::{black_box, criterion_group, Criterion, Throughput};
use rayon::prelude::*;
use std::cmp::max;
use tfhe::integer::ciphertext::CompressedCiphertextListBuilder;
use tfhe::integer::{ClientKey, RadixCiphertext};
use tfhe::keycache::NamedParam;
use tfhe::{get_pbs_count, reset_pbs_count};
fn cpu_glwe_packing(c: &mut Criterion) {
let bench_name = "integer::packing_compression";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(30));
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let comp_param = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let cks = ClientKey::new(param);
let private_compression_key = cks.new_compression_private_key(comp_param);
let (compression_key, decompression_key) =
cks.new_compression_decompression_keys(&private_compression_key);
let log_message_modulus = param.message_modulus.0.ilog2() as usize;
for bit_size in [
2,
8,
16,
32,
64,
128,
256,
comp_param.lwe_per_glwe.0 * log_message_modulus,
] {
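        // Each block carries log2(message_modulus) bits, so the requested bit size
        // must be a whole number of blocks.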
assert_eq!(bit_size % log_message_modulus, 0);
let num_blocks = bit_size / log_message_modulus;
let bench_id_pack;
let bench_id_unpack;
match get_bench_type() {
BenchmarkType::Latency => {
let ct = cks.encrypt_radix(0_u32, num_blocks);
let mut builder = CompressedCiphertextListBuilder::new();
builder.push(ct);
bench_id_pack = format!("{bench_name}::pack_u{bit_size}");
bench_group.bench_function(&bench_id_pack, |b| {
b.iter(|| {
let compressed = builder.build(&compression_key);
_ = black_box(compressed);
})
});
let compressed = builder.build(&compression_key);
bench_id_unpack = format!("{bench_name}::unpack_u{bit_size}");
bench_group.bench_function(&bench_id_unpack, |b| {
b.iter(|| {
let unpacked: RadixCiphertext =
compressed.get(0, &decompression_key).unwrap().unwrap();
_ = black_box(unpacked);
})
});
}
BenchmarkType::Throughput => {
// Execute the operation once to know its cost.
let ct = cks.encrypt_radix(0_u32, num_blocks);
let mut builder = CompressedCiphertextListBuilder::new();
builder.push(ct);
let compressed = builder.build(&compression_key);
reset_pbs_count();
let _: RadixCiphertext = compressed.get(0, &decompression_key).unwrap().unwrap();
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
let num_block =
(bit_size as f64 / (param.message_modulus.0 as f64).log(2.0)).ceil() as usize;
let elements = throughput_num_threads(num_block, pbs_count);
                // FIXME thread usage could be more efficient here.
                // For example, with bit_size = 2, my laptop only uses around 2/3 of the
                // available threads. Thread usage increases with bit_size = 8 but the
                // machine still isn't fully loaded.
bench_group.throughput(Throughput::Elements(elements));
let builders = (0..elements)
.map(|_| {
let ct = cks.encrypt_radix(0_u32, num_blocks);
let mut builder = CompressedCiphertextListBuilder::new();
builder.push(ct);
builder
})
.collect::<Vec<_>>();
bench_id_pack = format!("{bench_name}::throughput::pack_u{bit_size}");
bench_group.bench_function(&bench_id_pack, |b| {
b.iter(|| {
builders.par_iter().for_each(|builder| {
builder.build(&compression_key);
})
})
});
let compressed = builders
.iter()
.map(|builder| builder.build(&compression_key))
.collect::<Vec<_>>();
bench_id_unpack = format!("{bench_name}::throughput::unpack_u{bit_size}");
bench_group.bench_function(&bench_id_unpack, |b| {
b.iter(|| {
compressed.par_iter().for_each(|comp| {
comp.get::<RadixCiphertext>(0, &decompression_key)
.unwrap()
.unwrap();
})
})
});
}
}
write_to_json::<u64, _>(
&bench_id_pack,
(comp_param, param),
comp_param.name(),
"pack",
&OperatorType::Atomic,
bit_size as u32,
vec![param.message_modulus.0.ilog2(); num_blocks],
);
write_to_json::<u64, _>(
&bench_id_unpack,
(comp_param, param),
comp_param.name(),
"unpack",
&OperatorType::Atomic,
bit_size as u32,
vec![param.message_modulus.0.ilog2(); num_blocks],
);
}
bench_group.finish()
}
#[cfg(feature = "gpu")]
mod cuda {
use super::*;
use benchmark::utilities::cuda_integer_utils::cuda_local_streams;
use std::cmp::max;
use tfhe::core_crypto::gpu::CudaStreams;
use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder;
use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
use tfhe::integer::gpu::gen_keys_radix_gpu;
fn gpu_glwe_packing(c: &mut Criterion) {
let bench_name = "integer::cuda::packing_compression";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(30));
let stream = CudaStreams::new_multi_gpu();
let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let comp_param =
BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let log_message_modulus = param.message_modulus.0.ilog2() as usize;
let cks = ClientKey::new(param);
let private_compression_key = cks.new_compression_private_key(comp_param);
for bit_size in [
2,
8,
16,
32,
64,
128,
256,
comp_param.lwe_per_glwe.0 * log_message_modulus,
] {
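            // Each block carries log2(message_modulus) bits, so the requested bit size
            // must be a whole number of blocks.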
assert_eq!(bit_size % log_message_modulus, 0);
let num_blocks = bit_size / log_message_modulus;
let bench_id_pack;
let bench_id_unpack;
// Generate and convert compression keys
let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream);
let (compressed_compression_key, compressed_decompression_key) =
radix_cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&stream);
let cuda_decompression_key = compressed_decompression_key.decompress_to_cuda(
radix_cks.parameters().glwe_dimension(),
radix_cks.parameters().polynomial_size(),
radix_cks.parameters().message_modulus(),
radix_cks.parameters().carry_modulus(),
radix_cks.parameters().ciphertext_modulus(),
&stream,
);
match get_bench_type() {
BenchmarkType::Latency => {
// Encrypt
let ct = cks.encrypt_radix(0_u32, num_blocks);
let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream);
// Benchmark
let mut builder = CudaCompressedCiphertextListBuilder::new();
builder.push(d_ct, &stream);
bench_id_pack = format!("{bench_name}::pack_u{bit_size}");
bench_group.bench_function(&bench_id_pack, |b| {
b.iter(|| {
let compressed = builder.build(&cuda_compression_key, &stream);
_ = black_box(compressed);
})
});
let compressed = builder.build(&cuda_compression_key, &stream);
bench_id_unpack = format!("{bench_name}::unpack_u{bit_size}");
bench_group.bench_function(&bench_id_unpack, |b| {
b.iter(|| {
let unpacked: CudaUnsignedRadixCiphertext = compressed
.get(0, &cuda_decompression_key, &stream)
.unwrap()
.unwrap();
_ = black_box(unpacked);
})
});
}
BenchmarkType::Throughput => {
// Execute the operation once to know its cost.
let (cpu_compression_key, cpu_decompression_key) =
cks.new_compression_decompression_keys(&private_compression_key);
let ct = cks.encrypt_radix(0_u32, num_blocks);
let mut builder = CompressedCiphertextListBuilder::new();
builder.push(ct);
let compressed = builder.build(&cpu_compression_key);
reset_pbs_count();
                    // Use the CPU operation since pbs_count does not count PBS
                    // executed on the GPU backend.
let _: RadixCiphertext =
compressed.get(0, &cpu_decompression_key).unwrap().unwrap();
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
let num_block = (bit_size as f64 / (param.message_modulus.0 as f64).log(2.0))
.ceil() as usize;
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
// Encrypt
let ct = cks.encrypt_radix(0_u32, num_blocks);
let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream);
// Benchmark
let mut builder = CudaCompressedCiphertextListBuilder::new();
builder.push(d_ct, &stream);
let builders = (0..elements)
.map(|_| {
let ct = cks.encrypt_radix(0_u32, num_blocks);
let d_ct =
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream);
let mut builder = CudaCompressedCiphertextListBuilder::new();
builder.push(d_ct, &stream);
builder
})
.collect::<Vec<_>>();
let local_streams = cuda_local_streams(num_block, elements as usize);
bench_id_pack = format!("{bench_name}::throughput::pack_u{bit_size}");
bench_group.bench_function(&bench_id_pack, |b| {
b.iter(|| {
builders.par_iter().zip(local_streams.par_iter()).for_each(
|(builder, local_stream)| {
builder.build(&cuda_compression_key, local_stream);
},
)
})
});
let compressed = builders
.iter()
.map(|builder| builder.build(&cuda_compression_key, &stream))
.collect::<Vec<_>>();
bench_id_unpack = format!("{bench_name}::throughput::unpack_u{bit_size}");
bench_group.bench_function(&bench_id_unpack, |b| {
b.iter(|| {
compressed
.par_iter()
.zip(local_streams.par_iter())
.for_each(|(comp, local_stream)| {
comp.get::<CudaUnsignedRadixCiphertext>(
0,
&cuda_decompression_key,
local_stream,
)
.unwrap()
.unwrap();
})
})
});
}
}
write_to_json::<u64, _>(
&bench_id_pack,
(comp_param, param),
comp_param.name(),
"pack",
&OperatorType::Atomic,
bit_size as u32,
vec![param.message_modulus.0.ilog2(); num_blocks],
);
write_to_json::<u64, _>(
&bench_id_unpack,
(comp_param, param),
comp_param.name(),
"unpack",
&OperatorType::Atomic,
bit_size as u32,
vec![param.message_modulus.0.ilog2(); num_blocks],
);
}
bench_group.finish()
}
criterion_group!(gpu_glwe_packing2, gpu_glwe_packing);
}
criterion_group!(cpu_glwe_packing2, cpu_glwe_packing);
#[cfg(feature = "gpu")]
use cuda::gpu_glwe_packing2;
fn main() {
#[cfg(feature = "gpu")]
gpu_glwe_packing2();
#[cfg(not(feature = "gpu"))]
cpu_glwe_packing2();
Criterion::default().configure_from_args().final_summary();
}

View File

@@ -0,0 +1,85 @@
use benchmark::params::ParamsAndNumBlocksIter;
use benchmark::utilities::{
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
};
use criterion::{black_box, Criterion, Throughput};
use rayon::prelude::*;
use std::cmp::max;
use tfhe::integer::keycache::KEY_CACHE;
use tfhe::integer::IntegerKeyKind;
use tfhe::keycache::NamedParam;
use tfhe::{get_pbs_count, reset_pbs_count};
use tfhe_csprng::seeders::Seed;
pub fn unsigned_oprf(c: &mut Criterion) {
let bench_name = "integer::unsigned_oprf";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(30));
for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() {
let param_name = param.name();
let bench_id;
match get_bench_type() {
BenchmarkType::Latency => {
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
bench_group.bench_function(&bench_id, |b| {
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
b.iter(|| {
_ = black_box(
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
),
);
})
});
}
BenchmarkType::Throughput => {
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
// Execute the operation once to know its cost.
reset_pbs_count();
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
(0..elements).into_par_iter().for_each(|_| {
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
);
})
})
});
}
}
write_to_json::<u64, _>(
&bench_id,
param,
param.name(),
"oprf",
&OperatorType::Atomic,
bit_size as u32,
vec![param.message_modulus().0.ilog2(); num_block],
);
}
bench_group.finish()
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,785 @@
use benchmark::params_aliases::*;
use benchmark::utilities::{
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
};
use criterion::{criterion_group, Criterion, Throughput};
use rand::prelude::*;
use rayon::prelude::*;
use std::cmp::max;
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::Path;
use tfhe::core_crypto::prelude::LweCiphertextCount;
use tfhe::integer::key_switching_key::KeySwitchingKey;
use tfhe::integer::parameters::IntegerCompactCiphertextListExpansionMode;
use tfhe::integer::{ClientKey, CompactPrivateKey, CompactPublicKey, ServerKey};
use tfhe::keycache::NamedParam;
use tfhe::shortint::parameters::*;
use tfhe::zk::{CompactPkeCrs, ZkComputeLoad};
use tfhe::{get_pbs_count, reset_pbs_count};
fn write_result(file: &mut File, name: &str, value: usize) {
let line = format!("{name},{value}\n");
let error_message = format!("cannot write {name} result into file");
file.write_all(line.as_bytes()).expect(&error_message);
}
fn pke_zk_proof(c: &mut Criterion) {
let bench_name = "zk::pke_zk_proof";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(60));
for (param_pke, _param_casting, param_fhe) in [
(
BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
),
(
BENCH_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
),
] {
let param_name = param_fhe.name();
let param_name = param_name.as_str();
let cks = ClientKey::new(param_fhe);
let sks = ServerKey::new_radix_server_key(&cks);
let compact_private_key = CompactPrivateKey::new(param_pke);
let pk = CompactPublicKey::new(&compact_private_key);
// Kept for consistency
let _casting_key =
KeySwitchingKey::new((&compact_private_key, None), (&cks, &sks), _param_casting);
// We have a use case with 320 bits of metadata
let mut metadata = [0u8; (320 / u8::BITS) as usize];
let mut rng = rand::thread_rng();
metadata.fill_with(|| rng.gen());
let zk_vers = param_pke.zk_scheme;
for bits in [64usize, 640, 1280, 4096] {
assert_eq!(bits % 64, 0);
// Packing, so we take the message and carry modulus to compute our block count
let num_block = 64usize.div_ceil(
(param_pke.message_modulus.0 * param_pke.carry_modulus.0).ilog2() as usize,
);
use rand::Rng;
let mut rng = rand::thread_rng();
let fhe_uint_count = bits / 64;
let crs = CompactPkeCrs::from_shortint_params(
param_pke,
LweCiphertextCount(num_block * fhe_uint_count),
)
.unwrap();
for compute_load in [ZkComputeLoad::Proof, ZkComputeLoad::Verify] {
let zk_load = match compute_load {
ZkComputeLoad::Proof => "compute_load_proof",
ZkComputeLoad::Verify => "compute_load_verify",
};
let bench_id;
match get_bench_type() {
BenchmarkType::Latency => {
bench_id = format!(
"{bench_name}::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
bench_group.bench_function(&bench_id, |b| {
let input_msg = rng.gen::<u64>();
let messages = vec![input_msg; fhe_uint_count];
b.iter(|| {
let _ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
.extend(messages.iter().copied())
.build_with_proof_packed(&crs, &metadata, compute_load)
.unwrap();
})
});
}
BenchmarkType::Throughput => {
// Execute the operation once to know its cost.
let input_msg = rng.gen::<u64>();
let messages = vec![input_msg; fhe_uint_count];
reset_pbs_count();
let _ = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
.extend(messages.iter().copied())
.build_with_proof_packed(&crs, &metadata, compute_load);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_id = format!(
"{bench_name}::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
bench_group.bench_function(&bench_id, |b| {
let messages = (0..elements)
.map(|_| {
let input_msg = rng.gen::<u64>();
vec![input_msg; fhe_uint_count]
})
.collect::<Vec<_>>();
b.iter(|| {
messages.par_iter().for_each(|msg| {
tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
.extend(msg.iter().copied())
.build_with_proof_packed(&crs, &metadata, compute_load)
.unwrap();
})
})
});
}
}
let shortint_params: PBSParameters = param_fhe.into();
write_to_json::<u64, _>(
&bench_id,
shortint_params,
param_name,
"pke_zk_proof",
&OperatorType::Atomic,
shortint_params.message_modulus().0 as u32,
vec![shortint_params.message_modulus().0.ilog2(); num_block],
);
}
}
}
bench_group.finish()
}
criterion_group!(zk_proof, pke_zk_proof);
fn cpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) {
let bench_name = "zk::pke_zk_verify";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(60));
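    // Recreate the results file (truncating any previous run), then reopen it in
    // append mode so each measured size is written incrementally.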
File::create(results_file).expect("create results file failed");
let mut file = OpenOptions::new()
.append(true)
.open(results_file)
.expect("cannot open results file");
for (param_pke, param_casting, param_fhe) in [
(
BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
),
(
BENCH_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
),
] {
let param_name = param_fhe.name();
let param_name = param_name.as_str();
let cks = ClientKey::new(param_fhe);
let sks = ServerKey::new_radix_server_key(&cks);
let compact_private_key = CompactPrivateKey::new(param_pke);
let pk = CompactPublicKey::new(&compact_private_key);
let casting_key =
KeySwitchingKey::new((&compact_private_key, None), (&cks, &sks), param_casting);
// We have a use case with 320 bits of metadata
let mut metadata = [0u8; (320 / u8::BITS) as usize];
let mut rng = rand::thread_rng();
metadata.fill_with(|| rng.gen());
let zk_vers = param_pke.zk_scheme;
for bits in [64usize, 640, 1280, 4096] {
assert_eq!(bits % 64, 0);
// Packing, so we take the message and carry modulus to compute our block count
let num_block = 64usize.div_ceil(
(param_pke.message_modulus.0 * param_pke.carry_modulus.0).ilog2() as usize,
);
use rand::Rng;
let mut rng = rand::thread_rng();
let fhe_uint_count = bits / 64;
println!("Generating CRS... ");
let crs = CompactPkeCrs::from_shortint_params(
param_pke,
LweCiphertextCount(num_block * fhe_uint_count),
)
.unwrap();
let shortint_params: PBSParameters = param_fhe.into();
let crs_data = bincode::serialize(&crs).unwrap();
println!("CRS size: {}", crs_data.len());
let test_name = format!("zk::crs_sizes::{param_name}_{bits}_bits_packed_ZK{zk_vers:?}");
write_result(&mut file, &test_name, crs_data.len());
write_to_json::<u64, _>(
&test_name,
shortint_params,
param_name,
"pke_zk_crs",
&OperatorType::Atomic,
0,
vec![],
);
for compute_load in [ZkComputeLoad::Proof, ZkComputeLoad::Verify] {
let zk_load = match compute_load {
ZkComputeLoad::Proof => "compute_load_proof",
ZkComputeLoad::Verify => "compute_load_verify",
};
let bench_id_verify;
let bench_id_verify_and_expand;
match get_bench_type() {
BenchmarkType::Latency => {
bench_id_verify = format!(
"{bench_name}::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
bench_id_verify_and_expand = format!(
"{bench_name}_and_expand::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
let input_msg = rng.gen::<u64>();
let messages = vec![input_msg; fhe_uint_count];
println!("Generating proven ciphertext ({zk_load})... ");
let ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
.extend(messages.iter().copied())
.build_with_proof_packed(&crs, &metadata, compute_load)
.unwrap();
let proven_ciphertext_list_serialized = bincode::serialize(&ct1).unwrap();
println!(
"proven list size: {}",
proven_ciphertext_list_serialized.len()
);
let test_name = format!(
"zk::proven_list_size::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
write_result(
&mut file,
&test_name,
proven_ciphertext_list_serialized.len(),
);
write_to_json::<u64, _>(
&test_name,
shortint_params,
param_name,
"pke_zk_proof",
&OperatorType::Atomic,
0,
vec![],
);
let proof_size = ct1.proof_size();
println!("proof size: {}", ct1.proof_size());
let test_name =
format!("zk::proof_sizes::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}");
write_result(&mut file, &test_name, proof_size);
write_to_json::<u64, _>(
&test_name,
shortint_params,
param_name,
"pke_zk_proof",
&OperatorType::Atomic,
0,
vec![],
);
bench_group.bench_function(&bench_id_verify, |b| {
b.iter(|| {
let _ret = ct1.verify(&crs, &pk, &metadata);
});
});
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
b.iter(|| {
let _ret = ct1
.verify_and_expand(
&crs,
&pk,
&metadata,
IntegerCompactCiphertextListExpansionMode::CastAndUnpackIfNecessary(
casting_key.as_view(),
),
)
.unwrap();
});
});
}
BenchmarkType::Throughput => {
// In throughput mode object sizes are not recorded.
// Execute the operation once to know its cost.
let input_msg = rng.gen::<u64>();
let messages = vec![input_msg; fhe_uint_count];
let ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
.extend(messages.iter().copied())
.build_with_proof_packed(&crs, &metadata, compute_load)
.unwrap();
reset_pbs_count();
let _ = ct1.verify_and_expand(
&crs,
&pk,
&metadata,
IntegerCompactCiphertextListExpansionMode::CastAndUnpackIfNecessary(
casting_key.as_view(),
),
);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_id_verify = format!(
"{bench_name}::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
bench_id_verify_and_expand = format!(
"{bench_name}_and_expand::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
println!("Generating proven ciphertexts list ({zk_load})... ");
let cts = (0..elements)
.map(|_| {
let input_msg = rng.gen::<u64>();
let messages = vec![input_msg; fhe_uint_count];
tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
.extend(messages.iter().copied())
.build_with_proof_packed(&crs, &metadata, compute_load)
.unwrap()
})
.collect::<Vec<_>>();
bench_group.bench_function(&bench_id_verify, |b| {
b.iter(|| {
cts.par_iter().for_each(|ct1| {
ct1.verify(&crs, &pk, &metadata);
})
});
});
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
b.iter(|| {
cts.par_iter().for_each(|ct1| {
ct1
.verify_and_expand(
&crs,
&pk,
&metadata,
IntegerCompactCiphertextListExpansionMode::CastAndUnpackIfNecessary(
casting_key.as_view(),
),
)
.unwrap();
})
});
});
}
}
write_to_json::<u64, _>(
&bench_id_verify,
shortint_params,
param_name,
"pke_zk_verify",
&OperatorType::Atomic,
shortint_params.message_modulus().0 as u32,
vec![shortint_params.message_modulus().0.ilog2(); num_block],
);
write_to_json::<u64, _>(
&bench_id_verify_and_expand,
shortint_params,
param_name,
"pke_zk_verify_and_expand",
&OperatorType::Atomic,
shortint_params.message_modulus().0 as u32,
vec![shortint_params.message_modulus().0.ilog2(); num_block],
);
}
}
}
bench_group.finish()
}
#[cfg(all(feature = "gpu", feature = "zk-pok"))]
mod cuda {
use super::*;
use benchmark::utilities::{cuda_local_keys, cuda_local_streams};
use criterion::BatchSize;
use itertools::Itertools;
use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
use tfhe::integer::gpu::key_switching_key::CudaKeySwitchingKey;
use tfhe::integer::gpu::zk::CudaProvenCompactCiphertextList;
use tfhe::integer::gpu::CudaServerKey;
use tfhe::integer::CompressedServerKey;
fn gpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) {
let bench_name = "zk::cuda::pke_zk_verify";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(60));
let streams = CudaStreams::new_multi_gpu();
File::create(results_file).expect("create results file failed");
let mut file = OpenOptions::new()
.append(true)
.open(results_file)
.expect("cannot open results file");
for (param_pke, param_ksk, param_fhe) in [(
PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
PARAM_GPU_MULTI_BIT_GROUP_4_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
)] {
let param_name = param_fhe.name();
let param_name = param_name.as_str();
let cks = ClientKey::new(param_fhe);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let gpu_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let compact_private_key = CompactPrivateKey::new(param_pke);
let pk = CompactPublicKey::new(&compact_private_key);
let d_ksk = CudaKeySwitchingKey::new(
(&compact_private_key, None),
(&cks, &gpu_sks),
param_ksk,
&streams,
);
// We have a use case with 320 bits of metadata
let mut metadata = [0u8; (320 / u8::BITS) as usize];
let mut rng = rand::thread_rng();
metadata.fill_with(|| rng.gen());
let zk_vers = param_pke.zk_scheme;
for bits in [64usize, 640, 1280, 4096] {
assert_eq!(bits % 64, 0);
// Packing, so we take the message and carry modulus to compute our block count
let num_block = 64usize.div_ceil(
(param_pke.message_modulus.0 * param_pke.carry_modulus.0).ilog2() as usize,
);
use rand::Rng;
let mut rng = rand::thread_rng();
let fhe_uint_count = bits / 64;
println!("Generating CRS... ");
let crs = CompactPkeCrs::from_shortint_params(
param_pke,
LweCiphertextCount(num_block * fhe_uint_count),
)
.unwrap();
let shortint_params: PBSParameters = param_fhe.into();
let crs_data = bincode::serialize(&crs).unwrap();
println!("CRS size: {}", crs_data.len());
let test_name =
format!("zk::crs_sizes::{param_name}_{bits}_bits_packed_ZK{zk_vers:?}");
write_result(&mut file, &test_name, crs_data.len());
write_to_json::<u64, _>(
&test_name,
shortint_params,
param_name,
"pke_zk_crs",
&OperatorType::Atomic,
0,
vec![],
);
for compute_load in [ZkComputeLoad::Proof, ZkComputeLoad::Verify] {
let zk_load = match compute_load {
ZkComputeLoad::Proof => "compute_load_proof",
ZkComputeLoad::Verify => "compute_load_verify",
};
let bench_id_verify;
let bench_id_verify_and_expand;
let bench_id_expand_without_verify;
match get_bench_type() {
BenchmarkType::Latency => {
bench_id_verify = format!(
"{bench_name}::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
bench_id_verify_and_expand = format!(
"{bench_name}_and_expand::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
bench_id_expand_without_verify = format!(
"{bench_name}_only_expand::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
let input_msg = rng.gen::<u64>();
let messages = vec![input_msg; fhe_uint_count];
println!("Generating proven ciphertext ({zk_load})... ");
let ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
.extend(messages.iter().copied())
.build_with_proof_packed(&crs, &metadata, compute_load)
.unwrap();
let gpu_ct1 =
CudaProvenCompactCiphertextList::from_proven_compact_ciphertext_list(
&ct1, &streams,
);
let proven_ciphertext_list_serialized =
bincode::serialize(&ct1).unwrap();
println!(
"proven list size: {}",
proven_ciphertext_list_serialized.len()
);
let test_name = format!(
"zk::proven_list_size::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
write_result(
&mut file,
&test_name,
proven_ciphertext_list_serialized.len(),
);
write_to_json::<u64, _>(
&test_name,
shortint_params,
param_name,
"pke_zk_proof",
&OperatorType::Atomic,
0,
vec![],
);
let proof_size = ct1.proof_size();
println!("proof size: {}", ct1.proof_size());
let test_name =
format!("zk::proof_sizes::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}");
write_result(&mut file, &test_name, proof_size);
write_to_json::<u64, _>(
&test_name,
shortint_params,
param_name,
"pke_zk_proof",
&OperatorType::Atomic,
0,
vec![],
);
bench_group.bench_function(&bench_id_verify, |b| {
b.iter(|| {
let _ret = ct1.verify(&crs, &pk, &metadata);
});
});
bench_group.bench_function(&bench_id_expand_without_verify, |b| {
b.iter(|| {
let _ret = gpu_ct1
.expand_without_verification(&d_ksk, &streams)
.unwrap();
});
});
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
b.iter(|| {
let _ret = gpu_ct1
.verify_and_expand(&crs, &pk, &metadata, &d_ksk, &streams)
.unwrap();
});
});
}
BenchmarkType::Throughput => {
let gpu_sks_vec = cuda_local_keys(&cks);
let gpu_count = get_number_of_gpus() as usize;
// Execute the operation once to know its cost.
let input_msg = rng.gen::<u64>();
let messages = vec![input_msg; fhe_uint_count];
let ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
.extend(messages.iter().copied())
.build_with_proof_packed(&crs, &metadata, compute_load)
.unwrap();
let gpu_ct1 =
CudaProvenCompactCiphertextList::from_proven_compact_ciphertext_list(
&ct1, &streams,
);
reset_pbs_count();
let _ =
gpu_ct1.verify_and_expand(&crs, &pk, &metadata, &d_ksk, &streams);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_id_verify = format!(
"{bench_name}::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
bench_id_verify_and_expand = format!(
"{bench_name}_and_expand::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
bench_id_expand_without_verify = format!(
"{bench_name}_only_expand::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
);
println!("Generating proven ciphertexts list ({zk_load})... ");
let cts = (0..elements)
.map(|_| {
let input_msg = rng.gen::<u64>();
let messages = vec![input_msg; fhe_uint_count];
tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
.extend(messages.iter().copied())
.build_with_proof_packed(&crs, &metadata, compute_load)
.unwrap()
})
.collect::<Vec<_>>();
let local_streams = cuda_local_streams(num_block, elements as usize);
let d_ksk_vec = gpu_sks_vec
.par_iter()
.zip(local_streams.par_iter())
.map(|(gpu_sks, local_stream)| {
CudaKeySwitchingKey::new(
(&compact_private_key, None),
(&cks, gpu_sks),
param_ksk,
local_stream,
)
})
.collect::<Vec<_>>();
assert_eq!(d_ksk_vec.len(), gpu_count);
bench_group.bench_function(&bench_id_verify, |b| {
b.iter(|| {
cts.par_iter().for_each(|ct1| {
ct1.verify(&crs, &pk, &metadata);
})
});
});
bench_group.bench_function(&bench_id_expand_without_verify, |b| {
let setup_encrypted_values = || {
let local_streams = cuda_local_streams(num_block, elements as usize);
let gpu_cts = cts.iter().enumerate().map(|(i, ct)| {
CudaProvenCompactCiphertextList::from_proven_compact_ciphertext_list(
ct, &local_streams[i],
)
}).collect_vec();
(gpu_cts, local_streams)
};
b.iter_batched(setup_encrypted_values, |(gpu_cts, local_streams)| {
gpu_cts.par_iter()
.zip(local_streams.par_iter())
.enumerate()
.for_each(|(i, (gpu_ct, local_stream))| {
gpu_ct
.expand_without_verification(&d_ksk_vec[i % gpu_count], local_stream)
.unwrap();
});
}, BatchSize::SmallInput);
});
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
let setup_encrypted_values = || {
let local_streams = cuda_local_streams(num_block, elements as usize);
let gpu_cts = cts.iter().enumerate().map(|(i, ct)| {
CudaProvenCompactCiphertextList::from_proven_compact_ciphertext_list(
ct, &local_streams[i],
)
}).collect_vec();
(gpu_cts, local_streams)
};
b.iter_batched(setup_encrypted_values, |(gpu_cts, local_streams)| {
gpu_cts
.par_iter()
.zip(local_streams.par_iter())
.for_each(|(gpu_ct, local_stream)| {
gpu_ct
.verify_and_expand(
&crs, &pk, &metadata, &d_ksk, local_stream
)
.unwrap();
});
}, BatchSize::SmallInput);
});
}
}
write_to_json::<u64, _>(
&bench_id_verify_and_expand,
shortint_params,
param_name,
"pke_zk_verify_and_expand",
&OperatorType::Atomic,
shortint_params.message_modulus().0 as u32,
vec![shortint_params.message_modulus().0.ilog2(); num_block],
);
}
}
}
bench_group.finish()
}
pub fn gpu_zk_verify() {
let results_file = Path::new("gpu_pke_zk_crs_sizes.csv");
let mut criterion: Criterion<_> = (Criterion::default()).configure_from_args();
gpu_pke_zk_verify(&mut criterion, results_file);
}
}
pub fn zk_verify() {
let results_file = Path::new("pke_zk_crs_sizes.csv");
let mut criterion: Criterion<_> = (Criterion::default()).configure_from_args();
cpu_pke_zk_verify(&mut criterion, results_file);
}
#[cfg(all(feature = "gpu", feature = "zk-pok"))]
use crate::cuda::gpu_zk_verify;
fn main() {
#[cfg(all(feature = "gpu", feature = "zk-pok"))]
gpu_zk_verify();
#[cfg(not(feature = "gpu"))]
zk_verify();
Criterion::default().configure_from_args().final_summary();
}

View File

@@ -0,0 +1,697 @@
use benchmark::params::{
raw_benchmark_parameters, SHORTINT_BENCH_PARAMS_GAUSSIAN, SHORTINT_BENCH_PARAMS_TUNIFORM,
SHORTINT_MULTI_BIT_BENCH_PARAMS,
};
use benchmark::utilities::{write_to_json, OperatorType};
use criterion::{criterion_group, Criterion};
use rand::Rng;
use std::env;
use tfhe::keycache::NamedParam;
use tfhe::shortint::keycache::KEY_CACHE;
use tfhe::shortint::parameters::*;
use tfhe::shortint::{Ciphertext, CompressedServerKey, ServerKey};
fn bench_server_key_unary_function<F>(
c: &mut Criterion,
bench_name: &str,
display_name: &str,
unary_op: F,
) where
F: Fn(&ServerKey, &mut Ciphertext),
{
let mut bench_group = c.benchmark_group(bench_name);
for param in raw_benchmark_parameters().iter() {
let keys = KEY_CACHE.get_from_param(*param);
let (cks, sks) = (keys.client_key(), keys.server_key());
let mut rng = rand::thread_rng();
let modulus = cks.parameters.message_modulus().0;
let clear_text = rng.gen::<u64>() % modulus;
let mut ct = cks.encrypt(clear_text);
let bench_id = format!("{bench_name}::{}", param.name());
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
unary_op(sks, &mut ct);
})
});
write_to_json::<u64, _>(
&bench_id,
*param,
param.name(),
display_name,
&OperatorType::Atomic,
param.message_modulus().0.ilog2(),
vec![param.message_modulus().0.ilog2()],
);
}
bench_group.finish()
}
fn bench_server_key_binary_function<F>(
c: &mut Criterion,
bench_name: &str,
display_name: &str,
binary_op: F,
) where
F: Fn(&ServerKey, &mut Ciphertext, &mut Ciphertext),
{
let mut bench_group = c.benchmark_group(bench_name);
for param in raw_benchmark_parameters().iter() {
let keys = KEY_CACHE.get_from_param(*param);
let (cks, sks) = (keys.client_key(), keys.server_key());
let mut rng = rand::thread_rng();
let modulus = cks.parameters.message_modulus().0;
let clear_0 = rng.gen::<u64>() % modulus;
let clear_1 = rng.gen::<u64>() % modulus;
let mut ct_0 = cks.encrypt(clear_0);
let mut ct_1 = cks.encrypt(clear_1);
let bench_id = format!("{bench_name}::{}", param.name());
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
binary_op(sks, &mut ct_0, &mut ct_1);
})
});
write_to_json::<u64, _>(
&bench_id,
*param,
param.name(),
display_name,
&OperatorType::Atomic,
param.message_modulus().0.ilog2(),
vec![param.message_modulus().0.ilog2()],
);
}
bench_group.finish()
}
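/// Benchmark a shortint server-key operation taking one encrypted operand and one clear scalar.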
fn bench_server_key_binary_scalar_function<F>(
c: &mut Criterion,
bench_name: &str,
display_name: &str,
binary_op: F,
) where
F: Fn(&ServerKey, &mut Ciphertext, u8),
{
let mut bench_group = c.benchmark_group(bench_name);
for param in raw_benchmark_parameters().iter() {
let keys = KEY_CACHE.get_from_param(*param);
let (cks, sks) = (keys.client_key(), keys.server_key());
let mut rng = rand::thread_rng();
let modulus = cks.parameters.message_modulus().0;
let clear_0 = rng.gen::<u64>() % modulus;
let clear_1 = rng.gen::<u64>() % modulus;
let mut ct_0 = cks.encrypt(clear_0);
let bench_id = format!("{bench_name}::{}", param.name());
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
binary_op(sks, &mut ct_0, clear_1 as u8);
})
});
write_to_json::<u64, _>(
&bench_id,
*param,
param.name(),
display_name,
&OperatorType::Atomic,
param.message_modulus().0.ilog2(),
vec![param.message_modulus().0.ilog2()],
);
}
bench_group.finish()
}
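/// Benchmark a shortint server-key scalar division-like operation; the clear divisor is drawn
/// non-zero to avoid division by zero.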
fn bench_server_key_binary_scalar_division_function<F>(
c: &mut Criterion,
bench_name: &str,
display_name: &str,
binary_op: F,
) where
F: Fn(&ServerKey, &mut Ciphertext, u8),
{
let mut bench_group = c.benchmark_group(bench_name);
for param in raw_benchmark_parameters().iter() {
let keys = KEY_CACHE.get_from_param(*param);
let (cks, sks) = (keys.client_key(), keys.server_key());
let mut rng = rand::thread_rng();
let modulus = cks.parameters.message_modulus().0;
assert_ne!(modulus, 1);
let clear_0 = rng.gen::<u64>() % modulus;
let mut clear_1 = rng.gen::<u64>() % modulus;
while clear_1 == 0 {
clear_1 = rng.gen::<u64>() % modulus;
}
let mut ct_0 = cks.encrypt(clear_0);
let bench_id = format!("{bench_name}::{}", param.name());
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
binary_op(sks, &mut ct_0, clear_1 as u8);
})
});
write_to_json::<u64, _>(
&bench_id,
*param,
param.name(),
display_name,
&OperatorType::Atomic,
param.message_modulus().0.ilog2(),
vec![param.message_modulus().0.ilog2()],
);
}
bench_group.finish()
}
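// Benchmark the extraction of the carry buffer from a single ciphertext.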
fn carry_extract_bench(c: &mut Criterion) {
let mut bench_group = c.benchmark_group("carry_extract");
for param in raw_benchmark_parameters().iter() {
let keys = KEY_CACHE.get_from_param(*param);
let (cks, sks) = (keys.client_key(), keys.server_key());
let mut rng = rand::thread_rng();
let modulus = cks.parameters.message_modulus().0;
let clear_0 = rng.gen::<u64>() % modulus;
let ct_0 = cks.encrypt(clear_0);
let bench_id = format!("shortint::carry_extract::{}", param.name());
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let _ = sks.carry_extract(&ct_0);
})
});
write_to_json::<u64, _>(
&bench_id,
*param,
param.name(),
"carry_extract",
&OperatorType::Atomic,
param.message_modulus().0.ilog2(),
vec![param.message_modulus().0.ilog2()],
);
}
bench_group.finish()
}
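// Benchmark a programmable bootstrap via `apply_lookup_table` with an identity lookup table.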
fn programmable_bootstrapping_bench(c: &mut Criterion) {
let mut bench_group = c.benchmark_group("programmable_bootstrap");
for param in raw_benchmark_parameters().iter() {
let keys = KEY_CACHE.get_from_param(*param);
let (cks, sks) = (keys.client_key(), keys.server_key());
let mut rng = rand::thread_rng();
let modulus = cks.parameters.message_modulus().0;
let acc = sks.generate_lookup_table(|x| x);
let clear_0 = rng.gen::<u64>() % modulus;
let ctxt = cks.encrypt(clear_0);
let bench_id = format!("shortint::programmable_bootstrap::{}", param.name());
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let _ = sks.apply_lookup_table(&ctxt, &acc);
})
});
write_to_json::<u64, _>(
&bench_id,
*param,
param.name(),
"pbs",
&OperatorType::Atomic,
param.message_modulus().0.ilog2(),
vec![param.message_modulus().0.ilog2()],
);
}
bench_group.finish();
}
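// Benchmark decompression of a compressed server key; the compressed key is cloned in the
// untimed per-iteration setup so only `decompress` is measured.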
fn server_key_from_compressed_key(c: &mut Criterion) {
let mut bench_group = c.benchmark_group("uncompress_key");
bench_group
.sample_size(10)
.measurement_time(std::time::Duration::from_secs(60));
let mut params = SHORTINT_BENCH_PARAMS_TUNIFORM
.iter()
.chain(SHORTINT_BENCH_PARAMS_GAUSSIAN.iter())
.map(|p| (*p).into())
.collect::<Vec<PBSParameters>>();
let multi_bit_params = SHORTINT_MULTI_BIT_BENCH_PARAMS
.iter()
.map(|p| (*p).into())
.collect::<Vec<PBSParameters>>();
params.extend(&multi_bit_params);
for param in params.iter() {
let keys = KEY_CACHE.get_from_param(*param);
let sks_compressed = CompressedServerKey::new(keys.client_key());
let bench_id = format!("shortint::uncompress_key::{}", param.name());
bench_group.bench_function(&bench_id, |b| {
let clone_compressed_key = || sks_compressed.clone();
b.iter_batched(
clone_compressed_key,
|sks_cloned| {
let _ = sks_cloned.decompress();
},
criterion::BatchSize::PerIteration,
)
});
write_to_json::<u64, _>(
&bench_id,
*param,
param.name(),
"uncompress_key",
&OperatorType::Atomic,
param.message_modulus().0.ilog2(),
vec![param.message_modulus().0.ilog2()],
);
}
bench_group.finish();
}
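// The macros below generate one Criterion benchmark function per server-key method, delegating
// to the generic bench helpers defined above.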
macro_rules! define_server_key_unary_bench_fn (
(method_name:$server_key_method:ident, display_name:$name:ident) => {
fn $server_key_method(c: &mut Criterion) {
bench_server_key_unary_function(
c,
concat!("shortint::", stringify!($server_key_method)),
stringify!($name),
|server_key, lhs| {
let _ = server_key.$server_key_method(lhs);},
)
}
}
);
macro_rules! define_server_key_bench_fn (
(method_name:$server_key_method:ident, display_name:$name:ident) => {
fn $server_key_method(c: &mut Criterion) {
bench_server_key_binary_function(
c,
concat!("shortint::", stringify!($server_key_method)),
stringify!($name),
|server_key, lhs, rhs| {
let _ = server_key.$server_key_method(lhs, rhs);},
)
}
}
);
macro_rules! define_server_key_scalar_bench_fn (
(method_name:$server_key_method:ident, display_name:$name:ident) => {
fn $server_key_method(c: &mut Criterion) {
bench_server_key_binary_scalar_function(
c,
concat!("shortint::", stringify!($server_key_method)),
stringify!($name),
|server_key, lhs, rhs| {
let _ = server_key.$server_key_method(lhs, rhs);},
)
}
}
);
macro_rules! define_server_key_scalar_div_bench_fn (
(method_name:$server_key_method:ident, display_name:$name:ident) => {
fn $server_key_method(c: &mut Criterion) {
bench_server_key_binary_scalar_division_function(
c,
concat!("shortint::", stringify!($server_key_method)),
stringify!($name),
|server_key, lhs, rhs| {
let _ = server_key.$server_key_method(lhs, rhs);},
)
}
}
);
macro_rules! define_custom_bench_fn (
(function_name:$function:ident) => {
fn $function(c: &mut Criterion) {
::paste::paste! {
[<$function _bench>](
c,
)
}
}
}
);
define_server_key_unary_bench_fn!(
method_name: unchecked_neg,
display_name: negation
);
define_server_key_bench_fn!(
method_name: unchecked_add,
display_name: add
);
define_server_key_bench_fn!(
method_name: unchecked_sub,
display_name: sub
);
define_server_key_bench_fn!(
method_name: unchecked_mul_lsb,
display_name: mul
);
define_server_key_bench_fn!(
method_name: unchecked_mul_msb,
display_name: mul
);
define_server_key_bench_fn!(
method_name: unchecked_div,
display_name: div
);
define_server_key_bench_fn!(
method_name: smart_bitand,
display_name: bitand
);
define_server_key_bench_fn!(
method_name: smart_bitor,
display_name: bitor
);
define_server_key_bench_fn!(
method_name: smart_bitxor,
display_name: bitxor
);
define_server_key_bench_fn!(
method_name: smart_add,
display_name: add
);
define_server_key_bench_fn!(
method_name: smart_sub,
display_name: sub
);
define_server_key_bench_fn!(
method_name: smart_mul_lsb,
display_name: mul
);
define_server_key_bench_fn!(
method_name: bitand,
display_name: bitand
);
define_server_key_bench_fn!(
method_name: bitor,
display_name: bitor
);
define_server_key_bench_fn!(
method_name: bitxor,
display_name: bitxor
);
define_server_key_bench_fn!(
method_name: add,
display_name: add
);
define_server_key_bench_fn!(
method_name: sub,
display_name: sub
);
define_server_key_bench_fn!(
method_name: mul,
display_name: mul
);
define_server_key_bench_fn!(
method_name: div,
display_name: div
);
define_server_key_bench_fn!(
method_name: greater,
display_name: greater_than
);
define_server_key_bench_fn!(
method_name: greater_or_equal,
display_name: greater_or_equal
);
define_server_key_bench_fn!(
method_name: less,
display_name: less_than
);
define_server_key_bench_fn!(
method_name: less_or_equal,
display_name: less_or_equal
);
define_server_key_bench_fn!(
method_name: equal,
display_name: equal
);
define_server_key_bench_fn!(
method_name: not_equal,
display_name: not_equal
);
define_server_key_unary_bench_fn!(
method_name: neg,
display_name: negation
);
define_server_key_bench_fn!(
method_name: unchecked_greater,
display_name: greater_than
);
define_server_key_bench_fn!(
method_name: unchecked_less,
display_name: less_than
);
define_server_key_bench_fn!(
method_name: unchecked_equal,
display_name: equal
);
define_server_key_scalar_bench_fn!(
method_name: unchecked_scalar_add,
display_name: add
);
define_server_key_scalar_bench_fn!(
method_name: unchecked_scalar_sub,
display_name: sub
);
define_server_key_scalar_bench_fn!(
method_name: unchecked_scalar_mul,
display_name: mul
);
define_server_key_scalar_bench_fn!(
method_name: unchecked_scalar_left_shift,
display_name: left_shift
);
define_server_key_scalar_bench_fn!(
method_name: unchecked_scalar_right_shift,
display_name: right_shift
);
define_server_key_scalar_div_bench_fn!(
method_name: unchecked_scalar_div,
display_name: div
);
define_server_key_scalar_div_bench_fn!(
method_name: unchecked_scalar_mod,
display_name: modulo
);
define_server_key_scalar_bench_fn!(
method_name: scalar_add,
display_name: add
);
define_server_key_scalar_bench_fn!(
method_name: scalar_sub,
display_name: sub
);
define_server_key_scalar_bench_fn!(
method_name: scalar_mul,
display_name: mul
);
define_server_key_scalar_bench_fn!(
method_name: scalar_left_shift,
display_name: left_shift
);
define_server_key_scalar_bench_fn!(
method_name: scalar_right_shift,
display_name: right_shift
);
define_server_key_scalar_div_bench_fn!(
method_name: scalar_div,
display_name: div
);
define_server_key_scalar_div_bench_fn!(
method_name: scalar_mod,
display_name: modulo
);
define_server_key_scalar_bench_fn!(
method_name: scalar_greater,
display_name: greater_than
);
define_server_key_scalar_bench_fn!(
method_name: scalar_greater_or_equal,
display_name: greater_or_equal
);
define_server_key_scalar_bench_fn!(
method_name: scalar_less,
display_name: less_than
);
define_server_key_scalar_bench_fn!(
method_name: scalar_less_or_equal,
display_name: less_or_equal
);
define_server_key_scalar_div_bench_fn!(
method_name: scalar_equal,
display_name: equal
);
define_server_key_scalar_div_bench_fn!(
method_name: scalar_not_equal,
display_name: not_equal
);
define_custom_bench_fn!(function_name: carry_extract);
define_custom_bench_fn!(
function_name: programmable_bootstrapping
);
criterion_group!(
smart_ops,
smart_bitand,
smart_bitor,
smart_bitxor,
smart_add,
smart_sub,
smart_mul_lsb
);
criterion_group!(
unchecked_ops,
unchecked_neg,
unchecked_add,
unchecked_sub,
unchecked_mul_lsb,
unchecked_mul_msb,
unchecked_div,
unchecked_greater,
unchecked_less,
unchecked_equal,
carry_extract,
programmable_bootstrapping
);
criterion_group!(
unchecked_scalar_ops,
unchecked_scalar_add,
unchecked_scalar_mul,
unchecked_scalar_sub,
unchecked_scalar_div,
unchecked_scalar_mod,
unchecked_scalar_left_shift,
unchecked_scalar_right_shift
);
criterion_group!(
default_ops,
neg,
bitand,
bitor,
bitxor,
add,
sub,
div,
mul,
greater,
greater_or_equal,
less,
less_or_equal,
equal,
not_equal
);
criterion_group!(
default_scalar_ops,
scalar_add,
scalar_sub,
scalar_div,
scalar_mul,
scalar_mod,
scalar_left_shift,
scalar_right_shift,
scalar_greater,
scalar_greater_or_equal,
scalar_less,
scalar_less_or_equal,
scalar_equal,
scalar_not_equal
);
criterion_group!(misc, server_key_from_compressed_key);
mod casting;
criterion_group!(
casting,
casting::pack_cast_64,
casting::pack_cast,
casting::cast
);
fn main() {
fn default_bench() {
casting();
default_ops();
default_scalar_ops();
misc();
}
match env::var("__TFHE_RS_BENCH_OP_FLAVOR") {
Ok(val) => {
match val.to_lowercase().as_str() {
"default" => default_bench(),
"smart" => smart_ops(),
"unchecked" => {
unchecked_ops();
unchecked_scalar_ops();
}
_ => panic!("unknown benchmark operations flavor"),
};
}
Err(_) => default_bench(),
};
Criterion::default().configure_from_args().final_summary();
}


@@ -0,0 +1,136 @@
use benchmark::params_aliases::*;
use benchmark::utilities::{write_to_json, OperatorType};
use criterion::Criterion;
use rayon::prelude::*;
use tfhe::keycache::NamedParam;
use tfhe::shortint::prelude::*;
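// Pack 64 ciphertexts encrypted under 1_1 parameters into 32 two-bit values and cast each of
// them to the 2_2 key in parallel.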
pub fn pack_cast_64(c: &mut Criterion) {
let bench_name = "shortint::pack_cast_64";
let mut bench_group = c.benchmark_group(bench_name);
let (client_key_1, server_key_1): (ClientKey, ServerKey) =
gen_keys(BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let (client_key_2, server_key_2): (ClientKey, ServerKey) =
gen_keys(BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
let ks_param = BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128;
let ks_param_name = ks_param.name();
let ksk = KeySwitchingKey::new(
(&client_key_1, Some(&server_key_1)),
(&client_key_2, &server_key_2),
ks_param,
);
let vec_ct = vec![client_key_1.encrypt(1); 64];
let bench_id = format!("{bench_name}_{ks_param_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let _ = (0..32)
.into_par_iter()
.map(|i| {
let byte_idx = 7 - i / 4;
let pair_idx = i % 4;
let b0 = &vec_ct[8 * byte_idx + 2 * pair_idx];
let b1 = &vec_ct[8 * byte_idx + 2 * pair_idx + 1];
ksk.cast(
&server_key_1.unchecked_add(b0, &server_key_1.unchecked_scalar_mul(b1, 2)),
)
})
.collect::<Vec<_>>();
});
});
write_to_json::<u64, _>(
&bench_id,
ks_param,
ks_param_name,
"pack_cast_64",
&OperatorType::Atomic,
0,
vec![],
);
}
pub fn pack_cast(c: &mut Criterion) {
let bench_name = "shortint::pack_cast";
let mut bench_group = c.benchmark_group(bench_name);
let (client_key_1, server_key_1): (ClientKey, ServerKey) =
gen_keys(BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let (client_key_2, server_key_2): (ClientKey, ServerKey) =
gen_keys(BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
let ks_param = BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128;
let ks_param_name = "BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128";
let ksk = KeySwitchingKey::new(
(&client_key_1, Some(&server_key_1)),
(&client_key_2, &server_key_2),
ks_param,
);
let ct_1 = client_key_1.encrypt(1);
let ct_2 = client_key_1.encrypt(1);
let bench_id = format!("{bench_name}_{ks_param_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let _ = ksk.cast(
&server_key_1.unchecked_add(&ct_1, &server_key_1.unchecked_scalar_mul(&ct_2, 2)),
);
});
});
write_to_json::<u64, _>(
&bench_id,
ks_param,
ks_param_name,
"pack_cast",
&OperatorType::Atomic,
0,
vec![],
);
}
pub fn cast(c: &mut Criterion) {
let bench_name = "shortint::cast";
let mut bench_group = c.benchmark_group(bench_name);
let (client_key_1, server_key_1): (ClientKey, ServerKey) =
gen_keys(BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let (client_key_2, server_key_2): (ClientKey, ServerKey) =
gen_keys(BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
let ks_param = BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128;
let ks_param_name = "BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128";
let ksk = KeySwitchingKey::new(
(&client_key_1, Some(&server_key_1)),
(&client_key_2, &server_key_2),
ks_param,
);
let ct = client_key_1.encrypt(1);
let bench_id = format!("{bench_name}_{ks_param_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let _ = ksk.cast(&ct);
});
});
write_to_json::<u64, _>(
&bench_id,
ks_param,
ks_param_name,
"cast",
&OperatorType::Atomic,
0,
vec![],
);
}


@@ -0,0 +1,82 @@
use benchmark::params_aliases::*;
use criterion::{black_box, criterion_group, Criterion};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use tfhe::shortint::prelude::*;
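// Benchmark GLWE packing compression on shortint ciphertexts: packing a list of 256 ciphertexts,
// unpacking it fully, partially and element by element, and a full pack/unpack round trip.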
fn glwe_packing(c: &mut Criterion) {
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let comp_param = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let number_to_pack = 256;
let bench_name = "shortint_packing_compression";
let mut bench_group = c.benchmark_group(bench_name);
// Generate the client key and the compression/decompression keys:
let cks = ClientKey::new(param);
let private_compression_key = cks.new_compression_private_key(comp_param);
let (compression_key, decompression_key) =
cks.new_compression_decompression_keys(&private_compression_key);
let ct: Vec<_> = (0..number_to_pack).map(|_| cks.encrypt(0)).collect();
bench_group.bench_function("pack".to_owned(), |b| {
b.iter(|| {
let packed = compression_key.compress_ciphertexts_into_list(&ct);
_ = black_box(packed);
})
});
let packed = compression_key.compress_ciphertexts_into_list(&ct);
bench_group.bench_function("unpack_all".to_owned(), |b| {
b.iter(|| {
(0..number_to_pack).into_par_iter().for_each(|i| {
let unpacked = decompression_key.unpack(&packed, i);
_ = black_box(unpacked);
});
})
});
bench_group.bench_function("unpack_one_lwe".to_owned(), |b| {
b.iter(|| {
let unpacked = decompression_key.unpack(&packed, 0);
_ = black_box(unpacked);
})
});
bench_group.bench_function("unpack_64b".to_owned(), |b| {
b.iter(|| {
(0..32).into_par_iter().for_each(|i| {
let unpacked = decompression_key.unpack(&packed, i);
_ = black_box(unpacked);
});
})
});
bench_group.bench_function("pack_unpack".to_owned(), |b| {
b.iter(|| {
let packed = compression_key.compress_ciphertexts_into_list(&ct);
(0..number_to_pack).into_par_iter().for_each(|i| {
let unpacked = decompression_key.unpack(&packed, i);
_ = black_box(unpacked);
});
})
});
}
criterion_group!(glwe_packing2, glwe_packing);
fn main() {
glwe_packing2();
Criterion::default().configure_from_args().final_summary();
}


@@ -0,0 +1,29 @@
use benchmark::params_aliases::*;
use criterion::{black_box, criterion_group, Criterion};
use tfhe::keycache::NamedParam;
use tfhe::shortint::keycache::KEY_CACHE;
use tfhe_csprng::seeders::Seed;
fn oprf(c: &mut Criterion) {
let bench_name = "shortint-oprf";
let mut bench_group = c.benchmark_group(bench_name);
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS;
let keys = KEY_CACHE.get_from_param(param);
let sks = keys.server_key();
bench_group.bench_function(format!("2-bits-oprf::{}", param.name()), |b| {
b.iter(|| {
_ = black_box(sks.generate_oblivious_pseudo_random(Seed(0), 2));
})
});
}
criterion_group!(oprf2, oprf);
fn main() {
oprf2();
Criterion::default().configure_from_args().final_summary();
}


@@ -0,0 +1,88 @@
use benchmark::utilities::{write_to_json, OperatorType};
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::Path;
use tfhe::boolean::parameters::{DEFAULT_PARAMETERS, PARAMETERS_ERROR_PROB_2_POW_MINUS_165};
use tfhe::boolean::{client_key, server_key};
fn write_result(file: &mut File, name: &str, value: usize) {
let line = format!("{name},{value}\n");
let error_message = format!("cannot write {name} result into file");
file.write_all(line.as_bytes()).expect(&error_message);
}
fn client_server_key_sizes(results_file: &Path) {
let boolean_params_vec = [
(DEFAULT_PARAMETERS, "DEFAULT_PARAMETERS"),
(PARAMETERS_ERROR_PROB_2_POW_MINUS_165, "TFHE_LIB_PARAMETERS"),
];
File::create(results_file).expect("create results file failed");
let mut file = OpenOptions::new()
.append(true)
.open(results_file)
.expect("cannot open results file");
let operator = OperatorType::Atomic;
println!("Generating boolean (ClientKey, ServerKey)");
for (i, (params, params_name)) in boolean_params_vec.iter().enumerate() {
println!(
"Generating [{} / {}] : {}",
i + 1,
boolean_params_vec.len(),
params_name.to_lowercase()
);
let cks = client_key::ClientKey::new(params);
let sks = server_key::ServerKey::new(&cks);
let ksk_size = sks.key_switching_key_size_bytes();
let test_name = format!("boolean_key_sizes_{params_name}_ksk");
write_result(&mut file, &test_name, ksk_size);
write_to_json::<u32, _>(
&test_name,
*params,
*params_name,
"KSK",
&operator,
0,
vec![],
);
println!(
"Element in KSK: {}, size in bytes: {}",
sks.key_switching_key_size_elements(),
ksk_size,
);
let bsk_size = sks.bootstrapping_key_size_bytes();
let test_name = format!("boolean_key_sizes_{params_name}_bsk");
write_result(&mut file, &test_name, bsk_size);
write_to_json::<u32, _>(
&test_name,
*params,
*params_name,
"BSK",
&operator,
0,
vec![],
);
println!(
"Element in BSK: {}, size in bytes: {}",
sks.bootstrapping_key_size_elements(),
bsk_size,
);
}
}
fn main() {
let work_dir = std::env::current_dir().unwrap();
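// Run from the `tfhe` directory so that the results file is written in the same location as the
// other key size benchmarks.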
let mut new_work_dir = work_dir;
new_work_dir.push("tfhe");
std::env::set_current_dir(new_work_dir).unwrap();
let results_file = Path::new("boolean_key_sizes.csv");
client_server_key_sizes(results_file)
}


@@ -0,0 +1,145 @@
use benchmark::params_aliases::*;
use benchmark::utilities::{write_to_json, OperatorType};
use rand::Rng;
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::Path;
use tfhe::integer::U256;
use tfhe::keycache::NamedParam;
use tfhe::shortint::PBSParameters;
use tfhe::{generate_keys, CompactCiphertextList, CompactPublicKey, ConfigBuilder};
fn write_result(file: &mut File, name: &str, value: usize) {
let line = format!("{name},{value}\n");
let error_message = format!("cannot write {name} result into file");
file.write_all(line.as_bytes()).expect(&error_message);
}
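// Measure the serialized sizes of a compact public key and of compact ciphertext lists of
// NB_CTXT elements, for 32-bit and 256-bit inputs.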
pub fn cpk_and_cctl_sizes(results_file: &Path) {
const NB_CTXT: usize = 5;
let mut rng = rand::thread_rng();
File::create(results_file).expect("create results file failed");
let mut file = OpenOptions::new()
.create(true)
.truncate(true)
.write(true)
.open(results_file)
.expect("cannot open results file");
let operator = OperatorType::Atomic;
{
let params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::default()
.use_custom_parameters(params)
.use_dedicated_compact_public_key_parameters((
BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
))
.build();
let (client_key, _) = generate_keys(config);
let test_name = format!("hlapi_sizes_{}_cpk", params.name());
let params: PBSParameters = params.into();
println!("Sizes for: {} and 32 bits", params.name());
let public_key = CompactPublicKey::new(&client_key);
let cpk_size = bincode::serialize(&public_key).unwrap().len();
println!("PK size: {cpk_size} bytes");
write_result(&mut file, &test_name, cpk_size);
write_to_json::<u64, _>(
&test_name,
params,
params.name(),
"CPK",
&operator,
0,
vec![],
);
let test_name = format!("hlapi_sizes_{}_cctl_{NB_CTXT}_len_32_bits", params.name());
let vec_inputs: Vec<_> = (0..NB_CTXT).map(|_| rng.gen::<u32>()).collect();
let encrypted_inputs = CompactCiphertextList::builder(&public_key)
.extend(vec_inputs.iter().copied())
.build();
let cctl_size = bincode::serialize(&encrypted_inputs).unwrap().len();
println!("Compact CT list for {NB_CTXT} CTs: {cctl_size} bytes");
write_result(&mut file, &test_name, cctl_size);
write_to_json::<u64, _>(
&test_name,
params,
params.name(),
"CCTL",
&operator,
0,
vec![],
);
}
// 256 bits
{
let params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::default()
.use_custom_parameters(params)
.use_dedicated_compact_public_key_parameters((
BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
))
.build();
let (client_key, _) = generate_keys(config);
let params: PBSParameters = params.into();
println!("Sizes for: {} and 256 bits", params.name());
let public_key = CompactPublicKey::new(&client_key);
println!(
"PK size: {} bytes",
bincode::serialize(&public_key).unwrap().len()
);
let test_name = format!("hlapi_sizes_{}_cctl_{NB_CTXT}_len_256_bits", params.name());
let vec_inputs: Vec<_> = (0..NB_CTXT).map(|_| U256::from(rng.gen::<u32>())).collect();
let encrypted_inputs = CompactCiphertextList::builder(&public_key)
.extend(vec_inputs.iter().copied())
.build();
let cctl_size = bincode::serialize(&encrypted_inputs).unwrap().len();
println!("Compact CT list for {NB_CTXT} CTs: {cctl_size} bytes");
write_result(&mut file, &test_name, cctl_size);
write_to_json::<u64, _>(
&test_name,
params,
params.name(),
"CCTL",
&operator,
0,
vec![],
);
}
}
fn main() {
let work_dir = std::env::current_dir().unwrap();
println!("work_dir: {}", std::env::current_dir().unwrap().display());
// Change workdir so that the location of the keycache matches the one for tests
let mut new_work_dir = work_dir;
new_work_dir.push("tfhe");
std::env::set_current_dir(new_work_dir).unwrap();
let results_file = Path::new("hlapi_cpk_and_cctl_sizes.csv");
cpk_and_cctl_sizes(results_file)
}


@@ -0,0 +1,291 @@
use benchmark::params_aliases::*;
use benchmark::utilities::{write_to_json, CryptoParametersRecord, OperatorType};
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::Path;
use tfhe::keycache::NamedParam;
use tfhe::shortint::atomic_pattern::compressed::CompressedAtomicPatternServerKey;
use tfhe::shortint::keycache::KEY_CACHE;
use tfhe::shortint::server_key::{StandardServerKey, StandardServerKeyView};
use tfhe::shortint::{
ClassicPBSParameters, ClientKey, CompactPrivateKey, CompressedCompactPublicKey,
CompressedKeySwitchingKey, CompressedServerKey, PBSParameters,
};
fn write_result(file: &mut File, name: &str, value: usize) {
let line = format!("{name},{value}\n");
let error_message = format!("cannot write {name} result into file");
file.write_all(line.as_bytes()).expect(&error_message);
}
fn client_server_key_sizes(results_file: &Path) {
let shortint_params_vec: Vec<PBSParameters> = vec![
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128.into(),
];
File::create(results_file).expect("create results file failed");
let mut file = OpenOptions::new()
.append(true)
.open(results_file)
.expect("cannot open results file");
let operator = OperatorType::Atomic;
println!("Generating shortint (ClientKey, ServerKey)");
for (i, params) in shortint_params_vec.iter().copied().enumerate() {
println!(
"Generating [{} / {}] : {}",
i + 1,
shortint_params_vec.len(),
params.name().to_lowercase()
);
let keys = KEY_CACHE.get_from_param(params);
let cks = keys.client_key();
let sks = StandardServerKeyView::try_from(keys.server_key().as_view()).unwrap();
let ksk_size = sks.key_switching_key_size_bytes();
let test_name = format!("shortint_key_sizes_{}_ksk", params.name());
write_result(&mut file, &test_name, ksk_size);
write_to_json::<u64, _>(
&test_name,
params,
params.name(),
"KSK",
&operator,
0,
vec![],
);
println!(
"Element in KSK: {}, size in bytes: {}",
sks.key_switching_key_size_elements(),
ksk_size,
);
let bsk_size = sks.bootstrapping_key_size_bytes();
let test_name = format!("shortint_key_sizes_{}_bsk", params.name());
write_result(&mut file, &test_name, bsk_size);
write_to_json::<u64, _>(
&test_name,
params,
params.name(),
"BSK",
&operator,
0,
vec![],
);
println!(
"Element in BSK: {}, size in bytes: {}",
sks.bootstrapping_key_size_elements(),
bsk_size,
);
let sks_compressed = CompressedServerKey::new(cks);
let bsk_compressed_size = sks_compressed.bootstrapping_key_size_bytes();
let test_name = format!("shortint_key_sizes_{}_bsk_compressed", params.name());
write_result(&mut file, &test_name, bsk_compressed_size);
write_to_json::<u64, _>(
&test_name,
params,
params.name(),
"BSK",
&operator,
0,
vec![],
);
println!(
"Element in BSK compressed: {}, size in bytes: {}",
sks_compressed.bootstrapping_key_size_elements(),
bsk_compressed_size,
);
// Clear keys as we go to avoid filling the RAM
KEY_CACHE.clear_in_memory_cache()
}
}
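// Serialize the given key material with bincode, record its size in the results file and emit
// the corresponding JSON entry.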
fn measure_serialized_size<T: serde::Serialize, P: Into<CryptoParametersRecord<u64>> + Clone>(
to_serialize: &T,
param: P,
param_name: &str,
test_name_suffix: &str,
display_name: &str,
file: &mut File,
) {
let serialized = bincode::serialize(to_serialize).unwrap();
let size = serialized.len();
let test_name = format!("shortint_key_sizes_{param_name}_{test_name_suffix}");
write_result(file, &test_name, size);
write_to_json::<u64, _>(
&test_name,
param.clone(),
param_name,
display_name,
&OperatorType::Atomic,
0,
vec![],
);
println!("{test_name_suffix} {param_name} -> size: {size} bytes",);
}
fn tuniform_key_set_sizes(results_file: &Path) {
File::create(results_file).expect("create results file failed");
let mut file = OpenOptions::new()
.append(true)
.open(results_file)
.expect("cannot open results file");
println!("Measuring shortint key sizes:");
let param_fhe = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let param_fhe_name = param_fhe.name();
let cks = ClientKey::new(param_fhe);
let compressed_sks = CompressedServerKey::new(&cks);
let sks = StandardServerKey::try_from(compressed_sks.decompress()).unwrap();
let std_compressed_ap_key = match &compressed_sks.compressed_ap_server_key {
CompressedAtomicPatternServerKey::Standard(
compressed_standard_atomic_pattern_server_key,
) => compressed_standard_atomic_pattern_server_key,
CompressedAtomicPatternServerKey::KeySwitch32(_) => {
panic!("KS32 is unsupported to measure key sizes at the moment")
}
};
measure_serialized_size(
&sks.atomic_pattern.key_switching_key,
<ClassicPBSParameters as Into<PBSParameters>>::into(param_fhe),
&param_fhe_name,
"ksk",
"KSK",
&mut file,
);
measure_serialized_size(
std_compressed_ap_key.key_switching_key(),
<ClassicPBSParameters as Into<PBSParameters>>::into(param_fhe),
&param_fhe_name,
"ksk_compressed",
"KSK",
&mut file,
);
measure_serialized_size(
&sks.atomic_pattern.bootstrapping_key,
<ClassicPBSParameters as Into<PBSParameters>>::into(param_fhe),
&param_fhe_name,
"bsk",
"BSK",
&mut file,
);
measure_serialized_size(
&std_compressed_ap_key.bootstrapping_key(),
<ClassicPBSParameters as Into<PBSParameters>>::into(param_fhe),
&param_fhe_name,
"bsk_compressed",
"BSK",
&mut file,
);
let param_pke = BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let param_pke_name = param_pke.name();
let compact_private_key = CompactPrivateKey::new(param_pke);
let compressed_pk = CompressedCompactPublicKey::new(&compact_private_key);
let pk = compressed_pk.decompress();
measure_serialized_size(&pk, param_pke, &param_pke_name, "cpk", "CPK", &mut file);
measure_serialized_size(
&compressed_pk,
param_pke,
&param_pke_name,
"cpk_compressed",
"CPK",
&mut file,
);
let param_compression = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let param_compression_name = param_compression.name();
let params_tuple = (
param_compression,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
);
let private_compression_key = cks.new_compression_private_key(param_compression);
let (compression_key, decompression_key) =
cks.new_compression_decompression_keys(&private_compression_key);
measure_serialized_size(
&compression_key,
params_tuple,
&param_compression_name,
"compression_key",
"CompressionKey",
&mut file,
);
measure_serialized_size(
&decompression_key,
params_tuple,
&param_compression_name,
"decompression_key",
"CompressionKey",
&mut file,
);
let param_casting = BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let param_casting_name = param_casting.name();
let compressed_casting_key = CompressedKeySwitchingKey::new(
(&compact_private_key, None),
(&cks, &compressed_sks),
param_casting,
);
let casting_key = compressed_casting_key.decompress();
measure_serialized_size(
&casting_key.into_raw_parts().0,
param_casting,
&param_casting_name,
"casting_key",
"CastKey",
&mut file,
);
measure_serialized_size(
&compressed_casting_key.into_raw_parts().0,
param_casting,
&param_casting_name,
"casting_key_compressed",
"CastKey",
&mut file,
);
}
fn main() {
let work_dir = std::env::current_dir().unwrap();
println!("work_dir: {}", std::env::current_dir().unwrap().display());
// Change workdir so that the location of the keycache matches the one for tests
let mut new_work_dir = work_dir;
new_work_dir.push("tfhe");
std::env::set_current_dir(new_work_dir).unwrap();
let results_file = Path::new("shortint_key_sizes.csv");
client_server_key_sizes(results_file);
tuniform_key_set_sizes(results_file);
}


@@ -0,0 +1,87 @@
use benchmark::utilities::{write_to_json, OperatorType};
use clap::Parser;
use std::collections::HashMap;
use std::fs;
use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::Path;
use tfhe::keycache::NamedParam;
use tfhe::shortint::keycache::get_shortint_parameter_set_from_name;
use tfhe::shortint::{ClassicPBSParameters, PBSParameters};
const BENCHMARK_NAME_PREFIX: &str = "wasm::";
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
raw_results_file: String,
}
fn params_from_name(name: &str) -> ClassicPBSParameters {
match get_shortint_parameter_set_from_name(name.to_uppercase().as_str())
.pbs_parameters()
.unwrap()
{
PBSParameters::PBS(p) => p,
PBSParameters::MultiBitPBS(_) => {
panic!("Tried to get a MultiBitPBS, expected ClassicPBSParameters")
}
}
}
fn write_result(file: &mut File, name: &str, value: usize) {
let line = format!("{name},{value}\n");
let error_message = format!("cannot write {name} result into file");
file.write_all(line.as_bytes()).expect(&error_message);
}
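// Parse raw wasm benchmark results (a JSON map of benchmark name to value) and re-emit them in
// the shared CSV/JSON format; timing values are converted from milliseconds to nanoseconds.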
pub fn parse_wasm_benchmarks(results_file: &Path, raw_results_file: &Path) {
File::create(results_file).expect("create results file failed");
let mut file = OpenOptions::new()
.append(true)
.open(results_file)
.expect("cannot open parsed results file");
let operator = OperatorType::Atomic;
let raw_results = fs::read_to_string(raw_results_file).expect("cannot open raw results file");
let results_as_json: HashMap<String, f32> = serde_json::from_str(&raw_results).unwrap();
for (full_name, val) in results_as_json.iter() {
let prefixed_full_name = format!("{BENCHMARK_NAME_PREFIX}{full_name}");
let name_parts = full_name.split("_mean_").collect::<Vec<_>>();
let bench_name = name_parts[0];
let params: PBSParameters = params_from_name(name_parts[1]).into();
println!("{name_parts:?}");
if bench_name.contains("_size") {
write_result(&mut file, &prefixed_full_name, *val as usize);
} else {
let value_in_ns = (val * 1_000_000_f32) as usize;
write_result(&mut file, &prefixed_full_name, value_in_ns);
}
write_to_json::<u64, _>(
&prefixed_full_name,
params,
params.name(),
bench_name,
&operator,
0,
vec![],
);
}
}
fn main() {
let args = Args::parse();
let work_dir = std::env::current_dir().unwrap();
let mut new_work_dir = work_dir;
new_work_dir.push("tfhe");
std::env::set_current_dir(new_work_dir).unwrap();
let results_file = Path::new("wasm_pk_gen.csv");
let raw_results = Path::new(&args.raw_results_file);
parse_wasm_benchmarks(results_file, raw_results);
}


@@ -0,0 +1,3 @@
pub mod params;
pub mod params_aliases;
pub mod utilities;


@@ -0,0 +1,449 @@
#[cfg(feature = "boolean")]
pub mod boolean_params {
use crate::utilities::CryptoParametersRecord;
use tfhe::boolean::parameters::{
DEFAULT_PARAMETERS, DEFAULT_PARAMETERS_KS_PBS, PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
};
pub fn benchmark_32bits_parameters() -> Vec<(String, CryptoParametersRecord<u32>)> {
[
("BOOLEAN_DEFAULT_PARAMS", DEFAULT_PARAMETERS),
(
"BOOLEAN_TFHE_LIB_PARAMS",
PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
),
("BOOLEAN_DEFAULT_PARAMS_KS_PBS", DEFAULT_PARAMETERS_KS_PBS),
]
.iter()
.map(|(name, params)| (name.to_string(), params.to_owned().into()))
.collect()
}
}
#[cfg(feature = "boolean")]
pub use boolean_params::*;
#[cfg(feature = "shortint")]
pub mod shortint_params {
use crate::params_aliases::*;
use crate::utilities::CryptoParametersRecord;
use std::collections::HashMap;
use std::env;
use std::sync::OnceLock;
use tfhe::core_crypto::prelude::{DynamicDistribution, LweBskGroupingFactor};
use tfhe::keycache::NamedParam;
use tfhe::shortint::{
CarryModulus, ClassicPBSParameters, MessageModulus, MultiBitPBSParameters, PBSParameters,
};
pub const SHORTINT_BENCH_PARAMS_TUNIFORM: [ClassicPBSParameters; 4] = [
BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128,
];
pub const SHORTINT_BENCH_PARAMS_GAUSSIAN: [ClassicPBSParameters; 4] = [
BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M128,
];
#[cfg(feature = "gpu")]
pub const SHORTINT_MULTI_BIT_BENCH_PARAMS: [MultiBitPBSParameters; 6] = [
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128,
];
#[cfg(not(feature = "gpu"))]
pub const SHORTINT_MULTI_BIT_BENCH_PARAMS: [MultiBitPBSParameters; 6] = [
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128,
];
pub fn benchmark_parameters() -> Vec<(String, CryptoParametersRecord<u64>)> {
match get_parameters_set() {
ParametersSet::Default => SHORTINT_BENCH_PARAMS_TUNIFORM
.iter()
.chain(SHORTINT_BENCH_PARAMS_GAUSSIAN.iter())
.map(|params| {
(
params.name(),
<ClassicPBSParameters as Into<PBSParameters>>::into(*params)
.to_owned()
.into(),
)
})
.collect(),
ParametersSet::All => {
filter_parameters(
&BENCH_ALL_CLASSIC_PBS_PARAMETERS,
DesiredNoiseDistribution::Both,
DesiredBackend::Cpu, // No parameter set in this vector is GPU-specific
)
.into_iter()
.map(|(params, name)| {
(
name.to_string(),
<ClassicPBSParameters as Into<PBSParameters>>::into(*params)
.to_owned()
.into(),
)
})
.collect()
}
}
}
pub fn multi_bit_benchmark_parameters() -> Vec<(String, CryptoParametersRecord<u64>)> {
match get_parameters_set() {
ParametersSet::Default => SHORTINT_MULTI_BIT_BENCH_PARAMS
.iter()
.map(|params| {
(
params.name(),
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
.to_owned()
.into(),
)
})
.collect(),
ParametersSet::All => {
let desired_backend = if cfg!(feature = "gpu") {
DesiredBackend::Gpu
} else {
DesiredBackend::Cpu
};
filter_parameters(
&BENCH_ALL_MULTI_BIT_PBS_PARAMETERS,
DesiredNoiseDistribution::Both,
desired_backend,
)
.into_iter()
.map(|(params, name)| {
(
name.to_string(),
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
.to_owned()
.into(),
)
})
.collect()
}
}
}
pub fn multi_bit_benchmark_parameters_with_grouping(
) -> Vec<(String, CryptoParametersRecord<u64>, LweBskGroupingFactor)> {
match get_parameters_set() {
ParametersSet::Default => SHORTINT_MULTI_BIT_BENCH_PARAMS
.iter()
.map(|params| {
(
params.name(),
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
.to_owned()
.into(),
params.grouping_factor,
)
})
.collect(),
ParametersSet::All => {
let desired_backend = if cfg!(feature = "gpu") {
DesiredBackend::Gpu
} else {
DesiredBackend::Cpu
};
filter_parameters(
&BENCH_ALL_MULTI_BIT_PBS_PARAMETERS,
DesiredNoiseDistribution::Both,
desired_backend,
)
.into_iter()
.map(|(params, name)| {
(
name.to_string(),
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
.to_owned()
.into(),
params.grouping_factor,
)
})
.collect()
}
}
}
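// Select multi-bit or classic PBS parameter sets depending on the __TFHE_RS_PARAM_TYPE
// environment variable.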
pub fn raw_benchmark_parameters() -> Vec<PBSParameters> {
let is_multi_bit = match env::var("__TFHE_RS_PARAM_TYPE") {
Ok(val) => val.to_lowercase() == "multi_bit",
Err(_) => false,
};
if is_multi_bit {
SHORTINT_MULTI_BIT_BENCH_PARAMS
.iter()
.map(|p| (*p).into())
.collect()
} else {
SHORTINT_BENCH_PARAMS_TUNIFORM
.iter()
.chain(SHORTINT_BENCH_PARAMS_GAUSSIAN.iter())
.map(|p| (*p).into())
.collect()
}
}
pub fn benchmark_compression_parameters() -> Vec<(String, CryptoParametersRecord<u64>)> {
vec![(
BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.name(),
(
BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
)
.into(),
)]
}
// This array has been built from performance benchmarks measuring latency over a
// matrix of 4 parameter sets, 3 grouping factors and a wide range of thread counts.
// The u64 values are the optimal number of threads to use for a given triplet
// representing one or more parameter sets.
const MULTI_BIT_THREADS_ARRAY: [((MessageModulus, CarryModulus, LweBskGroupingFactor), u64);
12] = [
(
(MessageModulus(2), CarryModulus(2), LweBskGroupingFactor(2)),
5,
),
(
(MessageModulus(4), CarryModulus(4), LweBskGroupingFactor(2)),
5,
),
(
(MessageModulus(8), CarryModulus(8), LweBskGroupingFactor(2)),
5,
),
(
(
MessageModulus(16),
CarryModulus(16),
LweBskGroupingFactor(2),
),
5,
),
(
(MessageModulus(2), CarryModulus(2), LweBskGroupingFactor(3)),
7,
),
(
(MessageModulus(4), CarryModulus(4), LweBskGroupingFactor(3)),
9,
),
(
(MessageModulus(8), CarryModulus(8), LweBskGroupingFactor(3)),
10,
),
(
(
MessageModulus(16),
CarryModulus(16),
LweBskGroupingFactor(3),
),
10,
),
(
(MessageModulus(2), CarryModulus(2), LweBskGroupingFactor(4)),
11,
),
(
(MessageModulus(4), CarryModulus(4), LweBskGroupingFactor(4)),
13,
),
(
(MessageModulus(8), CarryModulus(8), LweBskGroupingFactor(4)),
11,
),
(
(
MessageModulus(16),
CarryModulus(16),
LweBskGroupingFactor(4),
),
11,
),
];
/// Return the number of threads to use for parameter sets performing multithreaded programmable
/// bootstrapping.
///
/// Message modulus and carry modulus must be equal.
/// Only grouping factors 2, 3 and 4 are supported.
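/// For example, per the table above, `multi_bit_num_threads(4, 4, 3)` returns `Some(9)`, while
/// mismatched moduli such as `multi_bit_num_threads(2, 4, 3)` return `None`.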
pub fn multi_bit_num_threads(
message_modulus: u64,
carry_modulus: u64,
grouping_factor: usize,
) -> Option<u64> {
// TODO Implement an interpolation mechanism for X_Y parameter sets
if message_modulus != carry_modulus || ![2, 3, 4].contains(&(grouping_factor as i32)) {
return None;
}
let thread_map: HashMap<(MessageModulus, CarryModulus, LweBskGroupingFactor), u64> =
HashMap::from_iter(MULTI_BIT_THREADS_ARRAY);
thread_map
.get(&(
MessageModulus(message_modulus),
CarryModulus(carry_modulus),
LweBskGroupingFactor(grouping_factor),
))
.copied()
}
pub static PARAMETERS_SET: OnceLock<ParametersSet> = OnceLock::new();
pub enum ParametersSet {
Default,
All,
}
impl ParametersSet {
pub fn from_env() -> Result<Self, String> {
let raw_value = env::var("__TFHE_RS_PARAMS_SET").unwrap_or("default".to_string());
match raw_value.to_lowercase().as_str() {
"default" => Ok(ParametersSet::Default),
"all" => Ok(ParametersSet::All),
_ => Err(format!("parameters set '{raw_value}' is not supported")),
}
}
}
pub fn get_parameters_set() -> &'static ParametersSet {
PARAMETERS_SET.get_or_init(|| ParametersSet::from_env().unwrap())
}
#[derive(Clone, Copy, Debug)]
pub enum DesiredNoiseDistribution {
Gaussian,
TUniform,
Both,
}
#[derive(Clone, Copy, Debug)]
pub enum DesiredBackend {
Cpu,
Gpu,
}
impl DesiredBackend {
fn matches_parameter_name_backend(&self, param_name: &str) -> bool {
matches!(
(self, param_name.to_lowercase().contains("gpu")),
(DesiredBackend::Cpu, false) | (DesiredBackend::Gpu, true)
)
}
}
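/// Keep only the parameter sets matching the requested noise distribution and backend; the
/// backend is inferred from the parameter name.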
pub fn filter_parameters<'a, P: Copy + Into<PBSParameters>>(
params: &[(&'a P, &'a str)],
desired_noise_distribution: DesiredNoiseDistribution,
desired_backend: DesiredBackend,
) -> Vec<(&'a P, &'a str)> {
params
.iter()
.filter_map(|(p, name)| {
let temp_param: PBSParameters = (**p).into();
match (
temp_param.lwe_noise_distribution(),
desired_noise_distribution,
) {
// If it's one of the pairs, we continue the process.
(DynamicDistribution::Gaussian(_), DesiredNoiseDistribution::Gaussian)
| (DynamicDistribution::TUniform(_), DesiredNoiseDistribution::TUniform)
| (_, DesiredNoiseDistribution::Both) => (),
_ => return None,
}
if !desired_backend.matches_parameter_name_backend(name) {
return None;
};
Some((*p, *name))
})
.collect()
}
}
#[cfg(feature = "shortint")]
pub use shortint_params::*;
#[cfg(feature = "integer")]
mod integer_params {
use crate::params_aliases::*;
use crate::utilities::EnvConfig;
use itertools::iproduct;
use std::vec::IntoIter;
use tfhe::shortint::PBSParameters;
/// An iterator that yields successive combinations of parameters and the number of blocks
/// needed to reach a given ciphertext bit size in radix decomposition.
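/// For example, with a message modulus of 4 (2 bits per block), a 64-bit target yields 32 blocks.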
pub struct ParamsAndNumBlocksIter {
params_and_bit_sizes: itertools::Product<IntoIter<PBSParameters>, IntoIter<usize>>,
}
impl Default for ParamsAndNumBlocksIter {
fn default() -> Self {
let env_config = EnvConfig::new();
if env_config.is_multi_bit {
#[cfg(feature = "gpu")]
let params = vec![
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128
.into(),
];
#[cfg(not(feature = "gpu"))]
let params = vec![
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
];
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
Self {
params_and_bit_sizes,
}
} else {
// FIXME Only one parameter set is tested since we only want to benchmark the quickest
// operations.
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into()];
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
Self {
params_and_bit_sizes,
}
}
}
}
impl Iterator for ParamsAndNumBlocksIter {
type Item = (PBSParameters, usize, usize);
fn next(&mut self) -> Option<Self::Item> {
let (param, bit_size) = self.params_and_bit_sizes.next()?;
let num_block =
(bit_size as f64 / (param.message_modulus().0 as f64).log(2.0)).ceil() as usize;
Some((param, num_block, bit_size))
}
}
}
#[cfg(feature = "integer")]
pub use integer_params::*;


@@ -0,0 +1,142 @@
#[cfg(any(feature = "shortint", feature = "integer"))]
pub mod shortint_params_aliases {
use tfhe::shortint::parameters::current_params::*;
use tfhe::shortint::parameters::{
ClassicPBSParameters, CompactPublicKeyEncryptionParameters, CompressionParameters,
MultiBitPBSParameters, NoiseSquashingParameters, ShortintKeySwitchingParameters,
};
// KS PBS Gaussian
pub const BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128: ClassicPBSParameters =
V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128: ClassicPBSParameters =
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128: ClassicPBSParameters =
V1_2_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M128: ClassicPBSParameters =
V1_2_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M128;
// KS PBS TUniform
pub const BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128: ClassicPBSParameters =
V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128: ClassicPBSParameters =
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128: ClassicPBSParameters =
V1_2_PARAM_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128: ClassicPBSParameters =
V1_2_PARAM_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS: ClassicPBSParameters =
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
pub const BENCH_ALL_CLASSIC_PBS_PARAMETERS: [(&ClassicPBSParameters, &str); 140] =
VEC_ALL_CLASSIC_PBS_PARAMETERS;
// MultiBit
// CPU Gaussian
pub const BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128:
MultiBitPBSParameters =
V1_2_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128:
MultiBitPBSParameters =
V1_2_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128:
MultiBitPBSParameters =
V1_2_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128:
MultiBitPBSParameters =
V1_2_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128:
MultiBitPBSParameters =
V1_2_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128:
MultiBitPBSParameters =
V1_2_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128;
// GPU Gaussian
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128:
MultiBitPBSParameters =
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128:
MultiBitPBSParameters =
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128:
MultiBitPBSParameters =
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128;
// GPU TUniform
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128:
MultiBitPBSParameters =
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
MultiBitPBSParameters =
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128:
MultiBitPBSParameters =
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128:
MultiBitPBSParameters =
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128;
pub const BENCH_ALL_MULTI_BIT_PBS_PARAMETERS: [(&MultiBitPBSParameters, &str); 240] =
VEC_ALL_MULTI_BIT_PBS_PARAMETERS;
// PKE
pub const BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
CompactPublicKeyEncryptionParameters =
V1_2_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1:
CompactPublicKeyEncryptionParameters =
V1_2_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
// KS
pub const BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
ShortintKeySwitchingParameters =
V1_2_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128:
ShortintKeySwitchingParameters =
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128;
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
ShortintKeySwitchingParameters =
V1_2_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
ShortintKeySwitchingParameters =
V1_2_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
// ZKV1
pub const BENCH_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1:
CompactPublicKeyEncryptionParameters =
V1_2_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1:
ShortintKeySwitchingParameters =
V1_2_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1:
ShortintKeySwitchingParameters =
V1_2_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
// ZKV2
pub const BENCH_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2:
CompactPublicKeyEncryptionParameters =
V1_2_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2;
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2:
ShortintKeySwitchingParameters =
V1_2_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2;
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2:
ShortintKeySwitchingParameters =
V1_2_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2;
// Compression
pub const BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128: CompressionParameters =
V1_2_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
pub const BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
CompressionParameters =
V1_2_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
// Noise Squashing
pub const BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
NoiseSquashingParameters =
V1_2_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
}
#[cfg(any(feature = "shortint", feature = "integer"))]
pub use shortint_params_aliases::*;


@@ -0,0 +1,650 @@
use serde::Serialize;
use std::path::PathBuf;
use std::sync::OnceLock;
use std::{env, fs};
#[cfg(feature = "gpu")]
use tfhe::core_crypto::gpu::get_number_of_gpus;
use tfhe::core_crypto::prelude::*;
#[cfg(feature = "boolean")]
pub mod boolean_utils {
use super::*;
use tfhe::boolean::parameters::BooleanParameters;
impl From<BooleanParameters> for CryptoParametersRecord<u32> {
fn from(params: BooleanParameters) -> Self {
CryptoParametersRecord {
lwe_dimension: Some(params.lwe_dimension),
glwe_dimension: Some(params.glwe_dimension),
polynomial_size: Some(params.polynomial_size),
lwe_noise_distribution: Some(params.lwe_noise_distribution),
glwe_noise_distribution: Some(params.glwe_noise_distribution),
pbs_base_log: Some(params.pbs_base_log),
pbs_level: Some(params.pbs_level),
ks_base_log: Some(params.ks_base_log),
ks_level: Some(params.ks_level),
ciphertext_modulus: Some(CiphertextModulus::<u32>::new_native()),
..Default::default()
}
}
}
}
#[allow(unused_imports)]
#[cfg(feature = "boolean")]
pub use boolean_utils::*;
#[cfg(feature = "shortint")]
pub mod shortint_utils {
use super::*;
use tfhe::shortint::parameters::compact_public_key_only::CompactPublicKeyEncryptionParameters;
use tfhe::shortint::parameters::list_compression::CompressionParameters;
use tfhe::shortint::parameters::ShortintKeySwitchingParameters;
use tfhe::shortint::{
AtomicPatternParameters, ClassicPBSParameters, MultiBitPBSParameters, PBSParameters,
ShortintParameterSet,
};
impl From<PBSParameters> for CryptoParametersRecord<u64> {
fn from(params: PBSParameters) -> Self {
AtomicPatternParameters::from(params).into()
}
}
impl From<AtomicPatternParameters> for CryptoParametersRecord<u64> {
fn from(params: AtomicPatternParameters) -> Self {
CryptoParametersRecord {
lwe_dimension: Some(params.lwe_dimension()),
glwe_dimension: Some(params.glwe_dimension()),
polynomial_size: Some(params.polynomial_size()),
lwe_noise_distribution: Some(params.lwe_noise_distribution()),
glwe_noise_distribution: Some(params.glwe_noise_distribution()),
pbs_base_log: Some(params.pbs_base_log()),
pbs_level: Some(params.pbs_level()),
ks_base_log: Some(params.ks_base_log()),
ks_level: Some(params.ks_level()),
message_modulus: Some(params.message_modulus().0),
carry_modulus: Some(params.carry_modulus().0),
ciphertext_modulus: Some(
params
.ciphertext_modulus()
.try_to()
.expect("failed to convert ciphertext modulus"),
),
..Default::default()
}
}
}
impl From<ShortintKeySwitchingParameters> for CryptoParametersRecord<u64> {
fn from(params: ShortintKeySwitchingParameters) -> Self {
CryptoParametersRecord {
ks_base_log: Some(params.ks_base_log),
ks_level: Some(params.ks_level),
..Default::default()
}
}
}
impl From<CompactPublicKeyEncryptionParameters> for CryptoParametersRecord<u64> {
fn from(params: CompactPublicKeyEncryptionParameters) -> Self {
CryptoParametersRecord {
message_modulus: Some(params.message_modulus.0),
carry_modulus: Some(params.carry_modulus.0),
ciphertext_modulus: Some(params.ciphertext_modulus),
..Default::default()
}
}
}
impl From<(CompressionParameters, ClassicPBSParameters)> for CryptoParametersRecord<u64> {
fn from((comp_params, pbs_params): (CompressionParameters, ClassicPBSParameters)) -> Self {
(comp_params, PBSParameters::PBS(pbs_params)).into()
}
}
impl From<(CompressionParameters, MultiBitPBSParameters)> for CryptoParametersRecord<u64> {
fn from(
(comp_params, multi_bit_pbs_params): (CompressionParameters, MultiBitPBSParameters),
) -> Self {
(
comp_params,
PBSParameters::MultiBitPBS(multi_bit_pbs_params),
)
.into()
}
}
impl From<(CompressionParameters, PBSParameters)> for CryptoParametersRecord<u64> {
fn from((comp_params, pbs_params): (CompressionParameters, PBSParameters)) -> Self {
let pbs_params = ShortintParameterSet::new_pbs_param_set(pbs_params);
let lwe_dimension = pbs_params.encryption_lwe_dimension();
CryptoParametersRecord {
lwe_dimension: Some(lwe_dimension),
br_level: Some(comp_params.br_level),
br_base_log: Some(comp_params.br_base_log),
packing_ks_level: Some(comp_params.packing_ks_level),
packing_ks_base_log: Some(comp_params.packing_ks_base_log),
packing_ks_polynomial_size: Some(comp_params.packing_ks_polynomial_size),
packing_ks_glwe_dimension: Some(comp_params.packing_ks_glwe_dimension),
lwe_per_glwe: Some(comp_params.lwe_per_glwe),
storage_log_modulus: Some(comp_params.storage_log_modulus),
lwe_noise_distribution: Some(pbs_params.encryption_noise_distribution()),
packing_ks_key_noise_distribution: Some(
comp_params.packing_ks_key_noise_distribution,
),
ciphertext_modulus: Some(pbs_params.ciphertext_modulus()),
..Default::default()
}
}
}
}
#[allow(unused_imports)]
#[cfg(feature = "shortint")]
pub use shortint_utils::*;
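// Illustrative sketch (not part of the diff): the impls above let a benchmark build a
// record from a shortint parameter set; the constant name below is an assumption, any
// ClassicPBSParameters value works the same way:
//
//     use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
//     use tfhe::shortint::PBSParameters;
//     let record: CryptoParametersRecord<u64> =
//         PBSParameters::PBS(PARAM_MESSAGE_2_CARRY_2_KS_PBS).into();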
#[derive(Clone, Copy, Default, Serialize)]
pub struct CryptoParametersRecord<Scalar: UnsignedInteger> {
pub lwe_dimension: Option<LweDimension>,
pub glwe_dimension: Option<GlweDimension>,
pub packing_ks_glwe_dimension: Option<GlweDimension>,
pub polynomial_size: Option<PolynomialSize>,
pub packing_ks_polynomial_size: Option<PolynomialSize>,
#[serde(serialize_with = "CryptoParametersRecord::serialize_distribution")]
pub lwe_noise_distribution: Option<DynamicDistribution<Scalar>>,
#[serde(serialize_with = "CryptoParametersRecord::serialize_distribution")]
pub glwe_noise_distribution: Option<DynamicDistribution<Scalar>>,
#[serde(serialize_with = "CryptoParametersRecord::serialize_distribution")]
pub packing_ks_key_noise_distribution: Option<DynamicDistribution<Scalar>>,
pub pbs_base_log: Option<DecompositionBaseLog>,
pub pbs_level: Option<DecompositionLevelCount>,
pub ks_base_log: Option<DecompositionBaseLog>,
pub ks_level: Option<DecompositionLevelCount>,
pub pfks_level: Option<DecompositionLevelCount>,
pub pfks_base_log: Option<DecompositionBaseLog>,
pub pfks_std_dev: Option<StandardDev>,
pub cbs_level: Option<DecompositionLevelCount>,
pub cbs_base_log: Option<DecompositionBaseLog>,
pub br_level: Option<DecompositionLevelCount>,
pub br_base_log: Option<DecompositionBaseLog>,
pub packing_ks_level: Option<DecompositionLevelCount>,
pub packing_ks_base_log: Option<DecompositionBaseLog>,
pub message_modulus: Option<u64>,
pub carry_modulus: Option<u64>,
pub ciphertext_modulus: Option<CiphertextModulus<Scalar>>,
pub lwe_per_glwe: Option<LweCiphertextCount>,
pub storage_log_modulus: Option<CiphertextModulusLog>,
}
impl<Scalar: UnsignedInteger> CryptoParametersRecord<Scalar> {
pub fn noise_distribution_as_string(noise_distribution: DynamicDistribution<Scalar>) -> String {
match noise_distribution {
DynamicDistribution::Gaussian(g) => format!("Gaussian({}, {})", g.std, g.mean),
DynamicDistribution::TUniform(t) => format!("TUniform({})", t.bound_log2()),
}
}
pub fn serialize_distribution<S>(
noise_distribution: &Option<DynamicDistribution<Scalar>>,
serializer: S,
) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match noise_distribution {
Some(d) => serializer.serialize_some(&Self::noise_distribution_as_string(*d)),
None => serializer.serialize_none(),
}
}
}
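// Note (illustrative, values are hypothetical): `serialize_distribution` renders noise
// distributions as human-readable strings in the JSON output, e.g. "TUniform(44)" for a
// t-uniform distribution, "Gaussian(3.2e-16, 0)" (std, mean) for a Gaussian one, and
// `null` when the field is absent.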
#[derive(Serialize)]
enum PolynomialMultiplication {
Fft,
// Ntt,
}
#[derive(Serialize)]
enum IntegerRepresentation {
Radix,
// Crt,
// Hybrid,
}
#[derive(Serialize)]
enum ExecutionType {
Sequential,
Parallel,
}
#[derive(Serialize)]
enum KeySetType {
Single,
// Multi,
}
#[derive(Serialize)]
enum OperandType {
CipherText,
PlainText,
}
#[derive(Clone, Serialize)]
pub enum OperatorType {
Atomic,
// AtomicPattern,
}
#[derive(Serialize)]
struct BenchmarkParametersRecord<Scalar: UnsignedInteger> {
display_name: String,
crypto_parameters_alias: String,
crypto_parameters: CryptoParametersRecord<Scalar>,
message_modulus: Option<u64>,
carry_modulus: Option<u64>,
ciphertext_modulus: usize,
bit_size: u32,
polynomial_multiplication: PolynomialMultiplication,
precision: u32,
error_probability: f64,
integer_representation: IntegerRepresentation,
decomposition_basis: Vec<u32>,
pbs_algorithm: Option<String>,
execution_type: ExecutionType,
key_set_type: KeySetType,
operand_type: OperandType,
operator_type: OperatorType,
}
/// Writes benchmark parameters to disk in JSON format.
pub fn write_to_json<
Scalar: UnsignedInteger + Serialize,
T: Into<CryptoParametersRecord<Scalar>>,
>(
bench_id: &str,
params: T,
params_alias: impl Into<String>,
display_name: impl Into<String>,
operator_type: &OperatorType,
bit_size: u32,
decomposition_basis: Vec<u32>,
) {
let params = params.into();
let execution_type = match bench_id.contains("parallelized") {
true => ExecutionType::Parallel,
false => ExecutionType::Sequential,
};
let operand_type = match bench_id.contains("scalar") {
true => OperandType::PlainText,
false => OperandType::CipherText,
};
let record = BenchmarkParametersRecord {
display_name: display_name.into(),
crypto_parameters_alias: params_alias.into(),
crypto_parameters: params.to_owned(),
message_modulus: params.message_modulus,
carry_modulus: params.carry_modulus,
ciphertext_modulus: 64,
bit_size,
polynomial_multiplication: PolynomialMultiplication::Fft,
precision: (params.message_modulus.unwrap_or(2) as u32).ilog2(),
error_probability: 2f64.powf(-41.0),
integer_representation: IntegerRepresentation::Radix,
decomposition_basis,
pbs_algorithm: None, // To be added in a future version
execution_type,
key_set_type: KeySetType::Single,
operand_type,
operator_type: operator_type.to_owned(),
};
let mut params_directory = ["benchmarks_parameters", bench_id]
.iter()
.collect::<PathBuf>();
fs::create_dir_all(&params_directory).unwrap();
params_directory.push("parameters.json");
fs::write(params_directory, serde_json::to_string(&record).unwrap()).unwrap();
}
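// Usage sketch (illustrative only, identifiers are hypothetical): a benchmark records
// its parameters right after measuring, e.g.
//
//     write_to_json::<u64, _>(
//         &bench_id,               // Criterion benchmark id, e.g. "integer::add"
//         params,                  // anything convertible into CryptoParametersRecord<u64>
//         "BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128",
//         "add",
//         &OperatorType::Atomic,
//         64,                      // operand bit size
//         vec![2],                 // decomposition basis
//     );
//
// which produces benchmarks_parameters/<bench_id>/parameters.json.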
const FAST_BENCH_BIT_SIZES: [usize; 1] = [64];
const BENCH_BIT_SIZES: [usize; 8] = [4, 8, 16, 32, 40, 64, 128, 256];
const MULTI_BIT_CPU_SIZES: [usize; 6] = [4, 8, 16, 32, 40, 64];
/// User configuration controlling how benchmarks must be run.
#[derive(Default)]
pub struct EnvConfig {
pub is_multi_bit: bool,
pub is_fast_bench: bool,
}
impl EnvConfig {
pub fn new() -> Self {
let is_multi_bit = match env::var("__TFHE_RS_PARAM_TYPE") {
Ok(val) => val.to_lowercase() == "multi_bit",
Err(_) => false,
};
let is_fast_bench = match env::var("__TFHE_RS_FAST_BENCH") {
Ok(val) => val.to_lowercase() == "true",
Err(_) => false,
};
EnvConfig {
is_multi_bit,
is_fast_bench,
}
}
/// Get precision values to benchmark.
pub fn bit_sizes(&self) -> Vec<usize> {
if self.is_fast_bench {
FAST_BENCH_BIT_SIZES.to_vec()
} else if self.is_multi_bit {
if cfg!(feature = "gpu") {
BENCH_BIT_SIZES.to_vec()
} else {
MULTI_BIT_CPU_SIZES.to_vec()
}
} else {
BENCH_BIT_SIZES.to_vec()
}
}
}
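// Sketch of the expected environment (derived from the code above): benchmarks read
// __TFHE_RS_PARAM_TYPE and __TFHE_RS_FAST_BENCH, e.g.
//
//     __TFHE_RS_PARAM_TYPE=multi_bit __TFHE_RS_FAST_BENCH=TRUE cargo bench ...
//
// would select multi-bit parameters and restrict the run to FAST_BENCH_BIT_SIZES
// (the fast-bench flag takes precedence in `bit_sizes`).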
pub static BENCH_TYPE: OnceLock<BenchmarkType> = OnceLock::new();
pub enum BenchmarkType {
Latency,
Throughput,
}
impl BenchmarkType {
pub fn from_env() -> Result<Self, String> {
let raw_value = env::var("__TFHE_RS_BENCH_TYPE").unwrap_or("latency".to_string());
match raw_value.to_lowercase().as_str() {
"latency" => Ok(BenchmarkType::Latency),
"throughput" => Ok(BenchmarkType::Throughput),
_ => Err(format!("benchmark type '{raw_value}' is not supported")),
}
}
}
pub fn get_bench_type() -> &'static BenchmarkType {
BENCH_TYPE.get_or_init(|| BenchmarkType::from_env().unwrap())
}
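// Sketch (derived from the code above): the measurement mode defaults to latency and is
// switched with the __TFHE_RS_BENCH_TYPE environment variable, e.g.
//
//     __TFHE_RS_BENCH_TYPE=throughput cargo bench ...
//
// Any other value makes get_bench_type() panic when it unwraps the error.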
/// Number of streaming multiprocessors (SM) available on Nvidia H100 GPU
#[cfg(feature = "gpu")]
const H100_PCIE_SM_COUNT: u32 = 114;
/// Generate the number of threads to use to saturate the current machine for throughput measurements.
pub fn throughput_num_threads(num_block: usize, op_pbs_count: u64) -> u64 {
let ref_block_count = 32; // Represents a 64-bit ciphertext with the 2_2 parameter set
let block_multiplicator = (ref_block_count as f64 / num_block as f64).ceil().min(1.0);
// Some operations with a high serial workload (e.g. division) would yield an operation
// loading value so low that the resulting number of elements wouldn't be meaningful.
let minimum_loading = if num_block < 64 { 0.2 } else { 0.01 };
#[cfg(feature = "gpu")]
{
let total_num_sm = H100_PCIE_SM_COUNT * get_number_of_gpus();
let operation_loading = ((total_num_sm as u64 / op_pbs_count) as f64).max(minimum_loading);
let elements = (total_num_sm as f64 * block_multiplicator * operation_loading) as u64;
elements.min(1500) // This threshold is useful for operations with both a small
// number of blocks and a low PBS count.
}
#[cfg(not(feature = "gpu"))]
{
let num_threads = rayon::current_num_threads() as f64;
let operation_loading = (num_threads / (op_pbs_count as f64)).max(minimum_loading);
// Add 20% to the maximum number of threads available.
((num_threads + (num_threads * 0.2)) * block_multiplicator.min(1.0) * operation_loading)
as u64
}
}
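// Worked example on the CPU path (numbers are illustrative): with num_block = 32,
// op_pbs_count = 32 and a machine where rayon reports 64 threads:
//     block_multiplicator = ceil(32 / 32).min(1.0) = 1.0
//     minimum_loading     = 0.2                      (num_block < 64)
//     operation_loading   = (64 / 32).max(0.2) = 2.0
//     threads             = (64 + 64 * 0.2) * 1.0 * 2.0 = 153 (truncated)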
#[cfg(feature = "gpu")]
mod cuda_utils {
use tfhe::core_crypto::entities::{
LweBootstrapKeyOwned, LweKeyswitchKeyOwned, LweMultiBitBootstrapKeyOwned,
LwePackingKeyswitchKeyOwned,
};
use tfhe::core_crypto::gpu::lwe_bootstrap_key::CudaLweBootstrapKey;
use tfhe::core_crypto::gpu::lwe_keyswitch_key::CudaLweKeyswitchKey;
use tfhe::core_crypto::gpu::lwe_multi_bit_bootstrap_key::CudaLweMultiBitBootstrapKey;
use tfhe::core_crypto::gpu::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use tfhe::core_crypto::gpu::vec::CudaVec;
use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
use tfhe::core_crypto::prelude::{Numeric, UnsignedInteger};
use tfhe::shortint::server_key::ModulusSwitchNoiseReductionKey;
use tfhe::{set_server_key, ClientKey, CompressedServerKey, GpuIndex};
pub const GPU_MAX_SUPPORTED_POLYNOMIAL_SIZE: usize = 16384;
/// Get a vector of CUDA streams that can be directly used for throughput benchmarks in
/// the core_crypto layer.
pub fn cuda_local_streams_core() -> Vec<CudaStreams> {
(0..get_number_of_gpus())
.map(|i| CudaStreams::new_single_gpu(GpuIndex::new(i)))
.collect::<Vec<_>>()
}
/// Computing keys in their CPU flavor.
pub struct CpuKeys<T: UnsignedInteger> {
ksk: Option<LweKeyswitchKeyOwned<T>>,
pksk: Option<LwePackingKeyswitchKeyOwned<T>>,
bsk: Option<LweBootstrapKeyOwned<T>>,
multi_bit_bsk: Option<LweMultiBitBootstrapKeyOwned<T>>,
}
impl<T: UnsignedInteger> CpuKeys<T> {
pub fn builder() -> CpuKeysBuilder<T> {
CpuKeysBuilder::new()
}
}
pub struct CpuKeysBuilder<T: UnsignedInteger> {
ksk: Option<LweKeyswitchKeyOwned<T>>,
pksk: Option<LwePackingKeyswitchKeyOwned<T>>,
bsk: Option<LweBootstrapKeyOwned<T>>,
multi_bit_bsk: Option<LweMultiBitBootstrapKeyOwned<T>>,
}
impl<T: UnsignedInteger> CpuKeysBuilder<T> {
pub fn new() -> CpuKeysBuilder<T> {
Self {
ksk: None,
pksk: None,
bsk: None,
multi_bit_bsk: None,
}
}
pub fn keyswitch_key(mut self, ksk: LweKeyswitchKeyOwned<T>) -> CpuKeysBuilder<T> {
self.ksk = Some(ksk);
self
}
pub fn packing_keyswitch_key(
mut self,
pksk: LwePackingKeyswitchKeyOwned<T>,
) -> CpuKeysBuilder<T> {
self.pksk = Some(pksk);
self
}
pub fn bootstrap_key(mut self, bsk: LweBootstrapKeyOwned<T>) -> CpuKeysBuilder<T> {
self.bsk = Some(bsk);
self
}
pub fn multi_bit_bootstrap_key(
mut self,
mb_bsk: LweMultiBitBootstrapKeyOwned<T>,
) -> CpuKeysBuilder<T> {
self.multi_bit_bsk = Some(mb_bsk);
self
}
pub fn build(self) -> CpuKeys<T> {
CpuKeys {
ksk: self.ksk,
pksk: self.pksk,
bsk: self.bsk,
multi_bit_bsk: self.multi_bit_bsk,
}
}
}
impl<T: UnsignedInteger> Default for CpuKeysBuilder<T> {
fn default() -> Self {
Self::new()
}
}
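// Usage sketch (illustrative, `ksk` and `bsk` are keys generated elsewhere): the
// builder lets core_crypto benchmarks bundle only the keys they actually need before
// uploading them to the GPUs:
//
//     let cpu_keys = CpuKeys::builder()
//         .keyswitch_key(ksk)
//         .bootstrap_key(bsk)
//         .build();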
/// Computing keys in their Cuda flavor.
#[allow(dead_code)]
pub struct CudaLocalKeys<T: UnsignedInteger> {
pub ksk: Option<CudaLweKeyswitchKey<T>>,
pub pksk: Option<CudaLwePackingKeyswitchKey<T>>,
pub bsk: Option<CudaLweBootstrapKey>,
pub multi_bit_bsk: Option<CudaLweMultiBitBootstrapKey>,
}
#[allow(dead_code)]
impl<T: UnsignedInteger> CudaLocalKeys<T> {
pub fn from_cpu_keys(
cpu_keys: &CpuKeys<T>,
ms_noise_reduction_key: Option<&ModulusSwitchNoiseReductionKey<u64>>,
stream: &CudaStreams,
) -> Self {
Self {
ksk: cpu_keys
.ksk
.as_ref()
.map(|ksk| CudaLweKeyswitchKey::from_lwe_keyswitch_key(ksk, stream)),
pksk: cpu_keys.pksk.as_ref().map(|pksk| {
CudaLwePackingKeyswitchKey::from_lwe_packing_keyswitch_key(pksk, stream)
}),
bsk: cpu_keys.bsk.as_ref().map(|bsk| {
CudaLweBootstrapKey::from_lwe_bootstrap_key(bsk, ms_noise_reduction_key, stream)
}),
multi_bit_bsk: cpu_keys.multi_bit_bsk.as_ref().map(|mb_bsk| {
CudaLweMultiBitBootstrapKey::from_lwe_multi_bit_bootstrap_key(mb_bsk, stream)
}),
}
}
}
/// Instantiate Cuda computing keys on each available GPU.
pub fn cuda_local_keys_core<T: UnsignedInteger>(
cpu_keys: &CpuKeys<T>,
ms_noise_reduction_key: Option<&ModulusSwitchNoiseReductionKey<u64>>,
) -> Vec<CudaLocalKeys<T>> {
let gpu_count = get_number_of_gpus() as usize;
let mut gpu_keys_vec = Vec::with_capacity(gpu_count);
for i in 0..gpu_count {
let stream = CudaStreams::new_single_gpu(GpuIndex::new(i as u32));
gpu_keys_vec.push(CudaLocalKeys::from_cpu_keys(
cpu_keys,
ms_noise_reduction_key,
&stream,
));
}
gpu_keys_vec
}
pub struct CudaIndexes<T: Numeric> {
pub d_input: CudaVec<T>,
pub d_output: CudaVec<T>,
pub d_lut: CudaVec<T>,
}
impl<T: Numeric> CudaIndexes<T> {
pub fn new(indexes: &[T], stream: &CudaStreams, stream_index: u32) -> Self {
let length = indexes.len();
let mut d_input = unsafe { CudaVec::<T>::new_async(length, stream, stream_index) };
let mut d_output = unsafe { CudaVec::<T>::new_async(length, stream, stream_index) };
let mut d_lut = unsafe { CudaVec::<T>::new_async(length, stream, stream_index) };
unsafe {
d_input.copy_from_cpu_async(indexes.as_ref(), stream, stream_index);
d_output.copy_from_cpu_async(indexes.as_ref(), stream, stream_index);
d_lut.copy_from_cpu_async(indexes.as_ref(), stream, stream_index);
}
stream.synchronize();
Self {
d_input,
d_output,
d_lut,
}
}
}
#[cfg(feature = "integer")]
pub mod cuda_integer_utils {
use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
use tfhe::integer::gpu::CudaServerKey;
use tfhe::integer::ClientKey;
use tfhe::GpuIndex;
/// Get the number of streams usable for CUDA throughput benchmarks.
fn cuda_num_streams(num_block: usize) -> u64 {
let num_streams_per_gpu: u32 = match num_block {
2 => 64,
4 => 32,
8 => 16,
16 => 8,
32 => 4,
64 => 2,
128 => 1,
_ => 8,
};
(num_streams_per_gpu * get_number_of_gpus()) as u64
}
/// Get a vector of CUDA streams that can be directly used for throughput benchmarks.
pub fn cuda_local_streams(
num_block: usize,
throughput_elements: usize,
) -> Vec<CudaStreams> {
(0..cuda_num_streams(num_block))
.map(|i| {
CudaStreams::new_single_gpu(GpuIndex::new(
(i % get_number_of_gpus() as u64) as u32,
))
})
.cycle()
.take(throughput_elements)
.collect::<Vec<_>>()
}
/// Instantiate a Cuda server key on each available GPU.
pub fn cuda_local_keys(cks: &ClientKey) -> Vec<CudaServerKey> {
let gpu_count = get_number_of_gpus() as usize;
let mut gpu_sks_vec = Vec::with_capacity(gpu_count);
for i in 0..gpu_count {
let stream = CudaStreams::new_single_gpu(GpuIndex::new(i as u32));
gpu_sks_vec.push(CudaServerKey::new(cks, &stream));
}
gpu_sks_vec
}
}
#[allow(dead_code)]
pub fn configure_gpu(client_key: &ClientKey) {
let compressed_sks = CompressedServerKey::new(client_key);
let sks = compressed_sks.decompress_to_gpu();
rayon::broadcast(|_| set_server_key(sks.clone()));
set_server_key(sks);
}
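// Usage sketch (illustrative): a high-level API GPU benchmark would call this once
// after key generation so every rayon worker thread holds the GPU server key:
//
//     let client_key = ClientKey::generate(config); // `config` built elsewhere
//     configure_gpu(&client_key);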
#[allow(unused_imports)]
#[cfg(feature = "integer")]
pub use cuda_integer_utils::*;
}
#[cfg(feature = "gpu")]
pub use cuda_utils::*;