mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-08 22:28:01 -05:00
chore(bench): move benchmarks to their own crate
This is done to speed up compilation by avoiding a recompilation of tfhe each time a benchmark file is modified.
This commit is contained in:
157
tfhe-benchmark/Cargo.toml
Normal file
157
tfhe-benchmark/Cargo.toml
Normal file
@@ -0,0 +1,157 @@
|
||||
[package]
|
||||
name = "tfhe-benchmark"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
homepage = "https://zama.ai/"
|
||||
documentation = "https://docs.zama.ai/tfhe-rs"
|
||||
repository = "https://github.com/zama-ai/tfhe-rs"
|
||||
license = "BSD-3-Clause-Clear"
|
||||
description = "tfhe-benchmark: Performance measurement facility for tfhe-rs."
|
||||
rust-version = "1.84"
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
name = "benchmark"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[dependencies]
|
||||
bincode = "1.3.3"
|
||||
# clap has to be pinned as its minimum supported rust version
|
||||
# changes often between minor releases, which breaks our CI
|
||||
clap = { version = "=4.4.4", features = ["derive"] }
|
||||
criterion = "0.5.1"
|
||||
dyn-stack = { workspace = true, features = ["default"] }
|
||||
itertools = "0.14"
|
||||
serde = { version = "1.0", default-features = false }
|
||||
serde_json = "1.0.94"
|
||||
paste = "1.0.7"
|
||||
rand = { workspace = true }
|
||||
rayon = { workspace = true }
|
||||
tfhe = { path = "../tfhe" }
|
||||
tfhe-csprng = { path = "../tfhe-csprng" }
|
||||
|
||||
[features]
|
||||
boolean = ["tfhe/boolean"]
|
||||
shortint = ["tfhe/shortint"]
|
||||
integer = ["shortint", "tfhe/integer"]
|
||||
gpu = ["tfhe/gpu"]
|
||||
internal-keycache = ["tfhe/internal-keycache"]
|
||||
nightly-avx512 = ["tfhe/nightly-avx512"]
|
||||
pbs-stats = ["tfhe/pbs-stats"]
|
||||
zk-pok = ["tfhe/zk-pok"]
|
||||
|
||||
[[bench]]
|
||||
name = "boolean-bench"
|
||||
path = "benches/boolean/bench.rs"
|
||||
harness = false
|
||||
required-features = ["boolean", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "shortint-bench"
|
||||
path = "benches/shortint/bench.rs"
|
||||
harness = false
|
||||
required-features = ["shortint", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "oprf-shortint-bench"
|
||||
path = "benches/shortint/oprf.rs"
|
||||
harness = false
|
||||
required-features = ["shortint", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "glwe_packing_compression-shortint-bench"
|
||||
path = "benches/shortint/glwe_packing_compression.rs"
|
||||
harness = false
|
||||
required-features = ["shortint", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "hlapi"
|
||||
path = "benches/high_level_api/bench.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "hlapi-erc20"
|
||||
path = "benches/high_level_api/erc20.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "hlapi-dex"
|
||||
path = "benches/high_level_api/dex.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "glwe_packing_compression-integer-bench"
|
||||
path = "benches/integer/glwe_packing_compression.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "pbs-stats", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "integer-bench"
|
||||
path = "benches/integer/bench.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "pbs-stats", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "integer-signed-bench"
|
||||
path = "benches/integer/signed_bench.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "pbs-stats", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "zk-pke-bench"
|
||||
path = "benches/integer/zk_pke.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "zk-pok", "pbs-stats", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "ks-bench"
|
||||
path = "benches/core_crypto/ks_bench.rs"
|
||||
harness = false
|
||||
required-features = ["shortint", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "pbs-bench"
|
||||
path = "benches/core_crypto/pbs_bench.rs"
|
||||
harness = false
|
||||
required-features = ["boolean", "shortint", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "ks-pbs-bench"
|
||||
path = "benches/core_crypto/ks_pbs_bench.rs"
|
||||
harness = false
|
||||
required-features = ["shortint", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "modulus_switch_noise_reduction"
|
||||
path = "benches/core_crypto/modulus_switch_noise_reduction.rs"
|
||||
harness = false
|
||||
required-features = ["shortint"]
|
||||
|
||||
[[bench]]
|
||||
name = "pbs128-bench"
|
||||
path = "benches/core_crypto/pbs128_bench.rs"
|
||||
harness = false
|
||||
required-features = ["shortint"]
|
||||
|
||||
[[bin]]
|
||||
name = "boolean_key_sizes"
|
||||
path = "src/bin/boolean_key_sizes.rs"
|
||||
required-features = ["boolean", "internal-keycache"]
|
||||
|
||||
[[bin]]
|
||||
name = "shortint_key_sizes"
|
||||
path = "src/bin/shortint_key_sizes.rs"
|
||||
required-features = ["shortint", "internal-keycache"]
|
||||
|
||||
[[bin]]
|
||||
name = "hlapi_compact_pk_ct_sizes"
|
||||
path = "src/bin/hlapi_compact_pk_ct_sizes.rs"
|
||||
required-features = ["integer", "internal-keycache"]
|
||||
|
||||
[[bin]]
|
||||
name = "wasm_benchmarks_parser"
|
||||
path = "src/bin/wasm_benchmarks_parser.rs"
|
||||
required-features = ["shortint", "internal-keycache"]
|
||||
28
tfhe-benchmark/LICENSE
Normal file
28
tfhe-benchmark/LICENSE
Normal file
@@ -0,0 +1,28 @@
|
||||
BSD 3-Clause Clear License
|
||||
|
||||
Copyright © 2025 ZAMA.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or other
|
||||
materials provided with the distribution.
|
||||
|
||||
3. Neither the name of ZAMA nor the names of its contributors may be used to endorse
|
||||
or promote products derived from this software without specific prior written permission.
|
||||
|
||||
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
|
||||
THIS SOFTWARE IS PROVIDED BY THE ZAMA AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
ZAMA OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
|
||||
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
108
tfhe-benchmark/benches/boolean/bench.rs
Normal file
108
tfhe-benchmark/benches/boolean/bench.rs
Normal file
@@ -0,0 +1,108 @@
|
||||
use benchmark::utilities::{write_to_json, CryptoParametersRecord, OperatorType};
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use tfhe::boolean::client_key::ClientKey;
|
||||
use tfhe::boolean::parameters::{
|
||||
BooleanParameters, DEFAULT_PARAMETERS, DEFAULT_PARAMETERS_KS_PBS,
|
||||
PARAMETERS_ERROR_PROB_2_POW_MINUS_165, PARAMETERS_ERROR_PROB_2_POW_MINUS_165_KS_PBS,
|
||||
TFHE_LIB_PARAMETERS,
|
||||
};
|
||||
use tfhe::boolean::prelude::BinaryBooleanGates;
|
||||
use tfhe::boolean::server_key::ServerKey;
|
||||
|
||||
criterion_group!(
|
||||
gates_benches,
|
||||
bench_default_parameters,
|
||||
bench_default_parameters_ks_pbs,
|
||||
bench_low_prob_parameters,
|
||||
bench_low_prob_parameters_ks_pbs,
|
||||
bench_tfhe_lib_parameters,
|
||||
);
|
||||
|
||||
criterion_main!(gates_benches);
|
||||
|
||||
/// Helper function to write boolean benchmarks parameters to disk in JSON format.
|
||||
pub fn write_to_json_boolean<T: Into<CryptoParametersRecord<u32>>>(
|
||||
bench_id: &str,
|
||||
params: T,
|
||||
params_alias: impl Into<String>,
|
||||
display_name: impl Into<String>,
|
||||
) {
|
||||
write_to_json(
|
||||
bench_id,
|
||||
params,
|
||||
params_alias,
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
1,
|
||||
vec![1],
|
||||
);
|
||||
}
|
||||
|
||||
// Put all `bench_function` in one place
|
||||
// so the keygen is only run once per parameters saving time.
|
||||
fn benches(c: &mut Criterion, params: BooleanParameters, parameter_name: &str) {
|
||||
let mut bench_group = c.benchmark_group("gates_benches");
|
||||
|
||||
let cks = ClientKey::new(¶ms);
|
||||
let sks = ServerKey::new(&cks);
|
||||
|
||||
let ct1 = cks.encrypt(true);
|
||||
let ct2 = cks.encrypt(false);
|
||||
let ct3 = cks.encrypt(true);
|
||||
|
||||
let id = format!("AND::{parameter_name}");
|
||||
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.and(&ct1, &ct2))));
|
||||
write_to_json_boolean(&id, params, parameter_name, "and");
|
||||
|
||||
let id = format!("NAND::{parameter_name}");
|
||||
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.nand(&ct1, &ct2))));
|
||||
write_to_json_boolean(&id, params, parameter_name, "nand");
|
||||
|
||||
let id = format!("OR::{parameter_name}");
|
||||
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.or(&ct1, &ct2))));
|
||||
write_to_json_boolean(&id, params, parameter_name, "or");
|
||||
|
||||
let id = format!("XOR::{parameter_name}");
|
||||
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.xor(&ct1, &ct2))));
|
||||
write_to_json_boolean(&id, params, parameter_name, "xor");
|
||||
|
||||
let id = format!("XNOR::{parameter_name}");
|
||||
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.xnor(&ct1, &ct2))));
|
||||
write_to_json_boolean(&id, params, parameter_name, "xnor");
|
||||
|
||||
let id = format!("NOT::{parameter_name}");
|
||||
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.not(&ct1))));
|
||||
write_to_json_boolean(&id, params, parameter_name, "not");
|
||||
|
||||
let id = format!("MUX::{parameter_name}");
|
||||
bench_group.bench_function(&id, |b| b.iter(|| black_box(sks.mux(&ct1, &ct2, &ct3))));
|
||||
write_to_json_boolean(&id, params, parameter_name, "mux");
|
||||
}
|
||||
|
||||
fn bench_default_parameters(c: &mut Criterion) {
|
||||
benches(c, DEFAULT_PARAMETERS, "DEFAULT_PARAMETERS");
|
||||
}
|
||||
|
||||
fn bench_default_parameters_ks_pbs(c: &mut Criterion) {
|
||||
benches(c, DEFAULT_PARAMETERS_KS_PBS, "DEFAULT_PARAMETERS_KS_PBS");
|
||||
}
|
||||
|
||||
fn bench_low_prob_parameters(c: &mut Criterion) {
|
||||
benches(
|
||||
c,
|
||||
PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
|
||||
"PARAMETERS_ERROR_PROB_2_POW_MINUS_165",
|
||||
);
|
||||
}
|
||||
|
||||
fn bench_low_prob_parameters_ks_pbs(c: &mut Criterion) {
|
||||
benches(
|
||||
c,
|
||||
PARAMETERS_ERROR_PROB_2_POW_MINUS_165_KS_PBS,
|
||||
"PARAMETERS_ERROR_PROB_2_POW_MINUS_165_KS_PBS",
|
||||
);
|
||||
}
|
||||
|
||||
fn bench_tfhe_lib_parameters(c: &mut Criterion) {
|
||||
benches(c, TFHE_LIB_PARAMETERS, " TFHE_LIB_PARAMETERS");
|
||||
}
|
||||
834
tfhe-benchmark/benches/core_crypto/ks_bench.rs
Normal file
834
tfhe-benchmark/benches/core_crypto/ks_bench.rs
Normal file
@@ -0,0 +1,834 @@
|
||||
#[cfg(feature = "boolean")]
|
||||
use benchmark::params::benchmark_32bits_parameters;
|
||||
use benchmark::params::{
|
||||
benchmark_compression_parameters, benchmark_parameters, multi_bit_benchmark_parameters,
|
||||
};
|
||||
use benchmark::utilities::{
|
||||
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, CryptoParametersRecord,
|
||||
OperatorType,
|
||||
};
|
||||
use criterion::{black_box, Criterion, Throughput};
|
||||
use rayon::prelude::*;
|
||||
use serde::Serialize;
|
||||
use std::env;
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
|
||||
// TODO Refactor KS, PBS and KS-PBS benchmarks into a single generic function.
|
||||
fn keyswitch<Scalar: UnsignedTorus + CastInto<usize> + Serialize>(
|
||||
criterion: &mut Criterion,
|
||||
parameters: &[(String, CryptoParametersRecord<Scalar>)],
|
||||
) {
|
||||
let bench_name = "core_crypto::keyswitch";
|
||||
let mut bench_group = criterion.benchmark_group(bench_name);
|
||||
|
||||
// Create the PRNG
|
||||
let mut seeder = new_seeder();
|
||||
let seeder = seeder.as_mut();
|
||||
let mut encryption_generator =
|
||||
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
|
||||
let mut secret_generator = SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
|
||||
|
||||
for (name, params) in parameters.iter() {
|
||||
let lwe_dimension = params.lwe_dimension.unwrap();
|
||||
let glwe_dimension = params.glwe_dimension.unwrap();
|
||||
let polynomial_size = params.polynomial_size.unwrap();
|
||||
let ks_decomp_base_log = params.ks_base_log.unwrap();
|
||||
let ks_decomp_level_count = params.ks_level.unwrap();
|
||||
|
||||
let lwe_sk =
|
||||
allocate_and_generate_new_binary_lwe_secret_key(lwe_dimension, &mut secret_generator);
|
||||
|
||||
let glwe_sk = allocate_and_generate_new_binary_glwe_secret_key(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
&mut secret_generator,
|
||||
);
|
||||
let big_lwe_sk = glwe_sk.into_lwe_secret_key();
|
||||
let ksk_big_to_small = allocate_and_generate_new_lwe_keyswitch_key(
|
||||
&big_lwe_sk,
|
||||
&lwe_sk,
|
||||
ks_decomp_base_log,
|
||||
ks_decomp_level_count,
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
params.ciphertext_modulus.unwrap(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let bench_id;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
let ct = allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&big_lwe_sk,
|
||||
Plaintext(Scalar::ONE),
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
params.ciphertext_modulus.unwrap(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let mut output_ct = LweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
params.ciphertext_modulus.unwrap(),
|
||||
);
|
||||
|
||||
bench_id = format!("{bench_name}::{name}");
|
||||
{
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
keyswitch_lwe_ciphertext(&ksk_big_to_small, &ct, &mut output_ct);
|
||||
black_box(&mut output_ct);
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
bench_id = format!("{bench_name}::throughput::{name}");
|
||||
let blocks: usize = 1;
|
||||
let elements = throughput_num_threads(blocks, 1); // FIXME This number of element do not staturate the target machine
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let setup_encrypted_values = || {
|
||||
let input_cts = (0..elements)
|
||||
.map(|_| {
|
||||
allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&big_lwe_sk,
|
||||
Plaintext(Scalar::ONE),
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
params.ciphertext_modulus.unwrap(),
|
||||
&mut encryption_generator,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let output_cts = (0..elements)
|
||||
.map(|_| {
|
||||
LweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
params.ciphertext_modulus.unwrap(),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
(input_cts, output_cts)
|
||||
};
|
||||
|
||||
b.iter_batched(
|
||||
setup_encrypted_values,
|
||||
|(input_cts, mut output_cts)| {
|
||||
input_cts
|
||||
.par_iter()
|
||||
.zip(output_cts.par_iter_mut())
|
||||
.for_each(|(input_ct, output_ct)| {
|
||||
keyswitch_lwe_ciphertext(
|
||||
&ksk_big_to_small,
|
||||
input_ct,
|
||||
output_ct,
|
||||
);
|
||||
})
|
||||
},
|
||||
criterion::BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let bit_size = (params.message_modulus.unwrap_or(2) as u32).ilog2();
|
||||
write_to_json(
|
||||
&bench_id,
|
||||
*params,
|
||||
name,
|
||||
"ks",
|
||||
&OperatorType::Atomic,
|
||||
bit_size,
|
||||
vec![bit_size],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn packing_keyswitch<Scalar, F>(
|
||||
criterion: &mut Criterion,
|
||||
bench_name: &str,
|
||||
parameters: &[(String, CryptoParametersRecord<Scalar>)],
|
||||
ks_op: F,
|
||||
) where
|
||||
Scalar: UnsignedTorus + CastInto<usize> + Serialize,
|
||||
F: Fn(
|
||||
&LwePackingKeyswitchKey<Vec<Scalar>>,
|
||||
&LweCiphertextList<Vec<Scalar>>,
|
||||
&mut GlweCiphertext<Vec<Scalar>>,
|
||||
) + Sync
|
||||
+ Send,
|
||||
{
|
||||
let bench_name = format!("core_crypto::{bench_name}");
|
||||
let mut bench_group = criterion.benchmark_group(&bench_name);
|
||||
|
||||
// Create the PRNG
|
||||
let mut seeder = new_seeder();
|
||||
let seeder = seeder.as_mut();
|
||||
let mut encryption_generator =
|
||||
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
|
||||
let mut secret_generator = SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
|
||||
|
||||
for (name, params) in parameters.iter() {
|
||||
let lwe_dimension = params.lwe_dimension.unwrap();
|
||||
let packing_glwe_dimension = params.packing_ks_glwe_dimension.unwrap();
|
||||
let packing_polynomial_size = params.packing_ks_polynomial_size.unwrap();
|
||||
let packing_ks_decomp_base_log = params.packing_ks_base_log.unwrap();
|
||||
let packing_ks_decomp_level_count = params.packing_ks_level.unwrap();
|
||||
let ciphertext_modulus = params.ciphertext_modulus.unwrap();
|
||||
let count = params.lwe_per_glwe.unwrap();
|
||||
|
||||
let lwe_sk =
|
||||
allocate_and_generate_new_binary_lwe_secret_key(lwe_dimension, &mut secret_generator);
|
||||
|
||||
let glwe_sk = allocate_and_generate_new_binary_glwe_secret_key(
|
||||
packing_glwe_dimension,
|
||||
packing_polynomial_size,
|
||||
&mut secret_generator,
|
||||
);
|
||||
|
||||
let pksk = allocate_and_generate_new_lwe_packing_keyswitch_key(
|
||||
&lwe_sk,
|
||||
&glwe_sk,
|
||||
packing_ks_decomp_base_log,
|
||||
packing_ks_decomp_level_count,
|
||||
params.packing_ks_key_noise_distribution.unwrap(),
|
||||
ciphertext_modulus,
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let bench_id;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
let mut input_lwe_list = LweCiphertextList::new(
|
||||
Scalar::ZERO,
|
||||
lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
count,
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
let plaintext_list = PlaintextList::new(
|
||||
Scalar::ZERO,
|
||||
PlaintextCount(input_lwe_list.lwe_ciphertext_count().0),
|
||||
);
|
||||
|
||||
encrypt_lwe_ciphertext_list(
|
||||
&lwe_sk,
|
||||
&mut input_lwe_list,
|
||||
&plaintext_list,
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let mut output_glwe = GlweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
glwe_sk.glwe_dimension().to_glwe_size(),
|
||||
glwe_sk.polynomial_size(),
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
bench_id = format!("{bench_name}::{name}");
|
||||
{
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
ks_op(&pksk, &input_lwe_list, &mut output_glwe);
|
||||
black_box(&mut output_glwe);
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
bench_id = format!("{bench_name}::throughput::{name}");
|
||||
let blocks: usize = 1;
|
||||
let elements = throughput_num_threads(blocks, 1);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let setup_encrypted_values = || {
|
||||
let input_lwe_lists = (0..elements)
|
||||
.map(|_| {
|
||||
let mut input_lwe_list = LweCiphertextList::new(
|
||||
Scalar::ZERO,
|
||||
lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
count,
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
let plaintext_list = PlaintextList::new(
|
||||
Scalar::ZERO,
|
||||
PlaintextCount(input_lwe_list.lwe_ciphertext_count().0),
|
||||
);
|
||||
|
||||
encrypt_lwe_ciphertext_list(
|
||||
&lwe_sk,
|
||||
&mut input_lwe_list,
|
||||
&plaintext_list,
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
input_lwe_list
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let output_glwes = (0..elements)
|
||||
.map(|_| {
|
||||
GlweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
glwe_sk.glwe_dimension().to_glwe_size(),
|
||||
glwe_sk.polynomial_size(),
|
||||
ciphertext_modulus,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
(input_lwe_lists, output_glwes)
|
||||
};
|
||||
|
||||
b.iter_batched(
|
||||
setup_encrypted_values,
|
||||
|(input_lwe_lists, mut output_glwes)| {
|
||||
input_lwe_lists
|
||||
.par_iter()
|
||||
.zip(output_glwes.par_iter_mut())
|
||||
.for_each(|(input_lwe_list, output_glwe)| {
|
||||
ks_op(&pksk, input_lwe_list, output_glwe);
|
||||
})
|
||||
},
|
||||
criterion::BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let bit_size = (params.message_modulus.unwrap_or(2) as u32).ilog2();
|
||||
write_to_json(
|
||||
&bench_id,
|
||||
*params,
|
||||
name,
|
||||
"packing_ks",
|
||||
&OperatorType::Atomic,
|
||||
bit_size,
|
||||
vec![bit_size],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
mod cuda {
|
||||
use benchmark::params::{benchmark_parameters, multi_bit_benchmark_parameters};
|
||||
use benchmark::utilities::{
|
||||
cuda_local_keys_core, cuda_local_streams_core, get_bench_type, throughput_num_threads,
|
||||
write_to_json, BenchmarkType, CpuKeys, CpuKeysBuilder, CryptoParametersRecord, CudaIndexes,
|
||||
CudaLocalKeys, OperatorType,
|
||||
};
|
||||
use criterion::{black_box, Criterion, Throughput};
|
||||
use rayon::prelude::*;
|
||||
use serde::Serialize;
|
||||
use tfhe::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
|
||||
use tfhe::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use tfhe::core_crypto::gpu::{
|
||||
cuda_keyswitch_lwe_ciphertext, cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64,
|
||||
get_number_of_gpus, CudaStreams,
|
||||
};
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
|
||||
fn cuda_keyswitch<Scalar: UnsignedTorus + CastInto<usize> + CastFrom<u64> + Serialize>(
|
||||
criterion: &mut Criterion,
|
||||
parameters: &[(String, CryptoParametersRecord<Scalar>)],
|
||||
) {
|
||||
let bench_name = "core_crypto::cuda::keyswitch";
|
||||
let mut bench_group = criterion.benchmark_group(bench_name);
|
||||
|
||||
// Create the PRNG
|
||||
let mut seeder = new_seeder();
|
||||
let seeder = seeder.as_mut();
|
||||
let mut encryption_generator =
|
||||
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
|
||||
let mut secret_generator =
|
||||
SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
|
||||
|
||||
for (name, params) in parameters.iter() {
|
||||
let lwe_dimension = params.lwe_dimension.unwrap();
|
||||
let glwe_dimension = params.glwe_dimension.unwrap();
|
||||
let polynomial_size = params.polynomial_size.unwrap();
|
||||
let ks_decomp_base_log = params.ks_base_log.unwrap();
|
||||
let ks_decomp_level_count = params.ks_level.unwrap();
|
||||
|
||||
let lwe_sk = allocate_and_generate_new_binary_lwe_secret_key(
|
||||
lwe_dimension,
|
||||
&mut secret_generator,
|
||||
);
|
||||
|
||||
let glwe_sk = allocate_and_generate_new_binary_glwe_secret_key(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
&mut secret_generator,
|
||||
);
|
||||
let big_lwe_sk = glwe_sk.into_lwe_secret_key();
|
||||
let ksk_big_to_small = allocate_and_generate_new_lwe_keyswitch_key(
|
||||
&big_lwe_sk,
|
||||
&lwe_sk,
|
||||
ks_decomp_base_log,
|
||||
ks_decomp_level_count,
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
CiphertextModulus::new_native(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let cpu_keys: CpuKeys<_> = CpuKeysBuilder::new()
|
||||
.keyswitch_key(ksk_big_to_small)
|
||||
.build();
|
||||
|
||||
let bench_id;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
let streams = CudaStreams::new_multi_gpu();
|
||||
let gpu_keys = CudaLocalKeys::from_cpu_keys(&cpu_keys, None, &streams);
|
||||
|
||||
let ct = allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&big_lwe_sk,
|
||||
Plaintext(Scalar::ONE),
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
CiphertextModulus::new_native(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
let mut ct_gpu = CudaLweCiphertextList::from_lwe_ciphertext(&ct, &streams);
|
||||
|
||||
let output_ct = LweCiphertext::new(
|
||||
Scalar::ZERO,
|
||||
lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
CiphertextModulus::new_native(),
|
||||
);
|
||||
let mut output_ct_gpu =
|
||||
CudaLweCiphertextList::from_lwe_ciphertext(&output_ct, &streams);
|
||||
|
||||
let h_indexes = [Scalar::ZERO];
|
||||
let cuda_indexes = CudaIndexes::new(&h_indexes, &streams, 0);
|
||||
|
||||
bench_id = format!("{bench_name}::{name}");
|
||||
{
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
cuda_keyswitch_lwe_ciphertext(
|
||||
gpu_keys.ksk.as_ref().unwrap(),
|
||||
&ct_gpu,
|
||||
&mut output_ct_gpu,
|
||||
&cuda_indexes.d_input,
|
||||
&cuda_indexes.d_output,
|
||||
&streams,
|
||||
);
|
||||
black_box(&mut ct_gpu);
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
let gpu_keys_vec = cuda_local_keys_core(&cpu_keys, None);
|
||||
let gpu_count = get_number_of_gpus() as usize;
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{name}");
|
||||
let blocks: usize = 1;
|
||||
let elements = throughput_num_threads(blocks, 1);
|
||||
let elements_per_stream = elements as usize / gpu_count;
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.sample_size(50);
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let setup_encrypted_values = || {
|
||||
let local_streams = cuda_local_streams_core();
|
||||
|
||||
let plaintext_list = PlaintextList::new(
|
||||
Scalar::ZERO,
|
||||
PlaintextCount(elements_per_stream),
|
||||
);
|
||||
|
||||
let input_cts = (0..gpu_count)
|
||||
.map(|i| {
|
||||
let mut input_ct_list = LweCiphertextList::new(
|
||||
Scalar::ZERO,
|
||||
big_lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
LweCiphertextCount(elements_per_stream),
|
||||
params.ciphertext_modulus.unwrap(),
|
||||
);
|
||||
encrypt_lwe_ciphertext_list(
|
||||
&big_lwe_sk,
|
||||
&mut input_ct_list,
|
||||
&plaintext_list,
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
let input_ks_list = LweCiphertextList::from_container(
|
||||
input_ct_list.into_container(),
|
||||
big_lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
params.ciphertext_modulus.unwrap(),
|
||||
);
|
||||
CudaLweCiphertextList::from_lwe_ciphertext_list(
|
||||
&input_ks_list,
|
||||
&local_streams[i],
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let output_cts = (0..gpu_count)
|
||||
.map(|i| {
|
||||
let output_ct_list = LweCiphertextList::new(
|
||||
Scalar::ZERO,
|
||||
lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
LweCiphertextCount(elements_per_stream),
|
||||
params.ciphertext_modulus.unwrap(),
|
||||
);
|
||||
CudaLweCiphertextList::from_lwe_ciphertext_list(
|
||||
&output_ct_list,
|
||||
&local_streams[i],
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let h_indexes = (0..(elements / gpu_count as u64))
|
||||
.map(CastFrom::cast_from)
|
||||
.collect::<Vec<_>>();
|
||||
let cuda_indexes_vec = (0..gpu_count)
|
||||
.map(|i| CudaIndexes::new(&h_indexes, &local_streams[i], 0))
|
||||
.collect::<Vec<_>>();
|
||||
local_streams.iter().for_each(|stream| stream.synchronize());
|
||||
|
||||
(input_cts, output_cts, cuda_indexes_vec, local_streams)
|
||||
};
|
||||
|
||||
b.iter_batched(
|
||||
setup_encrypted_values,
|
||||
|(input_cts, mut output_cts, cuda_indexes_vec, local_streams)| {
|
||||
(0..gpu_count)
|
||||
.into_par_iter()
|
||||
.zip(input_cts.par_iter())
|
||||
.zip(output_cts.par_iter_mut())
|
||||
.zip(local_streams.par_iter())
|
||||
.for_each(|(((i, input_ct), output_ct), local_stream)| {
|
||||
cuda_keyswitch_lwe_ciphertext(
|
||||
gpu_keys_vec[i].ksk.as_ref().unwrap(),
|
||||
input_ct,
|
||||
output_ct,
|
||||
&cuda_indexes_vec[i].d_input,
|
||||
&cuda_indexes_vec[i].d_output,
|
||||
local_stream,
|
||||
);
|
||||
})
|
||||
},
|
||||
criterion::BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let bit_size = (params.message_modulus.unwrap_or(2) as u32).ilog2();
|
||||
write_to_json(
|
||||
&bench_id,
|
||||
*params,
|
||||
name,
|
||||
"ks",
|
||||
&OperatorType::Atomic,
|
||||
bit_size,
|
||||
vec![bit_size],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn cuda_packing_keyswitch<
|
||||
Scalar: UnsignedTorus + CastInto<usize> + CastFrom<u64> + Serialize,
|
||||
>(
|
||||
criterion: &mut Criterion,
|
||||
parameters: &[(String, CryptoParametersRecord<Scalar>)],
|
||||
) {
|
||||
let bench_name = "core_crypto::cuda::packing_keyswitch";
|
||||
let mut bench_group = criterion.benchmark_group(bench_name);
|
||||
|
||||
// Create the PRNG
|
||||
let mut seeder = new_seeder();
|
||||
let seeder = seeder.as_mut();
|
||||
let mut encryption_generator =
|
||||
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
|
||||
let mut secret_generator =
|
||||
SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
|
||||
|
||||
for (name, params) in parameters.iter() {
|
||||
let lwe_dimension = params.lwe_dimension.unwrap();
|
||||
let glwe_dimension = params.glwe_dimension.unwrap();
|
||||
let polynomial_size = params.polynomial_size.unwrap();
|
||||
let ks_decomp_base_log = params.ks_base_log.unwrap();
|
||||
let ks_decomp_level_count = params.ks_level.unwrap();
|
||||
let glwe_noise_distribution = params.glwe_noise_distribution.unwrap();
|
||||
let ciphertext_modulus = params.ciphertext_modulus.unwrap();
|
||||
|
||||
let lwe_sk = allocate_and_generate_new_binary_lwe_secret_key(
|
||||
lwe_dimension,
|
||||
&mut secret_generator,
|
||||
);
|
||||
|
||||
let glwe_sk = allocate_and_generate_new_binary_glwe_secret_key(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
&mut secret_generator,
|
||||
);
|
||||
|
||||
let pksk = allocate_and_generate_new_lwe_packing_keyswitch_key(
|
||||
&lwe_sk,
|
||||
&glwe_sk,
|
||||
ks_decomp_base_log,
|
||||
ks_decomp_level_count,
|
||||
glwe_noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let cpu_keys: CpuKeys<_> = CpuKeysBuilder::new().packing_keyswitch_key(pksk).build();
|
||||
|
||||
let bench_id;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
let streams = CudaStreams::new_multi_gpu();
|
||||
let gpu_keys = CudaLocalKeys::from_cpu_keys(&cpu_keys, None, &streams);
|
||||
|
||||
let mut input_ct_list = LweCiphertextList::new(
|
||||
Scalar::ZERO,
|
||||
lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
LweCiphertextCount(glwe_sk.polynomial_size().0),
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
let plaintext_list = PlaintextList::new(
|
||||
Scalar::ZERO,
|
||||
PlaintextCount(input_ct_list.lwe_ciphertext_count().0),
|
||||
);
|
||||
|
||||
encrypt_lwe_ciphertext_list(
|
||||
&lwe_sk,
|
||||
&mut input_ct_list,
|
||||
&plaintext_list,
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let mut d_input_lwe_list =
|
||||
CudaLweCiphertextList::from_lwe_ciphertext_list(&input_ct_list, &streams);
|
||||
|
||||
let mut d_output_glwe = CudaGlweCiphertextList::new(
|
||||
glwe_sk.glwe_dimension(),
|
||||
glwe_sk.polynomial_size(),
|
||||
GlweCiphertextCount(1),
|
||||
ciphertext_modulus,
|
||||
&streams,
|
||||
);
|
||||
|
||||
streams.synchronize();
|
||||
|
||||
bench_id = format!("{bench_name}::{name}");
|
||||
{
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
|
||||
gpu_keys.pksk.as_ref().unwrap(),
|
||||
&d_input_lwe_list,
|
||||
&mut d_output_glwe,
|
||||
&streams,
|
||||
);
|
||||
black_box(&mut d_input_lwe_list);
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
let gpu_keys_vec = cuda_local_keys_core(&cpu_keys, None);
|
||||
let gpu_count = get_number_of_gpus() as usize;
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{name}");
|
||||
let blocks: usize = 1;
|
||||
let elements = throughput_num_threads(blocks, 1);
|
||||
let elements_per_stream = elements as usize / gpu_count;
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.sample_size(50);
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let setup_encrypted_values = || {
|
||||
let local_streams = cuda_local_streams_core();
|
||||
|
||||
let plaintext_list = PlaintextList::new(
|
||||
Scalar::ZERO,
|
||||
PlaintextCount(elements_per_stream),
|
||||
);
|
||||
|
||||
let input_lwe_lists = (0..gpu_count)
|
||||
.map(|i| {
|
||||
let mut input_ct_list = LweCiphertextList::new(
|
||||
Scalar::ZERO,
|
||||
lwe_sk.lwe_dimension().to_lwe_size(),
|
||||
LweCiphertextCount(glwe_sk.polynomial_size().0),
|
||||
ciphertext_modulus,
|
||||
);
|
||||
encrypt_lwe_ciphertext_list(
|
||||
&lwe_sk,
|
||||
&mut input_ct_list,
|
||||
&plaintext_list,
|
||||
params.lwe_noise_distribution.unwrap(),
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
CudaLweCiphertextList::from_lwe_ciphertext_list(
|
||||
&input_ct_list,
|
||||
&local_streams[i],
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let output_glwe_list = (0..gpu_count)
|
||||
.map(|i| {
|
||||
CudaGlweCiphertextList::new(
|
||||
glwe_sk.glwe_dimension(),
|
||||
glwe_sk.polynomial_size(),
|
||||
GlweCiphertextCount(1),
|
||||
ciphertext_modulus,
|
||||
&local_streams[i],
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
local_streams.iter().for_each(|stream| stream.synchronize());
|
||||
|
||||
(input_lwe_lists, output_glwe_list, local_streams)
|
||||
};
|
||||
|
||||
b.iter_batched(
|
||||
setup_encrypted_values,
|
||||
|(input_lwe_lists, mut output_glwe_lists, local_streams)| {
|
||||
(0..gpu_count)
|
||||
.into_par_iter()
|
||||
.zip(input_lwe_lists.par_iter())
|
||||
.zip(output_glwe_lists.par_iter_mut())
|
||||
.zip(local_streams.par_iter())
|
||||
.for_each(
|
||||
|(
|
||||
((i, input_lwe_list), output_glwe_list),
|
||||
local_stream,
|
||||
)| {
|
||||
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
|
||||
gpu_keys_vec[i].pksk.as_ref().unwrap(),
|
||||
input_lwe_list,
|
||||
output_glwe_list,
|
||||
local_stream,
|
||||
);
|
||||
},
|
||||
)
|
||||
},
|
||||
criterion::BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let bit_size = (params.message_modulus.unwrap_or(2) as u32).ilog2();
|
||||
write_to_json(
|
||||
&bench_id,
|
||||
*params,
|
||||
name,
|
||||
"packing_ks",
|
||||
&OperatorType::Atomic,
|
||||
bit_size,
|
||||
vec![bit_size],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cuda_ks_group() {
|
||||
let mut criterion: Criterion<_> =
|
||||
(Criterion::default().sample_size(2000)).configure_from_args();
|
||||
cuda_keyswitch(&mut criterion, &benchmark_parameters());
|
||||
cuda_packing_keyswitch(&mut criterion, &benchmark_parameters());
|
||||
}
|
||||
|
||||
pub fn cuda_multi_bit_ks_group() {
|
||||
let mut criterion: Criterion<_> =
|
||||
(Criterion::default().sample_size(2000)).configure_from_args();
|
||||
cuda_keyswitch(&mut criterion, &multi_bit_benchmark_parameters());
|
||||
cuda_packing_keyswitch(&mut criterion, &multi_bit_benchmark_parameters());
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
use cuda::{cuda_ks_group, cuda_multi_bit_ks_group};
|
||||
|
||||
pub fn ks_group() {
|
||||
let mut criterion: Criterion<_> = (Criterion::default()
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(60)))
|
||||
.configure_from_args();
|
||||
keyswitch(&mut criterion, &benchmark_parameters());
|
||||
#[cfg(feature = "boolean")]
|
||||
keyswitch(&mut criterion, &benchmark_32bits_parameters());
|
||||
}
|
||||
|
||||
pub fn multi_bit_ks_group() {
|
||||
let mut criterion: Criterion<_> = (Criterion::default()
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(60)))
|
||||
.configure_from_args();
|
||||
keyswitch(&mut criterion, &multi_bit_benchmark_parameters());
|
||||
}
|
||||
|
||||
pub fn packing_ks_group() {
|
||||
let mut criterion: Criterion<_> = (Criterion::default()
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30)))
|
||||
.configure_from_args();
|
||||
packing_keyswitch(
|
||||
&mut criterion,
|
||||
"packing_keyswitch",
|
||||
&benchmark_compression_parameters(),
|
||||
keyswitch_lwe_ciphertext_list_and_pack_in_glwe_ciphertext,
|
||||
);
|
||||
packing_keyswitch(
|
||||
&mut criterion,
|
||||
"par_packing_keyswitch",
|
||||
&benchmark_compression_parameters(),
|
||||
par_keyswitch_lwe_ciphertext_list_and_pack_in_glwe_ciphertext,
|
||||
);
|
||||
}
|
||||
|
||||
/// Dispatches to the GPU benchmark group matching `val`.
///
/// `val` is lowercased before comparison; `"classical"` and `"multi_bit"` are
/// the accepted flavors.
///
/// # Panics
///
/// Panics when `val` names any other flavor.
#[cfg(feature = "gpu")]
fn go_through_gpu_bench_groups(val: &str) {
    let flavor = val.to_lowercase();
    if flavor == "classical" {
        cuda_ks_group();
    } else if flavor == "multi_bit" {
        cuda_multi_bit_ks_group();
    } else {
        panic!("unknown benchmark operations flavor");
    }
}
|
||||
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
fn go_through_cpu_bench_groups(val: &str) {
|
||||
match val.to_lowercase().as_str() {
|
||||
"classical" => {
|
||||
ks_group();
|
||||
packing_ks_group()
|
||||
}
|
||||
"multi_bit" => multi_bit_ks_group(),
|
||||
_ => panic!("unknown benchmark operations flavor"),
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
match env::var("__TFHE_RS_PARAM_TYPE") {
|
||||
Ok(val) => {
|
||||
#[cfg(feature = "gpu")]
|
||||
go_through_gpu_bench_groups(&val);
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
go_through_cpu_bench_groups(&val);
|
||||
}
|
||||
Err(_) => {
|
||||
ks_group();
|
||||
packing_ks_group()
|
||||
}
|
||||
};
|
||||
|
||||
Criterion::default().configure_from_args().final_summary();
|
||||
}
|
||||
1197
tfhe-benchmark/benches/core_crypto/ks_pbs_bench.rs
Normal file
1197
tfhe-benchmark/benches/core_crypto/ks_pbs_bench.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,89 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use modulus_switch_noise_reduction::improve_lwe_ciphertext_modulus_switch_noise_for_binary_key;
|
||||
use tfhe::core_crypto::commons::parameters::{NoiseEstimationMeasureBound, RSigmaFactor};
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
|
||||
fn modulus_switch_noise_reduction(c: &mut Criterion) {
|
||||
// TODO: use shortint params
|
||||
let lwe_dimension = LweDimension(918);
|
||||
let noise_distribution = DynamicDistribution::new_t_uniform(46);
|
||||
let ciphertext_modulus = CiphertextModulus::new_native();
|
||||
let bound = NoiseEstimationMeasureBound((1_u64 << (64 - 1 - 4 - 1)) as f64);
|
||||
let r_sigma_factor = RSigmaFactor(14.658999256586121);
|
||||
let log_modulus = PolynomialSize(2048).to_blind_rotation_input_modulus_log();
|
||||
let input_variance = Variance(0.);
|
||||
|
||||
for count in [10, 50, 100, 1_000, 10_000, 100_000] {
|
||||
let mut boxed_seeder = new_seeder();
|
||||
let seeder = boxed_seeder.as_mut();
|
||||
|
||||
let mut secret_generator =
|
||||
SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
|
||||
|
||||
let mut encryption_generator =
|
||||
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
|
||||
|
||||
let sk =
|
||||
allocate_and_generate_new_binary_lwe_secret_key(lwe_dimension, &mut secret_generator);
|
||||
|
||||
let clean_lwe = allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&sk,
|
||||
Plaintext(0),
|
||||
noise_distribution,
|
||||
ciphertext_modulus,
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let mut encryptions_of_zero = LweCiphertextList::new(
|
||||
0,
|
||||
lwe_dimension.to_lwe_size(),
|
||||
LweCiphertextCount(count),
|
||||
ciphertext_modulus,
|
||||
);
|
||||
|
||||
let plaintext_list = PlaintextList::new(0, PlaintextCount(count));
|
||||
|
||||
encrypt_lwe_ciphertext_list(
|
||||
&sk,
|
||||
&mut encryptions_of_zero,
|
||||
&plaintext_list,
|
||||
noise_distribution,
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let mut lwe =
|
||||
LweCiphertext::new(0_u64, sk.lwe_dimension().to_lwe_size(), ciphertext_modulus);
|
||||
|
||||
let bench_name = "modulus_switch_noise_reduction";
|
||||
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(5));
|
||||
|
||||
let bench_name = format!("modulus_switch_noise_reduction_{count}");
|
||||
|
||||
bench_group.bench_function(&bench_name, |b| {
|
||||
b.iter(|| {
|
||||
lwe.as_mut().copy_from_slice(clean_lwe.as_ref());
|
||||
|
||||
improve_lwe_ciphertext_modulus_switch_noise_for_binary_key(
|
||||
&mut lwe,
|
||||
&encryptions_of_zero,
|
||||
r_sigma_factor,
|
||||
bound,
|
||||
input_variance,
|
||||
log_modulus,
|
||||
);
|
||||
|
||||
black_box(&lwe);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
modulus_switch_noise_reduction2,
|
||||
modulus_switch_noise_reduction
|
||||
);
|
||||
criterion_main!(modulus_switch_noise_reduction2);
|
||||
484
tfhe-benchmark/benches/core_crypto/pbs128_bench.rs
Normal file
484
tfhe-benchmark/benches/core_crypto/pbs128_bench.rs
Normal file
@@ -0,0 +1,484 @@
|
||||
use benchmark::params_aliases::{
|
||||
BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
};
|
||||
use benchmark::utilities::{write_to_json, CryptoParametersRecord, OperatorType};
|
||||
use criterion::{black_box, Criterion};
|
||||
use dyn_stack::PodStack;
|
||||
use tfhe::core_crypto::fft_impl::fft128::crypto::bootstrap::bootstrap_scratch;
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
use tfhe::keycache::NamedParam;
|
||||
|
||||
fn pbs_128(c: &mut Criterion) {
|
||||
let bench_name = "core_crypto::pbs128";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
|
||||
type InputScalar = u64;
|
||||
type OutputScalar = u128;
|
||||
|
||||
let noise_params = BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let base_params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let lwe_dimension = base_params.lwe_dimension; // From PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128
|
||||
let glwe_dimension = noise_params.glwe_dimension;
|
||||
let polynomial_size = noise_params.polynomial_size;
|
||||
let lwe_noise_distribution = base_params.lwe_noise_distribution;
|
||||
let glwe_noise_distribution = noise_params.glwe_noise_distribution;
|
||||
let pbs_base_log = noise_params.decomp_base_log;
|
||||
let pbs_level = noise_params.decomp_level_count;
|
||||
let input_ciphertext_modulus = base_params.ciphertext_modulus;
|
||||
let output_ciphertext_modulus = noise_params.ciphertext_modulus;
|
||||
|
||||
let mut boxed_seeder = new_seeder();
|
||||
let seeder = boxed_seeder.as_mut();
|
||||
|
||||
let mut secret_generator = SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
|
||||
|
||||
let mut encryption_generator =
|
||||
EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);
|
||||
|
||||
let input_lwe_secret_key =
|
||||
LweSecretKey::generate_new_binary(lwe_dimension, &mut secret_generator);
|
||||
|
||||
let output_glwe_secret_key = GlweSecretKey::<Vec<OutputScalar>>::generate_new_binary(
|
||||
glwe_dimension,
|
||||
polynomial_size,
|
||||
&mut secret_generator,
|
||||
);
|
||||
|
||||
let output_lwe_secret_key = output_glwe_secret_key.clone().into_lwe_secret_key();
|
||||
|
||||
let mut bsk = LweBootstrapKey::new(
|
||||
OutputScalar::ZERO,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
polynomial_size,
|
||||
pbs_base_log,
|
||||
pbs_level,
|
||||
lwe_dimension,
|
||||
output_ciphertext_modulus,
|
||||
);
|
||||
par_generate_lwe_bootstrap_key(
|
||||
&input_lwe_secret_key,
|
||||
&output_glwe_secret_key,
|
||||
&mut bsk,
|
||||
glwe_noise_distribution,
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let mut fourier_bsk = Fourier128LweBootstrapKey::new(
|
||||
lwe_dimension,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
polynomial_size,
|
||||
pbs_base_log,
|
||||
pbs_level,
|
||||
);
|
||||
convert_standard_lwe_bootstrap_key_to_fourier_128(&bsk, &mut fourier_bsk);
|
||||
|
||||
let message_modulus: InputScalar = 1 << 4;
|
||||
|
||||
let input_message: InputScalar = 3;
|
||||
|
||||
let delta: InputScalar = (1 << (InputScalar::BITS - 1)) / message_modulus;
|
||||
|
||||
let plaintext = Plaintext(input_message * delta);
|
||||
|
||||
let lwe_ciphertext_in: LweCiphertextOwned<InputScalar> =
|
||||
allocate_and_encrypt_new_lwe_ciphertext(
|
||||
&input_lwe_secret_key,
|
||||
plaintext,
|
||||
lwe_noise_distribution,
|
||||
input_ciphertext_modulus,
|
||||
&mut encryption_generator,
|
||||
);
|
||||
|
||||
let accumulator: GlweCiphertextOwned<OutputScalar> = GlweCiphertextOwned::new(
|
||||
OutputScalar::ONE,
|
||||
glwe_dimension.to_glwe_size(),
|
||||
polynomial_size,
|
||||
output_ciphertext_modulus,
|
||||
);
|
||||
|
||||
let mut out_pbs_ct: LweCiphertext<Vec<OutputScalar>> = LweCiphertext::new(
|
||||
OutputScalar::ZERO,
|
||||
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
|
||||
output_ciphertext_modulus,
|
||||
);
|
||||
|
||||
let fft = Fft128::new(polynomial_size);
|
||||
let fft = fft.as_view();
|
||||
|
||||
let mut buffers = vec![
|
||||
0u8;
|
||||
bootstrap_scratch::<OutputScalar>(
|
||||
fourier_bsk.glwe_size(),
|
||||
fourier_bsk.polynomial_size(),
|
||||
fft
|
||||
)
|
||||
.unwrap()
|
||||
.unaligned_bytes_required()
|
||||
];
|
||||
|
||||
let id = format!("{bench_name}::{}", noise_params.name());
|
||||
bench_group.bench_function(&id, |b| {
|
||||
b.iter(|| {
|
||||
fourier_bsk.bootstrap(
|
||||
&mut out_pbs_ct,
|
||||
&lwe_ciphertext_in,
|
||||
&accumulator,
|
||||
fft,
|
||||
PodStack::new(&mut buffers),
|
||||
);
|
||||
black_box(&mut out_pbs_ct);
|
||||
});
|
||||
});
|
||||
|
||||
// TODO Add throughput benchmark case
|
||||
|
||||
let params_record = CryptoParametersRecord {
|
||||
lwe_dimension: Some(lwe_dimension),
|
||||
glwe_dimension: Some(glwe_dimension),
|
||||
polynomial_size: Some(polynomial_size),
|
||||
lwe_noise_distribution: Some(lwe_noise_distribution),
|
||||
glwe_noise_distribution: Some(base_params.glwe_noise_distribution),
|
||||
pbs_base_log: Some(pbs_base_log),
|
||||
pbs_level: Some(pbs_level),
|
||||
ciphertext_modulus: Some(input_ciphertext_modulus),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let bit_size = (message_modulus as u32).ilog2();
|
||||
write_to_json(
|
||||
&id,
|
||||
params_record,
|
||||
noise_params.name(),
|
||||
"pbs",
|
||||
&OperatorType::Atomic,
|
||||
bit_size,
|
||||
vec![bit_size],
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
mod cuda {
    use benchmark::utilities::{
        cuda_local_keys_core, cuda_local_streams_core, get_bench_type, throughput_num_threads,
        write_to_json, BenchmarkType, CpuKeys, CpuKeysBuilder, CryptoParametersRecord,
        CudaLocalKeys, OperatorType,
    };
    use criterion::{black_box, Criterion, Throughput};
    use rayon::prelude::*;
    use tfhe::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
    use tfhe::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
    use tfhe::core_crypto::gpu::{
        cuda_programmable_bootstrap_128_lwe_ciphertext, get_number_of_gpus, CudaStreams,
    };
    use tfhe::core_crypto::prelude::*;
    use tfhe::shortint::engine::ShortintEngine;
    use tfhe::shortint::parameters::ModulusSwitchNoiseReductionParams;
    use tfhe::shortint::server_key::ModulusSwitchNoiseReductionKey;

    /// Benchmarks the CUDA 128-bit programmable bootstrap and writes the
    /// parameters used to the JSON results through `write_to_json`.
    ///
    /// `get_bench_type()` selects between a latency measurement (one ciphertext
    /// on multi-GPU streams) and a throughput measurement (one batch per GPU,
    /// dispatched in parallel with rayon).
    fn cuda_pbs_128(c: &mut Criterion) {
        type Scalar = u128;

        let bench_name = "core_crypto::cuda::pbs128";
        let mut bench_group = c.benchmark_group(bench_name);
        bench_group
            .sample_size(10)
            .measurement_time(std::time::Duration::from_secs(30));

        // Hard-coded cryptographic parameters, reported as PARAMS_SWITCH_SQUASH.
        let lwe_dimension = LweDimension(879);
        let glwe_dimension = GlweDimension(2);
        let polynomial_size = PolynomialSize(2048);
        let lwe_noise_distribution = DynamicDistribution::new_t_uniform(46);
        let lwe_noise_distribution_u128: DynamicDistribution<u128> =
            DynamicDistribution::new_t_uniform(46);
        let glwe_noise_distribution = DynamicDistribution::new_t_uniform(30);
        let pbs_base_log = DecompositionBaseLog(24);
        let pbs_level = DecompositionLevelCount(3);
        let ciphertext_modulus = CiphertextModulus::new_native();
        let ct_modulus_u64: CiphertextModulus<u64> = CiphertextModulus::new_native();

        let modulus_switch_noise_reduction_params = ModulusSwitchNoiseReductionParams {
            modulus_switch_zeros_count: LweCiphertextCount(1449),
            ms_bound: NoiseEstimationMeasureBound(288230376151711744f64),
            ms_r_sigma_factor: RSigmaFactor(13.179852282053789f64),
            ms_input_variance: Variance(2.63039184094559E-7f64),
        };

        let params_name = "PARAMS_SWITCH_SQUASH";

        let mut boxed_seeder = new_seeder();
        let seeder = boxed_seeder.as_mut();
        let mut secret_generator =
            SecretRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed());
        let mut encryption_generator =
            EncryptionRandomGenerator::<DefaultRandomGenerator>::new(seeder.seed(), seeder);

        let input_lwe_secret_key =
            LweSecretKey::generate_new_binary(lwe_dimension, &mut secret_generator);

        // Same key with every coefficient widened to `u128`, used to encrypt the
        // 128-bit inputs.
        let widened_key_data = input_lwe_secret_key
            .as_ref()
            .iter()
            .copied()
            .map(|x| x as u128)
            .collect::<Vec<_>>();
        let input_lwe_secret_key_u128 = LweSecretKey::from_container(widened_key_data);

        let output_glwe_secret_key = GlweSecretKey::<Vec<Scalar>>::generate_new_binary(
            glwe_dimension,
            polynomial_size,
            &mut secret_generator,
        );
        let output_lwe_secret_key = output_glwe_secret_key.clone().into_lwe_secret_key();

        // The bootstrap key is only allocated (zero-filled) here; no key
        // generation routine is called on it in this function.
        let bsk = LweBootstrapKey::new(
            Scalar::ZERO,
            glwe_dimension.to_glwe_size(),
            polynomial_size,
            pbs_base_log,
            pbs_level,
            lwe_dimension,
            ciphertext_modulus,
        );

        let mut engine = ShortintEngine::new();
        let modulus_switch_noise_reduction_key = Some(ModulusSwitchNoiseReductionKey::new(
            modulus_switch_noise_reduction_params,
            &input_lwe_secret_key,
            &mut engine,
            CiphertextModulus::new_native(),
            lwe_noise_distribution,
        ));

        let cpu_keys: CpuKeys<_> = CpuKeysBuilder::new().bootstrap_key(bsk).build();

        // Encode the message 3 with a modulus of 16, scaled by `delta`.
        let message_modulus: Scalar = 1 << 4;
        let input_message: Scalar = 3;
        let delta: Scalar = (1 << (Scalar::BITS - 1)) / message_modulus;
        let plaintext = Plaintext(input_message * delta);

        let bench_id = match get_bench_type() {
            BenchmarkType::Latency => {
                let streams = CudaStreams::new_multi_gpu();
                let gpu_keys = CudaLocalKeys::from_cpu_keys(
                    &cpu_keys,
                    modulus_switch_noise_reduction_key.as_ref(),
                    &streams,
                );

                let host_input: LweCiphertextOwned<Scalar> =
                    allocate_and_encrypt_new_lwe_ciphertext(
                        &input_lwe_secret_key_u128,
                        plaintext,
                        lwe_noise_distribution_u128,
                        ciphertext_modulus,
                        &mut encryption_generator,
                    );
                let device_input =
                    CudaLweCiphertextList::from_lwe_ciphertext(&host_input, &streams);

                let host_accumulator: GlweCiphertextOwned<Scalar> = GlweCiphertextOwned::new(
                    Scalar::ONE,
                    glwe_dimension.to_glwe_size(),
                    polynomial_size,
                    ciphertext_modulus,
                );
                let device_accumulator =
                    CudaGlweCiphertextList::from_glwe_ciphertext(&host_accumulator, &streams);

                let host_output = LweCiphertext::new(
                    Scalar::ZERO,
                    output_lwe_secret_key.lwe_dimension().to_lwe_size(),
                    ciphertext_modulus,
                );
                let mut device_output =
                    CudaLweCiphertextList::from_lwe_ciphertext(&host_output, &streams);

                let id = format!("{bench_name}::{params_name}");
                bench_group.bench_function(&id, |b| {
                    b.iter(|| {
                        cuda_programmable_bootstrap_128_lwe_ciphertext(
                            &device_input,
                            &mut device_output,
                            &device_accumulator,
                            LweCiphertextCount(1),
                            gpu_keys.bsk.as_ref().unwrap(),
                            &streams,
                        );
                        black_box(&mut device_output);
                    })
                });
                id
            }
            BenchmarkType::Throughput => {
                let gpu_keys_vec =
                    cuda_local_keys_core(&cpu_keys, modulus_switch_noise_reduction_key.as_ref());
                let gpu_count = get_number_of_gpus() as usize;

                let id = format!("{bench_name}::throughput::{params_name}");
                let blocks: usize = 1;
                let elements = throughput_num_threads(blocks, 1);
                let elements_per_stream = elements as usize / gpu_count;
                bench_group.throughput(Throughput::Elements(elements));
                bench_group.bench_function(&id, |b| {
                    // Builds, for every GPU, an input list, an accumulator and an
                    // output list on that GPU's local stream.
                    let setup_encrypted_values = || {
                        let local_streams = cuda_local_streams_core();

                        let plaintext_list =
                            PlaintextList::new(Scalar::ZERO, PlaintextCount(elements_per_stream));

                        let input_cts = (0..gpu_count)
                            .map(|gpu_index| {
                                let mut host_inputs = LweCiphertextList::new(
                                    Scalar::ZERO,
                                    input_lwe_secret_key.lwe_dimension().to_lwe_size(),
                                    LweCiphertextCount(elements_per_stream),
                                    ciphertext_modulus,
                                );

                                encrypt_lwe_ciphertext_list(
                                    &input_lwe_secret_key_u128,
                                    &mut host_inputs,
                                    &plaintext_list,
                                    lwe_noise_distribution_u128,
                                    &mut encryption_generator,
                                );

                                CudaLweCiphertextList::from_lwe_ciphertext_list(
                                    &host_inputs,
                                    &local_streams[gpu_index],
                                )
                            })
                            .collect::<Vec<_>>();

                        let accumulators = (0..gpu_count)
                            .map(|gpu_index| {
                                let host_accumulator = GlweCiphertextOwned::new(
                                    Scalar::ONE,
                                    glwe_dimension.to_glwe_size(),
                                    polynomial_size,
                                    ciphertext_modulus,
                                );
                                CudaGlweCiphertextList::from_glwe_ciphertext(
                                    &host_accumulator,
                                    &local_streams[gpu_index],
                                )
                            })
                            .collect::<Vec<_>>();

                        // Allocate the LweCiphertext to store the result of the PBS
                        let output_cts = (0..gpu_count)
                            .map(|gpu_index| {
                                let host_outputs = LweCiphertextList::new(
                                    Scalar::ZERO,
                                    output_lwe_secret_key.lwe_dimension().to_lwe_size(),
                                    LweCiphertextCount(elements_per_stream),
                                    ciphertext_modulus,
                                );
                                CudaLweCiphertextList::from_lwe_ciphertext_list(
                                    &host_outputs,
                                    &local_streams[gpu_index],
                                )
                            })
                            .collect::<Vec<_>>();

                        // Make sure every transfer has completed before timing starts.
                        for stream in local_streams.iter() {
                            stream.synchronize();
                        }

                        (input_cts, output_cts, accumulators, local_streams)
                    };

                    b.iter_batched(
                        setup_encrypted_values,
                        |(input_cts, mut output_cts, accumulators, local_streams)| {
                            (0..gpu_count)
                                .into_par_iter()
                                .zip(input_cts.par_iter())
                                .zip(output_cts.par_iter_mut())
                                .zip(accumulators.par_iter())
                                .zip(local_streams.par_iter())
                                .for_each(
                                    |(
                                        (((gpu_index, input_ct), output_ct), accumulator),
                                        local_stream,
                                    )| {
                                        // NOTE(review): the count passed is 1 although each
                                        // list holds `elements_per_stream` ciphertexts —
                                        // confirm this is intended.
                                        cuda_programmable_bootstrap_128_lwe_ciphertext(
                                            input_ct,
                                            output_ct,
                                            accumulator,
                                            LweCiphertextCount(1),
                                            gpu_keys_vec[gpu_index].bsk.as_ref().unwrap(),
                                            local_stream,
                                        );
                                    },
                                )
                        },
                        criterion::BatchSize::SmallInput,
                    );
                });
                id
            }
        };

        let params_record = CryptoParametersRecord {
            lwe_dimension: Some(lwe_dimension),
            glwe_dimension: Some(glwe_dimension),
            polynomial_size: Some(polynomial_size),
            lwe_noise_distribution: Some(lwe_noise_distribution),
            glwe_noise_distribution: Some(glwe_noise_distribution),
            pbs_base_log: Some(pbs_base_log),
            pbs_level: Some(pbs_level),
            ciphertext_modulus: Some(ct_modulus_u64),
            ..Default::default()
        };

        let bit_size = (message_modulus as u32).ilog2();
        write_to_json(
            &bench_id,
            params_record,
            params_name,
            "pbs",
            &OperatorType::Atomic,
            bit_size,
            vec![bit_size],
        );
    }

    /// Runs the CUDA 128-bit PBS benchmark with a Criterion instance configured
    /// from the command-line arguments.
    pub fn cuda_pbs128_group() {
        let base_config = Criterion::default();
        let mut criterion: Criterion<_> = base_config.configure_from_args();
        cuda_pbs_128(&mut criterion);
    }
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
use cuda::cuda_pbs128_group;
|
||||
|
||||
pub fn pbs128_group() {
|
||||
let mut criterion: Criterion<_> = Criterion::default().configure_from_args();
|
||||
pbs_128(&mut criterion);
|
||||
}
|
||||
|
||||
/// Runs every GPU benchmark group of this file (currently the CUDA PBS128 one).
#[cfg(feature = "gpu")]
fn go_through_gpu_bench_groups() {
    cuda_pbs128_group()
}
|
||||
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
fn go_through_cpu_bench_groups() {
|
||||
pbs128_group();
|
||||
}
|
||||
fn main() {
|
||||
#[cfg(feature = "gpu")]
|
||||
go_through_gpu_bench_groups();
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
go_through_cpu_bench_groups();
|
||||
|
||||
Criterion::default().configure_from_args().final_summary();
|
||||
}
|
||||
1509
tfhe-benchmark/benches/core_crypto/pbs_bench.rs
Normal file
1509
tfhe-benchmark/benches/core_crypto/pbs_bench.rs
Normal file
File diff suppressed because it is too large
Load Diff
134
tfhe-benchmark/benches/high_level_api/bench.rs
Normal file
134
tfhe-benchmark/benches/high_level_api/bench.rs
Normal file
@@ -0,0 +1,134 @@
|
||||
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
use criterion::{black_box, Criterion};
|
||||
use rand::prelude::*;
|
||||
use std::fmt::Write;
|
||||
use std::ops::*;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::{
|
||||
set_server_key, ClientKey, CompressedServerKey, ConfigBuilder, FheUint10, FheUint12,
|
||||
FheUint128, FheUint14, FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8,
|
||||
};
|
||||
|
||||
fn bench_fhe_type<FheType>(c: &mut Criterion, client_key: &ClientKey, type_name: &str)
|
||||
where
|
||||
FheType: FheEncrypt<u128, ClientKey>,
|
||||
for<'a> &'a FheType: Add<&'a FheType, Output = FheType>
|
||||
+ Sub<&'a FheType, Output = FheType>
|
||||
+ Mul<&'a FheType, Output = FheType>
|
||||
+ BitAnd<&'a FheType, Output = FheType>
|
||||
+ BitOr<&'a FheType, Output = FheType>
|
||||
+ BitXor<&'a FheType, Output = FheType>
|
||||
+ Shl<&'a FheType, Output = FheType>
|
||||
+ Shr<&'a FheType, Output = FheType>
|
||||
+ RotateLeft<&'a FheType, Output = FheType>
|
||||
+ RotateRight<&'a FheType, Output = FheType>
|
||||
+ OverflowingAdd<&'a FheType, Output = FheType>
|
||||
+ OverflowingSub<&'a FheType, Output = FheType>,
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(type_name);
|
||||
|
||||
let mut rng = thread_rng();
|
||||
|
||||
let lhs = FheType::encrypt(rng.gen(), client_key);
|
||||
let rhs = FheType::encrypt(rng.gen(), client_key);
|
||||
|
||||
let mut name = String::with_capacity(255);
|
||||
|
||||
write!(name, "add({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs + &rhs)));
|
||||
name.clear();
|
||||
|
||||
write!(name, "overflowing_add({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| {
|
||||
b.iter(|| black_box((&lhs).overflowing_add(&rhs)))
|
||||
});
|
||||
name.clear();
|
||||
|
||||
write!(name, "overflowing_sub({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box(lhs.overflowing_sub(&rhs))));
|
||||
name.clear();
|
||||
|
||||
write!(name, "sub({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs - &rhs)));
|
||||
name.clear();
|
||||
|
||||
write!(name, "mul({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs * &rhs)));
|
||||
name.clear();
|
||||
|
||||
write!(name, "bitand({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs & &rhs)));
|
||||
name.clear();
|
||||
|
||||
write!(name, "bitor({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs | &rhs)));
|
||||
name.clear();
|
||||
|
||||
write!(name, "bitxor({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs ^ &rhs)));
|
||||
name.clear();
|
||||
|
||||
write!(name, "shl({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs << &rhs)));
|
||||
name.clear();
|
||||
|
||||
write!(name, "shr({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs >> &rhs)));
|
||||
name.clear();
|
||||
|
||||
write!(name, "rotl({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box((&lhs).rotate_left(&rhs))));
|
||||
name.clear();
|
||||
|
||||
write!(name, "rotr({type_name}, {type_name})").unwrap();
|
||||
bench_group.bench_function(&name, |b| b.iter(|| black_box((&lhs).rotate_right(&rhs))));
|
||||
name.clear();
|
||||
}
|
||||
|
||||
macro_rules! bench_type {
|
||||
($fhe_type:ident) => {
|
||||
::paste::paste! {
|
||||
fn [<bench_ $fhe_type:snake>](c: &mut Criterion, cks: &ClientKey) {
|
||||
bench_fhe_type::<$fhe_type>(c, cks, stringify!($fhe_type));
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
bench_type!(FheUint2);
|
||||
bench_type!(FheUint4);
|
||||
bench_type!(FheUint6);
|
||||
bench_type!(FheUint8);
|
||||
bench_type!(FheUint10);
|
||||
bench_type!(FheUint12);
|
||||
bench_type!(FheUint14);
|
||||
bench_type!(FheUint16);
|
||||
bench_type!(FheUint32);
|
||||
bench_type!(FheUint64);
|
||||
bench_type!(FheUint128);
|
||||
|
||||
fn main() {
|
||||
let config =
|
||||
ConfigBuilder::with_custom_parameters(BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128)
|
||||
.build();
|
||||
let cks = ClientKey::generate(config);
|
||||
let compressed_sks = CompressedServerKey::new(&cks);
|
||||
|
||||
set_server_key(compressed_sks.decompress());
|
||||
|
||||
let mut c = Criterion::default().configure_from_args();
|
||||
|
||||
bench_fhe_uint2(&mut c, &cks);
|
||||
bench_fhe_uint4(&mut c, &cks);
|
||||
bench_fhe_uint6(&mut c, &cks);
|
||||
bench_fhe_uint8(&mut c, &cks);
|
||||
bench_fhe_uint10(&mut c, &cks);
|
||||
bench_fhe_uint12(&mut c, &cks);
|
||||
bench_fhe_uint14(&mut c, &cks);
|
||||
bench_fhe_uint16(&mut c, &cks);
|
||||
bench_fhe_uint32(&mut c, &cks);
|
||||
bench_fhe_uint64(&mut c, &cks);
|
||||
bench_fhe_uint128(&mut c, &cks);
|
||||
|
||||
c.final_summary();
|
||||
}
|
||||
539
tfhe-benchmark/benches/high_level_api/dex.rs
Normal file
539
tfhe-benchmark/benches/high_level_api/dex.rs
Normal file
@@ -0,0 +1,539 @@
|
||||
#[cfg(feature = "gpu")]
|
||||
use benchmark::params_aliases::BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
#[cfg(feature = "gpu")]
|
||||
use benchmark::utilities::configure_gpu;
|
||||
use benchmark::utilities::{write_to_json, OperatorType};
|
||||
use criterion::measurement::WallTime;
|
||||
use criterion::{BenchmarkGroup, Criterion};
|
||||
use rand::prelude::*;
|
||||
use rand::thread_rng;
|
||||
use std::ops::{Add, Div, Mul, Sub};
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::prelude::*;
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
use tfhe::{set_server_key, CompressedServerKey};
|
||||
use tfhe::{ClientKey, ConfigBuilder, FheBool, FheUint128, FheUint64};
|
||||
|
||||
pub(crate) fn transfer_whitepaper<FheType>(
|
||||
from_amount: &FheType,
|
||||
to_amount: &FheType,
|
||||
amount: &FheType,
|
||||
) -> (FheType, FheType)
|
||||
where
|
||||
FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType>,
|
||||
FheBool: IfThenElse<FheType>,
|
||||
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
|
||||
{
|
||||
let has_enough_funds = (from_amount).ge(amount);
|
||||
|
||||
let mut new_to_amount = to_amount + amount;
|
||||
new_to_amount = has_enough_funds.if_then_else(&new_to_amount, to_amount);
|
||||
|
||||
let mut new_from_amount = from_amount - amount;
|
||||
new_from_amount = has_enough_funds.if_then_else(&new_from_amount, from_amount);
|
||||
|
||||
(new_from_amount, new_to_amount)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn swap_request<FheType>(
|
||||
from_balance_0: &FheType,
|
||||
from_balance_1: &FheType,
|
||||
current_dex_balance_0: &FheType,
|
||||
current_dex_balance_1: &FheType,
|
||||
to_balance_0: &FheType,
|
||||
to_balance_1: &FheType,
|
||||
total_dex_token_0_in: &FheType,
|
||||
total_dex_token_1_in: &FheType,
|
||||
amount0: &FheType,
|
||||
amount1: &FheType,
|
||||
) -> (FheType, FheType, FheType, FheType)
|
||||
where
|
||||
FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType> + Clone,
|
||||
FheBool: IfThenElse<FheType>,
|
||||
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
|
||||
{
|
||||
let (_, new_current_balance_0) =
|
||||
transfer_whitepaper(from_balance_0, current_dex_balance_0, amount0);
|
||||
let (_, new_current_balance_1) =
|
||||
transfer_whitepaper(from_balance_1, current_dex_balance_1, amount1);
|
||||
let sent0 = &new_current_balance_0 - current_dex_balance_0;
|
||||
let sent1 = &new_current_balance_1 - current_dex_balance_1;
|
||||
let pending_0_in = to_balance_0 + &sent0;
|
||||
let pending_total_token_0_in = total_dex_token_0_in + &sent0;
|
||||
let pending_1_in = to_balance_1 + &sent1;
|
||||
let pending_total_token_1_in = total_dex_token_1_in + &sent1;
|
||||
(
|
||||
pending_0_in,
|
||||
pending_total_token_0_in,
|
||||
pending_1_in,
|
||||
pending_total_token_1_in,
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn swap_claim<FheType, BigFheType>(
|
||||
pending_0_in: &FheType,
|
||||
pending_1_in: &FheType,
|
||||
total_dex_token_0_in: u64,
|
||||
total_dex_token_1_in: u64,
|
||||
total_dex_token_0_out: u64,
|
||||
total_dex_token_1_out: u64,
|
||||
old_balance_0: &FheType,
|
||||
old_balance_1: &FheType,
|
||||
current_dex_balance_0: &FheType,
|
||||
current_dex_balance_1: &FheType,
|
||||
) -> (FheType, FheType)
|
||||
where
|
||||
FheType: CastFrom<FheBool>
|
||||
+ for<'a> FheOrd<&'a FheType>
|
||||
+ CastFrom<BigFheType>
|
||||
+ Clone
|
||||
+ Add<Output = FheType>,
|
||||
BigFheType: CastFrom<FheType> + Mul<u128, Output = BigFheType> + Div<u128, Output = BigFheType>,
|
||||
FheBool: IfThenElse<FheType>,
|
||||
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
|
||||
{
|
||||
let mut new_balance_0 = old_balance_0.clone();
|
||||
let mut new_balance_1 = old_balance_1.clone();
|
||||
if total_dex_token_1_in != 0 {
|
||||
let big_pending_1_in = BigFheType::cast_from(pending_1_in.clone());
|
||||
let big_amount_0_out =
|
||||
(big_pending_1_in * total_dex_token_0_out as u128) / total_dex_token_1_in as u128;
|
||||
let amount_0_out = FheType::cast_from(big_amount_0_out);
|
||||
let (_, new_balance_0_tmp) =
|
||||
transfer_whitepaper(current_dex_balance_0, old_balance_0, &amount_0_out);
|
||||
new_balance_0 = new_balance_0_tmp;
|
||||
}
|
||||
if total_dex_token_0_in != 0 {
|
||||
let big_pending_0_in = BigFheType::cast_from(pending_0_in.clone());
|
||||
let big_amount_1_out =
|
||||
(big_pending_0_in * total_dex_token_1_out as u128) / total_dex_token_0_in as u128;
|
||||
let amount_1_out = FheType::cast_from(big_amount_1_out);
|
||||
let (_, new_balance_1_tmp) =
|
||||
transfer_whitepaper(current_dex_balance_1, old_balance_1, &amount_1_out);
|
||||
new_balance_1 = new_balance_1_tmp;
|
||||
}
|
||||
|
||||
(new_balance_0, new_balance_1)
|
||||
}
|
||||
|
||||
#[cfg(feature = "pbs-stats")]
|
||||
mod pbs_stats {
|
||||
use super::*;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
fn write_result(file: &mut File, name: &str, value: usize) {
|
||||
let line = format!("{name},{value}\n");
|
||||
let error_message = format!("cannot write {name} result into file");
|
||||
file.write_all(line.as_bytes()).expect(&error_message);
|
||||
}
|
||||
|
||||
pub fn print_swap_request_pbs_counts<FheType, F>(
|
||||
client_key: &ClientKey,
|
||||
type_name: &str,
|
||||
swap_request_func: F,
|
||||
) where
|
||||
FheType: FheEncrypt<u64, ClientKey>,
|
||||
F: for<'a> Fn(
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
) -> (FheType, FheType, FheType, FheType),
|
||||
{
|
||||
let mut rng = thread_rng();
|
||||
|
||||
let from_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let from_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let current_dex_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let current_dex_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let to_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let to_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let total_dex_token_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let total_dex_token_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let amount_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let amount_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
configure_gpu(client_key);
|
||||
|
||||
tfhe::reset_pbs_count();
|
||||
let (_, _, _, _) = swap_request_func(
|
||||
&from_balance_0,
|
||||
&from_balance_1,
|
||||
¤t_dex_balance_0,
|
||||
¤t_dex_balance_1,
|
||||
&to_balance_0,
|
||||
&to_balance_1,
|
||||
&total_dex_token_0,
|
||||
&total_dex_token_1,
|
||||
&amount_0,
|
||||
&amount_1,
|
||||
);
|
||||
let count = tfhe::get_pbs_count();
|
||||
|
||||
println!("ERC20 swap request/::{type_name}: {count} PBS");
|
||||
|
||||
let params = client_key.computation_parameters();
|
||||
|
||||
let test_name = if cfg!(feature = "gpu") {
|
||||
format!("hlapi::cuda::dex::swap_request::pbs_count::{type_name}")
|
||||
} else {
|
||||
format!("hlapi::dex::swap_request::pbs_count::{type_name}")
|
||||
};
|
||||
|
||||
let results_file = Path::new("dex_swap_request_pbs_count.csv");
|
||||
if !results_file.exists() {
|
||||
File::create(results_file).expect("create results file failed");
|
||||
}
|
||||
let mut file = OpenOptions::new()
|
||||
.append(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open results file");
|
||||
|
||||
write_result(&mut file, &test_name, count as usize);
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
params,
|
||||
params.name(),
|
||||
"pbs-count",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
pub fn print_swap_claim_pbs_counts<FheType, F>(
|
||||
client_key: &ClientKey,
|
||||
type_name: &str,
|
||||
swap_claim_func: F,
|
||||
) where
|
||||
FheType: FheEncrypt<u64, ClientKey>,
|
||||
F: for<'a> Fn(
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
) -> (FheType, FheType),
|
||||
{
|
||||
let mut rng = thread_rng();
|
||||
|
||||
let pending_0_in = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let pending_1_in = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let total_dex_token_0_in = rng.gen::<u64>();
|
||||
let total_dex_token_1_in = rng.gen::<u64>();
|
||||
let total_dex_token_0_out = rng.gen::<u64>();
|
||||
let total_dex_token_1_out = rng.gen::<u64>();
|
||||
let old_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let old_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let current_dex_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let current_dex_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
configure_gpu(client_key);
|
||||
|
||||
tfhe::reset_pbs_count();
|
||||
let (_, _) = swap_claim_func(
|
||||
&pending_0_in,
|
||||
&pending_1_in,
|
||||
total_dex_token_0_in,
|
||||
total_dex_token_1_in,
|
||||
total_dex_token_0_out,
|
||||
total_dex_token_1_out,
|
||||
&old_balance_0,
|
||||
&old_balance_1,
|
||||
¤t_dex_balance_0,
|
||||
¤t_dex_balance_1,
|
||||
);
|
||||
let count = tfhe::get_pbs_count();
|
||||
|
||||
println!("ERC20 swap claim/::{type_name}: {count} PBS");
|
||||
|
||||
let params = client_key.computation_parameters();
|
||||
|
||||
let test_name = if cfg!(feature = "gpu") {
|
||||
format!("hlapi::cuda::dex::swap_claim::pbs_count::{type_name}")
|
||||
} else {
|
||||
format!("hlapi::dex::swap_claim::pbs_count::{type_name}")
|
||||
};
|
||||
|
||||
let results_file = Path::new("dex_swap_claim_pbs_count.csv");
|
||||
if !results_file.exists() {
|
||||
File::create(results_file).expect("create results file failed");
|
||||
}
|
||||
let mut file = OpenOptions::new()
|
||||
.append(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open results file");
|
||||
|
||||
write_result(&mut file, &test_name, count as usize);
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
params,
|
||||
params.name(),
|
||||
"pbs-count",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn bench_swap_request_latency<FheType, F>(
|
||||
c: &mut BenchmarkGroup<'_, WallTime>,
|
||||
client_key: &ClientKey,
|
||||
bench_name: &str,
|
||||
type_name: &str,
|
||||
fn_name: &str,
|
||||
swap_request_func: F,
|
||||
) where
|
||||
FheType: FheEncrypt<u64, ClientKey>,
|
||||
F: for<'a> Fn(
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
) -> (FheType, FheType, FheType, FheType),
|
||||
{
|
||||
#[cfg(feature = "gpu")]
|
||||
configure_gpu(client_key);
|
||||
|
||||
let bench_id = format!("{bench_name}::{fn_name}::{type_name}");
|
||||
c.bench_function(&bench_id, |b| {
|
||||
let mut rng = thread_rng();
|
||||
|
||||
let from_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let from_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let current_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let current_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let to_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let to_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let total_token_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let total_token_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let amount_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let amount_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
|
||||
b.iter(|| {
|
||||
let (_, _, _, _) = swap_request_func(
|
||||
&from_balance_0,
|
||||
&from_balance_1,
|
||||
¤t_balance_0,
|
||||
¤t_balance_1,
|
||||
&to_balance_0,
|
||||
&to_balance_1,
|
||||
&total_token_0,
|
||||
&total_token_1,
|
||||
&amount_0,
|
||||
&amount_1,
|
||||
);
|
||||
})
|
||||
});
|
||||
|
||||
let params = client_key.computation_parameters();
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
params,
|
||||
params.name(),
|
||||
"dex-swap-request",
|
||||
&OperatorType::Atomic,
|
||||
64,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
|
||||
fn bench_swap_claim_latency<FheType, F>(
|
||||
c: &mut BenchmarkGroup<'_, WallTime>,
|
||||
client_key: &ClientKey,
|
||||
bench_name: &str,
|
||||
type_name: &str,
|
||||
fn_name: &str,
|
||||
swap_claim_func: F,
|
||||
) where
|
||||
FheType: FheEncrypt<u64, ClientKey>,
|
||||
F: for<'a> Fn(
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
&'a FheType,
|
||||
) -> (FheType, FheType),
|
||||
{
|
||||
#[cfg(feature = "gpu")]
|
||||
configure_gpu(client_key);
|
||||
|
||||
let bench_id = format!("{bench_name}::{fn_name}::{type_name}");
|
||||
c.bench_function(&bench_id, |b| {
|
||||
let mut rng = thread_rng();
|
||||
|
||||
let pending_0_in = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let pending_1_in = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let total_token_0_in = rng.gen::<u64>();
|
||||
let total_token_1_in = rng.gen::<u64>();
|
||||
let total_token_0_out = rng.gen::<u64>();
|
||||
let total_token_1_out = rng.gen::<u64>();
|
||||
let old_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let old_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let current_balance_0 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let current_balance_1 = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
|
||||
b.iter(|| {
|
||||
let (_, _) = swap_claim_func(
|
||||
&pending_0_in,
|
||||
&pending_1_in,
|
||||
total_token_0_in,
|
||||
total_token_1_in,
|
||||
total_token_0_out,
|
||||
total_token_1_out,
|
||||
&old_balance_0,
|
||||
&old_balance_1,
|
||||
¤t_balance_0,
|
||||
¤t_balance_1,
|
||||
);
|
||||
})
|
||||
});
|
||||
|
||||
let params = client_key.computation_parameters();
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
params,
|
||||
params.name(),
|
||||
"dex-swap-claim",
|
||||
&OperatorType::Atomic,
|
||||
64,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(feature = "pbs-stats")]
|
||||
use crate::pbs_stats::print_swap_claim_pbs_counts;
|
||||
#[cfg(feature = "pbs-stats")]
|
||||
use crate::pbs_stats::print_swap_request_pbs_counts;
|
||||
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
fn main() {
|
||||
let params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let config = ConfigBuilder::with_custom_parameters(params).build();
|
||||
let cks = ClientKey::generate(config);
|
||||
let compressed_sks = CompressedServerKey::new(&cks);
|
||||
|
||||
let sks = compressed_sks.decompress();
|
||||
|
||||
rayon::broadcast(|_| set_server_key(sks.clone()));
|
||||
set_server_key(sks);
|
||||
|
||||
let mut c = Criterion::default().sample_size(10).configure_from_args();
|
||||
|
||||
let bench_name = "hlapi::dex";
|
||||
|
||||
// FheUint64 PBS counts
|
||||
// We don't run multiple times since every input is encrypted
|
||||
// PBS count is always the same
|
||||
#[cfg(feature = "pbs-stats")]
|
||||
{
|
||||
print_swap_request_pbs_counts(&cks, "FheUint64", swap_request::<FheUint64>);
|
||||
print_swap_claim_pbs_counts(&cks, "FheUint64", swap_claim::<FheUint64, FheUint128>);
|
||||
}
|
||||
|
||||
// FheUint64 latency
|
||||
{
|
||||
let mut group = c.benchmark_group(bench_name);
|
||||
bench_swap_request_latency(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"swap_request",
|
||||
swap_request::<FheUint64>,
|
||||
);
|
||||
bench_swap_claim_latency(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"swap_claim",
|
||||
swap_claim::<FheUint64, FheUint128>,
|
||||
);
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
c.final_summary();
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
fn main() {
|
||||
let params = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let config = ConfigBuilder::with_custom_parameters(params).build();
|
||||
let cks = ClientKey::generate(config);
|
||||
|
||||
let mut c = Criterion::default().sample_size(10).configure_from_args();
|
||||
|
||||
let bench_name = "hlapi::cuda::dex";
|
||||
|
||||
// FheUint64 PBS counts
|
||||
// We don't run multiple times since every input is encrypted
|
||||
// PBS count is always the same
|
||||
#[cfg(feature = "pbs-stats")]
|
||||
{
|
||||
print_swap_request_pbs_counts(&cks, "FheUint64", swap_request::<FheUint64>);
|
||||
print_swap_claim_pbs_counts(&cks, "FheUint64", swap_claim::<FheUint64, FheUint128>);
|
||||
}
|
||||
|
||||
// FheUint64 latency
|
||||
{
|
||||
let mut group = c.benchmark_group(bench_name);
|
||||
bench_swap_request_latency(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"swap_request",
|
||||
swap_request::<FheUint64>,
|
||||
);
|
||||
bench_swap_claim_latency(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"swap_claim",
|
||||
swap_claim::<FheUint64, FheUint128>,
|
||||
);
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
c.final_summary();
|
||||
}
|
||||
595
tfhe-benchmark/benches/high_level_api/erc20.rs
Normal file
595
tfhe-benchmark/benches/high_level_api/erc20.rs
Normal file
@@ -0,0 +1,595 @@
|
||||
#[cfg(feature = "gpu")]
|
||||
use benchmark::params_aliases::BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
#[cfg(feature = "gpu")]
|
||||
use benchmark::utilities::configure_gpu;
|
||||
use benchmark::utilities::{write_to_json, OperatorType};
|
||||
use criterion::measurement::WallTime;
|
||||
use criterion::{BenchmarkGroup, Criterion, Throughput};
|
||||
use rand::prelude::*;
|
||||
use rand::thread_rng;
|
||||
use rayon::prelude::*;
|
||||
use std::ops::{Add, Mul, Sub};
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::prelude::*;
|
||||
#[cfg(feature = "gpu")]
|
||||
use tfhe::GpuIndex;
|
||||
use tfhe::{set_server_key, ClientKey, CompressedServerKey, ConfigBuilder, FheBool, FheUint64};
|
||||
|
||||
/// Transfer as written in the original FHEvm white-paper,
|
||||
/// it uses a comparison to check if the sender has enough,
|
||||
/// and cmuxes based on the comparison result
|
||||
pub fn transfer_whitepaper<FheType>(
|
||||
from_amount: &FheType,
|
||||
to_amount: &FheType,
|
||||
amount: &FheType,
|
||||
) -> (FheType, FheType)
|
||||
where
|
||||
FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType>,
|
||||
FheBool: IfThenElse<FheType>,
|
||||
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
|
||||
{
|
||||
let has_enough_funds = (from_amount).ge(amount);
|
||||
|
||||
let mut new_to_amount = to_amount + amount;
|
||||
new_to_amount = has_enough_funds.if_then_else(&new_to_amount, to_amount);
|
||||
|
||||
let mut new_from_amount = from_amount - amount;
|
||||
new_from_amount = has_enough_funds.if_then_else(&new_from_amount, from_amount);
|
||||
|
||||
(new_from_amount, new_to_amount)
|
||||
}
|
||||
|
||||
/// This one also uses a comparison, but it leverages the 'boolean' multiplication
|
||||
/// instead of cmuxes, so it is faster
|
||||
fn transfer_no_cmux<FheType>(
|
||||
from_amount: &FheType,
|
||||
to_amount: &FheType,
|
||||
amount: &FheType,
|
||||
) -> (FheType, FheType)
|
||||
where
|
||||
FheType: Add<Output = FheType> + CastFrom<FheBool> + for<'a> FheOrd<&'a FheType>,
|
||||
FheBool: IfThenElse<FheType>,
|
||||
for<'a> &'a FheType:
|
||||
Add<Output = FheType> + Sub<Output = FheType> + Mul<FheType, Output = FheType>,
|
||||
{
|
||||
let has_enough_funds = (from_amount).ge(amount);
|
||||
|
||||
let amount = amount * FheType::cast_from(has_enough_funds);
|
||||
|
||||
let new_to_amount = to_amount + &amount;
|
||||
let new_from_amount = from_amount - &amount;
|
||||
|
||||
(new_from_amount, new_to_amount)
|
||||
}
|
||||
|
||||
/// This one uses overflowing sub to remove the need for comparison
|
||||
/// it also uses the 'boolean' multiplication
|
||||
fn transfer_overflow<FheType>(
|
||||
from_amount: &FheType,
|
||||
to_amount: &FheType,
|
||||
amount: &FheType,
|
||||
) -> (FheType, FheType)
|
||||
where
|
||||
FheType: CastFrom<FheBool> + for<'a> FheOrd<&'a FheType>,
|
||||
FheBool: IfThenElse<FheType>,
|
||||
for<'a> &'a FheType: Add<FheType, Output = FheType>
|
||||
+ OverflowingSub<&'a FheType, Output = FheType>
|
||||
+ Mul<FheType, Output = FheType>,
|
||||
{
|
||||
let (new_from, did_not_have_enough) = (from_amount).overflowing_sub(amount);
|
||||
|
||||
let new_from_amount = did_not_have_enough.if_then_else(from_amount, &new_from);
|
||||
|
||||
let had_enough_funds = !did_not_have_enough;
|
||||
let new_to_amount = to_amount + (amount * FheType::cast_from(had_enough_funds));
|
||||
|
||||
(new_from_amount, new_to_amount)
|
||||
}
|
||||
|
||||
/// This ones uses both overflowing_add/sub to check that both
|
||||
/// the sender has enough funds, and the receiver will not overflow its balance
|
||||
fn transfer_safe<FheType>(
|
||||
from_amount: &FheType,
|
||||
to_amount: &FheType,
|
||||
amount: &FheType,
|
||||
) -> (FheType, FheType)
|
||||
where
|
||||
for<'a> &'a FheType: OverflowingSub<&'a FheType, Output = FheType>
|
||||
+ OverflowingAdd<&'a FheType, Output = FheType>,
|
||||
FheBool: IfThenElse<FheType>,
|
||||
{
|
||||
let (new_from, did_not_have_enough_funds) = (from_amount).overflowing_sub(amount);
|
||||
let (new_to, did_not_have_enough_space) = (to_amount).overflowing_add(amount);
|
||||
|
||||
let something_not_ok = did_not_have_enough_funds | did_not_have_enough_space;
|
||||
|
||||
let new_from_amount = something_not_ok.if_then_else(from_amount, &new_from);
|
||||
let new_to_amount = something_not_ok.if_then_else(to_amount, &new_to);
|
||||
|
||||
(new_from_amount, new_to_amount)
|
||||
}
|
||||
|
||||
#[cfg(feature = "pbs-stats")]
|
||||
mod pbs_stats {
|
||||
use super::*;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
fn write_result(file: &mut File, name: &str, value: usize) {
|
||||
let line = format!("{name},{value}\n");
|
||||
let error_message = format!("cannot write {name} result into file");
|
||||
file.write_all(line.as_bytes()).expect(&error_message);
|
||||
}
|
||||
|
||||
pub fn print_transfer_pbs_counts<FheType, F>(
|
||||
client_key: &ClientKey,
|
||||
type_name: &str,
|
||||
fn_name: &str,
|
||||
transfer_func: F,
|
||||
) where
|
||||
FheType: FheEncrypt<u64, ClientKey>,
|
||||
F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType),
|
||||
{
|
||||
let mut rng = thread_rng();
|
||||
|
||||
let from_amount = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let to_amount = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let amount = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
configure_gpu(client_key);
|
||||
|
||||
tfhe::reset_pbs_count();
|
||||
let (_, _) = transfer_func(&from_amount, &to_amount, &amount);
|
||||
let count = tfhe::get_pbs_count();
|
||||
|
||||
println!("ERC20 transfer/{fn_name}::{type_name}: {count} PBS");
|
||||
|
||||
let params = client_key.computation_parameters();
|
||||
|
||||
let test_name = if cfg!(feature = "gpu") {
|
||||
format!("hlapi::cuda::erc20::pbs_count::{fn_name}::{type_name}")
|
||||
} else {
|
||||
format!("hlapi::erc20::pbs_count::{fn_name}::{type_name}")
|
||||
};
|
||||
|
||||
let results_file = Path::new("erc20_pbs_count.csv");
|
||||
if !results_file.exists() {
|
||||
File::create(results_file).expect("create results file failed");
|
||||
}
|
||||
let mut file = OpenOptions::new()
|
||||
.append(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open results file");
|
||||
|
||||
write_result(&mut file, &test_name, count as usize);
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
params,
|
||||
params.name(),
|
||||
"pbs-count",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn bench_transfer_latency<FheType, F>(
|
||||
c: &mut BenchmarkGroup<'_, WallTime>,
|
||||
client_key: &ClientKey,
|
||||
bench_name: &str,
|
||||
type_name: &str,
|
||||
fn_name: &str,
|
||||
transfer_func: F,
|
||||
) where
|
||||
FheType: FheEncrypt<u64, ClientKey>,
|
||||
F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType),
|
||||
{
|
||||
#[cfg(feature = "gpu")]
|
||||
configure_gpu(client_key);
|
||||
|
||||
let bench_id = format!("{bench_name}::{fn_name}::{type_name}");
|
||||
c.bench_function(&bench_id, |b| {
|
||||
let mut rng = thread_rng();
|
||||
|
||||
let from_amount = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let to_amount = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
let amount = FheType::encrypt(rng.gen::<u64>(), client_key);
|
||||
|
||||
b.iter(|| {
|
||||
let (_, _) = transfer_func(&from_amount, &to_amount, &amount);
|
||||
})
|
||||
});
|
||||
|
||||
let params = client_key.computation_parameters();
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
params,
|
||||
params.name(),
|
||||
"erc20-transfer",
|
||||
&OperatorType::Atomic,
|
||||
64,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
fn bench_transfer_throughput<FheType, F>(
|
||||
group: &mut BenchmarkGroup<'_, WallTime>,
|
||||
client_key: &ClientKey,
|
||||
bench_name: &str,
|
||||
type_name: &str,
|
||||
fn_name: &str,
|
||||
transfer_func: F,
|
||||
) where
|
||||
FheType: FheEncrypt<u64, ClientKey> + Send + Sync,
|
||||
F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType) + Sync,
|
||||
{
|
||||
let mut rng = thread_rng();
|
||||
|
||||
for num_elems in [10, 100, 500] {
|
||||
group.throughput(Throughput::Elements(num_elems));
|
||||
let bench_id =
|
||||
format!("{bench_name}::throughput::{fn_name}::{type_name}::{num_elems}_elems");
|
||||
group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| {
|
||||
let from_amounts = (0..num_elems)
|
||||
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
|
||||
.collect::<Vec<_>>();
|
||||
let to_amounts = (0..num_elems)
|
||||
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
|
||||
.collect::<Vec<_>>();
|
||||
let amounts = (0..num_elems)
|
||||
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
b.iter(|| {
|
||||
from_amounts
|
||||
.par_iter()
|
||||
.zip(to_amounts.par_iter().zip(amounts.par_iter()))
|
||||
.for_each(|(from_amount, (to_amount, amount))| {
|
||||
let (_, _) = transfer_func(from_amount, to_amount, amount);
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
let params = client_key.computation_parameters();
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
params,
|
||||
params.name(),
|
||||
"erc20-transfer",
|
||||
&OperatorType::Atomic,
|
||||
64,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "gpu")]
|
||||
fn cuda_bench_transfer_throughput<FheType, F>(
|
||||
group: &mut BenchmarkGroup<'_, WallTime>,
|
||||
client_key: &ClientKey,
|
||||
bench_name: &str,
|
||||
type_name: &str,
|
||||
fn_name: &str,
|
||||
transfer_func: F,
|
||||
) where
|
||||
FheType: FheEncrypt<u64, ClientKey> + Send + Sync,
|
||||
F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType) + Sync,
|
||||
{
|
||||
let mut rng = thread_rng();
|
||||
let num_gpus = get_number_of_gpus() as u64;
|
||||
let compressed_server_key = CompressedServerKey::new(client_key);
|
||||
|
||||
let sks_vec = (0..num_gpus)
|
||||
.map(|i| compressed_server_key.decompress_to_specific_gpu(GpuIndex::new(i as u32)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for num_elems in [10 * num_gpus, 100 * num_gpus, 500 * num_gpus] {
|
||||
group.throughput(Throughput::Elements(num_elems));
|
||||
let bench_id =
|
||||
format!("{bench_name}::throughput::{fn_name}::{type_name}::{num_elems}_elems");
|
||||
group.bench_with_input(&bench_id, &num_elems, |b, &num_elems| {
|
||||
let from_amounts = (0..num_elems)
|
||||
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
|
||||
.collect::<Vec<_>>();
|
||||
let to_amounts = (0..num_elems)
|
||||
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
|
||||
.collect::<Vec<_>>();
|
||||
let amounts = (0..num_elems)
|
||||
.map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let num_streams_per_gpu = 8; // Hard coded stream value for FheUint64
|
||||
let chunk_size = (num_elems / num_gpus) as usize;
|
||||
|
||||
b.iter(|| {
|
||||
from_amounts
|
||||
.par_chunks(chunk_size) // Split into chunks of num_gpus
|
||||
.zip(
|
||||
to_amounts
|
||||
.par_chunks(chunk_size)
|
||||
.zip(amounts.par_chunks(chunk_size)),
|
||||
) // Zip with the other data
|
||||
.enumerate() // Get the index for GPU
|
||||
.for_each(
|
||||
|(i, (from_amount_gpu_i, (to_amount_gpu_i, amount_gpu_i)))| {
|
||||
// Process chunks within each GPU
|
||||
let stream_chunk_size = from_amount_gpu_i.len() / num_streams_per_gpu;
|
||||
from_amount_gpu_i
|
||||
.par_chunks(stream_chunk_size)
|
||||
.zip(to_amount_gpu_i.par_chunks(stream_chunk_size))
|
||||
.zip(amount_gpu_i.par_chunks(stream_chunk_size))
|
||||
.for_each(
|
||||
|((from_amount_chunk, to_amount_chunk), amount_chunk)| {
|
||||
// Set the server key for the current GPU
|
||||
set_server_key(sks_vec[i].clone());
|
||||
// Parallel iteration over the chunks of data
|
||||
from_amount_chunk
|
||||
.iter()
|
||||
.zip(to_amount_chunk.iter().zip(amount_chunk.iter()))
|
||||
.for_each(|(from_amount, (to_amount, amount))| {
|
||||
transfer_func(from_amount, to_amount, amount);
|
||||
});
|
||||
},
|
||||
);
|
||||
},
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
let params = client_key.computation_parameters();
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
params,
|
||||
params.name(),
|
||||
"erc20-transfer",
|
||||
&OperatorType::Atomic,
|
||||
64,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "pbs-stats")]
|
||||
use pbs_stats::print_transfer_pbs_counts;
|
||||
#[cfg(feature = "gpu")]
|
||||
use tfhe::core_crypto::gpu::get_number_of_gpus;
|
||||
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
fn main() {
|
||||
let params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let config = ConfigBuilder::with_custom_parameters(params).build();
|
||||
let cks = ClientKey::generate(config);
|
||||
let compressed_sks = CompressedServerKey::new(&cks);
|
||||
|
||||
let sks = compressed_sks.decompress();
|
||||
|
||||
rayon::broadcast(|_| set_server_key(sks.clone()));
|
||||
set_server_key(sks);
|
||||
|
||||
let mut c = Criterion::default().sample_size(10).configure_from_args();
|
||||
|
||||
let bench_name = "hlapi::erc20";
|
||||
|
||||
// FheUint64 PBS counts
|
||||
// We don't run multiple times since every input is encrypted
|
||||
// PBS count is always the same
|
||||
#[cfg(feature = "pbs-stats")]
|
||||
{
|
||||
print_transfer_pbs_counts(
|
||||
&cks,
|
||||
"FheUint64",
|
||||
"transfer::whitepaper",
|
||||
transfer_whitepaper::<FheUint64>,
|
||||
);
|
||||
print_transfer_pbs_counts(&cks, "FheUint64", "no_cmux", transfer_no_cmux::<FheUint64>);
|
||||
print_transfer_pbs_counts(
|
||||
&cks,
|
||||
"FheUint64",
|
||||
"transfer::overflow",
|
||||
transfer_overflow::<FheUint64>,
|
||||
);
|
||||
print_transfer_pbs_counts(&cks, "FheUint64", "safe", transfer_safe::<FheUint64>);
|
||||
}
|
||||
|
||||
// FheUint64 latency
|
||||
{
|
||||
let mut group = c.benchmark_group(bench_name);
|
||||
bench_transfer_latency(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"transfer::whitepaper",
|
||||
transfer_whitepaper::<FheUint64>,
|
||||
);
|
||||
bench_transfer_latency(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"transfer::no_cmux",
|
||||
transfer_no_cmux::<FheUint64>,
|
||||
);
|
||||
bench_transfer_latency(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"transfer::overflow",
|
||||
transfer_overflow::<FheUint64>,
|
||||
);
|
||||
bench_transfer_latency(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"transfer::safe",
|
||||
transfer_safe::<FheUint64>,
|
||||
);
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// FheUint64 Throughput
|
||||
{
|
||||
let mut group = c.benchmark_group(bench_name);
|
||||
bench_transfer_throughput(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"transfer::whitepaper",
|
||||
transfer_whitepaper::<FheUint64>,
|
||||
);
|
||||
bench_transfer_throughput(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"transfer::no_cmux",
|
||||
transfer_no_cmux::<FheUint64>,
|
||||
);
|
||||
bench_transfer_throughput(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"transfer::overflow",
|
||||
transfer_overflow::<FheUint64>,
|
||||
);
|
||||
bench_transfer_throughput(
|
||||
&mut group,
|
||||
&cks,
|
||||
bench_name,
|
||||
"FheUint64",
|
||||
"transfer::safe",
|
||||
transfer_safe::<FheUint64>,
|
||||
);
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
c.final_summary();
|
||||
}
|
||||
|
||||
/// GPU entry point of the ERC20 transfer benchmarks (compiled when the `gpu` feature is on).
///
/// Generates a client key for
/// `BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128`, then runs the
/// latency benchmarks (`bench_transfer_latency`) and the throughput benchmarks
/// (`cuda_bench_transfer_throughput`) of the four `FheUint64` transfer variants. With the
/// `pbs-stats` feature, `print_transfer_pbs_counts` is called for each variant first.
#[cfg(feature = "gpu")]
fn main() {
    let config = ConfigBuilder::with_custom_parameters(
        BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
    )
    .build();
    let cks = ClientKey::generate(config);

    let mut criterion = Criterion::default().sample_size(10).configure_from_args();
    let bench_name = "hlapi::cuda::erc20";

    // FheUint64 PBS counts.
    // A single run per variant is enough: every input is encrypted, so the PBS count is
    // always the same.
    // NOTE(review): "no_cmux" and "safe" lack the "transfer::" prefix used by the other
    // labels and by the benchmarks below - confirm whether this is intentional.
    #[cfg(feature = "pbs-stats")]
    {
        print_transfer_pbs_counts(
            &cks,
            "FheUint64",
            "transfer::whitepaper",
            transfer_whitepaper::<FheUint64>,
        );
        print_transfer_pbs_counts(&cks, "FheUint64", "no_cmux", transfer_no_cmux::<FheUint64>);
        print_transfer_pbs_counts(
            &cks,
            "FheUint64",
            "transfer::overflow",
            transfer_overflow::<FheUint64>,
        );
        print_transfer_pbs_counts(&cks, "FheUint64", "safe", transfer_safe::<FheUint64>);
    }

    // FheUint64 latency.
    let mut latency_group = criterion.benchmark_group(bench_name);
    bench_transfer_latency(
        &mut latency_group,
        &cks,
        bench_name,
        "FheUint64",
        "transfer::whitepaper",
        transfer_whitepaper::<FheUint64>,
    );
    bench_transfer_latency(
        &mut latency_group,
        &cks,
        bench_name,
        "FheUint64",
        "transfer::no_cmux",
        transfer_no_cmux::<FheUint64>,
    );
    bench_transfer_latency(
        &mut latency_group,
        &cks,
        bench_name,
        "FheUint64",
        "transfer::overflow",
        transfer_overflow::<FheUint64>,
    );
    bench_transfer_latency(
        &mut latency_group,
        &cks,
        bench_name,
        "FheUint64",
        "transfer::safe",
        transfer_safe::<FheUint64>,
    );
    latency_group.finish();

    // FheUint64 throughput.
    let mut throughput_group = criterion.benchmark_group(bench_name);
    cuda_bench_transfer_throughput(
        &mut throughput_group,
        &cks,
        bench_name,
        "FheUint64",
        "transfer::whitepaper",
        transfer_whitepaper::<FheUint64>,
    );
    cuda_bench_transfer_throughput(
        &mut throughput_group,
        &cks,
        bench_name,
        "FheUint64",
        "transfer::no_cmux",
        transfer_no_cmux::<FheUint64>,
    );
    cuda_bench_transfer_throughput(
        &mut throughput_group,
        &cks,
        bench_name,
        "FheUint64",
        "transfer::overflow",
        transfer_overflow::<FheUint64>,
    );
    cuda_bench_transfer_throughput(
        &mut throughput_group,
        &cks,
        bench_name,
        "FheUint64",
        "transfer::safe",
        transfer_safe::<FheUint64>,
    );
    throughput_group.finish();

    criterion.final_summary();
}
|
||||
3353
tfhe-benchmark/benches/integer/bench.rs
Normal file
3353
tfhe-benchmark/benches/integer/bench.rs
Normal file
File diff suppressed because it is too large
Load Diff
368
tfhe-benchmark/benches/integer/glwe_packing_compression.rs
Normal file
368
tfhe-benchmark/benches/integer/glwe_packing_compression.rs
Normal file
@@ -0,0 +1,368 @@
|
||||
use benchmark::params_aliases::*;
|
||||
use benchmark::utilities::{
|
||||
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
|
||||
};
|
||||
use criterion::{black_box, criterion_group, Criterion, Throughput};
|
||||
use rayon::prelude::*;
|
||||
use std::cmp::max;
|
||||
use tfhe::integer::ciphertext::CompressedCiphertextListBuilder;
|
||||
use tfhe::integer::{ClientKey, RadixCiphertext};
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::{get_pbs_count, reset_pbs_count};
|
||||
|
||||
fn cpu_glwe_packing(c: &mut Criterion) {
|
||||
let bench_name = "integer::packing_compression";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
|
||||
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let comp_param = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let cks = ClientKey::new(param);
|
||||
|
||||
let private_compression_key = cks.new_compression_private_key(comp_param);
|
||||
|
||||
let (compression_key, decompression_key) =
|
||||
cks.new_compression_decompression_keys(&private_compression_key);
|
||||
|
||||
let log_message_modulus = param.message_modulus.0.ilog2() as usize;
|
||||
|
||||
for bit_size in [
|
||||
2,
|
||||
8,
|
||||
16,
|
||||
32,
|
||||
64,
|
||||
128,
|
||||
256,
|
||||
comp_param.lwe_per_glwe.0 * log_message_modulus,
|
||||
] {
|
||||
assert_eq!(bit_size % log_message_modulus, 0);
|
||||
let num_blocks = bit_size / log_message_modulus;
|
||||
|
||||
let bench_id_pack;
|
||||
let bench_id_unpack;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
let ct = cks.encrypt_radix(0_u32, num_blocks);
|
||||
|
||||
let mut builder = CompressedCiphertextListBuilder::new();
|
||||
|
||||
builder.push(ct);
|
||||
|
||||
bench_id_pack = format!("{bench_name}::pack_u{bit_size}");
|
||||
bench_group.bench_function(&bench_id_pack, |b| {
|
||||
b.iter(|| {
|
||||
let compressed = builder.build(&compression_key);
|
||||
|
||||
_ = black_box(compressed);
|
||||
})
|
||||
});
|
||||
|
||||
let compressed = builder.build(&compression_key);
|
||||
|
||||
bench_id_unpack = format!("{bench_name}::unpack_u{bit_size}");
|
||||
bench_group.bench_function(&bench_id_unpack, |b| {
|
||||
b.iter(|| {
|
||||
let unpacked: RadixCiphertext =
|
||||
compressed.get(0, &decompression_key).unwrap().unwrap();
|
||||
|
||||
_ = black_box(unpacked);
|
||||
})
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
// Execute the operation once to know its cost.
|
||||
let ct = cks.encrypt_radix(0_u32, num_blocks);
|
||||
let mut builder = CompressedCiphertextListBuilder::new();
|
||||
builder.push(ct);
|
||||
let compressed = builder.build(&compression_key);
|
||||
|
||||
reset_pbs_count();
|
||||
let _: RadixCiphertext = compressed.get(0, &decompression_key).unwrap().unwrap();
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
let num_block =
|
||||
(bit_size as f64 / (param.message_modulus.0 as f64).log(2.0)).ceil() as usize;
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
// FIXME thread usage seemed to be somewhat more "efficient".
|
||||
// For example, with bit_size = 2, my laptop is only using around 2/3 of the
|
||||
// available threads Thread usage increases with bit_size = 8 but
|
||||
// still isn't fully loaded.
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
|
||||
let builders = (0..elements)
|
||||
.map(|_| {
|
||||
let ct = cks.encrypt_radix(0_u32, num_blocks);
|
||||
let mut builder = CompressedCiphertextListBuilder::new();
|
||||
builder.push(ct);
|
||||
|
||||
builder
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
bench_id_pack = format!("{bench_name}::throughput::pack_u{bit_size}");
|
||||
bench_group.bench_function(&bench_id_pack, |b| {
|
||||
b.iter(|| {
|
||||
builders.par_iter().for_each(|builder| {
|
||||
builder.build(&compression_key);
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
let compressed = builders
|
||||
.iter()
|
||||
.map(|builder| builder.build(&compression_key))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
bench_id_unpack = format!("{bench_name}::throughput::unpack_u{bit_size}");
|
||||
bench_group.bench_function(&bench_id_unpack, |b| {
|
||||
b.iter(|| {
|
||||
compressed.par_iter().for_each(|comp| {
|
||||
comp.get::<RadixCiphertext>(0, &decompression_key)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id_pack,
|
||||
(comp_param, param),
|
||||
comp_param.name(),
|
||||
"pack",
|
||||
&OperatorType::Atomic,
|
||||
bit_size as u32,
|
||||
vec![param.message_modulus.0.ilog2(); num_blocks],
|
||||
);
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id_unpack,
|
||||
(comp_param, param),
|
||||
comp_param.name(),
|
||||
"unpack",
|
||||
&OperatorType::Atomic,
|
||||
bit_size as u32,
|
||||
vec![param.message_modulus.0.ilog2(); num_blocks],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
/// CUDA variant of the packing compression benchmarks (compiled with the `gpu` feature).
#[cfg(feature = "gpu")]
mod cuda {
    use super::*;
    use benchmark::utilities::cuda_integer_utils::cuda_local_streams;
    use std::cmp::max;
    use tfhe::core_crypto::gpu::CudaStreams;
    use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder;
    use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
    use tfhe::integer::gpu::gen_keys_radix_gpu;

    /// Benchmarks GLWE packing compression (`pack`) and decompression (`unpack`) of radix
    /// ciphertexts on GPU.
    ///
    /// For every benchmarked bit size, the compression keys are generated and moved to the
    /// GPU, then either a latency or a throughput measurement is taken, depending on
    /// `get_bench_type()`. One JSON record per operation is emitted through `write_to_json`.
    ///
    /// # Panics
    ///
    /// Panics if a bit size is not a multiple of the number of message bits per block, or
    /// if a decompression fails.
    fn gpu_glwe_packing(c: &mut Criterion) {
        let bench_name = "integer::cuda::packing_compression";
        let mut bench_group = c.benchmark_group(bench_name);
        bench_group
            .sample_size(15)
            .measurement_time(std::time::Duration::from_secs(30));

        let stream = CudaStreams::new_multi_gpu();

        let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
        let comp_param =
            BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;

        let log_message_modulus = param.message_modulus.0.ilog2() as usize;

        let cks = ClientKey::new(param);
        let private_compression_key = cks.new_compression_private_key(comp_param);

        for bit_size in [
            2,
            8,
            16,
            32,
            64,
            128,
            256,
            comp_param.lwe_per_glwe.0 * log_message_modulus,
        ] {
            assert_eq!(bit_size % log_message_modulus, 0);
            let num_blocks = bit_size / log_message_modulus;

            // Generate and convert compression keys
            let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream);
            let (compressed_compression_key, compressed_decompression_key) =
                radix_cks.new_compressed_compression_decompression_keys(&private_compression_key);
            let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&stream);
            let cuda_decompression_key = compressed_decompression_key.decompress_to_cuda(
                radix_cks.parameters().glwe_dimension(),
                radix_cks.parameters().polynomial_size(),
                radix_cks.parameters().message_modulus(),
                radix_cks.parameters().carry_modulus(),
                radix_cks.parameters().ciphertext_modulus(),
                &stream,
            );

            let (bench_id_pack, bench_id_unpack) = match get_bench_type() {
                BenchmarkType::Latency => {
                    // Encrypt
                    let ct = cks.encrypt_radix(0_u32, num_blocks);
                    let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream);

                    // Benchmark
                    let mut builder = CudaCompressedCiphertextListBuilder::new();
                    builder.push(d_ct, &stream);

                    let pack_id = format!("{bench_name}::pack_u{bit_size}");
                    bench_group.bench_function(&pack_id, |b| {
                        b.iter(|| {
                            let compressed = builder.build(&cuda_compression_key, &stream);

                            _ = black_box(compressed);
                        })
                    });

                    let compressed = builder.build(&cuda_compression_key, &stream);

                    let unpack_id = format!("{bench_name}::unpack_u{bit_size}");
                    bench_group.bench_function(&unpack_id, |b| {
                        b.iter(|| {
                            let unpacked: CudaUnsignedRadixCiphertext = compressed
                                .get(0, &cuda_decompression_key, &stream)
                                .unwrap()
                                .unwrap();

                            _ = black_box(unpacked);
                        })
                    });

                    (pack_id, unpack_id)
                }
                BenchmarkType::Throughput => {
                    // Execute the operation once to know its cost.
                    let (cpu_compression_key, cpu_decompression_key) =
                        cks.new_compression_decompression_keys(&private_compression_key);
                    let ct = cks.encrypt_radix(0_u32, num_blocks);
                    let mut builder = CompressedCiphertextListBuilder::new();
                    builder.push(ct);
                    let compressed = builder.build(&cpu_compression_key);

                    reset_pbs_count();
                    // Use the CPU operation, as pbs_count does not count PBS on the GPU
                    // backend.
                    let _: RadixCiphertext =
                        compressed.get(0, &cpu_decompression_key).unwrap().unwrap();
                    // Operation might not perform any PBS, so we take 1 as default.
                    let pbs_count = max(get_pbs_count(), 1);

                    // `num_blocks` is the exact block count of the ciphertexts built
                    // below; no floating-point recomputation of it is needed.
                    let elements = throughput_num_threads(num_blocks, pbs_count);
                    bench_group.throughput(Throughput::Elements(elements));

                    // Encrypt one ciphertext per throughput element and stage each of
                    // them in its own builder. No standalone builder is created here: it
                    // would never be benchmarked and would only hold GPU memory.
                    let builders = (0..elements)
                        .map(|_| {
                            let ct = cks.encrypt_radix(0_u32, num_blocks);
                            let d_ct =
                                CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream);
                            let mut builder = CudaCompressedCiphertextListBuilder::new();
                            builder.push(d_ct, &stream);

                            builder
                        })
                        .collect::<Vec<_>>();

                    let local_streams = cuda_local_streams(num_blocks, elements as usize);

                    let pack_id = format!("{bench_name}::throughput::pack_u{bit_size}");
                    bench_group.bench_function(&pack_id, |b| {
                        b.iter(|| {
                            builders.par_iter().zip(local_streams.par_iter()).for_each(
                                |(builder, local_stream)| {
                                    builder.build(&cuda_compression_key, local_stream);
                                },
                            )
                        })
                    });

                    let compressed = builders
                        .iter()
                        .map(|builder| builder.build(&cuda_compression_key, &stream))
                        .collect::<Vec<_>>();

                    let unpack_id = format!("{bench_name}::throughput::unpack_u{bit_size}");
                    bench_group.bench_function(&unpack_id, |b| {
                        b.iter(|| {
                            compressed
                                .par_iter()
                                .zip(local_streams.par_iter())
                                .for_each(|(comp, local_stream)| {
                                    comp.get::<CudaUnsignedRadixCiphertext>(
                                        0,
                                        &cuda_decompression_key,
                                        local_stream,
                                    )
                                    .unwrap()
                                    .unwrap();
                                })
                        })
                    });

                    (pack_id, unpack_id)
                }
            };

            // One JSON record per benchmarked operation; only the id and display name
            // differ.
            for (bench_id, display_name) in
                [(&bench_id_pack, "pack"), (&bench_id_unpack, "unpack")]
            {
                write_to_json::<u64, _>(
                    bench_id,
                    (comp_param, param),
                    comp_param.name(),
                    display_name,
                    &OperatorType::Atomic,
                    bit_size as u32,
                    vec![param.message_modulus.0.ilog2(); num_blocks],
                );
            }
        }

        bench_group.finish()
    }

    criterion_group!(gpu_glwe_packing2, gpu_glwe_packing);
}
|
||||
|
||||
// Criterion group `cpu_glwe_packing2` wrapping the `cpu_glwe_packing` benchmark; `main`
// calls it on builds without the `gpu` feature.
criterion_group!(cpu_glwe_packing2, cpu_glwe_packing);
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
use cuda::gpu_glwe_packing2;
|
||||
|
||||
fn main() {
|
||||
#[cfg(feature = "gpu")]
|
||||
gpu_glwe_packing2();
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
cpu_glwe_packing2();
|
||||
|
||||
Criterion::default().configure_from_args().final_summary();
|
||||
}
|
||||
85
tfhe-benchmark/benches/integer/oprf.rs
Normal file
85
tfhe-benchmark/benches/integer/oprf.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
use benchmark::params::ParamsAndNumBlocksIter;
|
||||
use benchmark::utilities::{
|
||||
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
|
||||
};
|
||||
use criterion::{black_box, Criterion, Throughput};
|
||||
use rayon::prelude::*;
|
||||
use std::cmp::max;
|
||||
use tfhe::integer::keycache::KEY_CACHE;
|
||||
use tfhe::integer::IntegerKeyKind;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::{get_pbs_count, reset_pbs_count};
|
||||
use tfhe_csprng::seeders::Seed;
|
||||
|
||||
pub fn unsigned_oprf(c: &mut Criterion) {
|
||||
let bench_name = "integer::unsigned_oprf";
|
||||
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(30));
|
||||
|
||||
for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() {
|
||||
let param_name = param.name();
|
||||
|
||||
let bench_id;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
b.iter(|| {
|
||||
_ = black_box(
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
),
|
||||
);
|
||||
})
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
|
||||
// Execute the operation once to know its cost.
|
||||
reset_pbs_count();
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
);
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
(0..elements).into_par_iter().for_each(|_| {
|
||||
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
|
||||
Seed(0),
|
||||
bit_size as u64,
|
||||
num_block as u64,
|
||||
);
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
param,
|
||||
param.name(),
|
||||
"oprf",
|
||||
&OperatorType::Atomic,
|
||||
bit_size as u32,
|
||||
vec![param.message_modulus().0.ilog2(); num_block],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
3082
tfhe-benchmark/benches/integer/signed_bench.rs
Normal file
3082
tfhe-benchmark/benches/integer/signed_bench.rs
Normal file
File diff suppressed because it is too large
Load Diff
785
tfhe-benchmark/benches/integer/zk_pke.rs
Normal file
785
tfhe-benchmark/benches/integer/zk_pke.rs
Normal file
@@ -0,0 +1,785 @@
|
||||
use benchmark::params_aliases::*;
|
||||
use benchmark::utilities::{
|
||||
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
|
||||
};
|
||||
use criterion::{criterion_group, Criterion, Throughput};
|
||||
use rand::prelude::*;
|
||||
use rayon::prelude::*;
|
||||
use std::cmp::max;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use tfhe::core_crypto::prelude::LweCiphertextCount;
|
||||
use tfhe::integer::key_switching_key::KeySwitchingKey;
|
||||
use tfhe::integer::parameters::IntegerCompactCiphertextListExpansionMode;
|
||||
use tfhe::integer::{ClientKey, CompactPrivateKey, CompactPublicKey, ServerKey};
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::parameters::*;
|
||||
use tfhe::zk::{CompactPkeCrs, ZkComputeLoad};
|
||||
use tfhe::{get_pbs_count, reset_pbs_count};
|
||||
|
||||
/// Writes one `name,value` line (terminated by `\n`) to `file`.
///
/// # Panics
///
/// Panics with the message `cannot write {name} result into file` followed by the
/// underlying I/O error if the write fails.
fn write_result(file: &mut File, name: &str, value: usize) {
    let line = format!("{name},{value}\n");
    // The panic message is only built on failure, instead of being formatted (and
    // allocated) on every call. Its text matches what `expect` would have produced.
    file.write_all(line.as_bytes())
        .unwrap_or_else(|err| panic!("cannot write {name} result into file: {err:?}"));
}
|
||||
|
||||
fn pke_zk_proof(c: &mut Criterion) {
|
||||
let bench_name = "zk::pke_zk_proof";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(60));
|
||||
|
||||
for (param_pke, _param_casting, param_fhe) in [
|
||||
(
|
||||
BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
),
|
||||
(
|
||||
BENCH_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
|
||||
BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
),
|
||||
] {
|
||||
let param_name = param_fhe.name();
|
||||
let param_name = param_name.as_str();
|
||||
let cks = ClientKey::new(param_fhe);
|
||||
let sks = ServerKey::new_radix_server_key(&cks);
|
||||
let compact_private_key = CompactPrivateKey::new(param_pke);
|
||||
let pk = CompactPublicKey::new(&compact_private_key);
|
||||
// Kept for consistency
|
||||
let _casting_key =
|
||||
KeySwitchingKey::new((&compact_private_key, None), (&cks, &sks), _param_casting);
|
||||
|
||||
// We have a use case with 320 bits of metadata
|
||||
let mut metadata = [0u8; (320 / u8::BITS) as usize];
|
||||
let mut rng = rand::thread_rng();
|
||||
metadata.fill_with(|| rng.gen());
|
||||
|
||||
let zk_vers = param_pke.zk_scheme;
|
||||
|
||||
for bits in [64usize, 640, 1280, 4096] {
|
||||
assert_eq!(bits % 64, 0);
|
||||
// Packing, so we take the message and carry modulus to compute our block count
|
||||
let num_block = 64usize.div_ceil(
|
||||
(param_pke.message_modulus.0 * param_pke.carry_modulus.0).ilog2() as usize,
|
||||
);
|
||||
|
||||
use rand::Rng;
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let fhe_uint_count = bits / 64;
|
||||
|
||||
let crs = CompactPkeCrs::from_shortint_params(
|
||||
param_pke,
|
||||
LweCiphertextCount(num_block * fhe_uint_count),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
for compute_load in [ZkComputeLoad::Proof, ZkComputeLoad::Verify] {
|
||||
let zk_load = match compute_load {
|
||||
ZkComputeLoad::Proof => "compute_load_proof",
|
||||
ZkComputeLoad::Verify => "compute_load_verify",
|
||||
};
|
||||
|
||||
let bench_id;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
bench_id = format!(
|
||||
"{bench_name}::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let input_msg = rng.gen::<u64>();
|
||||
let messages = vec![input_msg; fhe_uint_count];
|
||||
|
||||
b.iter(|| {
|
||||
let _ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
|
||||
.extend(messages.iter().copied())
|
||||
.build_with_proof_packed(&crs, &metadata, compute_load)
|
||||
.unwrap();
|
||||
})
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
// Execute the operation once to know its cost.
|
||||
let input_msg = rng.gen::<u64>();
|
||||
let messages = vec![input_msg; fhe_uint_count];
|
||||
|
||||
reset_pbs_count();
|
||||
let _ = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
|
||||
.extend(messages.iter().copied())
|
||||
.build_with_proof_packed(&crs, &metadata, compute_load);
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
|
||||
bench_id = format!(
|
||||
"{bench_name}::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let messages = (0..elements)
|
||||
.map(|_| {
|
||||
let input_msg = rng.gen::<u64>();
|
||||
vec![input_msg; fhe_uint_count]
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
b.iter(|| {
|
||||
messages.par_iter().for_each(|msg| {
|
||||
tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
|
||||
.extend(msg.iter().copied())
|
||||
.build_with_proof_packed(&crs, &metadata, compute_load)
|
||||
.unwrap();
|
||||
})
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let shortint_params: PBSParameters = param_fhe.into();
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_proof",
|
||||
&OperatorType::Atomic,
|
||||
shortint_params.message_modulus().0 as u32,
|
||||
vec![shortint_params.message_modulus().0.ilog2(); num_block],
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
// Criterion group `zk_proof` wrapping the `pke_zk_proof` benchmark.
criterion_group!(zk_proof, pke_zk_proof);
|
||||
|
||||
fn cpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) {
|
||||
let bench_name = "zk::pke_zk_verify";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(60));
|
||||
|
||||
File::create(results_file).expect("create results file failed");
|
||||
let mut file = OpenOptions::new()
|
||||
.append(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open results file");
|
||||
|
||||
for (param_pke, param_casting, param_fhe) in [
|
||||
(
|
||||
BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
),
|
||||
(
|
||||
BENCH_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
|
||||
BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
),
|
||||
] {
|
||||
let param_name = param_fhe.name();
|
||||
let param_name = param_name.as_str();
|
||||
let cks = ClientKey::new(param_fhe);
|
||||
let sks = ServerKey::new_radix_server_key(&cks);
|
||||
let compact_private_key = CompactPrivateKey::new(param_pke);
|
||||
let pk = CompactPublicKey::new(&compact_private_key);
|
||||
let casting_key =
|
||||
KeySwitchingKey::new((&compact_private_key, None), (&cks, &sks), param_casting);
|
||||
|
||||
// We have a use case with 320 bits of metadata
|
||||
let mut metadata = [0u8; (320 / u8::BITS) as usize];
|
||||
let mut rng = rand::thread_rng();
|
||||
metadata.fill_with(|| rng.gen());
|
||||
|
||||
let zk_vers = param_pke.zk_scheme;
|
||||
|
||||
for bits in [64usize, 640, 1280, 4096] {
|
||||
assert_eq!(bits % 64, 0);
|
||||
// Packing, so we take the message and carry modulus to compute our block count
|
||||
let num_block = 64usize.div_ceil(
|
||||
(param_pke.message_modulus.0 * param_pke.carry_modulus.0).ilog2() as usize,
|
||||
);
|
||||
|
||||
use rand::Rng;
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let fhe_uint_count = bits / 64;
|
||||
|
||||
println!("Generating CRS... ");
|
||||
let crs = CompactPkeCrs::from_shortint_params(
|
||||
param_pke,
|
||||
LweCiphertextCount(num_block * fhe_uint_count),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let shortint_params: PBSParameters = param_fhe.into();
|
||||
|
||||
let crs_data = bincode::serialize(&crs).unwrap();
|
||||
|
||||
println!("CRS size: {}", crs_data.len());
|
||||
|
||||
let test_name = format!("zk::crs_sizes::{param_name}_{bits}_bits_packed_ZK{zk_vers:?}");
|
||||
|
||||
write_result(&mut file, &test_name, crs_data.len());
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_crs",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
for compute_load in [ZkComputeLoad::Proof, ZkComputeLoad::Verify] {
|
||||
let zk_load = match compute_load {
|
||||
ZkComputeLoad::Proof => "compute_load_proof",
|
||||
ZkComputeLoad::Verify => "compute_load_verify",
|
||||
};
|
||||
|
||||
let bench_id_verify;
|
||||
let bench_id_verify_and_expand;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
bench_id_verify = format!(
|
||||
"{bench_name}::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
bench_id_verify_and_expand = format!(
|
||||
"{bench_name}_and_expand::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
|
||||
let input_msg = rng.gen::<u64>();
|
||||
let messages = vec![input_msg; fhe_uint_count];
|
||||
|
||||
println!("Generating proven ciphertext ({zk_load})... ");
|
||||
let ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
|
||||
.extend(messages.iter().copied())
|
||||
.build_with_proof_packed(&crs, &metadata, compute_load)
|
||||
.unwrap();
|
||||
|
||||
let proven_ciphertext_list_serialized = bincode::serialize(&ct1).unwrap();
|
||||
|
||||
println!(
|
||||
"proven list size: {}",
|
||||
proven_ciphertext_list_serialized.len()
|
||||
);
|
||||
|
||||
let test_name = format!(
|
||||
"zk::proven_list_size::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
|
||||
write_result(
|
||||
&mut file,
|
||||
&test_name,
|
||||
proven_ciphertext_list_serialized.len(),
|
||||
);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_proof",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
let proof_size = ct1.proof_size();
|
||||
println!("proof size: {}", ct1.proof_size());
|
||||
|
||||
let test_name =
|
||||
format!("zk::proof_sizes::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}");
|
||||
|
||||
write_result(&mut file, &test_name, proof_size);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_proof",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
bench_group.bench_function(&bench_id_verify, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = ct1.verify(&crs, &pk, &metadata);
|
||||
});
|
||||
});
|
||||
|
||||
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = ct1
|
||||
.verify_and_expand(
|
||||
&crs,
|
||||
&pk,
|
||||
&metadata,
|
||||
IntegerCompactCiphertextListExpansionMode::CastAndUnpackIfNecessary(
|
||||
casting_key.as_view(),
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
// In throughput mode object sizes are not recorded.
|
||||
|
||||
// Execute the operation once to know its cost.
|
||||
let input_msg = rng.gen::<u64>();
|
||||
let messages = vec![input_msg; fhe_uint_count];
|
||||
let ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
|
||||
.extend(messages.iter().copied())
|
||||
.build_with_proof_packed(&crs, &metadata, compute_load)
|
||||
.unwrap();
|
||||
|
||||
reset_pbs_count();
|
||||
let _ = ct1.verify_and_expand(
|
||||
&crs,
|
||||
&pk,
|
||||
&metadata,
|
||||
IntegerCompactCiphertextListExpansionMode::CastAndUnpackIfNecessary(
|
||||
casting_key.as_view(),
|
||||
),
|
||||
);
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
|
||||
bench_id_verify = format!(
|
||||
"{bench_name}::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
bench_id_verify_and_expand = format!(
|
||||
"{bench_name}_and_expand::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
|
||||
println!("Generating proven ciphertexts list ({zk_load})... ");
|
||||
let cts = (0..elements)
|
||||
.map(|_| {
|
||||
let input_msg = rng.gen::<u64>();
|
||||
let messages = vec![input_msg; fhe_uint_count];
|
||||
tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
|
||||
.extend(messages.iter().copied())
|
||||
.build_with_proof_packed(&crs, &metadata, compute_load)
|
||||
.unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
bench_group.bench_function(&bench_id_verify, |b| {
|
||||
b.iter(|| {
|
||||
cts.par_iter().for_each(|ct1| {
|
||||
ct1.verify(&crs, &pk, &metadata);
|
||||
})
|
||||
});
|
||||
});
|
||||
|
||||
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
|
||||
b.iter(|| {
|
||||
cts.par_iter().for_each(|ct1| {
|
||||
ct1
|
||||
.verify_and_expand(
|
||||
&crs,
|
||||
&pk,
|
||||
&metadata,
|
||||
IntegerCompactCiphertextListExpansionMode::CastAndUnpackIfNecessary(
|
||||
casting_key.as_view(),
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
})
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id_verify,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_verify",
|
||||
&OperatorType::Atomic,
|
||||
shortint_params.message_modulus().0 as u32,
|
||||
vec![shortint_params.message_modulus().0.ilog2(); num_block],
|
||||
);
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id_verify_and_expand,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_verify_and_expand",
|
||||
&OperatorType::Atomic,
|
||||
shortint_params.message_modulus().0 as u32,
|
||||
vec![shortint_params.message_modulus().0.ilog2(); num_block],
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
#[cfg(all(feature = "gpu", feature = "zk-pok"))]
|
||||
mod cuda {
|
||||
use super::*;
|
||||
use benchmark::utilities::{cuda_local_keys, cuda_local_streams};
|
||||
use criterion::BatchSize;
|
||||
use itertools::Itertools;
|
||||
use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
|
||||
use tfhe::integer::gpu::key_switching_key::CudaKeySwitchingKey;
|
||||
use tfhe::integer::gpu::zk::CudaProvenCompactCiphertextList;
|
||||
use tfhe::integer::gpu::CudaServerKey;
|
||||
use tfhe::integer::CompressedServerKey;
|
||||
|
||||
fn gpu_pke_zk_verify(c: &mut Criterion, results_file: &Path) {
|
||||
let bench_name = "zk::cuda::pke_zk_verify";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(60));
|
||||
|
||||
let streams = CudaStreams::new_multi_gpu();
|
||||
|
||||
File::create(results_file).expect("create results file failed");
|
||||
let mut file = OpenOptions::new()
|
||||
.append(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open results file");
|
||||
|
||||
for (param_pke, param_ksk, param_fhe) in [(
|
||||
PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
PARAM_GPU_MULTI_BIT_GROUP_4_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
)] {
|
||||
let param_name = param_fhe.name();
|
||||
let param_name = param_name.as_str();
|
||||
let cks = ClientKey::new(param_fhe);
|
||||
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
|
||||
let gpu_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
|
||||
let compact_private_key = CompactPrivateKey::new(param_pke);
|
||||
let pk = CompactPublicKey::new(&compact_private_key);
|
||||
let d_ksk = CudaKeySwitchingKey::new(
|
||||
(&compact_private_key, None),
|
||||
(&cks, &gpu_sks),
|
||||
param_ksk,
|
||||
&streams,
|
||||
);
|
||||
|
||||
// We have a use case with 320 bits of metadata
|
||||
let mut metadata = [0u8; (320 / u8::BITS) as usize];
|
||||
let mut rng = rand::thread_rng();
|
||||
metadata.fill_with(|| rng.gen());
|
||||
|
||||
let zk_vers = param_pke.zk_scheme;
|
||||
|
||||
for bits in [64usize, 640, 1280, 4096] {
|
||||
assert_eq!(bits % 64, 0);
|
||||
// Packing, so we take the message and carry modulus to compute our block count
|
||||
let num_block = 64usize.div_ceil(
|
||||
(param_pke.message_modulus.0 * param_pke.carry_modulus.0).ilog2() as usize,
|
||||
);
|
||||
|
||||
use rand::Rng;
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let fhe_uint_count = bits / 64;
|
||||
|
||||
println!("Generating CRS... ");
|
||||
let crs = CompactPkeCrs::from_shortint_params(
|
||||
param_pke,
|
||||
LweCiphertextCount(num_block * fhe_uint_count),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let shortint_params: PBSParameters = param_fhe.into();
|
||||
|
||||
let crs_data = bincode::serialize(&crs).unwrap();
|
||||
|
||||
println!("CRS size: {}", crs_data.len());
|
||||
|
||||
let test_name =
|
||||
format!("zk::crs_sizes::{param_name}_{bits}_bits_packed_ZK{zk_vers:?}");
|
||||
|
||||
write_result(&mut file, &test_name, crs_data.len());
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_crs",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
for compute_load in [ZkComputeLoad::Proof, ZkComputeLoad::Verify] {
|
||||
let zk_load = match compute_load {
|
||||
ZkComputeLoad::Proof => "compute_load_proof",
|
||||
ZkComputeLoad::Verify => "compute_load_verify",
|
||||
};
|
||||
|
||||
let bench_id_verify;
|
||||
let bench_id_verify_and_expand;
|
||||
let bench_id_expand_without_verify;
|
||||
|
||||
match get_bench_type() {
|
||||
BenchmarkType::Latency => {
|
||||
bench_id_verify = format!(
|
||||
"{bench_name}::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
bench_id_verify_and_expand = format!(
|
||||
"{bench_name}_and_expand::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
bench_id_expand_without_verify = format!(
|
||||
"{bench_name}_only_expand::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
|
||||
let input_msg = rng.gen::<u64>();
|
||||
let messages = vec![input_msg; fhe_uint_count];
|
||||
|
||||
println!("Generating proven ciphertext ({zk_load})... ");
|
||||
let ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
|
||||
.extend(messages.iter().copied())
|
||||
.build_with_proof_packed(&crs, &metadata, compute_load)
|
||||
.unwrap();
|
||||
let gpu_ct1 =
|
||||
CudaProvenCompactCiphertextList::from_proven_compact_ciphertext_list(
|
||||
&ct1, &streams,
|
||||
);
|
||||
|
||||
let proven_ciphertext_list_serialized =
|
||||
bincode::serialize(&ct1).unwrap();
|
||||
|
||||
println!(
|
||||
"proven list size: {}",
|
||||
proven_ciphertext_list_serialized.len()
|
||||
);
|
||||
|
||||
let test_name = format!(
|
||||
"zk::proven_list_size::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
|
||||
write_result(
|
||||
&mut file,
|
||||
&test_name,
|
||||
proven_ciphertext_list_serialized.len(),
|
||||
);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_proof",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
let proof_size = ct1.proof_size();
|
||||
println!("proof size: {}", ct1.proof_size());
|
||||
|
||||
let test_name =
|
||||
format!("zk::proof_sizes::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}");
|
||||
|
||||
write_result(&mut file, &test_name, proof_size);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_proof",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
bench_group.bench_function(&bench_id_verify, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = ct1.verify(&crs, &pk, &metadata);
|
||||
});
|
||||
});
|
||||
|
||||
bench_group.bench_function(&bench_id_expand_without_verify, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = gpu_ct1
|
||||
.expand_without_verification(&d_ksk, &streams)
|
||||
.unwrap();
|
||||
});
|
||||
});
|
||||
|
||||
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
|
||||
b.iter(|| {
|
||||
let _ret = gpu_ct1
|
||||
.verify_and_expand(&crs, &pk, &metadata, &d_ksk, &streams)
|
||||
.unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
BenchmarkType::Throughput => {
|
||||
let gpu_sks_vec = cuda_local_keys(&cks);
|
||||
let gpu_count = get_number_of_gpus() as usize;
|
||||
|
||||
// Execute the operation once to know its cost.
|
||||
let input_msg = rng.gen::<u64>();
|
||||
let messages = vec![input_msg; fhe_uint_count];
|
||||
let ct1 = tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
|
||||
.extend(messages.iter().copied())
|
||||
.build_with_proof_packed(&crs, &metadata, compute_load)
|
||||
.unwrap();
|
||||
let gpu_ct1 =
|
||||
CudaProvenCompactCiphertextList::from_proven_compact_ciphertext_list(
|
||||
&ct1, &streams,
|
||||
);
|
||||
|
||||
reset_pbs_count();
|
||||
let _ =
|
||||
gpu_ct1.verify_and_expand(&crs, &pk, &metadata, &d_ksk, &streams);
|
||||
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
|
||||
|
||||
let elements = throughput_num_threads(num_block, pbs_count);
|
||||
bench_group.throughput(Throughput::Elements(elements));
|
||||
|
||||
bench_id_verify = format!(
|
||||
"{bench_name}::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
bench_id_verify_and_expand = format!(
|
||||
"{bench_name}_and_expand::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
bench_id_expand_without_verify = format!(
|
||||
"{bench_name}_only_expand::throughput::{param_name}_{bits}_bits_packed_{zk_load}_ZK{zk_vers:?}"
|
||||
);
|
||||
println!("Generating proven ciphertexts list ({zk_load})... ");
|
||||
let cts = (0..elements)
|
||||
.map(|_| {
|
||||
let input_msg = rng.gen::<u64>();
|
||||
let messages = vec![input_msg; fhe_uint_count];
|
||||
tfhe::integer::ProvenCompactCiphertextList::builder(&pk)
|
||||
.extend(messages.iter().copied())
|
||||
.build_with_proof_packed(&crs, &metadata, compute_load)
|
||||
.unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let local_streams = cuda_local_streams(num_block, elements as usize);
|
||||
let d_ksk_vec = gpu_sks_vec
|
||||
.par_iter()
|
||||
.zip(local_streams.par_iter())
|
||||
.map(|(gpu_sks, local_stream)| {
|
||||
CudaKeySwitchingKey::new(
|
||||
(&compact_private_key, None),
|
||||
(&cks, gpu_sks),
|
||||
param_ksk,
|
||||
local_stream,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
assert_eq!(d_ksk_vec.len(), gpu_count);
|
||||
|
||||
bench_group.bench_function(&bench_id_verify, |b| {
|
||||
b.iter(|| {
|
||||
cts.par_iter().for_each(|ct1| {
|
||||
ct1.verify(&crs, &pk, &metadata);
|
||||
})
|
||||
});
|
||||
});
|
||||
|
||||
bench_group.bench_function(&bench_id_expand_without_verify, |b| {
|
||||
let setup_encrypted_values = || {
|
||||
let local_streams = cuda_local_streams(num_block, elements as usize);
|
||||
|
||||
let gpu_cts = cts.iter().enumerate().map(|(i, ct)| {
|
||||
CudaProvenCompactCiphertextList::from_proven_compact_ciphertext_list(
|
||||
ct, &local_streams[i],
|
||||
)
|
||||
}).collect_vec();
|
||||
|
||||
(gpu_cts, local_streams)
|
||||
};
|
||||
|
||||
b.iter_batched(setup_encrypted_values, |(gpu_cts, local_streams)| {
|
||||
gpu_cts.par_iter()
|
||||
.zip(local_streams.par_iter())
|
||||
.enumerate()
|
||||
.for_each(|(i, (gpu_ct, local_stream))| {
|
||||
gpu_ct
|
||||
.expand_without_verification(&d_ksk_vec[i % gpu_count], local_stream)
|
||||
.unwrap();
|
||||
});
|
||||
}, BatchSize::SmallInput);
|
||||
});
|
||||
|
||||
bench_group.bench_function(&bench_id_verify_and_expand, |b| {
|
||||
let setup_encrypted_values = || {
|
||||
let local_streams = cuda_local_streams(num_block, elements as usize);
|
||||
|
||||
let gpu_cts = cts.iter().enumerate().map(|(i, ct)| {
|
||||
CudaProvenCompactCiphertextList::from_proven_compact_ciphertext_list(
|
||||
ct, &local_streams[i],
|
||||
)
|
||||
}).collect_vec();
|
||||
|
||||
(gpu_cts, local_streams)
|
||||
};
|
||||
|
||||
b.iter_batched(setup_encrypted_values, |(gpu_cts, local_streams)| {
|
||||
gpu_cts
|
||||
.par_iter()
|
||||
.zip(local_streams.par_iter())
|
||||
.for_each(|(gpu_ct, local_stream)| {
|
||||
gpu_ct
|
||||
.verify_and_expand(
|
||||
&crs, &pk, &metadata, &d_ksk, local_stream
|
||||
)
|
||||
.unwrap();
|
||||
});
|
||||
}, BatchSize::SmallInput);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id_verify_and_expand,
|
||||
shortint_params,
|
||||
param_name,
|
||||
"pke_zk_verify_and_expand",
|
||||
&OperatorType::Atomic,
|
||||
shortint_params.message_modulus().0 as u32,
|
||||
vec![shortint_params.message_modulus().0.ilog2(); num_block],
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
pub fn gpu_zk_verify() {
|
||||
let results_file = Path::new("gpu_pke_zk_crs_sizes.csv");
|
||||
let mut criterion: Criterion<_> = (Criterion::default()).configure_from_args();
|
||||
gpu_pke_zk_verify(&mut criterion, results_file);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn zk_verify() {
|
||||
let results_file = Path::new("pke_zk_crs_sizes.csv");
|
||||
let mut criterion: Criterion<_> = (Criterion::default()).configure_from_args();
|
||||
cpu_pke_zk_verify(&mut criterion, results_file);
|
||||
}
|
||||
|
||||
#[cfg(all(feature = "gpu", feature = "zk-pok"))]
|
||||
use crate::cuda::gpu_zk_verify;
|
||||
|
||||
fn main() {
|
||||
#[cfg(all(feature = "gpu", feature = "zk-pok"))]
|
||||
gpu_zk_verify();
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
zk_verify();
|
||||
|
||||
Criterion::default().configure_from_args().final_summary();
|
||||
}
|
||||
697
tfhe-benchmark/benches/shortint/bench.rs
Normal file
697
tfhe-benchmark/benches/shortint/bench.rs
Normal file
@@ -0,0 +1,697 @@
|
||||
use benchmark::params::{
|
||||
raw_benchmark_parameters, SHORTINT_BENCH_PARAMS_GAUSSIAN, SHORTINT_BENCH_PARAMS_TUNIFORM,
|
||||
SHORTINT_MULTI_BIT_BENCH_PARAMS,
|
||||
};
|
||||
use benchmark::utilities::{write_to_json, OperatorType};
|
||||
use criterion::{criterion_group, Criterion};
|
||||
use rand::Rng;
|
||||
use std::env;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::keycache::KEY_CACHE;
|
||||
use tfhe::shortint::parameters::*;
|
||||
use tfhe::shortint::{Ciphertext, CompressedServerKey, ServerKey};
|
||||
|
||||
fn bench_server_key_unary_function<F>(
|
||||
c: &mut Criterion,
|
||||
bench_name: &str,
|
||||
display_name: &str,
|
||||
unary_op: F,
|
||||
) where
|
||||
F: Fn(&ServerKey, &mut Ciphertext),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
for param in raw_benchmark_parameters().iter() {
|
||||
let keys = KEY_CACHE.get_from_param(*param);
|
||||
let (cks, sks) = (keys.client_key(), keys.server_key());
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let modulus = cks.parameters.message_modulus().0;
|
||||
|
||||
let clear_text = rng.gen::<u64>() % modulus;
|
||||
|
||||
let mut ct = cks.encrypt(clear_text);
|
||||
|
||||
let bench_id = format!("{bench_name}::{}", param.name());
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
unary_op(sks, &mut ct);
|
||||
})
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
*param,
|
||||
param.name(),
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
param.message_modulus().0.ilog2(),
|
||||
vec![param.message_modulus().0.ilog2()],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
fn bench_server_key_binary_function<F>(
|
||||
c: &mut Criterion,
|
||||
bench_name: &str,
|
||||
display_name: &str,
|
||||
binary_op: F,
|
||||
) where
|
||||
F: Fn(&ServerKey, &mut Ciphertext, &mut Ciphertext),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
for param in raw_benchmark_parameters().iter() {
|
||||
let keys = KEY_CACHE.get_from_param(*param);
|
||||
let (cks, sks) = (keys.client_key(), keys.server_key());
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let modulus = cks.parameters.message_modulus().0;
|
||||
|
||||
let clear_0 = rng.gen::<u64>() % modulus;
|
||||
let clear_1 = rng.gen::<u64>() % modulus;
|
||||
|
||||
let mut ct_0 = cks.encrypt(clear_0);
|
||||
let mut ct_1 = cks.encrypt(clear_1);
|
||||
|
||||
let bench_id = format!("{bench_name}::{}", param.name());
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
binary_op(sks, &mut ct_0, &mut ct_1);
|
||||
})
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
*param,
|
||||
param.name(),
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
param.message_modulus().0.ilog2(),
|
||||
vec![param.message_modulus().0.ilog2()],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
fn bench_server_key_binary_scalar_function<F>(
|
||||
c: &mut Criterion,
|
||||
bench_name: &str,
|
||||
display_name: &str,
|
||||
binary_op: F,
|
||||
) where
|
||||
F: Fn(&ServerKey, &mut Ciphertext, u8),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
for param in raw_benchmark_parameters().iter() {
|
||||
let keys = KEY_CACHE.get_from_param(*param);
|
||||
let (cks, sks) = (keys.client_key(), keys.server_key());
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let modulus = cks.parameters.message_modulus().0;
|
||||
|
||||
let clear_0 = rng.gen::<u64>() % modulus;
|
||||
let clear_1 = rng.gen::<u64>() % modulus;
|
||||
|
||||
let mut ct_0 = cks.encrypt(clear_0);
|
||||
|
||||
let bench_id = format!("{bench_name}::{}", param.name());
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
binary_op(sks, &mut ct_0, clear_1 as u8);
|
||||
})
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
*param,
|
||||
param.name(),
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
param.message_modulus().0.ilog2(),
|
||||
vec![param.message_modulus().0.ilog2()],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
fn bench_server_key_binary_scalar_division_function<F>(
|
||||
c: &mut Criterion,
|
||||
bench_name: &str,
|
||||
display_name: &str,
|
||||
binary_op: F,
|
||||
) where
|
||||
F: Fn(&ServerKey, &mut Ciphertext, u8),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
for param in raw_benchmark_parameters().iter() {
|
||||
let keys = KEY_CACHE.get_from_param(*param);
|
||||
let (cks, sks) = (keys.client_key(), keys.server_key());
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let modulus = cks.parameters.message_modulus().0;
|
||||
assert_ne!(modulus, 1);
|
||||
|
||||
let clear_0 = rng.gen::<u64>() % modulus;
|
||||
let mut clear_1 = rng.gen::<u64>() % modulus;
|
||||
while clear_1 == 0 {
|
||||
clear_1 = rng.gen::<u64>() % modulus;
|
||||
}
|
||||
|
||||
let mut ct_0 = cks.encrypt(clear_0);
|
||||
|
||||
let bench_id = format!("{bench_name}::{}", param.name());
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
binary_op(sks, &mut ct_0, clear_1 as u8);
|
||||
})
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
*param,
|
||||
param.name(),
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
param.message_modulus().0.ilog2(),
|
||||
vec![param.message_modulus().0.ilog2()],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
fn carry_extract_bench(c: &mut Criterion) {
|
||||
let mut bench_group = c.benchmark_group("carry_extract");
|
||||
|
||||
for param in raw_benchmark_parameters().iter() {
|
||||
let keys = KEY_CACHE.get_from_param(*param);
|
||||
let (cks, sks) = (keys.client_key(), keys.server_key());
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let modulus = cks.parameters.message_modulus().0;
|
||||
|
||||
let clear_0 = rng.gen::<u64>() % modulus;
|
||||
|
||||
let ct_0 = cks.encrypt(clear_0);
|
||||
|
||||
let bench_id = format!("shortint::carry_extract::{}", param.name());
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let _ = sks.carry_extract(&ct_0);
|
||||
})
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
*param,
|
||||
param.name(),
|
||||
"carry_extract",
|
||||
&OperatorType::Atomic,
|
||||
param.message_modulus().0.ilog2(),
|
||||
vec![param.message_modulus().0.ilog2()],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
fn programmable_bootstrapping_bench(c: &mut Criterion) {
|
||||
let mut bench_group = c.benchmark_group("programmable_bootstrap");
|
||||
|
||||
for param in raw_benchmark_parameters().iter() {
|
||||
let keys = KEY_CACHE.get_from_param(*param);
|
||||
let (cks, sks) = (keys.client_key(), keys.server_key());
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let modulus = cks.parameters.message_modulus().0;
|
||||
|
||||
let acc = sks.generate_lookup_table(|x| x);
|
||||
|
||||
let clear_0 = rng.gen::<u64>() % modulus;
|
||||
|
||||
let ctxt = cks.encrypt(clear_0);
|
||||
|
||||
let bench_id = format!("shortint::programmable_bootstrap::{}", param.name());
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let _ = sks.apply_lookup_table(&ctxt, &acc);
|
||||
})
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
*param,
|
||||
param.name(),
|
||||
"pbs",
|
||||
&OperatorType::Atomic,
|
||||
param.message_modulus().0.ilog2(),
|
||||
vec![param.message_modulus().0.ilog2()],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish();
|
||||
}
|
||||
|
||||
fn server_key_from_compressed_key(c: &mut Criterion) {
|
||||
let mut bench_group = c.benchmark_group("uncompress_key");
|
||||
bench_group
|
||||
.sample_size(10)
|
||||
.measurement_time(std::time::Duration::from_secs(60));
|
||||
|
||||
let mut params = SHORTINT_BENCH_PARAMS_TUNIFORM
|
||||
.iter()
|
||||
.chain(SHORTINT_BENCH_PARAMS_GAUSSIAN.iter())
|
||||
.map(|p| (*p).into())
|
||||
.collect::<Vec<PBSParameters>>();
|
||||
let multi_bit_params = SHORTINT_MULTI_BIT_BENCH_PARAMS
|
||||
.iter()
|
||||
.map(|p| (*p).into())
|
||||
.collect::<Vec<PBSParameters>>();
|
||||
params.extend(&multi_bit_params);
|
||||
|
||||
for param in params.iter() {
|
||||
let keys = KEY_CACHE.get_from_param(*param);
|
||||
let sks_compressed = CompressedServerKey::new(keys.client_key());
|
||||
|
||||
let bench_id = format!("shortint::uncompress_key::{}", param.name());
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let clone_compressed_key = || sks_compressed.clone();
|
||||
|
||||
b.iter_batched(
|
||||
clone_compressed_key,
|
||||
|sks_cloned| {
|
||||
let _ = sks_cloned.decompress();
|
||||
},
|
||||
criterion::BatchSize::PerIteration,
|
||||
)
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
*param,
|
||||
param.name(),
|
||||
"uncompress_key",
|
||||
&OperatorType::Atomic,
|
||||
param.message_modulus().0.ilog2(),
|
||||
vec![param.message_modulus().0.ilog2()],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish();
|
||||
}
|
||||
|
||||
/// Generates a Criterion target function named after a unary `ServerKey` method.
///
/// The benchmark group is named `shortint::<method_name>` and `display_name` is stringified and
/// forwarded to `bench_server_key_unary_function`.
macro_rules! define_server_key_unary_bench_fn {
    (method_name: $method:ident, display_name: $display:ident) => {
        fn $method(c: &mut Criterion) {
            bench_server_key_unary_function(
                c,
                concat!("shortint::", stringify!($method)),
                stringify!($display),
                |sks, ct| {
                    let _ = sks.$method(ct);
                },
            )
        }
    };
}
|
||||
|
||||
/// Generates a Criterion target function named after a binary ciphertext/ciphertext `ServerKey`
/// method.
///
/// The benchmark group is named `shortint::<method_name>` and `display_name` is stringified and
/// forwarded to `bench_server_key_binary_function`.
macro_rules! define_server_key_bench_fn {
    (method_name: $method:ident, display_name: $display:ident) => {
        fn $method(c: &mut Criterion) {
            bench_server_key_binary_function(
                c,
                concat!("shortint::", stringify!($method)),
                stringify!($display),
                |sks, ct_left, ct_right| {
                    let _ = sks.$method(ct_left, ct_right);
                },
            )
        }
    };
}
|
||||
|
||||
/// Generates a Criterion target function named after a ciphertext/scalar `ServerKey` method.
///
/// The benchmark group is named `shortint::<method_name>` and `display_name` is stringified and
/// forwarded to `bench_server_key_binary_scalar_function`.
macro_rules! define_server_key_scalar_bench_fn {
    (method_name: $method:ident, display_name: $display:ident) => {
        fn $method(c: &mut Criterion) {
            bench_server_key_binary_scalar_function(
                c,
                concat!("shortint::", stringify!($method)),
                stringify!($display),
                |sks, ct, scalar| {
                    let _ = sks.$method(ct, scalar);
                },
            )
        }
    };
}
|
||||
|
||||
/// Generates a Criterion target function named after a ciphertext/scalar `ServerKey` method
/// that is benchmarked with a non-zero scalar.
///
/// The benchmark group is named `shortint::<method_name>` and `display_name` is stringified and
/// forwarded to `bench_server_key_binary_scalar_division_function`.
macro_rules! define_server_key_scalar_div_bench_fn {
    (method_name: $method:ident, display_name: $display:ident) => {
        fn $method(c: &mut Criterion) {
            bench_server_key_binary_scalar_division_function(
                c,
                concat!("shortint::", stringify!($method)),
                stringify!($display),
                |sks, ct, scalar| {
                    let _ = sks.$method(ct, scalar);
                },
            )
        }
    };
}
|
||||
|
||||
/// Generates a Criterion target function `<function_name>` that forwards to the hand-written
/// `<function_name>_bench` function (the name is assembled with `paste`).
macro_rules! define_custom_bench_fn {
    (function_name: $target:ident) => {
        fn $target(c: &mut Criterion) {
            ::paste::paste! {
                [<$target _bench>](c)
            }
        }
    };
}
|
||||
|
||||
define_server_key_unary_bench_fn!(
|
||||
method_name: unchecked_neg,
|
||||
display_name: negation
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: unchecked_add,
|
||||
display_name: add
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: unchecked_sub,
|
||||
display_name: sub
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: unchecked_mul_lsb,
|
||||
display_name: mul
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: unchecked_mul_msb,
|
||||
display_name: mul
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: unchecked_div,
|
||||
display_name: div
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: smart_bitand,
|
||||
display_name: bitand
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: smart_bitor,
|
||||
display_name: bitor
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: smart_bitxor,
|
||||
display_name: bitxor
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: smart_add,
|
||||
display_name: add
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: smart_sub,
|
||||
display_name: sub
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: smart_mul_lsb,
|
||||
display_name: mul
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: bitand,
|
||||
display_name: bitand
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: bitor,
|
||||
display_name: bitor
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: bitxor,
|
||||
display_name: bitxor
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: add,
|
||||
display_name: add
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: sub,
|
||||
display_name: sub
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: mul,
|
||||
display_name: mul
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: div,
|
||||
display_name: div
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: greater,
|
||||
display_name: greater_than
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: greater_or_equal,
|
||||
display_name: greater_or_equal
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: less,
|
||||
display_name: less_than
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: less_or_equal,
|
||||
display_name: less_or_equal
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: equal,
|
||||
display_name: equal
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: not_equal,
|
||||
display_name: not_equal
|
||||
);
|
||||
define_server_key_unary_bench_fn!(
|
||||
method_name: neg,
|
||||
display_name: negation
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: unchecked_greater,
|
||||
display_name: greater_than
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: unchecked_less,
|
||||
display_name: less_than
|
||||
);
|
||||
define_server_key_bench_fn!(
|
||||
method_name: unchecked_equal,
|
||||
display_name: equal
|
||||
);
|
||||
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: unchecked_scalar_add,
|
||||
display_name: add
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: unchecked_scalar_sub,
|
||||
display_name: sub
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: unchecked_scalar_mul,
|
||||
display_name: mul
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: unchecked_scalar_left_shift,
|
||||
display_name: left_shift
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: unchecked_scalar_right_shift,
|
||||
display_name: right_shift
|
||||
);
|
||||
|
||||
define_server_key_scalar_div_bench_fn!(
|
||||
method_name: unchecked_scalar_div,
|
||||
display_name: div
|
||||
);
|
||||
define_server_key_scalar_div_bench_fn!(
|
||||
method_name: unchecked_scalar_mod,
|
||||
display_name: modulo
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_add,
|
||||
display_name: add
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_sub,
|
||||
display_name: sub
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_mul,
|
||||
display_name: mul
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_left_shift,
|
||||
display_name: left_shift
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_right_shift,
|
||||
display_name: right_shift
|
||||
);
|
||||
|
||||
define_server_key_scalar_div_bench_fn!(
|
||||
method_name: scalar_div,
|
||||
display_name: div
|
||||
);
|
||||
define_server_key_scalar_div_bench_fn!(
|
||||
method_name: scalar_mod,
|
||||
display_name: modulo
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_greater,
|
||||
display_name: greater_than
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_greater_or_equal,
|
||||
display_name: greater_or_equal
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_less,
|
||||
display_name: less_than
|
||||
);
|
||||
define_server_key_scalar_bench_fn!(
|
||||
method_name: scalar_less_or_equal,
|
||||
display_name: less_or_equal
|
||||
);
|
||||
define_server_key_scalar_div_bench_fn!(
|
||||
method_name: scalar_equal,
|
||||
display_name: equal
|
||||
);
|
||||
define_server_key_scalar_div_bench_fn!(
|
||||
method_name: scalar_not_equal,
|
||||
display_name: not_equal
|
||||
);
|
||||
|
||||
define_custom_bench_fn!(function_name: carry_extract);
|
||||
|
||||
define_custom_bench_fn!(
|
||||
function_name: programmable_bootstrapping
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
smart_ops,
|
||||
smart_bitand,
|
||||
smart_bitor,
|
||||
smart_bitxor,
|
||||
smart_add,
|
||||
smart_sub,
|
||||
smart_mul_lsb
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
unchecked_ops,
|
||||
unchecked_neg,
|
||||
unchecked_add,
|
||||
unchecked_sub,
|
||||
unchecked_mul_lsb,
|
||||
unchecked_mul_msb,
|
||||
unchecked_div,
|
||||
unchecked_greater,
|
||||
unchecked_less,
|
||||
unchecked_equal,
|
||||
carry_extract,
|
||||
programmable_bootstrapping
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
unchecked_scalar_ops,
|
||||
unchecked_scalar_add,
|
||||
unchecked_scalar_mul,
|
||||
unchecked_scalar_sub,
|
||||
unchecked_scalar_div,
|
||||
unchecked_scalar_mod,
|
||||
unchecked_scalar_left_shift,
|
||||
unchecked_scalar_right_shift
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
default_ops,
|
||||
neg,
|
||||
bitand,
|
||||
bitor,
|
||||
bitxor,
|
||||
add,
|
||||
sub,
|
||||
div,
|
||||
mul,
|
||||
greater,
|
||||
greater_or_equal,
|
||||
less,
|
||||
less_or_equal,
|
||||
equal,
|
||||
not_equal
|
||||
);
|
||||
|
||||
criterion_group!(
|
||||
default_scalar_ops,
|
||||
scalar_add,
|
||||
scalar_sub,
|
||||
scalar_div,
|
||||
scalar_mul,
|
||||
scalar_mod,
|
||||
scalar_left_shift,
|
||||
scalar_right_shift,
|
||||
scalar_greater,
|
||||
scalar_greater_or_equal,
|
||||
scalar_less,
|
||||
scalar_less_or_equal,
|
||||
scalar_equal,
|
||||
scalar_not_equal
|
||||
);
|
||||
|
||||
criterion_group!(misc, server_key_from_compressed_key);
|
||||
|
||||
mod casting;
|
||||
criterion_group!(
|
||||
casting,
|
||||
casting::pack_cast_64,
|
||||
casting::pack_cast,
|
||||
casting::cast
|
||||
);
|
||||
|
||||
fn main() {
|
||||
fn default_bench() {
|
||||
casting();
|
||||
default_ops();
|
||||
default_scalar_ops();
|
||||
misc();
|
||||
}
|
||||
|
||||
match env::var("__TFHE_RS_BENCH_OP_FLAVOR") {
|
||||
Ok(val) => {
|
||||
match val.to_lowercase().as_str() {
|
||||
"default" => default_bench(),
|
||||
"smart" => smart_ops(),
|
||||
"unchecked" => {
|
||||
unchecked_ops();
|
||||
unchecked_scalar_ops();
|
||||
}
|
||||
_ => panic!("unknown benchmark operations flavor"),
|
||||
};
|
||||
}
|
||||
Err(_) => default_bench(),
|
||||
};
|
||||
|
||||
Criterion::default().configure_from_args().final_summary();
|
||||
}
|
||||
136
tfhe-benchmark/benches/shortint/casting.rs
Normal file
136
tfhe-benchmark/benches/shortint/casting.rs
Normal file
@@ -0,0 +1,136 @@
|
||||
use benchmark::params_aliases::*;
|
||||
use benchmark::utilities::{write_to_json, OperatorType};
|
||||
use criterion::Criterion;
|
||||
use rayon::prelude::*;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::prelude::*;
|
||||
|
||||
pub fn pack_cast_64(c: &mut Criterion) {
|
||||
let bench_name = "shortint::pack_cast_64";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
let (client_key_1, server_key_1): (ClientKey, ServerKey) =
|
||||
gen_keys(BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
let (client_key_2, server_key_2): (ClientKey, ServerKey) =
|
||||
gen_keys(BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ks_param = BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128;
|
||||
let ks_param_name = ks_param.name();
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key_1, Some(&server_key_1)),
|
||||
(&client_key_2, &server_key_2),
|
||||
ks_param,
|
||||
);
|
||||
|
||||
let vec_ct = vec![client_key_1.encrypt(1); 64];
|
||||
|
||||
let bench_id = format!("{bench_name}_{ks_param_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let _ = (0..32)
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
let byte_idx = 7 - i / 4;
|
||||
let pair_idx = i % 4;
|
||||
|
||||
let b0 = &vec_ct[8 * byte_idx + 2 * pair_idx];
|
||||
let b1 = &vec_ct[8 * byte_idx + 2 * pair_idx + 1];
|
||||
|
||||
ksk.cast(
|
||||
&server_key_1.unchecked_add(b0, &server_key_1.unchecked_scalar_mul(b1, 2)),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
});
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
ks_param,
|
||||
ks_param_name,
|
||||
"pack_cast_64",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
|
||||
pub fn pack_cast(c: &mut Criterion) {
|
||||
let bench_name = "shortint::pack_cast";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
let (client_key_1, server_key_1): (ClientKey, ServerKey) =
|
||||
gen_keys(BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
let (client_key_2, server_key_2): (ClientKey, ServerKey) =
|
||||
gen_keys(BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ks_param = BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128;
|
||||
let ks_param_name = "BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128";
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key_1, Some(&server_key_1)),
|
||||
(&client_key_2, &server_key_2),
|
||||
ks_param,
|
||||
);
|
||||
|
||||
let ct_1 = client_key_1.encrypt(1);
|
||||
let ct_2 = client_key_1.encrypt(1);
|
||||
|
||||
let bench_id = format!("{bench_name}_{ks_param_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let _ = ksk.cast(
|
||||
&server_key_1.unchecked_add(&ct_1, &server_key_1.unchecked_scalar_mul(&ct_2, 2)),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
ks_param,
|
||||
ks_param_name,
|
||||
"pack_cast",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
|
||||
pub fn cast(c: &mut Criterion) {
|
||||
let bench_name = "shortint::cast";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
let (client_key_1, server_key_1): (ClientKey, ServerKey) =
|
||||
gen_keys(BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
let (client_key_2, server_key_2): (ClientKey, ServerKey) =
|
||||
gen_keys(BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ks_param = BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128;
|
||||
let ks_param_name = "BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128";
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key_1, Some(&server_key_1)),
|
||||
(&client_key_2, &server_key_2),
|
||||
ks_param,
|
||||
);
|
||||
|
||||
let ct = client_key_1.encrypt(1);
|
||||
|
||||
let bench_id = format!("{bench_name}_{ks_param_name}");
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let _ = ksk.cast(&ct);
|
||||
});
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
ks_param,
|
||||
ks_param_name,
|
||||
"cast",
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
82
tfhe-benchmark/benches/shortint/glwe_packing_compression.rs
Normal file
82
tfhe-benchmark/benches/shortint/glwe_packing_compression.rs
Normal file
@@ -0,0 +1,82 @@
|
||||
use benchmark::params_aliases::*;
|
||||
use criterion::{black_box, criterion_group, Criterion};
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use tfhe::shortint::prelude::*;
|
||||
|
||||
fn glwe_packing(c: &mut Criterion) {
|
||||
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let comp_param = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let number_to_pack = 256;
|
||||
|
||||
let bench_name = "shortint_packing_compression";
|
||||
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
// Generate the client key and the server key:
|
||||
let cks = ClientKey::new(param);
|
||||
|
||||
let private_compression_key = cks.new_compression_private_key(comp_param);
|
||||
|
||||
let (compression_key, decompression_key) =
|
||||
cks.new_compression_decompression_keys(&private_compression_key);
|
||||
|
||||
let ct: Vec<_> = (0..number_to_pack).map(|_| cks.encrypt(0)).collect();
|
||||
|
||||
bench_group.bench_function("pack".to_owned(), |b| {
|
||||
b.iter(|| {
|
||||
let packed = compression_key.compress_ciphertexts_into_list(&ct);
|
||||
|
||||
_ = black_box(packed);
|
||||
})
|
||||
});
|
||||
|
||||
let packed = compression_key.compress_ciphertexts_into_list(&ct);
|
||||
bench_group.bench_function("unpack_all".to_owned(), |b| {
|
||||
b.iter(|| {
|
||||
(0..number_to_pack).into_par_iter().for_each(|i| {
|
||||
let unpacked = decompression_key.unpack(&packed, i);
|
||||
|
||||
_ = black_box(unpacked);
|
||||
});
|
||||
})
|
||||
});
|
||||
|
||||
bench_group.bench_function("unpack_one_lwe".to_owned(), |b| {
|
||||
b.iter(|| {
|
||||
let unpacked = decompression_key.unpack(&packed, 0);
|
||||
|
||||
_ = black_box(unpacked);
|
||||
})
|
||||
});
|
||||
|
||||
bench_group.bench_function("unpack_64b".to_owned(), |b| {
|
||||
b.iter(|| {
|
||||
(0..32).into_par_iter().for_each(|i| {
|
||||
let unpacked = decompression_key.unpack(&packed, i);
|
||||
|
||||
_ = black_box(unpacked);
|
||||
});
|
||||
})
|
||||
});
|
||||
|
||||
bench_group.bench_function("pack_unpack".to_owned(), |b| {
|
||||
b.iter(|| {
|
||||
let packed = compression_key.compress_ciphertexts_into_list(&ct);
|
||||
|
||||
(0..number_to_pack).into_par_iter().for_each(|i| {
|
||||
let unpacked = decompression_key.unpack(&packed, i);
|
||||
|
||||
_ = black_box(unpacked);
|
||||
});
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(glwe_packing2, glwe_packing);
|
||||
|
||||
fn main() {
|
||||
glwe_packing2();
|
||||
Criterion::default().configure_from_args().final_summary();
|
||||
}
|
||||
29
tfhe-benchmark/benches/shortint/oprf.rs
Normal file
29
tfhe-benchmark/benches/shortint/oprf.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
use benchmark::params_aliases::*;
|
||||
use criterion::{black_box, criterion_group, Criterion};
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::keycache::KEY_CACHE;
|
||||
use tfhe_csprng::seeders::Seed;
|
||||
|
||||
fn oprf(c: &mut Criterion) {
|
||||
let bench_name = "shortint-oprf";
|
||||
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
|
||||
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
|
||||
let keys = KEY_CACHE.get_from_param(param);
|
||||
let sks = keys.server_key();
|
||||
|
||||
bench_group.bench_function(format!("2-bits-oprf::{}", param.name()), |b| {
|
||||
b.iter(|| {
|
||||
_ = black_box(sks.generate_oblivious_pseudo_random(Seed(0), 2));
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(oprf2, oprf);
|
||||
|
||||
fn main() {
|
||||
oprf2();
|
||||
Criterion::default().configure_from_args().final_summary();
|
||||
}
|
||||
88
tfhe-benchmark/src/bin/boolean_key_sizes.rs
Normal file
88
tfhe-benchmark/src/bin/boolean_key_sizes.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
use benchmark::utilities::{write_to_json, OperatorType};
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use tfhe::boolean::parameters::{DEFAULT_PARAMETERS, PARAMETERS_ERROR_PROB_2_POW_MINUS_165};
|
||||
use tfhe::boolean::{client_key, server_key};
|
||||
|
||||
/// Writes one `name,value` line, terminated by a newline, to `file`.
///
/// # Panics
///
/// Panics with `cannot write {name} result into file` followed by the I/O error if the write
/// fails.
fn write_result(file: &mut File, name: &str, value: usize) {
    let line = format!("{name},{value}\n");
    // The panic message is only built on failure; it used to be formatted on every call.
    file.write_all(line.as_bytes())
        .unwrap_or_else(|err| panic!("cannot write {name} result into file: {err:?}"));
}
|
||||
|
||||
fn client_server_key_sizes(results_file: &Path) {
|
||||
let boolean_params_vec = [
|
||||
(DEFAULT_PARAMETERS, "DEFAULT_PARAMETERS"),
|
||||
(PARAMETERS_ERROR_PROB_2_POW_MINUS_165, "TFHE_LIB_PARAMETERS"),
|
||||
];
|
||||
File::create(results_file).expect("create results file failed");
|
||||
let mut file = OpenOptions::new()
|
||||
.append(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open results file");
|
||||
|
||||
let operator = OperatorType::Atomic;
|
||||
|
||||
println!("Generating boolean (ClientKey, ServerKey)");
|
||||
for (i, (params, params_name)) in boolean_params_vec.iter().enumerate() {
|
||||
println!(
|
||||
"Generating [{} / {}] : {}",
|
||||
i + 1,
|
||||
boolean_params_vec.len(),
|
||||
params_name.to_lowercase()
|
||||
);
|
||||
|
||||
let cks = client_key::ClientKey::new(params);
|
||||
let sks = server_key::ServerKey::new(&cks);
|
||||
let ksk_size = sks.key_switching_key_size_bytes();
|
||||
let test_name = format!("boolean_key_sizes_{params_name}_ksk");
|
||||
|
||||
write_result(&mut file, &test_name, ksk_size);
|
||||
write_to_json::<u32, _>(
|
||||
&test_name,
|
||||
*params,
|
||||
*params_name,
|
||||
"KSK",
|
||||
&operator,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
println!(
|
||||
"Element in KSK: {}, size in bytes: {}",
|
||||
sks.key_switching_key_size_elements(),
|
||||
ksk_size,
|
||||
);
|
||||
|
||||
let bsk_size = sks.bootstrapping_key_size_bytes();
|
||||
let test_name = format!("boolean_key_sizes_{params_name}_bsk");
|
||||
|
||||
write_result(&mut file, &test_name, bsk_size);
|
||||
write_to_json::<u32, _>(
|
||||
&test_name,
|
||||
*params,
|
||||
*params_name,
|
||||
"BSK",
|
||||
&operator,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
println!(
|
||||
"Element in BSK: {}, size in bytes: {}",
|
||||
sks.bootstrapping_key_size_elements(),
|
||||
bsk_size,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let work_dir = std::env::current_dir().unwrap();
|
||||
let mut new_work_dir = work_dir;
|
||||
new_work_dir.push("tfhe");
|
||||
std::env::set_current_dir(new_work_dir).unwrap();
|
||||
|
||||
let results_file = Path::new("boolean_key_sizes.csv");
|
||||
client_server_key_sizes(results_file)
|
||||
}
|
||||
145
tfhe-benchmark/src/bin/hlapi_compact_pk_ct_sizes.rs
Normal file
145
tfhe-benchmark/src/bin/hlapi_compact_pk_ct_sizes.rs
Normal file
@@ -0,0 +1,145 @@
|
||||
use benchmark::params_aliases::*;
|
||||
use benchmark::utilities::{write_to_json, OperatorType};
|
||||
use rand::Rng;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use tfhe::integer::U256;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::PBSParameters;
|
||||
use tfhe::{generate_keys, CompactCiphertextList, CompactPublicKey, ConfigBuilder};
|
||||
|
||||
/// Writes one `name,value` line, terminated by a newline, to `file`.
///
/// # Panics
///
/// Panics with `cannot write {name} result into file` followed by the I/O error if the write
/// fails.
fn write_result(file: &mut File, name: &str, value: usize) {
    let line = format!("{name},{value}\n");
    // The panic message is only built on failure; it used to be formatted on every call.
    file.write_all(line.as_bytes())
        .unwrap_or_else(|err| panic!("cannot write {name} result into file: {err:?}"));
}
|
||||
|
||||
pub fn cpk_and_cctl_sizes(results_file: &Path) {
|
||||
const NB_CTXT: usize = 5;
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
File::create(results_file).expect("create results file failed");
|
||||
let mut file = OpenOptions::new()
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.write(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open results file");
|
||||
|
||||
let operator = OperatorType::Atomic;
|
||||
|
||||
{
|
||||
let params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(params)
|
||||
.use_dedicated_compact_public_key_parameters((
|
||||
BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
))
|
||||
.build();
|
||||
let (client_key, _) = generate_keys(config);
|
||||
let test_name = format!("hlapi_sizes_{}_cpk", params.name());
|
||||
|
||||
let params: PBSParameters = params.into();
|
||||
|
||||
println!("Sizes for: {} and 32 bits", params.name());
|
||||
|
||||
let public_key = CompactPublicKey::new(&client_key);
|
||||
|
||||
let cpk_size = bincode::serialize(&public_key).unwrap().len();
|
||||
|
||||
println!("PK size: {cpk_size} bytes");
|
||||
write_result(&mut file, &test_name, cpk_size);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
params,
|
||||
params.name(),
|
||||
"CPK",
|
||||
&operator,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
let test_name = format!("hlapi_sizes_{}_cctl_{NB_CTXT}_len_32_bits", params.name());
|
||||
|
||||
let vec_inputs: Vec<_> = (0..NB_CTXT).map(|_| rng.gen::<u32>()).collect();
|
||||
|
||||
let encrypted_inputs = CompactCiphertextList::builder(&public_key)
|
||||
.extend(vec_inputs.iter().copied())
|
||||
.build();
|
||||
let cctl_size = bincode::serialize(&encrypted_inputs).unwrap().len();
|
||||
|
||||
println!("Compact CT list for {NB_CTXT} CTs: {cctl_size} bytes");
|
||||
|
||||
write_result(&mut file, &test_name, cctl_size);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
params,
|
||||
params.name(),
|
||||
"CCTL",
|
||||
&operator,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
|
||||
// 256 bits
|
||||
{
|
||||
let params = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(params)
|
||||
.use_dedicated_compact_public_key_parameters((
|
||||
BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
))
|
||||
.build();
|
||||
let (client_key, _) = generate_keys(config);
|
||||
|
||||
let params: PBSParameters = params.into();
|
||||
|
||||
println!("Sizes for: {} and 256 bits", params.name());
|
||||
|
||||
let public_key = CompactPublicKey::new(&client_key);
|
||||
|
||||
println!(
|
||||
"PK size: {} bytes",
|
||||
bincode::serialize(&public_key).unwrap().len()
|
||||
);
|
||||
|
||||
let test_name = format!("hlapi_sizes_{}_cctl_{NB_CTXT}_len_256_bits", params.name());
|
||||
|
||||
let vec_inputs: Vec<_> = (0..NB_CTXT).map(|_| U256::from(rng.gen::<u32>())).collect();
|
||||
|
||||
let encrypted_inputs = CompactCiphertextList::builder(&public_key)
|
||||
.extend(vec_inputs.iter().copied())
|
||||
.build();
|
||||
let cctl_size = bincode::serialize(&encrypted_inputs).unwrap().len();
|
||||
|
||||
println!("Compact CT list for {NB_CTXT} CTs: {cctl_size} bytes");
|
||||
|
||||
write_result(&mut file, &test_name, cctl_size);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
params,
|
||||
params.name(),
|
||||
"CCTL",
|
||||
&operator,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let work_dir = std::env::current_dir().unwrap();
|
||||
println!("work_dir: {}", std::env::current_dir().unwrap().display());
|
||||
// Change workdir so that the location of the keycache matches the one for tests
|
||||
let mut new_work_dir = work_dir;
|
||||
new_work_dir.push("tfhe");
|
||||
std::env::set_current_dir(new_work_dir).unwrap();
|
||||
|
||||
let results_file = Path::new("hlapi_cpk_and_cctl_sizes.csv");
|
||||
cpk_and_cctl_sizes(results_file)
|
||||
}
|
||||
291
tfhe-benchmark/src/bin/shortint_key_sizes.rs
Normal file
291
tfhe-benchmark/src/bin/shortint_key_sizes.rs
Normal file
@@ -0,0 +1,291 @@
|
||||
use benchmark::params_aliases::*;
|
||||
use benchmark::utilities::{write_to_json, CryptoParametersRecord, OperatorType};
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::atomic_pattern::compressed::CompressedAtomicPatternServerKey;
|
||||
use tfhe::shortint::keycache::KEY_CACHE;
|
||||
use tfhe::shortint::server_key::{StandardServerKey, StandardServerKeyView};
|
||||
use tfhe::shortint::{
|
||||
ClassicPBSParameters, ClientKey, CompactPrivateKey, CompressedCompactPublicKey,
|
||||
CompressedKeySwitchingKey, CompressedServerKey, PBSParameters,
|
||||
};
|
||||
|
||||
/// Writes one `name,value` line, terminated by a newline, to `file`.
///
/// # Panics
///
/// Panics with `cannot write {name} result into file` followed by the I/O error if the write
/// fails.
fn write_result(file: &mut File, name: &str, value: usize) {
    let line = format!("{name},{value}\n");
    // The panic message is only built on failure; it used to be formatted on every call.
    file.write_all(line.as_bytes())
        .unwrap_or_else(|err| panic!("cannot write {name} result into file: {err:?}"));
}
|
||||
|
||||
fn client_server_key_sizes(results_file: &Path) {
|
||||
let shortint_params_vec: Vec<PBSParameters> = vec![
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
|
||||
BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
];
|
||||
File::create(results_file).expect("create results file failed");
|
||||
let mut file = OpenOptions::new()
|
||||
.append(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open results file");
|
||||
|
||||
let operator = OperatorType::Atomic;
|
||||
|
||||
println!("Generating shortint (ClientKey, ServerKey)");
|
||||
for (i, params) in shortint_params_vec.iter().copied().enumerate() {
|
||||
println!(
|
||||
"Generating [{} / {}] : {}",
|
||||
i + 1,
|
||||
shortint_params_vec.len(),
|
||||
params.name().to_lowercase()
|
||||
);
|
||||
|
||||
let keys = KEY_CACHE.get_from_param(params);
|
||||
|
||||
let cks = keys.client_key();
|
||||
let sks = StandardServerKeyView::try_from(keys.server_key().as_view()).unwrap();
|
||||
let ksk_size = sks.key_switching_key_size_bytes();
|
||||
let test_name = format!("shortint_key_sizes_{}_ksk", params.name());
|
||||
|
||||
write_result(&mut file, &test_name, ksk_size);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
params,
|
||||
params.name(),
|
||||
"KSK",
|
||||
&operator,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
println!(
|
||||
"Element in KSK: {}, size in bytes: {}",
|
||||
sks.key_switching_key_size_elements(),
|
||||
ksk_size,
|
||||
);
|
||||
|
||||
let bsk_size = sks.bootstrapping_key_size_bytes();
|
||||
let test_name = format!("shortint_key_sizes_{}_bsk", params.name());
|
||||
|
||||
write_result(&mut file, &test_name, bsk_size);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
params,
|
||||
params.name(),
|
||||
"BSK",
|
||||
&operator,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
println!(
|
||||
"Element in BSK: {}, size in bytes: {}",
|
||||
sks.bootstrapping_key_size_elements(),
|
||||
bsk_size,
|
||||
);
|
||||
|
||||
let sks_compressed = CompressedServerKey::new(cks);
|
||||
let bsk_compressed_size = sks_compressed.bootstrapping_key_size_bytes();
|
||||
let test_name = format!("shortint_key_sizes_{}_bsk_compressed", params.name());
|
||||
|
||||
write_result(&mut file, &test_name, bsk_compressed_size);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
params,
|
||||
params.name(),
|
||||
"BSK",
|
||||
&operator,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
println!(
|
||||
"Element in BSK compressed: {}, size in bytes: {}",
|
||||
sks_compressed.bootstrapping_key_size_elements(),
|
||||
bsk_compressed_size,
|
||||
);
|
||||
|
||||
// Clear keys as we go to avoid filling the RAM
|
||||
KEY_CACHE.clear_in_memory_cache()
|
||||
}
|
||||
}
|
||||
|
||||
fn measure_serialized_size<T: serde::Serialize, P: Into<CryptoParametersRecord<u64>> + Clone>(
|
||||
to_serialize: &T,
|
||||
param: P,
|
||||
param_name: &str,
|
||||
test_name_suffix: &str,
|
||||
display_name: &str,
|
||||
file: &mut File,
|
||||
) {
|
||||
let serialized = bincode::serialize(to_serialize).unwrap();
|
||||
let size = serialized.len();
|
||||
let test_name = format!("shortint_key_sizes_{param_name}_{test_name_suffix}");
|
||||
write_result(file, &test_name, size);
|
||||
write_to_json::<u64, _>(
|
||||
&test_name,
|
||||
param.clone(),
|
||||
param_name,
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
|
||||
println!("{test_name_suffix} {param_name} -> size: {size} bytes",);
|
||||
}
|
||||
|
||||
fn tuniform_key_set_sizes(results_file: &Path) {
|
||||
File::create(results_file).expect("create results file failed");
|
||||
let mut file = OpenOptions::new()
|
||||
.append(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open results file");
|
||||
|
||||
println!("Measuring shortint key sizes:");
|
||||
|
||||
let param_fhe = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let param_fhe_name = param_fhe.name();
|
||||
let cks = ClientKey::new(param_fhe);
|
||||
let compressed_sks = CompressedServerKey::new(&cks);
|
||||
let sks = StandardServerKey::try_from(compressed_sks.decompress()).unwrap();
|
||||
|
||||
let std_compressed_ap_key = match &compressed_sks.compressed_ap_server_key {
|
||||
CompressedAtomicPatternServerKey::Standard(
|
||||
compressed_standard_atomic_pattern_server_key,
|
||||
) => compressed_standard_atomic_pattern_server_key,
|
||||
CompressedAtomicPatternServerKey::KeySwitch32(_) => {
|
||||
panic!("KS32 is unsupported to measure key sizes at the moment")
|
||||
}
|
||||
};
|
||||
|
||||
measure_serialized_size(
|
||||
&sks.atomic_pattern.key_switching_key,
|
||||
<ClassicPBSParameters as Into<PBSParameters>>::into(param_fhe),
|
||||
¶m_fhe_name,
|
||||
"ksk",
|
||||
"KSK",
|
||||
&mut file,
|
||||
);
|
||||
measure_serialized_size(
|
||||
std_compressed_ap_key.key_switching_key(),
|
||||
<ClassicPBSParameters as Into<PBSParameters>>::into(param_fhe),
|
||||
¶m_fhe_name,
|
||||
"ksk_compressed",
|
||||
"KSK",
|
||||
&mut file,
|
||||
);
|
||||
|
||||
measure_serialized_size(
|
||||
&sks.atomic_pattern.bootstrapping_key,
|
||||
<ClassicPBSParameters as Into<PBSParameters>>::into(param_fhe),
|
||||
¶m_fhe_name,
|
||||
"bsk",
|
||||
"BSK",
|
||||
&mut file,
|
||||
);
|
||||
measure_serialized_size(
|
||||
&std_compressed_ap_key.bootstrapping_key(),
|
||||
<ClassicPBSParameters as Into<PBSParameters>>::into(param_fhe),
|
||||
¶m_fhe_name,
|
||||
"bsk_compressed",
|
||||
"BSK",
|
||||
&mut file,
|
||||
);
|
||||
|
||||
let param_pke = BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let param_pke_name = param_pke.name();
|
||||
let compact_private_key = CompactPrivateKey::new(param_pke);
|
||||
let compressed_pk = CompressedCompactPublicKey::new(&compact_private_key);
|
||||
let pk = compressed_pk.decompress();
|
||||
|
||||
measure_serialized_size(&pk, param_pke, ¶m_pke_name, "cpk", "CPK", &mut file);
|
||||
measure_serialized_size(
|
||||
&compressed_pk,
|
||||
param_pke,
|
||||
¶m_pke_name,
|
||||
"cpk_compressed",
|
||||
"CPK",
|
||||
&mut file,
|
||||
);
|
||||
|
||||
let param_compression = BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let param_compression_name = param_compression.name();
|
||||
let params_tuple = (
|
||||
param_compression,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
);
|
||||
|
||||
let private_compression_key = cks.new_compression_private_key(param_compression);
|
||||
let (compression_key, decompression_key) =
|
||||
cks.new_compression_decompression_keys(&private_compression_key);
|
||||
|
||||
measure_serialized_size(
|
||||
&compression_key,
|
||||
params_tuple,
|
||||
¶m_compression_name,
|
||||
"compression_key",
|
||||
"CompressionKey",
|
||||
&mut file,
|
||||
);
|
||||
measure_serialized_size(
|
||||
&decompression_key,
|
||||
params_tuple,
|
||||
¶m_compression_name,
|
||||
"decompression_key",
|
||||
"CompressionKey",
|
||||
&mut file,
|
||||
);
|
||||
|
||||
let param_casting = BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let param_casting_name = param_casting.name();
|
||||
let compressed_casting_key = CompressedKeySwitchingKey::new(
|
||||
(&compact_private_key, None),
|
||||
(&cks, &compressed_sks),
|
||||
param_casting,
|
||||
);
|
||||
let casting_key = compressed_casting_key.decompress();
|
||||
|
||||
measure_serialized_size(
|
||||
&casting_key.into_raw_parts().0,
|
||||
param_casting,
|
||||
¶m_casting_name,
|
||||
"casting_key",
|
||||
"CastKey",
|
||||
&mut file,
|
||||
);
|
||||
measure_serialized_size(
|
||||
&compressed_casting_key.into_raw_parts().0,
|
||||
param_casting,
|
||||
¶m_casting_name,
|
||||
"casting_key_compressed",
|
||||
"CastKey",
|
||||
&mut file,
|
||||
);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let work_dir = std::env::current_dir().unwrap();
|
||||
println!("work_dir: {}", std::env::current_dir().unwrap().display());
|
||||
// Change workdir so that the location of the keycache matches the one for tests
|
||||
let mut new_work_dir = work_dir;
|
||||
new_work_dir.push("tfhe");
|
||||
std::env::set_current_dir(new_work_dir).unwrap();
|
||||
|
||||
let results_file = Path::new("shortint_key_sizes.csv");
|
||||
client_server_key_sizes(results_file);
|
||||
tuniform_key_set_sizes(results_file);
|
||||
}
|
||||
87
tfhe-benchmark/src/bin/wasm_benchmarks_parser.rs
Normal file
87
tfhe-benchmark/src/bin/wasm_benchmarks_parser.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
use benchmark::utilities::{write_to_json, OperatorType};
|
||||
use clap::Parser;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::keycache::get_shortint_parameter_set_from_name;
|
||||
use tfhe::shortint::{ClassicPBSParameters, PBSParameters};
|
||||
|
||||
const BENCHMARK_NAME_PREFIX: &str = "wasm::";
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
struct Args {
|
||||
raw_results_file: String,
|
||||
}
|
||||
|
||||
fn params_from_name(name: &str) -> ClassicPBSParameters {
|
||||
match get_shortint_parameter_set_from_name(name.to_uppercase().as_str())
|
||||
.pbs_parameters()
|
||||
.unwrap()
|
||||
{
|
||||
PBSParameters::PBS(p) => p,
|
||||
PBSParameters::MultiBitPBS(_) => {
|
||||
panic!("Tried to get a MultiBitPBS, expected ClassicPBSParameters")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes one `name,value` line, terminated by a newline, to `file`.
///
/// # Panics
///
/// Panics with `cannot write {name} result into file` followed by the I/O error if the write
/// fails.
fn write_result(file: &mut File, name: &str, value: usize) {
    let line = format!("{name},{value}\n");
    // The panic message is only built on failure; it used to be formatted on every call.
    file.write_all(line.as_bytes())
        .unwrap_or_else(|err| panic!("cannot write {name} result into file: {err:?}"));
}
|
||||
|
||||
pub fn parse_wasm_benchmarks(results_file: &Path, raw_results_file: &Path) {
|
||||
File::create(results_file).expect("create results file failed");
|
||||
let mut file = OpenOptions::new()
|
||||
.append(true)
|
||||
.open(results_file)
|
||||
.expect("cannot open parsed results file");
|
||||
|
||||
let operator = OperatorType::Atomic;
|
||||
|
||||
let raw_results = fs::read_to_string(raw_results_file).expect("cannot open raw results file");
|
||||
let results_as_json: HashMap<String, f32> = serde_json::from_str(&raw_results).unwrap();
|
||||
|
||||
for (full_name, val) in results_as_json.iter() {
|
||||
let prefixed_full_name = format!("{BENCHMARK_NAME_PREFIX}{full_name}");
|
||||
let name_parts = full_name.split("_mean_").collect::<Vec<_>>();
|
||||
let bench_name = name_parts[0];
|
||||
let params: PBSParameters = params_from_name(name_parts[1]).into();
|
||||
println!("{name_parts:?}");
|
||||
if bench_name.contains("_size") {
|
||||
write_result(&mut file, &prefixed_full_name, *val as usize);
|
||||
} else {
|
||||
let value_in_ns = (val * 1_000_000_f32) as usize;
|
||||
write_result(&mut file, &prefixed_full_name, value_in_ns);
|
||||
}
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&prefixed_full_name,
|
||||
params,
|
||||
params.name(),
|
||||
bench_name,
|
||||
&operator,
|
||||
0,
|
||||
vec![],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let args = Args::parse();
|
||||
|
||||
let work_dir = std::env::current_dir().unwrap();
|
||||
let mut new_work_dir = work_dir;
|
||||
new_work_dir.push("tfhe");
|
||||
std::env::set_current_dir(new_work_dir).unwrap();
|
||||
|
||||
let results_file = Path::new("wasm_pk_gen.csv");
|
||||
let raw_results = Path::new(&args.raw_results_file);
|
||||
|
||||
parse_wasm_benchmarks(results_file, raw_results);
|
||||
}
|
||||
3
tfhe-benchmark/src/lib.rs
Normal file
3
tfhe-benchmark/src/lib.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
pub mod params;
|
||||
pub mod params_aliases;
|
||||
pub mod utilities;
|
||||
449
tfhe-benchmark/src/params.rs
Normal file
449
tfhe-benchmark/src/params.rs
Normal file
@@ -0,0 +1,449 @@
|
||||
/// Parameter sets used by the boolean benchmarks.
#[cfg(feature = "boolean")]
pub mod boolean_params {
    use crate::utilities::CryptoParametersRecord;
    use tfhe::boolean::parameters::{
        DEFAULT_PARAMETERS, DEFAULT_PARAMETERS_KS_PBS, PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
    };

    /// Returns the boolean parameter sets to benchmark, each paired with its display name
    /// and converted to a `CryptoParametersRecord<u32>`.
    pub fn benchmark_32bits_parameters() -> Vec<(String, CryptoParametersRecord<u32>)> {
        let named_params = [
            ("BOOLEAN_DEFAULT_PARAMS", DEFAULT_PARAMETERS),
            (
                "BOOLEAN_TFHE_LIB_PARAMS",
                PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
            ),
            ("BOOLEAN_DEFAULT_PARAMS_KS_PBS", DEFAULT_PARAMETERS_KS_PBS),
        ];

        named_params
            .into_iter()
            .map(|(name, params)| (name.to_owned(), params.into()))
            .collect()
    }
}
|
||||
|
||||
#[cfg(feature = "boolean")]
|
||||
pub use boolean_params::*;
|
||||
|
||||
#[cfg(feature = "shortint")]
|
||||
pub mod shortint_params {
|
||||
use crate::params_aliases::*;
|
||||
use crate::utilities::CryptoParametersRecord;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::sync::OnceLock;
|
||||
use tfhe::core_crypto::prelude::{DynamicDistribution, LweBskGroupingFactor};
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::{
|
||||
CarryModulus, ClassicPBSParameters, MessageModulus, MultiBitPBSParameters, PBSParameters,
|
||||
};
|
||||
|
||||
pub const SHORTINT_BENCH_PARAMS_TUNIFORM: [ClassicPBSParameters; 4] = [
|
||||
BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128,
|
||||
];
|
||||
|
||||
pub const SHORTINT_BENCH_PARAMS_GAUSSIAN: [ClassicPBSParameters; 4] = [
|
||||
BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M128,
|
||||
];
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
pub const SHORTINT_MULTI_BIT_BENCH_PARAMS: [MultiBitPBSParameters; 6] = [
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128,
|
||||
];
|
||||
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
pub const SHORTINT_MULTI_BIT_BENCH_PARAMS: [MultiBitPBSParameters; 6] = [
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128,
|
||||
];
|
||||
|
||||
pub fn benchmark_parameters() -> Vec<(String, CryptoParametersRecord<u64>)> {
|
||||
match get_parameters_set() {
|
||||
ParametersSet::Default => SHORTINT_BENCH_PARAMS_TUNIFORM
|
||||
.iter()
|
||||
.chain(SHORTINT_BENCH_PARAMS_GAUSSIAN.iter())
|
||||
.map(|params| {
|
||||
(
|
||||
params.name(),
|
||||
<ClassicPBSParameters as Into<PBSParameters>>::into(*params)
|
||||
.to_owned()
|
||||
.into(),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
ParametersSet::All => {
|
||||
filter_parameters(
|
||||
&BENCH_ALL_CLASSIC_PBS_PARAMETERS,
|
||||
DesiredNoiseDistribution::Both,
|
||||
DesiredBackend::Cpu, // No parameters set are specific to GPU in this vector
|
||||
)
|
||||
.into_iter()
|
||||
.map(|(params, name)| {
|
||||
(
|
||||
name.to_string(),
|
||||
<ClassicPBSParameters as Into<PBSParameters>>::into(*params)
|
||||
.to_owned()
|
||||
.into(),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn multi_bit_benchmark_parameters() -> Vec<(String, CryptoParametersRecord<u64>)> {
|
||||
match get_parameters_set() {
|
||||
ParametersSet::Default => SHORTINT_MULTI_BIT_BENCH_PARAMS
|
||||
.iter()
|
||||
.map(|params| {
|
||||
(
|
||||
params.name(),
|
||||
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
|
||||
.to_owned()
|
||||
.into(),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
ParametersSet::All => {
|
||||
let desired_backend = if cfg!(feature = "gpu") {
|
||||
DesiredBackend::Gpu
|
||||
} else {
|
||||
DesiredBackend::Cpu
|
||||
};
|
||||
filter_parameters(
|
||||
&BENCH_ALL_MULTI_BIT_PBS_PARAMETERS,
|
||||
DesiredNoiseDistribution::Both,
|
||||
desired_backend,
|
||||
)
|
||||
.into_iter()
|
||||
.map(|(params, name)| {
|
||||
(
|
||||
name.to_string(),
|
||||
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
|
||||
.to_owned()
|
||||
.into(),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn multi_bit_benchmark_parameters_with_grouping(
|
||||
) -> Vec<(String, CryptoParametersRecord<u64>, LweBskGroupingFactor)> {
|
||||
match get_parameters_set() {
|
||||
ParametersSet::Default => SHORTINT_MULTI_BIT_BENCH_PARAMS
|
||||
.iter()
|
||||
.map(|params| {
|
||||
(
|
||||
params.name(),
|
||||
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
|
||||
.to_owned()
|
||||
.into(),
|
||||
params.grouping_factor,
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
ParametersSet::All => {
|
||||
let desired_backend = if cfg!(feature = "gpu") {
|
||||
DesiredBackend::Gpu
|
||||
} else {
|
||||
DesiredBackend::Cpu
|
||||
};
|
||||
filter_parameters(
|
||||
&BENCH_ALL_MULTI_BIT_PBS_PARAMETERS,
|
||||
DesiredNoiseDistribution::Both,
|
||||
desired_backend,
|
||||
)
|
||||
.into_iter()
|
||||
.map(|(params, name)| {
|
||||
(
|
||||
name.to_string(),
|
||||
<MultiBitPBSParameters as Into<PBSParameters>>::into(*params)
|
||||
.to_owned()
|
||||
.into(),
|
||||
params.grouping_factor,
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn raw_benchmark_parameters() -> Vec<PBSParameters> {
|
||||
let is_multi_bit = match env::var("__TFHE_RS_PARAM_TYPE") {
|
||||
Ok(val) => val.to_lowercase() == "multi_bit",
|
||||
Err(_) => false,
|
||||
};
|
||||
|
||||
if is_multi_bit {
|
||||
SHORTINT_MULTI_BIT_BENCH_PARAMS
|
||||
.iter()
|
||||
.map(|p| (*p).into())
|
||||
.collect()
|
||||
} else {
|
||||
SHORTINT_BENCH_PARAMS_TUNIFORM
|
||||
.iter()
|
||||
.chain(SHORTINT_BENCH_PARAMS_GAUSSIAN.iter())
|
||||
.map(|p| (*p).into())
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn benchmark_compression_parameters() -> Vec<(String, CryptoParametersRecord<u64>)> {
|
||||
vec![(
|
||||
BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.name(),
|
||||
(
|
||||
BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
)
|
||||
.into(),
|
||||
)]
|
||||
}
|
||||
|
||||
// This array has been built according to performance benchmarks measuring latency over a
// matrix of 4 parameter sets, 3 grouping factors and a wide range of thread counts.
// The values available here as u64 are the optimal number of threads to use for a given triplet
// representing one or more parameter sets.
|
||||
const MULTI_BIT_THREADS_ARRAY: [((MessageModulus, CarryModulus, LweBskGroupingFactor), u64);
|
||||
12] = [
|
||||
(
|
||||
(MessageModulus(2), CarryModulus(2), LweBskGroupingFactor(2)),
|
||||
5,
|
||||
),
|
||||
(
|
||||
(MessageModulus(4), CarryModulus(4), LweBskGroupingFactor(2)),
|
||||
5,
|
||||
),
|
||||
(
|
||||
(MessageModulus(8), CarryModulus(8), LweBskGroupingFactor(2)),
|
||||
5,
|
||||
),
|
||||
(
|
||||
(
|
||||
MessageModulus(16),
|
||||
CarryModulus(16),
|
||||
LweBskGroupingFactor(2),
|
||||
),
|
||||
5,
|
||||
),
|
||||
(
|
||||
(MessageModulus(2), CarryModulus(2), LweBskGroupingFactor(3)),
|
||||
7,
|
||||
),
|
||||
(
|
||||
(MessageModulus(4), CarryModulus(4), LweBskGroupingFactor(3)),
|
||||
9,
|
||||
),
|
||||
(
|
||||
(MessageModulus(8), CarryModulus(8), LweBskGroupingFactor(3)),
|
||||
10,
|
||||
),
|
||||
(
|
||||
(
|
||||
MessageModulus(16),
|
||||
CarryModulus(16),
|
||||
LweBskGroupingFactor(3),
|
||||
),
|
||||
10,
|
||||
),
|
||||
(
|
||||
(MessageModulus(2), CarryModulus(2), LweBskGroupingFactor(4)),
|
||||
11,
|
||||
),
|
||||
(
|
||||
(MessageModulus(4), CarryModulus(4), LweBskGroupingFactor(4)),
|
||||
13,
|
||||
),
|
||||
(
|
||||
(MessageModulus(8), CarryModulus(8), LweBskGroupingFactor(4)),
|
||||
11,
|
||||
),
|
||||
(
|
||||
(
|
||||
MessageModulus(16),
|
||||
CarryModulus(16),
|
||||
LweBskGroupingFactor(4),
|
||||
),
|
||||
11,
|
||||
),
|
||||
];
|
||||
|
||||
/// Define the number of threads to use for parameters doing multithreaded programmable
|
||||
/// bootstrapping.
|
||||
///
|
||||
/// Parameters must have the same values between message and carry modulus.
|
||||
/// Grouping factor 2, 3 and 4 are the only ones that are supported.
|
||||
pub fn multi_bit_num_threads(
|
||||
message_modulus: u64,
|
||||
carry_modulus: u64,
|
||||
grouping_factor: usize,
|
||||
) -> Option<u64> {
|
||||
// TODO Implement an interpolation mechanism for X_Y parameters set
|
||||
if message_modulus != carry_modulus || ![2, 3, 4].contains(&(grouping_factor as i32)) {
|
||||
return None;
|
||||
}
|
||||
let thread_map: HashMap<(MessageModulus, CarryModulus, LweBskGroupingFactor), u64> =
|
||||
HashMap::from_iter(MULTI_BIT_THREADS_ARRAY);
|
||||
thread_map
|
||||
.get(&(
|
||||
MessageModulus(message_modulus),
|
||||
CarryModulus(carry_modulus),
|
||||
LweBskGroupingFactor(grouping_factor),
|
||||
))
|
||||
.copied()
|
||||
}
|
||||
|
||||
pub static PARAMETERS_SET: OnceLock<ParametersSet> = OnceLock::new();
|
||||
|
||||
/// Selection of the parameter sets that the benchmarks run on.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParametersSet {
    /// The benchmark parameter arrays defined in this module.
    Default,
    /// Every parameter set of the `BENCH_ALL_*` lists that passes the filters.
    All,
}

impl ParametersSet {
    /// Reads the selection from the `__TFHE_RS_PARAMS_SET` environment variable.
    ///
    /// The value is compared after lowercasing: `default` and `all` are recognized. When the
    /// variable cannot be read, `default` is assumed.
    ///
    /// # Errors
    ///
    /// Returns a message naming the offending value when it is neither `default` nor `all`.
    pub fn from_env() -> Result<Self, String> {
        // The fallback string is only allocated when the variable could not be read.
        let raw_value =
            env::var("__TFHE_RS_PARAMS_SET").unwrap_or_else(|_| "default".to_string());

        match raw_value.to_lowercase().as_str() {
            "default" => Ok(ParametersSet::Default),
            "all" => Ok(ParametersSet::All),
            _ => Err(format!("parameters set '{raw_value}' is not supported")),
        }
    }
}
|
||||
|
||||
pub fn get_parameters_set() -> &'static ParametersSet {
|
||||
PARAMETERS_SET.get_or_init(|| ParametersSet::from_env().unwrap())
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum DesiredNoiseDistribution {
|
||||
Gaussian,
|
||||
TUniform,
|
||||
Both,
|
||||
}
|
||||
|
||||
/// Backend that parameter sets are selected for.
#[derive(Clone, Copy, Debug)]
pub enum DesiredBackend {
    Cpu,
    Gpu,
}

impl DesiredBackend {
    /// Tells whether a parameter set named `param_name` belongs to this backend.
    ///
    /// A name is considered GPU-specific when it contains `gpu` once lowercased; every other
    /// name is considered a CPU one.
    fn matches_parameter_name_backend(&self, param_name: &str) -> bool {
        let is_gpu_name = param_name.to_lowercase().contains("gpu");

        match self {
            DesiredBackend::Cpu => !is_gpu_name,
            DesiredBackend::Gpu => is_gpu_name,
        }
    }
}
|
||||
|
||||
pub fn filter_parameters<'a, P: Copy + Into<PBSParameters>>(
|
||||
params: &[(&'a P, &'a str)],
|
||||
desired_noise_distribution: DesiredNoiseDistribution,
|
||||
desired_backend: DesiredBackend,
|
||||
) -> Vec<(&'a P, &'a str)> {
|
||||
params
|
||||
.iter()
|
||||
.filter_map(|(p, name)| {
|
||||
let temp_param: PBSParameters = (**p).into();
|
||||
|
||||
match (
|
||||
temp_param.lwe_noise_distribution(),
|
||||
desired_noise_distribution,
|
||||
) {
|
||||
// If it's one of the pairs, we continue the process.
|
||||
(DynamicDistribution::Gaussian(_), DesiredNoiseDistribution::Gaussian)
|
||||
| (DynamicDistribution::TUniform(_), DesiredNoiseDistribution::TUniform)
|
||||
| (_, DesiredNoiseDistribution::Both) => (),
|
||||
_ => return None,
|
||||
}
|
||||
|
||||
if !desired_backend.matches_parameter_name_backend(name) {
|
||||
return None;
|
||||
};
|
||||
|
||||
Some((*p, *name))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "shortint")]
|
||||
pub use shortint_params::*;
|
||||
|
||||
#[cfg(feature = "integer")]
|
||||
mod integer_params {
|
||||
use crate::params_aliases::*;
|
||||
use crate::utilities::EnvConfig;
|
||||
use itertools::iproduct;
|
||||
use std::vec::IntoIter;
|
||||
use tfhe::shortint::PBSParameters;
|
||||
|
||||
/// An iterator that yields a succession of combinations
|
||||
/// of parameters and a num_block to achieve a certain bit_size ciphertext
|
||||
/// in radix decomposition
|
||||
pub struct ParamsAndNumBlocksIter {
|
||||
params_and_bit_sizes: itertools::Product<IntoIter<PBSParameters>, IntoIter<usize>>,
|
||||
}
|
||||
|
||||
impl Default for ParamsAndNumBlocksIter {
|
||||
fn default() -> Self {
|
||||
let env_config = EnvConfig::new();
|
||||
|
||||
if env_config.is_multi_bit {
|
||||
#[cfg(feature = "gpu")]
|
||||
let params = vec![
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128
|
||||
.into(),
|
||||
];
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
let params = vec![
|
||||
BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128.into(),
|
||||
];
|
||||
|
||||
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
|
||||
Self {
|
||||
params_and_bit_sizes,
|
||||
}
|
||||
} else {
|
||||
// FIXME One set of parameter is tested since we want to benchmark only quickest
|
||||
// operations.
|
||||
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into()];
|
||||
|
||||
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
|
||||
Self {
|
||||
params_and_bit_sizes,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for ParamsAndNumBlocksIter {
|
||||
type Item = (PBSParameters, usize, usize);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let (param, bit_size) = self.params_and_bit_sizes.next()?;
|
||||
let num_block =
|
||||
(bit_size as f64 / (param.message_modulus().0 as f64).log(2.0)).ceil() as usize;
|
||||
|
||||
Some((param, num_block, bit_size))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "integer")]
|
||||
pub use integer_params::*;
|
||||
142
tfhe-benchmark/src/params_aliases.rs
Normal file
142
tfhe-benchmark/src/params_aliases.rs
Normal file
@@ -0,0 +1,142 @@
|
||||
#[cfg(any(feature = "shortint", feature = "integer"))]
|
||||
pub mod shortint_params_aliases {
|
||||
use tfhe::shortint::parameters::current_params::*;
|
||||
use tfhe::shortint::parameters::{
|
||||
ClassicPBSParameters, CompactPublicKeyEncryptionParameters, CompressionParameters,
|
||||
MultiBitPBSParameters, NoiseSquashingParameters, ShortintKeySwitchingParameters,
|
||||
};
|
||||
|
||||
// KS PBS Gaussian
|
||||
pub const BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128: ClassicPBSParameters =
|
||||
V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128: ClassicPBSParameters =
|
||||
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128: ClassicPBSParameters =
|
||||
V1_2_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M128: ClassicPBSParameters =
|
||||
V1_2_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M128;
|
||||
|
||||
// KS PBS TUniform
|
||||
pub const BENCH_PARAM_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128: ClassicPBSParameters =
|
||||
V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128: ClassicPBSParameters =
|
||||
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128: ClassicPBSParameters =
|
||||
V1_2_PARAM_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128: ClassicPBSParameters =
|
||||
V1_2_PARAM_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS: ClassicPBSParameters =
|
||||
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
pub const BENCH_ALL_CLASSIC_PBS_PARAMETERS: [(&ClassicPBSParameters, &str); 140] =
|
||||
VEC_ALL_CLASSIC_PBS_PARAMETERS;
|
||||
|
||||
// MultiBit
|
||||
// CPU Gaussian
|
||||
pub const BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128;
|
||||
|
||||
// GPU Gaussian
|
||||
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M128;
|
||||
|
||||
// GPU TUniform
|
||||
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_1_CARRY_1_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_3_CARRY_3_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128:
|
||||
MultiBitPBSParameters =
|
||||
V1_2_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
pub const BENCH_ALL_MULTI_BIT_PBS_PARAMETERS: [(&MultiBitPBSParameters, &str); 240] =
|
||||
VEC_ALL_MULTI_BIT_PBS_PARAMETERS;
|
||||
|
||||
// PKE
|
||||
pub const BENCH_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
|
||||
CompactPublicKeyEncryptionParameters =
|
||||
V1_2_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1:
|
||||
CompactPublicKeyEncryptionParameters =
|
||||
V1_2_PARAM_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
|
||||
|
||||
// KS
|
||||
pub const BENCH_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
|
||||
ShortintKeySwitchingParameters =
|
||||
V1_2_PARAM_KEYSWITCH_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128:
|
||||
ShortintKeySwitchingParameters =
|
||||
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128;
|
||||
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
|
||||
ShortintKeySwitchingParameters =
|
||||
V1_2_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
|
||||
ShortintKeySwitchingParameters =
|
||||
V1_2_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
// ZKV1
|
||||
pub const BENCH_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1:
|
||||
CompactPublicKeyEncryptionParameters =
|
||||
V1_2_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
|
||||
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1:
|
||||
ShortintKeySwitchingParameters =
|
||||
V1_2_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
|
||||
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1:
|
||||
ShortintKeySwitchingParameters =
|
||||
V1_2_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1;
|
||||
|
||||
// ZKV2
|
||||
pub const BENCH_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2:
|
||||
CompactPublicKeyEncryptionParameters =
|
||||
V1_2_PARAM_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2;
|
||||
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2:
|
||||
ShortintKeySwitchingParameters =
|
||||
V1_2_PARAM_KEYSWITCH_PKE_TO_BIG_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2;
|
||||
pub const BENCH_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2:
|
||||
ShortintKeySwitchingParameters =
|
||||
V1_2_PARAM_KEYSWITCH_PKE_TO_SMALL_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV2;
|
||||
|
||||
// Compression
|
||||
pub const BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128: CompressionParameters =
|
||||
V1_2_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
pub const BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
|
||||
CompressionParameters =
|
||||
V1_2_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
// Noise Squashing
|
||||
pub const BENCH_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128:
|
||||
NoiseSquashingParameters =
|
||||
V1_2_NOISE_SQUASHING_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
}
|
||||
|
||||
#[cfg(any(feature = "shortint", feature = "integer"))]
|
||||
pub use shortint_params_aliases::*;
|
||||
650
tfhe-benchmark/src/utilities.rs
Normal file
650
tfhe-benchmark/src/utilities.rs
Normal file
@@ -0,0 +1,650 @@
|
||||
use serde::Serialize;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::OnceLock;
|
||||
use std::{env, fs};
|
||||
#[cfg(feature = "gpu")]
|
||||
use tfhe::core_crypto::gpu::get_number_of_gpus;
|
||||
use tfhe::core_crypto::prelude::*;
|
||||
|
||||
#[cfg(feature = "boolean")]
pub mod boolean_utils {
    use super::*;
    use tfhe::boolean::parameters::BooleanParameters;

    /// Records the dimensions, noise distributions and decomposition settings of boolean
    /// parameters, with the native `u32` ciphertext modulus. Other fields keep their default.
    impl From<BooleanParameters> for CryptoParametersRecord<u32> {
        fn from(params: BooleanParameters) -> Self {
            let BooleanParameters {
                lwe_dimension,
                glwe_dimension,
                polynomial_size,
                lwe_noise_distribution,
                glwe_noise_distribution,
                pbs_base_log,
                pbs_level,
                ks_base_log,
                ks_level,
                ..
            } = params;

            Self {
                lwe_dimension: Some(lwe_dimension),
                glwe_dimension: Some(glwe_dimension),
                polynomial_size: Some(polynomial_size),
                lwe_noise_distribution: Some(lwe_noise_distribution),
                glwe_noise_distribution: Some(glwe_noise_distribution),
                pbs_base_log: Some(pbs_base_log),
                pbs_level: Some(pbs_level),
                ks_base_log: Some(ks_base_log),
                ks_level: Some(ks_level),
                ciphertext_modulus: Some(CiphertextModulus::<u32>::new_native()),
                ..Default::default()
            }
        }
    }
}
|
||||
|
||||
#[allow(unused_imports)]
|
||||
#[cfg(feature = "boolean")]
|
||||
pub use boolean_utils::*;
|
||||
|
||||
#[cfg(feature = "shortint")]
pub mod shortint_utils {
    use super::*;
    use tfhe::shortint::parameters::compact_public_key_only::CompactPublicKeyEncryptionParameters;
    use tfhe::shortint::parameters::list_compression::CompressionParameters;
    use tfhe::shortint::parameters::ShortintKeySwitchingParameters;
    use tfhe::shortint::{
        AtomicPatternParameters, ClassicPBSParameters, MultiBitPBSParameters, PBSParameters,
        ShortintParameterSet,
    };

    /// PBS parameters are recorded through their `AtomicPatternParameters` form.
    impl From<PBSParameters> for CryptoParametersRecord<u64> {
        fn from(params: PBSParameters) -> Self {
            let atomic_pattern_params = AtomicPatternParameters::from(params);
            atomic_pattern_params.into()
        }
    }

    /// Records the dimensions, noise distributions, decomposition settings and moduli exposed
    /// by the parameters. Other fields keep their default value.
    impl From<AtomicPatternParameters> for CryptoParametersRecord<u64> {
        fn from(params: AtomicPatternParameters) -> Self {
            let lwe_dimension = Some(params.lwe_dimension());
            let glwe_dimension = Some(params.glwe_dimension());
            let polynomial_size = Some(params.polynomial_size());
            let lwe_noise_distribution = Some(params.lwe_noise_distribution());
            let glwe_noise_distribution = Some(params.glwe_noise_distribution());
            let pbs_base_log = Some(params.pbs_base_log());
            let pbs_level = Some(params.pbs_level());
            let ks_base_log = Some(params.ks_base_log());
            let ks_level = Some(params.ks_level());
            let message_modulus = Some(params.message_modulus().0);
            let carry_modulus = Some(params.carry_modulus().0);
            let ciphertext_modulus: Option<CiphertextModulus<u64>> = Some(
                params
                    .ciphertext_modulus()
                    .try_to()
                    .expect("failed to convert ciphertext modulus"),
            );

            Self {
                lwe_dimension,
                glwe_dimension,
                polynomial_size,
                lwe_noise_distribution,
                glwe_noise_distribution,
                pbs_base_log,
                pbs_level,
                ks_base_log,
                ks_level,
                message_modulus,
                carry_modulus,
                ciphertext_modulus,
                ..Default::default()
            }
        }
    }

    /// Only the key switch decomposition settings are recorded.
    impl From<ShortintKeySwitchingParameters> for CryptoParametersRecord<u64> {
        fn from(params: ShortintKeySwitchingParameters) -> Self {
            let ks_base_log = Some(params.ks_base_log);
            let ks_level = Some(params.ks_level);

            Self {
                ks_base_log,
                ks_level,
                ..Default::default()
            }
        }
    }

    /// Only the message, carry and ciphertext moduli are recorded.
    impl From<CompactPublicKeyEncryptionParameters> for CryptoParametersRecord<u64> {
        fn from(params: CompactPublicKeyEncryptionParameters) -> Self {
            let message_modulus = Some(params.message_modulus.0);
            let carry_modulus = Some(params.carry_modulus.0);
            let ciphertext_modulus = Some(params.ciphertext_modulus);

            Self {
                message_modulus,
                carry_modulus,
                ciphertext_modulus,
                ..Default::default()
            }
        }
    }

    /// Wraps the classic parameters in `PBSParameters::PBS` and defers to the
    /// `(CompressionParameters, PBSParameters)` conversion.
    impl From<(CompressionParameters, ClassicPBSParameters)> for CryptoParametersRecord<u64> {
        fn from((comp_params, pbs_params): (CompressionParameters, ClassicPBSParameters)) -> Self {
            let wrapped_params = PBSParameters::PBS(pbs_params);
            Self::from((comp_params, wrapped_params))
        }
    }

    /// Wraps the multi-bit parameters in `PBSParameters::MultiBitPBS` and defers to the
    /// `(CompressionParameters, PBSParameters)` conversion.
    impl From<(CompressionParameters, MultiBitPBSParameters)> for CryptoParametersRecord<u64> {
        fn from(
            (comp_params, multi_bit_pbs_params): (CompressionParameters, MultiBitPBSParameters),
        ) -> Self {
            let wrapped_params = PBSParameters::MultiBitPBS(multi_bit_pbs_params);
            Self::from((comp_params, wrapped_params))
        }
    }

    /// Records the compression settings, completed with the encryption LWE dimension, the
    /// encryption noise distribution and the ciphertext modulus of the PBS parameters.
    impl From<(CompressionParameters, PBSParameters)> for CryptoParametersRecord<u64> {
        fn from((comp_params, pbs_params): (CompressionParameters, PBSParameters)) -> Self {
            let parameter_set = ShortintParameterSet::new_pbs_param_set(pbs_params);
            let encryption_lwe_dimension = parameter_set.encryption_lwe_dimension();

            Self {
                lwe_dimension: Some(encryption_lwe_dimension),
                br_level: Some(comp_params.br_level),
                br_base_log: Some(comp_params.br_base_log),
                packing_ks_level: Some(comp_params.packing_ks_level),
                packing_ks_base_log: Some(comp_params.packing_ks_base_log),
                packing_ks_polynomial_size: Some(comp_params.packing_ks_polynomial_size),
                packing_ks_glwe_dimension: Some(comp_params.packing_ks_glwe_dimension),
                lwe_per_glwe: Some(comp_params.lwe_per_glwe),
                storage_log_modulus: Some(comp_params.storage_log_modulus),
                lwe_noise_distribution: Some(parameter_set.encryption_noise_distribution()),
                packing_ks_key_noise_distribution: Some(
                    comp_params.packing_ks_key_noise_distribution,
                ),
                ciphertext_modulus: Some(parameter_set.ciphertext_modulus()),
                ..Default::default()
            }
        }
    }
}
|
||||
|
||||
#[allow(unused_imports)]
|
||||
#[cfg(feature = "shortint")]
|
||||
pub use shortint_utils::*;
|
||||
|
||||
#[derive(Clone, Copy, Default, Serialize)]
|
||||
pub struct CryptoParametersRecord<Scalar: UnsignedInteger> {
|
||||
pub lwe_dimension: Option<LweDimension>,
|
||||
pub glwe_dimension: Option<GlweDimension>,
|
||||
pub packing_ks_glwe_dimension: Option<GlweDimension>,
|
||||
pub polynomial_size: Option<PolynomialSize>,
|
||||
pub packing_ks_polynomial_size: Option<PolynomialSize>,
|
||||
#[serde(serialize_with = "CryptoParametersRecord::serialize_distribution")]
|
||||
pub lwe_noise_distribution: Option<DynamicDistribution<Scalar>>,
|
||||
#[serde(serialize_with = "CryptoParametersRecord::serialize_distribution")]
|
||||
pub glwe_noise_distribution: Option<DynamicDistribution<Scalar>>,
|
||||
#[serde(serialize_with = "CryptoParametersRecord::serialize_distribution")]
|
||||
pub packing_ks_key_noise_distribution: Option<DynamicDistribution<Scalar>>,
|
||||
pub pbs_base_log: Option<DecompositionBaseLog>,
|
||||
pub pbs_level: Option<DecompositionLevelCount>,
|
||||
pub ks_base_log: Option<DecompositionBaseLog>,
|
||||
pub ks_level: Option<DecompositionLevelCount>,
|
||||
pub pfks_level: Option<DecompositionLevelCount>,
|
||||
pub pfks_base_log: Option<DecompositionBaseLog>,
|
||||
pub pfks_std_dev: Option<StandardDev>,
|
||||
pub cbs_level: Option<DecompositionLevelCount>,
|
||||
pub cbs_base_log: Option<DecompositionBaseLog>,
|
||||
pub br_level: Option<DecompositionLevelCount>,
|
||||
pub br_base_log: Option<DecompositionBaseLog>,
|
||||
pub packing_ks_level: Option<DecompositionLevelCount>,
|
||||
pub packing_ks_base_log: Option<DecompositionBaseLog>,
|
||||
pub message_modulus: Option<u64>,
|
||||
pub carry_modulus: Option<u64>,
|
||||
pub ciphertext_modulus: Option<CiphertextModulus<Scalar>>,
|
||||
pub lwe_per_glwe: Option<LweCiphertextCount>,
|
||||
pub storage_log_modulus: Option<CiphertextModulusLog>,
|
||||
}
|
||||
|
||||
impl<Scalar: UnsignedInteger> CryptoParametersRecord<Scalar> {
|
||||
pub fn noise_distribution_as_string(noise_distribution: DynamicDistribution<Scalar>) -> String {
|
||||
match noise_distribution {
|
||||
DynamicDistribution::Gaussian(g) => format!("Gaussian({}, {})", g.std, g.mean),
|
||||
DynamicDistribution::TUniform(t) => format!("TUniform({})", t.bound_log2()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn serialize_distribution<S>(
|
||||
noise_distribution: &Option<DynamicDistribution<Scalar>>,
|
||||
serializer: S,
|
||||
) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
match noise_distribution {
|
||||
Some(d) => serializer.serialize_some(&Self::noise_distribution_as_string(*d)),
|
||||
None => serializer.serialize_none(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
enum PolynomialMultiplication {
|
||||
Fft,
|
||||
// Ntt,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
enum IntegerRepresentation {
|
||||
Radix,
|
||||
// Crt,
|
||||
// Hybrid,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
enum ExecutionType {
|
||||
Sequential,
|
||||
Parallel,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
enum KeySetType {
|
||||
Single,
|
||||
// Multi,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
enum OperandType {
|
||||
CipherText,
|
||||
PlainText,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub enum OperatorType {
|
||||
Atomic,
|
||||
// AtomicPattern,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct BenchmarkParametersRecord<Scalar: UnsignedInteger> {
|
||||
display_name: String,
|
||||
crypto_parameters_alias: String,
|
||||
crypto_parameters: CryptoParametersRecord<Scalar>,
|
||||
message_modulus: Option<u64>,
|
||||
carry_modulus: Option<u64>,
|
||||
ciphertext_modulus: usize,
|
||||
bit_size: u32,
|
||||
polynomial_multiplication: PolynomialMultiplication,
|
||||
precision: u32,
|
||||
error_probability: f64,
|
||||
integer_representation: IntegerRepresentation,
|
||||
decomposition_basis: Vec<u32>,
|
||||
pbs_algorithm: Option<String>,
|
||||
execution_type: ExecutionType,
|
||||
key_set_type: KeySetType,
|
||||
operand_type: OperandType,
|
||||
operator_type: OperatorType,
|
||||
}
|
||||
|
||||
/// Writes benchmarks parameters to disk in JSON format.
|
||||
pub fn write_to_json<
|
||||
Scalar: UnsignedInteger + Serialize,
|
||||
T: Into<CryptoParametersRecord<Scalar>>,
|
||||
>(
|
||||
bench_id: &str,
|
||||
params: T,
|
||||
params_alias: impl Into<String>,
|
||||
display_name: impl Into<String>,
|
||||
operator_type: &OperatorType,
|
||||
bit_size: u32,
|
||||
decomposition_basis: Vec<u32>,
|
||||
) {
|
||||
let params = params.into();
|
||||
|
||||
let execution_type = match bench_id.contains("parallelized") {
|
||||
true => ExecutionType::Parallel,
|
||||
false => ExecutionType::Sequential,
|
||||
};
|
||||
let operand_type = match bench_id.contains("scalar") {
|
||||
true => OperandType::PlainText,
|
||||
false => OperandType::CipherText,
|
||||
};
|
||||
|
||||
let record = BenchmarkParametersRecord {
|
||||
display_name: display_name.into(),
|
||||
crypto_parameters_alias: params_alias.into(),
|
||||
crypto_parameters: params.to_owned(),
|
||||
message_modulus: params.message_modulus,
|
||||
carry_modulus: params.carry_modulus,
|
||||
ciphertext_modulus: 64,
|
||||
bit_size,
|
||||
polynomial_multiplication: PolynomialMultiplication::Fft,
|
||||
precision: (params.message_modulus.unwrap_or(2) as u32).ilog2(),
|
||||
error_probability: 2f64.powf(-41.0),
|
||||
integer_representation: IntegerRepresentation::Radix,
|
||||
decomposition_basis,
|
||||
pbs_algorithm: None, // To be added in future version
|
||||
execution_type,
|
||||
key_set_type: KeySetType::Single,
|
||||
operand_type,
|
||||
operator_type: operator_type.to_owned(),
|
||||
};
|
||||
|
||||
let mut params_directory = ["benchmarks_parameters", bench_id]
|
||||
.iter()
|
||||
.collect::<PathBuf>();
|
||||
fs::create_dir_all(¶ms_directory).unwrap();
|
||||
params_directory.push("parameters.json");
|
||||
|
||||
fs::write(params_directory, serde_json::to_string(&record).unwrap()).unwrap();
|
||||
}
|
||||
|
||||
/// Bit sizes benchmarked when fast benchmarks are requested.
const FAST_BENCH_BIT_SIZES: [usize; 1] = [64];
/// Bit sizes benchmarked by default.
const BENCH_BIT_SIZES: [usize; 8] = [4, 8, 16, 32, 40, 64, 128, 256];
/// Bit sizes benchmarked for multi-bit parameters when the `gpu` feature is disabled.
const MULTI_BIT_CPU_SIZES: [usize; 6] = [4, 8, 16, 32, 40, 64];
|
||||
|
||||
/// User configuration in which benchmarks must be run.
///
/// The default value has both flags disabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct EnvConfig {
    /// Whether multi-bit parameters are benchmarked.
    pub is_multi_bit: bool,
    /// Whether the reduced, fast benchmark set is requested.
    pub is_fast_bench: bool,
}
|
||||
|
||||
impl EnvConfig {
|
||||
pub fn new() -> Self {
|
||||
let is_multi_bit = match env::var("__TFHE_RS_PARAM_TYPE") {
|
||||
Ok(val) => val.to_lowercase() == "multi_bit",
|
||||
Err(_) => false,
|
||||
};
|
||||
|
||||
let is_fast_bench = match env::var("__TFHE_RS_FAST_BENCH") {
|
||||
Ok(val) => val.to_lowercase() == "true",
|
||||
Err(_) => false,
|
||||
};
|
||||
|
||||
EnvConfig {
|
||||
is_multi_bit,
|
||||
is_fast_bench,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get precisions values to benchmark.
|
||||
pub fn bit_sizes(&self) -> Vec<usize> {
|
||||
if self.is_fast_bench {
|
||||
FAST_BENCH_BIT_SIZES.to_vec()
|
||||
} else if self.is_multi_bit {
|
||||
if cfg!(feature = "gpu") {
|
||||
BENCH_BIT_SIZES.to_vec()
|
||||
} else {
|
||||
MULTI_BIT_CPU_SIZES.to_vec()
|
||||
}
|
||||
} else {
|
||||
BENCH_BIT_SIZES.to_vec()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Process-wide storage for the benchmark type, set at most once.
pub static BENCH_TYPE: OnceLock<BenchmarkType> = OnceLock::new();
|
||||
|
||||
/// Kind of measurement a benchmark run performs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BenchmarkType {
    Latency,
    Throughput,
}
|
||||
|
||||
impl BenchmarkType {
|
||||
pub fn from_env() -> Result<Self, String> {
|
||||
let raw_value = env::var("__TFHE_RS_BENCH_TYPE").unwrap_or("latency".to_string());
|
||||
match raw_value.to_lowercase().as_str() {
|
||||
"latency" => Ok(BenchmarkType::Latency),
|
||||
"throughput" => Ok(BenchmarkType::Throughput),
|
||||
_ => Err(format!("benchmark type '{raw_value}' is not supported")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_bench_type() -> &'static BenchmarkType {
|
||||
BENCH_TYPE.get_or_init(|| BenchmarkType::from_env().unwrap())
|
||||
}
|
||||
|
||||
/// Number of streaming multiprocessors (SM) available on Nvidia H100 GPU
#[cfg(feature = "gpu")]
const H100_PCIE_SM_COUNT: u32 = 114;
|
||||
|
||||
/// Generate a number of threads to use to saturate current machine for throughput measurements.
|
||||
pub fn throughput_num_threads(num_block: usize, op_pbs_count: u64) -> u64 {
|
||||
let ref_block_count = 32; // Represent a ciphertext of 64 bits for 2_2 parameters set
|
||||
let block_multiplicator = (ref_block_count as f64 / num_block as f64).ceil().min(1.0);
|
||||
// Some operations with a high serial workload (e.g. division) would yield an operation
|
||||
// loading value so low that the number of elements in the end wouldn't be meaningful.
|
||||
let minimum_loading = if num_block < 64 { 0.2 } else { 0.01 };
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
{
|
||||
let total_num_sm = H100_PCIE_SM_COUNT * get_number_of_gpus();
|
||||
let operation_loading = ((total_num_sm as u64 / op_pbs_count) as f64).max(minimum_loading);
|
||||
let elements = (total_num_sm as f64 * block_multiplicator * operation_loading) as u64;
|
||||
elements.min(1500) // This threshold is useful for operation with both a small number of
|
||||
// block and low PBs count.
|
||||
}
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
{
|
||||
let num_threads = rayon::current_num_threads() as f64;
|
||||
let operation_loading = (num_threads / (op_pbs_count as f64)).max(minimum_loading);
|
||||
// Add 20% more to maximum threads available.
|
||||
((num_threads + (num_threads * 0.2)) * block_multiplicator.min(1.0) * operation_loading)
|
||||
as u64
|
||||
}
|
||||
}
|
||||
|
||||
/// Helpers used by the benchmarks when the `gpu` feature is enabled.
#[cfg(feature = "gpu")]
mod cuda_utils {
    use tfhe::core_crypto::entities::{
        LweBootstrapKeyOwned, LweKeyswitchKeyOwned, LweMultiBitBootstrapKeyOwned,
        LwePackingKeyswitchKeyOwned,
    };
    use tfhe::core_crypto::gpu::lwe_bootstrap_key::CudaLweBootstrapKey;
    use tfhe::core_crypto::gpu::lwe_keyswitch_key::CudaLweKeyswitchKey;
    use tfhe::core_crypto::gpu::lwe_multi_bit_bootstrap_key::CudaLweMultiBitBootstrapKey;
    use tfhe::core_crypto::gpu::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
    use tfhe::core_crypto::gpu::vec::CudaVec;
    use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
    use tfhe::core_crypto::prelude::{Numeric, UnsignedInteger};
    use tfhe::shortint::server_key::ModulusSwitchNoiseReductionKey;
    use tfhe::{set_server_key, ClientKey, CompressedServerKey, GpuIndex};

    pub const GPU_MAX_SUPPORTED_POLYNOMIAL_SIZE: usize = 16384;

    /// Get vector of CUDA streams that can be directly used for throughput benchmarks in
    /// core_crypto layer.
    ///
    /// One single-GPU stream set is created per GPU reported by `get_number_of_gpus`.
    pub fn cuda_local_streams_core() -> Vec<CudaStreams> {
        (0..get_number_of_gpus())
            .map(|gpu_index| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
            .collect()
    }

    /// Computing keys in their CPU flavor.
    ///
    /// Every key is optional; instances are assembled with [`CpuKeysBuilder`].
    pub struct CpuKeys<T: UnsignedInteger> {
        ksk: Option<LweKeyswitchKeyOwned<T>>,
        pksk: Option<LwePackingKeyswitchKeyOwned<T>>,
        bsk: Option<LweBootstrapKeyOwned<T>>,
        multi_bit_bsk: Option<LweMultiBitBootstrapKeyOwned<T>>,
    }

    impl<T: UnsignedInteger> CpuKeys<T> {
        /// Returns an empty builder.
        pub fn builder() -> CpuKeysBuilder<T> {
            CpuKeysBuilder::new()
        }
    }

    /// Builder for [`CpuKeys`]; keys that are not provided stay `None`.
    pub struct CpuKeysBuilder<T: UnsignedInteger> {
        ksk: Option<LweKeyswitchKeyOwned<T>>,
        pksk: Option<LwePackingKeyswitchKeyOwned<T>>,
        bsk: Option<LweBootstrapKeyOwned<T>>,
        multi_bit_bsk: Option<LweMultiBitBootstrapKeyOwned<T>>,
    }

    impl<T: UnsignedInteger> CpuKeysBuilder<T> {
        /// Creates a builder with no key set.
        pub fn new() -> Self {
            Self {
                ksk: None,
                pksk: None,
                bsk: None,
                multi_bit_bsk: None,
            }
        }

        /// Sets the keyswitch key.
        pub fn keyswitch_key(mut self, ksk: LweKeyswitchKeyOwned<T>) -> Self {
            self.ksk = Some(ksk);
            self
        }

        /// Sets the packing keyswitch key.
        pub fn packing_keyswitch_key(mut self, pksk: LwePackingKeyswitchKeyOwned<T>) -> Self {
            self.pksk = Some(pksk);
            self
        }

        /// Sets the bootstrap key.
        pub fn bootstrap_key(mut self, bsk: LweBootstrapKeyOwned<T>) -> Self {
            self.bsk = Some(bsk);
            self
        }

        /// Sets the multi-bit bootstrap key.
        pub fn multi_bit_bootstrap_key(mut self, mb_bsk: LweMultiBitBootstrapKeyOwned<T>) -> Self {
            self.multi_bit_bsk = Some(mb_bsk);
            self
        }

        /// Consumes the builder and returns the assembled keys.
        pub fn build(self) -> CpuKeys<T> {
            CpuKeys {
                ksk: self.ksk,
                pksk: self.pksk,
                bsk: self.bsk,
                multi_bit_bsk: self.multi_bit_bsk,
            }
        }
    }

    impl<T: UnsignedInteger> Default for CpuKeysBuilder<T> {
        fn default() -> Self {
            Self::new()
        }
    }

    /// Computing keys in their Cuda flavor.
    #[allow(dead_code)]
    pub struct CudaLocalKeys<T: UnsignedInteger> {
        pub ksk: Option<CudaLweKeyswitchKey<T>>,
        pub pksk: Option<CudaLwePackingKeyswitchKey<T>>,
        pub bsk: Option<CudaLweBootstrapKey>,
        pub multi_bit_bsk: Option<CudaLweMultiBitBootstrapKey>,
    }

    #[allow(dead_code)]
    impl<T: UnsignedInteger> CudaLocalKeys<T> {
        /// Converts every key present in `cpu_keys` to its Cuda counterpart using `stream`.
        ///
        /// Keys that are `None` on the CPU side stay `None`. `ms_noise_reduction_key` is only
        /// forwarded to the bootstrap key conversion.
        pub fn from_cpu_keys(
            cpu_keys: &CpuKeys<T>,
            ms_noise_reduction_key: Option<&ModulusSwitchNoiseReductionKey<u64>>,
            stream: &CudaStreams,
        ) -> Self {
            let ksk = cpu_keys
                .ksk
                .as_ref()
                .map(|ksk| CudaLweKeyswitchKey::from_lwe_keyswitch_key(ksk, stream));
            let pksk = cpu_keys.pksk.as_ref().map(|pksk| {
                CudaLwePackingKeyswitchKey::from_lwe_packing_keyswitch_key(pksk, stream)
            });
            let bsk = cpu_keys.bsk.as_ref().map(|bsk| {
                CudaLweBootstrapKey::from_lwe_bootstrap_key(bsk, ms_noise_reduction_key, stream)
            });
            let multi_bit_bsk = cpu_keys.multi_bit_bsk.as_ref().map(|mb_bsk| {
                CudaLweMultiBitBootstrapKey::from_lwe_multi_bit_bootstrap_key(mb_bsk, stream)
            });

            Self {
                ksk,
                pksk,
                bsk,
                multi_bit_bsk,
            }
        }
    }

    /// Instantiate Cuda computing keys to each available GPU.
    ///
    /// The returned vector holds one entry per GPU, in GPU index order.
    pub fn cuda_local_keys_core<T: UnsignedInteger>(
        cpu_keys: &CpuKeys<T>,
        ms_noise_reduction_key: Option<&ModulusSwitchNoiseReductionKey<u64>>,
    ) -> Vec<CudaLocalKeys<T>> {
        (0..get_number_of_gpus())
            .map(|gpu_index| {
                let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
                CudaLocalKeys::from_cpu_keys(cpu_keys, ms_noise_reduction_key, &stream)
            })
            .collect()
    }

    /// Device-side copies of an index list, one each for input, output and LUT indexes.
    pub struct CudaIndexes<T: Numeric> {
        pub d_input: CudaVec<T>,
        pub d_output: CudaVec<T>,
        pub d_lut: CudaVec<T>,
    }

    impl<T: Numeric> CudaIndexes<T> {
        /// Allocates three device vectors of `indexes.len()` elements and fills each of them
        /// with a copy of `indexes`, then synchronizes `stream` before returning.
        pub fn new(indexes: &[T], stream: &CudaStreams, stream_index: u32) -> Self {
            let length = indexes.len();
            // NOTE(review): the asynchronous allocations and copies are presumably `unsafe`
            // because the data must not be used before the stream has been synchronized;
            // `stream.synchronize()` is called below before the vectors are returned.
            let mut d_input = unsafe { CudaVec::<T>::new_async(length, stream, stream_index) };
            let mut d_output = unsafe { CudaVec::<T>::new_async(length, stream, stream_index) };
            let mut d_lut = unsafe { CudaVec::<T>::new_async(length, stream, stream_index) };
            unsafe {
                d_input.copy_from_cpu_async(indexes.as_ref(), stream, stream_index);
                d_output.copy_from_cpu_async(indexes.as_ref(), stream, stream_index);
                d_lut.copy_from_cpu_async(indexes.as_ref(), stream, stream_index);
            }
            stream.synchronize();

            Self {
                d_input,
                d_output,
                d_lut,
            }
        }
    }

    /// Helpers for the integer layer benchmarks on GPU.
    #[cfg(feature = "integer")]
    pub mod cuda_integer_utils {
        use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
        use tfhe::integer::gpu::CudaServerKey;
        use tfhe::integer::ClientKey;
        use tfhe::GpuIndex;

        /// Get number of streams usable for CUDA throughput benchmarks
        ///
        /// The per-GPU stream count halves each time `num_block` doubles (64 streams for 2
        /// blocks down to 1 stream for 128 blocks); any other block count uses 8 streams per
        /// GPU. The result is that count multiplied by the number of GPUs.
        fn cuda_num_streams(num_block: usize) -> u64 {
            let num_streams_per_gpu: u32 = match num_block {
                2 => 64,
                4 => 32,
                8 => 16,
                16 => 8,
                32 => 4,
                64 => 2,
                128 => 1,
                _ => 8,
            };
            (num_streams_per_gpu * get_number_of_gpus()) as u64
        }

        /// Get vector of CUDA streams that can be directly used for throughput benchmarks.
        ///
        /// Streams are assigned to GPUs in round-robin order, and the sequence is repeated
        /// until `throughput_elements` streams have been produced.
        pub fn cuda_local_streams(
            num_block: usize,
            throughput_elements: usize,
        ) -> Vec<CudaStreams> {
            // Queried once up front rather than for every stream created. The closure below
            // is never invoked when there is no GPU, since the range is then empty.
            let gpu_count = get_number_of_gpus() as u64;
            (0..cuda_num_streams(num_block))
                .map(move |i| CudaStreams::new_single_gpu(GpuIndex::new((i % gpu_count) as u32)))
                .cycle()
                .take(throughput_elements)
                .collect()
        }

        /// Instantiate Cuda server key to each available GPU.
        ///
        /// The returned vector holds one server key per GPU, in GPU index order.
        pub fn cuda_local_keys(cks: &ClientKey) -> Vec<CudaServerKey> {
            (0..get_number_of_gpus())
                .map(|gpu_index| {
                    let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
                    CudaServerKey::new(cks, &stream)
                })
                .collect()
        }
    }

    /// Builds a GPU server key from `client_key` and installs it on every thread of the rayon
    /// pool as well as on the calling thread.
    #[allow(dead_code)]
    pub fn configure_gpu(client_key: &ClientKey) {
        let compressed_sks = CompressedServerKey::new(client_key);
        let sks = compressed_sks.decompress_to_gpu();
        rayon::broadcast(|_| set_server_key(sks.clone()));
        set_server_key(sks);
    }

    #[allow(unused_imports)]
    #[cfg(feature = "integer")]
    pub use cuda_integer_utils::*;
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
pub use cuda_utils::*;
|
||||
Reference in New Issue
Block a user