WIP: set up to measure small subset of benchmarks

Execute: `cargo update rayon@1.11.0 --precise 1.10.0 && cargo update rayon-core@1.13.0 --precise 1.12.1 && cargo update half@2.6.0 --precise 2.4.0`
2026-01-10 07:08:03 -05:00 · 2025-08-22 14:57:32 +02:00
parent 5ede4d6b0c
commit ea43180e42
11 changed files with 563 additions and 68 deletions
--- a/34
+++ b/34
@@ -10,6 +10,23 @@ MIN_RUST_VERSION:=1.65
 AVX512_SUPPORT?=OFF
 WASM_RUSTFLAGS:=
 BIG_TESTS_INSTANCE?=FALSE
+GEN_KEY_CACHE_MULTI_BIT_ONLY?=FALSE
+PARSE_INTEGER_BENCH_CSV_FILE?=tfhe_rs_integer_benches.csv
+FAST_TESTS?=FALSE
+FAST_BENCH?=FALSE
+BENCH_OP_FLAVOR?=DEFAULT
+BENCH_TYPE?=latency
+NODE_VERSION=20
+# sed: -n, do not print input stream, -e means a script/expression
+# 1,/version/ indicates from the first line, to the line matching version at the start of the line
+# p indicates to print, so we keep only the start of the Cargo.toml until we hit the first version
+# entry which should be the version of tfhe
+TFHE_CURRENT_VERSION:=\
+$(shell sed -n -e '1,/^version/p' tfhe/Cargo.toml | \
+grep '^version[[:space:]]*=' | cut -d '=' -f 2 | xargs)
+# Cargo has a hard time distinguishing between our package from the workspace and a package that
+# could be a dependency, so we build an unambiguous spec here
+TFHE_SPEC:=tfhe@$(TFHE_CURRENT_VERSION)
 # This is done to avoid forgetting it, we still precise the RUSTFLAGS in the commands to be able to
 # copy paste the command in the terminal and change them if required without forgetting the flags
 export RUSTFLAGS?=-C target-cpu=native
@@ -279,6 +296,23 @@ test_nodejs_wasm_api: build_node_js_api
 no_tfhe_typo:
 	@./scripts/no_tfhe_typo.sh

+#
+# Benchmarks
+#
+
+.PHONY: bench_integer # Run benchmarks for integer
+bench_integer: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p tfhe --
+
+.PHONY: bench_integer_multi_bit # Run benchmarks for integer using multi-bit parameters
+bench_integer_multi_bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p tfhe --
+
 .PHONY: bench_shortint # Run benchmarks for shortint
 bench_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--- a/tfhe/Cargo.toml
+++ b/tfhe/Cargo.toml
@@ -37,8 +37,8 @@ concrete-csprng = { version = "0.3.0", features = [
    "parallel",
 ] }
 lazy_static = { version = "1.4.0", optional = true }
-serde = { version = "1.0", features = ["derive"] }
-rayon = { version = "1.5.0" }
+serde = { version = "=1.0.189", features = ["derive"] }
+rayon = { version = "=1.7.0" }
 bincode = { version = "1.3.3", optional = true }
 concrete-fft = { version = "0.2.1", features = ["serde"] }
 aligned-vec = { version = "0.5", features = ["serde"] }
--- a/tfhe/benches/integer/bench.rs
+++ b/tfhe/benches/integer/bench.rs
@@ -1,13 +1,20 @@
 #![allow(dead_code)]

-use criterion::{criterion_group, criterion_main, Criterion};
+#[path = "../utilities.rs"]
+mod utilities;
+
+use crate::utilities::{throughput_num_threads, BenchmarkType, BENCH_TYPE};
+use criterion::{criterion_group, Criterion, Throughput};
 use itertools::iproduct;
-use rand::Rng;
-use std::array::IntoIter;
+use rand::prelude::*;
+use rayon::prelude::*;
+use std::env;
+use std::vec::IntoIter;
 use tfhe::integer::keycache::KEY_CACHE;
 use tfhe::integer::{RadixCiphertextBig, ServerKey};
 use tfhe::shortint::keycache::NamedParam;

+use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64;
 #[allow(unused_imports)]
 use tfhe::shortint::parameters::{
    PARAM_MESSAGE_1_CARRY_1, PARAM_MESSAGE_2_CARRY_2, PARAM_MESSAGE_3_CARRY_3,
@@ -18,22 +25,37 @@ use tfhe::shortint::parameters::{
 /// of parameters and a num_block to achieve a certain bit_size ciphertext
 /// in radix decomposition
 struct ParamsAndNumBlocksIter {
-    params_and_bit_sizes:
-        itertools::Product<IntoIter<tfhe::shortint::Parameters, 3>, IntoIter<usize, 7>>,
+    params_and_bit_sizes: itertools::Product<IntoIter<tfhe::shortint::Parameters>, IntoIter<usize>>,
 }

 impl Default for ParamsAndNumBlocksIter {
    fn default() -> Self {
-        const PARAMS: [tfhe::shortint::Parameters; 3] = [
-            PARAM_MESSAGE_2_CARRY_2,
-            PARAM_MESSAGE_3_CARRY_3,
-            PARAM_MESSAGE_4_CARRY_4,
+        let is_multi_bit = match env::var("__TFHE_RS_BENCH_TYPE") {
+            Ok(val) => val.to_lowercase() == "multi_bit",
+            Err(_) => false,
+        };
+
+        // if is_multi_bit {
+        //     let params = vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS.into()];
+        //     let bit_sizes = vec![8, 16, 32, 40, 64];
+        //     let params_and_bit_sizes = iproduct!(params, bit_sizes);
+        //     Self {
+        //         params_and_bit_sizes,
+        //     }
+        // } else {
+        // FIXME: only one parameter set is benchmarked, since we only want to measure the
+        // quickest operations.
+        let params = vec![
+            PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64.into(),
+            // PARAM_MESSAGE_3_CARRY_3_KS_PBS.into(),
+            // PARAM_MESSAGE_4_CARRY_4_KS_PBS.into(),
        ];
-        const BIT_SIZES: [usize; 7] = [8, 16, 32, 40, 64, 128, 256];
-        let params_and_bit_sizes = iproduct!(PARAMS, BIT_SIZES);
+        let bit_sizes = vec![64];
+        let params_and_bit_sizes = iproduct!(params, bit_sizes);
        Self {
            params_and_bit_sizes,
        }
+        // }
    }
 }
 impl Iterator for ParamsAndNumBlocksIter {
@@ -51,7 +73,7 @@ impl Iterator for ParamsAndNumBlocksIter {
 /// Base function to bench a server key function that is a binary operation
 fn bench_server_key_binary_function<F>(c: &mut Criterion, bench_name: &str, binary_op: F)
 where
-    F: Fn(&ServerKey, &mut RadixCiphertextBig, &mut RadixCiphertextBig),
+    F: Fn(&ServerKey, &mut RadixCiphertextBig, &mut RadixCiphertextBig) + Sync,
 {
    let mut bench_group = c.benchmark_group(bench_name);
    bench_group
@@ -62,46 +84,119 @@ where
    for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() {
        let param_name = param.name();

-        let bench_id = format!("{param_name}/{bit_size}_bits");
-        bench_group.bench_function(&bench_id, |b| {
-            let (cks, sks) = KEY_CACHE.get_from_params(param);
+        let bench_id;

-            let encrypt_two_values = || {
-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-                let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
-                let mut ct_0 = cks.encrypt_radix(clear_0, num_block);
+        match BENCH_TYPE.get().unwrap() {
+            BenchmarkType::Latency => {
+                bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
+                bench_group.bench_function(&bench_id, |b| {
+                    let (cks, sks) = KEY_CACHE.get_from_params(param);

-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-                let clear_1 = tfhe::integer::U256::from((clearlow, clearhigh));
-                let mut ct_1 = cks.encrypt_radix(clear_1, num_block);
+                    let encrypt_two_values = || {
+                        let clearlow = rng.gen::<u128>();
+                        let clearhigh = rng.gen::<u128>();
+                        let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
+                        let mut ct_0 = cks.encrypt_radix(clear_0, num_block);

-                // Raise the degree, so as to ensure worst case path in operations
-                let mut carry_mod = param.carry_modulus.0;
-                while carry_mod > 0 {
-                    // Raise the degree, so as to ensure worst case path in operations
-                    let clearlow = rng.gen::<u128>();
-                    let clearhigh = rng.gen::<u128>();
-                    let clear_2 = tfhe::integer::U256::from((clearlow, clearhigh));
-                    let ct_2 = cks.encrypt_radix(clear_2, num_block);
-                    sks.unchecked_add_assign(&mut ct_0, &ct_2);
-                    sks.unchecked_add_assign(&mut ct_1, &ct_2);
+                        let clearlow = rng.gen::<u128>();
+                        let clearhigh = rng.gen::<u128>();
+                        let clear_1 = tfhe::integer::U256::from((clearlow, clearhigh));
+                        let mut ct_1 = cks.encrypt_radix(clear_1, num_block);

-                    carry_mod -= 1;
-                }
+                        // Raise the degree, so as to ensure worst case path in operations
+                        let mut carry_mod = param.carry_modulus.0;
+                        while carry_mod > 0 {
+                            // Raise the degree, so as to ensure worst case path in operations
+                            let clearlow = rng.gen::<u128>();
+                            let clearhigh = rng.gen::<u128>();
+                            let clear_2 = tfhe::integer::U256::from((clearlow, clearhigh));
+                            let ct_2 = cks.encrypt_radix(clear_2, num_block);
+                            sks.unchecked_add_assign(&mut ct_0, &ct_2);
+                            sks.unchecked_add_assign(&mut ct_1, &ct_2);

-                (ct_0, ct_1)
-            };
+                            carry_mod -= 1;
+                        }

-            b.iter_batched(
-                encrypt_two_values,
-                |(mut ct_0, mut ct_1)| {
-                    binary_op(&sks, &mut ct_0, &mut ct_1);
-                },
-                criterion::BatchSize::SmallInput,
-            )
-        });
+                        (ct_0, ct_1)
+                    };
+
+                    b.iter_batched(
+                        encrypt_two_values,
+                        |(mut ct_0, mut ct_1)| {
+                            binary_op(&sks, &mut ct_0, &mut ct_1);
+                        },
+                        criterion::BatchSize::SmallInput,
+                    )
+                });
+            }
+            BenchmarkType::Throughput => {
+                bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
+                bench_group
+                    .sample_size(10)
+                    .measurement_time(std::time::Duration::from_secs(30));
+                let elements = throughput_num_threads(num_block);
+                bench_group.throughput(Throughput::Elements(elements));
+                bench_group.bench_function(&bench_id, |b| {
+                    let (cks, sks) = KEY_CACHE.get_from_params(param);
+
+                    let mut cts_0 = (0..elements)
+                        .map(|_| {
+                            let clearlow = rng.gen::<u128>();
+                            let clearhigh = rng.gen::<u128>();
+                            let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
+                            let mut ct_0 = cks.encrypt_radix(clear_0, num_block);
+
+                            // Raise the degree, so as to ensure worst case path in operations
+                            let mut carry_mod = param.carry_modulus.0;
+                            while carry_mod > 0 {
+                                // Raise the degree, so as to ensure worst case path in operations
+                                let clearlow = rng.gen::<u128>();
+                                let clearhigh = rng.gen::<u128>();
+                                let clear_2 = tfhe::integer::U256::from((clearlow, clearhigh));
+                                let ct_2 = cks.encrypt_radix(clear_2, num_block);
+                                sks.unchecked_add_assign(&mut ct_0, &ct_2);
+
+                                carry_mod -= 1;
+                            }
+
+                            ct_0
+                        })
+                        .collect::<Vec<_>>();
+                    let mut cts_1 = (0..elements)
+                        .map(|_| {
+                            let clearlow = rng.gen::<u128>();
+                            let clearhigh = rng.gen::<u128>();
+                            let clear_1 = tfhe::integer::U256::from((clearlow, clearhigh));
+                            let mut ct_1 = cks.encrypt_radix(clear_1, num_block);
+
+                            // Raise the degree, so as to ensure worst case path in operations
+                            let mut carry_mod = param.carry_modulus.0;
+                            while carry_mod > 0 {
+                                // Raise the degree, so as to ensure worst case path in operations
+                                let clearlow = rng.gen::<u128>();
+                                let clearhigh = rng.gen::<u128>();
+                                let clear_2 = tfhe::integer::U256::from((clearlow, clearhigh));
+                                let ct_2 = cks.encrypt_radix(clear_2, num_block);
+                                sks.unchecked_add_assign(&mut ct_1, &ct_2);
+
+                                carry_mod -= 1;
+                            }
+
+                            ct_1
+                        })
+                        .collect::<Vec<_>>();
+
+                    b.iter(|| {
+                        cts_0
+                            .par_iter_mut()
+                            .zip(cts_1.par_iter_mut())
+                            .for_each(|(ct_0, ct_1)| {
+                                binary_op(&sks, ct_0, ct_1);
+                            })
+                    })
+                });
+            }
+        }
    }

    bench_group.finish()
@@ -332,18 +427,18 @@ criterion_group!(
 criterion_group!(
    smart_arithmetic_parallelized_operation,
    smart_add_parallelized,
-    smart_sub_parallelized,
+    // smart_sub_parallelized,
    smart_mul_parallelized,
-    smart_bitand_parallelized,
-    smart_bitor_parallelized,
-    smart_bitxor_parallelized,
-    smart_max_parallelized,
-    smart_min_parallelized,
-    smart_eq_parallelized,
-    smart_lt_parallelized,
-    smart_le_parallelized,
+    // smart_bitand_parallelized,
+    // smart_bitor_parallelized,
+    // smart_bitxor_parallelized,
+    // smart_max_parallelized,
+    // smart_min_parallelized,
+    // smart_eq_parallelized,
+    // smart_lt_parallelized,
+    // smart_le_parallelized,
    smart_gt_parallelized,
-    smart_ge_parallelized,
+    // smart_ge_parallelized,
 );

 criterion_group!(
@@ -396,12 +491,10 @@ criterion_group!(

 criterion_group!(misc, full_propagate, full_propagate_parallelized);

-criterion_main!(
-    smart_arithmetic_operation,
-    smart_arithmetic_parallelized_operation,
-    smart_scalar_arithmetic_operation,
-    smart_scalar_arithmetic_parallel_operation,
-    unchecked_arithmetic_operation,
-    unchecked_scalar_arithmetic_operation,
-    misc,
-);
+/// Hand-written entry point used in place of `criterion_main!`; it runs only the
+/// `smart_arithmetic_parallelized_operation` group.
+fn main() {
+    // Resolve the benchmark type from the environment once, before any benchmark runs.
+    // `from_env` returns `Err` for an unsupported value, so the `unwrap` aborts the run here.
+    BENCH_TYPE.get_or_init(|| BenchmarkType::from_env().unwrap());
+
+    smart_arithmetic_parallelized_operation();
+
+    // Apply the command-line configuration and emit the final summary.
+    Criterion::default().configure_from_args().final_summary();
+}
--- a/tfhe/benches/utilities.rs
+++ b/tfhe/benches/utilities.rs
@@ -0,0 +1,322 @@
+use serde::Serialize;
+use std::{env, fs};
+use std::path::PathBuf;
+#[cfg(feature = "boolean")]
+use tfhe::boolean::parameters::BooleanParameters;
+use tfhe::core_crypto::prelude::*;
+
+// #[derive(Clone, Copy, Default, Serialize)]
+// pub struct CryptoParametersRecord<Scalar: UnsignedInteger> {
+//     pub lwe_dimension: Option<LweDimension>,
+//     pub glwe_dimension: Option<GlweDimension>,
+//     pub polynomial_size: Option<PolynomialSize>,
+//     pub lwe_modular_std_dev: Option<StandardDev>,
+//     pub glwe_modular_std_dev: Option<StandardDev>,
+//     pub pbs_base_log: Option<DecompositionBaseLog>,
+//     pub pbs_level: Option<DecompositionLevelCount>,
+//     pub ks_base_log: Option<DecompositionBaseLog>,
+//     pub ks_level: Option<DecompositionLevelCount>,
+//     pub pfks_level: Option<DecompositionLevelCount>,
+//     pub pfks_base_log: Option<DecompositionBaseLog>,
+//     pub pfks_modular_std_dev: Option<StandardDev>,
+//     pub cbs_level: Option<DecompositionLevelCount>,
+//     pub cbs_base_log: Option<DecompositionBaseLog>,
+//     pub message_modulus: Option<usize>,
+//     pub carry_modulus: Option<usize>,
+//     pub ciphertext_modulus: Option<CiphertextModulus<Scalar>>,
+// }
+//
+// #[cfg(feature = "boolean")]
+// impl<Scalar: UnsignedInteger> From<BooleanParameters> for CryptoParametersRecord<Scalar> {
+//     fn from(params: BooleanParameters) -> Self {
+//         CryptoParametersRecord {
+//             lwe_dimension: Some(params.lwe_dimension),
+//             glwe_dimension: Some(params.glwe_dimension),
+//             polynomial_size: Some(params.polynomial_size),
+//             lwe_modular_std_dev: Some(params.lwe_modular_std_dev),
+//             glwe_modular_std_dev: Some(params.glwe_modular_std_dev),
+//             pbs_base_log: Some(params.pbs_base_log),
+//             pbs_level: Some(params.pbs_level),
+//             ks_base_log: Some(params.ks_base_log),
+//             ks_level: Some(params.ks_level),
+//             pfks_level: None,
+//             pfks_base_log: None,
+//             pfks_modular_std_dev: None,
+//             cbs_level: None,
+//             cbs_base_log: None,
+//             message_modulus: None,
+//             carry_modulus: None,
+//             ciphertext_modulus: Some(CiphertextModulus::<Scalar>::new_native()),
+//         }
+//     }
+// }
+//
+// #[cfg(feature = "shortint")]
+// impl<Scalar> From<PBSParameters> for CryptoParametersRecord<Scalar>
+// where
+//     Scalar: UnsignedInteger + CastInto<u128>,
+// {
+//     fn from(params: PBSParameters) -> Self {
+//         CryptoParametersRecord {
+//             lwe_dimension: Some(params.lwe_dimension()),
+//             glwe_dimension: Some(params.glwe_dimension()),
+//             polynomial_size: Some(params.polynomial_size()),
+//             lwe_modular_std_dev: Some(params.lwe_modular_std_dev()),
+//             glwe_modular_std_dev: Some(params.glwe_modular_std_dev()),
+//             pbs_base_log: Some(params.pbs_base_log()),
+//             pbs_level: Some(params.pbs_level()),
+//             ks_base_log: Some(params.ks_base_log()),
+//             ks_level: Some(params.ks_level()),
+//             pfks_level: None,
+//             pfks_base_log: None,
+//             pfks_modular_std_dev: None,
+//             cbs_level: None,
+//             cbs_base_log: None,
+//             message_modulus: Some(params.message_modulus().0),
+//             carry_modulus: Some(params.carry_modulus().0),
+//             ciphertext_modulus: Some(
+//                 params
+//                     .ciphertext_modulus()
+//                     .try_to()
+//                     .expect("failed to convert ciphertext modulus"),
+//             ),
+//         }
+//     }
+// }
+//
+// #[cfg(feature = "shortint")]
+// impl<Scalar: UnsignedInteger> From<ShortintKeySwitchingParameters>
+//     for CryptoParametersRecord<Scalar>
+// {
+//     fn from(params: ShortintKeySwitchingParameters) -> Self {
+//         CryptoParametersRecord {
+//             lwe_dimension: None,
+//             glwe_dimension: None,
+//             polynomial_size: None,
+//             lwe_modular_std_dev: None,
+//             glwe_modular_std_dev: None,
+//             pbs_base_log: None,
+//             pbs_level: None,
+//             ks_base_log: Some(params.ks_base_log),
+//             ks_level: Some(params.ks_level),
+//             pfks_level: None,
+//             pfks_base_log: None,
+//             pfks_modular_std_dev: None,
+//             cbs_level: None,
+//             cbs_base_log: None,
+//             message_modulus: None,
+//             carry_modulus: None,
+//             ciphertext_modulus: None,
+//         }
+//     }
+// }
+//
+// #[derive(Serialize)]
+// enum PolynomialMultiplication {
+//     Fft,
+//     // Ntt,
+// }
+//
+// #[derive(Serialize)]
+// enum IntegerRepresentation {
+//     Radix,
+//     // Crt,
+//     // Hybrid,
+// }
+//
+// #[derive(Serialize)]
+// enum ExecutionType {
+//     Sequential,
+//     Parallel,
+// }
+//
+// #[derive(Serialize)]
+// enum KeySetType {
+//     Single,
+//     // Multi,
+// }
+//
+// #[derive(Serialize)]
+// enum OperandType {
+//     CipherText,
+//     PlainText,
+// }
+//
+// #[derive(Clone, Serialize)]
+// pub enum OperatorType {
+//     Atomic,
+//     // AtomicPattern,
+// }
+//
+// #[derive(Serialize)]
+// struct BenchmarkParametersRecord<Scalar: UnsignedInteger> {
+//     display_name: String,
+//     crypto_parameters_alias: String,
+//     crypto_parameters: CryptoParametersRecord<Scalar>,
+//     message_modulus: Option<usize>,
+//     carry_modulus: Option<usize>,
+//     ciphertext_modulus: usize,
+//     bit_size: u32,
+//     polynomial_multiplication: PolynomialMultiplication,
+//     precision: u32,
+//     error_probability: f64,
+//     integer_representation: IntegerRepresentation,
+//     decomposition_basis: Vec<u32>,
+//     pbs_algorithm: Option<String>,
+//     execution_type: ExecutionType,
+//     key_set_type: KeySetType,
+//     operand_type: OperandType,
+//     operator_type: OperatorType,
+// }
+//
+// /// Writes benchmarks parameters to disk in JSON format.
+// pub fn write_to_json<
+//     Scalar: UnsignedInteger + Serialize,
+//     T: Into<CryptoParametersRecord<Scalar>>,
+// >(
+//     bench_id: &str,
+//     params: T,
+//     params_alias: impl Into<String>,
+//     display_name: impl Into<String>,
+//     operator_type: &OperatorType,
+//     bit_size: u32,
+//     decomposition_basis: Vec<u32>,
+// ) {
+//     let params = params.into();
+//
+//     let execution_type = match bench_id.contains("parallelized") {
+//         true => ExecutionType::Parallel,
+//         false => ExecutionType::Sequential,
+//     };
+//     let operand_type = match bench_id.contains("scalar") {
+//         true => OperandType::PlainText,
+//         false => OperandType::CipherText,
+//     };
+//
+//     let record = BenchmarkParametersRecord {
+//         display_name: display_name.into(),
+//         crypto_parameters_alias: params_alias.into(),
+//         crypto_parameters: params.to_owned(),
+//         message_modulus: params.message_modulus,
+//         carry_modulus: params.carry_modulus,
+//         ciphertext_modulus: 64,
+//         bit_size,
+//         polynomial_multiplication: PolynomialMultiplication::Fft,
+//         precision: (params.message_modulus.unwrap_or(2) as u32).ilog2(),
+//         error_probability: 2f64.powf(-41.0),
+//         integer_representation: IntegerRepresentation::Radix,
+//         decomposition_basis,
+//         pbs_algorithm: None, // To be added in future version
+//         execution_type,
+//         key_set_type: KeySetType::Single,
+//         operand_type,
+//         operator_type: operator_type.to_owned(),
+//     };
+//
+//     let mut params_directory = ["benchmarks_parameters", bench_id]
+//         .iter()
+//         .collect::<PathBuf>();
+//     fs::create_dir_all(&params_directory).unwrap();
+//     params_directory.push("parameters.json");
+//
+//     fs::write(params_directory, serde_json::to_string(&record).unwrap()).unwrap();
+// }
+
+const FAST_BENCH_BIT_SIZES: [usize; 1] = [64];
+const BENCH_BIT_SIZES: [usize; 8] = [4, 8, 16, 32, 40, 64, 128, 256];
+const MULTI_BIT_CPU_SIZES: [usize; 6] = [4, 8, 16, 32, 40, 64];
+
+/// Benchmark run configuration, read from the process environment.
+#[derive(Default)]
+pub struct EnvConfig {
+    /// `true` when `__TFHE_RS_BENCH_TYPE` equals `multi_bit` (case-insensitive).
+    pub is_multi_bit: bool,
+    /// `true` when `__TFHE_RS_FAST_BENCH` equals `true` (case-insensitive).
+    pub is_fast_bench: bool,
+}
+
+impl EnvConfig {
+    /// Builds the configuration from the environment.
+    ///
+    /// A variable that cannot be read (e.g. it is not set) counts as `false`.
+    #[allow(dead_code)]
+    pub fn new() -> Self {
+        // Lower-casing the value makes the comparison case-insensitive.
+        let flag_is = |var: &str, expected: &str| {
+            env::var(var).map_or(false, |raw| raw.to_lowercase() == expected)
+        };
+
+        Self {
+            is_multi_bit: flag_is("__TFHE_RS_BENCH_TYPE", "multi_bit"),
+            is_fast_bench: flag_is("__TFHE_RS_FAST_BENCH", "true"),
+        }
+    }
+
+    /// Returns the precisions (bit sizes) to benchmark.
+    ///
+    /// Fast mode takes priority; the reduced multi-bit list is only used when the `gpu`
+    /// feature is disabled.
+    #[allow(dead_code)]
+    pub fn bit_sizes(&self) -> Vec<usize> {
+        let sizes: &[usize] = if self.is_fast_bench {
+            &FAST_BENCH_BIT_SIZES
+        } else if self.is_multi_bit && !cfg!(feature = "gpu") {
+            &MULTI_BIT_CPU_SIZES
+        } else {
+            &BENCH_BIT_SIZES
+        };
+        sizes.to_vec()
+    }
+}
+
+use std::sync::OnceLock;
+#[cfg(feature = "gpu")]
+use tfhe_cuda_backend::cuda_bind::cuda_get_number_of_gpus;
+
+/// Generate a number of threads to use to saturate current machine for throughput measurements.
+///
+/// `num_block` is the block count of the ciphertexts being benchmarked. The machine-dependent
+/// base value is scaled by `ceil(32 / num_block)`, so inputs smaller than the 32-block
+/// reference produce a larger result.
+///
+/// NOTE(review): `num_block == 0` is not guarded against; callers presumably never pass it —
+/// confirm.
+#[allow(dead_code)]
+pub fn throughput_num_threads(num_block: usize) -> u64 {
+    let ref_block_count = 32; // Represent a ciphertext of 64 bits for 2_2 parameters set
+    // Scale factor relative to the reference block count, rounded up.
+    let block_multiplicator = (ref_block_count as f64 / num_block as f64).ceil();
+
+    #[cfg(feature = "gpu")]
+    {
+        // This value is for Nvidia H100 GPU
+        let streaming_multiprocessors = 132;
+        // NOTE(review): no safety invariant is documented for this FFI call — confirm it has
+        // no preconditions.
+        let num_gpus = unsafe { cuda_get_number_of_gpus() };
+        // The `as u64` cast drops the fractional part.
+        ((streaming_multiprocessors * num_gpus) as f64 * block_multiplicator) as u64
+    }
+    #[cfg(not(feature = "gpu"))]
+    {
+        let num_threads = rayon::current_num_threads() as f64;
+        // Add 20% more to maximum threads available.
+        // The `as u64` cast drops the fractional part.
+        ((num_threads + (num_threads * 0.2)) * block_multiplicator) as u64
+    }
+}
+
+#[allow(dead_code)]
+pub static BENCH_TYPE: OnceLock<BenchmarkType> = OnceLock::new();
+
+/// Kind of measurement a benchmark run performs.
+#[allow(dead_code)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum BenchmarkType {
+    Latency,
+    Throughput,
+}
+
+#[allow(dead_code)]
+impl BenchmarkType {
+    /// Reads the benchmark type from `__TFHE_RS_BENCH_TYPE` (case-insensitive).
+    ///
+    /// A variable that cannot be read (e.g. it is not set) defaults to
+    /// [`BenchmarkType::Latency`].
+    ///
+    /// # Errors
+    ///
+    /// Returns a message naming the offending value when it is not supported.
+    pub fn from_env() -> Result<Self, String> {
+        let raw_value = env::var("__TFHE_RS_BENCH_TYPE").unwrap_or_else(|_| "latency".to_string());
+        match raw_value.to_lowercase().as_str() {
+            // `multi_bit` is a parameter-set selector carried by this same variable (see
+            // `EnvConfig::new`); it does not pick a measurement kind, so keep the default one
+            // instead of rejecting the run.
+            "latency" | "multi_bit" => Ok(Self::Latency),
+            "throughput" => Ok(Self::Throughput),
+            _ => Err(format!("benchmark type '{raw_value}' is not supported")),
+        }
+    }
+}
+
+
+// Empty main to please clippy.
+#[allow(dead_code)]
+pub fn main() {}
--- a/tfhe/src/shortint/keycache.rs
+++ b/tfhe/src/shortint/keycache.rs
@@ -375,6 +375,7 @@ impl NamedParam for Parameters {
                WOPBS_PRIME_PARAM_MESSAGE_8_NORM2_6,
                WOPBS_PRIME_PARAM_MESSAGE_8_NORM2_7,
                PARAM_4_BITS_5_BLOCKS,
+                PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
            )
        );
    }
--- a/tfhe/src/shortint/parameters/classic/mod.rs
+++ b/tfhe/src/shortint/parameters/classic/mod.rs
--- a/tfhe/src/shortint/parameters/classic/tuniform/mod.rs
+++ b/tfhe/src/shortint/parameters/classic/tuniform/mod.rs
@@ -0,0 +1 @@
+pub mod p_fail_2_minus_64;
--- a/tfhe/src/shortint/parameters/classic/tuniform/p_fail_2_minus_64/ks_pbs.rs
+++ b/tfhe/src/shortint/parameters/classic/tuniform/p_fail_2_minus_64/ks_pbs.rs
@@ -0,0 +1,23 @@
+// security = 132 bits, p-fail = 2^-64.138, algorithmic cost ~ 113, 2-norm = 5
+/// Classic KS-PBS parameter set with message modulus 4 and carry modulus 4, using TUniform
+/// noise distributions and encryption under the big key.
+pub const PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64: ClassicPBSParameters =
+    ClassicPBSParameters {
+        lwe_dimension: LweDimension(879),
+        glwe_dimension: GlweDimension(1),
+        polynomial_size: PolynomialSize(4096),
+        lwe_noise_distribution: DynamicDistribution::new_t_uniform(46),
+        glwe_noise_distribution: DynamicDistribution::new_t_uniform(17),
+        pbs_base_log: DecompositionBaseLog(23),
+        pbs_level: DecompositionLevelCount(1),
+        ks_base_log: DecompositionBaseLog(3),
+        ks_level: DecompositionLevelCount(5),
+        message_modulus: MessageModulus(4),
+        carry_modulus: CarryModulus(4),
+        max_noise_level: MaxNoiseLevel::new(5),
+        // Must agree with the p-fail stated in the header comment and in the constant's name.
+        log2_p_fail: -64.138,
+        ciphertext_modulus: CiphertextModulus::new_native(),
+        encryption_key_choice: EncryptionKeyChoice::Big,
+    };
+
+use crate::core_crypto::prelude::*;
+use crate::shortint::ciphertext::MaxNoiseLevel;
+use crate::shortint::parameters::{CarryModulus, ClassicPBSParameters, MessageModulus};
--- a/tfhe/src/shortint/parameters/classic/tuniform/p_fail_2_minus_64/mod.rs
+++ b/tfhe/src/shortint/parameters/classic/tuniform/p_fail_2_minus_64/mod.rs
@@ -0,0 +1 @@
+pub mod ks_pbs;
--- a/tfhe/src/shortint/parameters/mod.rs
+++ b/tfhe/src/shortint/parameters/mod.rs
@@ -886,6 +886,26 @@ pub const PARAM_SMALL_MESSAGE_4_CARRY_4: Parameters = Parameters {
    carry_modulus: CarryModulus(16),
 };

+/// Parameter set with message modulus 4 and carry modulus 4; the PFKS and CBS levels and base
+/// logs are all set to 0.
+///
+/// Noise is expressed here as standard deviations.
+/// NOTE(review): the name says TUniform, yet the fields are `StandardDev` values — how these
+/// values were derived is not visible here; confirm.
+pub const PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64: Parameters =
+    Parameters {
+        lwe_dimension: LweDimension(879),
+        glwe_dimension: GlweDimension(1),
+        polynomial_size: PolynomialSize(4096),
+        lwe_modular_std_dev: StandardDev(0.0000006791658447437413),
+        glwe_modular_std_dev: StandardDev(0.00000000000000029403601535432533),
+        pbs_base_log: DecompositionBaseLog(23),
+        pbs_level: DecompositionLevelCount(1),
+        ks_base_log: DecompositionBaseLog(3),
+        ks_level: DecompositionLevelCount(5),
+        message_modulus: MessageModulus(4),
+        carry_modulus: CarryModulus(4),
+        pfks_level: DecompositionLevelCount(0),
+        pfks_base_log: DecompositionBaseLog(0),
+        pfks_modular_std_dev: StandardDev(0.0000000000000000002168404344971009),
+        cbs_level: DecompositionLevelCount(0),
+        cbs_base_log: DecompositionBaseLog(0),
+    };
+
 /// Return a parameter set from a message and carry moduli.
 ///
 /// # Example
--- a/toolchain.txt
+++ b/toolchain.txt
@@ -1 +1 @@
-nightly-2023-01-30
+nightly-2023-04-05