mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-07 22:04:10 -05:00
chore(gpu): bench signed add on gpu
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -19,3 +19,6 @@ dieharder_run.log
|
||||
|
||||
# Coverage reports
|
||||
/coverage/
|
||||
|
||||
# Cuda local build
|
||||
backends/tfhe-cuda-backend/cuda/cmake-build-debug/
|
||||
|
||||
@@ -1190,7 +1190,7 @@ mod cuda {
|
||||
use crate::utilities::{write_to_json, EnvConfig, OperatorType};
|
||||
use criterion::{criterion_group, Criterion};
|
||||
use tfhe::core_crypto::gpu::{CudaDevice, CudaStream};
|
||||
use tfhe::integer::gpu::ciphertext::CudaRadixCiphertext;
|
||||
use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
|
||||
use tfhe::integer::gpu::server_key::CudaServerKey;
|
||||
use tfhe::integer::keycache::KEY_CACHE;
|
||||
use tfhe::integer::IntegerKeyKind;
|
||||
@@ -1202,7 +1202,7 @@ mod cuda {
|
||||
display_name: &str,
|
||||
unary_op: F,
|
||||
) where
|
||||
F: Fn(&CudaServerKey, &mut CudaRadixCiphertext, &CudaStream),
|
||||
F: Fn(&CudaServerKey, &mut CudaUnsignedRadixCiphertext, &CudaStream),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
@@ -1229,7 +1229,7 @@ mod cuda {
|
||||
let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
|
||||
let ct_0 = cks.encrypt_radix(clear_0, num_block);
|
||||
|
||||
CudaRadixCiphertext::from_radix_ciphertext(&ct_0, &stream)
|
||||
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_0, &stream)
|
||||
};
|
||||
|
||||
b.iter_batched(
|
||||
@@ -1263,7 +1263,12 @@ mod cuda {
|
||||
display_name: &str,
|
||||
binary_op: F,
|
||||
) where
|
||||
F: Fn(&CudaServerKey, &mut CudaRadixCiphertext, &mut CudaRadixCiphertext, &CudaStream),
|
||||
F: Fn(
|
||||
&CudaServerKey,
|
||||
&mut CudaUnsignedRadixCiphertext,
|
||||
&mut CudaUnsignedRadixCiphertext,
|
||||
&CudaStream,
|
||||
),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
@@ -1295,8 +1300,10 @@ mod cuda {
|
||||
let clear_1 = tfhe::integer::U256::from((clearlow, clearhigh));
|
||||
let ct_1 = cks.encrypt_radix(clear_1, num_block);
|
||||
|
||||
let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ct_0, &stream);
|
||||
let d_ctxt_2 = CudaRadixCiphertext::from_radix_ciphertext(&ct_1, &stream);
|
||||
let d_ctxt_1 =
|
||||
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_0, &stream);
|
||||
let d_ctxt_2 =
|
||||
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_1, &stream);
|
||||
|
||||
(d_ctxt_1, d_ctxt_2)
|
||||
};
|
||||
@@ -1331,7 +1338,7 @@ mod cuda {
|
||||
binary_op: F,
|
||||
rng_func: G,
|
||||
) where
|
||||
F: Fn(&CudaServerKey, &mut CudaRadixCiphertext, ScalarType, &CudaStream),
|
||||
F: Fn(&CudaServerKey, &mut CudaUnsignedRadixCiphertext, ScalarType, &CudaStream),
|
||||
G: Fn(&mut ThreadRng, usize) -> ScalarType,
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
@@ -1364,7 +1371,8 @@ mod cuda {
|
||||
let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
|
||||
let ct_0 = cks.encrypt_radix(clear_0, num_block);
|
||||
|
||||
let d_ctxt_1 = CudaRadixCiphertext::from_radix_ciphertext(&ct_0, &stream);
|
||||
let d_ctxt_1 =
|
||||
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_0, &stream);
|
||||
|
||||
let clear_1 = rng_func(&mut rng, bit_size) & max_value_for_bit_size;
|
||||
|
||||
@@ -1432,9 +1440,12 @@ mod cuda {
|
||||
let clear_1 = tfhe::integer::U256::from((clearlow, clearhigh));
|
||||
let ct_else = cks.encrypt_radix(clear_1, num_block);
|
||||
|
||||
let d_ct_cond = CudaRadixCiphertext::from_radix_ciphertext(&ct_cond, &stream);
|
||||
let d_ct_then = CudaRadixCiphertext::from_radix_ciphertext(&ct_then, &stream);
|
||||
let d_ct_else = CudaRadixCiphertext::from_radix_ciphertext(&ct_else, &stream);
|
||||
let d_ct_cond =
|
||||
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_cond, &stream);
|
||||
let d_ct_then =
|
||||
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_then, &stream);
|
||||
let d_ct_else =
|
||||
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_else, &stream);
|
||||
|
||||
(d_ct_cond, d_ct_then, d_ct_else)
|
||||
};
|
||||
@@ -1908,7 +1919,7 @@ mod cuda {
|
||||
display_name: &str,
|
||||
cast_op: F,
|
||||
) where
|
||||
F: Fn(&CudaServerKey, CudaRadixCiphertext, usize),
|
||||
F: Fn(&CudaServerKey, CudaUnsignedRadixCiphertext, usize),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
@@ -1939,9 +1950,9 @@ mod cuda {
|
||||
let (cks, _sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &stream);
|
||||
|
||||
let encrypt_one_value = || -> CudaRadixCiphertext {
|
||||
let encrypt_one_value = || -> CudaUnsignedRadixCiphertext {
|
||||
let ct = cks.encrypt_radix(gen_random_u256(&mut rng), num_blocks);
|
||||
CudaRadixCiphertext::from_radix_ciphertext(&ct, &stream)
|
||||
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream)
|
||||
};
|
||||
|
||||
b.iter_batched(
|
||||
|
||||
@@ -13,9 +13,11 @@ use tfhe::integer::keycache::KEY_CACHE;
|
||||
use tfhe::integer::{IntegerKeyKind, RadixCiphertext, ServerKey, SignedRadixCiphertext, I256};
|
||||
use tfhe::keycache::NamedParam;
|
||||
|
||||
use tfhe::shortint::parameters::{
|
||||
PARAM_MESSAGE_2_CARRY_2_KS_PBS, PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS,
|
||||
};
|
||||
use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
use tfhe::shortint::parameters::PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS;
|
||||
#[cfg(feature = "gpu")]
|
||||
use tfhe::shortint::parameters::PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS;
|
||||
|
||||
fn gen_random_i256(rng: &mut ThreadRng) -> I256 {
|
||||
let clearlow = rng.gen::<u128>();
|
||||
@@ -37,6 +39,9 @@ impl Default for ParamsAndNumBlocksIter {
|
||||
let env_config = EnvConfig::new();
|
||||
|
||||
if env_config.is_multi_bit {
|
||||
#[cfg(feature = "gpu")]
|
||||
let params = vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS.into()];
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
let params = vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS.into()];
|
||||
|
||||
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
|
||||
@@ -1194,29 +1199,179 @@ define_server_key_bench_cast_fn!(method_name: cast_to_signed, display_name: cast
|
||||
|
||||
criterion_group!(cast_ops, cast_to_unsigned, cast_to_signed);
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
mod cuda {
|
||||
use super::*;
|
||||
use crate::utilities::{write_to_json, OperatorType};
|
||||
use criterion::{criterion_group, Criterion};
|
||||
use tfhe::core_crypto::gpu::{CudaDevice, CudaStream};
|
||||
use tfhe::integer::gpu::ciphertext::CudaSignedRadixCiphertext;
|
||||
use tfhe::integer::gpu::server_key::CudaServerKey;
|
||||
use tfhe::integer::keycache::KEY_CACHE;
|
||||
use tfhe::integer::IntegerKeyKind;
|
||||
use tfhe::keycache::NamedParam;
|
||||
|
||||
/// Base function to bench a server key function that is a binary operation, input ciphertext
|
||||
/// will contain only zero carries
|
||||
fn bench_cuda_server_key_binary_signed_function_clean_inputs<F>(
|
||||
c: &mut Criterion,
|
||||
bench_name: &str,
|
||||
display_name: &str,
|
||||
binary_op: F,
|
||||
) where
|
||||
F: Fn(
|
||||
&CudaServerKey,
|
||||
&mut CudaSignedRadixCiphertext,
|
||||
&mut CudaSignedRadixCiphertext,
|
||||
&CudaStream,
|
||||
),
|
||||
{
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
bench_group
|
||||
.sample_size(15)
|
||||
.measurement_time(std::time::Duration::from_secs(60));
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let gpu_index = 0;
|
||||
let device = CudaDevice::new(gpu_index);
|
||||
let stream = CudaStream::new_unchecked(device);
|
||||
|
||||
for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() {
|
||||
let param_name = param.name();
|
||||
|
||||
let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
|
||||
|
||||
bench_group.bench_function(&bench_id, |b| {
|
||||
let (cks, _cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
|
||||
let gpu_sks = CudaServerKey::new(&cks, &stream);
|
||||
|
||||
let encrypt_two_values = || {
|
||||
let clearlow = rng.gen::<u128>();
|
||||
let clearhigh = rng.gen::<u128>();
|
||||
let clear_0 = tfhe::integer::I256::from((clearlow, clearhigh));
|
||||
let ct_0 = cks.encrypt_signed_radix(clear_0, num_block);
|
||||
|
||||
let clearlow = rng.gen::<u128>();
|
||||
let clearhigh = rng.gen::<u128>();
|
||||
let clear_1 = tfhe::integer::I256::from((clearlow, clearhigh));
|
||||
let ct_1 = cks.encrypt_signed_radix(clear_1, num_block);
|
||||
|
||||
let d_ctxt_1 =
|
||||
CudaSignedRadixCiphertext::from_signed_radix_ciphertext(&ct_0, &stream);
|
||||
let d_ctxt_2 =
|
||||
CudaSignedRadixCiphertext::from_signed_radix_ciphertext(&ct_1, &stream);
|
||||
|
||||
(d_ctxt_1, d_ctxt_2)
|
||||
};
|
||||
|
||||
b.iter_batched(
|
||||
encrypt_two_values,
|
||||
|(mut ct_0, mut ct_1)| {
|
||||
binary_op(&gpu_sks, &mut ct_0, &mut ct_1, &stream);
|
||||
},
|
||||
criterion::BatchSize::SmallInput,
|
||||
)
|
||||
});
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
param,
|
||||
param.name(),
|
||||
display_name,
|
||||
&OperatorType::Atomic,
|
||||
bit_size as u32,
|
||||
vec![param.message_modulus().0.ilog2(); num_block],
|
||||
);
|
||||
}
|
||||
|
||||
bench_group.finish()
|
||||
}
|
||||
|
||||
/// Generates a Criterion benchmark function named `cuda_<method_name>`.
///
/// The generated function forwards to
/// `bench_cuda_server_key_binary_signed_function_clean_inputs` with:
/// * the benchmark name `integer::cuda::signed::<method_name>`;
/// * the stringified `display_name` as display name;
/// * a closure calling `<method_name>` on the server key with both operands and the stream,
///   discarding whatever the method returns.
macro_rules! define_cuda_server_key_bench_clean_input_signed_fn {
    (method_name: $method:ident, display_name:$display:ident) => {
        ::paste::paste! {
            fn [<cuda_ $method>](crit: &mut Criterion) {
                bench_cuda_server_key_binary_signed_function_clean_inputs(
                    crit,
                    concat!("integer::cuda::signed::", stringify!($method)),
                    stringify!($display),
                    |sks, left, right, cuda_stream| {
                        sks.$method(left, right, cuda_stream);
                    },
                )
            }
        }
    };
}
|
||||
|
||||
define_cuda_server_key_bench_clean_input_signed_fn!(
|
||||
method_name: unchecked_add,
|
||||
display_name: add
|
||||
);
|
||||
|
||||
//===========================================
|
||||
// Default
|
||||
//===========================================
|
||||
|
||||
define_cuda_server_key_bench_clean_input_signed_fn!(
|
||||
method_name: add,
|
||||
display_name: add
|
||||
);
|
||||
|
||||
criterion_group!(unchecked_cuda_ops, cuda_unchecked_add,);
|
||||
|
||||
criterion_group!(default_cuda_ops, cuda_add,);
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
use cuda::{default_cuda_ops, unchecked_cuda_ops};
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
fn go_through_gpu_bench_groups(val: &str) {
|
||||
match val.to_lowercase().as_str() {
|
||||
"default" => {
|
||||
default_cuda_ops();
|
||||
}
|
||||
"unchecked" => {
|
||||
unchecked_cuda_ops();
|
||||
}
|
||||
_ => panic!("unknown benchmark operations flavor"),
|
||||
};
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn go_through_cpu_bench_groups(val: &str) {
|
||||
match val.to_lowercase().as_str() {
|
||||
"default" => {
|
||||
default_parallelized_ops();
|
||||
default_parallelized_ops_comp();
|
||||
default_scalar_parallelized_ops();
|
||||
default_scalar_parallelized_ops_comp();
|
||||
cast_ops()
|
||||
}
|
||||
"unchecked" => {
|
||||
unchecked_ops();
|
||||
unchecked_ops_comp();
|
||||
unchecked_scalar_ops();
|
||||
unchecked_scalar_ops_comp()
|
||||
}
|
||||
_ => panic!("unknown benchmark operations flavor"),
|
||||
};
|
||||
}
|
||||
|
||||
fn main() {
|
||||
match env::var("__TFHE_RS_BENCH_OP_FLAVOR") {
|
||||
Ok(val) => {
|
||||
match val.to_lowercase().as_str() {
|
||||
"default" => {
|
||||
default_parallelized_ops();
|
||||
default_parallelized_ops_comp();
|
||||
default_scalar_parallelized_ops();
|
||||
default_scalar_parallelized_ops_comp();
|
||||
cast_ops()
|
||||
}
|
||||
"unchecked" => {
|
||||
unchecked_ops();
|
||||
unchecked_ops_comp();
|
||||
unchecked_scalar_ops();
|
||||
unchecked_scalar_ops_comp()
|
||||
}
|
||||
_ => panic!("unknown benchmark operations flavor"),
|
||||
};
|
||||
#[cfg(feature = "gpu")]
|
||||
go_through_gpu_bench_groups(&val);
|
||||
#[cfg(not(feature = "gpu"))]
|
||||
go_through_cpu_bench_groups(&val);
|
||||
}
|
||||
Err(_) => {
|
||||
default_parallelized_ops();
|
||||
default_parallelized_ops_comp();
|
||||
default_scalar_parallelized_ops();
|
||||
default_scalar_parallelized_ops_comp();
|
||||
cast_ops()
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user