diff --git a/wrappers/rust/icicle-core/src/ecntt/mod.rs b/wrappers/rust/icicle-core/src/ecntt/mod.rs
index 25a77f25..e302991d 100644
--- a/wrappers/rust/icicle-core/src/ecntt/mod.rs
+++ b/wrappers/rust/icicle-core/src/ecntt/mod.rs
@@ -206,23 +206,38 @@ macro_rules! impl_ecntt_bench {
             use icicle_core::ntt::NTTDomain;
             use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
 
-            let group_id = format!("{} EC NTT", $field_prefix);
+            let group_id = format!("{} EC NTT ", $field_prefix);
             let mut group = c.benchmark_group(&group_id);
             group.sampling_mode(SamplingMode::Flat);
             group.sample_size(10);
 
-            const MAX_SIZE: u64 = 1 << 18;
+            const MAX_LOG2: u32 = 9; // max length = 2 ^ MAX_LOG2 //TODO: should be limited by device ram only after fix
+
+            // A missing or unparsable `MAX_LOG2` environment variable falls back to the default.
+            let max_log2 = env::var("MAX_LOG2")
+                .ok()
+                .and_then(|value| value.parse::<u32>().ok())
+                .unwrap_or(MAX_LOG2);
+
             const FAST_TWIDDLES_MODE: bool = false;
-            INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
+            INIT.get_or_init(move || init_domain::<$field>(1 << max_log2, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
+
+            // Both loops iterate over log2 exponents: NTT sizes 2^4 and 2^8, and
+            // batch sizes 2^0, 2^4 and 2^7 (1, 16 and 128).
+            for test_size_log2 in [4, 8] {
+                for batch_size_log2 in [0, 4, 7] {
+                    let test_size = 1 << test_size_log2;
+                    let batch_size = 1 << batch_size_log2;
+                    let full_size = batch_size * test_size;
+
+                    if full_size > 1 << max_log2 {
+                        continue;
+                    }
 
-            let test_sizes = [1 << 4, 1 << 8];
-            let batch_sizes = [1, 1 << 4, 128];
-            for test_size in test_sizes {
-                for batch_size in batch_sizes {
                     let points = C::generate_random_projective_points(test_size);
                     let points = HostSlice::from_slice(&points);
 
-                    let mut batch_ntt_result = vec![Projective::<C>::zero(); batch_size * test_size];
+                    let mut batch_ntt_result = vec![Projective::<C>::zero(); full_size];
                     let batch_ntt_result = HostSlice::from_mut_slice(&mut batch_ntt_result);
                     let mut config = NTTConfig::default();
                     for is_inverse in [NTTDir::kInverse, NTTDir::kForward] {
diff --git a/wrappers/rust/icicle-core/src/msm/mod.rs
b/wrappers/rust/icicle-core/src/msm/mod.rs
index 571a8f53..c7fe4ba9 100644
--- a/wrappers/rust/icicle-core/src/msm/mod.rs
+++ b/wrappers/rust/icicle-core/src/msm/mod.rs
@@ -317,3 +317,145 @@ macro_rules! impl_msm_tests {
         }
     };
 }
+
+/// Generates a Criterion benchmark binary for batched MSM on `$curve`.
+///
+/// `$field_prefix` labels the benchmark group. Problem sizes are capped at `2^MAX_LOG2`
+/// scalars; the `MAX_LOG2` environment variable overrides the built-in default of 25.
+#[macro_export]
+macro_rules! impl_msm_bench {
+    (
+      $field_prefix:literal,
+      $curve:ident
+    ) => {
+        use criterion::criterion_group;
+        use criterion::criterion_main;
+        use criterion::Criterion;
+        use icicle_core::curve::Affine;
+        use icicle_core::curve::Curve;
+        use icicle_core::curve::Projective;
+        use icicle_core::msm::msm;
+        use icicle_core::msm::MSMConfig;
+        use icicle_core::msm::MSM;
+        use icicle_core::traits::FieldImpl;
+        use icicle_core::traits::GenerateRandom;
+        use icicle_cuda_runtime::device::warmup;
+        use icicle_cuda_runtime::memory::DeviceVec;
+        use icicle_cuda_runtime::memory::HostOrDeviceSlice;
+        use icicle_cuda_runtime::memory::HostSlice;
+
+        // Thin wrapper around `msm` that panics on error; `_seed` only receives a
+        // `black_box`ed constant from the benchmark closure and is not read.
+        fn msm_for_bench<C: Curve + MSM<C>>(
+            scalars_h: &(impl HostOrDeviceSlice<C::ScalarField> + ?Sized),
+            precomputed_points_d: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
+            cfg: &MSMConfig,
+            msm_results: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
+            _seed: u32,
+        ) {
+            msm(scalars_h, precomputed_points_d, cfg, msm_results).unwrap();
+        }
+
+        fn check_msm_batch<C: Curve + MSM<C>>(c: &mut Criterion)
+        where
+            <C::ScalarField as FieldImpl>::Config: GenerateRandom<C::ScalarField>,
+        {
+            use criterion::black_box;
+            use criterion::SamplingMode;
+            use std::env;
+
+            let group_id = format!("{} MSM ", $field_prefix);
+            let mut group = c.benchmark_group(&group_id);
+            group.sampling_mode(SamplingMode::Flat);
+            group.sample_size(10);
+
+            use icicle_core::msm::precompute_bases;
+            use icicle_core::msm::tests::generate_random_affine_points_with_zeroes;
+            use icicle_cuda_runtime::stream::CudaStream;
+
+            const MAX_LOG2: u32 = 25; // max length = 2 ^ MAX_LOG2
+
+            // A missing or unparsable `MAX_LOG2` environment variable falls back to the default.
+            let max_log2 = env::var("MAX_LOG2")
+                .ok()
+                .and_then(|value| value.parse::<u32>().ok())
+                .unwrap_or(MAX_LOG2);
+
+            let stream = CudaStream::create().unwrap();
+            let mut cfg = MSMConfig::default();
+            cfg.ctx
+                .stream = &stream;
+            cfg.is_async = true;
+            cfg.large_bucket_factor = 5;
+            cfg.c = 4;
+
+            warmup(&stream).unwrap();
+
+            for test_size_log2 in 13u32..=max_log2 {
+                let test_size = 1 << test_size_log2;
+
+                let points = generate_random_affine_points_with_zeroes(test_size, 10);
+                for precompute_factor in [1, 4, 8] {
+                    let mut precomputed_points_d = DeviceVec::cuda_malloc(precompute_factor * test_size).unwrap();
+                    precompute_bases(
+                        HostSlice::from_slice(&points),
+                        precompute_factor as i32,
+                        0,
+                        &cfg.ctx,
+                        &mut precomputed_points_d,
+                    )
+                    .unwrap();
+                    for batch_size_log2 in [0, 4, 7] {
+                        let batch_size = 1 << batch_size_log2;
+                        let full_size = batch_size * test_size;
+
+                        if full_size > 1 << max_log2 {
+                            continue;
+                        }
+
+                        // Scalars cover the whole batch (`full_size`); the bases handed to `msm`
+                        // are the precomputed points of a single `test_size` problem.
+                        let scalars = <C::ScalarField as FieldImpl>::Config::generate_random(full_size);
+                        let scalars_h = HostSlice::from_slice(&scalars);
+
+                        let mut msm_results = DeviceVec::<Projective<C>>::cuda_malloc(batch_size).unwrap();
+
+                        cfg.precompute_factor = precompute_factor as i32;
+
+                        let bench_descr = format!(
+                            " {} x {} with precomp = {:?}",
+                            test_size, batch_size, precompute_factor
+                        );
+
+                        // NOTE(review): `cfg.is_async` is set and the stream is only synchronized
+                        // after `bench_function` returns - confirm the timed closure measures
+                        // completed MSMs rather than enqueue time.
+                        group.bench_function(&bench_descr, |b| {
+                            b.iter(|| {
+                                msm_for_bench(
+                                    scalars_h,
+                                    &precomputed_points_d[..],
+                                    &cfg,
+                                    &mut msm_results[..],
+                                    black_box(1),
+                                )
+                            })
+                        });
+
+                        stream
+                            .synchronize()
+                            .unwrap();
+                    }
+                }
+            }
+
+            group.finish();
+            stream
+                .destroy()
+                .unwrap();
+        }
+
+        criterion_group!(benches, check_msm_batch<$curve>);
+        criterion_main!(benches);
+    };
+}
diff --git a/wrappers/rust/icicle-core/src/msm/tests.rs b/wrappers/rust/icicle-core/src/msm/tests.rs
index 02a05537..27b23165 100644
--- a/wrappers/rust/icicle-core/src/msm/tests.rs
+++ b/wrappers/rust/icicle-core/src/msm/tests.rs
@@ -15,7 +15,7 @@ use ark_ec::VariableBaseMSM;
#[cfg(feature = "arkworks")] use ark_std::{rand::Rng, test_rng, UniformRand}; -fn generate_random_affine_points_with_zeroes(size: usize, num_zeroes: usize) -> Vec> { +pub fn generate_random_affine_points_with_zeroes(size: usize, num_zeroes: usize) -> Vec> { let rng = &mut test_rng(); let mut points = C::generate_random_affine_points(size); for _ in 0..num_zeroes { diff --git a/wrappers/rust/icicle-core/src/ntt/mod.rs b/wrappers/rust/icicle-core/src/ntt/mod.rs index d343a2dd..a48dce8f 100644 --- a/wrappers/rust/icicle-core/src/ntt/mod.rs +++ b/wrappers/rust/icicle-core/src/ntt/mod.rs @@ -414,3 +414,135 @@ macro_rules! impl_ntt_tests { } }; } + +#[macro_export] +macro_rules! impl_ntt_bench { + ( + $field_prefix:literal, + $field:ident + ) => { + use icicle_core::ntt::ntt; + use icicle_core::ntt::NTTDomain; + use icicle_cuda_runtime::memory::HostOrDeviceSlice; + use std::sync::OnceLock; + + use criterion::{black_box, criterion_group, criterion_main, Criterion}; + use icicle_core::{ + ntt::{FieldImpl, NTTConfig, NTTDir, NttAlgorithm, Ordering}, + traits::ArkConvertible, + }; + + use icicle_core::ntt::NTT; + use icicle_cuda_runtime::memory::HostSlice; + use icicle_core::traits::GenerateRandom; + use icicle_core::vec_ops::VecOps; + + fn ntt_for_bench( + input: &(impl HostOrDeviceSlice + ?Sized), + mut batch_ntt_result: &mut (impl HostOrDeviceSlice + ?Sized), + test_sizes: usize, + batch_size: usize, + is_inverse: NTTDir, + ordering: Ordering, + config: &mut NTTConfig, + _seed: u32, + ) where + ::Config: NTT + GenerateRandom, + ::Config: VecOps, + { + ntt(input, is_inverse, config, batch_ntt_result).unwrap(); + } + + static INIT: OnceLock<()> = OnceLock::new(); + + fn benchmark_ntt(c: &mut Criterion) + where + ::Config: NTT + GenerateRandom, + ::Config: VecOps, + { + use criterion::SamplingMode; + use icicle_core::ntt::ntt; + use icicle_core::ntt::tests::init_domain; + use icicle_core::ntt::NTTDomain; + use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID; + use 
std::env; + + let group_id = format!("{} NTT", $field_prefix); + let mut group = c.benchmark_group(&group_id); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + + const MAX_LOG2: u32 = 25; // max length = 2 ^ MAX_LOG2 + + let max_log2 = env::var("MAX_LOG2") + .unwrap_or_else(|_| MAX_LOG2.to_string()) + .parse::() + .unwrap_or(MAX_LOG2); + + const FAST_TWIDDLES_MODE: bool = false; + + INIT.get_or_init(move || init_domain::<$field>(1 << max_log2, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE)); + + let coset_generators = [F::one(), F::Config::generate_random(1)[0]]; + let mut config = NTTConfig::::default(); + + for test_size_log2 in (13u32..max_log2 + 1) { + for batch_size_log2 in (7u32..17u32) { + let test_size = 1 << test_size_log2; + let batch_size = 1 << batch_size_log2; + let full_size = batch_size * test_size; + + if full_size > 1 << max_log2 { + continue; + } + + let scalars = F::Config::generate_random(full_size); + let input = HostSlice::from_slice(&scalars); + + let mut batch_ntt_result = vec![F::zero(); batch_size * test_size]; + let batch_ntt_result = HostSlice::from_mut_slice(&mut batch_ntt_result); + let mut config = NTTConfig::default(); + for is_inverse in [NTTDir::kInverse, NTTDir::kForward] { + for ordering in [ + Ordering::kNN, + Ordering::kNR, // times are ~ same as kNN + Ordering::kRN, + Ordering::kRR, + Ordering::kNM, + Ordering::kMN, + ] { + config.ordering = ordering; + // for alg in [NttAlgorithm::Radix2, NttAlgorithm::MixedRadix] { + config.batch_size = batch_size as i32; + // config.ntt_algorithm = alg; + let bench_descr = format!( + "{:?} {:?} {} x {}", + ordering, is_inverse, test_size, batch_size + ); + group.bench_function(&bench_descr, |b| { + b.iter(|| { + ntt_for_bench::( + input, + batch_ntt_result, + test_size, + batch_size, + is_inverse, + ordering, + &mut config, + black_box(1), + ) + }) + }); + // } + } + } + } + } + + group.finish(); + } + + criterion_group!(benches, benchmark_ntt<$field, $field>); + 
criterion_main!(benches); + }; +} diff --git a/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml index 941385f8..94129629 100644 --- a/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml @@ -38,4 +38,12 @@ arkworks = ["ark-bls12-377", "icicle-core/arkworks"] [[bench]] name = "ecntt" -harness = false # Criterion provides own harness +harness = false + +[[bench]] +name = "ntt" +harness = false + +[[bench]] +name = "msm" +harness = false \ No newline at end of file diff --git a/wrappers/rust/icicle-curves/icicle-bls12-377/benches/ecntt.rs b/wrappers/rust/icicle-curves/icicle-bls12-377/benches/ecntt.rs index de1782cf..0fd2c027 100644 --- a/wrappers/rust/icicle-curves/icicle-bls12-377/benches/ecntt.rs +++ b/wrappers/rust/icicle-curves/icicle-bls12-377/benches/ecntt.rs @@ -4,7 +4,7 @@ use icicle_bls12_377::curve::{CurveCfg, ScalarField}; #[cfg(feature = "ec_ntt")] use icicle_core::impl_ecntt_bench; #[cfg(feature = "ec_ntt")] -impl_ecntt_bench!("BLS12_377", ScalarField, CurveCfg); +impl_ecntt_bench!("bls12_377", ScalarField, CurveCfg); #[cfg(not(feature = "ec_ntt"))] fn main() {} diff --git a/wrappers/rust/icicle-curves/icicle-bls12-377/benches/msm.rs b/wrappers/rust/icicle-curves/icicle-bls12-377/benches/msm.rs new file mode 100644 index 00000000..672e319d --- /dev/null +++ b/wrappers/rust/icicle-curves/icicle-bls12-377/benches/msm.rs @@ -0,0 +1,5 @@ +use icicle_bls12_377::curve::CurveCfg; + +use icicle_core::impl_msm_bench; + +impl_msm_bench!("bls12_377", CurveCfg); diff --git a/wrappers/rust/icicle-curves/icicle-bls12-377/benches/ntt.rs b/wrappers/rust/icicle-curves/icicle-bls12-377/benches/ntt.rs new file mode 100644 index 00000000..bef2f78d --- /dev/null +++ b/wrappers/rust/icicle-curves/icicle-bls12-377/benches/ntt.rs @@ -0,0 +1,5 @@ +use icicle_bls12_377::curve::ScalarField; + +use icicle_core::impl_ntt_bench; + 
+impl_ntt_bench!("bls12_377", ScalarField); diff --git a/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml index 79110989..7ca2bbae 100644 --- a/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml @@ -36,4 +36,12 @@ arkworks = ["ark-bls12-381", "icicle-core/arkworks"] [[bench]] name = "ecntt" -harness = false # Criterion provides own harness +harness = false + +[[bench]] +name = "ntt" +harness = false + +[[bench]] +name = "msm" +harness = false diff --git a/wrappers/rust/icicle-curves/icicle-bls12-381/benches/ecntt.rs b/wrappers/rust/icicle-curves/icicle-bls12-381/benches/ecntt.rs index 962e82ad..28371a5c 100644 --- a/wrappers/rust/icicle-curves/icicle-bls12-381/benches/ecntt.rs +++ b/wrappers/rust/icicle-curves/icicle-bls12-381/benches/ecntt.rs @@ -4,7 +4,7 @@ use icicle_bls12_381::curve::{CurveCfg, ScalarField}; #[cfg(feature = "ec_ntt")] use icicle_core::impl_ecntt_bench; #[cfg(feature = "ec_ntt")] -impl_ecntt_bench!("BLS12_381", ScalarField, CurveCfg); +impl_ecntt_bench!("bls12_381", ScalarField, CurveCfg); #[cfg(not(feature = "ec_ntt"))] fn main() {} diff --git a/wrappers/rust/icicle-curves/icicle-bls12-381/benches/msm.rs b/wrappers/rust/icicle-curves/icicle-bls12-381/benches/msm.rs new file mode 100644 index 00000000..2218e9d1 --- /dev/null +++ b/wrappers/rust/icicle-curves/icicle-bls12-381/benches/msm.rs @@ -0,0 +1,5 @@ +use icicle_bls12_381::curve::CurveCfg; + +use icicle_core::impl_msm_bench; + +impl_msm_bench!("bls12_381", CurveCfg); diff --git a/wrappers/rust/icicle-curves/icicle-bls12-381/benches/ntt.rs b/wrappers/rust/icicle-curves/icicle-bls12-381/benches/ntt.rs new file mode 100644 index 00000000..2cf2a351 --- /dev/null +++ b/wrappers/rust/icicle-curves/icicle-bls12-381/benches/ntt.rs @@ -0,0 +1,5 @@ +use icicle_bls12_381::curve::ScalarField; + +use icicle_core::impl_ntt_bench; + +impl_ntt_bench!("bls12_381", ScalarField); 
diff --git a/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml index 1ae1e60d..33d0539c 100644 --- a/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml @@ -36,4 +36,12 @@ arkworks = ["ark-bn254", "icicle-core/arkworks"] [[bench]] name = "ecntt" -harness = false # Criterion provides own harness +harness = false + +[[bench]] +name = "ntt" +harness = false + +[[bench]] +name = "msm" +harness = false \ No newline at end of file diff --git a/wrappers/rust/icicle-curves/icicle-bn254/benches/msm.rs b/wrappers/rust/icicle-curves/icicle-bn254/benches/msm.rs new file mode 100644 index 00000000..6cdf1713 --- /dev/null +++ b/wrappers/rust/icicle-curves/icicle-bn254/benches/msm.rs @@ -0,0 +1,5 @@ +use icicle_bn254::curve::CurveCfg; + +use icicle_core::impl_msm_bench; + +impl_msm_bench!("bn254", CurveCfg); diff --git a/wrappers/rust/icicle-curves/icicle-bn254/benches/ntt.rs b/wrappers/rust/icicle-curves/icicle-bn254/benches/ntt.rs new file mode 100644 index 00000000..b0eac3e3 --- /dev/null +++ b/wrappers/rust/icicle-curves/icicle-bn254/benches/ntt.rs @@ -0,0 +1,5 @@ +use icicle_bn254::curve::ScalarField; + +use icicle_core::impl_ntt_bench; + +impl_ntt_bench!("bn254", ScalarField); diff --git a/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml index b978f04f..05a7b77e 100644 --- a/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml @@ -33,3 +33,15 @@ default = [] g2 = ["icicle-bls12-377/bw6-761-g2"] devmode = ["icicle-core/devmode"] arkworks = ["ark-bw6-761", "icicle-core/arkworks", "icicle-bls12-377/arkworks"] + +# [[bench]] +# name = "ecntt" #sane compilation times only with the devmode enabled +# harness = false # Criterion provides own harness + +[[bench]] +name = "ntt" +harness = false + +[[bench]] +name = "msm" +harness = false \ No newline 
at end of file diff --git a/wrappers/rust/icicle-curves/icicle-bw6-761/benches/msm.rs b/wrappers/rust/icicle-curves/icicle-bw6-761/benches/msm.rs new file mode 100644 index 00000000..3112bcb7 --- /dev/null +++ b/wrappers/rust/icicle-curves/icicle-bw6-761/benches/msm.rs @@ -0,0 +1,5 @@ +use icicle_bw6_761::curve::CurveCfg; + +use icicle_core::impl_msm_bench; + +impl_msm_bench!("bw6_761", CurveCfg); diff --git a/wrappers/rust/icicle-curves/icicle-bw6-761/benches/ntt.rs b/wrappers/rust/icicle-curves/icicle-bw6-761/benches/ntt.rs new file mode 100644 index 00000000..dd02bbdf --- /dev/null +++ b/wrappers/rust/icicle-curves/icicle-bw6-761/benches/ntt.rs @@ -0,0 +1,5 @@ +use icicle_bw6_761::curve::ScalarField; + +use icicle_core::impl_ntt_bench; + +impl_ntt_bench!("bw6_761", ScalarField); diff --git a/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml b/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml index 04102bbc..86978a1b 100644 --- a/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml @@ -8,6 +8,7 @@ homepage.workspace = true repository.workspace = true [dependencies] +criterion = "0.3" icicle-core = { workspace = true } icicle-cuda-runtime = { workspace = true } ark-grumpkin-test = { git = "https://github.com/ingonyama-zk/ark-grumpkin-test.git", optional = true} @@ -16,6 +17,7 @@ ark-grumpkin-test = { git = "https://github.com/ingonyama-zk/ark-grumpkin-test.g cmake = "0.1.50" [dev-dependencies] +criterion = "0.3" ark-std = "0.4.0" ark-ff = "0.4.0" ark-ec = "0.4.0" @@ -28,3 +30,7 @@ default = [] ec_ntt = ["icicle-core/ec_ntt"] devmode = ["icicle-core/devmode"] arkworks = ["ark-grumpkin-test", "icicle-core/arkworks"] + +[[bench]] +name = "msm" +harness = false \ No newline at end of file diff --git a/wrappers/rust/icicle-curves/icicle-grumpkin/benches/msm.rs b/wrappers/rust/icicle-curves/icicle-grumpkin/benches/msm.rs new file mode 100644 index 00000000..98309906 --- /dev/null +++ 
b/wrappers/rust/icicle-curves/icicle-grumpkin/benches/msm.rs @@ -0,0 +1,5 @@ +use icicle_grumpkin::curve::CurveCfg; + +use icicle_core::impl_msm_bench; + +impl_msm_bench!("grumpkin", CurveCfg);