mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-08 23:17:54 -05:00
rust classic benches with Criterion for ecntt/msm/ntt (#499)
Rust idiomatic benches for EC NTT, NTT, MSM
This commit is contained in:
@@ -206,23 +206,35 @@ macro_rules! impl_ecntt_bench {
|
||||
use icicle_core::ntt::NTTDomain;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
|
||||
let group_id = format!("{} EC NTT", $field_prefix);
|
||||
let group_id = format!("{} EC NTT ", $field_prefix);
|
||||
let mut group = c.benchmark_group(&group_id);
|
||||
group.sampling_mode(SamplingMode::Flat);
|
||||
group.sample_size(10);
|
||||
|
||||
const MAX_SIZE: u64 = 1 << 18;
|
||||
const MAX_LOG2: u32 = 9; // max length = 2 ^ MAX_LOG2 //TODO: should be limited by device ram only after fix
|
||||
|
||||
let max_log2 = env::var("MAX_LOG2")
|
||||
.unwrap_or_else(|_| MAX_LOG2.to_string())
|
||||
.parse::<u32>()
|
||||
.unwrap_or(MAX_LOG2);
|
||||
|
||||
const FAST_TWIDDLES_MODE: bool = false;
|
||||
|
||||
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
INIT.get_or_init(move || init_domain::<$field>(1 << max_log2, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
|
||||
for test_size_log2 in [4, 8] {
|
||||
for batch_size_log2 in [1, 1 << 4, 128] {
|
||||
let test_size = 1 << test_size_log2;
|
||||
let batch_size = 1 << batch_size_log2;
|
||||
let full_size = batch_size * test_size;
|
||||
|
||||
if full_size > 1 << max_log2 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let test_sizes = [1 << 4, 1 << 8];
|
||||
let batch_sizes = [1, 1 << 4, 128];
|
||||
for test_size in test_sizes {
|
||||
for batch_size in batch_sizes {
|
||||
let points = C::generate_random_projective_points(test_size);
|
||||
let points = HostSlice::from_slice(&points);
|
||||
let mut batch_ntt_result = vec![Projective::<C>::zero(); batch_size * test_size];
|
||||
let mut batch_ntt_result = vec![Projective::<C>::zero(); full_size];
|
||||
let batch_ntt_result = HostSlice::from_mut_slice(&mut batch_ntt_result);
|
||||
let mut config = NTTConfig::default();
|
||||
for is_inverse in [NTTDir::kInverse, NTTDir::kForward] {
|
||||
|
||||
@@ -317,3 +317,138 @@ macro_rules! impl_msm_tests {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Generates a standalone Criterion benchmark for batched multi-scalar
/// multiplication (MSM) on `$curve`.
///
/// Expanding the macro defines the benchmark body (`check_msm_batch`), a thin
/// `msm_for_bench` wrapper around `msm`, and the `criterion_group!` /
/// `criterion_main!` entry points, so it is meant to be the whole content of a
/// `harness = false` bench target.
///
/// * `$field_prefix` - label prepended to the Criterion group name.
/// * `$curve` - curve configuration type the benchmark is instantiated for.
///
/// The largest total problem size is `2^MAX_LOG2` scalars (default `25`); set
/// the `MAX_LOG2` environment variable to override it.
#[macro_export]
macro_rules! impl_msm_bench {
    (
        $field_prefix:literal,
        $curve:ident
    ) => {
        use criterion::criterion_group;
        use criterion::criterion_main;
        use criterion::Criterion;
        use icicle_core::curve::Affine;
        use icicle_core::curve::Curve;
        use icicle_core::curve::Projective;
        use icicle_core::msm::msm;
        use icicle_core::msm::MSMConfig;
        use icicle_core::msm::MSM;
        use icicle_core::traits::FieldImpl;
        use icicle_core::traits::GenerateRandom;
        use icicle_cuda_runtime::device::warmup;
        use icicle_cuda_runtime::memory::DeviceVec;
        use icicle_cuda_runtime::memory::HostOrDeviceSlice;
        use icicle_cuda_runtime::memory::HostSlice;

        /// Runs a single MSM call and panics if it reports an error.
        ///
        /// `_seed` is ignored by the body; callers pass a `black_box`ed value.
        fn msm_for_bench<C: Curve + MSM<C>>(
            scalars_h: &(impl HostOrDeviceSlice<C::ScalarField> + ?Sized),
            precomputed_points_d: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
            cfg: &MSMConfig,
            msm_results: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
            _seed: u32,
        ) {
            msm(scalars_h, precomputed_points_d, cfg, msm_results).unwrap();
        }

        /// Benchmarks MSM over a grid of sizes, precompute factors and batch sizes.
        ///
        /// Combinations whose total scalar count exceeds `2^MAX_LOG2` are skipped.
        fn check_msm_batch<C: Curve + MSM<C>>(c: &mut Criterion)
        where
            <C::ScalarField as FieldImpl>::Config: GenerateRandom<C::ScalarField>,
        {
            use criterion::black_box;
            use criterion::SamplingMode;
            use icicle_core::msm::precompute_bases;
            use icicle_core::msm::tests::generate_random_affine_points_with_zeroes;
            use icicle_cuda_runtime::stream::CudaStream;
            use std::env;

            const MAX_LOG2: u32 = 25; // max length = 2 ^ MAX_LOG2

            // A missing or unparsable `MAX_LOG2` variable falls back to the default.
            let max_log2 = env::var("MAX_LOG2")
                .ok()
                .and_then(|value| value.parse::<u32>().ok())
                .unwrap_or(MAX_LOG2);
            let max_size = 1usize << max_log2;

            let group_id = format!("{} MSM ", $field_prefix);
            let mut group = c.benchmark_group(&group_id);
            group.sampling_mode(SamplingMode::Flat);
            group.sample_size(10);

            let stream = CudaStream::create().unwrap();
            let mut cfg = MSMConfig::default();
            cfg.ctx
                .stream = &stream;
            cfg.is_async = true;
            cfg.large_bucket_factor = 5;
            cfg.c = 4;

            warmup(&stream).unwrap();

            for test_size_log2 in 13..=max_log2 {
                let test_size = 1usize << test_size_log2;
                let points = generate_random_affine_points_with_zeroes::<C>(test_size, 10);

                for precompute_factor in [1, 4, 8] {
                    let mut precomputed_points_d =
                        DeviceVec::<Affine<C>>::cuda_malloc(precompute_factor * test_size).unwrap();
                    precompute_bases(
                        HostSlice::from_slice(&points),
                        precompute_factor as i32,
                        0,
                        &cfg.ctx,
                        &mut precomputed_points_d,
                    )
                    .unwrap();
                    cfg.precompute_factor = precompute_factor as i32;

                    for batch_size_log2 in [0, 4, 7] {
                        let batch_size = 1usize << batch_size_log2;
                        let full_size = batch_size * test_size;

                        if full_size > max_size {
                            continue;
                        }

                        // The same precomputed bases serve every batch entry, so only the
                        // scalars scale with the batch size; no extra copy of the bases is made.
                        let scalars = <C::ScalarField as FieldImpl>::Config::generate_random(full_size);
                        let scalars_h = HostSlice::from_slice(&scalars);

                        let mut msm_results = DeviceVec::<Projective<C>>::cuda_malloc(batch_size).unwrap();

                        let bench_descr = format!(
                            " {} x {} with precomp = {:?}",
                            test_size, batch_size, precompute_factor
                        );

                        group.bench_function(&bench_descr, |b| {
                            b.iter(|| {
                                msm_for_bench(
                                    scalars_h,
                                    &precomputed_points_d[..],
                                    &cfg,
                                    &mut msm_results[..],
                                    black_box(1),
                                );
                                // `cfg.is_async` is set, so wait on the stream inside the timed
                                // region; otherwise only the launch would be measured.
                                stream
                                    .synchronize()
                                    .unwrap();
                            })
                        });

                        // Make sure nothing is still in flight before the buffers are dropped.
                        stream
                            .synchronize()
                            .unwrap();
                    }
                }
            }

            group.finish();
            stream
                .destroy()
                .unwrap();
        }

        criterion_group!(benches, check_msm_batch<$curve>);
        criterion_main!(benches);
    };
}
|
||||
|
||||
@@ -15,7 +15,7 @@ use ark_ec::VariableBaseMSM;
|
||||
#[cfg(feature = "arkworks")]
|
||||
use ark_std::{rand::Rng, test_rng, UniformRand};
|
||||
|
||||
fn generate_random_affine_points_with_zeroes<C: Curve>(size: usize, num_zeroes: usize) -> Vec<Affine<C>> {
|
||||
pub fn generate_random_affine_points_with_zeroes<C: Curve>(size: usize, num_zeroes: usize) -> Vec<Affine<C>> {
|
||||
let rng = &mut test_rng();
|
||||
let mut points = C::generate_random_affine_points(size);
|
||||
for _ in 0..num_zeroes {
|
||||
|
||||
@@ -414,3 +414,135 @@ macro_rules! impl_ntt_tests {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Generates a standalone Criterion benchmark for batched NTT over `$field`.
///
/// Expanding the macro defines the benchmark body (`benchmark_ntt`), a thin
/// `ntt_for_bench` wrapper around `ntt`, a `OnceLock` guarding the one-time
/// domain initialisation, and the `criterion_group!` / `criterion_main!` entry
/// points, so it is meant to be the whole content of a `harness = false`
/// bench target.
///
/// * `$field_prefix` - label prepended to the Criterion group name.
/// * `$field` - field type the benchmark is instantiated for.
///
/// The domain and the largest total input are `2^MAX_LOG2` elements (default
/// `25`); set the `MAX_LOG2` environment variable to override it.
#[macro_export]
macro_rules! impl_ntt_bench {
    (
        $field_prefix:literal,
        $field:ident
    ) => {
        use icicle_core::ntt::ntt;
        use icicle_core::ntt::NTTDomain;
        use icicle_cuda_runtime::memory::HostOrDeviceSlice;
        use std::sync::OnceLock;

        use criterion::{black_box, criterion_group, criterion_main, Criterion};
        use icicle_core::{
            ntt::{FieldImpl, NTTConfig, NTTDir, NttAlgorithm, Ordering},
            traits::ArkConvertible,
        };

        use icicle_core::ntt::NTT;
        use icicle_cuda_runtime::memory::HostSlice;
        use icicle_core::traits::GenerateRandom;
        use icicle_core::vec_ops::VecOps;

        /// Runs a single NTT call and panics if it reports an error.
        ///
        /// Only `input`, `is_inverse`, `config` and `batch_ntt_result` are used;
        /// the remaining parameters are ignored by the body.
        fn ntt_for_bench<T, F: FieldImpl>(
            input: &(impl HostOrDeviceSlice<F> + ?Sized),
            batch_ntt_result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
            test_sizes: usize,
            batch_size: usize,
            is_inverse: NTTDir,
            ordering: Ordering,
            config: &mut NTTConfig<F>,
            _seed: u32,
        ) where
            <F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
            <F as FieldImpl>::Config: VecOps<F>,
        {
            ntt(input, is_inverse, config, batch_ntt_result).unwrap();
        }

        // Ensures the NTT domain is initialised at most once per process.
        static INIT: OnceLock<()> = OnceLock::new();

        /// Benchmarks NTT over a grid of sizes, batch sizes, directions and orderings.
        ///
        /// Combinations whose total element count exceeds `2^MAX_LOG2` are skipped.
        fn benchmark_ntt<T, F: FieldImpl>(c: &mut Criterion)
        where
            <F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
            <F as FieldImpl>::Config: VecOps<F>,
        {
            use criterion::SamplingMode;
            use icicle_core::ntt::tests::init_domain;
            use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
            use std::env;

            const MAX_LOG2: u32 = 25; // max length = 2 ^ MAX_LOG2
            const FAST_TWIDDLES_MODE: bool = false;

            let group_id = format!("{} NTT", $field_prefix);
            let mut group = c.benchmark_group(&group_id);
            group.sampling_mode(SamplingMode::Flat);
            group.sample_size(10);

            // A missing or unparsable `MAX_LOG2` variable falls back to the default.
            let max_log2 = env::var("MAX_LOG2")
                .ok()
                .and_then(|value| value.parse::<u32>().ok())
                .unwrap_or(MAX_LOG2);
            let max_size = 1usize << max_log2;

            INIT.get_or_init(move || init_domain::<$field>(1 << max_log2, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));

            for test_size_log2 in 13..=max_log2 {
                let test_size = 1usize << test_size_log2;

                for batch_size_log2 in 7u32..17 {
                    let batch_size = 1usize << batch_size_log2;
                    let full_size = batch_size * test_size;

                    if full_size > max_size {
                        continue;
                    }

                    let scalars = F::Config::generate_random(full_size);
                    let input = HostSlice::from_slice(&scalars);

                    let mut batch_ntt_result = vec![F::zero(); full_size];
                    let batch_ntt_result = HostSlice::from_mut_slice(&mut batch_ntt_result);

                    // `ntt_algorithm` is never set here, so the config default is what gets measured.
                    let mut config = NTTConfig::<F>::default();
                    config.batch_size = batch_size as i32;

                    for direction in [NTTDir::kInverse, NTTDir::kForward] {
                        for ordering in [
                            Ordering::kNN,
                            Ordering::kNR, // times are ~ same as kNN
                            Ordering::kRN,
                            Ordering::kRR,
                            Ordering::kNM,
                            Ordering::kMN,
                        ] {
                            config.ordering = ordering;

                            let bench_descr = format!(
                                "{:?} {:?} {} x {}",
                                ordering, direction, test_size, batch_size
                            );

                            group.bench_function(&bench_descr, |b| {
                                b.iter(|| {
                                    ntt_for_bench::<F, F>(
                                        input,
                                        batch_ntt_result,
                                        test_size,
                                        batch_size,
                                        direction,
                                        ordering,
                                        &mut config,
                                        black_box(1),
                                    )
                                })
                            });
                        }
                    }
                }
            }

            group.finish();
        }

        criterion_group!(benches, benchmark_ntt<$field, $field>);
        criterion_main!(benches);
    };
}
|
||||
|
||||
@@ -38,4 +38,12 @@ arkworks = ["ark-bls12-377", "icicle-core/arkworks"]
|
||||
|
||||
[[bench]]
|
||||
name = "ecntt"
|
||||
harness = false # Criterion provides own harness
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "ntt"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "msm"
|
||||
harness = false
|
||||
@@ -4,7 +4,7 @@ use icicle_bls12_377::curve::{CurveCfg, ScalarField};
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
use icicle_core::impl_ecntt_bench;
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
impl_ecntt_bench!("BLS12_377", ScalarField, CurveCfg);
|
||||
impl_ecntt_bench!("bls12_377", ScalarField, CurveCfg);
|
||||
|
||||
#[cfg(not(feature = "ec_ntt"))]
|
||||
fn main() {}
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
use icicle_bls12_377::curve::CurveCfg;
|
||||
|
||||
use icicle_core::impl_msm_bench;
|
||||
|
||||
// Expands to the Criterion MSM benchmark for this curve, including the
// `criterion_main!` entry point; "bls12_377" prefixes the benchmark group name.
impl_msm_bench!("bls12_377", CurveCfg);
|
||||
@@ -0,0 +1,5 @@
|
||||
use icicle_bls12_377::curve::ScalarField;
|
||||
|
||||
use icicle_core::impl_ntt_bench;
|
||||
|
||||
// Expands to the Criterion NTT benchmark for this scalar field, including the
// `criterion_main!` entry point; "bls12_377" prefixes the benchmark group name.
impl_ntt_bench!("bls12_377", ScalarField);
|
||||
@@ -36,4 +36,12 @@ arkworks = ["ark-bls12-381", "icicle-core/arkworks"]
|
||||
|
||||
[[bench]]
|
||||
name = "ecntt"
|
||||
harness = false # Criterion provides own harness
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "ntt"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "msm"
|
||||
harness = false
|
||||
|
||||
@@ -4,7 +4,7 @@ use icicle_bls12_381::curve::{CurveCfg, ScalarField};
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
use icicle_core::impl_ecntt_bench;
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
impl_ecntt_bench!("BLS12_381", ScalarField, CurveCfg);
|
||||
impl_ecntt_bench!("bls12_381", ScalarField, CurveCfg);
|
||||
|
||||
#[cfg(not(feature = "ec_ntt"))]
|
||||
fn main() {}
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
use icicle_bls12_381::curve::CurveCfg;
|
||||
|
||||
use icicle_core::impl_msm_bench;
|
||||
|
||||
// Expands to the Criterion MSM benchmark for this curve, including the
// `criterion_main!` entry point; "bls12_381" prefixes the benchmark group name.
impl_msm_bench!("bls12_381", CurveCfg);
|
||||
@@ -0,0 +1,5 @@
|
||||
use icicle_bls12_381::curve::ScalarField;
|
||||
|
||||
use icicle_core::impl_ntt_bench;
|
||||
|
||||
// Expands to the Criterion NTT benchmark for this scalar field, including the
// `criterion_main!` entry point; "bls12_381" prefixes the benchmark group name.
impl_ntt_bench!("bls12_381", ScalarField);
|
||||
@@ -36,4 +36,12 @@ arkworks = ["ark-bn254", "icicle-core/arkworks"]
|
||||
|
||||
[[bench]]
|
||||
name = "ecntt"
|
||||
harness = false # Criterion provides own harness
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "ntt"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "msm"
|
||||
harness = false
|
||||
5
wrappers/rust/icicle-curves/icicle-bn254/benches/msm.rs
Normal file
5
wrappers/rust/icicle-curves/icicle-bn254/benches/msm.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
use icicle_bn254::curve::CurveCfg;
|
||||
|
||||
use icicle_core::impl_msm_bench;
|
||||
|
||||
// Expands to the Criterion MSM benchmark for this curve, including the
// `criterion_main!` entry point; "bn254" prefixes the benchmark group name.
impl_msm_bench!("bn254", CurveCfg);
|
||||
5
wrappers/rust/icicle-curves/icicle-bn254/benches/ntt.rs
Normal file
5
wrappers/rust/icicle-curves/icicle-bn254/benches/ntt.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
use icicle_bn254::curve::ScalarField;
|
||||
|
||||
use icicle_core::impl_ntt_bench;
|
||||
|
||||
// Expands to the Criterion NTT benchmark for this scalar field, including the
// `criterion_main!` entry point; "bn254" prefixes the benchmark group name.
impl_ntt_bench!("bn254", ScalarField);
|
||||
@@ -33,3 +33,15 @@ default = []
|
||||
g2 = ["icicle-bls12-377/bw6-761-g2"]
|
||||
devmode = ["icicle-core/devmode"]
|
||||
arkworks = ["ark-bw6-761", "icicle-core/arkworks", "icicle-bls12-377/arkworks"]
|
||||
|
||||
# [[bench]]
|
||||
# name = "ecntt" #sane compilation times only with the devmode enabled
|
||||
# harness = false # Criterion provides own harness
|
||||
|
||||
[[bench]]
|
||||
name = "ntt"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "msm"
|
||||
harness = false
|
||||
@@ -0,0 +1,5 @@
|
||||
use icicle_bw6_761::curve::CurveCfg;
|
||||
|
||||
use icicle_core::impl_msm_bench;
|
||||
|
||||
// Expands to the Criterion MSM benchmark for this curve, including the
// `criterion_main!` entry point; "bw6_761" prefixes the benchmark group name.
impl_msm_bench!("bw6_761", CurveCfg);
|
||||
@@ -0,0 +1,5 @@
|
||||
use icicle_bw6_761::curve::ScalarField;
|
||||
|
||||
use icicle_core::impl_ntt_bench;
|
||||
|
||||
// Expands to the Criterion NTT benchmark for this scalar field, including the
// `criterion_main!` entry point; "bw6_761" prefixes the benchmark group name.
impl_ntt_bench!("bw6_761", ScalarField);
|
||||
@@ -8,6 +8,7 @@ homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
criterion = "0.3"
|
||||
icicle-core = { workspace = true }
|
||||
icicle-cuda-runtime = { workspace = true }
|
||||
ark-grumpkin-test = { git = "https://github.com/ingonyama-zk/ark-grumpkin-test.git", optional = true}
|
||||
@@ -16,6 +17,7 @@ ark-grumpkin-test = { git = "https://github.com/ingonyama-zk/ark-grumpkin-test.g
|
||||
cmake = "0.1.50"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
ark-std = "0.4.0"
|
||||
ark-ff = "0.4.0"
|
||||
ark-ec = "0.4.0"
|
||||
@@ -28,3 +30,7 @@ default = []
|
||||
ec_ntt = ["icicle-core/ec_ntt"]
|
||||
devmode = ["icicle-core/devmode"]
|
||||
arkworks = ["ark-grumpkin-test", "icicle-core/arkworks"]
|
||||
|
||||
[[bench]]
|
||||
name = "msm"
|
||||
harness = false
|
||||
@@ -0,0 +1,5 @@
|
||||
use icicle_grumpkin::curve::CurveCfg;
|
||||
|
||||
use icicle_core::impl_msm_bench;
|
||||
|
||||
// Expands to the Criterion MSM benchmark for this curve, including the
// `criterion_main!` entry point; "grumpkin" prefixes the benchmark group name.
impl_msm_bench!("grumpkin", CurveCfg);
|
||||
Reference in New Issue
Block a user