vec ops bench draft

This commit is contained in:
VitaliiH
2024-07-20 17:28:50 +02:00
parent 53f34aade5
commit 8f35963199
3 changed files with 101 additions and 0 deletions

View File

@@ -513,3 +513,91 @@ macro_rules! impl_vec_add_tests {
}
};
}
#[macro_export]
macro_rules! impl_vec_ops_bench {
(
$field_prefix:literal,
$field:ident
) => {
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use icicle_core::{
ntt::{FieldImpl, NTTConfig, NTTDir, NttAlgorithm, Ordering},
};
use icicle_cuda_runtime::memory::HostSlice;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_core::traits::GenerateRandom;
use icicle_core::vec_ops::VecOps;
fn benchmark_vec_ops<T, F: FieldImpl>(c: &mut Criterion)
where
F: FieldImpl,
<F as FieldImpl>::Config: VecOps<F> + GenerateRandom<F>,
{
use criterion::SamplingMode;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use std::env;
let group_id = format!("{} VecOps ", $field_prefix);
let mut group = c.benchmark_group(&group_id);
group.sampling_mode(SamplingMode::Flat);
group.sample_size(10);
const MAX_LOG2: u32 = 25; // max length = 2 ^ MAX_LOG2
let max_log2 = env::var("MAX_LOG2")
.unwrap_or_else(|_| MAX_LOG2.to_string())
.parse::<u32>()
.unwrap_or(MAX_LOG2);
for test_size_log2 in (13u32..max_log2 + 1) {
let test_size = 1 << test_size_log2;
if test_size > 1 << max_log2 {
continue;
}
use icicle_core::traits::GenerateRandom;
use icicle_core::vec_ops::{accumulate_scalars, VecOpsConfig};
use icicle_cuda_runtime::memory::{HostSlice, DeviceSlice, DeviceVec};
let mut a = F::Config::generate_random(test_size);
let b = F::Config::generate_random(test_size);
let a_host = HostSlice::from_mut_slice(&mut a);
let b_host = HostSlice::from_slice(&b);
let cfg = VecOpsConfig::default();
let bench_descr = format!(" accumulate - data on host {}", test_size);
group.bench_function(&bench_descr, |bb| {
bb.iter(|| {
accumulate_scalars(a_host, b_host, &cfg).unwrap();
})
});
let cfg = VecOpsConfig::default();
let mut a_device = DeviceVec::cuda_malloc(test_size).unwrap();
a_device
.copy_from_host(HostSlice::from_slice(&a))
.unwrap();
let mut b_device = DeviceVec::cuda_malloc(test_size).unwrap();
b_device
.copy_from_host(HostSlice::from_slice(&b))
.unwrap();
let bench_descr = format!(" accumulate - data on device {}", test_size);
group.bench_function(&bench_descr, |bb| {
bb.iter(|| {
accumulate_scalars(&mut a_device[..], &b_device[..], &cfg).unwrap();
})
});
}
group.finish();
}
criterion_group!(benches, benchmark_vec_ops<$field, $field>);
criterion_main!(benches);
};
}

View File

@@ -10,10 +10,18 @@ repository.workspace = true
[dependencies]
icicle-core = { workspace = true }
icicle-cuda-runtime = { workspace = true }
criterion = "0.3"
[build-dependencies]
cmake = "0.1.50"
[dev-dependencies]
criterion = "0.3"
[features]
default = []
devmode = ["icicle-core/devmode"]
[[bench]]
name = "vec_bench"
harness = false

View File

@@ -0,0 +1,5 @@
use icicle_m31::field::ScalarField;
use icicle_core::impl_vec_ops_bench;
impl_vec_ops_bench!("m31", ScalarField);