Feat/roman/poseidon2 (#510)

# This PR

1. Adds a C++ API
2. Renames many API functions
3. Adds in-place Poseidon2
4. Makes the input const in all Poseidon functions
5. Adds a benchmark for Poseidon2
This commit is contained in:
ChickenLover
2024-05-09 19:19:55 +07:00
committed by GitHub
parent 49079d0d2a
commit 9da52bc09f
16 changed files with 423 additions and 84 deletions

View File

@@ -74,9 +74,6 @@ pub struct Poseidon2Config<'a> {
pub output_index: u32,
/// If true, hash results will also be copied in the input pointer in aligned format
pub loop_state: bool,
/// Whether to run Poseidon asynchronously. If set to `true`, Poseidon will be non-blocking
/// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
/// If set to `false`, Poseidon will block the current CPU thread.
@@ -97,14 +94,13 @@ impl<'a> Poseidon2Config<'a> {
are_outputs_on_device: false,
mode: PoseidonMode::Compression,
output_index: 1,
loop_state: false,
is_async: false,
}
}
}
pub trait Poseidon2<F: FieldImpl> {
fn create_optimized_constants<'a>(
fn create_constants<'a>(
width: u32,
alpha: u32,
internal_rounds: u32,
@@ -115,24 +111,32 @@ pub trait Poseidon2<F: FieldImpl> {
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
) -> IcicleResult<Poseidon2Constants<'a, F>>;
fn load_optimized_constants<'a>(
fn load_constants<'a>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
) -> IcicleResult<Poseidon2Constants<'a, F>>;
fn poseidon_unchecked(
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
states: &(impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>;
fn poseidon_unchecked_inplace(
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>;
fn release_constants(constants: &Poseidon2Constants<F>, ctx: &DeviceContext) -> IcicleResult<()>;
}
/// Loads pre-calculated poseidon constants on the GPU.
pub fn load_optimized_poseidon2_constants<'a, F>(
pub fn load_poseidon2_constants<'a, F>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
@@ -142,11 +146,11 @@ where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
<<F as FieldImpl>::Config as Poseidon2<F>>::load_optimized_constants(width, mds_type, diffusion, ctx)
<<F as FieldImpl>::Config as Poseidon2<F>>::load_constants(width, mds_type, diffusion, ctx)
}
/// Creates new instance of poseidon constants on the GPU.
pub fn create_optimized_poseidon2_constants<'a, F>(
pub fn create_poseidon2_constants<'a, F>(
width: u32,
alpha: u32,
ctx: &DeviceContext,
@@ -161,7 +165,7 @@ where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
<<F as FieldImpl>::Config as Poseidon2<F>>::create_optimized_constants(
<<F as FieldImpl>::Config as Poseidon2<F>>::create_constants(
width,
alpha,
internal_rounds,
@@ -174,30 +178,13 @@ where
)
}
/// Computes the poseidon hashes for multiple preimages.
///
/// # Arguments
///
/// * `input` - a pointer to the input data. May point to a vector of preimages or a vector of states filled with preimages.
///
/// * `output` - a pointer to the output data. Must be at least of size [number_of_states](number_of_states)
///
/// * `number_of_states` - number of input blocks of size `arity`
///
/// * `arity` - the arity of the hash function (the size of 1 preimage)
///
/// * `constants` - Poseidon constants.
///
/// * `config` - config used to specify extra arguments of the Poseidon.
pub fn poseidon_hash_many<F>(
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
fn poseidon_checks<F>(
states: &(impl HostOrDeviceSlice<F> + ?Sized),
output: &(impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>
where
) where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
@@ -208,7 +195,6 @@ where
number_of_states * width
);
}
if output.len() < number_of_states as usize {
panic!(
"output len is {}; but needs to be at least {}",
@@ -226,6 +212,7 @@ where
"Device ids in input and context are different"
);
}
if let Some(device_id) = output.device_id() {
assert_eq!(
device_id, ctx_device_id,
@@ -233,6 +220,36 @@ where
);
}
check_device(ctx_device_id);
}
/// Computes the Poseidon2 hashes for multiple preimages.
///
/// # Arguments
///
/// * `states` - the input data. May hold a vector of preimages or a vector of states filled with preimages.
///
/// * `output` - the output buffer. Must hold at least `number_of_states` elements.
///
/// * `number_of_states` - number of input blocks of size `width`
///
/// * `width` - the number of field elements in one input block (the size of one state)
///
/// * `constants` - Poseidon2 constants.
///
/// * `config` - config used to specify extra arguments of Poseidon2.
pub fn poseidon2_hash_many<F>(
states: &(impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
poseidon_checks(states, output, number_of_states, width, config);
let mut local_cfg = config.clone();
local_cfg.are_states_on_device = states.is_on_device();
local_cfg.are_outputs_on_device = output.is_on_device();
@@ -247,6 +264,39 @@ where
)
}
/// Runs Poseidon2 over `states`, using the same buffer as both input and output.
///
/// # Arguments
///
/// * `states` - buffer that is read as the input and passed on as the destination.
///
/// * `number_of_states` - number of states to process.
///
/// * `width` - number of field elements per state.
///
/// * `constants` - Poseidon2 constants.
///
/// * `config` - extra arguments. A local copy is made and its `are_states_on_device` /
///   `are_outputs_on_device` flags are overwritten with `states.is_on_device()`.
///
/// # Panics
///
/// Argument validation is delegated to `poseidon_checks`, which panics on invalid input.
pub fn poseidon2_hash_many_inplace<F>(
    states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
    number_of_states: u32,
    width: u32,
    constants: &Poseidon2Constants<F>,
    config: &Poseidon2Config,
) -> IcicleResult<()>
where
    F: FieldImpl,
    F::Config: Poseidon2<F>,
{
    // The single buffer plays both roles, so it is validated as input and as output.
    poseidon_checks(&*states, &*states, number_of_states, width, config);

    let on_device = states.is_on_device();
    let mut inplace_cfg = config.clone();
    inplace_cfg.are_states_on_device = on_device;
    inplace_cfg.are_outputs_on_device = on_device;

    <F::Config as Poseidon2<F>>::poseidon_unchecked_inplace(
        states,
        number_of_states,
        width,
        constants,
        &inplace_cfg,
    )
}
/// Releases a set of Poseidon2 constants.
///
/// Thin wrapper that forwards to the `Poseidon2::release_constants` implementation of the
/// field's config type.
///
/// # Arguments
///
/// * `constants` - the constants to release.
///
/// * `ctx` - device context passed through to the implementation.
pub fn release_poseidon2_constants<F>(constants: &Poseidon2Constants<F>, ctx: &DeviceContext) -> IcicleResult<()>
where
    F: FieldImpl,
    <F as FieldImpl>::Config: Poseidon2<F>,
{
    <<F as FieldImpl>::Config as Poseidon2<F>>::release_constants(constants, ctx)
}
#[macro_export]
macro_rules! impl_poseidon2 {
(
@@ -261,8 +311,8 @@ macro_rules! impl_poseidon2 {
Poseidon2Constants,
};
extern "C" {
#[link_name = concat!($field_prefix, "_create_optimized_poseidon2_constants_cuda")]
pub(crate) fn _create_optimized_constants(
#[link_name = concat!($field_prefix, "_create_poseidon2_constants_cuda")]
pub(crate) fn _create_constants(
width: u32,
alpha: u32,
internal_rounds: u32,
@@ -275,8 +325,8 @@ macro_rules! impl_poseidon2 {
poseidon_constants: *mut Poseidon2Constants<$field>,
) -> CudaError;
#[link_name = concat!($field_prefix, "_init_optimized_poseidon2_constants_cuda")]
pub(crate) fn _load_optimized_constants(
#[link_name = concat!($field_prefix, "_init_poseidon2_constants_cuda")]
pub(crate) fn _load_constants(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
@@ -284,9 +334,15 @@ macro_rules! impl_poseidon2 {
constants: *mut Poseidon2Constants<$field>,
) -> CudaError;
#[link_name = concat!($field_prefix, "_release_poseidon2_constants_cuda")]
pub(crate) fn _release_constants(
constants: &Poseidon2Constants<$field>,
ctx: &DeviceContext,
) -> CudaError;
#[link_name = concat!($field_prefix, "_poseidon2_hash_cuda")]
pub(crate) fn hash_many(
states: *mut $field,
states: *const $field,
output: *mut $field,
number_of_states: u32,
width: u32,
@@ -297,7 +353,7 @@ macro_rules! impl_poseidon2 {
}
impl Poseidon2<$field> for $field_config {
fn create_optimized_constants<'a>(
fn create_constants<'a>(
width: u32,
alpha: u32,
internal_rounds: u32,
@@ -310,7 +366,7 @@ macro_rules! impl_poseidon2 {
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
unsafe {
let mut poseidon_constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
let err = $field_prefix_ident::_create_optimized_constants(
let err = $field_prefix_ident::_create_constants(
width,
alpha,
internal_rounds,
@@ -327,7 +383,7 @@ macro_rules! impl_poseidon2 {
}
}
fn load_optimized_constants<'a>(
fn load_constants<'a>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
@@ -335,20 +391,15 @@ macro_rules! impl_poseidon2 {
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
unsafe {
let mut constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
let err = $field_prefix_ident::_load_optimized_constants(
width,
mds_type,
diffusion,
ctx,
constants.as_mut_ptr(),
)
.wrap();
let err =
$field_prefix_ident::_load_constants(width, mds_type, diffusion, ctx, constants.as_mut_ptr())
.wrap();
err.and(Ok(constants.assume_init()))
}
}
fn poseidon_unchecked(
states: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
states: &(impl HostOrDeviceSlice<$field> + ?Sized),
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
number_of_states: u32,
width: u32,
@@ -357,7 +408,7 @@ macro_rules! impl_poseidon2 {
) -> IcicleResult<()> {
unsafe {
$field_prefix_ident::hash_many(
states.as_mut_ptr(),
states.as_ptr(),
output.as_mut_ptr(),
number_of_states,
width,
@@ -367,6 +418,30 @@ macro_rules! impl_poseidon2 {
.wrap()
}
}
fn poseidon_unchecked_inplace(
states: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<$field>,
config: &Poseidon2Config,
) -> IcicleResult<()> {
unsafe {
$field_prefix_ident::hash_many(
states.as_ptr(),
states.as_mut_ptr(),
number_of_states,
width,
constants,
config,
)
.wrap()
}
}
/// Releases `constants` by calling the foreign `_release_constants` routine and
/// converting the returned `CudaError` with `wrap()`.
fn release_constants(constants: &Poseidon2Constants<$field>, ctx: &DeviceContext) -> IcicleResult<()> {
    // SAFETY: both arguments are live Rust references for the whole duration of the call.
    unsafe { $field_prefix_ident::_release_constants(constants, ctx).wrap() }
}
}
};
}
@@ -382,3 +457,124 @@ macro_rules! impl_poseidon2_tests {
}
};
}
/// Criterion benchmarks for the Poseidon2 hash.
pub mod bench {
    use criterion::{black_box, Criterion};
    use icicle_cuda_runtime::{
        device_context::DeviceContext,
        memory::{HostOrDeviceSlice, HostSlice},
    };

    use crate::{
        ntt::FieldImpl,
        poseidon2::{load_poseidon2_constants, DiffusionStrategy, MdsType},
        traits::GenerateRandom,
        vec_ops::VecOps,
    };

    use super::{poseidon2_hash_many, Poseidon2, Poseidon2Config, Poseidon2Constants};

    /// Runs a single `poseidon2_hash_many` call and unwraps its result.
    ///
    /// `_seed` is not read; the caller passes a `black_box`ed value for it.
    // presumably silences dead-code warnings in builds that do not use the benchmark — confirm
    #[allow(unused)]
    fn poseidon2_for_bench<'a, F: FieldImpl>(
        states: &(impl HostOrDeviceSlice<F> + ?Sized),
        poseidon2_result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
        number_of_states: usize,
        width: usize,
        constants: &Poseidon2Constants<'a, F>,
        config: &Poseidon2Config,
        _seed: u32,
    ) where
        <F as FieldImpl>::Config: Poseidon2<F> + GenerateRandom<F>,
        <F as FieldImpl>::Config: VecOps<F>,
    {
        poseidon2_hash_many(
            states,
            poseidon2_result,
            number_of_states as u32,
            width as u32,
            constants,
            config,
        )
        .unwrap();
    }

    /// Benchmarks Poseidon2 for every combination of batch size, state width,
    /// MDS type and diffusion strategy.
    ///
    /// Batch sizes are `2^13 ..= 2^max_log2`, where `max_log2` is read from the
    /// `MAX_LOG2` environment variable and falls back to 25 when the variable is
    /// missing or is not a valid `u32`.
    // presumably silences dead-code warnings in builds that do not use the benchmark — confirm
    #[allow(unused)]
    pub fn benchmark_poseidon2<F: FieldImpl>(c: &mut Criterion)
    where
        <F as FieldImpl>::Config: Poseidon2<F> + GenerateRandom<F>,
        <F as FieldImpl>::Config: VecOps<F>,
    {
        use criterion::SamplingMode;
        use std::env;

        let mut group = c.benchmark_group("Poseidon2");
        group.sampling_mode(SamplingMode::Flat);
        group.sample_size(10);

        const MAX_LOG2: u32 = 25; // max length = 2 ^ MAX_LOG2

        let max_log2 = env::var("MAX_LOG2")
            .ok()
            .and_then(|raw| raw.parse::<u32>().ok())
            .unwrap_or(MAX_LOG2);

        // Neither value depends on the loop variables, so build them once.
        let ctx = DeviceContext::default();
        let config = Poseidon2Config::default();

        for test_size_log2 in 13..=max_log2 {
            for t in [2, 3, 4, 8, 16, 20, 24] {
                let number_of_states = 1usize << test_size_log2;
                let full_size = t * number_of_states;

                let scalars = F::Config::generate_random(full_size);
                let input = HostSlice::from_slice(&scalars);

                let mut permutation_result = vec![F::zero(); full_size];
                let permutation_result_slice = HostSlice::from_mut_slice(&mut permutation_result);

                for mds in [MdsType::Default, MdsType::Plonky] {
                    for diffusion in [DiffusionStrategy::Default, DiffusionStrategy::Montgomery] {
                        // NOTE(review): these constants are never passed to a release function;
                        // confirm whether they should be released after each measurement.
                        let constants =
                            load_poseidon2_constants(t as u32, mds.clone(), diffusion.clone(), &ctx).unwrap();

                        let bench_descr = format!(
                            "Mds::{:?}; Diffusion::{:?}; Number of states: {}; Width: {}",
                            mds, diffusion, number_of_states, t
                        );

                        group.bench_function(&bench_descr, |b| {
                            b.iter(|| {
                                poseidon2_for_bench::<F>(
                                    input,
                                    permutation_result_slice,
                                    number_of_states,
                                    t,
                                    &constants,
                                    &config,
                                    black_box(1),
                                )
                            })
                        });
                    }
                }
            }
        }

        group.finish();
    }
}
/// Expands to a Criterion benchmark entry point (`criterion_group!` + `criterion_main!`)
/// that runs `benchmark_poseidon2` for the given field type.
///
/// * `$field_prefix` - accepted as a string literal but not used by the expansion.
/// * `$field` - the field type the benchmark is instantiated with.
///
/// The benchmark function is referenced through `$crate`, so the expansion does not
/// depend on the name under which the calling crate imports this crate.
#[macro_export]
macro_rules! impl_poseidon2_bench {
    (
        $field_prefix:literal,
        $field:ident
    ) => {
        use criterion::criterion_group;
        use criterion::criterion_main;
        use $crate::poseidon2::bench::benchmark_poseidon2;

        criterion_group!(benches, benchmark_poseidon2<$field>);
        criterion_main!(benches);
    };
}

View File

@@ -4,8 +4,7 @@ use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::{HostOrDeviceSlice, HostSlice};
use super::{
load_optimized_poseidon2_constants, poseidon_hash_many, DiffusionStrategy, Poseidon2, Poseidon2Config,
Poseidon2Constants,
load_poseidon2_constants, poseidon2_hash_many, DiffusionStrategy, Poseidon2, Poseidon2Config, Poseidon2Constants,
};
pub fn init_poseidon<'a, F: FieldImpl>(
@@ -17,7 +16,7 @@ where
<F as FieldImpl>::Config: Poseidon2<F>,
{
let ctx = DeviceContext::default();
load_optimized_poseidon2_constants::<F>(width, mds_type, diffusion, &ctx).unwrap()
load_poseidon2_constants::<F>(width, mds_type, diffusion, &ctx).unwrap()
}
fn _check_poseidon_hash_many<F: FieldImpl>(width: u32, constants: Poseidon2Constants<F>) -> (F, F)
@@ -32,7 +31,7 @@ where
let output_slice = HostSlice::from_mut_slice(&mut outputs);
let config = Poseidon2Config::default();
poseidon_hash_many::<F>(
poseidon2_hash_many::<F>(
input_slice,
output_slice,
test_size as u32,
@@ -86,7 +85,7 @@ where
let mut config = Poseidon2Config::default();
config.mode = PoseidonMode::Permutation;
poseidon_hash_many::<F>(
poseidon2_hash_many::<F>(
input_slice,
output_slice,
batch_size as u32,

View File

@@ -0,0 +1,5 @@
// Benchmark entry point: instantiates the Poseidon2 benchmark for the bn254 scalar field.
use icicle_bn254::curve::ScalarField;
use icicle_core::impl_poseidon2_bench;
impl_poseidon2_bench!("bn254", ScalarField);

View File

@@ -15,6 +15,7 @@ icicle-cuda-runtime = { workspace = true }
cmake = "0.1.50"
[dev-dependencies]
criterion = "0.3"
risc0-core = "0.21.0"
risc0-zkp = "0.21.0"
p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
@@ -29,3 +30,7 @@ serial_test = "3.0.0"
[features]
default = []
devmode = ["icicle-core/devmode"]
[[bench]]
name = "poseidon2"
harness = false

View File

@@ -0,0 +1,5 @@
// Benchmark entry point: instantiates the Poseidon2 benchmark for the babybear scalar field.
use icicle_babybear::field::ScalarField;
use icicle_core::impl_poseidon2_bench;
impl_poseidon2_bench!("babybear", ScalarField);

View File

@@ -16,7 +16,7 @@ impl_poseidon2!("babybear", babybear, ScalarField, ScalarCfg);
pub(crate) mod tests {
use crate::field::ScalarField;
use icicle_core::impl_poseidon2_tests;
use icicle_core::poseidon2::{create_optimized_poseidon2_constants, tests::*, DiffusionStrategy, MdsType};
use icicle_core::poseidon2::{create_poseidon2_constants, tests::*, DiffusionStrategy, MdsType};
use icicle_core::traits::FieldImpl;
use icicle_cuda_runtime::device_context::DeviceContext;
@@ -291,7 +291,7 @@ pub(crate) mod tests {
ScalarField::from_u32(1 << 13),
ScalarField::from_u32(1 << 15),
];
let mut constants = create_optimized_poseidon2_constants(
let constants = create_poseidon2_constants(
WIDTH as u32,
ALPHA as u32,
&ctx,
@@ -616,7 +616,7 @@ pub(crate) mod tests {
ScalarField::from_u32(1 << 22),
ScalarField::from_u32(1 << 23),
];
let mut constants = create_optimized_poseidon2_constants(
let constants = create_poseidon2_constants(
WIDTH as u32,
ALPHA as u32,
&ctx,