Feat/roman/poseidon2 (#510)

# This PR

1. Adds C++ API
2. Renames a lot of API functions
3. Adds inplace poseidon2
4. Makes input const at all poseidon functions
5. Adds benchmark for poseidon2
This commit is contained in:
ChickenLover
2024-05-09 19:19:55 +07:00
committed by GitHub
parent 49079d0d2a
commit 9da52bc09f
16 changed files with 423 additions and 84 deletions

View File

@@ -12,10 +12,44 @@
#include "fields/stark_fields/babybear.cuh"
#include "ntt/ntt.cuh"
#include "vec_ops/vec_ops.cuh"
#include "poseidon/poseidon.cuh"
#include "poseidon/tree/merkle.cuh"
#include "poseidon2/poseidon2.cuh"
extern "C" cudaError_t babybear_extension_ntt_cuda(
const babybear::extension_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::extension_t* output);
extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
int width,
int alpha,
int internal_rounds,
int external_rounds,
const babybear::scalar_t* round_constants,
const babybear::scalar_t* internal_matrix_diag,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
extern "C" cudaError_t babybear_init_poseidon2_constants_cuda(
int width,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
extern "C" cudaError_t babybear_poseidon2_hash_cuda(
const babybear::scalar_t* input,
babybear::scalar_t* output,
int number_of_states,
int width,
const poseidon2::Poseidon2Constants<babybear::scalar_t>& constants,
poseidon2::Poseidon2Config& config);
extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
device_context::DeviceContext& ctx);
extern "C" cudaError_t babybear_mul_cuda(
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);

View File

@@ -15,6 +15,7 @@
#include "vec_ops/vec_ops.cuh"
#include "poseidon/poseidon.cuh"
#include "poseidon/tree/merkle.cuh"
#include "poseidon2/poseidon2.cuh"
extern "C" cudaError_t bn254_g2_precompute_msm_bases_cuda(
bn254::g2_affine_t* bases,
@@ -71,6 +72,37 @@ extern "C" cudaError_t bn254_affine_convert_montgomery(
extern "C" cudaError_t bn254_projective_convert_montgomery(
bn254::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
extern "C" cudaError_t bn254_create_poseidon2_constants_cuda(
int width,
int alpha,
int internal_rounds,
int external_rounds,
const bn254::scalar_t* round_constants,
const bn254::scalar_t* internal_matrix_diag,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
extern "C" cudaError_t bn254_init_poseidon2_constants_cuda(
int width,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
extern "C" cudaError_t bn254_poseidon2_hash_cuda(
const bn254::scalar_t* input,
bn254::scalar_t* output,
int number_of_states,
int width,
const poseidon2::Poseidon2Constants<bn254::scalar_t>& constants,
poseidon2::Poseidon2Config& config);
extern "C" cudaError_t bn254_release_poseidon2_constants_cuda(
poseidon2::Poseidon2Constants<bn254::scalar_t>* constants,
device_context::DeviceContext& ctx);
extern "C" cudaError_t bn254_create_optimized_poseidon_constants_cuda(
int arity,
int full_rounds_half,

View File

@@ -0,0 +1,30 @@
extern "C" cudaError_t ${FIELD}_create_poseidon2_constants_cuda(
int width,
int alpha,
int internal_rounds,
int external_rounds,
const ${FIELD}::scalar_t* round_constants,
const ${FIELD}::scalar_t* internal_matrix_diag,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
extern "C" cudaError_t ${FIELD}_init_poseidon2_constants_cuda(
int width,
poseidon2::MdsType mds_type,
poseidon2::DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
extern "C" cudaError_t ${FIELD}_poseidon2_hash_cuda(
const ${FIELD}::scalar_t* input,
${FIELD}::scalar_t* output,
int number_of_states,
int width,
const poseidon2::Poseidon2Constants<${FIELD}::scalar_t>& constants,
poseidon2::Poseidon2Config& config);
extern "C" cudaError_t ${FIELD}_release_poseidon2_constants_cuda(
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* constants,
device_context::DeviceContext& ctx);

View File

@@ -61,7 +61,6 @@ namespace poseidon2 {
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
PoseidonMode mode;
int output_index;
bool loop_state; /**< If true, hash results will also be copied in the input pointer in aligned format */
bool
is_async; /**< Whether to run the Poseidon2 asynchronously. If set to `true`, the poseidon_hash function will be
* non-blocking and you'd need to synchronize it explicitly by running
@@ -78,14 +77,13 @@ namespace poseidon2 {
false, // are_outputs_on_device
PoseidonMode::COMPRESSION,
1, // output_index
false, // loop_state
false, // is_async
};
return config;
}
template <typename S>
cudaError_t create_optimized_poseidon2_constants(
cudaError_t create_poseidon2_constants(
int width,
int alpha,
int internal_rounds,
@@ -101,13 +99,16 @@ namespace poseidon2 {
* Loads pre-calculated optimized constants, moves them to the device
*/
template <typename S>
cudaError_t init_optimized_poseidon2_constants(
cudaError_t init_poseidon2_constants(
int width,
MdsType mds_type,
DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
Poseidon2Constants<S>* constants);
template <typename S>
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
/**
* Compute the poseidon hash over a sequence of preimages.
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
@@ -120,7 +121,7 @@ namespace poseidon2 {
*/
template <typename S, int T>
cudaError_t poseidon2_hash(
S* states,
const S* states,
S* output,
size_t number_of_states,
const Poseidon2Constants<S>& constants,

View File

@@ -24,7 +24,7 @@ using namespace poseidon2_constants_babybear;
namespace poseidon2 {
template <typename S>
cudaError_t create_optimized_poseidon2_constants(
cudaError_t create_poseidon2_constants(
int width,
int alpha,
int internal_rounds,
@@ -71,7 +71,7 @@ namespace poseidon2 {
}
template <typename S>
cudaError_t init_optimized_poseidon2_constants(
cudaError_t init_poseidon2_constants(
int width,
MdsType mds_type,
DiffusionStrategy diffusion,
@@ -105,16 +105,31 @@ namespace poseidon2 {
P2_CONSTANTS_DEF(24)
default:
THROW_ICICLE_ERR(
IcicleError_t::InvalidArgument,
"init_optimized_poseidon2_constants: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
IcicleError_t::InvalidArgument, "init_poseidon2_constants: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
}
S* h_round_constants = reinterpret_cast<S*>(round_constants);
S* h_internal_matrix = reinterpret_cast<S*>(internal_matrix);
create_optimized_poseidon2_constants(
create_poseidon2_constants(
width, alpha, internal_rounds, external_rounds, h_round_constants, h_internal_matrix, mds_type, diffusion, ctx,
poseidon2_constants);
return CHK_LAST();
}
template <typename S>
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx)
{
CHK_INIT_IF_RETURN();
CHK_IF_RETURN(cudaFreeAsync(constants->round_constants, ctx.stream));
CHK_IF_RETURN(cudaFreeAsync(constants->internal_matrix_diag, ctx.stream));
constants->alpha = 0;
constants->width = 0;
constants->external_rounds = 0;
constants->internal_rounds = 0;
constants->round_constants = nullptr;
constants->internal_matrix_diag = nullptr;
return CHK_LAST();
}
} // namespace poseidon2

View File

@@ -6,7 +6,7 @@ using namespace field_config;
#include "poseidon.cu"
namespace poseidon2 {
extern "C" cudaError_t CONCAT_EXPAND(FIELD, create_optimized_poseidon2_constants_cuda)(
extern "C" cudaError_t CONCAT_EXPAND(FIELD, create_poseidon2_constants_cuda)(
int width,
int alpha,
int internal_rounds,
@@ -18,23 +18,23 @@ namespace poseidon2 {
device_context::DeviceContext& ctx,
Poseidon2Constants<scalar_t>* poseidon_constants)
{
return create_optimized_poseidon2_constants<scalar_t>(
return create_poseidon2_constants<scalar_t>(
width, alpha, internal_rounds, external_rounds, round_constants, internal_matrix_diag, mds_type, diffusion, ctx,
poseidon_constants);
}
extern "C" cudaError_t CONCAT_EXPAND(FIELD, init_optimized_poseidon2_constants_cuda)(
extern "C" cudaError_t CONCAT_EXPAND(FIELD, init_poseidon2_constants_cuda)(
int width,
MdsType mds_type,
DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
Poseidon2Constants<scalar_t>* constants)
{
return init_optimized_poseidon2_constants<scalar_t>(width, mds_type, diffusion, ctx, constants);
return init_poseidon2_constants<scalar_t>(width, mds_type, diffusion, ctx, constants);
}
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon2_hash_cuda)(
scalar_t* input,
const scalar_t* input,
scalar_t* output,
int number_of_states,
int width,
@@ -60,4 +60,10 @@ namespace poseidon2 {
}
return CHK_LAST();
}
extern "C" cudaError_t CONCAT_EXPAND(FIELD, release_poseidon2_constants_cuda)(
Poseidon2Constants<scalar_t>* constants, device_context::DeviceContext& ctx)
{
return release_poseidon2_constants<scalar_t>(constants, ctx);
}
} // namespace poseidon2

View File

@@ -14,8 +14,7 @@ namespace poseidon2 {
case 7:
return S::sqr(result2) * result2 * element;
case 11:
S result8 = S::sqr(S::sqr(result2));
return result8 * result2 * element;
return S::sqr(S::sqr(result2)) * result2 * element;
}
}
@@ -177,8 +176,8 @@ namespace poseidon2 {
}
template <typename S, int T>
__global__ void
poseidon2_permutation_kernel(S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S> constants)
__global__ void poseidon2_permutation_kernel(
const S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S> constants)
{
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) { return; }
@@ -223,7 +222,7 @@ namespace poseidon2 {
// These function is just doing copy from the states to the output
template <typename S, int T>
__global__ void get_hash_results(S* states, size_t number_of_states, int index, S* out)
__global__ void get_hash_results(const S* states, size_t number_of_states, int index, S* out)
{
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) { return; }

View File

@@ -13,7 +13,11 @@ namespace poseidon2 {
template <typename S, int T>
cudaError_t permute_many(
S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S>& constants, cudaStream_t& stream)
const S* states,
S* states_out,
size_t number_of_states,
const Poseidon2Constants<S>& constants,
cudaStream_t& stream)
{
poseidon2_permutation_kernel<S, T>
<<<poseidon_number_of_blocks<S, T>(number_of_states), poseidon_block_size, 0, stream>>>(
@@ -24,7 +28,7 @@ namespace poseidon2 {
template <typename S, int T>
cudaError_t poseidon2_hash(
S* states,
const S* states,
S* output,
size_t number_of_states,
const Poseidon2Constants<S>& constants,
@@ -34,7 +38,7 @@ namespace poseidon2 {
cudaStream_t& stream = config.ctx.stream;
S* d_states;
if (config.are_states_on_device) {
d_states = states;
d_states = const_cast<S*>(states);
} else {
// allocate memory for {number_of_states} states of {t} scalars each
CHK_IF_RETURN(cudaMallocAsync(&d_states, number_of_states * T * sizeof(S), stream))

View File

@@ -27,8 +27,7 @@ int main(int argc, char* argv[])
START_TIMER(timer_const);
device_context::DeviceContext ctx = device_context::get_default_device_context();
Poseidon2Constants<scalar_t> constants;
init_optimized_poseidon2_constants<scalar_t>(
T, MdsType::DEFAULT_MDS, DiffusionStrategy::DEFAULT_DIFFUSION, ctx, &constants);
init_poseidon2_constants<scalar_t>(T, MdsType::DEFAULT_MDS, DiffusionStrategy::DEFAULT_DIFFUSION, ctx, &constants);
END_TIMER(timer_const, "Load poseidon constants");
START_TIMER(allocation_timer);

View File

@@ -21,21 +21,25 @@ CURVES_CONFIG = {
"ntt_ext.h",
],
"bls12_381": [
"poseidon2.h",
"field_ext.h",
"vec_ops_ext.h",
"ntt_ext.h",
],
"bls12_377": [
"poseidon2.h",
"field_ext.h",
"vec_ops_ext.h",
"ntt_ext.h",
],
"bw6_761": [
"poseidon2.h",
"field_ext.h",
"vec_ops_ext.h",
"ntt_ext.h",
],
"grumpkin": {
"poseidon2.h",
"curve_g2.h",
"msm_g2.h",
"ecntt.h",
@@ -56,6 +60,7 @@ FIELDS_CONFIG = {
},
"stark252": {
"poseidon.h",
"poseidon2.h",
"field_ext.h",
"vec_ops_ext.h",
"ntt_ext.h",
@@ -106,6 +111,8 @@ if __name__ == "__main__":
if any(header.name.startswith("poseidon") for header in headers):
includes.append('#include "poseidon/poseidon.cuh"')
includes.append('#include "poseidon/tree/merkle.cuh"')
if any(header.name.startswith("poseidon2") for header in headers):
includes.append('#include "poseidon2/poseidon2.cuh"')
contents = WARN_TEXT + INCLUDE_ONCE.format(curve.upper()) + "\n".join(includes) + "\n\n"
for header in headers:
@@ -138,6 +145,8 @@ if __name__ == "__main__":
if any(header.name.startswith("poseidon") for header in headers):
includes.append('#include "poseidon/poseidon.cuh"')
includes.append('#include "poseidon/tree/merkle.cuh"')
if any(header.name.startswith("poseidon2") for header in headers):
includes.append('#include "poseidon2/poseidon2.cuh"')
contents = WARN_TEXT + INCLUDE_ONCE.format(field.upper()) + "\n".join(includes) + "\n\n"
for header in headers:

View File

@@ -74,9 +74,6 @@ pub struct Poseidon2Config<'a> {
pub output_index: u32,
/// If true, hash results will also be copied in the input pointer in aligned format
pub loop_state: bool,
/// Whether to run Poseidon asynchronously. If set to `true`, Poseidon will be non-blocking
/// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
/// If set to `false`, Poseidon will block the current CPU thread.
@@ -97,14 +94,13 @@ impl<'a> Poseidon2Config<'a> {
are_outputs_on_device: false,
mode: PoseidonMode::Compression,
output_index: 1,
loop_state: false,
is_async: false,
}
}
}
pub trait Poseidon2<F: FieldImpl> {
fn create_optimized_constants<'a>(
fn create_constants<'a>(
width: u32,
alpha: u32,
internal_rounds: u32,
@@ -115,24 +111,32 @@ pub trait Poseidon2<F: FieldImpl> {
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
) -> IcicleResult<Poseidon2Constants<'a, F>>;
fn load_optimized_constants<'a>(
fn load_constants<'a>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
) -> IcicleResult<Poseidon2Constants<'a, F>>;
fn poseidon_unchecked(
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
states: &(impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>;
fn poseidon_unchecked_inplace(
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>;
fn release_constants(constants: &Poseidon2Constants<F>, ctx: &DeviceContext) -> IcicleResult<()>;
}
/// Loads pre-calculated poseidon constants on the GPU.
pub fn load_optimized_poseidon2_constants<'a, F>(
pub fn load_poseidon2_constants<'a, F>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
@@ -142,11 +146,11 @@ where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
<<F as FieldImpl>::Config as Poseidon2<F>>::load_optimized_constants(width, mds_type, diffusion, ctx)
<<F as FieldImpl>::Config as Poseidon2<F>>::load_constants(width, mds_type, diffusion, ctx)
}
/// Creates new instance of poseidon constants on the GPU.
pub fn create_optimized_poseidon2_constants<'a, F>(
pub fn create_poseidon2_constants<'a, F>(
width: u32,
alpha: u32,
ctx: &DeviceContext,
@@ -161,7 +165,7 @@ where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
<<F as FieldImpl>::Config as Poseidon2<F>>::create_optimized_constants(
<<F as FieldImpl>::Config as Poseidon2<F>>::create_constants(
width,
alpha,
internal_rounds,
@@ -174,30 +178,13 @@ where
)
}
/// Computes the poseidon hashes for multiple preimages.
///
/// # Arguments
///
/// * `input` - a pointer to the input data. May point to a vector of preimages or a vector of states filled with preimages.
///
/// * `output` - a pointer to the output data. Must be at least of size [number_of_states](number_of_states)
///
/// * `number_of_states` - number of input blocks of size `arity`
///
/// * `arity` - the arity of the hash function (the size of 1 preimage)
///
/// * `constants` - Poseidon constants.
///
/// * `config` - config used to specify extra arguments of the Poseidon.
pub fn poseidon_hash_many<F>(
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
fn poseidon_checks<F>(
states: &(impl HostOrDeviceSlice<F> + ?Sized),
output: &(impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>
where
) where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
@@ -208,7 +195,6 @@ where
number_of_states * width
);
}
if output.len() < number_of_states as usize {
panic!(
"output len is {}; but needs to be at least {}",
@@ -226,6 +212,7 @@ where
"Device ids in input and context are different"
);
}
if let Some(device_id) = output.device_id() {
assert_eq!(
device_id, ctx_device_id,
@@ -233,6 +220,36 @@ where
);
}
check_device(ctx_device_id);
}
/// Computes the poseidon hashes for multiple preimages.
///
/// # Arguments
///
/// * `input` - a pointer to the input data. May point to a vector of preimages or a vector of states filled with preimages.
///
/// * `output` - a pointer to the output data. Must be at least of size [number_of_states](number_of_states)
///
/// * `number_of_states` - number of input blocks of size `arity`
///
/// * `arity` - the arity of the hash function (the size of 1 preimage)
///
/// * `constants` - Poseidon constants.
///
/// * `config` - config used to specify extra arguments of the Poseidon.
pub fn poseidon2_hash_many<F>(
states: &(impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
poseidon_checks(states, output, number_of_states, width, config);
let mut local_cfg = config.clone();
local_cfg.are_states_on_device = states.is_on_device();
local_cfg.are_outputs_on_device = output.is_on_device();
@@ -247,6 +264,39 @@ where
)
}
pub fn poseidon2_hash_many_inplace<F>(
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
poseidon_checks(states, states, number_of_states, width, config);
let mut local_cfg = config.clone();
local_cfg.are_states_on_device = states.is_on_device();
local_cfg.are_outputs_on_device = states.is_on_device();
<<F as FieldImpl>::Config as Poseidon2<F>>::poseidon_unchecked_inplace(
states,
number_of_states,
width,
constants,
&local_cfg,
)
}
pub fn release_poseidon2_constants<'a, F>(constants: &Poseidon2Constants<F>, ctx: &DeviceContext) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
<<F as FieldImpl>::Config as Poseidon2<F>>::release_constants(constants, ctx)
}
#[macro_export]
macro_rules! impl_poseidon2 {
(
@@ -261,8 +311,8 @@ macro_rules! impl_poseidon2 {
Poseidon2Constants,
};
extern "C" {
#[link_name = concat!($field_prefix, "_create_optimized_poseidon2_constants_cuda")]
pub(crate) fn _create_optimized_constants(
#[link_name = concat!($field_prefix, "_create_poseidon2_constants_cuda")]
pub(crate) fn _create_constants(
width: u32,
alpha: u32,
internal_rounds: u32,
@@ -275,8 +325,8 @@ macro_rules! impl_poseidon2 {
poseidon_constants: *mut Poseidon2Constants<$field>,
) -> CudaError;
#[link_name = concat!($field_prefix, "_init_optimized_poseidon2_constants_cuda")]
pub(crate) fn _load_optimized_constants(
#[link_name = concat!($field_prefix, "_init_poseidon2_constants_cuda")]
pub(crate) fn _load_constants(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
@@ -284,9 +334,15 @@ macro_rules! impl_poseidon2 {
constants: *mut Poseidon2Constants<$field>,
) -> CudaError;
#[link_name = concat!($field_prefix, "_release_poseidon2_constants_cuda")]
pub(crate) fn _release_constants(
constants: &Poseidon2Constants<$field>,
ctx: &DeviceContext,
) -> CudaError;
#[link_name = concat!($field_prefix, "_poseidon2_hash_cuda")]
pub(crate) fn hash_many(
states: *mut $field,
states: *const $field,
output: *mut $field,
number_of_states: u32,
width: u32,
@@ -297,7 +353,7 @@ macro_rules! impl_poseidon2 {
}
impl Poseidon2<$field> for $field_config {
fn create_optimized_constants<'a>(
fn create_constants<'a>(
width: u32,
alpha: u32,
internal_rounds: u32,
@@ -310,7 +366,7 @@ macro_rules! impl_poseidon2 {
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
unsafe {
let mut poseidon_constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
let err = $field_prefix_ident::_create_optimized_constants(
let err = $field_prefix_ident::_create_constants(
width,
alpha,
internal_rounds,
@@ -327,7 +383,7 @@ macro_rules! impl_poseidon2 {
}
}
fn load_optimized_constants<'a>(
fn load_constants<'a>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
@@ -335,20 +391,15 @@ macro_rules! impl_poseidon2 {
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
unsafe {
let mut constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
let err = $field_prefix_ident::_load_optimized_constants(
width,
mds_type,
diffusion,
ctx,
constants.as_mut_ptr(),
)
.wrap();
let err =
$field_prefix_ident::_load_constants(width, mds_type, diffusion, ctx, constants.as_mut_ptr())
.wrap();
err.and(Ok(constants.assume_init()))
}
}
fn poseidon_unchecked(
states: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
states: &(impl HostOrDeviceSlice<$field> + ?Sized),
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
number_of_states: u32,
width: u32,
@@ -357,7 +408,7 @@ macro_rules! impl_poseidon2 {
) -> IcicleResult<()> {
unsafe {
$field_prefix_ident::hash_many(
states.as_mut_ptr(),
states.as_ptr(),
output.as_mut_ptr(),
number_of_states,
width,
@@ -367,6 +418,30 @@ macro_rules! impl_poseidon2 {
.wrap()
}
}
fn poseidon_unchecked_inplace(
states: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<$field>,
config: &Poseidon2Config,
) -> IcicleResult<()> {
unsafe {
$field_prefix_ident::hash_many(
states.as_ptr(),
states.as_mut_ptr(),
number_of_states,
width,
constants,
config,
)
.wrap()
}
}
fn release_constants<'a>(constants: &Poseidon2Constants<$field>, ctx: &DeviceContext) -> IcicleResult<()> {
unsafe { $field_prefix_ident::_release_constants(constants, ctx).wrap() }
}
}
};
}
@@ -382,3 +457,124 @@ macro_rules! impl_poseidon2_tests {
}
};
}
pub mod bench {
use criterion::{black_box, Criterion};
use icicle_cuda_runtime::{
device_context::DeviceContext,
memory::{HostOrDeviceSlice, HostSlice},
};
use crate::{
ntt::FieldImpl,
poseidon2::{load_poseidon2_constants, DiffusionStrategy, MdsType},
traits::GenerateRandom,
vec_ops::VecOps,
};
use super::{poseidon2_hash_many, Poseidon2, Poseidon2Config, Poseidon2Constants};
#[allow(unused)]
fn poseidon2_for_bench<'a, F: FieldImpl>(
states: &(impl HostOrDeviceSlice<F> + ?Sized),
poseidon2_result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: usize,
width: usize,
constants: &Poseidon2Constants<'a, F>,
config: &Poseidon2Config,
_seed: u32,
) where
<F as FieldImpl>::Config: Poseidon2<F> + GenerateRandom<F>,
<F as FieldImpl>::Config: VecOps<F>,
{
poseidon2_hash_many(
states,
poseidon2_result,
number_of_states as u32,
width as u32,
constants,
config,
)
.unwrap();
}
#[allow(unused)]
pub fn benchmark_poseidon2<F: FieldImpl>(c: &mut Criterion)
where
<F as FieldImpl>::Config: Poseidon2<F> + GenerateRandom<F>,
<F as FieldImpl>::Config: VecOps<F>,
{
use criterion::SamplingMode;
use std::env;
let group_id = format!("Poseidon2");
let mut group = c.benchmark_group(&group_id);
group.sampling_mode(SamplingMode::Flat);
group.sample_size(10);
const MAX_LOG2: u32 = 25; // max length = 2 ^ MAX_LOG2
let max_log2 = env::var("MAX_LOG2")
.unwrap_or_else(|_| MAX_LOG2.to_string())
.parse::<u32>()
.unwrap_or(MAX_LOG2);
for test_size_log2 in 13u32..max_log2 + 1 {
for t in [2, 3, 4, 8, 16, 20, 24] {
let number_of_states = 1 << test_size_log2;
let full_size = t * number_of_states;
let scalars = F::Config::generate_random(full_size);
let input = HostSlice::from_slice(&scalars);
let mut permutation_result = vec![F::zero(); full_size];
let permutation_result_slice = HostSlice::from_mut_slice(&mut permutation_result);
let ctx = DeviceContext::default();
let config = Poseidon2Config::default();
for mds in [MdsType::Default, MdsType::Plonky] {
for diffusion in [DiffusionStrategy::Default, DiffusionStrategy::Montgomery] {
let constants =
load_poseidon2_constants(t as u32, mds.clone(), diffusion.clone(), &ctx).unwrap();
let bench_descr = format!(
"Mds::{:?}; Diffusion::{:?}; Number of states: {}; Width: {}",
mds, diffusion, number_of_states, t
);
group.bench_function(&bench_descr, |b| {
b.iter(|| {
poseidon2_for_bench::<F>(
input,
permutation_result_slice,
number_of_states,
t,
&constants,
&config,
black_box(1),
)
})
});
// }
}
}
}
}
group.finish();
}
}
#[macro_export]
macro_rules! impl_poseidon2_bench {
(
$field_prefix:literal,
$field:ident
) => {
use criterion::criterion_group;
use criterion::criterion_main;
use icicle_core::poseidon2::bench::benchmark_poseidon2;
criterion_group!(benches, benchmark_poseidon2<$field>);
criterion_main!(benches);
};
}

View File

@@ -4,8 +4,7 @@ use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::{HostOrDeviceSlice, HostSlice};
use super::{
load_optimized_poseidon2_constants, poseidon_hash_many, DiffusionStrategy, Poseidon2, Poseidon2Config,
Poseidon2Constants,
load_poseidon2_constants, poseidon2_hash_many, DiffusionStrategy, Poseidon2, Poseidon2Config, Poseidon2Constants,
};
pub fn init_poseidon<'a, F: FieldImpl>(
@@ -17,7 +16,7 @@ where
<F as FieldImpl>::Config: Poseidon2<F>,
{
let ctx = DeviceContext::default();
load_optimized_poseidon2_constants::<F>(width, mds_type, diffusion, &ctx).unwrap()
load_poseidon2_constants::<F>(width, mds_type, diffusion, &ctx).unwrap()
}
fn _check_poseidon_hash_many<F: FieldImpl>(width: u32, constants: Poseidon2Constants<F>) -> (F, F)
@@ -32,7 +31,7 @@ where
let output_slice = HostSlice::from_mut_slice(&mut outputs);
let config = Poseidon2Config::default();
poseidon_hash_many::<F>(
poseidon2_hash_many::<F>(
input_slice,
output_slice,
test_size as u32,
@@ -86,7 +85,7 @@ where
let mut config = Poseidon2Config::default();
config.mode = PoseidonMode::Permutation;
poseidon_hash_many::<F>(
poseidon2_hash_many::<F>(
input_slice,
output_slice,
batch_size as u32,

View File

@@ -0,0 +1,5 @@
use icicle_bn254::curve::ScalarField;
use icicle_core::impl_poseidon2_bench;
impl_poseidon2_bench!("bn254", ScalarField);

View File

@@ -15,6 +15,7 @@ icicle-cuda-runtime = { workspace = true }
cmake = "0.1.50"
[dev-dependencies]
criterion = "0.3"
risc0-core = "0.21.0"
risc0-zkp = "0.21.0"
p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
@@ -29,3 +30,7 @@ serial_test = "3.0.0"
[features]
default = []
devmode = ["icicle-core/devmode"]
[[bench]]
name = "poseidon2"
harness = false

View File

@@ -0,0 +1,5 @@
use icicle_babybear::field::ScalarField;
use icicle_core::impl_poseidon2_bench;
impl_poseidon2_bench!("babybear", ScalarField);

View File

@@ -16,7 +16,7 @@ impl_poseidon2!("babybear", babybear, ScalarField, ScalarCfg);
pub(crate) mod tests {
use crate::field::ScalarField;
use icicle_core::impl_poseidon2_tests;
use icicle_core::poseidon2::{create_optimized_poseidon2_constants, tests::*, DiffusionStrategy, MdsType};
use icicle_core::poseidon2::{create_poseidon2_constants, tests::*, DiffusionStrategy, MdsType};
use icicle_core::traits::FieldImpl;
use icicle_cuda_runtime::device_context::DeviceContext;
@@ -291,7 +291,7 @@ pub(crate) mod tests {
ScalarField::from_u32(1 << 13),
ScalarField::from_u32(1 << 15),
];
let mut constants = create_optimized_poseidon2_constants(
let constants = create_poseidon2_constants(
WIDTH as u32,
ALPHA as u32,
&ctx,
@@ -616,7 +616,7 @@ pub(crate) mod tests {
ScalarField::from_u32(1 << 22),
ScalarField::from_u32(1 << 23),
];
let mut constants = create_optimized_poseidon2_constants(
let constants = create_poseidon2_constants(
WIDTH as u32,
ALPHA as u32,
&ctx,