mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-09 13:07:59 -05:00
Feat/roman/poseidon2 (#510)
# This PR 1. Adds C++ API 2. Renames a lot of API functions 3. Adds inplace poseidon2 4. Makes input const at all poseidon functions 5. Adds benchmark for poseidon2
This commit is contained in:
@@ -12,10 +12,44 @@
|
||||
#include "fields/stark_fields/babybear.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" cudaError_t babybear_extension_ntt_cuda(
|
||||
const babybear::extension_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::extension_t* output);
|
||||
|
||||
extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const babybear::scalar_t* round_constants,
|
||||
const babybear::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t babybear_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t babybear_poseidon2_hash_cuda(
|
||||
const babybear::scalar_t* input,
|
||||
babybear::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<babybear::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
|
||||
extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t babybear_mul_cuda(
|
||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" cudaError_t bn254_g2_precompute_msm_bases_cuda(
|
||||
bn254::g2_affine_t* bases,
|
||||
@@ -71,6 +72,37 @@ extern "C" cudaError_t bn254_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bn254_projective_convert_montgomery(
|
||||
bn254::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bn254_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon2_hash_cuda(
|
||||
const bn254::scalar_t* input,
|
||||
bn254::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<bn254::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
|
||||
extern "C" cudaError_t bn254_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
|
||||
30
icicle/include/api/templates/fields/poseidon2.h
Normal file
30
icicle/include/api/templates/fields/poseidon2.h
Normal file
@@ -0,0 +1,30 @@
|
||||
extern "C" cudaError_t ${FIELD}_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const ${FIELD}::scalar_t* round_constants,
|
||||
const ${FIELD}::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_hash_cuda(
|
||||
const ${FIELD}::scalar_t* input,
|
||||
${FIELD}::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<${FIELD}::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
@@ -61,7 +61,6 @@ namespace poseidon2 {
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
PoseidonMode mode;
|
||||
int output_index;
|
||||
bool loop_state; /**< If true, hash results will also be copied in the input pointer in aligned format */
|
||||
bool
|
||||
is_async; /**< Whether to run the Poseidon2 asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
@@ -78,14 +77,13 @@ namespace poseidon2 {
|
||||
false, // are_outputs_on_device
|
||||
PoseidonMode::COMPRESSION,
|
||||
1, // output_index
|
||||
false, // loop_state
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_optimized_poseidon2_constants(
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
@@ -101,13 +99,16 @@ namespace poseidon2 {
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t init_optimized_poseidon2_constants(
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* constants);
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
|
||||
|
||||
/**
|
||||
* Compute the poseidon hash over a sequence of preimages.
|
||||
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
|
||||
@@ -120,7 +121,7 @@ namespace poseidon2 {
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon2_hash(
|
||||
S* states,
|
||||
const S* states,
|
||||
S* output,
|
||||
size_t number_of_states,
|
||||
const Poseidon2Constants<S>& constants,
|
||||
|
||||
@@ -24,7 +24,7 @@ using namespace poseidon2_constants_babybear;
|
||||
|
||||
namespace poseidon2 {
|
||||
template <typename S>
|
||||
cudaError_t create_optimized_poseidon2_constants(
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
@@ -71,7 +71,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
cudaError_t init_optimized_poseidon2_constants(
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
@@ -105,16 +105,31 @@ namespace poseidon2 {
|
||||
P2_CONSTANTS_DEF(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"init_optimized_poseidon2_constants: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
IcicleError_t::InvalidArgument, "init_poseidon2_constants: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
S* h_round_constants = reinterpret_cast<S*>(round_constants);
|
||||
S* h_internal_matrix = reinterpret_cast<S*>(internal_matrix);
|
||||
|
||||
create_optimized_poseidon2_constants(
|
||||
create_poseidon2_constants(
|
||||
width, alpha, internal_rounds, external_rounds, h_round_constants, h_internal_matrix, mds_type, diffusion, ctx,
|
||||
poseidon2_constants);
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
CHK_IF_RETURN(cudaFreeAsync(constants->round_constants, ctx.stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(constants->internal_matrix_diag, ctx.stream));
|
||||
|
||||
constants->alpha = 0;
|
||||
constants->width = 0;
|
||||
constants->external_rounds = 0;
|
||||
constants->internal_rounds = 0;
|
||||
constants->round_constants = nullptr;
|
||||
constants->internal_matrix_diag = nullptr;
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon2
|
||||
@@ -6,7 +6,7 @@ using namespace field_config;
|
||||
#include "poseidon.cu"
|
||||
|
||||
namespace poseidon2 {
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, create_optimized_poseidon2_constants_cuda)(
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, create_poseidon2_constants_cuda)(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
@@ -18,23 +18,23 @@ namespace poseidon2 {
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<scalar_t>* poseidon_constants)
|
||||
{
|
||||
return create_optimized_poseidon2_constants<scalar_t>(
|
||||
return create_poseidon2_constants<scalar_t>(
|
||||
width, alpha, internal_rounds, external_rounds, round_constants, internal_matrix_diag, mds_type, diffusion, ctx,
|
||||
poseidon_constants);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, init_optimized_poseidon2_constants_cuda)(
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, init_poseidon2_constants_cuda)(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<scalar_t>* constants)
|
||||
{
|
||||
return init_optimized_poseidon2_constants<scalar_t>(width, mds_type, diffusion, ctx, constants);
|
||||
return init_poseidon2_constants<scalar_t>(width, mds_type, diffusion, ctx, constants);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon2_hash_cuda)(
|
||||
scalar_t* input,
|
||||
const scalar_t* input,
|
||||
scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
@@ -60,4 +60,10 @@ namespace poseidon2 {
|
||||
}
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, release_poseidon2_constants_cuda)(
|
||||
Poseidon2Constants<scalar_t>* constants, device_context::DeviceContext& ctx)
|
||||
{
|
||||
return release_poseidon2_constants<scalar_t>(constants, ctx);
|
||||
}
|
||||
} // namespace poseidon2
|
||||
@@ -14,8 +14,7 @@ namespace poseidon2 {
|
||||
case 7:
|
||||
return S::sqr(result2) * result2 * element;
|
||||
case 11:
|
||||
S result8 = S::sqr(S::sqr(result2));
|
||||
return result8 * result2 * element;
|
||||
return S::sqr(S::sqr(result2)) * result2 * element;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,8 +176,8 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void
|
||||
poseidon2_permutation_kernel(S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S> constants)
|
||||
__global__ void poseidon2_permutation_kernel(
|
||||
const S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
@@ -223,7 +222,7 @@ namespace poseidon2 {
|
||||
|
||||
// These function is just doing copy from the states to the output
|
||||
template <typename S, int T>
|
||||
__global__ void get_hash_results(S* states, size_t number_of_states, int index, S* out)
|
||||
__global__ void get_hash_results(const S* states, size_t number_of_states, int index, S* out)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
@@ -13,7 +13,11 @@ namespace poseidon2 {
|
||||
|
||||
template <typename S, int T>
|
||||
cudaError_t permute_many(
|
||||
S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S>& constants, cudaStream_t& stream)
|
||||
const S* states,
|
||||
S* states_out,
|
||||
size_t number_of_states,
|
||||
const Poseidon2Constants<S>& constants,
|
||||
cudaStream_t& stream)
|
||||
{
|
||||
poseidon2_permutation_kernel<S, T>
|
||||
<<<poseidon_number_of_blocks<S, T>(number_of_states), poseidon_block_size, 0, stream>>>(
|
||||
@@ -24,7 +28,7 @@ namespace poseidon2 {
|
||||
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon2_hash(
|
||||
S* states,
|
||||
const S* states,
|
||||
S* output,
|
||||
size_t number_of_states,
|
||||
const Poseidon2Constants<S>& constants,
|
||||
@@ -34,7 +38,7 @@ namespace poseidon2 {
|
||||
cudaStream_t& stream = config.ctx.stream;
|
||||
S* d_states;
|
||||
if (config.are_states_on_device) {
|
||||
d_states = states;
|
||||
d_states = const_cast<S*>(states);
|
||||
} else {
|
||||
// allocate memory for {number_of_states} states of {t} scalars each
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_states, number_of_states * T * sizeof(S), stream))
|
||||
|
||||
@@ -27,8 +27,7 @@ int main(int argc, char* argv[])
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
Poseidon2Constants<scalar_t> constants;
|
||||
init_optimized_poseidon2_constants<scalar_t>(
|
||||
T, MdsType::DEFAULT_MDS, DiffusionStrategy::DEFAULT_DIFFUSION, ctx, &constants);
|
||||
init_poseidon2_constants<scalar_t>(T, MdsType::DEFAULT_MDS, DiffusionStrategy::DEFAULT_DIFFUSION, ctx, &constants);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
START_TIMER(allocation_timer);
|
||||
|
||||
@@ -21,21 +21,25 @@ CURVES_CONFIG = {
|
||||
"ntt_ext.h",
|
||||
],
|
||||
"bls12_381": [
|
||||
"poseidon2.h",
|
||||
"field_ext.h",
|
||||
"vec_ops_ext.h",
|
||||
"ntt_ext.h",
|
||||
],
|
||||
"bls12_377": [
|
||||
"poseidon2.h",
|
||||
"field_ext.h",
|
||||
"vec_ops_ext.h",
|
||||
"ntt_ext.h",
|
||||
],
|
||||
"bw6_761": [
|
||||
"poseidon2.h",
|
||||
"field_ext.h",
|
||||
"vec_ops_ext.h",
|
||||
"ntt_ext.h",
|
||||
],
|
||||
"grumpkin": {
|
||||
"poseidon2.h",
|
||||
"curve_g2.h",
|
||||
"msm_g2.h",
|
||||
"ecntt.h",
|
||||
@@ -56,6 +60,7 @@ FIELDS_CONFIG = {
|
||||
},
|
||||
"stark252": {
|
||||
"poseidon.h",
|
||||
"poseidon2.h",
|
||||
"field_ext.h",
|
||||
"vec_ops_ext.h",
|
||||
"ntt_ext.h",
|
||||
@@ -106,6 +111,8 @@ if __name__ == "__main__":
|
||||
if any(header.name.startswith("poseidon") for header in headers):
|
||||
includes.append('#include "poseidon/poseidon.cuh"')
|
||||
includes.append('#include "poseidon/tree/merkle.cuh"')
|
||||
if any(header.name.startswith("poseidon2") for header in headers):
|
||||
includes.append('#include "poseidon2/poseidon2.cuh"')
|
||||
|
||||
contents = WARN_TEXT + INCLUDE_ONCE.format(curve.upper()) + "\n".join(includes) + "\n\n"
|
||||
for header in headers:
|
||||
@@ -138,6 +145,8 @@ if __name__ == "__main__":
|
||||
if any(header.name.startswith("poseidon") for header in headers):
|
||||
includes.append('#include "poseidon/poseidon.cuh"')
|
||||
includes.append('#include "poseidon/tree/merkle.cuh"')
|
||||
if any(header.name.startswith("poseidon2") for header in headers):
|
||||
includes.append('#include "poseidon2/poseidon2.cuh"')
|
||||
|
||||
contents = WARN_TEXT + INCLUDE_ONCE.format(field.upper()) + "\n".join(includes) + "\n\n"
|
||||
for header in headers:
|
||||
|
||||
@@ -74,9 +74,6 @@ pub struct Poseidon2Config<'a> {
|
||||
|
||||
pub output_index: u32,
|
||||
|
||||
/// If true, hash results will also be copied in the input pointer in aligned format
|
||||
pub loop_state: bool,
|
||||
|
||||
/// Whether to run Poseidon asynchronously. If set to `true`, Poseidon will be non-blocking
|
||||
/// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
|
||||
/// If set to `false`, Poseidon will block the current CPU thread.
|
||||
@@ -97,14 +94,13 @@ impl<'a> Poseidon2Config<'a> {
|
||||
are_outputs_on_device: false,
|
||||
mode: PoseidonMode::Compression,
|
||||
output_index: 1,
|
||||
loop_state: false,
|
||||
is_async: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Poseidon2<F: FieldImpl> {
|
||||
fn create_optimized_constants<'a>(
|
||||
fn create_constants<'a>(
|
||||
width: u32,
|
||||
alpha: u32,
|
||||
internal_rounds: u32,
|
||||
@@ -115,24 +111,32 @@ pub trait Poseidon2<F: FieldImpl> {
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<Poseidon2Constants<'a, F>>;
|
||||
fn load_optimized_constants<'a>(
|
||||
fn load_constants<'a>(
|
||||
width: u32,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<Poseidon2Constants<'a, F>>;
|
||||
fn poseidon_unchecked(
|
||||
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
states: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<F>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()>;
|
||||
fn poseidon_unchecked_inplace(
|
||||
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<F>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()>;
|
||||
fn release_constants(constants: &Poseidon2Constants<F>, ctx: &DeviceContext) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
/// Loads pre-calculated poseidon constants on the GPU.
|
||||
pub fn load_optimized_poseidon2_constants<'a, F>(
|
||||
pub fn load_poseidon2_constants<'a, F>(
|
||||
width: u32,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
@@ -142,11 +146,11 @@ where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::load_optimized_constants(width, mds_type, diffusion, ctx)
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::load_constants(width, mds_type, diffusion, ctx)
|
||||
}
|
||||
|
||||
/// Creates new instance of poseidon constants on the GPU.
|
||||
pub fn create_optimized_poseidon2_constants<'a, F>(
|
||||
pub fn create_poseidon2_constants<'a, F>(
|
||||
width: u32,
|
||||
alpha: u32,
|
||||
ctx: &DeviceContext,
|
||||
@@ -161,7 +165,7 @@ where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::create_optimized_constants(
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::create_constants(
|
||||
width,
|
||||
alpha,
|
||||
internal_rounds,
|
||||
@@ -174,30 +178,13 @@ where
|
||||
)
|
||||
}
|
||||
|
||||
/// Computes the poseidon hashes for multiple preimages.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input` - a pointer to the input data. May point to a vector of preimages or a vector of states filled with preimages.
|
||||
///
|
||||
/// * `output` - a pointer to the output data. Must be at least of size [number_of_states](number_of_states)
|
||||
///
|
||||
/// * `number_of_states` - number of input blocks of size `arity`
|
||||
///
|
||||
/// * `arity` - the arity of the hash function (the size of 1 preimage)
|
||||
///
|
||||
/// * `constants` - Poseidon constants.
|
||||
///
|
||||
/// * `config` - config used to specify extra arguments of the Poseidon.
|
||||
pub fn poseidon_hash_many<F>(
|
||||
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
fn poseidon_checks<F>(
|
||||
states: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<F>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
) where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
@@ -208,7 +195,6 @@ where
|
||||
number_of_states * width
|
||||
);
|
||||
}
|
||||
|
||||
if output.len() < number_of_states as usize {
|
||||
panic!(
|
||||
"output len is {}; but needs to be at least {}",
|
||||
@@ -226,6 +212,7 @@ where
|
||||
"Device ids in input and context are different"
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(device_id) = output.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
@@ -233,6 +220,36 @@ where
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
}
|
||||
|
||||
/// Computes the poseidon hashes for multiple preimages.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input` - a pointer to the input data. May point to a vector of preimages or a vector of states filled with preimages.
|
||||
///
|
||||
/// * `output` - a pointer to the output data. Must be at least of size [number_of_states](number_of_states)
|
||||
///
|
||||
/// * `number_of_states` - number of input blocks of size `arity`
|
||||
///
|
||||
/// * `arity` - the arity of the hash function (the size of 1 preimage)
|
||||
///
|
||||
/// * `constants` - Poseidon constants.
|
||||
///
|
||||
/// * `config` - config used to specify extra arguments of the Poseidon.
|
||||
pub fn poseidon2_hash_many<F>(
|
||||
states: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<F>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
poseidon_checks(states, output, number_of_states, width, config);
|
||||
let mut local_cfg = config.clone();
|
||||
local_cfg.are_states_on_device = states.is_on_device();
|
||||
local_cfg.are_outputs_on_device = output.is_on_device();
|
||||
@@ -247,6 +264,39 @@ where
|
||||
)
|
||||
}
|
||||
|
||||
pub fn poseidon2_hash_many_inplace<F>(
|
||||
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<F>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
poseidon_checks(states, states, number_of_states, width, config);
|
||||
let mut local_cfg = config.clone();
|
||||
local_cfg.are_states_on_device = states.is_on_device();
|
||||
local_cfg.are_outputs_on_device = states.is_on_device();
|
||||
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::poseidon_unchecked_inplace(
|
||||
states,
|
||||
number_of_states,
|
||||
width,
|
||||
constants,
|
||||
&local_cfg,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn release_poseidon2_constants<'a, F>(constants: &Poseidon2Constants<F>, ctx: &DeviceContext) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::release_constants(constants, ctx)
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_poseidon2 {
|
||||
(
|
||||
@@ -261,8 +311,8 @@ macro_rules! impl_poseidon2 {
|
||||
Poseidon2Constants,
|
||||
};
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "_create_optimized_poseidon2_constants_cuda")]
|
||||
pub(crate) fn _create_optimized_constants(
|
||||
#[link_name = concat!($field_prefix, "_create_poseidon2_constants_cuda")]
|
||||
pub(crate) fn _create_constants(
|
||||
width: u32,
|
||||
alpha: u32,
|
||||
internal_rounds: u32,
|
||||
@@ -275,8 +325,8 @@ macro_rules! impl_poseidon2 {
|
||||
poseidon_constants: *mut Poseidon2Constants<$field>,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_init_optimized_poseidon2_constants_cuda")]
|
||||
pub(crate) fn _load_optimized_constants(
|
||||
#[link_name = concat!($field_prefix, "_init_poseidon2_constants_cuda")]
|
||||
pub(crate) fn _load_constants(
|
||||
width: u32,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
@@ -284,9 +334,15 @@ macro_rules! impl_poseidon2 {
|
||||
constants: *mut Poseidon2Constants<$field>,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_release_poseidon2_constants_cuda")]
|
||||
pub(crate) fn _release_constants(
|
||||
constants: &Poseidon2Constants<$field>,
|
||||
ctx: &DeviceContext,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_poseidon2_hash_cuda")]
|
||||
pub(crate) fn hash_many(
|
||||
states: *mut $field,
|
||||
states: *const $field,
|
||||
output: *mut $field,
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
@@ -297,7 +353,7 @@ macro_rules! impl_poseidon2 {
|
||||
}
|
||||
|
||||
impl Poseidon2<$field> for $field_config {
|
||||
fn create_optimized_constants<'a>(
|
||||
fn create_constants<'a>(
|
||||
width: u32,
|
||||
alpha: u32,
|
||||
internal_rounds: u32,
|
||||
@@ -310,7 +366,7 @@ macro_rules! impl_poseidon2 {
|
||||
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
|
||||
unsafe {
|
||||
let mut poseidon_constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
|
||||
let err = $field_prefix_ident::_create_optimized_constants(
|
||||
let err = $field_prefix_ident::_create_constants(
|
||||
width,
|
||||
alpha,
|
||||
internal_rounds,
|
||||
@@ -327,7 +383,7 @@ macro_rules! impl_poseidon2 {
|
||||
}
|
||||
}
|
||||
|
||||
fn load_optimized_constants<'a>(
|
||||
fn load_constants<'a>(
|
||||
width: u32,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
@@ -335,20 +391,15 @@ macro_rules! impl_poseidon2 {
|
||||
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
|
||||
unsafe {
|
||||
let mut constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
|
||||
let err = $field_prefix_ident::_load_optimized_constants(
|
||||
width,
|
||||
mds_type,
|
||||
diffusion,
|
||||
ctx,
|
||||
constants.as_mut_ptr(),
|
||||
)
|
||||
.wrap();
|
||||
let err =
|
||||
$field_prefix_ident::_load_constants(width, mds_type, diffusion, ctx, constants.as_mut_ptr())
|
||||
.wrap();
|
||||
err.and(Ok(constants.assume_init()))
|
||||
}
|
||||
}
|
||||
|
||||
fn poseidon_unchecked(
|
||||
states: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
states: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
@@ -357,7 +408,7 @@ macro_rules! impl_poseidon2 {
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
$field_prefix_ident::hash_many(
|
||||
states.as_mut_ptr(),
|
||||
states.as_ptr(),
|
||||
output.as_mut_ptr(),
|
||||
number_of_states,
|
||||
width,
|
||||
@@ -367,6 +418,30 @@ macro_rules! impl_poseidon2 {
|
||||
.wrap()
|
||||
}
|
||||
}
|
||||
|
||||
fn poseidon_unchecked_inplace(
|
||||
states: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<$field>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
$field_prefix_ident::hash_many(
|
||||
states.as_ptr(),
|
||||
states.as_mut_ptr(),
|
||||
number_of_states,
|
||||
width,
|
||||
constants,
|
||||
config,
|
||||
)
|
||||
.wrap()
|
||||
}
|
||||
}
|
||||
|
||||
fn release_constants<'a>(constants: &Poseidon2Constants<$field>, ctx: &DeviceContext) -> IcicleResult<()> {
|
||||
unsafe { $field_prefix_ident::_release_constants(constants, ctx).wrap() }
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -382,3 +457,124 @@ macro_rules! impl_poseidon2_tests {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub mod bench {
|
||||
use criterion::{black_box, Criterion};
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
memory::{HostOrDeviceSlice, HostSlice},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
ntt::FieldImpl,
|
||||
poseidon2::{load_poseidon2_constants, DiffusionStrategy, MdsType},
|
||||
traits::GenerateRandom,
|
||||
vec_ops::VecOps,
|
||||
};
|
||||
|
||||
use super::{poseidon2_hash_many, Poseidon2, Poseidon2Config, Poseidon2Constants};
|
||||
|
||||
#[allow(unused)]
|
||||
fn poseidon2_for_bench<'a, F: FieldImpl>(
|
||||
states: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
poseidon2_result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: usize,
|
||||
width: usize,
|
||||
constants: &Poseidon2Constants<'a, F>,
|
||||
config: &Poseidon2Config,
|
||||
_seed: u32,
|
||||
) where
|
||||
<F as FieldImpl>::Config: Poseidon2<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: VecOps<F>,
|
||||
{
|
||||
poseidon2_hash_many(
|
||||
states,
|
||||
poseidon2_result,
|
||||
number_of_states as u32,
|
||||
width as u32,
|
||||
constants,
|
||||
config,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn benchmark_poseidon2<F: FieldImpl>(c: &mut Criterion)
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon2<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: VecOps<F>,
|
||||
{
|
||||
use criterion::SamplingMode;
|
||||
use std::env;
|
||||
|
||||
let group_id = format!("Poseidon2");
|
||||
let mut group = c.benchmark_group(&group_id);
|
||||
group.sampling_mode(SamplingMode::Flat);
|
||||
group.sample_size(10);
|
||||
|
||||
const MAX_LOG2: u32 = 25; // max length = 2 ^ MAX_LOG2
|
||||
|
||||
let max_log2 = env::var("MAX_LOG2")
|
||||
.unwrap_or_else(|_| MAX_LOG2.to_string())
|
||||
.parse::<u32>()
|
||||
.unwrap_or(MAX_LOG2);
|
||||
|
||||
for test_size_log2 in 13u32..max_log2 + 1 {
|
||||
for t in [2, 3, 4, 8, 16, 20, 24] {
|
||||
let number_of_states = 1 << test_size_log2;
|
||||
let full_size = t * number_of_states;
|
||||
|
||||
let scalars = F::Config::generate_random(full_size);
|
||||
let input = HostSlice::from_slice(&scalars);
|
||||
|
||||
let mut permutation_result = vec![F::zero(); full_size];
|
||||
let permutation_result_slice = HostSlice::from_mut_slice(&mut permutation_result);
|
||||
|
||||
let ctx = DeviceContext::default();
|
||||
let config = Poseidon2Config::default();
|
||||
for mds in [MdsType::Default, MdsType::Plonky] {
|
||||
for diffusion in [DiffusionStrategy::Default, DiffusionStrategy::Montgomery] {
|
||||
let constants =
|
||||
load_poseidon2_constants(t as u32, mds.clone(), diffusion.clone(), &ctx).unwrap();
|
||||
let bench_descr = format!(
|
||||
"Mds::{:?}; Diffusion::{:?}; Number of states: {}; Width: {}",
|
||||
mds, diffusion, number_of_states, t
|
||||
);
|
||||
group.bench_function(&bench_descr, |b| {
|
||||
b.iter(|| {
|
||||
poseidon2_for_bench::<F>(
|
||||
input,
|
||||
permutation_result_slice,
|
||||
number_of_states,
|
||||
t,
|
||||
&constants,
|
||||
&config,
|
||||
black_box(1),
|
||||
)
|
||||
})
|
||||
});
|
||||
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_poseidon2_bench {
|
||||
(
|
||||
$field_prefix:literal,
|
||||
$field:ident
|
||||
) => {
|
||||
use criterion::criterion_group;
|
||||
use criterion::criterion_main;
|
||||
use icicle_core::poseidon2::bench::benchmark_poseidon2;
|
||||
|
||||
criterion_group!(benches, benchmark_poseidon2<$field>);
|
||||
criterion_main!(benches);
|
||||
};
|
||||
}
|
||||
|
||||
@@ -4,8 +4,7 @@ use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::memory::{HostOrDeviceSlice, HostSlice};
|
||||
|
||||
use super::{
|
||||
load_optimized_poseidon2_constants, poseidon_hash_many, DiffusionStrategy, Poseidon2, Poseidon2Config,
|
||||
Poseidon2Constants,
|
||||
load_poseidon2_constants, poseidon2_hash_many, DiffusionStrategy, Poseidon2, Poseidon2Config, Poseidon2Constants,
|
||||
};
|
||||
|
||||
pub fn init_poseidon<'a, F: FieldImpl>(
|
||||
@@ -17,7 +16,7 @@ where
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
let ctx = DeviceContext::default();
|
||||
load_optimized_poseidon2_constants::<F>(width, mds_type, diffusion, &ctx).unwrap()
|
||||
load_poseidon2_constants::<F>(width, mds_type, diffusion, &ctx).unwrap()
|
||||
}
|
||||
|
||||
fn _check_poseidon_hash_many<F: FieldImpl>(width: u32, constants: Poseidon2Constants<F>) -> (F, F)
|
||||
@@ -32,7 +31,7 @@ where
|
||||
let output_slice = HostSlice::from_mut_slice(&mut outputs);
|
||||
|
||||
let config = Poseidon2Config::default();
|
||||
poseidon_hash_many::<F>(
|
||||
poseidon2_hash_many::<F>(
|
||||
input_slice,
|
||||
output_slice,
|
||||
test_size as u32,
|
||||
@@ -86,7 +85,7 @@ where
|
||||
|
||||
let mut config = Poseidon2Config::default();
|
||||
config.mode = PoseidonMode::Permutation;
|
||||
poseidon_hash_many::<F>(
|
||||
poseidon2_hash_many::<F>(
|
||||
input_slice,
|
||||
output_slice,
|
||||
batch_size as u32,
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
use icicle_bn254::curve::ScalarField;
|
||||
|
||||
use icicle_core::impl_poseidon2_bench;
|
||||
|
||||
impl_poseidon2_bench!("bn254", ScalarField);
|
||||
@@ -15,6 +15,7 @@ icicle-cuda-runtime = { workspace = true }
|
||||
cmake = "0.1.50"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
risc0-core = "0.21.0"
|
||||
risc0-zkp = "0.21.0"
|
||||
p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
|
||||
@@ -29,3 +30,7 @@ serial_test = "3.0.0"
|
||||
[features]
|
||||
default = []
|
||||
devmode = ["icicle-core/devmode"]
|
||||
|
||||
[[bench]]
|
||||
name = "poseidon2"
|
||||
harness = false
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
use icicle_babybear::field::ScalarField;
|
||||
|
||||
use icicle_core::impl_poseidon2_bench;
|
||||
|
||||
impl_poseidon2_bench!("babybear", ScalarField);
|
||||
@@ -16,7 +16,7 @@ impl_poseidon2!("babybear", babybear, ScalarField, ScalarCfg);
|
||||
pub(crate) mod tests {
|
||||
use crate::field::ScalarField;
|
||||
use icicle_core::impl_poseidon2_tests;
|
||||
use icicle_core::poseidon2::{create_optimized_poseidon2_constants, tests::*, DiffusionStrategy, MdsType};
|
||||
use icicle_core::poseidon2::{create_poseidon2_constants, tests::*, DiffusionStrategy, MdsType};
|
||||
use icicle_core::traits::FieldImpl;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
|
||||
@@ -291,7 +291,7 @@ pub(crate) mod tests {
|
||||
ScalarField::from_u32(1 << 13),
|
||||
ScalarField::from_u32(1 << 15),
|
||||
];
|
||||
let mut constants = create_optimized_poseidon2_constants(
|
||||
let constants = create_poseidon2_constants(
|
||||
WIDTH as u32,
|
||||
ALPHA as u32,
|
||||
&ctx,
|
||||
@@ -616,7 +616,7 @@ pub(crate) mod tests {
|
||||
ScalarField::from_u32(1 << 22),
|
||||
ScalarField::from_u32(1 << 23),
|
||||
];
|
||||
let mut constants = create_optimized_poseidon2_constants(
|
||||
let constants = create_poseidon2_constants(
|
||||
WIDTH as u32,
|
||||
ALPHA as u32,
|
||||
&ctx,
|
||||
|
||||
Reference in New Issue
Block a user