mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-06 22:24:06 -05:00
Feat/roman/tree builder (#525)
# Updates:

## Hashing
- Added SpongeHasher class
- Can be used to accept any hash function as an argument
- Absorb and squeeze are now separated
- Memory management is now mostly done by SpongeHasher class, each hash function only describes permutation kernels

## Tree builder
- Tree builder is now hash-agnostic.
- Tree builder now supports 2D input (matrices)
- Tree builder can now use two different hash functions for layer 0 and compression layers

## Poseidon1
- Interface changed to classes
- Now allows for any alpha
- Now allows passing constants not in a single vector
- Now allows for any domain tag
- Constants are now released upon going out of scope
- Rust wrappers changed to Poseidon struct

## Poseidon2
- Interface changed to classes
- Constants are now released upon going out of scope
- Rust wrappers changed to Poseidon2 struct

## Keccak
- Added Keccak class which inherits SpongeHasher
- Now doesn't use gpu registers for storing states

To do:
- [x] Update poseidon1 golang bindings
- [x] Update poseidon1 examples
- [x] Fix poseidon2 cuda test
- [x] Fix poseidon2 merkle tree builder test
- [x] Update keccak class with new design
- [x] Update keccak test
- [x] Check keccak correctness
- [x] Update tree builder rust wrappers
- [x] Leave doc comments

Future work:
- [ ] Add keccak merkle tree builder externs
- [ ] Add keccak rust tree builder wrappers
- [ ] Write docs
- [ ] Add example
- [ ] Fix device output for tree builder

---------

Co-authored-by: Jeremy Felder <jeremy.felder1@gmail.com>
Co-authored-by: nonam3e <71525212+nonam3e@users.noreply.github.com>
This commit is contained in:
@@ -6,6 +6,9 @@
|
||||
#include "api/bn254.h"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
using namespace poseidon;
|
||||
using namespace bn254;
|
||||
|
||||
@@ -20,31 +23,20 @@ void checkCudaError(cudaError_t error)
|
||||
// these global constants go into template calls
|
||||
const int size_col = 11;
|
||||
|
||||
// this function executes the Poseidon thread
|
||||
void threadPoseidon(
|
||||
device_context::DeviceContext ctx,
|
||||
unsigned size_partition,
|
||||
scalar_t* layers,
|
||||
scalar_t* column_hashes,
|
||||
PoseidonConstants<scalar_t>* constants)
|
||||
Poseidon<scalar_t> * poseidon)
|
||||
{
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
// CHK_IF_RETURN(); I can't use it in a standard thread function
|
||||
PoseidonConfig column_config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputes_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // input_is_a_state
|
||||
false, // aligned
|
||||
false, // loop_state
|
||||
false, // is_async
|
||||
};
|
||||
cudaError_t err =
|
||||
bn254_poseidon_hash_cuda(layers, column_hashes, (size_t)size_partition, size_col, *constants, column_config);
|
||||
SpongeConfig column_config = default_sponge_config(ctx);
|
||||
cudaError_t err = poseidon->hash_many(layers, column_hashes, (size_t) size_partition, size_col, 1, column_config);
|
||||
checkCudaError(err);
|
||||
}
|
||||
|
||||
@@ -59,6 +51,11 @@ using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::p
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
#define CHECK_ALLOC(ptr) if ((ptr) == nullptr) { \
|
||||
std::cerr << "Memory allocation for '" #ptr "' failed." << std::endl; \
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
const unsigned size_row = (1 << 30);
|
||||
@@ -116,19 +113,18 @@ int main()
|
||||
scalar_t* column_hash1 = static_cast<scalar_t*>(malloc(size_partition * sizeof(scalar_t)));
|
||||
CHECK_ALLOC(column_hash1);
|
||||
|
||||
PoseidonConstants<scalar_t> column_constants0, column_constants1;
|
||||
bn254_init_optimized_poseidon_constants_cuda(size_col, ctx0, &column_constants0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
bn254_init_optimized_poseidon_constants_cuda(size_col, ctx1, &column_constants1);
|
||||
Poseidon<scalar_t> column_poseidon0(size_col, ctx0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
Poseidon<scalar_t> column_poseidon1(size_col, ctx1);
|
||||
|
||||
std::cout << "Parallel execution of Poseidon threads" << std::endl;
|
||||
START_TIMER(parallel);
|
||||
std::thread thread0(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_constants0);
|
||||
std::thread thread1(threadPoseidon, ctx1, size_partition, layers1, column_hash1, &column_constants1);
|
||||
std::thread thread0(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_poseidon0);
|
||||
std::thread thread1(threadPoseidon, ctx1, size_partition, layers1, column_hash1, &column_poseidon1);
|
||||
|
||||
// Wait for the threads to finish
|
||||
thread0.join();
|
||||
@@ -141,9 +137,9 @@ int main()
|
||||
|
||||
std::cout << "Sequential execution of Poseidon threads" << std::endl;
|
||||
START_TIMER(sequential);
|
||||
std::thread thread2(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_constants0);
|
||||
std::thread thread2(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_poseidon0);
|
||||
thread2.join();
|
||||
std::thread thread3(threadPoseidon, ctx0, size_partition, layers1, column_hash1, &column_constants0);
|
||||
std::thread thread3(threadPoseidon, ctx0, size_partition, layers1, column_hash1, &column_poseidon0);
|
||||
thread3.join();
|
||||
END_TIMER(sequential, "1 GPU");
|
||||
std::cout << "Output Data from Thread 2: ";
|
||||
|
||||
@@ -3,13 +3,11 @@
|
||||
#include "polynomials/polynomials.h"
|
||||
#include "polynomials/cuda_backend/polynomial_cuda_backend.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include <chrono>
|
||||
|
||||
// using namespace field_config;
|
||||
using namespace polynomials;
|
||||
using namespace merkle;
|
||||
using namespace bn254;
|
||||
|
||||
// define the polynomial type
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
using namespace poseidon;
|
||||
using namespace bn254;
|
||||
|
||||
@@ -14,13 +16,12 @@ inline uint32_t tree_index(uint32_t level, uint32_t offset) { return (1 << level
|
||||
|
||||
// We assume the tree has leaves already set, compute all other levels
|
||||
void build_tree(
|
||||
const uint32_t tree_height, scalar_t* tree, PoseidonConstants<scalar_t>* constants, PoseidonConfig config)
|
||||
const uint32_t tree_height, scalar_t* tree, Poseidon<scalar_t> &poseidon, SpongeConfig &config)
|
||||
{
|
||||
for (uint32_t level = tree_height - 1; level > 0; level--) {
|
||||
const uint32_t next_level = level - 1;
|
||||
const uint32_t next_level_width = 1 << next_level;
|
||||
bn254_poseidon_hash_cuda(
|
||||
&tree[tree_index(level, 0)], &tree[tree_index(next_level, 0)], next_level_width, 2, *constants, config);
|
||||
poseidon.hash_many(&tree[tree_index(level, 0)], &tree[tree_index(next_level, 0)], next_level_width, 2, 1, config);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,8 +66,8 @@ uint32_t validate_proof(
|
||||
const uint32_t tree_height,
|
||||
const uint32_t* proof_lr,
|
||||
const scalar_t* proof_hash,
|
||||
PoseidonConstants<scalar_t>* constants,
|
||||
PoseidonConfig config)
|
||||
Poseidon<scalar_t> &poseidon,
|
||||
SpongeConfig &config)
|
||||
{
|
||||
scalar_t hashes_in[2], hash_out[1], level_hash;
|
||||
level_hash = hash;
|
||||
@@ -79,7 +80,7 @@ uint32_t validate_proof(
|
||||
hashes_in[1] = level_hash;
|
||||
}
|
||||
// next level hash
|
||||
bn254_poseidon_hash_cuda(hashes_in, hash_out, 1, 2, *constants, config);
|
||||
poseidon.hash_many(hashes_in, hash_out, 1, 2, 1, config);
|
||||
level_hash = hash_out[0];
|
||||
}
|
||||
return proof_hash[0] == level_hash;
|
||||
@@ -109,16 +110,15 @@ int main(int argc, char* argv[])
|
||||
d = d + scalar_t::one();
|
||||
}
|
||||
std::cout << "Hashing blocks into tree leaves..." << std::endl;
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
bn254_init_optimized_poseidon_constants_cuda(data_arity, ctx, &constants);
|
||||
PoseidonConfig config = default_poseidon_config(data_arity + 1);
|
||||
bn254_poseidon_hash_cuda(data, &tree[tree_index(leaf_level, 0)], tree_width, 4, constants, config);
|
||||
|
||||
Poseidon<scalar_t> poseidon(data_arity, ctx);
|
||||
SpongeConfig config = default_sponge_config(ctx);
|
||||
poseidon.hash_many(data, &tree[tree_index(leaf_level, 0)], tree_width, data_arity, 1, config);
|
||||
|
||||
std::cout << "3. Building Merkle tree" << std::endl;
|
||||
PoseidonConstants<scalar_t> tree_constants;
|
||||
bn254_init_optimized_poseidon_constants_cuda(tree_arity, ctx, &tree_constants);
|
||||
PoseidonConfig tree_config = default_poseidon_config(tree_arity + 1);
|
||||
build_tree(tree_height, tree, &tree_constants, tree_config);
|
||||
Poseidon<scalar_t> tree_poseidon(tree_arity, ctx);
|
||||
SpongeConfig tree_config = default_sponge_config(ctx);
|
||||
build_tree(tree_height, tree, tree_poseidon, tree_config);
|
||||
|
||||
std::cout << "4. Generate membership proof" << std::endl;
|
||||
uint32_t position = tree_width - 1;
|
||||
@@ -133,13 +133,13 @@ int main(int argc, char* argv[])
|
||||
std::cout << "5. Validate the hash membership" << std::endl;
|
||||
uint32_t validated;
|
||||
const scalar_t hash = tree[tree_index(leaf_level, query_position)];
|
||||
validated = validate_proof(hash, tree_height, proof_lr, proof_hash, &tree_constants, tree_config);
|
||||
validated = validate_proof(hash, tree_height, proof_lr, proof_hash, tree_poseidon, tree_config);
|
||||
std::cout << "Validated: " << validated << std::endl;
|
||||
|
||||
std::cout << "6. Tamper the hash" << std::endl;
|
||||
const scalar_t tampered_hash = hash + scalar_t::one();
|
||||
validated = validate_proof(tampered_hash, tree_height, proof_lr, proof_hash, &tree_constants, tree_config);
|
||||
|
||||
validated = validate_proof(tampered_hash, tree_height, proof_lr, proof_hash, tree_poseidon, tree_config);
|
||||
|
||||
std::cout << "7. Invalidate tamper hash membership" << std::endl;
|
||||
std::cout << "Validated: " << validated << std::endl;
|
||||
return 0;
|
||||
|
||||
@@ -2,7 +2,8 @@ use icicle_bls12_381::curve::ScalarField as F;
|
||||
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
|
||||
use icicle_core::poseidon::{load_optimized_poseidon_constants, poseidon_hash_many, PoseidonConfig};
|
||||
use icicle_core::hash::{SpongeHash, SpongeConfig};
|
||||
use icicle_core::poseidon::Poseidon;
|
||||
use icicle_core::traits::FieldImpl;
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
@@ -24,14 +25,14 @@ fn main() {
|
||||
let test_size = 1 << size;
|
||||
|
||||
println!("Running Icicle Examples: Rust Poseidon Hash");
|
||||
let arity = 2u32;
|
||||
let arity = 2;
|
||||
println!(
|
||||
"---------------------- Loading optimized Poseidon constants for arity={} ------------------------",
|
||||
arity
|
||||
);
|
||||
let ctx = DeviceContext::default();
|
||||
let constants = load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap();
|
||||
let config = PoseidonConfig::default();
|
||||
let poseidon = Poseidon::load(arity, &ctx).unwrap();
|
||||
let config = SpongeConfig::default();
|
||||
|
||||
println!(
|
||||
"---------------------- Input size 2^{}={} ------------------------",
|
||||
@@ -45,12 +46,12 @@ fn main() {
|
||||
println!("Executing BLS12-381 Poseidon Hash on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
poseidon_hash_many::<F>(
|
||||
poseidon.hash_many(
|
||||
input_slice,
|
||||
output_slice,
|
||||
test_size as u32,
|
||||
arity as u32,
|
||||
&constants,
|
||||
test_size,
|
||||
arity,
|
||||
1,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -9,58 +9,67 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/babybear.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" cudaError_t babybear_extension_ntt_cuda(
|
||||
const babybear::extension_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::extension_t* output);
|
||||
|
||||
extern "C" cudaError_t babybear_initialize_domain(
|
||||
babybear::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t babybear_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<babybear::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const babybear::scalar_t* round_constants,
|
||||
const babybear::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t babybear_ntt_cuda(
|
||||
const babybear::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::scalar_t* output);
|
||||
extern "C" cudaError_t babybear_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<babybear::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t babybear_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t babybear_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<babybear::scalar_t>* poseidon,
|
||||
const babybear::scalar_t* inputs,
|
||||
babybear::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg);
|
||||
|
||||
extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t
|
||||
babybear_poseidon2_delete_cuda(poseidon2::Poseidon2<babybear::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t babybear_scalar_convert_montgomery(
|
||||
babybear::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t babybear_build_merkle_tree(
|
||||
const babybear::scalar_t* leaves,
|
||||
babybear::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::SpongeHasher<babybear::scalar_t, babybear::scalar_t>* compression,
|
||||
const hash::SpongeHasher<babybear::scalar_t, babybear::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_mul_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_add_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_accumulate_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_sub_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
|
||||
const babybear::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
babybear::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_bit_reverse_cuda(
|
||||
const babybear::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::extension_t* output);
|
||||
|
||||
|
||||
extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_scalar_convert_montgomery(
|
||||
babybear::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t babybear_mmcs_commit_cuda(
|
||||
const matrix::Matrix<babybear::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
babybear::scalar_t* digests,
|
||||
const hash::SpongeHasher<babybear::scalar_t, babybear::scalar_t>* hasher,
|
||||
const hash::SpongeHasher<babybear::scalar_t, babybear::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t babybear_mul_cuda(
|
||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||
@@ -87,35 +96,47 @@ extern "C" cudaError_t babybear_bit_reverse_cuda(
|
||||
const babybear::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const babybear::scalar_t* round_constants,
|
||||
const babybear::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t babybear_scalar_convert_montgomery(
|
||||
babybear::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t babybear_initialize_domain(
|
||||
babybear::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t babybear_ntt_cuda(
|
||||
const babybear::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t babybear_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_scalar_convert_montgomery(
|
||||
babybear::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_mul_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_add_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_accumulate_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_sub_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
|
||||
const babybear::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
babybear::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t babybear_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
extern "C" cudaError_t babybear_extension_bit_reverse_cuda(
|
||||
const babybear::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::extension_t* output);
|
||||
|
||||
extern "C" cudaError_t babybear_poseidon2_hash_cuda(
|
||||
const babybear::scalar_t* input,
|
||||
babybear::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<babybear::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
|
||||
extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,26 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bls12_377.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" bool bls12_377_g2_eq(bls12_377::g2_projective_t* point1, bls12_377::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bls12_377_g2_to_affine(bls12_377::g2_projective_t* point, bls12_377::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bls12_377_g2_generate_projective_points(bls12_377::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bls12_377_g2_generate_affine_points(bls12_377::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_affine_convert_montgomery(
|
||||
bls12_377::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_projective_convert_montgomery(
|
||||
bls12_377::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_precompute_msm_bases_cuda(
|
||||
bls12_377::g2_affine_t* bases,
|
||||
@@ -48,6 +35,20 @@ extern "C" cudaError_t bls12_377_precompute_msm_bases_cuda(
|
||||
extern "C" cudaError_t bls12_377_msm_cuda(
|
||||
const bls12_377::scalar_t* scalars, const bls12_377::affine_t* points, int msm_size, msm::MSMConfig& config, bls12_377::projective_t* out);
|
||||
|
||||
extern "C" bool bls12_377_g2_eq(bls12_377::g2_projective_t* point1, bls12_377::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bls12_377_g2_to_affine(bls12_377::g2_projective_t* point, bls12_377::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bls12_377_g2_generate_projective_points(bls12_377::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bls12_377_g2_generate_affine_points(bls12_377::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_affine_convert_montgomery(
|
||||
bls12_377::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_projective_convert_montgomery(
|
||||
bls12_377::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_ecntt_cuda(
|
||||
const bls12_377::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::projective_t* output);
|
||||
|
||||
@@ -65,18 +66,52 @@ extern "C" cudaError_t bls12_377_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bls12_377_projective_convert_montgomery(
|
||||
bls12_377::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_initialize_domain(
|
||||
bls12_377::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t bls12_377_build_merkle_tree(
|
||||
const bls12_377::scalar_t* leaves,
|
||||
bls12_377::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::SpongeHasher<bls12_377::scalar_t, bls12_377::scalar_t>* compression,
|
||||
const hash::SpongeHasher<bls12_377::scalar_t, bls12_377::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_ntt_cuda(
|
||||
const bls12_377::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::scalar_t* output);
|
||||
extern "C" cudaError_t bls12_377_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bls12_377::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bls12_377::scalar_t* digests,
|
||||
const hash::SpongeHasher<bls12_377::scalar_t, bls12_377::scalar_t>* hasher,
|
||||
const hash::SpongeHasher<bls12_377::scalar_t, bls12_377::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bls12_377_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bls12_377::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bls12_377::scalar_t* round_constants,
|
||||
const bls12_377::scalar_t* mds_matrix,
|
||||
const bls12_377::scalar_t* non_sparse_matrix,
|
||||
const bls12_377::scalar_t* sparse_matrices,
|
||||
const bls12_377::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t bls12_377_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bls12_377::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_scalar_convert_montgomery(
|
||||
bls12_377::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bls12_377_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bls12_377::scalar_t>* poseidon,
|
||||
const bls12_377::scalar_t* inputs,
|
||||
bls12_377::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bls12_377_poseidon_delete_cuda(poseidon::Poseidon<bls12_377::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bls12_377_mul_cuda(
|
||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
||||
@@ -103,31 +138,17 @@ extern "C" cudaError_t bls12_377_bit_reverse_cuda(
|
||||
const bls12_377::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_377::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t bls12_377_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bls12_377::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bls12_377::scalar_t>* poseidon_constants);
|
||||
extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_377_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_377::scalar_t>* constants);
|
||||
extern "C" cudaError_t bls12_377_scalar_convert_montgomery(
|
||||
bls12_377::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_hash_cuda(
|
||||
bls12_377::scalar_t* input,
|
||||
bls12_377::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
extern "C" cudaError_t bls12_377_initialize_domain(
|
||||
bls12_377::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t bls12_377_build_poseidon_merkle_tree(
|
||||
const bls12_377::scalar_t* leaves,
|
||||
bls12_377::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t bls12_377_ntt_cuda(
|
||||
const bls12_377::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t bls12_377_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,26 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bls12_381.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" bool bls12_381_g2_eq(bls12_381::g2_projective_t* point1, bls12_381::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bls12_381_g2_to_affine(bls12_381::g2_projective_t* point, bls12_381::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bls12_381_g2_generate_projective_points(bls12_381::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bls12_381_g2_generate_affine_points(bls12_381::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_affine_convert_montgomery(
|
||||
bls12_381::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_projective_convert_montgomery(
|
||||
bls12_381::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_precompute_msm_bases_cuda(
|
||||
bls12_381::g2_affine_t* bases,
|
||||
@@ -48,6 +35,20 @@ extern "C" cudaError_t bls12_381_precompute_msm_bases_cuda(
|
||||
extern "C" cudaError_t bls12_381_msm_cuda(
|
||||
const bls12_381::scalar_t* scalars, const bls12_381::affine_t* points, int msm_size, msm::MSMConfig& config, bls12_381::projective_t* out);
|
||||
|
||||
extern "C" bool bls12_381_g2_eq(bls12_381::g2_projective_t* point1, bls12_381::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bls12_381_g2_to_affine(bls12_381::g2_projective_t* point, bls12_381::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bls12_381_g2_generate_projective_points(bls12_381::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bls12_381_g2_generate_affine_points(bls12_381::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_affine_convert_montgomery(
|
||||
bls12_381::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_projective_convert_montgomery(
|
||||
bls12_381::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_ecntt_cuda(
|
||||
const bls12_381::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::projective_t* output);
|
||||
|
||||
@@ -65,18 +66,52 @@ extern "C" cudaError_t bls12_381_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bls12_381_projective_convert_montgomery(
|
||||
bls12_381::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_initialize_domain(
|
||||
bls12_381::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t bls12_381_build_merkle_tree(
|
||||
const bls12_381::scalar_t* leaves,
|
||||
bls12_381::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::SpongeHasher<bls12_381::scalar_t, bls12_381::scalar_t>* compression,
|
||||
const hash::SpongeHasher<bls12_381::scalar_t, bls12_381::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_ntt_cuda(
|
||||
const bls12_381::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::scalar_t* output);
|
||||
extern "C" cudaError_t bls12_381_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bls12_381::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bls12_381::scalar_t* digests,
|
||||
const hash::SpongeHasher<bls12_381::scalar_t, bls12_381::scalar_t>* hasher,
|
||||
const hash::SpongeHasher<bls12_381::scalar_t, bls12_381::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bls12_381_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bls12_381::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bls12_381::scalar_t* round_constants,
|
||||
const bls12_381::scalar_t* mds_matrix,
|
||||
const bls12_381::scalar_t* non_sparse_matrix,
|
||||
const bls12_381::scalar_t* sparse_matrices,
|
||||
const bls12_381::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t bls12_381_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bls12_381::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_scalar_convert_montgomery(
|
||||
bls12_381::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bls12_381_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bls12_381::scalar_t>* poseidon,
|
||||
const bls12_381::scalar_t* inputs,
|
||||
bls12_381::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bls12_381_poseidon_delete_cuda(poseidon::Poseidon<bls12_381::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bls12_381_mul_cuda(
|
||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
||||
@@ -103,31 +138,17 @@ extern "C" cudaError_t bls12_381_bit_reverse_cuda(
|
||||
const bls12_381::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_381::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t bls12_381_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bls12_381::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bls12_381::scalar_t>* poseidon_constants);
|
||||
extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_381_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_381::scalar_t>* constants);
|
||||
extern "C" cudaError_t bls12_381_scalar_convert_montgomery(
|
||||
bls12_381::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_hash_cuda(
|
||||
bls12_381::scalar_t* input,
|
||||
bls12_381::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
extern "C" cudaError_t bls12_381_initialize_domain(
|
||||
bls12_381::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t bls12_381_build_poseidon_merkle_tree(
|
||||
const bls12_381::scalar_t* leaves,
|
||||
bls12_381::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t bls12_381_ntt_cuda(
|
||||
const bls12_381::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t bls12_381_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,28 +9,15 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" bool bn254_g2_eq(bn254::g2_projective_t* point1, bn254::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bn254_g2_to_affine(bn254::g2_projective_t* point, bn254::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bn254_g2_generate_projective_points(bn254::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bn254_g2_generate_affine_points(bn254::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_affine_convert_montgomery(
|
||||
bn254::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_projective_convert_montgomery(
|
||||
bn254::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_precompute_msm_bases_cuda(
|
||||
bn254::g2_affine_t* bases,
|
||||
int msm_size,
|
||||
@@ -49,6 +36,20 @@ extern "C" cudaError_t bn254_precompute_msm_bases_cuda(
|
||||
extern "C" cudaError_t bn254_msm_cuda(
|
||||
const bn254::scalar_t* scalars, const bn254::affine_t* points, int msm_size, msm::MSMConfig& config, bn254::projective_t* out);
|
||||
|
||||
extern "C" bool bn254_g2_eq(bn254::g2_projective_t* point1, bn254::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bn254_g2_to_affine(bn254::g2_projective_t* point, bn254::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bn254_g2_generate_projective_points(bn254::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bn254_g2_generate_affine_points(bn254::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_affine_convert_montgomery(
|
||||
bn254::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_projective_convert_montgomery(
|
||||
bn254::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_ecntt_cuda(
|
||||
const bn254::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::projective_t* output);
|
||||
|
||||
@@ -66,18 +67,87 @@ extern "C" cudaError_t bn254_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bn254_projective_convert_montgomery(
|
||||
bn254::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_initialize_domain(
|
||||
bn254::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t bn254_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<bn254::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t bn254_ntt_cuda(
|
||||
const bn254::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::scalar_t* output);
|
||||
extern "C" cudaError_t bn254_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<bn254::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t bn254_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bn254_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<bn254::scalar_t>* poseidon,
|
||||
const bn254::scalar_t* inputs,
|
||||
bn254::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg);
|
||||
|
||||
extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t
|
||||
bn254_poseidon2_delete_cuda(poseidon2::Poseidon2<bn254::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_scalar_convert_montgomery(
|
||||
bn254::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bn254_build_merkle_tree(
|
||||
const bn254::scalar_t* leaves,
|
||||
bn254::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::SpongeHasher<bn254::scalar_t, bn254::scalar_t>* compression,
|
||||
const hash::SpongeHasher<bn254::scalar_t, bn254::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bn254_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bn254::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bn254::scalar_t* digests,
|
||||
const hash::SpongeHasher<bn254::scalar_t, bn254::scalar_t>* hasher,
|
||||
const hash::SpongeHasher<bn254::scalar_t, bn254::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bn254::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* mds_matrix,
|
||||
const bn254::scalar_t* non_sparse_matrix,
|
||||
const bn254::scalar_t* sparse_matrices,
|
||||
const bn254::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bn254::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bn254::scalar_t>* poseidon,
|
||||
const bn254::scalar_t* inputs,
|
||||
bn254::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bn254_poseidon_delete_cuda(poseidon::Poseidon<bn254::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bn254_mul_cuda(
|
||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
||||
@@ -104,62 +174,17 @@ extern "C" cudaError_t bn254_bit_reverse_cuda(
|
||||
const bn254::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bn254::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t bn254_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t bn254_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
extern "C" cudaError_t bn254_scalar_convert_montgomery(
|
||||
bn254::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon2_hash_cuda(
|
||||
const bn254::scalar_t* input,
|
||||
bn254::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<bn254::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
extern "C" cudaError_t bn254_initialize_domain(
|
||||
bn254::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t bn254_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bn254_ntt_cuda(
|
||||
const bn254::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t bn254_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bn254::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bn254::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bn254_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bn254::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_hash_cuda(
|
||||
bn254::scalar_t* input,
|
||||
bn254::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bn254_build_poseidon_merkle_tree(
|
||||
const bn254::scalar_t* leaves,
|
||||
bn254::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t bn254_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,26 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bw6_761.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" bool bw6_761_g2_eq(bw6_761::g2_projective_t* point1, bw6_761::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bw6_761_g2_to_affine(bw6_761::g2_projective_t* point, bw6_761::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bw6_761_g2_generate_projective_points(bw6_761::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bw6_761_g2_generate_affine_points(bw6_761::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_affine_convert_montgomery(
|
||||
bw6_761::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_projective_convert_montgomery(
|
||||
bw6_761::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_precompute_msm_bases_cuda(
|
||||
bw6_761::g2_affine_t* bases,
|
||||
@@ -48,6 +35,20 @@ extern "C" cudaError_t bw6_761_precompute_msm_bases_cuda(
|
||||
extern "C" cudaError_t bw6_761_msm_cuda(
|
||||
const bw6_761::scalar_t* scalars, const bw6_761::affine_t* points, int msm_size, msm::MSMConfig& config, bw6_761::projective_t* out);
|
||||
|
||||
extern "C" bool bw6_761_g2_eq(bw6_761::g2_projective_t* point1, bw6_761::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bw6_761_g2_to_affine(bw6_761::g2_projective_t* point, bw6_761::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bw6_761_g2_generate_projective_points(bw6_761::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bw6_761_g2_generate_affine_points(bw6_761::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_affine_convert_montgomery(
|
||||
bw6_761::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_projective_convert_montgomery(
|
||||
bw6_761::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_ecntt_cuda(
|
||||
const bw6_761::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::projective_t* output);
|
||||
|
||||
@@ -65,18 +66,52 @@ extern "C" cudaError_t bw6_761_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bw6_761_projective_convert_montgomery(
|
||||
bw6_761::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_initialize_domain(
|
||||
bw6_761::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t bw6_761_build_merkle_tree(
|
||||
const bw6_761::scalar_t* leaves,
|
||||
bw6_761::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::SpongeHasher<bw6_761::scalar_t, bw6_761::scalar_t>* compression,
|
||||
const hash::SpongeHasher<bw6_761::scalar_t, bw6_761::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_ntt_cuda(
|
||||
const bw6_761::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::scalar_t* output);
|
||||
extern "C" cudaError_t bw6_761_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bw6_761::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bw6_761::scalar_t* digests,
|
||||
const hash::SpongeHasher<bw6_761::scalar_t, bw6_761::scalar_t>* hasher,
|
||||
const hash::SpongeHasher<bw6_761::scalar_t, bw6_761::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bw6_761_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bw6_761::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bw6_761::scalar_t* round_constants,
|
||||
const bw6_761::scalar_t* mds_matrix,
|
||||
const bw6_761::scalar_t* non_sparse_matrix,
|
||||
const bw6_761::scalar_t* sparse_matrices,
|
||||
const bw6_761::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t bw6_761_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bw6_761::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_scalar_convert_montgomery(
|
||||
bw6_761::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bw6_761_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bw6_761::scalar_t>* poseidon,
|
||||
const bw6_761::scalar_t* inputs,
|
||||
bw6_761::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bw6_761_poseidon_delete_cuda(poseidon::Poseidon<bw6_761::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bw6_761_mul_cuda(
|
||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
||||
@@ -103,31 +138,17 @@ extern "C" cudaError_t bw6_761_bit_reverse_cuda(
|
||||
const bw6_761::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bw6_761::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t bw6_761_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bw6_761::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bw6_761::scalar_t>* poseidon_constants);
|
||||
extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t bw6_761_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bw6_761::scalar_t>* constants);
|
||||
extern "C" cudaError_t bw6_761_scalar_convert_montgomery(
|
||||
bw6_761::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_hash_cuda(
|
||||
bw6_761::scalar_t* input,
|
||||
bw6_761::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
extern "C" cudaError_t bw6_761_initialize_domain(
|
||||
bw6_761::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t bw6_761_build_poseidon_merkle_tree(
|
||||
const bw6_761::scalar_t* leaves,
|
||||
bw6_761::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t bw6_761_ntt_cuda(
|
||||
const bw6_761::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t bw6_761_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,11 +9,12 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/grumpkin.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t grumpkin_precompute_msm_bases_cuda(
|
||||
grumpkin::affine_t* bases,
|
||||
@@ -38,10 +39,52 @@ extern "C" cudaError_t grumpkin_affine_convert_montgomery(
|
||||
extern "C" cudaError_t grumpkin_projective_convert_montgomery(
|
||||
grumpkin::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t grumpkin_build_merkle_tree(
|
||||
const grumpkin::scalar_t* leaves,
|
||||
grumpkin::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::SpongeHasher<grumpkin::scalar_t, grumpkin::scalar_t>* compression,
|
||||
const hash::SpongeHasher<grumpkin::scalar_t, grumpkin::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_scalar_convert_montgomery(
|
||||
grumpkin::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t grumpkin_mmcs_commit_cuda(
|
||||
const matrix::Matrix<grumpkin::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
grumpkin::scalar_t* digests,
|
||||
const hash::SpongeHasher<grumpkin::scalar_t, grumpkin::scalar_t>* hasher,
|
||||
const hash::SpongeHasher<grumpkin::scalar_t, grumpkin::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_create_cuda(
|
||||
poseidon::Poseidon<grumpkin::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const grumpkin::scalar_t* round_constants,
|
||||
const grumpkin::scalar_t* mds_matrix,
|
||||
const grumpkin::scalar_t* non_sparse_matrix,
|
||||
const grumpkin::scalar_t* sparse_matrices,
|
||||
const grumpkin::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_load_cuda(
|
||||
poseidon::Poseidon<grumpkin::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<grumpkin::scalar_t>* poseidon,
|
||||
const grumpkin::scalar_t* inputs,
|
||||
grumpkin::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
grumpkin_poseidon_delete_cuda(poseidon::Poseidon<grumpkin::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t grumpkin_mul_cuda(
|
||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
||||
@@ -68,31 +111,9 @@ extern "C" cudaError_t grumpkin_bit_reverse_cuda(
|
||||
const grumpkin::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, grumpkin::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t grumpkin_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const grumpkin::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<grumpkin::scalar_t>* poseidon_constants);
|
||||
extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t grumpkin_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<grumpkin::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_hash_cuda(
|
||||
grumpkin::scalar_t* input,
|
||||
grumpkin::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_build_poseidon_merkle_tree(
|
||||
const grumpkin::scalar_t* leaves,
|
||||
grumpkin::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t grumpkin_scalar_convert_montgomery(
|
||||
grumpkin::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,43 +9,27 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/m31.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
extern "C" void m31_generate_scalars(m31::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t m31_build_merkle_tree(
|
||||
const m31::scalar_t* leaves,
|
||||
m31::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::SpongeHasher<m31::scalar_t, m31::scalar_t>* compression,
|
||||
const hash::SpongeHasher<m31::scalar_t, m31::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t m31_scalar_convert_montgomery(
|
||||
m31::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t m31_extension_mul_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_add_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_accumulate_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t m31_extension_sub_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_transpose_matrix_cuda(
|
||||
const m31::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
m31::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t m31_extension_bit_reverse_cuda(
|
||||
const m31::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::extension_t* output);
|
||||
|
||||
|
||||
extern "C" void m31_extension_generate_scalars(m31::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_extension_scalar_convert_montgomery(
|
||||
m31::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t m31_mmcs_commit_cuda(
|
||||
const matrix::Matrix<m31::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
m31::scalar_t* digests,
|
||||
const hash::SpongeHasher<m31::scalar_t, m31::scalar_t>* hasher,
|
||||
const hash::SpongeHasher<m31::scalar_t, m31::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t m31_mul_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||
@@ -72,4 +56,39 @@ extern "C" cudaError_t m31_bit_reverse_cuda(
|
||||
const m31::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void m31_generate_scalars(m31::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_scalar_convert_montgomery(
|
||||
m31::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void m31_extension_generate_scalars(m31::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_extension_scalar_convert_montgomery(
|
||||
m31::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t m31_extension_mul_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_add_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_accumulate_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t m31_extension_sub_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_transpose_matrix_cuda(
|
||||
const m31::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
m31::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t m31_extension_bit_reverse_cuda(
|
||||
const m31::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::extension_t* output);
|
||||
|
||||
|
||||
#endif
|
||||
@@ -9,22 +9,28 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/stark252.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
extern "C" cudaError_t stark252_initialize_domain(
|
||||
stark252::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t stark252_build_merkle_tree(
|
||||
const stark252::scalar_t* leaves,
|
||||
stark252::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::SpongeHasher<stark252::scalar_t, stark252::scalar_t>* compression,
|
||||
const hash::SpongeHasher<stark252::scalar_t, stark252::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t stark252_ntt_cuda(
|
||||
const stark252::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<stark252::scalar_t>& config, stark252::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t stark252_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void stark252_generate_scalars(stark252::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t stark252_scalar_convert_montgomery(
|
||||
stark252::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t stark252_mmcs_commit_cuda(
|
||||
const matrix::Matrix<stark252::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
stark252::scalar_t* digests,
|
||||
const hash::SpongeHasher<stark252::scalar_t, stark252::scalar_t>* hasher,
|
||||
const hash::SpongeHasher<stark252::scalar_t, stark252::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t stark252_mul_cuda(
|
||||
stark252::scalar_t* vec_a, stark252::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, stark252::scalar_t* result);
|
||||
@@ -51,4 +57,17 @@ extern "C" cudaError_t stark252_bit_reverse_cuda(
|
||||
const stark252::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, stark252::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void stark252_generate_scalars(stark252::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t stark252_scalar_convert_montgomery(
|
||||
stark252::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t stark252_initialize_domain(
|
||||
stark252::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t stark252_ntt_cuda(
|
||||
const stark252::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<stark252::scalar_t>& config, stark252::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t stark252_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -1,26 +1,29 @@
|
||||
extern "C" cudaError_t ${FIELD}_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const ${FIELD}::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_create_cuda(
|
||||
poseidon::Poseidon<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const ${FIELD}::scalar_t* round_constants,
|
||||
const ${FIELD}::scalar_t* mds_matrix,
|
||||
const ${FIELD}::scalar_t* non_sparse_matrix,
|
||||
const ${FIELD}::scalar_t* sparse_matrices,
|
||||
const ${FIELD}::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<${FIELD}::scalar_t>* constants);
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_load_cuda(
|
||||
poseidon::Poseidon<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_hash_cuda(
|
||||
${FIELD}::scalar_t* input,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<${FIELD}::scalar_t>* poseidon,
|
||||
const ${FIELD}::scalar_t* inputs,
|
||||
${FIELD}::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<${FIELD}::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_build_poseidon_merkle_tree(
|
||||
const ${FIELD}::scalar_t* leaves,
|
||||
${FIELD}::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<${FIELD}::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t
|
||||
${FIELD}_poseidon_delete_cuda(poseidon::Poseidon<${FIELD}::scalar_t>* poseidon);
|
||||
@@ -1,30 +1,34 @@
|
||||
extern "C" cudaError_t ${FIELD}_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const ${FIELD}::scalar_t* round_constants,
|
||||
const ${FIELD}::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_hash_cuda(
|
||||
const ${FIELD}::scalar_t* input,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<${FIELD}::scalar_t>* poseidon,
|
||||
const ${FIELD}::scalar_t* inputs,
|
||||
${FIELD}::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<${FIELD}::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t
|
||||
${FIELD}_poseidon2_delete_cuda(poseidon2::Poseidon2<${FIELD}::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
16
icicle/include/api/templates/fields/tree.h
Normal file
16
icicle/include/api/templates/fields/tree.h
Normal file
@@ -0,0 +1,16 @@
|
||||
extern "C" cudaError_t ${FIELD}_build_merkle_tree(
|
||||
const ${FIELD}::scalar_t* leaves,
|
||||
${FIELD}::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::SpongeHasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression,
|
||||
const hash::SpongeHasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_mmcs_commit_cuda(
|
||||
const matrix::Matrix<${FIELD}::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
${FIELD}::scalar_t* digests,
|
||||
const hash::SpongeHasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* hasher,
|
||||
const hash::SpongeHasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
@@ -796,6 +796,14 @@ public:
|
||||
return r;
|
||||
}
|
||||
|
||||
// Copy assignment: duplicates the limbs of `other` into this element, one limb at a time,
// and returns a reference to this element so assignments can be chained.
HOST_DEVICE_INLINE Field& operator=(Field const& other)
{
  int limb = 0;
  while (limb < TLC) {
    this->limbs_storage.limbs[limb] = other.limbs_storage.limbs[limb];
    ++limb;
  }
  return *this;
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator*(const Field& xs, const Field& ys)
|
||||
{
|
||||
Wide xy = mul_wide(xs, ys); // full mult
|
||||
|
||||
@@ -14,7 +14,7 @@ namespace m31 {
|
||||
HOST_DEVICE_INLINE MersenneField(storage<CONFIG::limbs_count> x) : Field<CONFIG>{x} {}
|
||||
HOST_DEVICE_INLINE MersenneField(const Field<CONFIG>& other) : Field<CONFIG>(other) {}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField zero() { return MersenneField(CONFIG::zero.limbs[0]); }
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField zero() { return MersenneField(CONFIG::zero); }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField one() { return MersenneField(CONFIG::one.limbs[0]); }
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#define DEVICE_CONTEXT_H
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
namespace device_context {
|
||||
|
||||
@@ -30,6 +31,28 @@ namespace device_context {
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace device_context
|
||||
// checking whether a pointer is on host or device and asserts device matches provided device
|
||||
static bool is_host_ptr(const void* p, int device_id = 0)
|
||||
{
|
||||
cudaPointerAttributes attributes;
|
||||
CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
|
||||
const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
|
||||
attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
|
||||
const bool is_on_cur_device = !is_on_host && attributes.device == device_id;
|
||||
const bool is_valid_ptr = is_on_host || is_on_cur_device;
|
||||
if (!is_valid_ptr) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Invalid ptr"); }
|
||||
|
||||
return is_on_host;
|
||||
}
|
||||
|
||||
static int get_cuda_device(const void* p)
|
||||
{
|
||||
cudaPointerAttributes attributes;
|
||||
CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
|
||||
const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
|
||||
attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
|
||||
return is_on_host ? -1 : attributes.device;
|
||||
}
|
||||
|
||||
} // namespace device_context
|
||||
#endif
|
||||
176
icicle/include/hash/hash.cuh
Normal file
176
icicle/include/hash/hash.cuh
Normal file
@@ -0,0 +1,176 @@
|
||||
#pragma once
|
||||
#ifndef HASH_H
|
||||
#define HASH_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include <cassert>
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace hash
|
||||
* Includes classes and methods for describing hash functions.
|
||||
*/
|
||||
namespace hash {
|
||||
|
||||
/**
|
||||
* @struct SpongeConfig
|
||||
* Encodes sponge hash operations parameters.
|
||||
*/
|
||||
// Options consumed by SpongeHasher::hash_many / compress_many.
// Field order matters: default_sponge_config() initialises this struct positionally
// (ctx, are_inputs_on_device, are_outputs_on_device, is_async).
struct SpongeConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool
|
||||
are_outputs_on_device; /**< True if outputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the hash operations asynchronously. If set to `true`, the functions will be
                * non-blocking and you'd need to synchronize explicitly by running
                * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to `false`, the
                * functions synchronize `ctx.stream` before returning, blocking the current CPU thread.
                * Default value: false. */
|
||||
};
|
||||
|
||||
/**
|
||||
* A function that returns the default value of [SpongeConfig](@ref SpongeConfig) for the [SpongeHasher](@ref
|
||||
* SpongeHasher) class.
|
||||
* @return Default value of [SpongeConfig](@ref SpongeConfig).
|
||||
*/
|
||||
static SpongeConfig
|
||||
default_sponge_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
SpongeConfig config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputs_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @class SpongeHasher
|
||||
*
|
||||
* Can be inherited by a cryptographic permutation function to create a
|
||||
* [sponge](https://en.wikipedia.org/wiki/Sponge_function) construction out of it.
|
||||
*
|
||||
* @tparam PreImage type of inputs elements
|
||||
* @tparam Image type of state elements. Also used to describe the type of hash output
|
||||
*/
|
||||
template <typename PreImage, typename Image>
|
||||
class SpongeHasher
|
||||
{
|
||||
public:
|
||||
/// @brief the width of permutation state
|
||||
const unsigned int width;
|
||||
|
||||
/// @brief how many elements a state can fit per 1 permutation. Used with domain separation.
|
||||
const unsigned int preimage_max_length;
|
||||
|
||||
/// @brief portion of the state to absorb input into, or squeeze output from
|
||||
const unsigned int rate;
|
||||
|
||||
/// @brief start squeezing from this offset. Used with domain separation.
|
||||
const unsigned int offset;
|
||||
|
||||
SpongeHasher(unsigned int width, unsigned int preimage_max_length, unsigned int rate, unsigned int offset)
|
||||
: width(width), preimage_max_length(preimage_max_length), rate(rate), offset(offset)
|
||||
{
|
||||
assert(
|
||||
rate * sizeof(PreImage) <= preimage_max_length * sizeof(Image) &&
|
||||
"Input rate can not be bigger than preimage max length");
|
||||
}
|
||||
|
||||
virtual cudaError_t hash_2d(
|
||||
const Matrix<PreImage>* inputs,
|
||||
Image* states,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int output_len,
|
||||
uint64_t number_of_rows,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Absorb 2d is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
};
|
||||
|
||||
virtual cudaError_t compress_and_inject(
|
||||
const Matrix<PreImage>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
uint64_t number_of_rows,
|
||||
const Image* prev_layer,
|
||||
Image* next_layer,
|
||||
unsigned int digest_elements,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Compress and inject is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
}
|
||||
|
||||
/// @brief Permute aligned input and do squeeze
|
||||
/// @param input pointer to input allocated on-device
|
||||
/// @param out pointer to output allocated on-device
|
||||
cudaError_t compress_many(
|
||||
const Image* input,
|
||||
Image* out,
|
||||
unsigned int number_of_states,
|
||||
unsigned int output_len,
|
||||
const SpongeConfig& cfg) const
|
||||
{
|
||||
return hash_many((const PreImage*)input, out, number_of_states, width, output_len, cfg);
|
||||
}
|
||||
|
||||
virtual cudaError_t run_hash_many_kernel(
|
||||
const PreImage* input,
|
||||
Image* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Hash many kernel is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
};
|
||||
|
||||
cudaError_t hash_many(
|
||||
const PreImage* input,
|
||||
Image* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const SpongeConfig& cfg) const
|
||||
{
|
||||
const PreImage* d_input;
|
||||
PreImage* d_alloc_input;
|
||||
Image* d_output;
|
||||
if (!cfg.are_inputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_input, number_of_states * input_len * sizeof(PreImage), cfg.ctx.stream));
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_alloc_input, input, number_of_states * input_len * sizeof(PreImage), cudaMemcpyHostToDevice,
|
||||
cfg.ctx.stream));
|
||||
d_input = d_alloc_input;
|
||||
} else {
|
||||
d_input = input;
|
||||
}
|
||||
|
||||
if (!cfg.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_output, number_of_states * output_len * sizeof(Image), cfg.ctx.stream));
|
||||
} else {
|
||||
d_output = output;
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(run_hash_many_kernel(d_input, d_output, number_of_states, input_len, output_len, cfg.ctx));
|
||||
|
||||
if (!cfg.are_inputs_on_device) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_input, cfg.ctx.stream)); }
|
||||
if (!cfg.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
output, d_output, number_of_states * output_len * sizeof(Image), cudaMemcpyDeviceToHost, cfg.ctx.stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(d_output, cfg.ctx.stream));
|
||||
}
|
||||
|
||||
if (!cfg.is_async) CHK_IF_RETURN(cudaStreamSynchronize(cfg.ctx.stream));
|
||||
|
||||
return CHK_LAST();
|
||||
};
|
||||
};
|
||||
} // namespace hash
|
||||
|
||||
#endif
|
||||
@@ -6,6 +6,10 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
using namespace hash;
|
||||
|
||||
namespace keccak {
|
||||
/**
|
||||
* @struct KeccakConfig
|
||||
@@ -32,25 +36,6 @@ namespace keccak {
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the keccak hash over a sequence of preimages.
|
||||
* Takes {number_of_blocks * input_block_size} u64s of input and computes {number_of_blocks} outputs, each of size {D
|
||||
* / 64} u64
|
||||
* @tparam C - number of bits of capacity (c = b - r = 1600 - r). Only multiples of 64 are supported.
|
||||
* @tparam D - number of bits of output. Only multiples of 64 are supported.
|
||||
* @param input a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be of size [input_block_size](@ref input_block_size) * [number_of_blocks](@ref
|
||||
* number_of_blocks)}.
|
||||
* @param input_block_size - size of each input block in bytes. Should be divisible by 8.
|
||||
* @param number_of_blocks number of input and output blocks. One GPU thread processes one block
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be of size [output_block_size](@ref output_block_size) * [number_of_blocks](@ref
|
||||
* number_of_blocks)}
|
||||
*/
|
||||
template <int C, int D>
|
||||
cudaError_t
|
||||
keccak_hash(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config);
|
||||
} // namespace keccak
|
||||
|
||||
#endif
|
||||
14
icicle/include/matrix/matrix.cuh
Normal file
14
icicle/include/matrix/matrix.cuh
Normal file
@@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
#ifndef MATRIX_H
|
||||
#define MATRIX_H
|
||||
|
||||
namespace matrix {
|
||||
// Plain descriptor of a 2D block of elements of type T, passed by pointer to the hashing and
// tree-building entry points (e.g. SpongeHasher::hash_2d, mmcs_commit).
// NOTE(review): element layout (row- vs column-major) and whether `values` is a host or device
// pointer are not established in this header - confirm against the callers.
template <typename T>
|
||||
struct Matrix {
|
||||
T* values; // pointer to the matrix elements
|
||||
size_t width; // presumably the number of columns - TODO confirm
|
||||
size_t height; // presumably the number of rows - TODO confirm
|
||||
};
|
||||
} // namespace matrix
|
||||
|
||||
#endif
|
||||
128
icicle/include/merkle-tree/merkle.cuh
Normal file
128
icicle/include/merkle-tree/merkle.cuh
Normal file
@@ -0,0 +1,128 @@
|
||||
#pragma once
|
||||
#ifndef MERKLE_H
|
||||
#define MERKLE_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace hash;
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace merkle_tree
|
||||
* Implementation of the [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) builder,
|
||||
* parallelized for the use on GPU
|
||||
*/
|
||||
namespace merkle_tree {
|
||||
static constexpr size_t GIGA = 1024 * 1024 * 1024;
|
||||
|
||||
/// Bytes per stream
|
||||
static constexpr uint64_t STREAM_CHUNK_SIZE = GIGA;
|
||||
|
||||
/// Flattens the tree digests and sums them up to get the number of elements
/// needed to contain all the digests.
///
/// The first row holds `digest_elements` elements and every following row is `arity` times
/// larger; rows 0 through `height` (inclusive) are added together.
/// @return total number of digest elements over `height + 1` rows
static size_t get_digests_len(uint32_t height, uint32_t arity, uint32_t digest_elements)
{
  size_t digests_len = 0;
  size_t row_length = digest_elements;
  // Unsigned 64-bit counter: avoids comparing a signed int against the unsigned `height`
  // and always terminates, whatever 32-bit value `height` has.
  for (uint64_t level = 0; level <= height; ++level) {
    digests_len += row_length;
    row_length *= arity;
  }

  return digests_len;
}
|
||||
|
||||
/// Exchanges the two pointers that `r` and `s` point at (the pointees themselves are untouched).
template <typename T>
void swap(T** r, T** s)
{
  T* const held = *s;
  *s = *r;
  *r = held;
}
|
||||
|
||||
/// Returns floor(log2(number_of_elements)), i.e. how many times the value can be halved
/// before reaching zero, minus one. Both 0 and 1 yield 0.
static unsigned int get_height(uint64_t number_of_elements)
{
  unsigned int levels = 0;
  for (uint64_t rest = number_of_elements >> 1; rest != 0; rest >>= 1) {
    levels++;
  }
  return levels;
}
|
||||
|
||||
/**
|
||||
* @struct TreeBuilderConfig
|
||||
* Struct that encodes various Tree builder parameters.
|
||||
*/
|
||||
// Field order matters: default_merkle_config() initialises this struct positionally.
struct TreeBuilderConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
unsigned int arity; /**< Arity of the tree: each row is `arity` times larger than the row above it
                     * (see get_digests_len). Default value: 2. */
|
||||
unsigned int
|
||||
keep_rows; /**< How many rows of the Merkle tree should be written to output. '0' means all of them.
            * Default value: 0. */
|
||||
unsigned int
|
||||
digest_elements; /**< The size of output for each bottom layer hash and compression.
                  * Will also be equal to the size of the root of the tree. Default value: 1. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool
|
||||
are_outputs_on_device; /**< True if outputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the tree builder asynchronously. If set to `true`, the build_merkle_tree
                * function will be non-blocking and you'd need to synchronize it explicitly by running
                * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
                * function will block the current CPU thread. Default value: false. */
|
||||
};
|
||||
|
||||
/// Builds the default TreeBuilderConfig: binary tree, all rows kept, one element per digest,
/// inputs and outputs on host, blocking execution.
/// @param ctx Device context to store in the config. Defaults to the default device context.
static TreeBuilderConfig
default_merkle_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
{
  return TreeBuilderConfig{
    ctx,   // ctx
    2,     // arity
    0,     // keep_rows
    1,     // digest_elements
    false, // are_inputs_on_device
    false, // are_outputs_on_device
    false, // is_async
  };
}
|
||||
|
||||
/**
|
||||
* Builds the Merkle tree
|
||||
*
|
||||
* @param leaves a pointer to the leaves layer. May be allocated on device or on host, regulated by the config
|
||||
* Expected to have arity ^ (height) * input_block_len elements
|
||||
* @param digests a pointer to the digests storage. May only be allocated on the host
|
||||
* Expected to have `sum(digests_len * (arity ^ (i))) for i in [0..keep_rows]`
|
||||
* @param height the height of the merkle tree
|
||||
* @param input_block_len the size of input vectors at the bottom layer of the tree
|
||||
* # Algorithm
|
||||
* The function will split large tree into many subtrees of size that will fit `STREAM_CHUNK_SIZE`.
|
||||
* Each subtree is built in its own stream (there is a maximum number of streams)
|
||||
* After all subtrees are constructed - the function will combine the resulting sub-digests into the final top-tree
|
||||
*/
|
||||
template <typename Leaf, typename Digest>
|
||||
cudaError_t build_merkle_tree(
|
||||
const Leaf* inputs,
|
||||
Digest* digests,
|
||||
const SpongeHasher<Leaf, Digest>& compression,
|
||||
const SpongeHasher<Leaf, Digest>& bottom_layer,
|
||||
const TreeBuilderConfig& config);
|
||||
|
||||
template <typename Leaf, typename Digest>
|
||||
cudaError_t mmcs_commit(
|
||||
const Matrix<Leaf>* inputs,
|
||||
const unsigned int number_of_inputs,
|
||||
Digest* digests,
|
||||
const SpongeHasher<Leaf, Digest>& hasher,
|
||||
const SpongeHasher<Leaf, Digest>& compression,
|
||||
const TreeBuilderConfig& tree_config);
|
||||
} // namespace merkle_tree
|
||||
|
||||
#endif
|
||||
114
icicle/include/poseidon/constants.cuh
Normal file
114
icicle/include/poseidon/constants.cuh
Normal file
@@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
#ifndef POSEIDON_CONSTANTS_H
|
||||
#define POSEIDON_CONSTANTS_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace poseidon {
|
||||
#define FIRST_FULL_ROUNDS true
|
||||
#define SECOND_FULL_ROUNDS false
|
||||
|
||||
/**
|
||||
* For most of the Poseidon configurations this is the case
|
||||
* TODO: Add support for different full rounds numbers
|
||||
*/
|
||||
const int FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConstants
|
||||
* These constants are enough to define a Poseidon instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
// Parameter set of one Poseidon instance. The four pointers are not owned by this struct:
// copying or assigning it is shallow and only duplicates the pointer values.
template <typename S>
struct PoseidonConstants {
  // Shape of the permutation.
  unsigned int arity;
  unsigned int alpha;
  unsigned int partial_rounds;
  unsigned int full_rounds_half;
  // Constant tables (null until populated).
  S* round_constants = nullptr;
  S* mds_matrix = nullptr;
  S* non_sparse_matrix = nullptr;
  S* sparse_matrices = nullptr;
  // Domain separation tag, zero by default.
  S domain_tag = S::zero();

  PoseidonConstants() = default;
  PoseidonConstants(const PoseidonConstants& other) = default;

  // Member-by-member (shallow) copy of `other`; returns *this.
  PoseidonConstants<S>& operator=(PoseidonConstants<S> const& other)
  {
    // constant tables
    round_constants = other.round_constants;
    mds_matrix = other.mds_matrix;
    non_sparse_matrix = other.non_sparse_matrix;
    sparse_matrices = other.sparse_matrices;

    // scalar parameters
    arity = other.arity;
    alpha = other.alpha;
    partial_rounds = other.partial_rounds;
    full_rounds_half = other.full_rounds_half;

    domain_tag = other.domain_tag;

    return *this;
  }
};
|
||||
|
||||
/**
 * @class PoseidonKernelsConfiguration
 * Describes the logic of deriving CUDA kernels parameters
 * such as the number of threads and the number of blocks
 */
class PoseidonKernelsConfiguration
{
public:
  // The logic behind this is that 1 thread only works on 1 element
  // We have {width} elements in each state, and {number_of_states} states total.
  // The result is the largest multiple of {width} that does not exceed 256.
  // {width} must be in [1, 256]: 0 divides by zero here, and anything above 256 yields 0 threads.
  static int number_of_threads(unsigned int width) { return 256 / width * width; }

  // The partial rounds operates on the whole state, so we define
  // the parallelism params for processing a single hash preimage per thread
  static const int singlehash_block_size = 128;

  // How many whole states one block of number_of_threads(width) threads covers.
  static int hashes_per_block(unsigned int width) { return number_of_threads(width) / width; }

  // Number of blocks of number_of_threads(width) threads needed for {number_of_states * width}
  // threads, rounded up. The thread total is kept in size_t: narrowing it to int overflowed for
  // large batches even when the resulting block count itself fits in an int.
  static int number_of_full_blocks(unsigned int width, size_t number_of_states)
  {
    const size_t total_number_of_threads = number_of_states * width;
    const size_t threads_per_block = static_cast<size_t>(number_of_threads(width));
    return static_cast<int>(
      total_number_of_threads / threads_per_block + (total_number_of_threads % threads_per_block != 0 ? 1 : 0));
  }

  // Number of blocks of {singlehash_block_size} threads needed for one thread per state, rounded up.
  static int number_of_singlehash_blocks(size_t number_of_states)
  {
    return number_of_states / singlehash_block_size + static_cast<bool>(number_of_states % singlehash_block_size);
  }
};
|
||||
|
||||
using PKC = PoseidonKernelsConfiguration;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_optimized_poseidon_constants(
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
PoseidonConstants<S>* poseidon_constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t
|
||||
init_optimized_poseidon_constants(int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* constants);
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_optimized_poseidon_constants(PoseidonConstants<S>* constants, device_context::DeviceContext& ctx);
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -8,17 +8,18 @@ import numpy as np
|
||||
from poseidon import round_constants as rc, round_numbers as rn
|
||||
|
||||
# Modify these
|
||||
arity = 11
|
||||
p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 # grumpkin
|
||||
arity = 2
|
||||
p = 2 ** 31 - 1 # m31 (Mersenne-31)
|
||||
# p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 # grumpkin
|
||||
# p = 0x73EDA753299D7D483339D80809A1D80553BDA402FFFE5BFEFFFFFFFF00000001 # bls12-381
|
||||
# p = 0x12ab655e9a2ca55660b44d1e5c37b00159aa76fed00000010a11800000000001 # bls12-377
|
||||
# p = 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001 # bn254
|
||||
# p = 0x1ae3a4617c510eac63b05c06ca1493b1a22d9f300f5138f1ef3622fba094800170b5d44300000008508c00000000001 # bw6-761
|
||||
prime_bit_len = 255
|
||||
field_bytes = 32
|
||||
prime_bit_len = 31
|
||||
field_bytes = 4
|
||||
|
||||
# leave set to -1 if not sure
|
||||
full_round = -1
|
||||
full_round = 8
|
||||
half_full_round = full_round // 2
|
||||
# leave set to -1 if not sure
|
||||
partial_round = -1
|
||||
@@ -31,12 +32,12 @@ security_level = 128
|
||||
# F = GF(p)
|
||||
# F.primitive_element()
|
||||
#
|
||||
# primitive_element = None
|
||||
primitive_element = None
|
||||
# primitive_element = 7 # bls12-381
|
||||
# primitive_element = 22 # bls12-377
|
||||
# primitive_element = 5 # bn254
|
||||
# primitive_element = 15 # bw6-761
|
||||
primitive_element = 3 # grumpkin
|
||||
# primitive_element = 3 # grumpkin
|
||||
|
||||
# currently we only support alpha 5, if you need alpha other than 5 - feel free to reach out
|
||||
alpha = 5
|
||||
|
||||
508
icicle/include/poseidon/constants/m31_poseidon.h
Normal file
508
icicle/include/poseidon/constants/m31_poseidon.h
Normal file
@@ -0,0 +1,508 @@
|
||||
#pragma once
|
||||
#ifndef M31_POSEIDON_H
|
||||
#define M31_POSEIDON_H
|
||||
|
||||
namespace poseidon_constants_m31 {
|
||||
/**
|
||||
* This inner namespace contains optimized constants for running Poseidon.
|
||||
* These constants were generated using an algorithm defined at
|
||||
* https://spec.filecoin.io/algorithms/crypto/poseidon/
|
||||
* The number in the name corresponds to the arity of the hash function
|
||||
* Each array contains:
|
||||
* RoundConstants | MDSMatrix | Non-sparse matrix | Sparse matrices
|
||||
*/
|
||||
|
||||
int partial_rounds_2 = 7;
|
||||
|
||||
int partial_rounds_4 = 11;
|
||||
|
||||
int partial_rounds_8 = 12;
|
||||
|
||||
int partial_rounds_11 = 12;
|
||||
|
||||
unsigned char poseidon_constants_2[] = {
|
||||
0x33, 0x8b, 0x6d, 0x47, 0xbb, 0x97, 0x11, 0x67, 0x92, 0x9d, 0x55, 0x2d,
|
||||
0xee, 0x1e, 0x2e, 0x45, 0xfe, 0x35, 0x0e, 0x25, 0x7e, 0xc3, 0x4f, 0x70,
|
||||
0x4d, 0x0a, 0x8c, 0x18, 0xd9, 0x43, 0xa4, 0x61, 0xfb, 0x14, 0xd9, 0x14,
|
||||
0x99, 0x13, 0xb9, 0x30, 0xec, 0x3b, 0x8c, 0x16, 0xcc, 0xb2, 0x0b, 0x2e,
|
||||
0x9e, 0x18, 0xbf, 0x26, 0xb6, 0xb7, 0x2a, 0x44, 0x61, 0x29, 0xdb, 0x21,
|
||||
0x18, 0x84, 0x03, 0x4e, 0xef, 0x95, 0xf9, 0x45, 0xe3, 0xd8, 0xf2, 0x46,
|
||||
0x82, 0xb4, 0xc9, 0x5e, 0x5f, 0xf3, 0xb2, 0x4f, 0x61, 0x80, 0x50, 0x0f,
|
||||
0x0d, 0x7f, 0xe3, 0x1b, 0x23, 0xbd, 0x05, 0x2f, 0x0f, 0xb1, 0x60, 0x67,
|
||||
0xd8, 0x85, 0xdf, 0x57, 0x0c, 0x8c, 0xdf, 0x50, 0x9e, 0x65, 0x3c, 0x58,
|
||||
0x07, 0xbd, 0x29, 0x7e, 0xc5, 0xe5, 0xa7, 0x5a, 0x5a, 0x4b, 0x0c, 0x29,
|
||||
0x89, 0x9d, 0x14, 0x11, 0x8c, 0x20, 0xcb, 0x76, 0x4d, 0x56, 0x2d, 0x4a,
|
||||
0x10, 0xda, 0xaf, 0x0a, 0x65, 0x9d, 0x98, 0x3e, 0xa1, 0xac, 0x57, 0x46,
|
||||
0xcb, 0xe8, 0xfc, 0x5b, 0xd4, 0x43, 0x4b, 0x63, 0x1b, 0x13, 0x4b, 0x1f,
|
||||
0xed, 0xac, 0xbf, 0x30, 0x27, 0x15, 0xac, 0x53, 0x4b, 0x27, 0x61, 0x3e,
|
||||
0x37, 0xc3, 0x65, 0x74, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x00, 0x20,
|
||||
0x33, 0x33, 0x33, 0x33, 0x00, 0x00, 0x00, 0x20, 0x33, 0x33, 0x33, 0x33,
|
||||
0xaa, 0xaa, 0xaa, 0x6a, 0x33, 0x33, 0x33, 0x33, 0xaa, 0xaa, 0xaa, 0x6a,
|
||||
0x6d, 0xdb, 0xb6, 0x6d, 0x55, 0x55, 0x55, 0x55, 0xc0, 0x72, 0x8d, 0x36,
|
||||
0x2c, 0xe5, 0xc0, 0x51, 0x00, 0x00, 0x00, 0x20, 0x0b, 0xd5, 0x67, 0x6c,
|
||||
0x6c, 0x67, 0x2c, 0x13, 0x33, 0x33, 0x33, 0x33, 0x6c, 0x67, 0x2c, 0x13,
|
||||
0xe6, 0xb8, 0x2c, 0x62, 0x55, 0x55, 0x55, 0x55, 0x15, 0x1f, 0xaf, 0x6a,
|
||||
0xd9, 0xa8, 0x14, 0x44, 0xae, 0xb0, 0x38, 0x4b, 0x17, 0x76, 0xd9, 0x39,
|
||||
0x55, 0x55, 0x55, 0x55, 0x28, 0xef, 0x9d, 0x4f, 0xc7, 0x3b, 0xa6, 0x24,
|
||||
0x84, 0x5b, 0x79, 0x6f, 0xde, 0x4f, 0x8f, 0x3d, 0x55, 0x55, 0x55, 0x55,
|
||||
0x54, 0xc2, 0xb2, 0x00, 0x5a, 0xed, 0x68, 0x0c, 0xeb, 0xd4, 0xc4, 0x61,
|
||||
0x02, 0x8c, 0x85, 0x27, 0x55, 0x55, 0x55, 0x55, 0xe4, 0xc5, 0xbd, 0x0a,
|
||||
0xf6, 0xec, 0x75, 0x26, 0xe0, 0xdb, 0xd8, 0x52, 0xdf, 0x28, 0xff, 0x33,
|
||||
0x55, 0x55, 0x55, 0x55, 0xac, 0x68, 0x06, 0x00, 0xc9, 0xff, 0x91, 0x19,
|
||||
0xb1, 0x12, 0x2b, 0x19, 0xa2, 0xdd, 0x47, 0x39, 0x55, 0x55, 0x55, 0x55,
|
||||
0xd5, 0x03, 0x00, 0x00, 0x45, 0xc8, 0xcc, 0x4c, 0x55, 0x55, 0x55, 0x35,
|
||||
0x8d, 0xd6, 0x68, 0x3d, 0x55, 0x55, 0x55, 0x55, 0x03, 0x00, 0x00, 0x00,
|
||||
0x64, 0x66, 0x66, 0x26, 0x00, 0x00, 0x00, 0x20, 0x33, 0x33, 0x33, 0x33
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_4[] = {
|
||||
0xdb, 0x64, 0xa5, 0x32, 0xd6, 0x3d, 0x12, 0x6e, 0x65, 0x66, 0x46, 0x59,
|
||||
0x2a, 0x64, 0x51, 0x3b, 0xaf, 0xbe, 0x72, 0x0b, 0x66, 0x5f, 0x5c, 0x6c,
|
||||
0x66, 0x11, 0x8c, 0x61, 0x99, 0x24, 0x99, 0x14, 0x1d, 0x5f, 0x67, 0x0a,
|
||||
0x4d, 0xab, 0xc4, 0x1e, 0x43, 0xb2, 0x09, 0x58, 0xc0, 0x27, 0x4c, 0x5b,
|
||||
0xf0, 0x0c, 0xf5, 0x12, 0xc9, 0x2f, 0x88, 0x4f, 0x59, 0x52, 0x5b, 0x6a,
|
||||
0x73, 0x90, 0x55, 0x5b, 0xaf, 0x47, 0x55, 0x0d, 0xa7, 0xc2, 0x0c, 0x6e,
|
||||
0xe6, 0xd6, 0x4e, 0x30, 0x9e, 0x75, 0x47, 0x12, 0xca, 0x93, 0xd1, 0x5b,
|
||||
0x64, 0x27, 0xfc, 0x60, 0x6c, 0x16, 0x52, 0x20, 0xf5, 0xe0, 0x01, 0x15,
|
||||
0x27, 0xf9, 0x96, 0x7f, 0xa0, 0x38, 0xad, 0x3c, 0x95, 0xd3, 0xe4, 0x32,
|
||||
0x57, 0x95, 0x5a, 0x6b, 0x12, 0xcc, 0xdc, 0x18, 0x2b, 0xdd, 0xa4, 0x66,
|
||||
0xbf, 0xe7, 0x96, 0x15, 0x85, 0x87, 0x6a, 0x1f, 0x15, 0x19, 0x9c, 0x65,
|
||||
0xef, 0x24, 0xaa, 0x2c, 0x3f, 0x6b, 0xbc, 0x6b, 0x54, 0x24, 0x2c, 0x17,
|
||||
0xf1, 0x7a, 0x8d, 0x57, 0x90, 0xa4, 0xd4, 0x4a, 0x12, 0x06, 0x77, 0x6a,
|
||||
0xe8, 0x6b, 0xd9, 0x51, 0x80, 0x72, 0xa1, 0x31, 0xce, 0xa8, 0x59, 0x10,
|
||||
0x0c, 0x90, 0xd4, 0x10, 0x8e, 0x60, 0x54, 0x1c, 0xe7, 0xfd, 0x42, 0x3a,
|
||||
0x73, 0xc1, 0xcc, 0x4f, 0x58, 0xbb, 0x99, 0x7c, 0xd2, 0x51, 0xda, 0x43,
|
||||
0xea, 0x6e, 0xe8, 0x16, 0xb2, 0x51, 0x53, 0x61, 0x7e, 0x68, 0x44, 0x3c,
|
||||
0x33, 0x33, 0x33, 0x33, 0xaa, 0xaa, 0xaa, 0x6a, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71, 0xaa, 0xaa, 0xaa, 0x6a,
|
||||
0x6d, 0xdb, 0xb6, 0x6d, 0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x99, 0x99, 0x99, 0x59, 0x6d, 0xdb, 0xb6, 0x6d, 0x00, 0x00, 0x00, 0x10,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0x33, 0x33, 0x33, 0x33, 0xae, 0x9d, 0xba, 0x61,
|
||||
0x09, 0xf2, 0xee, 0x53, 0x5e, 0x5c, 0xe8, 0x61, 0x8e, 0x1a, 0x60, 0x6c,
|
||||
0xaa, 0xaa, 0xaa, 0x6a, 0xff, 0x1a, 0xb7, 0x09, 0x1d, 0x84, 0x75, 0x5e,
|
||||
0x88, 0x5e, 0x36, 0x25, 0x6b, 0xd4, 0xdd, 0x65, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x1d, 0x84, 0x75, 0x5e, 0x10, 0x9d, 0x2d, 0x63, 0xa7, 0x62, 0xfc, 0x1f,
|
||||
0xe2, 0x43, 0x63, 0x14, 0x00, 0x00, 0x00, 0x10, 0x88, 0x5e, 0x36, 0x25,
|
||||
0xa7, 0x62, 0xfc, 0x1f, 0x47, 0xa0, 0x19, 0x6f, 0x48, 0x1f, 0x4e, 0x22,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x6b, 0xd4, 0xdd, 0x65, 0xe2, 0x43, 0x63, 0x14,
|
||||
0x48, 0x1f, 0x4e, 0x22, 0xb7, 0x4e, 0x73, 0x01, 0x33, 0x33, 0x33, 0x33,
|
||||
0x84, 0xdd, 0xf7, 0x08, 0x6f, 0xc5, 0x14, 0x63, 0xb6, 0x22, 0x01, 0x3d,
|
||||
0xcd, 0xab, 0x7d, 0x62, 0xac, 0x7e, 0x61, 0x57, 0x40, 0x6b, 0xc5, 0x45,
|
||||
0x77, 0xbc, 0x02, 0x18, 0x8c, 0x66, 0xda, 0x74, 0x33, 0x33, 0x33, 0x33,
|
||||
0x01, 0x9d, 0x33, 0x55, 0xed, 0x7d, 0x75, 0x63, 0x41, 0x92, 0x33, 0x76,
|
||||
0x6b, 0xd5, 0x10, 0x23, 0x1a, 0xc4, 0x49, 0x5b, 0x0c, 0x86, 0x5a, 0x60,
|
||||
0x23, 0xe5, 0xd8, 0x1c, 0x43, 0xe9, 0xe2, 0x0d, 0x33, 0x33, 0x33, 0x33,
|
||||
0x1b, 0x68, 0xec, 0x17, 0x0e, 0x3f, 0x34, 0x1a, 0xb0, 0x28, 0xe9, 0x6c,
|
||||
0xc0, 0xf7, 0x3e, 0x79, 0xdc, 0x08, 0x9e, 0x32, 0x45, 0xde, 0xea, 0x73,
|
||||
0x7a, 0xc4, 0xb4, 0x0d, 0x65, 0xb6, 0x61, 0x04, 0x33, 0x33, 0x33, 0x33,
|
||||
0x41, 0x01, 0x02, 0x6b, 0xd8, 0x62, 0x6b, 0x47, 0x47, 0xd9, 0x7e, 0x72,
|
||||
0x4f, 0x80, 0x31, 0x54, 0x8b, 0x5e, 0x3e, 0x26, 0x64, 0x16, 0xe2, 0x51,
|
||||
0xf4, 0xa6, 0xed, 0x35, 0xc3, 0xe9, 0xc5, 0x41, 0x33, 0x33, 0x33, 0x33,
|
||||
0xd5, 0x3f, 0xed, 0x11, 0xf5, 0x0f, 0x56, 0x41, 0xf6, 0x0d, 0xf3, 0x78,
|
||||
0xb0, 0x78, 0xa1, 0x7d, 0x5d, 0x33, 0xc4, 0x5e, 0xa6, 0xd9, 0x47, 0x4c,
|
||||
0x07, 0xc3, 0x30, 0x5a, 0x91, 0x10, 0x31, 0x20, 0x33, 0x33, 0x33, 0x33,
|
||||
0xa5, 0xec, 0xe5, 0x25, 0xe6, 0xa7, 0x4e, 0x01, 0xee, 0x3a, 0xe7, 0x62,
|
||||
0x02, 0xfd, 0xf9, 0x08, 0xdd, 0x91, 0x3f, 0x2d, 0xca, 0xbc, 0xb5, 0x2c,
|
||||
0x54, 0x9e, 0xd4, 0x78, 0x6b, 0x18, 0x94, 0x21, 0x33, 0x33, 0x33, 0x33,
|
||||
0xe6, 0xb3, 0xd2, 0x2e, 0x49, 0xdb, 0xa8, 0x52, 0x5f, 0x6a, 0x75, 0x59,
|
||||
0xd5, 0x45, 0x5c, 0x73, 0x40, 0xe4, 0xd8, 0x2a, 0x8c, 0xe6, 0xda, 0x50,
|
||||
0x5f, 0x4f, 0x18, 0x5d, 0xf4, 0xa4, 0xf4, 0x46, 0x33, 0x33, 0x33, 0x33,
|
||||
0x3e, 0x90, 0x5b, 0x3a, 0x55, 0x96, 0x22, 0x7c, 0xd9, 0x64, 0x36, 0x4e,
|
||||
0x0b, 0xec, 0x66, 0x65, 0xac, 0x55, 0xa9, 0x19, 0x50, 0x87, 0x49, 0x1a,
|
||||
0x1f, 0x78, 0x89, 0x36, 0x25, 0x2a, 0x06, 0x55, 0x33, 0x33, 0x33, 0x33,
|
||||
0x6b, 0xf1, 0x61, 0x67, 0x67, 0x00, 0xc5, 0x24, 0x9e, 0xd1, 0x94, 0x6f,
|
||||
0xbf, 0x8b, 0xaf, 0x2d, 0x69, 0x9c, 0xb7, 0x62, 0xf8, 0x0a, 0x43, 0x13,
|
||||
0x3c, 0xc0, 0x48, 0x3e, 0x9f, 0x3f, 0xa8, 0x2c, 0x33, 0x33, 0x33, 0x33,
|
||||
0x9d, 0x5b, 0xb2, 0x2b, 0x62, 0x05, 0x39, 0x20, 0x52, 0x1f, 0xe8, 0x05,
|
||||
0x1b, 0x24, 0xc0, 0x13, 0x11, 0x11, 0x11, 0x11, 0x9c, 0x6a, 0x35, 0x45,
|
||||
0xf6, 0x7f, 0x5c, 0x4c, 0x9f, 0xc4, 0x8f, 0x1f, 0x33, 0x33, 0x33, 0x33,
|
||||
0xb1, 0xaa, 0xaa, 0x2a, 0xcb, 0xb6, 0x6d, 0x5b, 0x34, 0x49, 0x92, 0x24,
|
||||
0x90, 0x65, 0x59, 0x56, 0xaa, 0xaa, 0xaa, 0x6a, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_8[] = {
|
||||
0x90, 0xaf, 0x71, 0x3e, 0xa3, 0xbe, 0x5a, 0x30, 0xd4, 0x1b, 0x6f, 0x5d,
|
||||
0xeb, 0x36, 0x6b, 0x53, 0x14, 0xc0, 0x30, 0x13, 0xd5, 0xf8, 0x0b, 0x1c,
|
||||
0xa8, 0x66, 0xf1, 0x3c, 0xbd, 0x64, 0xa3, 0x6c, 0x06, 0x5e, 0x95, 0x7c,
|
||||
0xee, 0xc4, 0x0a, 0x0f, 0x37, 0x03, 0xba, 0x6d, 0x20, 0x85, 0xf1, 0x2c,
|
||||
0xee, 0x59, 0x21, 0x11, 0x42, 0xae, 0xb7, 0x3c, 0x73, 0xb4, 0xd6, 0x71,
|
||||
0x6a, 0x29, 0x40, 0x03, 0x86, 0xd8, 0x32, 0x68, 0x61, 0x62, 0x62, 0x32,
|
||||
0x44, 0x5d, 0xcc, 0x38, 0x76, 0x0f, 0xbc, 0x1f, 0xc9, 0x6e, 0x67, 0x1d,
|
||||
0x95, 0x35, 0x10, 0x79, 0x45, 0xaa, 0x0f, 0x7c, 0x73, 0xfa, 0x5d, 0x3f,
|
||||
0x53, 0xf2, 0xdc, 0x21, 0x37, 0xfa, 0x15, 0x04, 0xfd, 0x31, 0x3d, 0x5d,
|
||||
0x5d, 0xe6, 0x1d, 0x4a, 0xb3, 0x2b, 0xa2, 0x07, 0x2d, 0x48, 0x07, 0x2b,
|
||||
0x92, 0x1c, 0x31, 0x52, 0x6c, 0xd3, 0x32, 0x2f, 0x0f, 0xdd, 0x82, 0x7d,
|
||||
0x41, 0x0e, 0x81, 0x7e, 0x60, 0xfb, 0x49, 0x7b, 0xe5, 0x39, 0x3d, 0x75,
|
||||
0x6d, 0xcf, 0x02, 0x77, 0x0d, 0xf6, 0xf8, 0x0c, 0x43, 0xae, 0x62, 0x5e,
|
||||
0x26, 0x36, 0x9e, 0x3a, 0x10, 0xe3, 0x59, 0x4b, 0x3a, 0x59, 0x49, 0x73,
|
||||
0x31, 0x20, 0xb9, 0x40, 0x39, 0xed, 0xaf, 0x37, 0x6d, 0x5c, 0x4c, 0x6a,
|
||||
0xce, 0xca, 0xc4, 0x33, 0x53, 0x96, 0x92, 0x1d, 0xb2, 0xa1, 0xac, 0x65,
|
||||
0xbb, 0x43, 0xc4, 0x16, 0xf9, 0x38, 0x10, 0x67, 0x3d, 0xbb, 0x28, 0x7a,
|
||||
0x2b, 0x1e, 0x65, 0x36, 0x07, 0x14, 0x36, 0x3c, 0xcb, 0xdf, 0x03, 0x6b,
|
||||
0x03, 0x7b, 0xe6, 0x67, 0x79, 0x2a, 0x08, 0x47, 0xb7, 0x8f, 0x9c, 0x7e,
|
||||
0x54, 0xde, 0x08, 0x0a, 0xf8, 0x99, 0x24, 0x6f, 0x64, 0x78, 0x80, 0x5f,
|
||||
0x43, 0x76, 0x77, 0x40, 0x12, 0x62, 0x71, 0x10, 0x35, 0xf5, 0xdd, 0x0a,
|
||||
0x06, 0xff, 0x9b, 0x7b, 0xd8, 0x1a, 0xf3, 0x50, 0x1d, 0xc3, 0x8c, 0x60,
|
||||
0xe0, 0x61, 0xf5, 0x3d, 0xf9, 0xbf, 0xe4, 0x38, 0x78, 0xbf, 0x59, 0x0e,
|
||||
0xed, 0xc9, 0x4d, 0x0b, 0xb1, 0x7a, 0x10, 0x2b, 0x84, 0x27, 0x07, 0x70,
|
||||
0x5d, 0xc0, 0xa4, 0x7e, 0x9c, 0xf0, 0xf6, 0x69, 0x89, 0x6c, 0xc5, 0x39,
|
||||
0x4a, 0x7d, 0x5e, 0x26, 0x2f, 0x08, 0x9d, 0x05, 0xdc, 0x71, 0xec, 0x08,
|
||||
0x2b, 0xca, 0x68, 0x14, 0x42, 0xf6, 0xe6, 0x0a, 0x2f, 0xa5, 0x34, 0x6d,
|
||||
0x95, 0xaa, 0x80, 0x55, 0x23, 0x0f, 0x5f, 0x20, 0xbe, 0x4d, 0x0b, 0x20,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x50, 0x05, 0xd7, 0x30, 0x09, 0x94, 0x4f, 0x13,
|
||||
0x11, 0x86, 0x4b, 0x61, 0x74, 0x8b, 0x94, 0x0e, 0x7e, 0x5d, 0x93, 0x27,
|
||||
0xeb, 0xb6, 0x4b, 0x61, 0x90, 0x3f, 0x9b, 0x7d, 0x10, 0xe9, 0x16, 0x06,
|
||||
0x99, 0x99, 0x99, 0x59, 0x4f, 0xf6, 0x15, 0x6b, 0x84, 0x8c, 0xe0, 0x5f,
|
||||
0x88, 0x9e, 0xb2, 0x08, 0x32, 0x36, 0xe3, 0x25, 0x64, 0x0a, 0xf5, 0x6f,
|
||||
0x80, 0xff, 0x8e, 0x6f, 0xcd, 0xb5, 0x72, 0x12, 0x90, 0xa2, 0x7a, 0x09,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x84, 0x8c, 0xe0, 0x5f, 0xf5, 0x67, 0x02, 0x2d,
|
||||
0x71, 0x83, 0xf0, 0x55, 0x81, 0xa2, 0x81, 0x4b, 0xec, 0xff, 0xb0, 0x6b,
|
||||
0x17, 0x41, 0xd6, 0x36, 0xf3, 0x16, 0x58, 0x23, 0x49, 0x90, 0xa2, 0x17,
|
||||
0x55, 0x55, 0x55, 0x35, 0x88, 0x9e, 0xb2, 0x08, 0x71, 0x83, 0xf0, 0x55,
|
||||
0x27, 0x2a, 0xb0, 0x29, 0x0b, 0xe4, 0x53, 0x70, 0x7f, 0xeb, 0x60, 0x74,
|
||||
0xb9, 0x92, 0xa9, 0x4b, 0x51, 0x41, 0x0e, 0x56, 0x1b, 0xe4, 0x67, 0x43,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0x32, 0x36, 0xe3, 0x25, 0x81, 0xa2, 0x81, 0x4b,
|
||||
0x0b, 0xe4, 0x53, 0x70, 0x73, 0x99, 0xf0, 0x02, 0x1a, 0xf7, 0xe1, 0x40,
|
||||
0x18, 0xc4, 0x58, 0x3a, 0xcc, 0xf5, 0x0b, 0x18, 0xf0, 0x39, 0xab, 0x7a,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x64, 0x0a, 0xf5, 0x6f, 0xec, 0xff, 0xb0, 0x6b,
|
||||
0x7f, 0xeb, 0x60, 0x74, 0x1a, 0xf7, 0xe1, 0x40, 0xf7, 0xfc, 0xbe, 0x7f,
|
||||
0xbf, 0x63, 0xc5, 0x05, 0x15, 0x3c, 0x9f, 0x2b, 0x9b, 0x77, 0xb0, 0x44,
|
||||
0x11, 0x11, 0x11, 0x11, 0x80, 0xff, 0x8e, 0x6f, 0x17, 0x41, 0xd6, 0x36,
|
||||
0xb9, 0x92, 0xa9, 0x4b, 0x18, 0xc4, 0x58, 0x3a, 0xbf, 0x63, 0xc5, 0x05,
|
||||
0x2f, 0x5c, 0x3c, 0x09, 0x25, 0xaf, 0xdf, 0x11, 0x21, 0x7d, 0x95, 0x58,
|
||||
0x00, 0x00, 0x00, 0x08, 0xcd, 0xb5, 0x72, 0x12, 0xf3, 0x16, 0x58, 0x23,
|
||||
0x51, 0x41, 0x0e, 0x56, 0xcc, 0xf5, 0x0b, 0x18, 0x15, 0x3c, 0x9f, 0x2b,
|
||||
0x25, 0xaf, 0xdf, 0x11, 0x38, 0x50, 0xe9, 0x16, 0x12, 0xb8, 0xc8, 0x17,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x90, 0xa2, 0x7a, 0x09, 0x49, 0x90, 0xa2, 0x17,
|
||||
0x1b, 0xe4, 0x67, 0x43, 0xf0, 0x39, 0xab, 0x7a, 0x9b, 0x77, 0xb0, 0x44,
|
||||
0x21, 0x7d, 0x95, 0x58, 0x12, 0xb8, 0xc8, 0x17, 0x5a, 0xfc, 0xf7, 0x5c,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0xdb, 0x50, 0x89, 0x38, 0x5f, 0x88, 0xe3, 0x32,
|
||||
0x8b, 0xb4, 0x3b, 0x6c, 0x95, 0x0a, 0xf1, 0x41, 0xe6, 0x0a, 0x52, 0x7d,
|
||||
0xd1, 0x0d, 0xb1, 0x57, 0x9b, 0xd2, 0xf4, 0x1d, 0x80, 0x17, 0xb2, 0x42,
|
||||
0x9c, 0x40, 0x6e, 0x2f, 0x63, 0xa7, 0x42, 0x77, 0xf9, 0x37, 0xd1, 0x43,
|
||||
0x98, 0xd1, 0xec, 0x50, 0x91, 0x26, 0xfa, 0x4e, 0x0c, 0x9e, 0xcc, 0x31,
|
||||
0x52, 0xf4, 0x20, 0x5d, 0x2a, 0x20, 0xeb, 0x1b, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x54, 0x29, 0xf4, 0x4a, 0xde, 0x91, 0xf6, 0x54, 0x8b, 0xed, 0x18, 0x26,
|
||||
0x71, 0x24, 0x22, 0x34, 0xb7, 0xaf, 0x61, 0x27, 0x7a, 0x0a, 0x21, 0x7f,
|
||||
0x9f, 0xfe, 0xa1, 0x53, 0x26, 0x97, 0x6b, 0x5b, 0xf4, 0xea, 0xef, 0x4a,
|
||||
0x4b, 0x03, 0xa0, 0x7c, 0xe6, 0x64, 0x69, 0x47, 0x76, 0xf7, 0x2d, 0x0b,
|
||||
0x6f, 0xd5, 0x2c, 0x45, 0x52, 0xc1, 0x5c, 0x46, 0x25, 0x38, 0xab, 0x79,
|
||||
0x64, 0xed, 0xe7, 0x57, 0x71, 0x1c, 0xc7, 0x71, 0x94, 0xc2, 0xb7, 0x7f,
|
||||
0xaf, 0x0d, 0x61, 0x4c, 0xa3, 0x86, 0x8e, 0x45, 0xdc, 0x73, 0xe3, 0x77,
|
||||
0x71, 0xed, 0x21, 0x7d, 0x4b, 0x8e, 0xc7, 0x52, 0x39, 0x5d, 0x49, 0x1d,
|
||||
0x75, 0x35, 0xed, 0x09, 0xc6, 0x02, 0x3b, 0x22, 0xb8, 0x91, 0x07, 0x13,
|
||||
0x7f, 0xbf, 0x15, 0x7f, 0xb5, 0xbe, 0x0a, 0x5c, 0xbc, 0x75, 0x54, 0x61,
|
||||
0x6c, 0x2f, 0x28, 0x5f, 0xff, 0xf0, 0x7b, 0x67, 0x11, 0x8e, 0x70, 0x29,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0xe6, 0xfc, 0x29, 0x07, 0xbd, 0x0c, 0x4d, 0x5f,
|
||||
0x57, 0xb7, 0x87, 0x41, 0xec, 0x48, 0xda, 0x18, 0x78, 0x41, 0xb8, 0x6d,
|
||||
0xde, 0x7e, 0x47, 0x5a, 0x13, 0x03, 0xc5, 0x52, 0x2e, 0xee, 0xf3, 0x3f,
|
||||
0x06, 0xd0, 0xcd, 0x48, 0x77, 0x2a, 0xcd, 0x7e, 0x35, 0xee, 0x74, 0x63,
|
||||
0x3e, 0x26, 0x65, 0x64, 0x37, 0xa1, 0xfb, 0x7a, 0x03, 0x44, 0xa8, 0x70,
|
||||
0x2f, 0x03, 0x27, 0x1e, 0xb3, 0x02, 0x3e, 0x4a, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0xfd, 0xe1, 0xfe, 0x3c, 0x88, 0x1c, 0x36, 0x53, 0x36, 0x31, 0x5a, 0x32,
|
||||
0x88, 0x7b, 0xa6, 0x17, 0x40, 0x31, 0xe4, 0x0a, 0xb3, 0x70, 0x8f, 0x4f,
|
||||
0xc3, 0xa2, 0xd7, 0x06, 0x34, 0x9d, 0x4a, 0x71, 0x5b, 0xfa, 0x79, 0x25,
|
||||
0xe8, 0x6f, 0x05, 0x65, 0xc1, 0x4a, 0xee, 0x5c, 0x9a, 0xb2, 0x83, 0x05,
|
||||
0xb0, 0x89, 0x77, 0x2e, 0xc1, 0x56, 0x34, 0x08, 0x50, 0xf5, 0xde, 0x12,
|
||||
0xae, 0x68, 0xc2, 0x1b, 0x71, 0x1c, 0xc7, 0x71, 0xb3, 0x84, 0x6e, 0x4f,
|
||||
0xae, 0x74, 0x57, 0x4f, 0x56, 0xf3, 0xfc, 0x48, 0xfa, 0x73, 0xd7, 0x0e,
|
||||
0x8a, 0xc5, 0x35, 0x4d, 0xf6, 0x26, 0x15, 0x2a, 0xcf, 0xb5, 0x2d, 0x64,
|
||||
0xd1, 0x2a, 0x84, 0x43, 0xab, 0xc0, 0xec, 0x60, 0xa9, 0xbc, 0x09, 0x11,
|
||||
0xfd, 0x06, 0xea, 0x1e, 0xba, 0x29, 0x77, 0x6c, 0xb1, 0x37, 0xa5, 0x42,
|
||||
0x1c, 0x9b, 0x58, 0x37, 0xa8, 0xb7, 0xae, 0x3e, 0x6a, 0xf8, 0x63, 0x25,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x22, 0xa0, 0x75, 0x4e, 0x17, 0x33, 0x99, 0x7c,
|
||||
0x97, 0x97, 0x30, 0x04, 0xbc, 0x22, 0x6d, 0x7c, 0xb3, 0xd7, 0xd9, 0x56,
|
||||
0x4e, 0xef, 0x40, 0x5e, 0x02, 0x05, 0x51, 0x1e, 0x0c, 0x32, 0xb7, 0x06,
|
||||
0x41, 0x16, 0x80, 0x33, 0xc2, 0xdd, 0x8f, 0x18, 0x65, 0xa3, 0xe1, 0x4a,
|
||||
0xdb, 0xb4, 0x5d, 0x78, 0xf3, 0x99, 0x48, 0x3e, 0x04, 0x5b, 0xb9, 0x09,
|
||||
0xd2, 0x3d, 0x14, 0x05, 0x69, 0x50, 0xe9, 0x57, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x0d, 0x72, 0x37, 0x6c, 0xe3, 0xd1, 0x57, 0x2f, 0x9e, 0xb7, 0xe1, 0x30,
|
||||
0x22, 0xce, 0xe5, 0x66, 0x45, 0x7b, 0x06, 0x0e, 0x06, 0x66, 0xdd, 0x11,
|
||||
0xef, 0xdf, 0x61, 0x52, 0x7d, 0xb9, 0xcf, 0x1e, 0x97, 0xbe, 0x55, 0x00,
|
||||
0x94, 0xcb, 0x50, 0x7c, 0xa0, 0x83, 0x1c, 0x57, 0xf3, 0x72, 0x8c, 0x40,
|
||||
0x07, 0x32, 0x39, 0x54, 0xe8, 0x5a, 0x10, 0x7b, 0x09, 0xc2, 0x02, 0x58,
|
||||
0xb0, 0xeb, 0x23, 0x51, 0x71, 0x1c, 0xc7, 0x71, 0xf0, 0xfd, 0x78, 0x2c,
|
||||
0xe7, 0xa8, 0x53, 0x7c, 0xdd, 0xf6, 0xa3, 0x2b, 0xa9, 0x51, 0xf4, 0x33,
|
||||
0x1d, 0x4d, 0x13, 0x0e, 0x53, 0x6b, 0xde, 0x6b, 0x48, 0x46, 0xa0, 0x01,
|
||||
0xbf, 0x74, 0xf2, 0x14, 0xe5, 0x99, 0x3d, 0x72, 0x37, 0x8e, 0xa9, 0x44,
|
||||
0x61, 0xed, 0xdd, 0x3b, 0x7c, 0x11, 0x28, 0x12, 0xd5, 0xd6, 0x27, 0x78,
|
||||
0x4e, 0xf8, 0xe4, 0x3d, 0xdc, 0x5c, 0x92, 0x0c, 0xea, 0x5b, 0xe2, 0x44,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x64, 0x55, 0xb2, 0x0d, 0x54, 0x7f, 0x64, 0x72,
|
||||
0x8e, 0xe1, 0x7b, 0x52, 0xf5, 0xe4, 0x20, 0x13, 0xd1, 0xd4, 0x5d, 0x4c,
|
||||
0x33, 0x3d, 0xb6, 0x55, 0x26, 0xed, 0xb0, 0x75, 0xa0, 0xf2, 0x72, 0x51,
|
||||
0x6b, 0xc5, 0x37, 0x23, 0x0d, 0x1d, 0xf5, 0x6f, 0xa6, 0x83, 0x5f, 0x3e,
|
||||
0x1e, 0xb5, 0x18, 0x23, 0xc8, 0x40, 0xae, 0x63, 0x68, 0x79, 0x8e, 0x56,
|
||||
0xb0, 0x33, 0x43, 0x08, 0x5b, 0xac, 0x52, 0x39, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x9d, 0xf2, 0x00, 0x73, 0xf8, 0x96, 0xbb, 0x43, 0x5b, 0x59, 0xce, 0x07,
|
||||
0xbb, 0x11, 0xc8, 0x43, 0xde, 0xea, 0xb7, 0x34, 0x51, 0xbf, 0xa7, 0x2d,
|
||||
0x33, 0x35, 0xc2, 0x40, 0x1c, 0x81, 0x60, 0x63, 0x60, 0x0b, 0xb6, 0x60,
|
||||
0xbf, 0xb9, 0x38, 0x0c, 0x02, 0x54, 0x53, 0x20, 0xd9, 0xf9, 0xeb, 0x2f,
|
||||
0x7e, 0x5b, 0xdf, 0x58, 0x4b, 0x99, 0x8e, 0x04, 0x27, 0xb4, 0x18, 0x78,
|
||||
0xd6, 0x37, 0x16, 0x60, 0x71, 0x1c, 0xc7, 0x71, 0x74, 0x66, 0x66, 0x66,
|
||||
0xb2, 0xf1, 0x94, 0x20, 0xad, 0x2f, 0xba, 0x68, 0x6a, 0x33, 0xfe, 0x6e,
|
||||
0xa5, 0x51, 0xec, 0x44, 0xab, 0x05, 0x7e, 0x60, 0x48, 0x6b, 0xa5, 0x56,
|
||||
0x38, 0x3d, 0xc7, 0x24, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_11[] = {
|
||||
0xb0, 0xf1, 0x1f, 0x2e, 0xf8, 0x8b, 0xb5, 0x07, 0x8d, 0xc4, 0xe1, 0x46,
|
||||
0x99, 0x23, 0x9f, 0x06, 0xcc, 0x64, 0x13, 0x45, 0x9e, 0xb1, 0xdf, 0x5f,
|
||||
0xfa, 0x8e, 0x0f, 0x6f, 0x33, 0xd8, 0xfe, 0x19, 0x0a, 0x25, 0x8b, 0x20,
|
||||
0xe1, 0x2c, 0xcc, 0x36, 0x17, 0x3f, 0x03, 0x05, 0xe1, 0x13, 0xce, 0x35,
|
||||
0xd4, 0xc9, 0xe7, 0x65, 0x1f, 0x7f, 0x2c, 0x7a, 0x93, 0x9f, 0x34, 0x19,
|
||||
0x4d, 0x22, 0xf2, 0x7f, 0x8e, 0xa8, 0xb0, 0x51, 0x22, 0x8c, 0x91, 0x30,
|
||||
0xa5, 0x9c, 0xff, 0x31, 0x0e, 0x04, 0xc9, 0x19, 0x69, 0x60, 0xee, 0x0f,
|
||||
0xc5, 0xa5, 0xeb, 0x6b, 0xb0, 0xa4, 0xaa, 0x5d, 0x1c, 0x4e, 0xeb, 0x73,
|
||||
0xec, 0x94, 0xb7, 0x15, 0xce, 0x64, 0x1c, 0x60, 0x3e, 0xa3, 0x6b, 0x4a,
|
||||
0x87, 0x7a, 0x25, 0x2f, 0xfc, 0xc3, 0x17, 0x20, 0x06, 0xb6, 0x22, 0x7d,
|
||||
0xca, 0xea, 0x8b, 0x3b, 0xf9, 0xca, 0xa4, 0x32, 0xd2, 0xb7, 0x2e, 0x01,
|
||||
0x4f, 0x31, 0xc9, 0x2f, 0x10, 0xbf, 0x41, 0x4c, 0xe6, 0xfe, 0xba, 0x49,
|
||||
0xe5, 0x89, 0xbb, 0x77, 0x7e, 0xe8, 0x83, 0x1c, 0x72, 0xe7, 0x26, 0x58,
|
||||
0x24, 0x90, 0x9d, 0x1e, 0xb3, 0x20, 0xc8, 0x64, 0x84, 0xa3, 0x21, 0x5d,
|
||||
0x06, 0x64, 0x30, 0x4b, 0x19, 0x35, 0x96, 0x1e, 0xd1, 0x86, 0x57, 0x4a,
|
||||
0xb3, 0x8e, 0xd6, 0x7d, 0xaf, 0xd1, 0xde, 0x3f, 0xa2, 0x2c, 0x32, 0x0a,
|
||||
0xbb, 0xea, 0x4a, 0x46, 0x64, 0x1b, 0x72, 0x14, 0x75, 0x85, 0x1b, 0x4d,
|
||||
0x11, 0x02, 0x5f, 0x6f, 0x06, 0xdd, 0xd3, 0x6f, 0xbc, 0xcc, 0x77, 0x2e,
|
||||
0xb7, 0x43, 0xf4, 0x19, 0x9d, 0x2c, 0x4b, 0x2b, 0x0c, 0x41, 0xb9, 0x02,
|
||||
0xdc, 0x14, 0x5a, 0x67, 0xd4, 0x56, 0xca, 0x45, 0x65, 0xd2, 0x7d, 0x17,
|
||||
0xcd, 0x91, 0xdd, 0x45, 0xd8, 0xa8, 0xd8, 0x4b, 0xc9, 0x2b, 0xf2, 0x35,
|
||||
0xc1, 0x81, 0x6c, 0x33, 0xbc, 0xf4, 0x4d, 0x04, 0xfd, 0xb0, 0x91, 0x2b,
|
||||
0xcf, 0xad, 0x39, 0x45, 0x35, 0xb2, 0xac, 0x2e, 0x2f, 0x13, 0xe3, 0x0b,
|
||||
0x40, 0x59, 0x33, 0x07, 0xe3, 0xa5, 0xa1, 0x4d, 0x0e, 0x79, 0x05, 0x4c,
|
||||
0x36, 0x9b, 0xf1, 0x7f, 0x90, 0x50, 0x46, 0x25, 0x87, 0x10, 0x24, 0x3f,
|
||||
0x52, 0x5d, 0xff, 0x18, 0xad, 0xed, 0x78, 0x52, 0x00, 0x9c, 0xfe, 0x66,
|
||||
0x22, 0x24, 0xe0, 0x62, 0x13, 0xe2, 0x6f, 0x67, 0xd9, 0xe3, 0x6c, 0x64,
|
||||
0x6b, 0xa6, 0xea, 0x53, 0x61, 0x56, 0x8a, 0x33, 0x81, 0x35, 0xe5, 0x0f,
|
||||
0x35, 0xc9, 0xf3, 0x59, 0xc2, 0xa8, 0x92, 0x73, 0x69, 0x66, 0x05, 0x70,
|
||||
0xa1, 0x5f, 0xec, 0x4e, 0x3d, 0x6b, 0xc0, 0x78, 0xa4, 0xcb, 0xfc, 0x7e,
|
||||
0x44, 0x8c, 0xc4, 0x1b, 0x25, 0x70, 0x8f, 0x27, 0x87, 0x76, 0x2d, 0x4f,
|
||||
0x70, 0xb0, 0xea, 0x7a, 0x92, 0x43, 0x8c, 0x00, 0xed, 0xfd, 0x3b, 0x23,
|
||||
0x69, 0x71, 0x8e, 0x49, 0x83, 0xc3, 0x4e, 0x37, 0xab, 0x18, 0xd9, 0x30,
|
||||
0x4d, 0x48, 0x5e, 0x7e, 0xbc, 0x5a, 0x1a, 0x24, 0x34, 0xed, 0x19, 0x57,
|
||||
0xf4, 0xf4, 0x0d, 0x02, 0x0c, 0x57, 0xde, 0x6d, 0x40, 0x39, 0x1f, 0x71,
|
||||
0x9c, 0xa1, 0xb0, 0x28, 0x2d, 0x05, 0xb9, 0x6b, 0x85, 0x7a, 0x4c, 0x47,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11,
|
||||
0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b, 0x00, 0x00, 0x00, 0x04,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48,
|
||||
0xbd, 0xf7, 0xde, 0x7b, 0x00, 0x00, 0x00, 0x04, 0xc1, 0x07, 0x1f, 0x7c,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b,
|
||||
0x00, 0x00, 0x00, 0x04, 0xc1, 0x07, 0x1f, 0x7c, 0x87, 0x87, 0x87, 0x47,
|
||||
0x55, 0x55, 0x55, 0x35, 0x7c, 0xec, 0xe8, 0x54, 0x5f, 0xc4, 0x1c, 0x7e,
|
||||
0x02, 0x38, 0x4e, 0x55, 0x86, 0x80, 0x6d, 0x71, 0xc3, 0xa8, 0x98, 0x4a,
|
||||
0x2b, 0xaa, 0x86, 0x63, 0x60, 0xd7, 0x4f, 0x2e, 0xb4, 0xac, 0xce, 0x78,
|
||||
0xbd, 0x1c, 0x4f, 0x55, 0x6b, 0x2c, 0x33, 0x64, 0x8c, 0x56, 0x30, 0x43,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xdd, 0x29, 0xc3, 0x15, 0x02, 0x15, 0x5b, 0x4f,
|
||||
0xdc, 0xb9, 0x0c, 0x03, 0x9a, 0x8d, 0x4d, 0x53, 0x6e, 0xf2, 0x33, 0x15,
|
||||
0xed, 0x3f, 0x16, 0x06, 0x43, 0xab, 0x59, 0x54, 0x1a, 0x62, 0xcd, 0x3a,
|
||||
0xda, 0x77, 0xa8, 0x51, 0x42, 0x58, 0x05, 0x55, 0x39, 0xeb, 0xd1, 0x45,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x02, 0x15, 0x5b, 0x4f, 0xb9, 0x5a, 0x8c, 0x36,
|
||||
0x9a, 0x63, 0x3e, 0x3c, 0xe6, 0x28, 0x72, 0x36, 0x51, 0x89, 0xdb, 0x3b,
|
||||
0xfa, 0xe0, 0x07, 0x07, 0x30, 0xb3, 0x56, 0x39, 0x91, 0x42, 0x86, 0x38,
|
||||
0xda, 0xd2, 0x8f, 0x67, 0x75, 0xca, 0x3e, 0x69, 0xe9, 0xd8, 0x07, 0x6f,
|
||||
0x11, 0x11, 0x11, 0x11, 0xdc, 0xb9, 0x0c, 0x03, 0x9a, 0x63, 0x3e, 0x3c,
|
||||
0x54, 0xdc, 0x52, 0x1f, 0xf3, 0xc8, 0xb6, 0x6b, 0x96, 0x31, 0xf8, 0x1b,
|
||||
0x20, 0xee, 0x0b, 0x07, 0x4c, 0x37, 0x80, 0x4b, 0x31, 0x99, 0xd0, 0x09,
|
||||
0xb8, 0xa5, 0x62, 0x5f, 0xa2, 0x72, 0xfb, 0x33, 0x11, 0xd8, 0x0e, 0x65,
|
||||
0x00, 0x00, 0x00, 0x08, 0x9a, 0x8d, 0x4d, 0x53, 0xe6, 0x28, 0x72, 0x36,
|
||||
0xf3, 0xc8, 0xb6, 0x6b, 0xef, 0x80, 0xab, 0x77, 0x4d, 0x49, 0x25, 0x2b,
|
||||
0x7e, 0x10, 0x08, 0x1b, 0x70, 0x22, 0x72, 0x66, 0x8b, 0xe6, 0x06, 0x3a,
|
||||
0x58, 0xb9, 0x7e, 0x02, 0x97, 0xf4, 0xc2, 0x4f, 0x6b, 0x9a, 0x68, 0x53,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x6e, 0xf2, 0x33, 0x15, 0x51, 0x89, 0xdb, 0x3b,
|
||||
0x96, 0x31, 0xf8, 0x1b, 0x4d, 0x49, 0x25, 0x2b, 0xe2, 0xe0, 0x5c, 0x64,
|
||||
0xb6, 0x1d, 0x73, 0x13, 0x38, 0x1b, 0xfd, 0x49, 0xe1, 0x2c, 0xce, 0x5d,
|
||||
0x2a, 0x6b, 0xb4, 0x17, 0x7e, 0xa9, 0x6e, 0x72, 0x2f, 0x77, 0x47, 0x79,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0xed, 0x3f, 0x16, 0x06, 0xfa, 0xe0, 0x07, 0x07,
|
||||
0x20, 0xee, 0x0b, 0x07, 0x7e, 0x10, 0x08, 0x1b, 0xb6, 0x1d, 0x73, 0x13,
|
||||
0xca, 0x4a, 0x44, 0x68, 0x1c, 0x93, 0xbc, 0x37, 0xfa, 0x14, 0x8b, 0x55,
|
||||
0xae, 0xe0, 0xac, 0x31, 0xcb, 0x04, 0x09, 0x46, 0x27, 0x8f, 0x96, 0x07,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0x43, 0xab, 0x59, 0x54, 0x30, 0xb3, 0x56, 0x39,
|
||||
0x4c, 0x37, 0x80, 0x4b, 0x70, 0x22, 0x72, 0x66, 0x38, 0x1b, 0xfd, 0x49,
|
||||
0x1c, 0x93, 0xbc, 0x37, 0xfb, 0xdd, 0xff, 0x41, 0x73, 0x22, 0xa8, 0x31,
|
||||
0xd4, 0xc3, 0x26, 0x2b, 0xe7, 0x8c, 0xce, 0x35, 0x03, 0x29, 0x9c, 0x43,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x1a, 0x62, 0xcd, 0x3a, 0x91, 0x42, 0x86, 0x38,
|
||||
0x31, 0x99, 0xd0, 0x09, 0x8b, 0xe6, 0x06, 0x3a, 0xe1, 0x2c, 0xce, 0x5d,
|
||||
0xfa, 0x14, 0x8b, 0x55, 0x73, 0x22, 0xa8, 0x31, 0xaf, 0x9f, 0x0d, 0x2d,
|
||||
0xd8, 0xf1, 0xd2, 0x43, 0x41, 0x60, 0x7a, 0x48, 0xca, 0xa1, 0x4c, 0x7c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xda, 0x77, 0xa8, 0x51, 0xda, 0xd2, 0x8f, 0x67,
|
||||
0xb8, 0xa5, 0x62, 0x5f, 0x58, 0xb9, 0x7e, 0x02, 0x2a, 0x6b, 0xb4, 0x17,
|
||||
0xae, 0xe0, 0xac, 0x31, 0xd4, 0xc3, 0x26, 0x2b, 0xd8, 0xf1, 0xd2, 0x43,
|
||||
0x38, 0xc4, 0xc5, 0x55, 0x39, 0x3d, 0x1f, 0x4c, 0x81, 0xa8, 0x99, 0x14,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0x42, 0x58, 0x05, 0x55, 0x75, 0xca, 0x3e, 0x69,
|
||||
0xa2, 0x72, 0xfb, 0x33, 0x97, 0xf4, 0xc2, 0x4f, 0x7e, 0xa9, 0x6e, 0x72,
|
||||
0xcb, 0x04, 0x09, 0x46, 0xe7, 0x8c, 0xce, 0x35, 0x41, 0x60, 0x7a, 0x48,
|
||||
0x39, 0x3d, 0x1f, 0x4c, 0xc3, 0x27, 0xbb, 0x1a, 0x86, 0xb4, 0x97, 0x00,
|
||||
0xc8, 0x42, 0x16, 0x32, 0x39, 0xeb, 0xd1, 0x45, 0xe9, 0xd8, 0x07, 0x6f,
|
||||
0x11, 0xd8, 0x0e, 0x65, 0x6b, 0x9a, 0x68, 0x53, 0x2f, 0x77, 0x47, 0x79,
|
||||
0x27, 0x8f, 0x96, 0x07, 0x03, 0x29, 0x9c, 0x43, 0xca, 0xa1, 0x4c, 0x7c,
|
||||
0x81, 0xa8, 0x99, 0x14, 0x86, 0xb4, 0x97, 0x00, 0x0c, 0xd8, 0x29, 0x37,
|
||||
0x55, 0x55, 0x55, 0x35, 0xcc, 0xab, 0xe7, 0x58, 0x82, 0xaa, 0xb7, 0x06,
|
||||
0x3c, 0x2a, 0x3d, 0x61, 0x45, 0xbd, 0xcc, 0x4b, 0xa9, 0x83, 0x44, 0x56,
|
||||
0x16, 0xe6, 0x58, 0x6e, 0x70, 0x4b, 0x3a, 0x44, 0xe2, 0x3b, 0x37, 0x60,
|
||||
0xf0, 0x3b, 0x41, 0x1e, 0x44, 0x40, 0x84, 0x5a, 0x63, 0x5d, 0x4d, 0x78,
|
||||
0x22, 0x80, 0xb3, 0x0f, 0xe0, 0x85, 0xec, 0x77, 0xe5, 0x3d, 0xda, 0x27,
|
||||
0x55, 0xf9, 0xfd, 0x44, 0x38, 0xa7, 0x0f, 0x0a, 0x2f, 0xec, 0xda, 0x34,
|
||||
0x24, 0xef, 0x00, 0x40, 0x54, 0x9a, 0x0b, 0x27, 0xf9, 0x85, 0xf4, 0x16,
|
||||
0x14, 0x1f, 0x17, 0x30, 0x1d, 0xb0, 0xdf, 0x31, 0x55, 0x55, 0x55, 0x35,
|
||||
0x98, 0x36, 0x7e, 0x31, 0xd0, 0xda, 0x0a, 0x16, 0xae, 0xb0, 0x6a, 0x00,
|
||||
0x0e, 0x7a, 0x7e, 0x6d, 0x93, 0x81, 0x4d, 0x21, 0x45, 0x5a, 0x4d, 0x20,
|
||||
0x42, 0x5d, 0xfd, 0x49, 0x28, 0xc5, 0xe2, 0x75, 0x45, 0x85, 0x03, 0x2c,
|
||||
0xfc, 0x78, 0x72, 0x15, 0x98, 0x9c, 0x88, 0x0b, 0xed, 0x8f, 0x6f, 0x2b,
|
||||
0x55, 0x75, 0x17, 0x5f, 0xe5, 0xed, 0x21, 0x52, 0x5a, 0x34, 0x10, 0x7d,
|
||||
0x42, 0x25, 0x57, 0x6a, 0xa4, 0xb2, 0xe6, 0x2e, 0x05, 0xa8, 0xc4, 0x17,
|
||||
0xff, 0x9c, 0x7f, 0x6f, 0x23, 0x64, 0x17, 0x44, 0x85, 0xa9, 0x6b, 0x46,
|
||||
0x66, 0x58, 0x1b, 0x3b, 0x55, 0x55, 0x55, 0x35, 0x55, 0xf6, 0xca, 0x06,
|
||||
0x68, 0x75, 0xa9, 0x55, 0x54, 0x44, 0x4f, 0x61, 0x65, 0x3b, 0x96, 0x37,
|
||||
0xa9, 0x89, 0xb6, 0x47, 0x70, 0x8a, 0x8d, 0x74, 0x09, 0x53, 0x9e, 0x5e,
|
||||
0x92, 0x56, 0x2b, 0x34, 0x3e, 0x9d, 0x12, 0x0a, 0x54, 0x98, 0xf8, 0x29,
|
||||
0xde, 0xa0, 0xdd, 0x11, 0x46, 0x3e, 0x0f, 0x70, 0xff, 0xee, 0x0d, 0x7c,
|
||||
0x48, 0xe0, 0xe1, 0x6d, 0xb6, 0x5a, 0x2f, 0x7c, 0xb1, 0xb2, 0xf7, 0x2f,
|
||||
0xda, 0x64, 0x33, 0x7e, 0x87, 0x48, 0x48, 0x7e, 0x95, 0x6c, 0xd5, 0x5c,
|
||||
0x26, 0x8f, 0xc9, 0x3e, 0xf9, 0x5e, 0x99, 0x38, 0xf5, 0x32, 0xc2, 0x66,
|
||||
0x55, 0x55, 0x55, 0x35, 0x7f, 0xb1, 0x0f, 0x47, 0xac, 0x5d, 0xec, 0x76,
|
||||
0xba, 0x59, 0xc4, 0x7f, 0xfb, 0xdc, 0x32, 0x46, 0xe8, 0x83, 0xe0, 0x0a,
|
||||
0xf4, 0xb8, 0x56, 0x36, 0x07, 0x4f, 0x7f, 0x29, 0x31, 0xb8, 0xf4, 0x2c,
|
||||
0x7e, 0x42, 0xbd, 0x3e, 0xf1, 0x9d, 0x40, 0x73, 0x51, 0xf1, 0xce, 0x31,
|
||||
0x35, 0x7b, 0x0e, 0x48, 0x9e, 0xb9, 0x6e, 0x3b, 0x37, 0x00, 0x57, 0x0c,
|
||||
0x15, 0x25, 0x74, 0x64, 0xdd, 0x39, 0x64, 0x5c, 0x0a, 0x5d, 0x08, 0x2b,
|
||||
0xf5, 0xe6, 0x0c, 0x3f, 0xe6, 0xce, 0x30, 0x2d, 0x27, 0xc4, 0x07, 0x19,
|
||||
0x82, 0xfb, 0x44, 0x08, 0x7b, 0x94, 0x23, 0x69, 0x55, 0x55, 0x55, 0x35,
|
||||
0xc7, 0xbe, 0xaf, 0x49, 0xa6, 0x9a, 0x26, 0x30, 0x7c, 0xb2, 0x66, 0x35,
|
||||
0xe4, 0x83, 0x46, 0x62, 0xe3, 0x1c, 0x23, 0x07, 0x36, 0x2e, 0xd3, 0x00,
|
||||
0xe2, 0x65, 0xc8, 0x51, 0x0c, 0x09, 0x5c, 0x74, 0x13, 0x94, 0xf9, 0x67,
|
||||
0x4e, 0x07, 0x26, 0x03, 0xba, 0xb4, 0x3a, 0x7f, 0x38, 0xb4, 0x7c, 0x6a,
|
||||
0x44, 0x7a, 0x1c, 0x7b, 0xeb, 0xf9, 0x8b, 0x0b, 0x16, 0xf8, 0x23, 0x36,
|
||||
0x7b, 0x89, 0x79, 0x44, 0x80, 0xfe, 0x33, 0x2a, 0x7d, 0x59, 0xe2, 0x1b,
|
||||
0x7b, 0xe1, 0xb0, 0x15, 0x21, 0xcb, 0x47, 0x77, 0x23, 0x1a, 0xc0, 0x14,
|
||||
0x5b, 0x86, 0x06, 0x2d, 0x55, 0x55, 0x55, 0x35, 0x04, 0xb5, 0x47, 0x27,
|
||||
0x1d, 0xb7, 0x22, 0x44, 0xcc, 0x9e, 0xce, 0x7d, 0xf2, 0x75, 0x78, 0x78,
|
||||
0x7b, 0x98, 0x99, 0x12, 0xbd, 0x34, 0xe4, 0x43, 0xf0, 0x0a, 0x96, 0x43,
|
||||
0xf1, 0x50, 0x1d, 0x0b, 0x86, 0x78, 0xc9, 0x59, 0xc7, 0x78, 0xec, 0x16,
|
||||
0x71, 0xaa, 0x0c, 0x56, 0xbf, 0x92, 0xe2, 0x3a, 0xb5, 0x6e, 0x2d, 0x18,
|
||||
0xe2, 0xc7, 0x31, 0x67, 0x10, 0xab, 0x9f, 0x27, 0x27, 0x1e, 0xf3, 0x69,
|
||||
0xaf, 0x57, 0x42, 0x4c, 0x4f, 0xb4, 0x30, 0x35, 0x00, 0x54, 0xb0, 0x4a,
|
||||
0xa2, 0x00, 0x2a, 0x4a, 0x3d, 0x49, 0x58, 0x73, 0xf9, 0x16, 0xb0, 0x01,
|
||||
0x55, 0x55, 0x55, 0x35, 0xe4, 0xd5, 0x3f, 0x2e, 0xee, 0x84, 0x47, 0x51,
|
||||
0x3f, 0x84, 0xb9, 0x6b, 0x49, 0xb9, 0xae, 0x57, 0x32, 0x5a, 0x04, 0x02,
|
||||
0xe1, 0x6a, 0xf1, 0x4b, 0x30, 0x53, 0xf1, 0x05, 0x29, 0x74, 0x75, 0x76,
|
||||
0x4a, 0x15, 0x5b, 0x5d, 0xe1, 0xaa, 0x15, 0x1b, 0x62, 0xf5, 0xe8, 0x76,
|
||||
0x03, 0xc1, 0xaa, 0x06, 0x13, 0x59, 0xc8, 0x40, 0x84, 0x49, 0xc8, 0x1f,
|
||||
0x85, 0x98, 0x55, 0x6b, 0xed, 0x38, 0x45, 0x17, 0xb8, 0xc7, 0xf7, 0x69,
|
||||
0xc3, 0x87, 0xd0, 0x17, 0x0a, 0x93, 0xb7, 0x35, 0xc2, 0x45, 0x75, 0x34,
|
||||
0x7a, 0x78, 0xff, 0x51, 0x26, 0xd2, 0x59, 0x13, 0x55, 0x55, 0x55, 0x35,
|
||||
0x48, 0x38, 0xf7, 0x6e, 0x4f, 0x7d, 0xc7, 0x70, 0x32, 0x5d, 0x5b, 0x7a,
|
||||
0x85, 0x35, 0x9c, 0x07, 0x40, 0x08, 0x30, 0x5c, 0x64, 0x69, 0x27, 0x7a,
|
||||
0x07, 0x34, 0x90, 0x6c, 0x6e, 0xa6, 0x8e, 0x70, 0xd4, 0xf2, 0xf7, 0x59,
|
||||
0x0f, 0x13, 0x17, 0x5d, 0xa8, 0xa9, 0x01, 0x29, 0xad, 0xfd, 0x9a, 0x77,
|
||||
0x3c, 0x77, 0xc7, 0x67, 0xd0, 0x43, 0xb1, 0x3f, 0x97, 0x76, 0xe4, 0x72,
|
||||
0xd4, 0x82, 0x9a, 0x25, 0xec, 0xef, 0xc3, 0x03, 0xdc, 0xf9, 0x94, 0x3f,
|
||||
0xa4, 0x76, 0x88, 0x5a, 0xb8, 0x0f, 0x03, 0x76, 0x58, 0x87, 0x42, 0x11,
|
||||
0x28, 0xb7, 0xb0, 0x1d, 0x55, 0x55, 0x55, 0x35, 0x2f, 0xe6, 0x44, 0x75,
|
||||
0xf3, 0x0b, 0xe8, 0x68, 0x59, 0x72, 0x1f, 0x16, 0x8c, 0xd0, 0xe3, 0x3c,
|
||||
0xcc, 0xfc, 0x77, 0x05, 0xd6, 0x4b, 0x48, 0x78, 0x51, 0x88, 0x4c, 0x5f,
|
||||
0x30, 0x43, 0x9c, 0x2f, 0x49, 0x72, 0xba, 0x01, 0xba, 0xae, 0xfe, 0x0b,
|
||||
0x94, 0x3f, 0xe7, 0x71, 0x9d, 0xfa, 0x37, 0x06, 0xfc, 0xa2, 0x99, 0x6f,
|
||||
0xe2, 0x0d, 0xcf, 0x4b, 0x63, 0x76, 0xec, 0x49, 0xa8, 0xb5, 0x84, 0x0b,
|
||||
0x84, 0xa3, 0x75, 0x4f, 0x5e, 0x56, 0xdd, 0x37, 0x1a, 0x7d, 0x6e, 0x34,
|
||||
0x95, 0x39, 0x80, 0x1e, 0x58, 0x2e, 0x22, 0x50, 0xd3, 0x46, 0x93, 0x1e,
|
||||
0x55, 0x55, 0x55, 0x35, 0xf5, 0x96, 0x5a, 0x5f, 0x9b, 0xc8, 0x58, 0x50,
|
||||
0x3e, 0x03, 0xab, 0x16, 0xd5, 0xc6, 0x4c, 0x7f, 0x3f, 0x82, 0xf6, 0x34,
|
||||
0x1c, 0x29, 0x22, 0x16, 0x40, 0xdb, 0xe7, 0x71, 0x8b, 0x8a, 0x4b, 0x55,
|
||||
0x45, 0xbf, 0xd1, 0x68, 0x4c, 0xbb, 0xe3, 0x43, 0x1b, 0x96, 0x28, 0x3d,
|
||||
0x36, 0x4f, 0xdb, 0x58, 0xa8, 0x39, 0xac, 0x38, 0xd3, 0xeb, 0x90, 0x18,
|
||||
0x2f, 0xb7, 0x06, 0x1a, 0x5a, 0x82, 0x53, 0x13, 0x77, 0xaf, 0xe0, 0x4d,
|
||||
0x9e, 0xe9, 0x39, 0x79, 0xb7, 0xf6, 0xa2, 0x3c, 0x41, 0x9d, 0x14, 0x59,
|
||||
0x01, 0x33, 0x36, 0x20, 0x15, 0xe0, 0xe4, 0x15, 0x55, 0x55, 0x55, 0x35,
|
||||
0x58, 0x48, 0x07, 0x36, 0x3f, 0x43, 0x1e, 0x05, 0x33, 0x9e, 0x14, 0x45,
|
||||
0x69, 0xc8, 0x16, 0x63, 0x5f, 0xab, 0x77, 0x26, 0xf4, 0x08, 0xb0, 0x2e,
|
||||
0xf8, 0x31, 0x79, 0x29, 0x37, 0xc9, 0x37, 0x28, 0x55, 0x62, 0xcc, 0x43,
|
||||
0xeb, 0x6b, 0xe4, 0x03, 0xfe, 0x82, 0x50, 0x20, 0x2d, 0xdf, 0xf2, 0x7d,
|
||||
0xba, 0x07, 0xe2, 0x0e, 0x88, 0x1e, 0x82, 0x2b, 0x87, 0x54, 0x26, 0x39,
|
||||
0xdd, 0xee, 0x3e, 0x0b, 0xdc, 0xbf, 0x93, 0x1a, 0x8a, 0xce, 0xa6, 0x39,
|
||||
0x5b, 0xaf, 0x8f, 0x00, 0x7a, 0xad, 0x27, 0x71, 0x1e, 0x76, 0xd8, 0x58,
|
||||
0x96, 0x36, 0xa3, 0x14, 0x55, 0x55, 0x55, 0x35, 0x76, 0x27, 0x76, 0x62,
|
||||
0xa4, 0x9f, 0x05, 0x5a, 0x41, 0x28, 0x49, 0x12, 0x24, 0x18, 0x49, 0x12,
|
||||
0x4f, 0xc2, 0xa5, 0x25, 0x0e, 0x0e, 0x3c, 0x3c, 0x01, 0xa7, 0x65, 0x00,
|
||||
0x92, 0x9e, 0x17, 0x36, 0xa1, 0x7a, 0x92, 0x27, 0xcf, 0x74, 0xba, 0x4d,
|
||||
0xcb, 0x6f, 0x66, 0x68, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32
|
||||
};
|
||||
} // namespace poseidon_constants
|
||||
#endif
|
||||
@@ -1,9 +1,13 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#pragma once
|
||||
#ifndef POSEIDON_KERNELS_H
|
||||
#define POSEIDON_KERNELS_H
|
||||
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
|
||||
namespace poseidon {
|
||||
template <typename S, int T>
|
||||
__global__ void prepare_poseidon_states(S* states, size_t number_of_states, S domain_tag, bool aligned)
|
||||
__global__ void prepare_poseidon_states(const S* input, S* states, unsigned int number_of_states, const S domain_tag)
|
||||
{
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int state_number = idx / T;
|
||||
@@ -16,27 +20,27 @@ namespace poseidon {
|
||||
if (element_number == 0) {
|
||||
prepared_element = domain_tag;
|
||||
} else {
|
||||
if (aligned) {
|
||||
prepared_element = states[idx];
|
||||
} else {
|
||||
prepared_element = states[idx - 1];
|
||||
}
|
||||
prepared_element = input[idx - state_number - 1];
|
||||
}
|
||||
|
||||
// We need __syncthreads here if the state is not aligned
|
||||
// because then we need to shift the vector [A, B, 0] -> [D, A, B]
|
||||
if (!aligned) { __syncthreads(); }
|
||||
|
||||
// Store element in state
|
||||
states[idx] = prepared_element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
DEVICE_INLINE S sbox_alpha_five(S element)
|
||||
DEVICE_INLINE S sbox_el(S element, const int alpha)
|
||||
{
|
||||
S result = S::sqr(element);
|
||||
result = S::sqr(result);
|
||||
return result * element;
|
||||
S result2 = S::sqr(element);
|
||||
switch (alpha) {
|
||||
case 3:
|
||||
return result2 * element;
|
||||
case 5:
|
||||
return S::sqr(result2) * element;
|
||||
case 7:
|
||||
return S::sqr(result2) * result2 * element;
|
||||
case 11:
|
||||
return S::sqr(S::sqr(result2)) * result2 * element;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
@@ -71,7 +75,7 @@ namespace poseidon {
|
||||
element = element + constants.round_constants[rc_offset + element_number];
|
||||
rc_offset += T;
|
||||
}
|
||||
element = sbox_alpha_five(element);
|
||||
element = sbox_el(element, constants.alpha);
|
||||
if (!skip_rc) { element = element + constants.round_constants[rc_offset + element_number]; }
|
||||
|
||||
// Multiply all the states by mds matrix
|
||||
@@ -111,7 +115,7 @@ namespace poseidon {
|
||||
__device__ S partial_round(S state[T], size_t rc_offset, int round_number, const PoseidonConstants<S>& constants)
|
||||
{
|
||||
S element = state[0];
|
||||
element = sbox_alpha_five(element);
|
||||
element = sbox_el(element, constants.alpha);
|
||||
element = element + constants.round_constants[rc_offset];
|
||||
|
||||
S* sparse_matrix = &constants.sparse_matrices[(T * 2 - 1) * round_number];
|
||||
@@ -155,22 +159,58 @@ namespace poseidon {
|
||||
}
|
||||
}
|
||||
|
||||
// This function just copies from the states to the output
|
||||
template <typename S, int T>
|
||||
__global__ void get_hash_results(S* states, size_t number_of_states, S* out)
|
||||
__global__ void
|
||||
squeeze_states_kernel(const S* states, unsigned int number_of_states, unsigned int rate, unsigned int offset, S* out)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
out[idx] = states[idx * T + 1];
|
||||
for (int i = 0; i < rate; i++) {
|
||||
out[idx * rate + i] = states[idx * T + offset + i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void copy_recursive(S* state, size_t number_of_states, S* out)
|
||||
cudaError_t poseidon_permutation_kernel(
|
||||
const S* input,
|
||||
S* out,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const PoseidonConstants<S>& constants,
|
||||
cudaStream_t& stream)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
S* states;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states, number_of_states * T * sizeof(S), stream));
|
||||
|
||||
state[(idx / (T - 1) * T) + (idx % (T - 1)) + 1] = out[idx];
|
||||
prepare_poseidon_states<S, T>
|
||||
<<<PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T), 0, stream>>>(
|
||||
input, states, number_of_states, constants.domain_tag);
|
||||
|
||||
size_t rc_offset = 0;
|
||||
full_rounds<S, T><<<
|
||||
PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T),
|
||||
sizeof(S) * PKC::hashes_per_block(T) * T, stream>>>(
|
||||
states, number_of_states, rc_offset, FIRST_FULL_ROUNDS, constants);
|
||||
rc_offset += T * (constants.full_rounds_half + 1);
|
||||
|
||||
partial_rounds<S, T><<<PKC::number_of_singlehash_blocks(number_of_states), PKC::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, rc_offset, constants);
|
||||
rc_offset += constants.partial_rounds;
|
||||
|
||||
full_rounds<S, T><<<
|
||||
PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T),
|
||||
sizeof(S) * PKC::hashes_per_block(T) * T, stream>>>(
|
||||
states, number_of_states, rc_offset, SECOND_FULL_ROUNDS, constants);
|
||||
|
||||
squeeze_states_kernel<S, T>
|
||||
<<<PKC::number_of_singlehash_blocks(number_of_states), PKC::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, output_len, 1, out);
|
||||
|
||||
CHK_IF_RETURN(cudaFreeAsync(states, stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -8,132 +8,87 @@
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "poseidon/kernels.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
using namespace hash;
|
||||
|
||||
/**
|
||||
* @namespace poseidon
|
||||
* Implementation of the [Poseidon hash function](https://eprint.iacr.org/2019/458.pdf)
|
||||
* Specifically, the optimized [Filecoin version](https://spec.filecoin.io/algorithms/crypto/poseidon/)
|
||||
*/
|
||||
namespace poseidon {
|
||||
#define FIRST_FULL_ROUNDS true
|
||||
#define SECOND_FULL_ROUNDS false
|
||||
|
||||
/**
|
||||
* For most of the Poseidon configurations this is the case
|
||||
* TODO: Add support for different full rounds numbers
|
||||
*/
|
||||
const int FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConstants
|
||||
* These constants are enough to define a Poseidon instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
|
||||
struct PoseidonConstants {
|
||||
int arity;
|
||||
int partial_rounds;
|
||||
int full_rounds_half;
|
||||
S* round_constants = nullptr;
|
||||
S* mds_matrix = nullptr;
|
||||
S* non_sparse_matrix = nullptr;
|
||||
S* sparse_matrices = nullptr;
|
||||
S domain_tag;
|
||||
};
|
||||
|
||||
/**
|
||||
* @class PoseidonKernelsConfiguration
|
||||
* Describes the logic of deriving CUDA kernels parameters
|
||||
* such as the number of threads and the number of blocks
|
||||
*/
|
||||
template <int T>
|
||||
class PoseidonKernelsConfiguration
|
||||
class Poseidon : public SpongeHasher<S, S>
|
||||
{
|
||||
public:
|
||||
// The logic behind this is that 1 thread only works on 1 element
|
||||
// We have {T} elements in each state, and {number_of_states} states total
|
||||
static const int number_of_threads = 256 / T * T;
|
||||
const std::size_t device_id;
|
||||
PoseidonConstants<S> constants;
|
||||
|
||||
// The partial rounds operates on the whole state, so we define
|
||||
// the parallelism params for processing a single hash preimage per thread
|
||||
static const int singlehash_block_size = 128;
|
||||
|
||||
static const int hashes_per_block = number_of_threads / T;
|
||||
|
||||
static int number_of_full_blocks(size_t number_of_states)
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
int total_number_of_threads = number_of_states * T;
|
||||
return total_number_of_threads / number_of_threads +
|
||||
static_cast<bool>(total_number_of_threads % number_of_threads);
|
||||
cudaError_t permutation_error;
|
||||
#define P_PERM_T(width) \
|
||||
case width: \
|
||||
permutation_error = poseidon_permutation_kernel<S, width>( \
|
||||
input, output, number_of_states, input_len, output_len, this->constants, ctx.stream); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P_PERM_T(3)
|
||||
P_PERM_T(5)
|
||||
P_PERM_T(9)
|
||||
P_PERM_T(12)
|
||||
default:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [3, 5, 9, 12]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(permutation_error);
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
static int number_of_singlehash_blocks(size_t number_of_states)
|
||||
Poseidon(
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
device_context::DeviceContext& ctx)
|
||||
: SpongeHasher<S, S>(arity + 1, arity, arity, 1), device_id(ctx.device_id)
|
||||
{
|
||||
return number_of_states / singlehash_block_size + static_cast<bool>(number_of_states % singlehash_block_size);
|
||||
PoseidonConstants<S> constants;
|
||||
CHK_STICKY(create_optimized_poseidon_constants(
|
||||
arity, alpha, partial_rounds, full_rounds_half, round_constants, mds_matrix, non_sparse_matrix, sparse_matrices,
|
||||
domain_tag, &constants, ctx));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
Poseidon(int arity, device_context::DeviceContext& ctx)
|
||||
: SpongeHasher<S, S>(arity + 1, arity, arity, 1), device_id(ctx.device_id)
|
||||
{
|
||||
PoseidonConstants<S> constants{};
|
||||
CHK_STICKY(init_optimized_poseidon_constants(arity, ctx, &constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
~Poseidon()
|
||||
{
|
||||
auto ctx = device_context::get_default_device_context();
|
||||
ctx.device_id = this->device_id;
|
||||
CHK_STICKY(release_optimized_poseidon_constants<S>(&this->constants, ctx));
|
||||
}
|
||||
};
|
||||
|
||||
template <int T>
|
||||
using PKC = PoseidonKernelsConfiguration<T>;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConfig
|
||||
* Struct that encodes various Poseidon parameters.
|
||||
*/
|
||||
struct PoseidonConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
bool input_is_a_state; /**< If true, input is considered to be a states vector, holding the preimages
|
||||
* in aligned or not aligned format. Memory under the input pointer will be used for states
|
||||
* If false, fresh states memory will be allocated and input will be copied into it */
|
||||
bool aligned; /**< If true - input should be already aligned for poseidon permutation.
|
||||
* Aligned format: [0, A, B, 0, C, D, ...] (as you might get by using loop_state)
|
||||
* not aligned format: [A, B, 0, C, D, 0, ...] (as you might get from cudaMemcpy2D) */
|
||||
bool loop_state; /**< If true, hash results will also be copied in the input pointer in aligned format */
|
||||
bool is_async; /**< Whether to run the Poseidon asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the poseidon_hash
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static PoseidonConfig default_poseidon_config(
|
||||
int t, const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
PoseidonConfig config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputes_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // input_is_a_state
|
||||
false, // aligned
|
||||
false, // loop_state
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t
|
||||
init_optimized_poseidon_constants(int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* constants);
|
||||
|
||||
/**
|
||||
* Compute the poseidon hash over a sequence of preimages.
|
||||
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
|
||||
* @param T size of the poseidon state, should be equal to {arity + 1}
|
||||
* @param input a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. May point to a string of preimages or a string of states filled with preimages.
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be at least of size [number_of_states](@ref number_of_states)
|
||||
* @param number_of_states number of input blocks of size T-1 (arity)
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon_hash(
|
||||
S* input, S* output, size_t number_of_states, const PoseidonConstants<S>& constants, const PoseidonConfig& config);
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -1,74 +0,0 @@
|
||||
#pragma once
|
||||
#ifndef MERKLE_H
|
||||
#define MERKLE_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace poseidon;
|
||||
|
||||
/**
|
||||
* @namespace merkle
|
||||
* Implementation of the [Poseidon](@ref poseidon) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) builder,
|
||||
* parallelized for the use on GPU
|
||||
*/
|
||||
namespace merkle {
|
||||
static constexpr size_t GIGA = 1024 * 1024 * 1024;
|
||||
|
||||
/// Bytes per stream
|
||||
static constexpr size_t STREAM_CHUNK_SIZE = 1024 * 1024 * 1024;
|
||||
|
||||
/**
|
||||
* @struct TreeBuilderConfig
|
||||
* Struct that encodes various Tree builder parameters.
|
||||
*/
|
||||
struct TreeBuilderConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
int keep_rows; /**< How many rows of the Merkle tree rows should be written to output. '0' means all of them */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the tree builder asynchronously. If set to `true`, the build_merkle_tree
|
||||
* function will be non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static TreeBuilderConfig
|
||||
default_merkle_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
TreeBuilderConfig config = {
|
||||
ctx, // ctx
|
||||
0, // keep_rows
|
||||
false, // are_inputes_on_device
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the Poseidon Merkle tree
|
||||
*
|
||||
* @param leaves a pointer to the leaves layer. May be allocated on device or on host, regulated by the config
|
||||
* Expected to have arity ^ (height - 1) elements
|
||||
* @param digests a pointer to the digests storage. May only be allocated on the host
|
||||
* Expected to have `sum(arity ^ (i)) for i in [0..height-1]`
|
||||
* @param height the height of the merkle tree
|
||||
* # Algorithm
|
||||
* The function will split large tree into many subtrees of size that will fit `STREAM_CHUNK_SIZE`.
|
||||
* Each subtree is built in its own stream (there is a maximum number of streams)
|
||||
* After all subtrees are constructed - the function will combine the resulting sub-digests into the final top-tree
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t build_merkle_tree(
|
||||
const S* leaves,
|
||||
S* digests,
|
||||
uint32_t height,
|
||||
const PoseidonConstants<S>& poseidon,
|
||||
const TreeBuilderConfig& config);
|
||||
} // namespace merkle
|
||||
|
||||
#endif
|
||||
65
icicle/include/poseidon2/constants.cuh
Normal file
65
icicle/include/poseidon2/constants.cuh
Normal file
@@ -0,0 +1,65 @@
|
||||
#pragma once
|
||||
#ifndef POSEIDON2_CONSTANTS_H
|
||||
#define POSEIDON2_CONSTANTS_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
namespace poseidon2 {
|
||||
/**
|
||||
* For most of the Poseidon2 configurations this is the case
|
||||
*/
|
||||
const int EXTERNAL_ROUNDS_DEFAULT = 8;
|
||||
|
||||
// Diffusion strategy selector; stored in Poseidon2Constants::diffusion.
// NOTE(review): the code that branches on this value is outside this view — confirm what
// MONTGOMERY changes relative to DEFAULT_DIFFUSION.
enum DiffusionStrategy {
|
||||
DEFAULT_DIFFUSION,
|
||||
MONTGOMERY,
|
||||
};
|
||||
|
||||
// MDS matrix variant selector; stored in Poseidon2Constants::mds_type and passed to mds_light() by the kernels.
enum MdsType { DEFAULT_MDS, PLONKY };
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Constants
|
||||
* These constants are enough to define a Poseidon2 instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param internal_matrix_diag A pointer to the internal matrix diagonal (presumably also on the device - TODO confirm)
|
||||
* @param mds_type Which MDS matrix variant the kernels apply (see MdsType)
|
||||
* @param diffusion Which diffusion strategy is used (see DiffusionStrategy)
|
||||
*/
|
||||
template <typename S>
|
||||
struct Poseidon2Constants {
|
||||
int width; // NOTE(review): presumably the state size T used by the kernels — confirm
|
||||
int alpha; // NOTE(review): presumably the S-box exponent handed to sbox_el — confirm
|
||||
int internal_rounds;
|
||||
int external_rounds;
|
||||
S* round_constants = nullptr;
|
||||
S* internal_matrix_diag = nullptr;
|
||||
MdsType mds_type;
|
||||
DiffusionStrategy diffusion;
|
||||
};
|
||||
|
||||
// Declaration only; the definition is not in this header.
// Accepts an explicit Poseidon2 parameter set (width, alpha, round counts, round constants,
// internal matrix diagonal, MDS type, diffusion strategy) plus an output pointer `poseidon_constants`.
// NOTE(review): presumably copies the constant arrays to the device selected by `ctx` and fills
// `poseidon_constants`; confirm against the definition.
template <typename S>
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon_constants);
|
||||
|
||||
// Declaration only; the definition is not in this header.
// Takes only the width, MDS type and diffusion strategy, plus an output pointer `poseidon2_constants`.
// NOTE(review): presumably loads the pre-calculated constants for `width` and moves them to the
// device — confirm against the definition.
template <typename S>
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon2_constants);
|
||||
|
||||
// Declaration only; the definition is not in this header.
// NOTE(review): presumably frees the device buffers referenced by `constants` — confirm against the definition.
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
1077
icicle/include/poseidon2/constants/m31_poseidon2.h
Normal file
1077
icicle/include/poseidon2/constants/m31_poseidon2.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,13 +3,14 @@ from sage.rings.polynomial.polynomial_gf2x import GF2X_BuildIrred_list
|
||||
from math import *
|
||||
import itertools
|
||||
|
||||
CURVE_NAME = "bn254"
|
||||
CURVE_NAME = "m31"
|
||||
|
||||
###########################################################################
|
||||
# p = 18446744069414584321 # GoldiLocks
|
||||
# p = 2013265921 # BabyBear
|
||||
p = 2**31 - 1 # M31
|
||||
# p = 52435875175126190479447740508185965837690552500527637822603658699938581184513 # BLS12-381
|
||||
p = 21888242871839275222246405745257275088548364400416034343698204186575808495617 # BN254/BN256
|
||||
# p = 21888242871839275222246405745257275088548364400416034343698204186575808495617 # BN254/BN256
|
||||
# p = 28948022309329048855892746252171976963363056481941560715954676764349967630337 # Pasta (Pallas)
|
||||
# p = 28948022309329048855892746252171976963363056481941647379679742748393362948097 # Pasta (Vesta)
|
||||
|
||||
@@ -617,6 +618,8 @@ print(f"namespace poseidon2_constants_{CURVE_NAME} {{")
|
||||
for t in TS:
|
||||
NUM_CELLS = t
|
||||
R_F_FIXED, R_P_FIXED, _, _ = poseidon_calc_final_numbers_fixed(p, t, alpha, 128, True)
|
||||
if t == 16:
|
||||
R_P_FIXED = 14
|
||||
|
||||
INIT_SEQUENCE = []
|
||||
|
||||
|
||||
@@ -1,7 +1,28 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#pragma once
|
||||
#ifndef POSEIDON2_KERNELS_H
|
||||
#define POSEIDON2_KERNELS_H
|
||||
|
||||
#include "utils/utils.h"
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "poseidon2/constants.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace poseidon2 {
|
||||
// Rounds `v` up to the nearest power of two using the decrement / bit-smear / increment trick.
// A value that is already a power of two is returned unchanged.
// Assuming a 32-bit unsigned int: v == 0 wraps on the decrement and yields 0, and any v above 2^31
// overflows on the final increment and also yields 0.
static DEVICE_INLINE unsigned int d_next_pow_of_two(unsigned int v)
|
||||
{
|
||||
v--; // so that an exact power of two maps to itself
|
||||
v |= v >> 1; // the shifts by 1, 2, 4, 8, 16 copy the highest set bit into every lower bit
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
v++; // all-ones below the top bit + 1 gives the next power of two
|
||||
return v;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
DEVICE_INLINE S sbox_el(S element, const int alpha)
|
||||
{
|
||||
@@ -19,7 +40,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
DEVICE_INLINE S sbox(S state[T], const int alpha)
|
||||
DEVICE_INLINE void sbox(S state[T], const int alpha)
|
||||
{
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = sbox_el(state[i], alpha);
|
||||
@@ -27,7 +48,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
DEVICE_INLINE S add_rc(S state[T], size_t rc_offset, const S* rc)
|
||||
DEVICE_INLINE void add_rc(S state[T], size_t rc_offset, const S* rc)
|
||||
{
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = state[i] + rc[rc_offset + i];
|
||||
@@ -35,7 +56,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S mds_light_4x4(S s[4])
|
||||
__device__ void mds_light_4x4(S s[4])
|
||||
{
|
||||
S t0 = s[0] + s[1];
|
||||
S t1 = s[2] + s[3];
|
||||
@@ -56,7 +77,7 @@ namespace poseidon2 {
|
||||
// [ 3 1 1 2 ].
|
||||
// https://github.com/Plonky3/Plonky3/blob/main/poseidon2/src/matrix.rs#L36
|
||||
template <typename S>
|
||||
__device__ S mds_light_plonky_4x4(S s[4])
|
||||
__device__ void mds_light_plonky_4x4(S s[4])
|
||||
{
|
||||
S t01 = s[0] + s[1];
|
||||
S t23 = s[2] + s[3];
|
||||
@@ -70,7 +91,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ S mds_light(S state[T], MdsType mds)
|
||||
__device__ void mds_light(S state[T], MdsType mds)
|
||||
{
|
||||
S sum;
|
||||
switch (T) {
|
||||
@@ -123,7 +144,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ S internal_round(S state[T], size_t rc_offset, const Poseidon2Constants<S>& constants)
|
||||
__device__ void internal_round(S state[T], size_t rc_offset, const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
S element = state[0];
|
||||
element = element + constants.round_constants[rc_offset];
|
||||
@@ -176,17 +197,8 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void poseidon2_permutation_kernel(
|
||||
const S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S> constants)
|
||||
__device__ void permute_state(S state[T], const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
S state[T];
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = states[idx * T + i];
|
||||
}
|
||||
unsigned int rn;
|
||||
|
||||
mds_light<S, T>(state, constants.mds_type);
|
||||
@@ -213,6 +225,22 @@ namespace poseidon2 {
|
||||
mds_light<S, T>(state, constants.mds_type);
|
||||
rc_offset += T;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void permutation_kernel(
|
||||
const S* states, S* states_out, unsigned int number_of_states, const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
S state[T];
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = states[idx * T + i];
|
||||
}
|
||||
|
||||
permute_state<S, T>(state, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
@@ -220,13 +248,120 @@ namespace poseidon2 {
|
||||
}
|
||||
}
|
||||
|
||||
// This function just copies element `index` of every state to the output
|
||||
template <typename S, int T>
|
||||
__global__ void get_hash_results(const S* states, size_t number_of_states, int index, S* out)
|
||||
__global__ void hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
uint64_t number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
uint64_t idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
out[idx] = states[idx * T + index];
|
||||
S state[T] = {0};
|
||||
UNROLL
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
state[i] = input[idx * input_len + i];
|
||||
}
|
||||
|
||||
permute_state<S, T>(state, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
output[idx * output_len + i] = state[i];
|
||||
}
|
||||
}
|
||||
} // namespace poseidon2
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ void absorb_2d_state(
|
||||
const Matrix<S>* inputs,
|
||||
S state[T],
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int rate,
|
||||
uint64_t row_idx,
|
||||
const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
unsigned int index = 0;
|
||||
for (int i = 0; i < number_of_inputs; i++) {
|
||||
const Matrix<S>* input = inputs + i;
|
||||
for (int j = 0; j < input->width; j++) {
|
||||
state[index] = input->values[row_idx * input->width + j];
|
||||
index++;
|
||||
if (index == rate) {
|
||||
permute_state<S, T>(state, constants);
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (index) { permute_state<S, T>(state, constants); }
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void hash_2d_kernel(
|
||||
const Matrix<S>* inputs,
|
||||
S* output,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int rate,
|
||||
unsigned int output_len,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
uint64_t idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= inputs[0].height) { return; }
|
||||
|
||||
S state[T] = {0};
|
||||
|
||||
absorb_2d_state<S, T>(inputs, state, number_of_inputs, rate, idx, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
output[idx * output_len + i] = state[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void compress_and_inject_kernel(
|
||||
const Matrix<S>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
const S* prev_layer,
|
||||
S* next_layer,
|
||||
unsigned int rate,
|
||||
unsigned int digest_elements,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
uint64_t number_of_rows = d_next_pow_of_two(matrices_to_inject[0].height);
|
||||
if (idx >= number_of_rows) { return; }
|
||||
|
||||
size_t next_layer_len = matrices_to_inject[0].height;
|
||||
S state_to_compress[T] = {S::zero()};
|
||||
|
||||
for (int i = 0; i < digest_elements * 2; i++) {
|
||||
state_to_compress[i] = prev_layer[idx * 2 * digest_elements + i];
|
||||
}
|
||||
permute_state<S, T>(state_to_compress, constants);
|
||||
|
||||
S injected_state[T] = {S::zero()};
|
||||
if (idx < next_layer_len) {
|
||||
absorb_2d_state<S, T>(matrices_to_inject, injected_state, number_of_inputs, rate, idx, constants);
|
||||
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
injected_state[digest_elements + i] = injected_state[i];
|
||||
injected_state[i] = state_to_compress[i];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
injected_state[i] = state_to_compress[i];
|
||||
}
|
||||
}
|
||||
permute_state<S, T>(injected_state, constants);
|
||||
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
next_layer[idx * digest_elements + i] = injected_state[i];
|
||||
}
|
||||
}
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
@@ -8,124 +8,172 @@
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
|
||||
#include "poseidon2/constants.cuh"
|
||||
#include "poseidon2/kernels.cuh"
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace poseidon2
|
||||
* Implementation of the [Poseidon2 hash function](https://eprint.iacr.org/2023/323.pdf)
|
||||
* Specifically, the optimized [Filecoin version](https://spec.filecoin.io/algorithms/crypto/poseidon/)
|
||||
*/
|
||||
namespace poseidon2 {
|
||||
/**
|
||||
* For most of the Poseidon2 configurations this is the case
|
||||
*/
|
||||
const int EXTERNAL_ROUNDS_DEFAULT = 8;
|
||||
|
||||
enum DiffusionStrategy {
|
||||
DEFAULT_DIFFUSION,
|
||||
MONTGOMERY,
|
||||
};
|
||||
|
||||
enum MdsType { DEFAULT_MDS, PLONKY };
|
||||
|
||||
enum PoseidonMode {
|
||||
COMPRESSION,
|
||||
PERMUTATION,
|
||||
};
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Constants
|
||||
* These constants are enough to define a Poseidon2 instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param internal_matrix_diag A pointer to the diagonal of the internal matrix (presumably allocated on the device, like round_constants)
|
||||
* @param mds_type Which light MDS variant is applied (DEFAULT_MDS or PLONKY)
|
||||
* @param diffusion The diffusion strategy (DEFAULT_DIFFUSION or MONTGOMERY)
|
||||
*/
|
||||
template <typename S>
|
||||
struct Poseidon2Constants {
|
||||
int width;
|
||||
int alpha;
|
||||
int internal_rounds;
|
||||
int external_rounds;
|
||||
S* round_constants = nullptr;
|
||||
S* internal_matrix_diag = nullptr;
|
||||
MdsType mds_type;
|
||||
DiffusionStrategy diffusion;
|
||||
};
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Config
|
||||
* Struct that encodes various Poseidon2 parameters.
|
||||
*/
|
||||
struct Poseidon2Config {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_states_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
PoseidonMode mode;
|
||||
int output_index;
|
||||
bool
|
||||
is_async; /**< Whether to run the Poseidon2 asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the poseidon_hash
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static Poseidon2Config default_poseidon2_config(
|
||||
int t, const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
class Poseidon2 : public hash::SpongeHasher<S, S>
|
||||
{
|
||||
Poseidon2Config config = {
|
||||
ctx, // ctx
|
||||
false, // are_states_on_device
|
||||
false, // are_outputs_on_device
|
||||
PoseidonMode::COMPRESSION,
|
||||
1, // output_index
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
static const int POSEIDON_BLOCK_SIZE = 32;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon_constants);
|
||||
static inline int poseidon_number_of_blocks(size_t number_of_states)
|
||||
{
|
||||
return number_of_states / POSEIDON_BLOCK_SIZE + static_cast<bool>(number_of_states % POSEIDON_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* constants);
|
||||
public:
|
||||
const std::size_t device_id;
|
||||
Poseidon2Constants<S> constants;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
|
||||
/**
 * Launches hash_2d_kernel for this hasher's state width.
 *
 * The kernel is enqueued on `ctx.stream` with enough blocks of
 * POSEIDON_BLOCK_SIZE threads to cover `number_of_rows`; this function does
 * not synchronize the stream. `inputs` and `output` are dereferenced by the
 * kernel, so they must be readable/writable from device code.
 *
 * @param inputs            Array of `number_of_inputs` matrices
 * @param output            Receives `output_len` elements per row
 * @param number_of_inputs  Number of matrices in `inputs`
 * @param output_len        Number of state elements emitted per row
 * @param number_of_rows    Number of rows, used to size the grid
 * @param ctx               Device context providing the stream
 * @return The launch status reported through CHK_IF_RETURN / CHK_LAST.
 *         An unsupported width raises IcicleError_t::InvalidArgument.
 */
cudaError_t hash_2d(
  const Matrix<S>* inputs,
  S* output,
  unsigned int number_of_inputs,
  unsigned int output_len,
  uint64_t number_of_rows,
  const device_context::DeviceContext& ctx) const override
{
// Maps the runtime width onto the matching compile-time kernel instantiation.
#define P2_HASH_2D_T(width)                                                                                            \
  case width:                                                                                                          \
    hash_2d_kernel<S, width><<<poseidon_number_of_blocks(number_of_rows), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>(       \
      inputs, output, number_of_inputs, this->rate, output_len, this->constants);                                      \
    break;

  switch (this->width) {
    P2_HASH_2D_T(2)
    P2_HASH_2D_T(3)
    P2_HASH_2D_T(4)
    P2_HASH_2D_T(8)
    P2_HASH_2D_T(12)
    P2_HASH_2D_T(16)
    P2_HASH_2D_T(20)
    P2_HASH_2D_T(24)
  default:
    THROW_ICICLE_ERR(
      IcicleError_t::InvalidArgument, "PoseidonAbsorb2d: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
  }
// Keep the helper macro from leaking into every translation unit that includes this header.
#undef P2_HASH_2D_T

  CHK_IF_RETURN(cudaPeekAtLastError());
  return CHK_LAST();
}
|
||||
|
||||
/**
 * Launches hash_many_kernel for this hasher's state width.
 *
 * The kernel is enqueued on `ctx.stream` with enough blocks of
 * POSEIDON_BLOCK_SIZE threads to cover `number_of_states`; this function does
 * not synchronize the stream.
 *
 * @param input             `input_len` elements per state
 * @param output            Receives `output_len` elements per state
 * @param number_of_states  Number of independent hashes to compute
 * @param input_len         Elements read per state
 * @param output_len        Elements written per state
 * @param ctx               Device context providing the stream
 * @return The launch status reported through CHK_IF_RETURN / CHK_LAST.
 *         An unsupported width raises IcicleError_t::InvalidArgument.
 */
cudaError_t run_hash_many_kernel(
  const S* input,
  S* output,
  unsigned int number_of_states,
  unsigned int input_len,
  unsigned int output_len,
  const device_context::DeviceContext& ctx) const override
{
// Maps the runtime width onto the matching compile-time kernel instantiation.
#define P2_HASH_MANY_T(width)                                                                                          \
  case width:                                                                                                          \
    hash_many_kernel<S, width><<<poseidon_number_of_blocks(number_of_states), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>(   \
      input, output, number_of_states, input_len, output_len, this->constants);                                        \
    break;

  switch (this->width) {
    P2_HASH_MANY_T(2)
    P2_HASH_MANY_T(3)
    P2_HASH_MANY_T(4)
    P2_HASH_MANY_T(8)
    P2_HASH_MANY_T(12)
    P2_HASH_MANY_T(16)
    P2_HASH_MANY_T(20)
    P2_HASH_MANY_T(24)
  default:
    THROW_ICICLE_ERR(
      IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
  }
// Keep the helper macro from leaking into every translation unit that includes this header.
#undef P2_HASH_MANY_T

  CHK_IF_RETURN(cudaPeekAtLastError());
  return CHK_LAST();
}
|
||||
|
||||
/**
 * Launches compress_and_inject_kernel for this hasher's state width.
 *
 * The kernel is enqueued on `ctx.stream` with enough blocks of
 * POSEIDON_BLOCK_SIZE threads to cover `number_of_rows`; this function does
 * not synchronize the stream.
 *
 * @param matrices_to_inject  Array of `number_of_inputs` matrices to inject
 * @param number_of_inputs    Number of matrices in `matrices_to_inject`
 * @param number_of_rows      Number of nodes to produce, used to size the grid
 * @param prev_layer          Digests of the previous layer
 * @param next_layer          Receives `digest_elements` elements per node
 * @param digest_elements     Number of field elements per digest
 * @param ctx                 Device context providing the stream
 * @return The launch status reported through CHK_IF_RETURN / CHK_LAST.
 *         An unsupported width raises IcicleError_t::InvalidArgument.
 */
cudaError_t compress_and_inject(
  const Matrix<S>* matrices_to_inject,
  unsigned int number_of_inputs,
  uint64_t number_of_rows,
  const S* prev_layer,
  S* next_layer,
  unsigned int digest_elements,
  const device_context::DeviceContext& ctx) const override
{
// Maps the runtime width onto the matching compile-time kernel instantiation.
#define P2_COMPRESS_AND_INJECT_T(width)                                                                                \
  case width:                                                                                                          \
    compress_and_inject_kernel<S, width>                                                                               \
      <<<poseidon_number_of_blocks(number_of_rows), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>(                             \
        matrices_to_inject, number_of_inputs, prev_layer, next_layer, this->rate, digest_elements, this->constants);   \
    break;

  switch (this->width) {
    P2_COMPRESS_AND_INJECT_T(2)
    P2_COMPRESS_AND_INJECT_T(3)
    P2_COMPRESS_AND_INJECT_T(4)
    P2_COMPRESS_AND_INJECT_T(8)
    P2_COMPRESS_AND_INJECT_T(12)
    P2_COMPRESS_AND_INJECT_T(16)
    P2_COMPRESS_AND_INJECT_T(20)
    P2_COMPRESS_AND_INJECT_T(24)
  default:
    THROW_ICICLE_ERR(
      IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
  }
// Keep the helper macro from leaking into every translation unit that includes this header.
#undef P2_COMPRESS_AND_INJECT_T

  CHK_IF_RETURN(cudaPeekAtLastError());
  return CHK_LAST();
}
|
||||
|
||||
Poseidon2(
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx)
|
||||
: hash::SpongeHasher<S, S>(width, width, rate, 0), device_id(ctx.device_id)
|
||||
{
|
||||
Poseidon2Constants<S> constants;
|
||||
CHK_STICKY(create_poseidon2_constants(
|
||||
width, alpha, internal_rounds, external_rounds, round_constants, internal_matrix_diag, mds_type, diffusion, ctx,
|
||||
&constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
Poseidon2(
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx)
|
||||
: hash::SpongeHasher<S, S>(width, width, rate, 0), device_id(ctx.device_id)
|
||||
{
|
||||
Poseidon2Constants<S> constants;
|
||||
CHK_STICKY(init_poseidon2_constants(width, mds_type, diffusion, ctx, &constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
// Releases the constants held by this hasher.
// A default device context is fetched and its device_id is replaced by the id
// captured at construction before release_poseidon2_constants is invoked on
// this->constants.
// NOTE(review): the class declares no copy/move handling in the visible code, so
// a copied hasher would presumably release the same constants twice - confirm.
~Poseidon2()
|
||||
{
|
||||
auto ctx = device_context::get_default_device_context();
|
||||
ctx.device_id = this->device_id;
|
||||
CHK_STICKY(release_poseidon2_constants<S>(&this->constants, ctx));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Compute the poseidon hash over a sequence of preimages.
|
||||
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
|
||||
* @param T size of the poseidon state, should be equal to {arity + 1}
|
||||
* @param states a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. May point to a string of preimages or a string of states filled with preimages.
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be at least of size [number_of_states](@ref number_of_states)
|
||||
* @param number_of_states number of input blocks of size T-1 (arity)
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon2_hash(
|
||||
const S* states,
|
||||
S* output,
|
||||
size_t number_of_states,
|
||||
const Poseidon2Constants<S>& constants,
|
||||
const Poseidon2Config& config);
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
@@ -5,4 +5,15 @@
|
||||
#define CONCAT_DIRECT(a, b) a##_##b
|
||||
#define CONCAT_EXPAND(a, b) CONCAT_DIRECT(a, b) // expand a,b before concatenation
|
||||
|
||||
// Rounds v up to the nearest power of two; a value that already is a power of
// two is returned unchanged. The bits below the highest set bit of (v - 1) are
// filled in by OR-ing progressively larger right shifts, and adding one then
// carries into the next power of two.
// With a 32-bit unsigned int, 0 and anything above 2^31 wrap around to 0.
static unsigned int next_pow_of_two(unsigned int v) {
  unsigned int smeared = v - 1;
  for (unsigned int shift = 1; shift <= 16; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}
|
||||
|
||||
#endif // ICICLE_UTILS_H
|
||||
@@ -105,12 +105,12 @@ namespace vec_ops {
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
template <typename E>
|
||||
cudaError_t transpose_batch(
|
||||
cudaError_t transpose_matrix(
|
||||
const E* mat_in,
|
||||
E* mat_out,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
device_context::DeviceContext& ctx,
|
||||
const device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
|
||||
@@ -11,6 +11,9 @@ set(SRC ${CMAKE_SOURCE_DIR}/src)
|
||||
|
||||
set(FIELD_SOURCE ${SRC}/fields/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/merkle-tree/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/merkle-tree/extern_mmcs.cu)
|
||||
|
||||
if(EXT_FIELD)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/fields/extern_extension.cu)
|
||||
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_NTT)
|
||||
@@ -27,8 +30,6 @@ set(POLYNOMIAL_SOURCE_FILES
|
||||
# TODO: impl poseidon for small fields. note that it needs to be defined over the extension field!
|
||||
if (DEFINED CURVE)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/poseidon.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/tree/merkle.cu)
|
||||
endif()
|
||||
|
||||
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_POSEIDON2)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
set(TARGET icicle_hash)
|
||||
|
||||
add_library(${TARGET} STATIC keccak/keccak.cu)
|
||||
add_library(${TARGET} STATIC keccak/extern.cu)
|
||||
target_include_directories(${TARGET} PUBLIC ${CMAKE_SOURCE_DIR}/include/)
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "ingo_hash")
|
||||
1
icicle/src/hash/keccak/.gitignore
vendored
Normal file
1
icicle/src/hash/keccak/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
test_keccak
|
||||
@@ -1,2 +1,6 @@
|
||||
test_keccak: test.cu keccak.cu
|
||||
nvcc -o test_keccak -I. -I../.. test.cu
|
||||
nvcc -o test_keccak -I../../../include test.cu
|
||||
./test_keccak
|
||||
|
||||
clear:
|
||||
rm test_keccak
|
||||
20
icicle/src/hash/keccak/extern.cu
Normal file
20
icicle/src/hash/keccak/extern.cu
Normal file
@@ -0,0 +1,20 @@
|
||||
#include "utils/utils.h"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
#include "keccak.cu"
|
||||
|
||||
// C-linkage entry points for the Keccak hashes. Each one forwards its arguments
// unchanged to keccak_hash with fixed template parameters <C, D>.
// In the keccak_hash_blocks kernel those parameters give a rate of (1600 - C)
// bits and a digest of D bits; presumably keccak_hash passes them through
// unchanged - confirm against keccak.cu.
namespace keccak {
|
||||
// Keccak-256: keccak_hash instantiated with C = 512, D = 256.
extern "C" cudaError_t
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
{
|
||||
return keccak_hash<512, 256>(input, input_block_size, number_of_blocks, output, config);
|
||||
}
|
||||
|
||||
// Keccak-512: keccak_hash instantiated with C = 1024, D = 512.
extern "C" cudaError_t
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
{
|
||||
return keccak_hash<1024, 512>(input, input_block_size, number_of_blocks, output, config);
|
||||
}
|
||||
} // namespace keccak
|
||||
@@ -1,227 +1,14 @@
|
||||
#include <cstdint>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
#include "kernels.cu"
|
||||
|
||||
using namespace hash;
|
||||
|
||||
namespace keccak {
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
|
||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) \
|
||||
{ \
|
||||
t = ROTL64((d0 ^ d1 ^ d2 ^ d3 ^ d4), 1) ^ (c0 ^ c1 ^ c2 ^ c3 ^ c4); \
|
||||
}
|
||||
|
||||
#define THETA( \
|
||||
s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \
|
||||
s43, s44) \
|
||||
{ \
|
||||
TH_ELT(t0, s40, s41, s42, s43, s44, s10, s11, s12, s13, s14); \
|
||||
TH_ELT(t1, s00, s01, s02, s03, s04, s20, s21, s22, s23, s24); \
|
||||
TH_ELT(t2, s10, s11, s12, s13, s14, s30, s31, s32, s33, s34); \
|
||||
TH_ELT(t3, s20, s21, s22, s23, s24, s40, s41, s42, s43, s44); \
|
||||
TH_ELT(t4, s30, s31, s32, s33, s34, s00, s01, s02, s03, s04); \
|
||||
s00 ^= t0; \
|
||||
s01 ^= t0; \
|
||||
s02 ^= t0; \
|
||||
s03 ^= t0; \
|
||||
s04 ^= t0; \
|
||||
\
|
||||
s10 ^= t1; \
|
||||
s11 ^= t1; \
|
||||
s12 ^= t1; \
|
||||
s13 ^= t1; \
|
||||
s14 ^= t1; \
|
||||
\
|
||||
s20 ^= t2; \
|
||||
s21 ^= t2; \
|
||||
s22 ^= t2; \
|
||||
s23 ^= t2; \
|
||||
s24 ^= t2; \
|
||||
\
|
||||
s30 ^= t3; \
|
||||
s31 ^= t3; \
|
||||
s32 ^= t3; \
|
||||
s33 ^= t3; \
|
||||
s34 ^= t3; \
|
||||
\
|
||||
s40 ^= t4; \
|
||||
s41 ^= t4; \
|
||||
s42 ^= t4; \
|
||||
s43 ^= t4; \
|
||||
s44 ^= t4; \
|
||||
}
|
||||
|
||||
#define RHOPI( \
|
||||
s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \
|
||||
s43, s44) \
|
||||
{ \
|
||||
t0 = ROTL64(s10, (uint64_t)1); \
|
||||
s10 = ROTL64(s11, (uint64_t)44); \
|
||||
s11 = ROTL64(s41, (uint64_t)20); \
|
||||
s41 = ROTL64(s24, (uint64_t)61); \
|
||||
s24 = ROTL64(s42, (uint64_t)39); \
|
||||
s42 = ROTL64(s04, (uint64_t)18); \
|
||||
s04 = ROTL64(s20, (uint64_t)62); \
|
||||
s20 = ROTL64(s22, (uint64_t)43); \
|
||||
s22 = ROTL64(s32, (uint64_t)25); \
|
||||
s32 = ROTL64(s43, (uint64_t)8); \
|
||||
s43 = ROTL64(s34, (uint64_t)56); \
|
||||
s34 = ROTL64(s03, (uint64_t)41); \
|
||||
s03 = ROTL64(s40, (uint64_t)27); \
|
||||
s40 = ROTL64(s44, (uint64_t)14); \
|
||||
s44 = ROTL64(s14, (uint64_t)2); \
|
||||
s14 = ROTL64(s31, (uint64_t)55); \
|
||||
s31 = ROTL64(s13, (uint64_t)45); \
|
||||
s13 = ROTL64(s01, (uint64_t)36); \
|
||||
s01 = ROTL64(s30, (uint64_t)28); \
|
||||
s30 = ROTL64(s33, (uint64_t)21); \
|
||||
s33 = ROTL64(s23, (uint64_t)15); \
|
||||
s23 = ROTL64(s12, (uint64_t)10); \
|
||||
s12 = ROTL64(s21, (uint64_t)6); \
|
||||
s21 = ROTL64(s02, (uint64_t)3); \
|
||||
s02 = t0; \
|
||||
}
|
||||
|
||||
#define KHI( \
|
||||
s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \
|
||||
s43, s44) \
|
||||
{ \
|
||||
t0 = s00 ^ (~s10 & s20); \
|
||||
t1 = s10 ^ (~s20 & s30); \
|
||||
t2 = s20 ^ (~s30 & s40); \
|
||||
t3 = s30 ^ (~s40 & s00); \
|
||||
t4 = s40 ^ (~s00 & s10); \
|
||||
s00 = t0; \
|
||||
s10 = t1; \
|
||||
s20 = t2; \
|
||||
s30 = t3; \
|
||||
s40 = t4; \
|
||||
\
|
||||
t0 = s01 ^ (~s11 & s21); \
|
||||
t1 = s11 ^ (~s21 & s31); \
|
||||
t2 = s21 ^ (~s31 & s41); \
|
||||
t3 = s31 ^ (~s41 & s01); \
|
||||
t4 = s41 ^ (~s01 & s11); \
|
||||
s01 = t0; \
|
||||
s11 = t1; \
|
||||
s21 = t2; \
|
||||
s31 = t3; \
|
||||
s41 = t4; \
|
||||
\
|
||||
t0 = s02 ^ (~s12 & s22); \
|
||||
t1 = s12 ^ (~s22 & s32); \
|
||||
t2 = s22 ^ (~s32 & s42); \
|
||||
t3 = s32 ^ (~s42 & s02); \
|
||||
t4 = s42 ^ (~s02 & s12); \
|
||||
s02 = t0; \
|
||||
s12 = t1; \
|
||||
s22 = t2; \
|
||||
s32 = t3; \
|
||||
s42 = t4; \
|
||||
\
|
||||
t0 = s03 ^ (~s13 & s23); \
|
||||
t1 = s13 ^ (~s23 & s33); \
|
||||
t2 = s23 ^ (~s33 & s43); \
|
||||
t3 = s33 ^ (~s43 & s03); \
|
||||
t4 = s43 ^ (~s03 & s13); \
|
||||
s03 = t0; \
|
||||
s13 = t1; \
|
||||
s23 = t2; \
|
||||
s33 = t3; \
|
||||
s43 = t4; \
|
||||
\
|
||||
t0 = s04 ^ (~s14 & s24); \
|
||||
t1 = s14 ^ (~s24 & s34); \
|
||||
t2 = s24 ^ (~s34 & s44); \
|
||||
t3 = s34 ^ (~s44 & s04); \
|
||||
t4 = s44 ^ (~s04 & s14); \
|
||||
s04 = t0; \
|
||||
s14 = t1; \
|
||||
s24 = t2; \
|
||||
s34 = t3; \
|
||||
s44 = t4; \
|
||||
}
|
||||
|
||||
#define IOTA(element, rc) \
|
||||
{ \
|
||||
element ^= rc; \
|
||||
}
|
||||
|
||||
__device__ const uint64_t RC[24] = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000,
|
||||
0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009,
|
||||
0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003,
|
||||
0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a,
|
||||
0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008};
|
||||
|
||||
__device__ void keccakf(uint64_t s[25])
|
||||
{
|
||||
uint64_t t0, t1, t2, t3, t4;
|
||||
|
||||
for (int i = 0; i < 24; i++) {
|
||||
THETA(
|
||||
s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8],
|
||||
s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]);
|
||||
RHOPI(
|
||||
s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8],
|
||||
s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]);
|
||||
KHI(
|
||||
s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8],
|
||||
s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]);
|
||||
IOTA(s[0], RC[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <int C, int D>
|
||||
__global__ void keccak_hash_blocks(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output)
|
||||
{
|
||||
int bid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (bid >= number_of_blocks) { return; }
|
||||
|
||||
const int r_bits = 1600 - C;
|
||||
const int r_bytes = r_bits / 8;
|
||||
const int d_bytes = D / 8;
|
||||
|
||||
uint8_t* b_input = input + bid * input_block_size;
|
||||
uint8_t* b_output = output + bid * d_bytes;
|
||||
uint64_t state[25] = {}; // Initialize with zeroes
|
||||
|
||||
int input_len = input_block_size;
|
||||
|
||||
// absorb
|
||||
while (input_len >= r_bytes) {
|
||||
// #pragma unroll
|
||||
for (int i = 0; i < r_bytes; i += 8) {
|
||||
state[i / 8] ^= *(uint64_t*)(b_input + i);
|
||||
}
|
||||
keccakf(state);
|
||||
b_input += r_bytes;
|
||||
input_len -= r_bytes;
|
||||
}
|
||||
|
||||
// last block (if any)
|
||||
uint8_t last_block[r_bytes];
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
last_block[i] = b_input[i];
|
||||
}
|
||||
|
||||
// pad 10*1
|
||||
last_block[input_len] = 1;
|
||||
for (int i = 0; i < r_bytes - input_len - 1; i++) {
|
||||
last_block[input_len + i + 1] = 0;
|
||||
}
|
||||
// last bit
|
||||
last_block[r_bytes - 1] |= 0x80;
|
||||
|
||||
// #pragma unroll
|
||||
for (int i = 0; i < r_bytes; i += 8) {
|
||||
state[i / 8] ^= *(uint64_t*)(last_block + i);
|
||||
}
|
||||
keccakf(state);
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < d_bytes; i += 8) {
|
||||
*(uint64_t*)(b_output + i) = state[i / 8];
|
||||
}
|
||||
}
|
||||
|
||||
template <int C, int D>
|
||||
cudaError_t
|
||||
keccak_hash(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
@@ -260,16 +47,4 @@ namespace keccak {
|
||||
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
{
|
||||
return keccak_hash<512, 256>(input, input_block_size, number_of_blocks, output, config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
{
|
||||
return keccak_hash<1024, 512>(input, input_block_size, number_of_blocks, output, config);
|
||||
}
|
||||
} // namespace keccak
|
||||
233
icicle/src/hash/keccak/kernels.cu
Normal file
233
icicle/src/hash/keccak/kernels.cu
Normal file
@@ -0,0 +1,233 @@
|
||||
#pragma once
|
||||
#ifndef KECCAK_KERNELS_H
|
||||
#define KECCAK_KERNELS_H
|
||||
|
||||
#include <cstdint>
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
|
||||
namespace keccak {
|
||||
using u64 = uint64_t;
|
||||
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
|
||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) \
|
||||
{ \
|
||||
t = ROTL64((d0 ^ d1 ^ d2 ^ d3 ^ d4), 1) ^ (c0 ^ c1 ^ c2 ^ c3 ^ c4); \
|
||||
}
|
||||
|
||||
#define THETA( \
|
||||
s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \
|
||||
s43, s44) \
|
||||
{ \
|
||||
TH_ELT(t0, s40, s41, s42, s43, s44, s10, s11, s12, s13, s14); \
|
||||
TH_ELT(t1, s00, s01, s02, s03, s04, s20, s21, s22, s23, s24); \
|
||||
TH_ELT(t2, s10, s11, s12, s13, s14, s30, s31, s32, s33, s34); \
|
||||
TH_ELT(t3, s20, s21, s22, s23, s24, s40, s41, s42, s43, s44); \
|
||||
TH_ELT(t4, s30, s31, s32, s33, s34, s00, s01, s02, s03, s04); \
|
||||
s00 ^= t0; \
|
||||
s01 ^= t0; \
|
||||
s02 ^= t0; \
|
||||
s03 ^= t0; \
|
||||
s04 ^= t0; \
|
||||
\
|
||||
s10 ^= t1; \
|
||||
s11 ^= t1; \
|
||||
s12 ^= t1; \
|
||||
s13 ^= t1; \
|
||||
s14 ^= t1; \
|
||||
\
|
||||
s20 ^= t2; \
|
||||
s21 ^= t2; \
|
||||
s22 ^= t2; \
|
||||
s23 ^= t2; \
|
||||
s24 ^= t2; \
|
||||
\
|
||||
s30 ^= t3; \
|
||||
s31 ^= t3; \
|
||||
s32 ^= t3; \
|
||||
s33 ^= t3; \
|
||||
s34 ^= t3; \
|
||||
\
|
||||
s40 ^= t4; \
|
||||
s41 ^= t4; \
|
||||
s42 ^= t4; \
|
||||
s43 ^= t4; \
|
||||
s44 ^= t4; \
|
||||
}
|
||||
|
||||
#define RHOPI( \
|
||||
s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \
|
||||
s43, s44) \
|
||||
{ \
|
||||
t0 = ROTL64(s10, (uint64_t)1); \
|
||||
s10 = ROTL64(s11, (uint64_t)44); \
|
||||
s11 = ROTL64(s41, (uint64_t)20); \
|
||||
s41 = ROTL64(s24, (uint64_t)61); \
|
||||
s24 = ROTL64(s42, (uint64_t)39); \
|
||||
s42 = ROTL64(s04, (uint64_t)18); \
|
||||
s04 = ROTL64(s20, (uint64_t)62); \
|
||||
s20 = ROTL64(s22, (uint64_t)43); \
|
||||
s22 = ROTL64(s32, (uint64_t)25); \
|
||||
s32 = ROTL64(s43, (uint64_t)8); \
|
||||
s43 = ROTL64(s34, (uint64_t)56); \
|
||||
s34 = ROTL64(s03, (uint64_t)41); \
|
||||
s03 = ROTL64(s40, (uint64_t)27); \
|
||||
s40 = ROTL64(s44, (uint64_t)14); \
|
||||
s44 = ROTL64(s14, (uint64_t)2); \
|
||||
s14 = ROTL64(s31, (uint64_t)55); \
|
||||
s31 = ROTL64(s13, (uint64_t)45); \
|
||||
s13 = ROTL64(s01, (uint64_t)36); \
|
||||
s01 = ROTL64(s30, (uint64_t)28); \
|
||||
s30 = ROTL64(s33, (uint64_t)21); \
|
||||
s33 = ROTL64(s23, (uint64_t)15); \
|
||||
s23 = ROTL64(s12, (uint64_t)10); \
|
||||
s12 = ROTL64(s21, (uint64_t)6); \
|
||||
s21 = ROTL64(s02, (uint64_t)3); \
|
||||
s02 = t0; \
|
||||
}
|
||||
|
||||
#define KHI( \
|
||||
s00, s01, s02, s03, s04, s10, s11, s12, s13, s14, s20, s21, s22, s23, s24, s30, s31, s32, s33, s34, s40, s41, s42, \
|
||||
s43, s44) \
|
||||
{ \
|
||||
t0 = s00 ^ (~s10 & s20); \
|
||||
t1 = s10 ^ (~s20 & s30); \
|
||||
t2 = s20 ^ (~s30 & s40); \
|
||||
t3 = s30 ^ (~s40 & s00); \
|
||||
t4 = s40 ^ (~s00 & s10); \
|
||||
s00 = t0; \
|
||||
s10 = t1; \
|
||||
s20 = t2; \
|
||||
s30 = t3; \
|
||||
s40 = t4; \
|
||||
\
|
||||
t0 = s01 ^ (~s11 & s21); \
|
||||
t1 = s11 ^ (~s21 & s31); \
|
||||
t2 = s21 ^ (~s31 & s41); \
|
||||
t3 = s31 ^ (~s41 & s01); \
|
||||
t4 = s41 ^ (~s01 & s11); \
|
||||
s01 = t0; \
|
||||
s11 = t1; \
|
||||
s21 = t2; \
|
||||
s31 = t3; \
|
||||
s41 = t4; \
|
||||
\
|
||||
t0 = s02 ^ (~s12 & s22); \
|
||||
t1 = s12 ^ (~s22 & s32); \
|
||||
t2 = s22 ^ (~s32 & s42); \
|
||||
t3 = s32 ^ (~s42 & s02); \
|
||||
t4 = s42 ^ (~s02 & s12); \
|
||||
s02 = t0; \
|
||||
s12 = t1; \
|
||||
s22 = t2; \
|
||||
s32 = t3; \
|
||||
s42 = t4; \
|
||||
\
|
||||
t0 = s03 ^ (~s13 & s23); \
|
||||
t1 = s13 ^ (~s23 & s33); \
|
||||
t2 = s23 ^ (~s33 & s43); \
|
||||
t3 = s33 ^ (~s43 & s03); \
|
||||
t4 = s43 ^ (~s03 & s13); \
|
||||
s03 = t0; \
|
||||
s13 = t1; \
|
||||
s23 = t2; \
|
||||
s33 = t3; \
|
||||
s43 = t4; \
|
||||
\
|
||||
t0 = s04 ^ (~s14 & s24); \
|
||||
t1 = s14 ^ (~s24 & s34); \
|
||||
t2 = s24 ^ (~s34 & s44); \
|
||||
t3 = s34 ^ (~s44 & s04); \
|
||||
t4 = s44 ^ (~s04 & s14); \
|
||||
s04 = t0; \
|
||||
s14 = t1; \
|
||||
s24 = t2; \
|
||||
s34 = t3; \
|
||||
s44 = t4; \
|
||||
}
|
||||
|
||||
#define IOTA(element, rc) \
|
||||
{ \
|
||||
element ^= rc; \
|
||||
}
|
||||
|
||||
__device__ const u64 RC[24] = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000,
|
||||
0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009,
|
||||
0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003,
|
||||
0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a,
|
||||
0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008};
|
||||
|
||||
// Applies the 24-round permutation to the 25-lane state `s`, in place.
// Each round runs the THETA, RHOPI and KHI macros over the whole state and then
// IOTA, which XORs the round constant RC[i] into s[0].
// The macro parameter named sXY is bound to s[X + 5 * Y] (s01 -> s[5], s10 -> s[1]).
__device__ void keccakf(u64 s[25])
|
||||
{
|
||||
// Scratch lanes; the macros below refer to them by name.
u64 t0, t1, t2, t3, t4;
|
||||
|
||||
for (int i = 0; i < 24; i++) {
|
||||
THETA(
|
||||
s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8],
|
||||
s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]);
|
||||
RHOPI(
|
||||
s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8],
|
||||
s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]);
|
||||
KHI(
|
||||
s[0], s[5], s[10], s[15], s[20], s[1], s[6], s[11], s[16], s[21], s[2], s[7], s[12], s[17], s[22], s[3], s[8],
|
||||
s[13], s[18], s[23], s[4], s[9], s[14], s[19], s[24]);
|
||||
IOTA(s[0], RC[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <int C, int D>
|
||||
__global__ void keccak_hash_blocks(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output)
|
||||
{
|
||||
int bid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (bid >= number_of_blocks) { return; }
|
||||
|
||||
const int r_bits = 1600 - C;
|
||||
const int r_bytes = r_bits / 8;
|
||||
const int d_bytes = D / 8;
|
||||
|
||||
uint8_t* b_input = input + bid * input_block_size;
|
||||
uint8_t* b_output = output + bid * d_bytes;
|
||||
uint64_t state[25] = {}; // Initialize with zeroes
|
||||
|
||||
int input_len = input_block_size;
|
||||
|
||||
// absorb
|
||||
while (input_len >= r_bytes) {
|
||||
// #pragma unroll
|
||||
for (int i = 0; i < r_bytes; i += 8) {
|
||||
state[i / 8] ^= *(uint64_t*)(b_input + i);
|
||||
}
|
||||
keccakf(state);
|
||||
b_input += r_bytes;
|
||||
input_len -= r_bytes;
|
||||
}
|
||||
|
||||
// last block (if any)
|
||||
uint8_t last_block[r_bytes];
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
last_block[i] = b_input[i];
|
||||
}
|
||||
|
||||
// pad 10*1
|
||||
last_block[input_len] = 1;
|
||||
for (int i = 0; i < r_bytes - input_len - 1; i++) {
|
||||
last_block[input_len + i + 1] = 0;
|
||||
}
|
||||
// last bit
|
||||
last_block[r_bytes - 1] |= 0x80;
|
||||
|
||||
// #pragma unroll
|
||||
for (int i = 0; i < r_bytes; i += 8) {
|
||||
state[i / 8] ^= *(uint64_t*)(last_block + i);
|
||||
}
|
||||
keccakf(state);
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < d_bytes; i += 8) {
|
||||
*(uint64_t*)(b_output + i) = state[i / 8];
|
||||
}
|
||||
}
|
||||
} // namespace keccak
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "keccak.cu"
|
||||
#include "extern.cu"
|
||||
|
||||
// #define DEBUG
|
||||
|
||||
|
||||
Binary file not shown.
25
icicle/src/merkle-tree/extern.cu
Normal file
25
icicle/src/merkle-tree/extern.cu
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "merkle.cu"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
namespace merkle_tree {
|
||||
/// C entry point for the field-specific Merkle tree builder.
/// Dereferences the two hasher pointers and forwards every argument unchanged to
/// `build_merkle_tree<scalar_t, scalar_t>`. Both hasher pointers must be non-null.
extern "C" cudaError_t CONCAT_EXPAND(FIELD, build_merkle_tree)(
  const scalar_t* leaves_digests,
  scalar_t* digests,
  unsigned int height,
  unsigned int input_block_len,
  const hash::SpongeHasher<scalar_t, scalar_t>* compression,
  const hash::SpongeHasher<scalar_t, scalar_t>* bottom_layer,
  const TreeBuilderConfig& tree_config)
{
  const hash::SpongeHasher<scalar_t, scalar_t>& compression_hasher = *compression;
  const hash::SpongeHasher<scalar_t, scalar_t>& bottom_layer_hasher = *bottom_layer;

  return build_merkle_tree<scalar_t, scalar_t>(
    leaves_digests, digests, height, input_block_len, compression_hasher, bottom_layer_hasher, tree_config);
}
|
||||
} // namespace merkle_tree
|
||||
26
icicle/src/merkle-tree/extern_mmcs.cu
Normal file
26
icicle/src/merkle-tree/extern_mmcs.cu
Normal file
@@ -0,0 +1,26 @@
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "mmcs.cu"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace merkle_tree {
|
||||
/// C entry point for the field-specific MMCS commitment.
/// Dereferences the two hasher pointers and forwards every argument unchanged to
/// `mmcs_commit<scalar_t, scalar_t>`. Both hasher pointers must be non-null.
extern "C" cudaError_t CONCAT_EXPAND(FIELD, mmcs_commit_cuda)(
  const Matrix<scalar_t>* leaves,
  unsigned int number_of_inputs,
  scalar_t* digests,
  const hash::SpongeHasher<scalar_t, scalar_t>* hasher,
  const hash::SpongeHasher<scalar_t, scalar_t>* compression,
  const TreeBuilderConfig& tree_config)
{
  const hash::SpongeHasher<scalar_t, scalar_t>& leaf_hasher = *hasher;
  const hash::SpongeHasher<scalar_t, scalar_t>& compression_hasher = *compression;

  return mmcs_commit<scalar_t, scalar_t>(
    leaves, number_of_inputs, digests, leaf_hasher, compression_hasher, tree_config);
}
|
||||
} // namespace merkle_tree
|
||||
336
icicle/src/merkle-tree/merkle.cu
Normal file
336
icicle/src/merkle-tree/merkle.cu
Normal file
@@ -0,0 +1,336 @@
|
||||
#include "hash/hash.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
namespace merkle_tree {
|
||||
/// Constructs merkle subtree without parallelization
|
||||
/// The digests are aligned sequentially per row
|
||||
/// Example:
|
||||
///
|
||||
/// Big tree:
|
||||
///
|
||||
/// 1 <- Root
|
||||
/// / \ <- Arity = 2
|
||||
/// 2 3 <- Digests
|
||||
/// / \ / \ <- Height = 2 (as the number of edges)
|
||||
/// 4 5 6 7 <- arity^height leaves
|
||||
/// | | | | <- Bottom layer hash 1 to 1
|
||||
/// a b c d <- Input vector 1x4
|
||||
///
|
||||
/// Subtree 1 Subtree 2
|
||||
/// 2 3
|
||||
/// / \ / \
|
||||
/// 4 5 6 7
|
||||
///
|
||||
/// Digests array for subtree 1:
|
||||
/// [4 5 . . 2 . .]
|
||||
/// | | |
|
||||
/// ----- V
|
||||
/// | Segment (offset = 4, subtree_idx = 0)
|
||||
/// v
|
||||
/// Segment (offset = 0, subtree_idx = 0)
|
||||
///
|
||||
/// Digests array for subtree 2:
|
||||
/// [. . 6 7 . 3 .]
|
||||
/// | |
|
||||
/// -----
|
||||
/// |
|
||||
/// v
|
||||
/// Segment (offset = 0, subtree_idx = 1)
|
||||
///
|
||||
/// Total digests array:
|
||||
/// [4 5 6 7 2 3 .]
|
||||
///
|
||||
/// Example for custom config:
|
||||
///
|
||||
/// arity = 2
|
||||
/// input_block_len = 2
|
||||
/// digest_elements = 2
|
||||
/// bottom_layer hash width = 4
|
||||
/// compression width = 4
|
||||
/// height = 2
|
||||
///
|
||||
/// [a, b] <- Root of the tree
|
||||
/// | |
|
||||
/// [a, b, c, d]
|
||||
/// / \ / \
|
||||
/// [i, j, m, n]
|
||||
/// ┌──┬──────┴──┴──┴──┴──────┬──┐
|
||||
/// | | | |
|
||||
/// [i, j, k, l] [m, n, o, p] <- compression states
|
||||
/// / \ / \ / \ / \ <- Running permutation
|
||||
/// [1, 2, 5, 6] [9, 1, 4, 5] <- compression states
|
||||
/// ┌──┬───┴──┴──┼──┤ ┌──┬───┴──┴──┼──┤
|
||||
/// | | | | | | | | <- digest_elements * arity^height leaves
|
||||
/// [1, 2, 3, 4] [5, 6, 7, 8] [9, 1, 2, 3] [4, 5, 6, 7] <- Permuted states
|
||||
/// / \ / \ / \ / \ / \ / \ / \ / \ <- Running permutation
|
||||
/// [a, b, 0, 0] [c, d, 0, 0] [e, f, 0, 0] [g, h, 0, 0] <- States of the bottom layer hash
|
||||
/// | | | | | | | | <- Bottom layer hash 2 to 2
|
||||
/// a b c d e f g h <- Input vector 2x4
|
||||
///
|
||||
/// Input matrix:
|
||||
/// ┌ ┐
|
||||
/// | a b |
|
||||
/// | c d |
|
||||
/// | e f |
|
||||
/// | g h |
|
||||
/// └ ┘
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t build_merkle_subtree(
|
||||
const L* leaves,
|
||||
D* states,
|
||||
D* digests,
|
||||
size_t subtree_idx,
|
||||
size_t subtree_height,
|
||||
L* big_tree_digests,
|
||||
size_t start_segment_size,
|
||||
size_t start_segment_offset,
|
||||
uint64_t keep_rows,
|
||||
uint64_t input_block_len,
|
||||
const SpongeHasher<L, D>& bottom_layer,
|
||||
const SpongeHasher<L, D>& compression,
|
||||
const TreeBuilderConfig& tree_config,
|
||||
device_context::DeviceContext& ctx)
|
||||
{
|
||||
uint64_t arity = tree_config.arity;
|
||||
|
||||
SpongeConfig sponge_config = default_sponge_config(ctx);
|
||||
sponge_config.are_inputs_on_device = true;
|
||||
sponge_config.are_outputs_on_device = true;
|
||||
sponge_config.is_async = true;
|
||||
|
||||
size_t bottom_layer_states = pow(arity, subtree_height);
|
||||
|
||||
if (!tree_config.are_inputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
states, leaves, bottom_layer_states * input_block_len * sizeof(L), cudaMemcpyHostToDevice, ctx.stream));
|
||||
}
|
||||
|
||||
bottom_layer.hash_many(
|
||||
tree_config.are_inputs_on_device ? leaves : states, digests, bottom_layer_states, input_block_len,
|
||||
tree_config.digest_elements, sponge_config);
|
||||
|
||||
uint64_t number_of_states = bottom_layer_states / arity;
|
||||
size_t segment_size = start_segment_size;
|
||||
size_t segment_offset = start_segment_offset;
|
||||
|
||||
if (!keep_rows || subtree_height < keep_rows) {
|
||||
D* digests_with_offset = big_tree_digests + segment_offset + subtree_idx * bottom_layer_states;
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
digests_with_offset, digests, bottom_layer_states * tree_config.digest_elements * sizeof(D),
|
||||
cudaMemcpyDeviceToHost, ctx.stream));
|
||||
segment_offset += segment_size;
|
||||
}
|
||||
segment_size /= arity;
|
||||
subtree_height--;
|
||||
swap<D>(&digests, &states);
|
||||
|
||||
while (number_of_states > 0) {
|
||||
CHK_IF_RETURN(
|
||||
compression.compress_many(states, digests, number_of_states, tree_config.digest_elements, sponge_config));
|
||||
|
||||
if (!keep_rows || subtree_height < keep_rows) {
|
||||
D* digests_with_offset =
|
||||
big_tree_digests + segment_offset + subtree_idx * number_of_states * tree_config.digest_elements;
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
digests_with_offset, digests, number_of_states * tree_config.digest_elements * sizeof(D),
|
||||
cudaMemcpyDeviceToHost, ctx.stream));
|
||||
segment_offset += segment_size;
|
||||
}
|
||||
if (number_of_states > 1) { swap<D>(&digests, &states); }
|
||||
segment_size /= arity;
|
||||
subtree_height--;
|
||||
number_of_states /= arity;
|
||||
}
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t build_merkle_tree(
|
||||
const L* leaves,
|
||||
D* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const SpongeHasher<L, D>& compression,
|
||||
const SpongeHasher<L, D>& bottom_layer,
|
||||
const TreeBuilderConfig& tree_config)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = tree_config.ctx.stream;
|
||||
|
||||
if (input_block_len * sizeof(L) > bottom_layer.rate * sizeof(D))
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"Sponge construction at the bottom of the tree doesn't support inputs bigger than hash rate");
|
||||
if (compression.preimage_max_length < tree_config.arity * tree_config.digest_elements)
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"Hash max preimage length does not match merkle tree arity multiplied by digest elements");
|
||||
|
||||
uint64_t number_of_bottom_layer_states = pow(tree_config.arity, height);
|
||||
|
||||
// This will determine how much splitting do we need to do
|
||||
// `number_of_streams` subtrees should fit in the device
|
||||
// This means each subtree should fit in `STREAM_CHUNK_SIZE` memory
|
||||
uint64_t number_of_subtrees = 1;
|
||||
uint64_t subtree_height = height;
|
||||
uint64_t subtree_bottom_layer_states = number_of_bottom_layer_states;
|
||||
uint64_t subtree_states_size = subtree_bottom_layer_states * bottom_layer.width;
|
||||
|
||||
uint64_t subtree_digests_size;
|
||||
if (compression.width != compression.preimage_max_length) {
|
||||
// In that case, the states on layer 1 will require extending the states by (width / preimage_max_len) factor
|
||||
subtree_digests_size =
|
||||
subtree_states_size * bottom_layer.preimage_max_length / bottom_layer.width * tree_config.digest_elements;
|
||||
} else {
|
||||
subtree_digests_size = subtree_states_size / bottom_layer.width * tree_config.digest_elements;
|
||||
}
|
||||
size_t subtree_memory_required = sizeof(D) * (subtree_states_size + subtree_digests_size);
|
||||
while (subtree_memory_required > STREAM_CHUNK_SIZE) {
|
||||
number_of_subtrees *= tree_config.arity;
|
||||
subtree_height--;
|
||||
subtree_bottom_layer_states /= tree_config.arity;
|
||||
subtree_states_size /= tree_config.arity;
|
||||
subtree_digests_size /= tree_config.arity;
|
||||
subtree_memory_required = sizeof(D) * (subtree_states_size + subtree_digests_size);
|
||||
}
|
||||
int cap_height = height - subtree_height;
|
||||
size_t caps_len = pow(tree_config.arity, cap_height) * tree_config.digest_elements;
|
||||
|
||||
size_t available_memory, _total_memory;
|
||||
CHK_IF_RETURN(cudaMemGetInfo(&available_memory, &_total_memory));
|
||||
available_memory -= GIGA / 8; // Leave 128 MB just in case
|
||||
|
||||
// We can effectively parallelize memory copy with streams
|
||||
// as long as they don't operate on more than `STREAM_CHUNK_SIZE` bytes
|
||||
const size_t number_of_streams = std::min((uint64_t)(available_memory / STREAM_CHUNK_SIZE), number_of_subtrees);
|
||||
cudaStream_t* streams = static_cast<cudaStream_t*>(malloc(sizeof(cudaStream_t) * number_of_streams));
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamCreate(&streams[i]));
|
||||
}
|
||||
|
||||
bool caps_mode = tree_config.keep_rows && tree_config.keep_rows <= cap_height;
|
||||
D* caps;
|
||||
if (caps_mode) { caps = static_cast<D*>(malloc(caps_len * sizeof(D))); }
|
||||
|
||||
#ifdef MERKLE_DEBUG
|
||||
std::cout << "Available memory = " << available_memory / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Number of streams = " << number_of_streams << std::endl;
|
||||
std::cout << "Number of subtrees = " << number_of_subtrees << std::endl;
|
||||
std::cout << "Height of a subtree = " << subtree_height << std::endl;
|
||||
std::cout << "Cutoff height = " << height - subtree_height << std::endl;
|
||||
std::cout << "Number of leaves in a subtree = " << subtree_bottom_layer_states << std::endl;
|
||||
std::cout << "State of a subtree = " << subtree_states_size << std::endl;
|
||||
std::cout << "Digest elements for a subtree = " << subtree_digests_size << std::endl;
|
||||
std::cout << "Size of 1 subtree states = " << subtree_states_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Size of 1 subtree digests = " << subtree_digests_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Cap height = " << cap_height << std::endl;
|
||||
std::cout << "Enabling caps mode? " << caps_mode << std::endl;
|
||||
#endif
|
||||
|
||||
// Allocate memory for the leaves and digests
|
||||
// These are shared by streams in a pool
|
||||
D *states_ptr, *digests_ptr;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states_ptr, subtree_states_size * number_of_streams * sizeof(D), stream));
|
||||
CHK_IF_RETURN(cudaMemsetAsync(states_ptr, 0, subtree_states_size * number_of_streams * sizeof(D), stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(&digests_ptr, subtree_digests_size * number_of_streams * sizeof(D), stream));
|
||||
// Wait for these allocations to finish
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(stream));
|
||||
|
||||
// Build subtrees in parallel. This for loop invokes kernels that can run in a pool of size `number_of_streams`
|
||||
for (size_t subtree_idx = 0; subtree_idx < number_of_subtrees; subtree_idx++) {
|
||||
size_t stream_idx = subtree_idx % number_of_streams;
|
||||
cudaStream_t subtree_stream = streams[stream_idx];
|
||||
|
||||
const L* subtree_leaves = leaves + subtree_idx * subtree_bottom_layer_states * input_block_len;
|
||||
D* subtree_state = states_ptr + stream_idx * subtree_states_size;
|
||||
D* subtree_digests = digests_ptr + stream_idx * subtree_digests_size;
|
||||
|
||||
int subtree_keep_rows = 0;
|
||||
if (tree_config.keep_rows) {
|
||||
int diff = tree_config.keep_rows - cap_height;
|
||||
subtree_keep_rows = std::max(1, diff);
|
||||
}
|
||||
device_context::DeviceContext subtree_context{subtree_stream, tree_config.ctx.device_id, tree_config.ctx.mempool};
|
||||
|
||||
uint64_t start_segment_size = number_of_bottom_layer_states * tree_config.digest_elements;
|
||||
cudaError_t subtree_result = build_merkle_subtree<L, D>(
|
||||
subtree_leaves, // leaves
|
||||
subtree_state, // state
|
||||
subtree_digests, // digests
|
||||
subtree_idx, // subtree_idx
|
||||
subtree_height, // subtree_height
|
||||
caps_mode ? caps : digests, // big_tree_digests
|
||||
start_segment_size, // start_segment_size
|
||||
0, // start_segment_offset
|
||||
subtree_keep_rows, // keep_rows
|
||||
input_block_len, // input_block_len
|
||||
bottom_layer, // bottom_layer
|
||||
compression, // compression
|
||||
tree_config, // tree_config
|
||||
subtree_context // subtree_context
|
||||
);
|
||||
CHK_IF_RETURN(subtree_result);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
|
||||
}
|
||||
|
||||
SpongeConfig sponge_config = default_sponge_config(tree_config.ctx);
|
||||
sponge_config.are_inputs_on_device = tree_config.are_inputs_on_device;
|
||||
sponge_config.are_outputs_on_device = true;
|
||||
sponge_config.is_async = true;
|
||||
// Finish the top-level tree if any
|
||||
if (cap_height > 0) {
|
||||
size_t start_segment_size = caps_len / tree_config.arity;
|
||||
size_t start_segment_offset = 0;
|
||||
if (!caps_mode) { // Calculate offset
|
||||
size_t keep_rows = tree_config.keep_rows ? tree_config.keep_rows : height + 1;
|
||||
size_t layer_size = pow(tree_config.arity, keep_rows - 1) * tree_config.digest_elements;
|
||||
for (int i = 0; i < keep_rows - cap_height; i++) {
|
||||
start_segment_offset += layer_size;
|
||||
layer_size /= tree_config.arity;
|
||||
}
|
||||
}
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
states_ptr, caps_mode ? caps : (digests + start_segment_offset - caps_len), caps_len * sizeof(D),
|
||||
(caps_mode || !tree_config.are_outputs_on_device) ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, stream));
|
||||
|
||||
uint64_t number_of_states = caps_len / tree_config.arity / tree_config.digest_elements;
|
||||
|
||||
size_t segment_size = start_segment_size;
|
||||
size_t segment_offset = start_segment_offset;
|
||||
while (number_of_states > 0) {
|
||||
CHK_IF_RETURN(compression.compress_many(
|
||||
states_ptr, digests_ptr, number_of_states, tree_config.digest_elements, sponge_config));
|
||||
if (!tree_config.keep_rows || cap_height < tree_config.keep_rows + (int)caps_mode) {
|
||||
D* digests_with_offset = digests + segment_offset;
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
digests_with_offset, digests_ptr, number_of_states * tree_config.digest_elements * sizeof(D),
|
||||
cudaMemcpyDeviceToHost, stream));
|
||||
segment_offset += segment_size;
|
||||
}
|
||||
|
||||
if (number_of_states > 1) { swap<D>(&digests_ptr, &states_ptr); }
|
||||
|
||||
segment_size /= tree_config.arity;
|
||||
cap_height--;
|
||||
number_of_states /= tree_config.arity;
|
||||
}
|
||||
if (caps_mode) { free(caps); }
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaFreeAsync(states_ptr, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(digests_ptr, stream));
|
||||
if (!tree_config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
|
||||
CHK_IF_RETURN(cudaStreamDestroy(streams[i]));
|
||||
}
|
||||
free(streams);
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
} // namespace merkle_tree
|
||||
456
icicle/src/merkle-tree/mmcs.cu
Normal file
456
icicle/src/merkle-tree/mmcs.cu
Normal file
@@ -0,0 +1,456 @@
|
||||
#include "hash/hash.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace merkle_tree {
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t hash_leaves(
|
||||
const Matrix<L>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
uint64_t number_of_rows,
|
||||
D* digests,
|
||||
unsigned int digest_elements,
|
||||
const SpongeHasher<L, D>& hasher,
|
||||
const device_context::DeviceContext& ctx)
|
||||
{
|
||||
SpongeConfig sponge_config = default_sponge_config(ctx);
|
||||
sponge_config.are_inputs_on_device = true;
|
||||
sponge_config.are_outputs_on_device = true;
|
||||
sponge_config.is_async = true;
|
||||
|
||||
uint64_t number_of_rows_padded = next_pow_of_two(number_of_rows);
|
||||
|
||||
CHK_IF_RETURN(hasher.hash_2d(leaves, digests, number_of_inputs, digest_elements, number_of_rows, ctx));
|
||||
|
||||
if (number_of_rows_padded - number_of_rows) {
|
||||
// Pad with default digests
|
||||
cudaMemsetAsync(
|
||||
(void*)(digests + number_of_rows), 0, (number_of_rows_padded - number_of_rows) * digest_elements * sizeof(D),
|
||||
ctx.stream);
|
||||
}
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
struct SubtreeParams {
|
||||
unsigned int number_of_inputs; // Number of input matrices
|
||||
unsigned int arity; // Arity of the tree
|
||||
unsigned int digest_elements; // Number of output elements per hash
|
||||
size_t number_of_rows; // Current number of input rows to operate on
|
||||
size_t number_of_rows_padded; // next power of arity for number_of_rows
|
||||
size_t subtree_idx; // The subtree id
|
||||
size_t number_of_subtrees; // Total number of subtrees
|
||||
uint64_t subtree_height; // Height of one subtree
|
||||
|
||||
/// One segment corresponds to one layer of output digests
|
||||
size_t segment_size; // The size of current segment.
|
||||
size_t segment_offset; // An offset for the current segment
|
||||
unsigned int leaves_offset; // An offset in the sorted list of input matrices
|
||||
unsigned int number_of_leaves_to_inject; // Number of leaves to inject in current level
|
||||
unsigned int keep_rows; // Number of rows to keep
|
||||
bool are_inputs_on_device;
|
||||
bool caps_mode;
|
||||
const SpongeHasher<L, D>* hasher = nullptr;
|
||||
const SpongeHasher<L, D>* compression = nullptr;
|
||||
const device_context::DeviceContext* ctx = nullptr;
|
||||
};
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t slice_and_copy_leaves(
|
||||
const std::vector<Matrix<L>>& leaves, L* d_leaves, Matrix<L>* d_leaves_info, SubtreeParams<L, D>& params)
|
||||
{
|
||||
uint64_t target_height = params.number_of_rows_padded * params.number_of_subtrees;
|
||||
params.number_of_leaves_to_inject = 0;
|
||||
while (params.leaves_offset < params.number_of_inputs &&
|
||||
next_pow_of_two(leaves[params.leaves_offset].height) >= target_height) {
|
||||
if (next_pow_of_two(leaves[params.leaves_offset].height) == target_height) params.number_of_leaves_to_inject++;
|
||||
params.leaves_offset++;
|
||||
}
|
||||
|
||||
if (params.number_of_leaves_to_inject) {
|
||||
size_t rows_offset = params.subtree_idx * params.number_of_rows_padded;
|
||||
size_t actual_layer_rows = leaves[params.leaves_offset - params.number_of_leaves_to_inject].height;
|
||||
params.number_of_rows = std::min(actual_layer_rows - rows_offset, params.number_of_rows_padded);
|
||||
|
||||
Matrix<L>* leaves_info = static_cast<Matrix<L>*>(malloc(params.number_of_leaves_to_inject * sizeof(Matrix<L>)));
|
||||
L* d_leaves_ptr = d_leaves;
|
||||
for (auto i = 0; i < params.number_of_leaves_to_inject; i++) {
|
||||
Matrix<L> leaf = leaves[params.leaves_offset - params.number_of_leaves_to_inject + i];
|
||||
if (!params.are_inputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_leaves_ptr, leaf.values + rows_offset * leaf.width, params.number_of_rows * leaf.width * sizeof(L),
|
||||
cudaMemcpyHostToDevice, params.ctx->stream));
|
||||
} else {
|
||||
d_leaves_ptr = leaf.values + rows_offset * leaf.width;
|
||||
}
|
||||
|
||||
leaves_info[i] = {d_leaves_ptr, leaf.width, params.number_of_rows};
|
||||
d_leaves_ptr += params.number_of_rows * leaf.width;
|
||||
}
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_leaves_info, leaves_info, params.number_of_leaves_to_inject * sizeof(Matrix<L>), cudaMemcpyHostToDevice,
|
||||
params.ctx->stream));
|
||||
free(leaves_info);
|
||||
}
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
/// Checks if the current row needs to be copied out to the resulting digests array
|
||||
/// Computes the needed offsets using segments model
|
||||
template <typename L, typename D>
|
||||
cudaError_t maybe_copy_digests(D* digests, L* big_tree_digests, SubtreeParams<L, D>& params)
|
||||
{
|
||||
if (!params.keep_rows || params.subtree_height < params.keep_rows + (int)params.caps_mode) {
|
||||
D* digests_with_offset = big_tree_digests + params.segment_offset +
|
||||
params.subtree_idx * params.number_of_rows_padded * params.digest_elements;
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
digests_with_offset, digests, params.number_of_rows_padded * params.digest_elements * sizeof(D),
|
||||
cudaMemcpyDeviceToHost, params.ctx->stream));
|
||||
params.segment_offset += params.segment_size;
|
||||
}
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t fold_layer(
|
||||
const std::vector<Matrix<L>>& leaves,
|
||||
D* prev_layer,
|
||||
D* next_layer,
|
||||
L* aux_leaves_mem,
|
||||
Matrix<L>* d_leaves_info,
|
||||
SubtreeParams<L, D>& params)
|
||||
{
|
||||
CHK_IF_RETURN(slice_and_copy_leaves<L>(leaves, aux_leaves_mem, d_leaves_info, params));
|
||||
|
||||
if (params.number_of_leaves_to_inject) {
|
||||
CHK_IF_RETURN(params.compression->compress_and_inject(
|
||||
d_leaves_info, params.number_of_leaves_to_inject, params.number_of_rows, prev_layer, next_layer,
|
||||
params.digest_elements, *params.ctx));
|
||||
} else {
|
||||
CHK_IF_RETURN(params.compression->run_hash_many_kernel(
|
||||
prev_layer, next_layer, params.number_of_rows_padded, params.compression->width, params.digest_elements,
|
||||
*params.ctx));
|
||||
}
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t build_mmcs_subtree(
|
||||
const std::vector<Matrix<L>>& leaves,
|
||||
L* d_leaves,
|
||||
D* states,
|
||||
L* aux_leaves_mem,
|
||||
L* big_tree_digests,
|
||||
SubtreeParams<L, D>& params)
|
||||
{
|
||||
// Leaves info
|
||||
Matrix<L>* d_leaves_info;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_leaves_info, params.number_of_inputs * sizeof(Matrix<L>), params.ctx->stream));
|
||||
|
||||
CHK_IF_RETURN(slice_and_copy_leaves(leaves, d_leaves, d_leaves_info, params));
|
||||
|
||||
// Reuse leaves memory
|
||||
D* digests = (D*)d_leaves;
|
||||
|
||||
CHK_IF_RETURN(hash_leaves(
|
||||
d_leaves_info, params.number_of_leaves_to_inject, params.number_of_rows, states, params.digest_elements,
|
||||
*params.hasher, *params.ctx));
|
||||
|
||||
CHK_IF_RETURN(maybe_copy_digests(digests, big_tree_digests, params));
|
||||
|
||||
params.number_of_rows_padded /= params.arity;
|
||||
params.segment_size /= params.arity;
|
||||
params.subtree_height--;
|
||||
|
||||
D* prev_layer = states;
|
||||
D* next_layer = digests;
|
||||
while (params.number_of_rows_padded > 0) {
|
||||
CHK_IF_RETURN(fold_layer(leaves, prev_layer, next_layer, aux_leaves_mem, d_leaves_info, params));
|
||||
CHK_IF_RETURN(maybe_copy_digests(next_layer, big_tree_digests, params));
|
||||
swap<D>(&prev_layer, &next_layer);
|
||||
params.segment_size /= params.arity;
|
||||
params.subtree_height--;
|
||||
params.number_of_rows_padded /= params.arity;
|
||||
}
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t mmcs_commit(
|
||||
const Matrix<L>* inputs,
|
||||
const unsigned int number_of_inputs,
|
||||
D* digests,
|
||||
const SpongeHasher<L, D>& hasher,
|
||||
const SpongeHasher<L, D>& compression,
|
||||
const TreeBuilderConfig& tree_config)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = tree_config.ctx.stream;
|
||||
|
||||
if (number_of_inputs == 0) THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "No matrices provided");
|
||||
|
||||
if (compression.preimage_max_length < tree_config.arity * tree_config.digest_elements)
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"Hash max preimage length does not match merkle tree arity multiplied by digest elements");
|
||||
|
||||
std::vector<Matrix<L>> sorted_inputs(number_of_inputs);
|
||||
std::partial_sort_copy(
|
||||
inputs, inputs + number_of_inputs, sorted_inputs.begin(), sorted_inputs.end(),
|
||||
[](const Matrix<L>& left, const Matrix<L>& right) { return left.height > right.height; });
|
||||
|
||||
// Check that the height of any two given matrices either rounds up
|
||||
// to the same next power of two or otherwise equal
|
||||
for (unsigned int i = 0; i < number_of_inputs - 1; i++) {
|
||||
unsigned int left = sorted_inputs[i].height;
|
||||
unsigned int right = sorted_inputs[i + 1].height;
|
||||
|
||||
if (next_pow_of_two(left) == next_pow_of_two(right) && left != right)
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "Matrix heights that round up to the same power of two must be equal");
|
||||
}
|
||||
|
||||
uint64_t max_height = sorted_inputs[0].height;
|
||||
|
||||
// Calculate maximum additional memory needed for injected matrices
|
||||
uint64_t max_aux_total_elements = 0;
|
||||
uint64_t current_aux_total_elements = 0;
|
||||
uint64_t current_height = 0;
|
||||
uint64_t bottom_layer_leaves_elements = 0;
|
||||
if (!tree_config.are_inputs_on_device) {
|
||||
for (auto it = sorted_inputs.begin(); it < sorted_inputs.end(); it++) {
|
||||
if (it->height == max_height) {
|
||||
bottom_layer_leaves_elements += it->height * it->width;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (it->height != current_height) {
|
||||
current_height = it->height;
|
||||
current_aux_total_elements = 0;
|
||||
}
|
||||
|
||||
current_aux_total_elements += it->width * it->height;
|
||||
if (current_aux_total_elements > max_aux_total_elements) {
|
||||
max_aux_total_elements = current_aux_total_elements;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t number_of_bottom_layer_rows = next_pow_of_two(max_height);
|
||||
size_t leaves_info_memory = number_of_inputs * sizeof(Matrix<L>);
|
||||
|
||||
unsigned int tree_height = get_height(number_of_bottom_layer_rows);
|
||||
|
||||
// This will determine how much splitting do we need to do
|
||||
// `number_of_streams` subtrees should fit in the device
|
||||
// This means each subtree should fit in `STREAM_CHUNK_SIZE` memory
|
||||
uint64_t number_of_subtrees = 1;
|
||||
uint64_t subtree_height = tree_height;
|
||||
uint64_t subtree_bottom_layer_rows = number_of_bottom_layer_rows;
|
||||
uint64_t subtree_states_size = subtree_bottom_layer_rows * hasher.width;
|
||||
uint64_t subtree_digests_size = subtree_bottom_layer_rows * tree_config.digest_elements;
|
||||
uint64_t subtree_leaves_elements = 0;
|
||||
for (int i = 0; i < number_of_inputs && sorted_inputs[i].height == max_height; i++) {
|
||||
subtree_leaves_elements += sorted_inputs[i].width * sorted_inputs[i].height;
|
||||
}
|
||||
uint64_t subtree_aux_elements = max_aux_total_elements;
|
||||
|
||||
size_t subtree_leaves_memory = std::max(subtree_leaves_elements * sizeof(L), subtree_digests_size * sizeof(D));
|
||||
size_t subtree_memory_required =
|
||||
sizeof(D) * subtree_states_size + subtree_leaves_memory + subtree_aux_elements * sizeof(L) + leaves_info_memory;
|
||||
while (subtree_memory_required > STREAM_CHUNK_SIZE) {
|
||||
number_of_subtrees *= tree_config.arity;
|
||||
subtree_height--;
|
||||
subtree_bottom_layer_rows /= tree_config.arity;
|
||||
subtree_states_size /= tree_config.arity;
|
||||
subtree_digests_size /= tree_config.arity;
|
||||
subtree_leaves_elements /= tree_config.arity;
|
||||
subtree_aux_elements /= tree_config.arity;
|
||||
subtree_leaves_memory = std::max(subtree_leaves_elements * sizeof(L), subtree_digests_size * sizeof(D));
|
||||
subtree_memory_required =
|
||||
sizeof(D) * subtree_states_size + subtree_leaves_memory + subtree_aux_elements * sizeof(L) + leaves_info_memory;
|
||||
}
|
||||
unsigned int cap_height = tree_height - subtree_height;
|
||||
size_t caps_len = pow(tree_config.arity, cap_height) * tree_config.digest_elements;
|
||||
|
||||
size_t available_memory, _total_memory;
|
||||
CHK_IF_RETURN(cudaMemGetInfo(&available_memory, &_total_memory));
|
||||
if (available_memory < (GIGA / 8 + STREAM_CHUNK_SIZE)) {
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"Not enough GPU memory to build a tree. At least 1.125 GB of GPU memory required");
|
||||
}
|
||||
available_memory -= GIGA / 8; // Leave 128 MB just in case
|
||||
|
||||
// We can effectively parallelize memory copy with streams
|
||||
// as long as they don't operate on more than `STREAM_CHUNK_SIZE` bytes
|
||||
const size_t number_of_streams = std::min((uint64_t)(available_memory / STREAM_CHUNK_SIZE), number_of_subtrees);
|
||||
std::vector<cudaStream_t> streams(number_of_streams);
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamCreate(&streams[i]));
|
||||
}
|
||||
|
||||
// If keep_rows is smaller than the remaining top-tree height
|
||||
// we need to allocate additional memory to store the roots
|
||||
// of subtrees, in order to proceed from there
|
||||
bool caps_mode = tree_config.keep_rows && tree_config.keep_rows <= cap_height;
|
||||
D* caps;
|
||||
if (caps_mode) { caps = static_cast<D*>(malloc(caps_len * sizeof(D))); }
|
||||
|
||||
#ifdef MERKLE_DEBUG
|
||||
std::cout << "MMCS DEBUG" << std::endl;
|
||||
std::cout << "====================================" << std::endl;
|
||||
std::cout << "Available memory = " << available_memory / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Number of streams = " << number_of_streams << std::endl;
|
||||
std::cout << "Number of subtrees = " << number_of_subtrees << std::endl;
|
||||
std::cout << "Height of a subtree = " << subtree_height << std::endl;
|
||||
std::cout << "Cutoff height = " << tree_height - subtree_height << std::endl;
|
||||
std::cout << "Number of leaves in a subtree = " << subtree_bottom_layer_rows << std::endl;
|
||||
std::cout << "State of a subtree = " << subtree_states_size << std::endl;
|
||||
std::cout << "Digest elements for a subtree = " << subtree_digests_size << std::endl;
|
||||
std::cout << "Size of 1 subtree states = " << subtree_states_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Size of 1 subtree digests = " << subtree_digests_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Cap height = " << cap_height << std::endl;
|
||||
std::cout << "Enabling caps mode? " << caps_mode << std::endl;
|
||||
|
||||
std::cout << "Allocating " << subtree_states_size * number_of_streams << " elements for states" << std::endl;
|
||||
std::cout << "Allocating " << subtree_leaves_memory * number_of_streams << " bytes for leaves" << std::endl;
|
||||
std::cout << "Allocating " << subtree_aux_elements * number_of_streams << " elements for aux leaves" << std::endl;
|
||||
std::cout << std::endl;
|
||||
#endif
|
||||
|
||||
// Allocate memory for the states, injected leaves (aux) and digests
|
||||
// These are shared by streams in a pool
|
||||
D* states_ptr;
|
||||
L *aux_ptr, *leaves_ptr;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states_ptr, subtree_states_size * number_of_streams * sizeof(D), stream));
|
||||
CHK_IF_RETURN(cudaMemsetAsync(states_ptr, 0, subtree_states_size * number_of_streams * sizeof(D), stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(&leaves_ptr, subtree_leaves_memory * number_of_streams, stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(&aux_ptr, subtree_aux_elements * number_of_streams * sizeof(L), stream));
|
||||
// Wait for these allocations to finish
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(stream));
|
||||
|
||||
// Build subtrees in parallel. This for loop invokes kernels that can run in a pool of size `number_of_streams`
|
||||
for (size_t subtree_idx = 0; subtree_idx < number_of_subtrees; subtree_idx++) {
|
||||
size_t stream_idx = subtree_idx % number_of_streams;
|
||||
cudaStream_t subtree_stream = streams[stream_idx];
|
||||
|
||||
D* subtree_state = states_ptr + stream_idx * subtree_states_size;
|
||||
L* subtree_leaves = (L*)((unsigned char*)leaves_ptr + stream_idx * subtree_leaves_memory);
|
||||
L* subtree_aux = aux_ptr + stream_idx * subtree_aux_elements;
|
||||
|
||||
unsigned int subtree_keep_rows = 0;
|
||||
if (tree_config.keep_rows) {
|
||||
int diff = tree_config.keep_rows - cap_height;
|
||||
subtree_keep_rows = std::max(1, diff);
|
||||
}
|
||||
device_context::DeviceContext subtree_context{subtree_stream, tree_config.ctx.device_id, tree_config.ctx.mempool};
|
||||
|
||||
SubtreeParams<L, D> params = {};
|
||||
|
||||
params.number_of_inputs = number_of_inputs;
|
||||
params.arity = tree_config.arity;
|
||||
params.digest_elements = tree_config.digest_elements;
|
||||
params.number_of_rows = subtree_bottom_layer_rows;
|
||||
params.number_of_rows_padded = subtree_bottom_layer_rows;
|
||||
|
||||
params.subtree_idx = subtree_idx;
|
||||
params.subtree_height = subtree_height;
|
||||
params.number_of_subtrees = number_of_subtrees;
|
||||
|
||||
params.segment_size = number_of_bottom_layer_rows * tree_config.digest_elements;
|
||||
params.keep_rows = subtree_keep_rows;
|
||||
params.are_inputs_on_device = tree_config.are_inputs_on_device;
|
||||
params.hasher = &hasher;
|
||||
params.compression = &compression;
|
||||
params.ctx = &subtree_context;
|
||||
|
||||
cudaError_t subtree_result = build_mmcs_subtree<L, D>(
|
||||
sorted_inputs,
|
||||
subtree_leaves, // d_leaves
|
||||
subtree_state, // states
|
||||
subtree_aux, // aux_leaves_mem
|
||||
caps_mode ? caps : digests, // big_tree_digests
|
||||
params // params
|
||||
);
|
||||
CHK_IF_RETURN(subtree_result);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
|
||||
}
|
||||
|
||||
// Finish the top-level tree if any
|
||||
if (cap_height > 0) {
|
||||
D* digests_ptr = (D*)leaves_ptr;
|
||||
size_t start_segment_size = caps_len / tree_config.arity;
|
||||
size_t start_segment_offset = 0;
|
||||
if (!caps_mode) { // Calculate offset
|
||||
size_t keep_rows = tree_config.keep_rows ? tree_config.keep_rows : tree_height + 1;
|
||||
size_t layer_size = pow(tree_config.arity, keep_rows - 1) * tree_config.digest_elements;
|
||||
for (int i = 0; i < keep_rows - cap_height; i++) {
|
||||
start_segment_offset += layer_size;
|
||||
layer_size /= tree_config.arity;
|
||||
}
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
states_ptr, caps_mode ? caps : (digests + start_segment_offset - caps_len), caps_len * sizeof(D),
|
||||
(caps_mode || !tree_config.are_outputs_on_device) ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, stream));
|
||||
|
||||
uint64_t number_of_states = caps_len / tree_config.arity / tree_config.digest_elements;
|
||||
Matrix<L>* d_leaves_info;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_leaves_info, number_of_inputs * sizeof(Matrix<L>), tree_config.ctx.stream));
|
||||
|
||||
SubtreeParams<L, D> top_params = {};
|
||||
|
||||
top_params.number_of_inputs = number_of_inputs;
|
||||
top_params.arity = tree_config.arity;
|
||||
top_params.digest_elements = tree_config.digest_elements;
|
||||
top_params.number_of_rows = number_of_states;
|
||||
top_params.number_of_rows_padded = number_of_states;
|
||||
|
||||
top_params.subtree_height = cap_height;
|
||||
top_params.number_of_subtrees = 1;
|
||||
|
||||
top_params.segment_offset = start_segment_offset;
|
||||
top_params.segment_size = start_segment_size;
|
||||
top_params.keep_rows = tree_config.keep_rows;
|
||||
top_params.are_inputs_on_device = tree_config.are_inputs_on_device;
|
||||
top_params.caps_mode = caps_mode;
|
||||
top_params.hasher = &hasher;
|
||||
top_params.compression = &compression;
|
||||
top_params.ctx = &tree_config.ctx;
|
||||
|
||||
D* prev_layer = states_ptr;
|
||||
D* next_layer = digests_ptr;
|
||||
while (top_params.number_of_rows_padded > 0) {
|
||||
CHK_IF_RETURN(fold_layer(sorted_inputs, prev_layer, next_layer, aux_ptr, d_leaves_info, top_params));
|
||||
CHK_IF_RETURN(maybe_copy_digests(next_layer, digests, top_params));
|
||||
swap<D>(&prev_layer, &next_layer);
|
||||
top_params.segment_size /= top_params.arity;
|
||||
top_params.subtree_height--;
|
||||
top_params.number_of_rows_padded /= top_params.arity;
|
||||
}
|
||||
}
|
||||
|
||||
if (caps_mode) { free(caps); }
|
||||
CHK_IF_RETURN(cudaFreeAsync(states_ptr, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(leaves_ptr, stream));
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamDestroy(streams[i]));
|
||||
}
|
||||
if (!tree_config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
} // namespace merkle_tree
|
||||
7
icicle/src/merkle-tree/tests/merkle/.gitignore
vendored
Normal file
7
icicle/src/merkle-tree/tests/merkle/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
merkle.o
|
||||
poseidon2.o
|
||||
test_merkle_poseidon2
|
||||
merkle_bls.o
|
||||
poseidon.o
|
||||
test_merkle_poseidon
|
||||
test_merkle
|
||||
23
icicle/src/merkle-tree/tests/merkle/Makefile
Normal file
23
icicle/src/merkle-tree/tests/merkle/Makefile
Normal file
@@ -0,0 +1,23 @@
|
||||
test_merkle_poseidon: poseidon.o merkle_bls.o
|
||||
nvcc -o test_merkle_poseidon -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -DMERKLE_DEBUG poseidon.o merkle_bls.o test.cu
|
||||
./test_merkle_poseidon
|
||||
|
||||
merkle_bls.o: ../../extern.cu ../../merkle.cu
|
||||
nvcc -o merkle_bls.o -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -DMERKLE_DEBUG -c ../../extern.cu
|
||||
|
||||
poseidon.o: ../../../poseidon/extern.cu
|
||||
nvcc -o poseidon.o -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -c ../../../poseidon/extern.cu
|
||||
|
||||
|
||||
test_merkle: poseidon2.o merkle.o
|
||||
nvcc -o test_merkle -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG poseidon2.o merkle.o test_poseidon2.cu
|
||||
./test_merkle
|
||||
|
||||
merkle.o: ../../extern.cu ../../merkle.cu
|
||||
nvcc -o merkle.o -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG -c ../../extern.cu
|
||||
|
||||
poseidon2.o: ../../../poseidon2/extern.cu
|
||||
nvcc -o poseidon2.o -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -c ../../../poseidon2/extern.cu
|
||||
|
||||
# Remove every object file and test binary produced by the targets above
clear:
	rm -f poseidon2.o merkle.o test_merkle merkle_bls.o poseidon.o test_merkle_poseidon
|
||||
@@ -1,10 +1,3 @@
|
||||
// #define DEBUG
|
||||
#define MERKLE_DEBUG
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "../poseidon.cu"
|
||||
#include "merkle.cu"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
@@ -12,15 +5,18 @@
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace poseidon;
|
||||
using namespace merkle;
|
||||
using namespace curve_config;
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#include "api/bls12_381.h"
|
||||
using namespace bls12_381;
|
||||
|
||||
// Arity
|
||||
#define A 2
|
||||
#define T (A + 1)
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
@@ -30,24 +26,24 @@ int main(int argc, char* argv[])
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
init_optimized_poseidon_constants<scalar_t>(A, ctx, &constants);
|
||||
poseidon::Poseidon<scalar_t> poseidon(A, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N-1 elements
|
||||
uint32_t tree_height = argc > 1 ? atoi(argv[1]) : 28;
|
||||
uint32_t number_of_leaves = pow(A, (tree_height - 1));
|
||||
uint32_t tree_height = argc > 1 ? atoi(argv[1]) : 26;
|
||||
uint32_t number_of_leaves = pow(A, tree_height);
|
||||
uint32_t total_number_of_leaves = number_of_leaves * A;
|
||||
|
||||
/// Use keep_rows to specify how many rows do you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 7;
|
||||
size_t digests_len = get_digests_len<scalar_t>(keep_rows + 1, A);
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, A, 1);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^{tree_height - 1} - 1]
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
scalar_t input = scalar_t::zero();
|
||||
size_t leaves_mem = number_of_leaves * sizeof(scalar_t);
|
||||
size_t leaves_mem = total_number_of_leaves * sizeof(scalar_t);
|
||||
scalar_t* leaves = static_cast<scalar_t*>(malloc(leaves_mem));
|
||||
for (uint32_t i = 0; i < number_of_leaves; i++) {
|
||||
for (uint32_t i = 0; i < total_number_of_leaves; i++) {
|
||||
leaves[i] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
@@ -62,6 +58,7 @@ int main(int argc, char* argv[])
|
||||
std::cout << "Memory for leaves = " << leaves_mem / 1024 / 1024 << " MB; " << leaves_mem / 1024 / 1024 / 1024 << " GB"
|
||||
<< std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
@@ -69,12 +66,17 @@ int main(int argc, char* argv[])
|
||||
std::cout << "Total RAM consumption = " << (digests_mem + leaves_mem) / 1024 / 1024 << " MB; "
|
||||
<< (digests_mem + leaves_mem) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
TreeBuilderConfig config = default_merkle_config();
|
||||
config.keep_rows = keep_rows;
|
||||
merkle_tree::TreeBuilderConfig tree_config = merkle_tree::default_merkle_config();
|
||||
tree_config.arity = 2;
|
||||
tree_config.keep_rows = keep_rows;
|
||||
START_TIMER(timer_merkle);
|
||||
build_merkle_tree<scalar_t, T>(leaves, digests, tree_height, constants, config);
|
||||
bls12_381_build_merkle_tree(leaves, digests, tree_height, A, &poseidon, &poseidon, tree_config);
|
||||
END_TIMER(timer_merkle, "Merkle tree built: ")
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
std::cout << digests[i] << std::endl;
|
||||
}
|
||||
|
||||
// Use this to generate test vectors
|
||||
// for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << "{";
|
||||
108
icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu
Normal file
108
icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
#include "api/babybear.h"
|
||||
using namespace babybear;
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N elements
|
||||
uint32_t tree_arity = 2;
|
||||
uint32_t width = 16;
|
||||
uint32_t input_block_len = 8;
|
||||
uint32_t digest_elements = 8;
|
||||
uint64_t tree_height = argc > 1 ? atoi(argv[1]) : 23;
|
||||
uint64_t number_of_leaves = pow(tree_arity, tree_height);
|
||||
uint64_t total_number_of_leaves = number_of_leaves * input_block_len;
|
||||
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
poseidon2::Poseidon2<scalar_t> poseidon(
|
||||
width, input_block_len, poseidon2::MdsType::DEFAULT_MDS, poseidon2::DiffusionStrategy::DEFAULT_DIFFUSION, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
/// Use keep_rows to specify how many rows do you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 3;
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, tree_arity, digest_elements);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
scalar_t input = scalar_t::zero();
|
||||
size_t leaves_mem = total_number_of_leaves * sizeof(scalar_t);
|
||||
scalar_t* leaves = static_cast<scalar_t*>(malloc(leaves_mem));
|
||||
for (uint64_t i = 0; i < total_number_of_leaves; i++) {
|
||||
leaves[i] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
END_TIMER(timer_allocation, "Allocated memory for leaves: ");
|
||||
|
||||
/// Allocate memory for digests of {keep_rows} rows of a tree
|
||||
START_TIMER(timer_digests);
|
||||
size_t digests_mem = digests_len * sizeof(scalar_t);
|
||||
scalar_t* digests = static_cast<scalar_t*>(malloc(digests_mem));
|
||||
END_TIMER(timer_digests, "Allocated memory for digests");
|
||||
|
||||
std::cout << "Memory for leaves = " << leaves_mem / 1024 / 1024 << " MB; " << leaves_mem / 1024 / 1024 / 1024 << " GB"
|
||||
<< std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
|
||||
std::cout << "Total RAM consumption = " << (digests_mem + leaves_mem) / 1024 / 1024 << " MB; "
|
||||
<< (digests_mem + leaves_mem) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
merkle_tree::TreeBuilderConfig tree_config = merkle_tree::default_merkle_config();
|
||||
tree_config.arity = tree_arity;
|
||||
tree_config.keep_rows = keep_rows;
|
||||
tree_config.digest_elements = digest_elements;
|
||||
START_TIMER(timer_merkle);
|
||||
babybear_build_merkle_tree(leaves, digests, tree_height, input_block_len, &poseidon, &poseidon, tree_config);
|
||||
END_TIMER(timer_merkle, "Merkle tree built: ")
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << digests[i] << std::endl;
|
||||
}
|
||||
|
||||
// Use this to generate test vectors
|
||||
// for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << "{";
|
||||
// for (int j = 0; j < 1; j++) {
|
||||
// std::cout << ((uint32_t*)&digests[i].limbs_storage)[j];
|
||||
// }
|
||||
// std::cout << "}," << std::endl;
|
||||
// }
|
||||
|
||||
scalar_t expected[64] = {
|
||||
{1198029810}, {1114813365}, {241588005}, {735332587}, {201392606}, {623383436}, {60086186}, {1225304654},
|
||||
{1501472115}, {891216097}, {184481194}, {855632748}, {1503541944}, {1483537725}, {1023563730}, {698957505},
|
||||
{1322038939}, {1132881200}, {104782797}, {68847168}, {420051722}, {126069919}, {1350263697}, {1711085395},
|
||||
{1322038939}, {1132881200}, {104782797}, {68847168}, {420051722}, {126069919}, {1350263697}, {1711085395},
|
||||
{1019525203}, {127215304}, {1199733491}, {1473997036}, {548538385}, {364347137}, {570748364}, {426431873},
|
||||
{926562920}, {6278762}, {1894248581}, {1304248433}, {1635020421}, {719342960}, {1373719279}, {700539301},
|
||||
{708916911}, {925660920}, {994927540}, {1925434995}, {208534303}, {69614512}, {1701199215}, {1825115630}};
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
scalar_t root = digests[i];
|
||||
assert(root == expected[i]);
|
||||
}
|
||||
free(digests);
|
||||
free(leaves);
|
||||
}
|
||||
|
||||
#endif
|
||||
4
icicle/src/merkle-tree/tests/mmcs/.gitignore
vendored
Normal file
4
icicle/src/merkle-tree/tests/mmcs/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
mmcs.o
|
||||
poseidon2.o
|
||||
test_mmcs_poseidon2
|
||||
vec_ops.o
|
||||
15
icicle/src/merkle-tree/tests/mmcs/Makefile
Normal file
15
icicle/src/merkle-tree/tests/mmcs/Makefile
Normal file
@@ -0,0 +1,15 @@
|
||||
.PHONY: test_merkle clear

# Default target: build the MMCS Poseidon2 test binary (if out of date) and run it
test_merkle: test_mmcs_poseidon2
	./test_mmcs_poseidon2

test_mmcs_poseidon2: poseidon2.o mmcs.o vec_ops.o test_poseidon2.cu
	nvcc -o test_mmcs_poseidon2 -lineinfo -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG poseidon2.o vec_ops.o mmcs.o test_poseidon2.cu

mmcs.o: ../../extern_mmcs.cu ../../mmcs.cu
	nvcc -o mmcs.o -I../../../../include -lineinfo -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG -c ../../extern_mmcs.cu

poseidon2.o: ../../../poseidon2/extern.cu
	nvcc -o poseidon2.o -I../../../../include -lineinfo -DFIELD=babybear -DFIELD_ID=1001 -c ../../../poseidon2/extern.cu

# Rebuild when the vec_ops source changes (the rule previously had no prerequisite)
vec_ops.o: ../../../vec_ops/extern.cu
	nvcc -o vec_ops.o -I../../../../include -lineinfo -DFIELD=babybear -DFIELD_ID=1001 -c ../../../vec_ops/extern.cu

clear:
	rm -f poseidon2.o mmcs.o vec_ops.o test_mmcs_poseidon2
|
||||
139
icicle/src/merkle-tree/tests/mmcs/test_poseidon2.cu
Normal file
139
icicle/src/merkle-tree/tests/mmcs/test_poseidon2.cu
Normal file
@@ -0,0 +1,139 @@
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
#include "api/babybear.h"
|
||||
using namespace babybear;
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N elements
|
||||
uint32_t tree_arity = 2;
|
||||
uint32_t width = 16;
|
||||
uint32_t input_block_len = 600;
|
||||
uint32_t rate = 8;
|
||||
uint32_t digest_elements = 8;
|
||||
uint32_t copied_matrices = 1;
|
||||
uint64_t tree_height = argc > 1 ? atoi(argv[1]) : 3;
|
||||
uint64_t number_of_leaves = pow(tree_arity, tree_height);
|
||||
uint64_t total_number_of_leaves = number_of_leaves * input_block_len;
|
||||
|
||||
bool are_inputs_on_device = true;
|
||||
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
poseidon2::Poseidon2<scalar_t> poseidon(
|
||||
width, rate, poseidon2::MdsType::PLONKY, poseidon2::DiffusionStrategy::MONTGOMERY, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
/// Use keep_rows to specify how many rows do you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 3;
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, tree_arity, digest_elements);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
scalar_t input = scalar_t::zero();
|
||||
|
||||
// unsigned int number_of_inputs = tree_height * copied_matrices;
|
||||
unsigned int number_of_inputs = 1;
|
||||
Matrix<scalar_t>* leaves = static_cast<Matrix<scalar_t>*>(malloc(number_of_inputs * sizeof(Matrix<scalar_t>)));
|
||||
uint64_t current_matrix_rows = number_of_leaves;
|
||||
for (int i = 0; i < number_of_inputs; i++) {
|
||||
uint64_t current_matrix_size = current_matrix_rows * input_block_len;
|
||||
for (int j = 0; j < copied_matrices; j++) {
|
||||
scalar_t* matrix = static_cast<scalar_t*>(malloc(current_matrix_size * sizeof(scalar_t)));
|
||||
|
||||
for (uint64_t k = 0; k < current_matrix_size; k++) {
|
||||
matrix[k] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
|
||||
scalar_t* d_matrix;
|
||||
if (are_inputs_on_device) {
|
||||
cudaMalloc(&d_matrix, current_matrix_size * sizeof(scalar_t));
|
||||
cudaMemcpy(d_matrix, matrix, current_matrix_size * sizeof(scalar_t), cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
leaves[i * copied_matrices + j] = {
|
||||
are_inputs_on_device ? d_matrix : matrix,
|
||||
input_block_len,
|
||||
current_matrix_rows,
|
||||
};
|
||||
}
|
||||
|
||||
current_matrix_rows /= tree_arity;
|
||||
}
|
||||
|
||||
END_TIMER(timer_allocation, "Allocated memory for leaves: ");
|
||||
|
||||
/// Allocate memory for digests of {keep_rows} rows of a tree
|
||||
START_TIMER(timer_digests);
|
||||
size_t digests_mem = digests_len * sizeof(scalar_t);
|
||||
scalar_t* digests = static_cast<scalar_t*>(malloc(digests_mem));
|
||||
END_TIMER(timer_digests, "Allocated memory for digests");
|
||||
|
||||
// std::cout << "Memory for leaves = " << total_number_of_leaves * sizeof(scalar_t) / 1024 / 1024 << " MB; " <<
|
||||
// leaves_mem / 1024 / 1024 / 1024 << " GB"
|
||||
// << std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
// std::cout << "Total RAM consumption = " << (digests_mem + leaves_mem) / 1024 / 1024 << " MB; "
|
||||
// << (digests_mem + leaves_mem) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
merkle_tree::TreeBuilderConfig tree_config = merkle_tree::default_merkle_config();
|
||||
tree_config.are_inputs_on_device = are_inputs_on_device;
|
||||
tree_config.arity = tree_arity;
|
||||
tree_config.keep_rows = keep_rows;
|
||||
tree_config.digest_elements = digest_elements;
|
||||
START_TIMER(timer_merkle);
|
||||
babybear_mmcs_commit_cuda(leaves, number_of_inputs, digests, &poseidon, &poseidon, tree_config);
|
||||
END_TIMER(timer_merkle, "Merkle tree built: ")
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
std::cout << digests[digests_len - i - 1] << std::endl;
|
||||
}
|
||||
|
||||
// Use this to generate test vectors
|
||||
// for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << "{";
|
||||
// for (int j = 0; j < 8; j++) {
|
||||
// std::cout << ((uint64_t*)&digests[i].limbs_storage)[j];
|
||||
// if (j != 7) { std::cout << ", "; }
|
||||
// }
|
||||
// std::cout << "}," << std::endl;
|
||||
// }
|
||||
|
||||
/// These scalars are digests of top-7 rows of a Merkle tree.
|
||||
/// Arity = 2, Tree height = 28, keep_rows = 7
|
||||
/// They are aligned in the following format:
|
||||
/// L-7 L-6 L-5 L-4 L-3 L-2 L-1
|
||||
/// [0..63, 64..95, 96..111, 112..119, 120..123, 124..125, 126]
|
||||
scalar_t expected[0] = {};
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
scalar_t root = digests[i];
|
||||
// assert(root == expected[i]);
|
||||
}
|
||||
free(digests);
|
||||
free(leaves);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,2 +1,5 @@
|
||||
test_poseidon : test.cu poseidon.cu kernels.cu constants.cu nvcc - o test_poseidon - I../../ include - DFIELD_ID =
|
||||
2 - DCURVE_ID = 2 test.cu./ test_poseidon
|
||||
test_poseidon: test.cu
|
||||
nvcc -o test_poseidon -I../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE_ID=2 -DDEVMODE -DDEBUG extern.cu test.cu
|
||||
|
||||
test_poseidon_m31: test_m31.cu
|
||||
nvcc -o test_poseidon -I../../include -DFIELD=m31 -DFIELD_ID=1003 -DDEVMODE -DDEBUG extern.cu test_m31.cu
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
/// These are pre-calculated constants for different curves
|
||||
#include "fields/id.h"
|
||||
@@ -17,17 +18,25 @@ using namespace poseidon_constants_bw6_761;
|
||||
#elif FIELD_ID == GRUMPKIN
|
||||
#include "poseidon/constants/grumpkin_poseidon.h"
|
||||
using namespace poseidon_constants_grumpkin;
|
||||
#elif FIELD_ID == M31
|
||||
#include "poseidon/constants/m31_poseidon.h"
|
||||
using namespace poseidon_constants_m31;
|
||||
#endif
|
||||
|
||||
namespace poseidon {
|
||||
template <typename S>
|
||||
cudaError_t create_optimized_poseidon_constants(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const S* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
PoseidonConstants<S>* poseidon_constants)
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
PoseidonConstants<S>* poseidon_constants,
|
||||
device_context::DeviceContext& ctx)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = ctx.stream;
|
||||
@@ -41,24 +50,33 @@ namespace poseidon {
|
||||
S* d_constants;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_constants, sizeof(S) * constants_len, stream));
|
||||
|
||||
S* d_round_constants = d_constants;
|
||||
S* d_mds_matrix = d_round_constants + round_constants_len;
|
||||
S* d_non_sparse_matrix = d_mds_matrix + mds_matrix_len;
|
||||
S* d_sparse_matrices = d_non_sparse_matrix + mds_matrix_len;
|
||||
|
||||
// Copy constants
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(d_constants, constants, sizeof(S) * constants_len, cudaMemcpyHostToDevice, stream));
|
||||
|
||||
S* round_constants = d_constants;
|
||||
S* mds_matrix = round_constants + round_constants_len;
|
||||
S* non_sparse_matrix = mds_matrix + mds_matrix_len;
|
||||
S* sparse_matrices = non_sparse_matrix + mds_matrix_len;
|
||||
|
||||
// Pick the domain_tag accordingly
|
||||
// For now, we only support Merkle tree mode
|
||||
uint32_t tree_domain_tag_value = 1;
|
||||
tree_domain_tag_value = (tree_domain_tag_value << (width - 1)) - tree_domain_tag_value;
|
||||
S domain_tag = S::from(tree_domain_tag_value);
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_round_constants, round_constants, sizeof(S) * round_constants_len, cudaMemcpyHostToDevice, stream));
|
||||
CHK_IF_RETURN(
|
||||
cudaMemcpyAsync(d_mds_matrix, mds_matrix, sizeof(S) * mds_matrix_len, cudaMemcpyHostToDevice, stream));
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_non_sparse_matrix, non_sparse_matrix, sizeof(S) * mds_matrix_len, cudaMemcpyHostToDevice, stream));
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_sparse_matrices, sparse_matrices, sizeof(S) * sparse_matrices_len, cudaMemcpyHostToDevice, stream));
|
||||
|
||||
// Make sure all the constants have been copied
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(stream));
|
||||
*poseidon_constants = {arity, partial_rounds, full_rounds_half, round_constants,
|
||||
mds_matrix, non_sparse_matrix, sparse_matrices, domain_tag};
|
||||
*poseidon_constants = {
|
||||
arity,
|
||||
alpha,
|
||||
partial_rounds,
|
||||
full_rounds_half,
|
||||
d_round_constants,
|
||||
d_mds_matrix,
|
||||
d_non_sparse_matrix,
|
||||
d_sparse_matrices,
|
||||
domain_tag};
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
@@ -68,8 +86,8 @@ namespace poseidon {
|
||||
int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* poseidon_constants)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
int full_rounds_half = FULL_ROUNDS_DEFAULT;
|
||||
int partial_rounds;
|
||||
unsigned int full_rounds_half = FULL_ROUNDS_DEFAULT;
|
||||
unsigned int partial_rounds;
|
||||
unsigned char* constants;
|
||||
switch (arity) {
|
||||
case 2:
|
||||
@@ -94,8 +112,41 @@ namespace poseidon {
|
||||
}
|
||||
S* h_constants = reinterpret_cast<S*>(constants);
|
||||
|
||||
create_optimized_poseidon_constants(arity, full_rounds_half, partial_rounds, h_constants, ctx, poseidon_constants);
|
||||
unsigned int width = arity + 1;
|
||||
unsigned int round_constants_len = width * full_rounds_half * 2 + partial_rounds;
|
||||
unsigned int mds_matrix_len = width * width;
|
||||
|
||||
S* round_constants = h_constants;
|
||||
S* mds_matrix = round_constants + round_constants_len;
|
||||
S* non_sparse_matrix = mds_matrix + mds_matrix_len;
|
||||
S* sparse_matrices = non_sparse_matrix + mds_matrix_len;
|
||||
|
||||
// Pick the domain_tag accordingly
|
||||
// For now, we only support Merkle tree mode
|
||||
uint32_t tree_domain_tag_value = 1;
|
||||
tree_domain_tag_value = (tree_domain_tag_value << (width - 1)) - tree_domain_tag_value;
|
||||
S domain_tag = S::from(tree_domain_tag_value);
|
||||
|
||||
create_optimized_poseidon_constants<S>(
|
||||
arity, 5, partial_rounds, full_rounds_half, round_constants, mds_matrix, non_sparse_matrix, sparse_matrices,
|
||||
domain_tag, poseidon_constants, ctx);
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_optimized_poseidon_constants(PoseidonConstants<S>* constants, device_context::DeviceContext& ctx)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
CHK_IF_RETURN(cudaFreeAsync(constants->round_constants, ctx.stream));
|
||||
|
||||
constants->arity = 0;
|
||||
constants->partial_rounds = 0;
|
||||
constants->full_rounds_half = 0;
|
||||
constants->round_constants = nullptr;
|
||||
constants->mds_matrix = nullptr;
|
||||
constants->non_sparse_matrix = nullptr;
|
||||
constants->sparse_matrices = nullptr;
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon
|
||||
@@ -2,58 +2,68 @@
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "poseidon.cu"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "constants.cu"
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
namespace poseidon {
|
||||
/**
|
||||
* Extern "C" version of [poseidon_hash_cuda] function with the following
|
||||
* value of template parameter (where the field is given by `-DFIELD` env variable during build):
|
||||
* - `S` is the [field](@ref scalar_t) - either a scalar field of the elliptic curve or a
|
||||
* stand-alone "STARK field";
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_hash_cuda)(
|
||||
scalar_t* input,
|
||||
scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const PoseidonConstants<scalar_t>& constants,
|
||||
PoseidonConfig& config)
|
||||
typedef class Poseidon<scalar_t> PoseidonInst;
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_create_cuda)(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const scalar_t* round_constants,
|
||||
const scalar_t* mds_matrix,
|
||||
const scalar_t* non_sparse_matrix,
|
||||
const scalar_t* sparse_matrices,
|
||||
const scalar_t& domain_tag,
|
||||
device_context::DeviceContext& ctx)
|
||||
{
|
||||
switch (arity) {
|
||||
case 2:
|
||||
return poseidon_hash<scalar_t, 3>(input, output, number_of_states, constants, config);
|
||||
case 4:
|
||||
return poseidon_hash<scalar_t, 5>(input, output, number_of_states, constants, config);
|
||||
case 8:
|
||||
return poseidon_hash<scalar_t, 9>(input, output, number_of_states, constants, config);
|
||||
case 11:
|
||||
return poseidon_hash<scalar_t, 12>(input, output, number_of_states, constants, config);
|
||||
default:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "PoseidonHash: #arity must be one of [2, 4, 8, 11]");
|
||||
try {
|
||||
*poseidon = new PoseidonInst(
|
||||
arity, alpha, partial_rounds, full_rounds_half, round_constants, mds_matrix, non_sparse_matrix, sparse_matrices,
|
||||
domain_tag, ctx);
|
||||
return cudaError_t::cudaSuccess;
|
||||
} catch (const IcicleError& _error) {
|
||||
return cudaError_t::cudaErrorUnknown;
|
||||
}
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, create_optimized_poseidon_constants_cuda)(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
PoseidonConstants<scalar_t>* poseidon_constants)
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_load_cuda)(
|
||||
PoseidonInst** poseidon, unsigned int arity, device_context::DeviceContext& ctx)
|
||||
{
|
||||
return create_optimized_poseidon_constants<scalar_t>(
|
||||
arity, full_rounds_half, partial_rounds, constants, ctx, poseidon_constants);
|
||||
try {
|
||||
*poseidon = new PoseidonInst(arity, ctx);
|
||||
return cudaError_t::cudaSuccess;
|
||||
} catch (const IcicleError& _error) {
|
||||
return cudaError_t::cudaErrorUnknown;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, init_optimized_poseidon_constants_cuda)(
|
||||
int arity, device_context::DeviceContext& ctx, PoseidonConstants<scalar_t>* constants)
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_hash_many_cuda)(
|
||||
const PoseidonInst* poseidon,
|
||||
const scalar_t* inputs,
|
||||
scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
const SpongeConfig& cfg)
|
||||
{
|
||||
return init_optimized_poseidon_constants<scalar_t>(arity, ctx, constants);
|
||||
return poseidon->hash_many(inputs, output, number_of_states, input_block_len, output_len, cfg);
|
||||
}
|
||||
|
||||
/**
 * Destroys a Poseidon instance previously returned through the `poseidon_create_cuda` or
 * `poseidon_load_cuda` entry points.
 *
 * @param poseidon Instance to destroy. The pointer must not be used after this call.
 * @return `cudaSuccess` on success, `cudaErrorUnknown` if an IcicleError was caught.
 */
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_delete_cuda)(PoseidonInst* poseidon)
{
  try {
    // The instance was allocated with `new`, so it has to be released with `delete`:
    // calling only the destructor would leave the object's storage allocated forever.
    delete poseidon;
    return cudaError_t::cudaSuccess;
  } catch (const IcicleError& _error) {
    return cudaError_t::cudaErrorUnknown;
  }
}
|
||||
} // namespace poseidon
|
||||
@@ -1,90 +0,0 @@
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "kernels.cu"
|
||||
|
||||
namespace poseidon {
|
||||
template <typename S, int T>
|
||||
cudaError_t
|
||||
permute_many(S* states, size_t number_of_states, const PoseidonConstants<S>& constants, cudaStream_t& stream)
|
||||
{
|
||||
size_t rc_offset = 0;
|
||||
|
||||
full_rounds<S, T><<<
|
||||
PKC<T>::number_of_full_blocks(number_of_states), PKC<T>::number_of_threads,
|
||||
sizeof(S) * PKC<T>::hashes_per_block * T, stream>>>(
|
||||
states, number_of_states, rc_offset, FIRST_FULL_ROUNDS, constants);
|
||||
rc_offset += T * (constants.full_rounds_half + 1);
|
||||
|
||||
partial_rounds<S, T>
|
||||
<<<PKC<T>::number_of_singlehash_blocks(number_of_states), PKC<T>::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, rc_offset, constants);
|
||||
rc_offset += constants.partial_rounds;
|
||||
|
||||
full_rounds<S, T><<<
|
||||
PKC<T>::number_of_full_blocks(number_of_states), PKC<T>::number_of_threads,
|
||||
sizeof(S) * PKC<T>::hashes_per_block * T, stream>>>(
|
||||
states, number_of_states, rc_offset, SECOND_FULL_ROUNDS, constants);
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon_hash(
|
||||
S* input, S* output, size_t number_of_states, const PoseidonConstants<S>& constants, const PoseidonConfig& config)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = config.ctx.stream;
|
||||
S* states;
|
||||
if (config.input_is_a_state) {
|
||||
states = input;
|
||||
} else {
|
||||
// allocate memory for {number_of_states} states of {t} scalars each
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states, number_of_states * T * sizeof(S), stream))
|
||||
|
||||
// This is where the input matrix of size Arity x NumberOfBlocks is
|
||||
// padded and copied to device in a T x NumberOfBlocks matrix
|
||||
CHK_IF_RETURN(cudaMemcpy2DAsync(
|
||||
states, T * sizeof(S), // Device pointer and device pitch
|
||||
input, (T - 1) * sizeof(S), // Host pointer and pitch
|
||||
(T - 1) * sizeof(S), number_of_states, // Size of the source matrix (Arity x NumberOfBlocks)
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
S* output_device;
|
||||
if (config.are_outputs_on_device) {
|
||||
output_device = output;
|
||||
} else {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&output_device, number_of_states * sizeof(S), stream))
|
||||
}
|
||||
|
||||
prepare_poseidon_states<S, T>
|
||||
<<<PKC<T>::number_of_full_blocks(number_of_states), PKC<T>::number_of_threads, 0, stream>>>(
|
||||
states, number_of_states, constants.domain_tag, config.aligned);
|
||||
|
||||
cudaError_t hash_error = permute_many<S, T>(states, number_of_states, constants, stream);
|
||||
CHK_IF_RETURN(hash_error);
|
||||
|
||||
get_hash_results<S, T>
|
||||
<<<PKC<T>::number_of_singlehash_blocks(number_of_states), PKC<T>::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, output_device);
|
||||
|
||||
if (config.loop_state) {
|
||||
copy_recursive<S, T>
|
||||
<<<PKC<T>::number_of_singlehash_blocks(number_of_states), PKC<T>::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, output_device);
|
||||
}
|
||||
|
||||
if (!config.input_is_a_state) CHK_IF_RETURN(cudaFreeAsync(states, stream));
|
||||
|
||||
if (!config.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(
|
||||
cudaMemcpyAsync(output, output_device, number_of_states * sizeof(S), cudaMemcpyDeviceToHost, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(output_device, stream));
|
||||
}
|
||||
|
||||
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon
|
||||
@@ -4,7 +4,6 @@
|
||||
using namespace curve_config;
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "poseidon.cu"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
@@ -12,6 +11,10 @@ using namespace curve_config;
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "api/bls12_381.h"
|
||||
using namespace bls12_381;
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
using namespace poseidon;
|
||||
|
||||
#define A 2
|
||||
@@ -29,8 +32,7 @@ int main(int argc, char* argv[])
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
init_optimized_poseidon_constants<scalar_t>(A, ctx, &constants);
|
||||
Poseidon<scalar_t> poseidon(A, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
START_TIMER(allocation_timer);
|
||||
@@ -46,9 +48,10 @@ int main(int argc, char* argv[])
|
||||
|
||||
scalar_t* out_ptr = static_cast<scalar_t*>(malloc(number_of_blocks * sizeof(scalar_t)));
|
||||
|
||||
SpongeConfig cfg = default_sponge_config();
|
||||
|
||||
START_TIMER(poseidon_timer);
|
||||
PoseidonConfig config = default_poseidon_config(T);
|
||||
poseidon_hash<curve_config::scalar_t, T>(in_ptr, out_ptr, number_of_blocks, constants, config);
|
||||
poseidon.hash_many(in_ptr, out_ptr, number_of_blocks, A, 1, cfg);
|
||||
END_TIMER(poseidon_timer, "Poseidon")
|
||||
|
||||
scalar_t expected[1024] = {
|
||||
@@ -1080,7 +1083,7 @@ int main(int argc, char* argv[])
|
||||
if (number_of_blocks == 1024) {
|
||||
for (int i = 0; i < number_of_blocks; i++) {
|
||||
#ifdef DEBUG
|
||||
std::cout << out_ptr[i] << std::endl;
|
||||
// std::cout << out_ptr[i] << std::endl;
|
||||
#endif
|
||||
assert((out_ptr[i] == expected[i]));
|
||||
}
|
||||
|
||||
70
icicle/src/poseidon/test_m31.cu
Normal file
70
icicle/src/poseidon/test_m31.cu
Normal file
@@ -0,0 +1,70 @@
|
||||
// #define DEBUG
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
using namespace poseidon;
|
||||
|
||||
#define A 11
|
||||
#define T (A + 1)
|
||||
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
using FpMicroseconds = std::chrono::duration<float, std::chrono::microseconds::period>;
|
||||
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
init_optimized_poseidon_constants<scalar_t>(A, ctx, &constants);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
START_TIMER(allocation_timer);
|
||||
// Prepare input data of [0, 1, 2 ... (number_of_blocks * arity) - 1]
|
||||
int number_of_blocks = argc > 1 ? 1 << atoi(argv[1]) : 1024;
|
||||
scalar_t input = scalar_t::zero();
|
||||
scalar_t* in_ptr = static_cast<scalar_t*>(malloc(number_of_blocks * A * sizeof(scalar_t)));
|
||||
for (uint32_t i = 0; i < number_of_blocks * A; i++) {
|
||||
in_ptr[i] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
END_TIMER(allocation_timer, "Allocate mem and fill input");
|
||||
|
||||
scalar_t* out_ptr = static_cast<scalar_t*>(malloc(number_of_blocks * sizeof(scalar_t)));
|
||||
|
||||
START_TIMER(poseidon_timer);
|
||||
PoseidonConfig config = default_poseidon_config(T);
|
||||
poseidon_hash<field_config::scalar_t, T>(in_ptr, out_ptr, number_of_blocks, constants, config);
|
||||
END_TIMER(poseidon_timer, "Poseidon")
|
||||
|
||||
// scalar_t expected[0] = {}
|
||||
|
||||
if (number_of_blocks == 1024) {
|
||||
for (int i = 0; i < number_of_blocks; i++) {
|
||||
#ifdef DEBUG
|
||||
// std::cout << out_ptr[i] << std::endl;
|
||||
#endif
|
||||
// assert((out_ptr[i] == expected[i]));
|
||||
}
|
||||
printf("Expected output matches\n");
|
||||
}
|
||||
|
||||
free(in_ptr);
|
||||
free(out_ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,3 +0,0 @@
|
||||
test_merkle:
|
||||
nvcc -o test_merkle -I../../../include -DFIELD_ID=2 -DCURVE_ID=2 test.cu
|
||||
./test_merkle
|
||||
@@ -1,284 +0,0 @@
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
namespace merkle {
|
||||
/// Returns the number of elements needed to store all the digests of a tree
/// when its rows are flattened one after another.
///
/// The row sizes 1, arity, arity^2, ... are summed for `height - 1` rows,
/// so a tree of height 0 or 1 needs no digest storage.
///
/// @param height Height of the tree
/// @param arity Branching factor of the tree
/// @return Total number of digest elements
template <typename S>
size_t get_digests_len(uint32_t height, uint32_t arity)
{
  size_t digests_len = 0;
  size_t row_length = 1;
  // The counter is unsigned like `height`, avoiding a signed/unsigned comparison
  for (uint32_t level = 1; level < height; level++) {
    digests_len += row_length;
    row_length *= arity;
  }

  return digests_len;
}
|
||||
|
||||
/// Constructs merkle subtree without parallelization
|
||||
/// The digests are aligned sequentially per row
|
||||
/// Example:
|
||||
///
|
||||
/// Big tree:
|
||||
///
|
||||
/// 1
|
||||
/// / \
|
||||
/// 2 3
|
||||
/// / \ / \
|
||||
/// 4 5 6 7
|
||||
///
|
||||
/// Subtree 1 Subtree 2
|
||||
/// 2 3
|
||||
/// / \ / \
|
||||
/// 4 5 6 7
|
||||
///
|
||||
/// Digests array for subtree 1:
|
||||
/// [4 5 . . 2 . .]
|
||||
/// | | |
|
||||
/// ----- V
|
||||
/// | Segment (offset = 4, subtree_idx = 0)
|
||||
/// v
|
||||
/// Segment (offset = 0, subtree_idx = 0)
|
||||
///
|
||||
/// Digests array for subtree 2:
|
||||
/// [. . 6 7 . 3 .]
|
||||
/// | |
|
||||
/// -----
|
||||
/// |
|
||||
/// v
|
||||
/// Segment (offset = 0, subtree_idx = 1)
|
||||
///
|
||||
/// Total digests array:
|
||||
/// [4 5 6 7 2 3 .]
|
||||
template <typename S, int T>
|
||||
cudaError_t build_merkle_subtree(
|
||||
S* state,
|
||||
S* digests,
|
||||
size_t subtree_idx,
|
||||
size_t subtree_height,
|
||||
S* big_tree_digests,
|
||||
size_t start_segment_size,
|
||||
size_t start_segment_offset,
|
||||
int keep_rows,
|
||||
const PoseidonConstants<S>& poseidon,
|
||||
cudaStream_t& stream)
|
||||
{
|
||||
int arity = T - 1;
|
||||
|
||||
PoseidonConfig config = default_poseidon_config(T);
|
||||
config.are_inputs_on_device = true;
|
||||
config.are_outputs_on_device = true;
|
||||
config.input_is_a_state = true;
|
||||
config.loop_state = true;
|
||||
config.ctx.stream = stream;
|
||||
|
||||
size_t leaves_size = pow(arity, subtree_height - 1);
|
||||
uint32_t number_of_blocks = leaves_size / arity;
|
||||
size_t segment_size = start_segment_size;
|
||||
size_t segment_offset = start_segment_offset;
|
||||
|
||||
while (number_of_blocks > 0) {
|
||||
cudaError_t poseidon_res = poseidon_hash<S, T>(state, digests, number_of_blocks, poseidon, config);
|
||||
CHK_IF_RETURN(poseidon_res);
|
||||
|
||||
if (!keep_rows || subtree_height <= keep_rows + 1) {
|
||||
S* digests_with_offset = big_tree_digests + segment_offset + subtree_idx * number_of_blocks;
|
||||
CHK_IF_RETURN(
|
||||
cudaMemcpyAsync(digests_with_offset, digests, number_of_blocks * sizeof(S), cudaMemcpyDeviceToHost, stream));
|
||||
segment_offset += segment_size;
|
||||
}
|
||||
|
||||
segment_size /= arity;
|
||||
subtree_height--;
|
||||
number_of_blocks /= arity;
|
||||
config.aligned = true;
|
||||
}
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
cudaError_t build_merkle_tree(
|
||||
const S* leaves,
|
||||
S* digests,
|
||||
uint32_t height,
|
||||
const poseidon::PoseidonConstants<S>& poseidon,
|
||||
const TreeBuilderConfig& config)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = config.ctx.stream;
|
||||
|
||||
int arity = T - 1;
|
||||
uint32_t number_of_leaves = pow(arity, (height - 1));
|
||||
|
||||
// This will determine how much splitting do we need to do
|
||||
// `number_of_streams` subtrees should fit in the device
|
||||
// This means each subtree should fit in `STREAM_CHUNK_SIZE` memory
|
||||
uint32_t number_of_subtrees = 1;
|
||||
uint32_t subtree_height = height;
|
||||
uint32_t subtree_leaves_size = pow(arity, height - 1);
|
||||
uint32_t subtree_state_size = subtree_leaves_size / arity * T;
|
||||
uint32_t subtree_digests_size = get_digests_len<S>(subtree_height, arity);
|
||||
size_t subtree_memory_required = sizeof(S) * (subtree_state_size + subtree_digests_size);
|
||||
while (subtree_memory_required > STREAM_CHUNK_SIZE) {
|
||||
number_of_subtrees *= arity;
|
||||
subtree_height--;
|
||||
subtree_leaves_size /= arity;
|
||||
subtree_state_size = subtree_leaves_size / arity * T;
|
||||
subtree_digests_size = subtree_state_size / arity;
|
||||
subtree_memory_required = sizeof(S) * (subtree_state_size + subtree_digests_size);
|
||||
}
|
||||
int cap_height = height - subtree_height + 1;
|
||||
size_t caps_len = pow(arity, cap_height - 1);
|
||||
|
||||
size_t available_memory, _total_memory;
|
||||
CHK_IF_RETURN(cudaMemGetInfo(&available_memory, &_total_memory));
|
||||
available_memory -= GIGA / 8; // Leave 128 MB
|
||||
|
||||
// We can effectively parallelize memory copy with streams
|
||||
// as long as they don't operate on more than `STREAM_CHUNK_SIZE` bytes
|
||||
const size_t number_of_streams = std::min((uint32_t)(available_memory / STREAM_CHUNK_SIZE), number_of_subtrees);
|
||||
cudaStream_t* streams = static_cast<cudaStream_t*>(malloc(sizeof(cudaStream_t) * number_of_streams));
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamCreate(&streams[i]));
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(MERKLE_DEBUG)
|
||||
std::cout << "Available memory = " << available_memory / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Number of streams = " << number_of_streams << std::endl;
|
||||
std::cout << "Number of subtrees = " << number_of_subtrees << std::endl;
|
||||
std::cout << "Height of a subtree = " << subtree_height << std::endl;
|
||||
std::cout << "Cutoff height = " << height - subtree_height + 1 << std::endl;
|
||||
std::cout << "Number of leaves in a subtree = " << subtree_leaves_size << std::endl;
|
||||
std::cout << "State of a subtree = " << subtree_state_size << std::endl;
|
||||
std::cout << "Digest elements for a subtree = " << get_digests_len<S>(subtree_height, arity) << std::endl;
|
||||
std::cout << "Size of 1 subtree states = " << subtree_state_size * sizeof(S) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Size of 1 subtree digests = " << subtree_digests_size * sizeof(S) / 1024 / 1024 << " MB" << std::endl;
|
||||
#endif
|
||||
|
||||
// Allocate memory for the leaves and digests
|
||||
// These are shared by streams in a pool
|
||||
S *states_ptr, *digests_ptr;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states_ptr, subtree_state_size * number_of_streams * sizeof(S), stream))
|
||||
CHK_IF_RETURN(cudaMallocAsync(&digests_ptr, subtree_digests_size * number_of_streams * sizeof(S), stream))
|
||||
// Wait for these allocations to finish
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(stream));
|
||||
|
||||
bool caps_mode = config.keep_rows && config.keep_rows < cap_height;
|
||||
S* caps;
|
||||
if (caps_mode) { caps = static_cast<S*>(malloc(caps_len * sizeof(S))); }
|
||||
|
||||
for (size_t subtree_idx = 0; subtree_idx < number_of_subtrees; subtree_idx++) {
|
||||
size_t stream_idx = subtree_idx % number_of_streams;
|
||||
cudaStream_t subtree_stream = streams[stream_idx];
|
||||
|
||||
const S* subtree_leaves = leaves + subtree_idx * subtree_leaves_size;
|
||||
S* subtree_state = states_ptr + stream_idx * subtree_state_size;
|
||||
S* subtree_digests = digests_ptr + stream_idx * subtree_digests_size;
|
||||
|
||||
// Copy the first level from RAM / device to device
|
||||
// The pitch property of cudaMemcpy2D resolves shape differences
|
||||
CHK_IF_RETURN(cudaMemcpy2DAsync(
|
||||
subtree_state, T * sizeof(S), // Device pointer and device pitch
|
||||
subtree_leaves, arity * sizeof(S), // Host pointer and pitch
|
||||
arity * sizeof(S), // Size of the source matrix (Arity)
|
||||
subtree_leaves_size / arity, // Size of the source matrix (Number of blocks)
|
||||
config.are_inputs_on_device ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice, subtree_stream));
|
||||
|
||||
int subtree_keep_rows = 0;
|
||||
if (config.keep_rows) {
|
||||
int diff = config.keep_rows - cap_height + 1;
|
||||
subtree_keep_rows = diff <= 0 ? 1 : diff;
|
||||
}
|
||||
size_t start_segment_size = number_of_leaves / arity;
|
||||
cudaError_t subtree_result = build_merkle_subtree<S, T>(
|
||||
subtree_state, // state
|
||||
subtree_digests, // digests
|
||||
subtree_idx, // subtree_idx
|
||||
subtree_height, // subtree_height
|
||||
caps_mode ? caps : digests, // big_tree_digests
|
||||
start_segment_size, // start_segment_size
|
||||
0, // start_segment_offset
|
||||
subtree_keep_rows, // keep_rows
|
||||
poseidon, // hash
|
||||
subtree_stream // stream
|
||||
);
|
||||
CHK_IF_RETURN(subtree_result);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
|
||||
}
|
||||
|
||||
// Finish the top-level tree if any
|
||||
if (cap_height > 1) {
|
||||
size_t start_segment_size = caps_len / arity;
|
||||
size_t start_segment_offset = 0;
|
||||
if (!caps_mode) {
|
||||
size_t layer_size = pow(arity, config.keep_rows - 1);
|
||||
for (int i = 0; i < config.keep_rows - cap_height + 1; i++) {
|
||||
start_segment_offset += layer_size;
|
||||
layer_size /= arity;
|
||||
}
|
||||
}
|
||||
CHK_IF_RETURN(cudaMemcpy2DAsync(
|
||||
states_ptr, T * sizeof(S), caps_mode ? caps : (digests + start_segment_offset - caps_len), arity * sizeof(S),
|
||||
arity * sizeof(S),
|
||||
caps_len / arity, // Size of the source
|
||||
cudaMemcpyHostToDevice, stream)); // Direction and stream
|
||||
|
||||
cudaError_t top_tree_result = build_merkle_subtree<S, T>(
|
||||
states_ptr, // state
|
||||
digests_ptr, // digests
|
||||
0, // subtree_idx
|
||||
cap_height, // subtree_height
|
||||
digests, // big_tree_digests
|
||||
start_segment_size, // start_segment_size
|
||||
start_segment_offset, // start_segment_offset
|
||||
config.keep_rows, // keep_rows
|
||||
poseidon, // hash
|
||||
stream // stream
|
||||
);
|
||||
CHK_IF_RETURN(top_tree_result);
|
||||
if (caps_mode) { free(caps); }
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaFreeAsync(states_ptr, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(digests_ptr, stream));
|
||||
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
|
||||
CHK_IF_RETURN(cudaStreamDestroy(streams[i]));
|
||||
}
|
||||
free(streams);
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
/**
 * Extern "C" entry point of the Poseidon Merkle tree builder.
 * Dispatches on the runtime `arity` to the `build_merkle_tree` instantiation whose
 * template width is `arity + 1`; any other arity is reported as an invalid argument.
 */
extern "C" cudaError_t CONCAT_EXPAND(FIELD, build_poseidon_merkle_tree)(
  const scalar_t* leaves,
  scalar_t* digests,
  uint32_t height,
  int arity,
  PoseidonConstants<scalar_t>& constants,
  TreeBuilderConfig& config)
{
  if (arity == 2) { return build_merkle_tree<scalar_t, 3>(leaves, digests, height, constants, config); }
  if (arity == 4) { return build_merkle_tree<scalar_t, 5>(leaves, digests, height, constants, config); }
  if (arity == 8) { return build_merkle_tree<scalar_t, 9>(leaves, digests, height, constants, config); }
  if (arity == 11) { return build_merkle_tree<scalar_t, 12>(leaves, digests, height, constants, config); }

  // No instantiation exists for this arity
  THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "BuildPoseidonMerkleTree: #arity must be one of [2, 4, 8, 11]");
  return CHK_LAST();
}
|
||||
} // namespace merkle
|
||||
@@ -1,7 +1,5 @@
|
||||
test_poseidon: test.cu poseidon.cu kernels.cu constants.cu
|
||||
nvcc -o test_poseidon -I../../include -DFIELD=bn254 -DFIELD_ID=1 -DCURVE_ID=1 -DDEVMODE -DDEBUG extern.cu test.cu
|
||||
./test_poseidon
|
||||
test_poseidon: test.cu
|
||||
nvcc -o test_poseidon -I../../include -DFIELD=bn254 -DFIELD_ID=1 -DCURVE_ID=1 extern.cu test.cu
|
||||
|
||||
test_poseidon_release: test.cu poseidon.cu kernels.cu constants.cu
|
||||
nvcc -o test_poseidon_release -I../../include -DFIELD=bn254 -DFIELD_ID=1 -DCURVE_ID=1 extern.cu test.cu
|
||||
./test_poseidon_release
|
||||
test_poseidon_m31: test_m31.cu
|
||||
nvcc -o test_poseidon_m31 -I../../include -DFIELD=m31 -DFIELD_ID=1003 extern.cu test_m31.cu
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
#include "poseidon2/constants.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
/// These are pre-calculated constants for different curves
|
||||
#include "fields/id.h"
|
||||
@@ -20,6 +21,9 @@ using namespace poseidon2_constants_grumpkin;
|
||||
#elif FIELD_ID == BABY_BEAR
|
||||
#include "poseidon2/constants/babybear_poseidon2.h"
|
||||
using namespace poseidon2_constants_babybear;
|
||||
#elif FIELD_ID == M31
|
||||
#include "poseidon2/constants/m31_poseidon2.h"
|
||||
using namespace poseidon2_constants_m31;
|
||||
#endif
|
||||
|
||||
namespace poseidon2 {
|
||||
@@ -36,7 +40,6 @@ namespace poseidon2 {
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon_constants)
|
||||
{
|
||||
cudaFree(nullptr); // Temporary solution
|
||||
if (!(alpha == 3 || alpha == 5 || alpha == 7 || alpha == 11)) {
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Invalid alpha value");
|
||||
}
|
||||
@@ -78,7 +81,6 @@ namespace poseidon2 {
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon2_constants)
|
||||
{
|
||||
cudaFree(nullptr); // Temporary solution
|
||||
CHK_INIT_IF_RETURN();
|
||||
|
||||
#define P2_CONSTANTS_DEF(width) \
|
||||
@@ -121,7 +123,6 @@ namespace poseidon2 {
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
CHK_IF_RETURN(cudaFreeAsync(constants->round_constants, ctx.stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(constants->internal_matrix_diag, ctx.stream));
|
||||
|
||||
constants->alpha = 0;
|
||||
|
||||
@@ -3,67 +3,71 @@
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
#include "poseidon.cu"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
#include "./constants.cu"
|
||||
|
||||
namespace poseidon2 {
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, create_poseidon2_constants_cuda)(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
template class Poseidon2<scalar_t>;
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon2_create_cuda)(
|
||||
Poseidon2<scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const scalar_t* round_constants,
|
||||
const scalar_t* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<scalar_t>* poseidon_constants)
|
||||
device_context::DeviceContext& ctx)
|
||||
{
|
||||
return create_poseidon2_constants<scalar_t>(
|
||||
width, alpha, internal_rounds, external_rounds, round_constants, internal_matrix_diag, mds_type, diffusion, ctx,
|
||||
poseidon_constants);
|
||||
try {
|
||||
*poseidon = new Poseidon2<scalar_t>(
|
||||
width, rate, alpha, internal_rounds, external_rounds, round_constants, internal_matrix_diag, mds_type,
|
||||
diffusion, ctx);
|
||||
return cudaError_t::cudaSuccess;
|
||||
} catch (const IcicleError& _error) {
|
||||
return cudaError_t::cudaErrorUnknown;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, init_poseidon2_constants_cuda)(
|
||||
int width,
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon2_load_cuda)(
|
||||
Poseidon2<scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<scalar_t>* constants)
|
||||
device_context::DeviceContext& ctx)
|
||||
{
|
||||
return init_poseidon2_constants<scalar_t>(width, mds_type, diffusion, ctx, constants);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon2_hash_cuda)(
|
||||
const scalar_t* input,
|
||||
scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const Poseidon2Constants<scalar_t>* constants,
|
||||
Poseidon2Config* config)
|
||||
{
|
||||
#define P2_HASH_T(width) \
|
||||
case width: \
|
||||
return poseidon2_hash<scalar_t, width>(input, output, number_of_states, *constants, *config);
|
||||
|
||||
switch (width) {
|
||||
P2_HASH_T(2)
|
||||
P2_HASH_T(3)
|
||||
P2_HASH_T(4)
|
||||
P2_HASH_T(8)
|
||||
P2_HASH_T(12)
|
||||
P2_HASH_T(16)
|
||||
P2_HASH_T(20)
|
||||
P2_HASH_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonHash: #arity must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
try {
|
||||
*poseidon = new Poseidon2<scalar_t>(width, rate, mds_type, diffusion, ctx);
|
||||
return cudaError_t::cudaSuccess;
|
||||
} catch (const IcicleError& _error) {
|
||||
return cudaError_t::cudaErrorUnknown;
|
||||
}
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, release_poseidon2_constants_cuda)(
|
||||
Poseidon2Constants<scalar_t>* constants, device_context::DeviceContext& ctx)
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon2_hash_many_cuda)(
|
||||
const Poseidon2<scalar_t>* poseidon,
|
||||
const scalar_t* inputs,
|
||||
scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::SpongeConfig& cfg)
|
||||
{
|
||||
return release_poseidon2_constants<scalar_t>(constants, ctx);
|
||||
return poseidon->hash_many(inputs, output, number_of_states, input_block_len, output_len, cfg);
|
||||
}
|
||||
|
||||
/**
 * C API: destroys a Poseidon2 instance and releases the memory it occupies.
 *
 * The matching load entry point in this file creates the instance with `new`,
 * so it has to be released with `delete`. Invoking only the destructor (as the
 * previous implementation did) tears the object down but never returns its
 * storage, leaking one allocation per instance.
 *
 * @param poseidon Instance to destroy; a null pointer is accepted and ignored
 *                 (`delete` on nullptr is a no-op).
 * @return cudaSuccess on success, cudaErrorUnknown if an IcicleError is thrown
 *         while the instance is being destroyed.
 */
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon2_delete_cuda)(Poseidon2<scalar_t>* poseidon)
{
  try {
    // Runs ~Poseidon2() and frees the storage obtained from `new`.
    delete poseidon;
    return cudaError_t::cudaSuccess;
  } catch (const IcicleError& _error) {
    return cudaError_t::cudaErrorUnknown;
  }
}
|
||||
} // namespace poseidon2
|
||||
@@ -1,80 +0,0 @@
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
#include "constants.cu"
|
||||
#include "kernels.cu"
|
||||
|
||||
namespace poseidon2 {
|
||||
static int poseidon_block_size = 128;
|
||||
|
||||
template <typename S, int T>
|
||||
int poseidon_number_of_blocks(size_t number_of_states)
|
||||
{
|
||||
return number_of_states / poseidon_block_size + static_cast<bool>(number_of_states % poseidon_block_size);
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
cudaError_t permute_many(
|
||||
const S* states,
|
||||
S* states_out,
|
||||
size_t number_of_states,
|
||||
const Poseidon2Constants<S>& constants,
|
||||
cudaStream_t& stream)
|
||||
{
|
||||
poseidon2_permutation_kernel<S, T>
|
||||
<<<poseidon_number_of_blocks<S, T>(number_of_states), poseidon_block_size, 0, stream>>>(
|
||||
states, states_out, number_of_states, constants);
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon2_hash(
|
||||
const S* states,
|
||||
S* output,
|
||||
size_t number_of_states,
|
||||
const Poseidon2Constants<S>& constants,
|
||||
const Poseidon2Config& config)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = config.ctx.stream;
|
||||
S* d_states;
|
||||
if (config.are_states_on_device) {
|
||||
d_states = const_cast<S*>(states);
|
||||
} else {
|
||||
// allocate memory for {number_of_states} states of {t} scalars each
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_states, number_of_states * T * sizeof(S), stream))
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(d_states, states, number_of_states * T * sizeof(S), cudaMemcpyHostToDevice, stream))
|
||||
}
|
||||
|
||||
cudaError_t hash_error = permute_many<S, T>(d_states, d_states, number_of_states, constants, stream);
|
||||
CHK_IF_RETURN(hash_error);
|
||||
|
||||
if (config.mode == PoseidonMode::COMPRESSION) {
|
||||
S* output_device;
|
||||
if (config.are_outputs_on_device) {
|
||||
output_device = output;
|
||||
} else {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&output_device, number_of_states * sizeof(S), stream))
|
||||
}
|
||||
|
||||
get_hash_results<S, T><<<poseidon_number_of_blocks<S, T>(number_of_states), poseidon_block_size, 0, stream>>>(
|
||||
d_states, number_of_states, config.output_index, output_device);
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
|
||||
if (!config.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(
|
||||
cudaMemcpyAsync(output, output_device, number_of_states * sizeof(S), cudaMemcpyDeviceToHost, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(output_device, stream));
|
||||
}
|
||||
} else {
|
||||
if (!config.are_states_on_device || !config.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(
|
||||
cudaMemcpyAsync(output, d_states, number_of_states * T * sizeof(S), cudaMemcpyDeviceToHost, stream));
|
||||
}
|
||||
}
|
||||
|
||||
if (!config.are_states_on_device) CHK_IF_RETURN(cudaFreeAsync(d_states, stream));
|
||||
|
||||
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon2
|
||||
File diff suppressed because it is too large
Load Diff
88
icicle/src/poseidon2/test_m31.cu
Normal file
88
icicle/src/poseidon2/test_m31.cu
Normal file
@@ -0,0 +1,88 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
using namespace poseidon2;
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
#define T 16
|
||||
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
using FpMicroseconds = std::chrono::duration<float, std::chrono::microseconds::period>;
|
||||
|
||||
// Load poseidon
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
Poseidon2<scalar_t> poseidon(T, T, MdsType::DEFAULT_MDS, DiffusionStrategy::DEFAULT_DIFFUSION, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
int number_of_blocks = argc > 1 ? 1 << atoi(argv[1]) : 1024;
|
||||
scalar_t* in_ptr = static_cast<scalar_t*>(malloc(number_of_blocks * T * sizeof(scalar_t)));
|
||||
scalar_t* out_ptr = static_cast<scalar_t*>(malloc(number_of_blocks * sizeof(scalar_t)));
|
||||
scalar_t input = scalar_t::zero();
|
||||
|
||||
hash::SpongeConfig cfg = hash::default_sponge_config();
|
||||
|
||||
size_t number_of_repetitions = argc > 2 ? 1 << atoi(argv[2]) : 32;
|
||||
|
||||
// Prepare input data of [0, 1, 2 ... (number_of_blocks * arity) - 1]
|
||||
for (uint32_t i = 0; i < number_of_blocks * T; i++) {
|
||||
in_ptr[i] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
|
||||
// Warm up
|
||||
poseidon.hash_many(in_ptr, out_ptr, number_of_blocks, T, 1, cfg);
|
||||
|
||||
auto total_time_start = std::chrono::high_resolution_clock::now();
|
||||
size_t avg_time = 0;
|
||||
for (int i = 0; i < number_of_repetitions; i++) {
|
||||
auto poseidon_start = std::chrono::high_resolution_clock::now();
|
||||
poseidon.hash_many(in_ptr, out_ptr, number_of_blocks, T, 1, cfg);
|
||||
avg_time += FpMilliseconds(std::chrono::high_resolution_clock::now() - poseidon_start).count();
|
||||
}
|
||||
auto total_time = FpMilliseconds(std::chrono::high_resolution_clock::now() - total_time_start).count();
|
||||
|
||||
std::cout << "Block size: " << number_of_blocks << std::endl;
|
||||
std::cout << "Total time: " << total_time << " ms" << std::endl;
|
||||
std::cout << "Avg time: " << avg_time / number_of_repetitions << " ms" << std::endl;
|
||||
|
||||
// for (int i = 0; i < number_of_blocks; i++) {
|
||||
// std::cout << "{";
|
||||
// for (int j = 0; j < 8; j++) {
|
||||
// std::cout << ((uint32_t*)&out_ptr[i].limbs_storage)[j];
|
||||
// if (j != 7) { std::cout << ", "; }
|
||||
// }
|
||||
// std::cout << "}," << std::endl;
|
||||
// }
|
||||
|
||||
if (number_of_blocks == 1024) {
|
||||
for (int i = 0; i < number_of_blocks; i++) {
|
||||
#ifdef DEBUG
|
||||
// std::cout << out_ptr[i] << std::endl;
|
||||
#endif
|
||||
// assert((out_ptr[i] == expected[i]));
|
||||
}
|
||||
printf("Expected output matches\n");
|
||||
}
|
||||
|
||||
free(in_ptr);
|
||||
free(out_ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
BIN
icicle/src/poseidon2/test_poseidon_m31
Executable file
BIN
icicle/src/poseidon2/test_poseidon_m31
Executable file
Binary file not shown.
@@ -165,7 +165,7 @@ namespace vec_ops {
|
||||
E* mat_out,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
device_context::DeviceContext& ctx,
|
||||
const device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async)
|
||||
{
|
||||
|
||||
@@ -77,6 +77,8 @@ FIELDS_CONFIG = {
|
||||
COMMON_INCLUDES = [
|
||||
'#include <cuda_runtime.h>',
|
||||
'#include "gpu-utils/device_context.cuh"',
|
||||
'#include "merkle-tree/merkle.cuh"',
|
||||
'#include "matrix/matrix.cuh"'
|
||||
]
|
||||
|
||||
WARN_TEXT = """\
|
||||
@@ -114,10 +116,9 @@ if __name__ == "__main__":
|
||||
includes.append('#include "msm/msm.cuh"')
|
||||
if any(header.name.startswith("vec_ops") for header in headers):
|
||||
includes.append('#include "vec_ops/vec_ops.cuh"')
|
||||
if any(header.name.startswith("poseidon") for header in headers):
|
||||
if any(header.name.startswith("poseidon.h") for header in headers):
|
||||
includes.append('#include "poseidon/poseidon.cuh"')
|
||||
includes.append('#include "poseidon/tree/merkle.cuh"')
|
||||
if any(header.name.startswith("poseidon2") for header in headers):
|
||||
if any(header.name.startswith("poseidon2.h") for header in headers):
|
||||
includes.append('#include "poseidon2/poseidon2.cuh"')
|
||||
|
||||
contents = WARN_TEXT + INCLUDE_ONCE.format(curve.upper()) + "\n".join(includes) + "\n\n"
|
||||
@@ -148,10 +149,9 @@ if __name__ == "__main__":
|
||||
includes.append('#include "ntt/ntt.cuh"')
|
||||
if any(header.name.startswith("vec_ops") for header in headers):
|
||||
includes.append('#include "vec_ops/vec_ops.cuh"')
|
||||
if any(header.name.startswith("poseidon") for header in headers):
|
||||
if any(header.name.startswith("poseidon.h") for header in headers):
|
||||
includes.append('#include "poseidon/poseidon.cuh"')
|
||||
includes.append('#include "poseidon/tree/merkle.cuh"')
|
||||
if any(header.name.startswith("poseidon2") for header in headers):
|
||||
if any(header.name.startswith("poseidon2.h") for header in headers):
|
||||
includes.append('#include "poseidon2/poseidon2.cuh"')
|
||||
|
||||
contents = WARN_TEXT + INCLUDE_ONCE.format(field.upper()) + "\n".join(includes) + "\n\n"
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
)
|
||||
|
||||
type PoseidonConfig struct {
|
||||
/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
|
||||
Ctx cr.DeviceContext
|
||||
areInputsOnDevice bool
|
||||
areOutputsOnDevice bool
|
||||
///If true, input is considered to be a states vector, holding the preimages in aligned or not aligned format.
|
||||
///Memory under the input pointer will be used for states. If false, fresh states memory will be allocated and input will be copied into it */
|
||||
InputIsAState bool
|
||||
/// If true - input should be already aligned for poseidon permutation.
|
||||
///* Aligned format: [0, A, B, 0, C, D, ...] (as you might get by using loop_state)
|
||||
///* not aligned format: [A, B, 0, C, D, 0, ...] (as you might get from cudaMemcpy2D) */
|
||||
Aligned bool
|
||||
///If true, hash results will also be copied in the input pointer in aligned format
|
||||
LoopState bool
|
||||
///Whether to run the Poseidon asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
///non-blocking and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
|
||||
///If set to false, the poseidon_hash function will block the current CPU thread. */
|
||||
IsAsync bool
|
||||
}
|
||||
|
||||
// PoseidonConstants describes a set of Poseidon constants for scalar type T.
//
// The curve wrappers reinterpret a pointer to this struct as the native
// constants struct, so the field order and types must stay exactly as they
// are. The pointer fields are opaque to Go code in this package.
// NOTE(review): they presumably reference buffers owned by the native
// library; confirm before freeing or copying them from Go.
type PoseidonConstants[T any] struct {
	// Arity is read by PoseidonCheck to compute the expected input length.
	Arity           int32
	PartialRounds   int32
	FullRoundsHalf  int32
	RoundConstants  unsafe.Pointer
	MdsMatrix       unsafe.Pointer
	NonSparseMatrix unsafe.Pointer
	SparseMatrices  unsafe.Pointer
	DomainTag       T
}
|
||||
|
||||
func GetDefaultPoseidonConfig() PoseidonConfig {
|
||||
ctx, _ := cr.GetDefaultDeviceContext()
|
||||
return PoseidonConfig{
|
||||
ctx, // Ctx
|
||||
false, // areInputsOnDevice
|
||||
false, // areOutputsOnDevice
|
||||
false, // inputIsAState
|
||||
false, // aligned
|
||||
false, // loopState
|
||||
false, // IsAsync
|
||||
}
|
||||
}
|
||||
|
||||
func PoseidonCheck[T any](input, output HostOrDeviceSlice, cfg *PoseidonConfig, constants *PoseidonConstants[T], numberOfStates int) (unsafe.Pointer, unsafe.Pointer, unsafe.Pointer) {
|
||||
inputLen, outputLen := input.Len(), output.Len()
|
||||
arity := int(constants.Arity)
|
||||
expectedInputLen := arity * numberOfStates
|
||||
if cfg.InputIsAState {
|
||||
expectedInputLen += numberOfStates
|
||||
}
|
||||
|
||||
if inputLen != expectedInputLen {
|
||||
errorString := fmt.Sprintf(
|
||||
"input is not the right length for the given parameters: %d, should be: %d",
|
||||
inputLen,
|
||||
arity*numberOfStates,
|
||||
)
|
||||
panic(errorString)
|
||||
}
|
||||
|
||||
if outputLen != numberOfStates {
|
||||
errorString := fmt.Sprintf(
|
||||
"output is not the right length for the given parameters: %d, should be: %d",
|
||||
outputLen,
|
||||
numberOfStates,
|
||||
)
|
||||
panic(errorString)
|
||||
}
|
||||
cfg.areInputsOnDevice = input.IsOnDevice()
|
||||
cfg.areOutputsOnDevice = output.IsOnDevice()
|
||||
|
||||
if input.IsOnDevice() {
|
||||
input.(DeviceSlice).CheckDevice()
|
||||
|
||||
}
|
||||
|
||||
if output.IsOnDevice() {
|
||||
output.(DeviceSlice).CheckDevice()
|
||||
}
|
||||
|
||||
cfgPointer := unsafe.Pointer(cfg)
|
||||
|
||||
return input.AsUnsafePointer(), output.AsUnsafePointer(), cfgPointer
|
||||
}
|
||||
105
wrappers/golang/core/sponge.go
Normal file
105
wrappers/golang/core/sponge.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
)
|
||||
|
||||
type SpongeConfig struct {
|
||||
/// Details related to the device such as its id and stream.
|
||||
Ctx cr.DeviceContext
|
||||
|
||||
areInputsOnDevice bool
|
||||
areResultsOnDevice bool
|
||||
|
||||
InputRate uint32
|
||||
OutputRate uint32
|
||||
Offset uint32
|
||||
|
||||
/// If true - input should be already aligned for poseidon permutation.
|
||||
/// Aligned format: [0, A, B, 0, C, D, ...] (as you might get by using loop_state)
|
||||
/// not aligned format: [A, B, 0, C, D, 0, ...] (as you might get from cudaMemcpy2D)
|
||||
RecursiveSqueeze bool
|
||||
|
||||
/// If true, hash results will also be copied in the input pointer in aligned format
|
||||
Aligned bool
|
||||
|
||||
/// Whether to run the SpongeHash asynchronously. If set to `true`, the SpongeHash function will be non-blocking
|
||||
/// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
|
||||
/// If set to `false`, the SpongeHash function will block the current CPU thread.
|
||||
IsAsync bool
|
||||
}
|
||||
|
||||
func GetDefaultSpongeConfig() SpongeConfig {
|
||||
ctx, _ := cr.GetDefaultDeviceContext()
|
||||
return SpongeConfig{
|
||||
ctx,
|
||||
false,
|
||||
false,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
}
|
||||
}
|
||||
|
||||
func SpongeInputCheck(inputs HostOrDeviceSlice, numberOfStates, inputBlockLength, inputRate uint32, ctx *cr.DeviceContext) {
|
||||
if inputBlockLength > inputRate {
|
||||
errorString := fmt.Sprintf(
|
||||
"Input block (%d) can't be greater than input rate (%d)",
|
||||
inputBlockLength,
|
||||
inputRate,
|
||||
)
|
||||
panic(errorString)
|
||||
}
|
||||
inputsSizeExpected := inputBlockLength * numberOfStates
|
||||
if inputs.Len() < int(inputsSizeExpected) {
|
||||
errorString := fmt.Sprintf(
|
||||
"inputs len is %d; but needs to be at least %d",
|
||||
inputs.Len(),
|
||||
inputsSizeExpected,
|
||||
)
|
||||
panic(errorString)
|
||||
}
|
||||
if inputs.IsOnDevice() {
|
||||
inputs.(DeviceSlice).CheckDevice()
|
||||
}
|
||||
}
|
||||
|
||||
func SpongeStatesCheck(states DeviceSlice, numberOfStates, width uint32, ctx *cr.DeviceContext) {
|
||||
|
||||
statesSizeExpected := width * numberOfStates
|
||||
if states.Len() < int(statesSizeExpected) {
|
||||
errorString := fmt.Sprintf(
|
||||
"inputs len is %d; but needs to be at least %d",
|
||||
states.Len(),
|
||||
statesSizeExpected,
|
||||
)
|
||||
panic(errorString)
|
||||
}
|
||||
states.CheckDevice()
|
||||
}
|
||||
|
||||
func SpongeOutputsCheck(outputs HostOrDeviceSlice, numberOfStates, outputLen, width uint32, recursive bool, ctx *cr.DeviceContext) {
|
||||
var outputsSizeExpected uint32
|
||||
if recursive {
|
||||
outputsSizeExpected = width * numberOfStates
|
||||
} else {
|
||||
outputsSizeExpected = outputLen * numberOfStates
|
||||
}
|
||||
|
||||
if outputs.Len() < int(outputsSizeExpected) {
|
||||
errorString := fmt.Sprintf(
|
||||
"outputs len is %d; but needs to be at least %d",
|
||||
outputs.Len(),
|
||||
outputsSizeExpected,
|
||||
)
|
||||
panic(errorString)
|
||||
}
|
||||
if outputs.IsOnDevice() {
|
||||
outputs.(DeviceSlice).CheckDevice()
|
||||
}
|
||||
}
|
||||
@@ -9,14 +9,40 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct scalar_t scalar_t;
|
||||
typedef struct PoseidonConfig PoseidonConfig;
|
||||
typedef struct DeviceContext DeviceContext;
|
||||
typedef struct PoseidonConstants PoseidonConstants;
|
||||
typedef struct TreeBuilderConfig TreeBuilderConfig;
|
||||
typedef struct PoseidonInst PoseidonInst;
|
||||
typedef struct SpongeConfig SpongeConfig;
|
||||
|
||||
|
||||
cudaError_t bls12_377_poseidon_hash_cuda(const scalar_t* input, scalar_t* output, int number_of_states, int arity, PoseidonConstants* constants, PoseidonConfig* config);
|
||||
cudaError_t bls12_377_create_optimized_poseidon_constants_cuda(int arity, int full_rounds_halfs, int partial_rounds, const scalar_t* constants, DeviceContext* ctx, PoseidonConstants* poseidon_constants);
|
||||
cudaError_t bls12_377_init_optimized_poseidon_constants_cuda(int arity, DeviceContext* ctx, PoseidonConstants* constants);
|
||||
cudaError_t bls12_377_poseidon_create_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const scalar_t* round_constants,
|
||||
const scalar_t* mds_matrix,
|
||||
const scalar_t* non_sparse_matrix,
|
||||
const scalar_t* sparse_matrices,
|
||||
const scalar_t* domain_tag,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t bls12_377_poseidon_load_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t bls12_377_poseidon_hash_many_cuda(
|
||||
const PoseidonInst* poseidon,
|
||||
const scalar_t* inputs,
|
||||
scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
SpongeConfig* cfg);
|
||||
|
||||
cudaError_t bls12_377_poseidon_delete_cuda(PoseidonInst* poseidon);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -3,55 +3,85 @@ package poseidon
|
||||
// #cgo CFLAGS: -I./include/
|
||||
// #include "poseidon.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ingonyama-zk/icicle/v2/wrappers/golang/core"
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
bls12_377 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12377"
|
||||
)
|
||||
|
||||
func GetDefaultPoseidonConfig() core.PoseidonConfig {
|
||||
return core.GetDefaultPoseidonConfig()
|
||||
type PoseidonHandler = C.struct_PoseidonInst
|
||||
type Poseidon struct {
|
||||
width uint32
|
||||
handle *PoseidonHandler
|
||||
}
|
||||
|
||||
func PoseidonHash[T any](scalars, results core.HostOrDeviceSlice, numberOfStates int, cfg *core.PoseidonConfig, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
scalarsPointer, resultsPointer, cfgPointer := core.PoseidonCheck(scalars, results, cfg, constants, numberOfStates)
|
||||
func Create(arity uint32, alpha uint32, fullRoundsHalf uint32, partialRounds uint32, scalars core.HostOrDeviceSlice, mdsMatrix core.HostOrDeviceSlice, nonSparseMatrix core.HostOrDeviceSlice, sparseMatrices core.HostOrDeviceSlice, domainTag bls12_377.ScalarField, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cAlpha := (C.uint)(alpha)
|
||||
cFullRoundsHalf := (C.uint)(fullRoundsHalf)
|
||||
cPartialRounds := (C.uint)(partialRounds)
|
||||
cScalars := (*C.scalar_t)(scalars.AsUnsafePointer())
|
||||
cMdsMatrix := (*C.scalar_t)(mdsMatrix.AsUnsafePointer())
|
||||
cNonSparseMatrix := (*C.scalar_t)(nonSparseMatrix.AsUnsafePointer())
|
||||
cSparseMatrices := (*C.scalar_t)(sparseMatrices.AsUnsafePointer())
|
||||
cDomainTag := (*C.scalar_t)(unsafe.Pointer(&domainTag))
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.bls12_377_poseidon_create_cuda(&poseidon, cArity, cAlpha, cFullRoundsHalf, cPartialRounds, cScalars, cMdsMatrix, cNonSparseMatrix, cSparseMatrices, cDomainTag, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
cScalars := (*C.scalar_t)(scalarsPointer)
|
||||
cResults := (*C.scalar_t)(resultsPointer)
|
||||
cNumberOfStates := (C.int)(numberOfStates)
|
||||
cArity := (C.int)(constants.Arity)
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
cCfg := (*C.PoseidonConfig)(cfgPointer)
|
||||
func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.bls12_377_poseidon_load_cuda(&poseidon, cArity, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
__ret := C.bls12_377_poseidon_hash_cuda(cScalars, cResults, cNumberOfStates, cArity, cConstants, cCfg)
|
||||
func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError {
|
||||
core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx)
|
||||
core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx)
|
||||
|
||||
cInputs := (*C.scalar_t)(inputs.AsUnsafePointer())
|
||||
cOutput := (*C.scalar_t)(output.AsUnsafePointer())
|
||||
cNumberOfStates := (C.uint)(numberOfStates)
|
||||
cInputBlockLen := (C.uint)(inputBlockLen)
|
||||
cOutputLen := (C.uint)(outputLen)
|
||||
cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg))
|
||||
__ret := C.bls12_377_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func CreateOptimizedPoseidonConstants[T any](arity, fullRoundsHalfs, partialRounds int, constants core.HostOrDeviceSlice, ctx cr.DeviceContext, poseidonConstants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cFullRoundsHalfs := (C.int)(fullRoundsHalfs)
|
||||
cPartialRounds := (C.int)(partialRounds)
|
||||
cConstants := (*C.scalar_t)(constants.AsUnsafePointer())
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cPoseidonConstants := (*C.PoseidonConstants)(unsafe.Pointer(poseidonConstants))
|
||||
|
||||
__ret := C.bls12_377_create_optimized_poseidon_constants_cuda(cArity, cFullRoundsHalfs, cPartialRounds, cConstants, cCtx, cPoseidonConstants)
|
||||
func (poseidon *Poseidon) Delete() core.IcicleError {
|
||||
__ret := C.bls12_377_poseidon_delete_cuda(poseidon.handle)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func InitOptimizedPoseidonConstantsCuda[T any](arity int, ctx cr.DeviceContext, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
|
||||
__ret := C.bls12_377_init_optimized_poseidon_constants_cuda(cArity, cCtx, cConstants)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig {
|
||||
cfg := core.GetDefaultSpongeConfig()
|
||||
cfg.InputRate = poseidon.width - 1
|
||||
cfg.OutputRate = poseidon.width
|
||||
return cfg
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
bls12_377 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12377"
|
||||
poseidon "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12377/poseidon"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestPoseidon(t *testing.T) {
|
||||
@@ -14,14 +15,11 @@ func TestPoseidon(t *testing.T) {
|
||||
arity := 2
|
||||
numberOfStates := 1
|
||||
|
||||
cfg := poseidon.GetDefaultPoseidonConfig()
|
||||
cfg.IsAsync = true
|
||||
stream, _ := cr.CreateStream()
|
||||
cfg.Ctx.Stream = &stream
|
||||
ctx, _ := cr.GetDefaultDeviceContext()
|
||||
p, err := poseidon.Load(uint32(arity), &ctx)
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
var constants core.PoseidonConstants[bls12_377.ScalarField]
|
||||
|
||||
poseidon.InitOptimizedPoseidonConstantsCuda(arity, cfg.Ctx, &constants) //generate constants
|
||||
cfg := p.GetDefaultSpongeConfig()
|
||||
|
||||
scalars := bls12_377.GenerateScalars(numberOfStates * arity)
|
||||
scalars[0] = scalars[0].Zero()
|
||||
@@ -30,13 +28,13 @@ func TestPoseidon(t *testing.T) {
|
||||
scalarsCopy := core.HostSliceFromElements(scalars[:numberOfStates*arity])
|
||||
|
||||
var deviceInput core.DeviceSlice
|
||||
scalarsCopy.CopyToDeviceAsync(&deviceInput, stream, true)
|
||||
scalarsCopy.CopyToDevice(&deviceInput, true)
|
||||
var deviceOutput core.DeviceSlice
|
||||
deviceOutput.MallocAsync(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement(), stream)
|
||||
deviceOutput.Malloc(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement())
|
||||
|
||||
poseidon.PoseidonHash(deviceInput, deviceOutput, numberOfStates, &cfg, &constants) //run Hash function
|
||||
err = p.HashMany(deviceInput, deviceOutput, uint32(numberOfStates), 1, 1, &cfg) //run Hash function
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
output := make(core.HostSlice[bls12_377.ScalarField], numberOfStates)
|
||||
output.CopyFromDeviceAsync(&deviceOutput, stream)
|
||||
|
||||
output.CopyFromDevice(&deviceOutput)
|
||||
}
|
||||
|
||||
@@ -9,14 +9,40 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct scalar_t scalar_t;
|
||||
typedef struct PoseidonConfig PoseidonConfig;
|
||||
typedef struct DeviceContext DeviceContext;
|
||||
typedef struct PoseidonConstants PoseidonConstants;
|
||||
typedef struct TreeBuilderConfig TreeBuilderConfig;
|
||||
typedef struct PoseidonInst PoseidonInst;
|
||||
typedef struct SpongeConfig SpongeConfig;
|
||||
|
||||
|
||||
cudaError_t bls12_381_poseidon_hash_cuda(const scalar_t* input, scalar_t* output, int number_of_states, int arity, PoseidonConstants* constants, PoseidonConfig* config);
|
||||
cudaError_t bls12_381_create_optimized_poseidon_constants_cuda(int arity, int full_rounds_halfs, int partial_rounds, const scalar_t* constants, DeviceContext* ctx, PoseidonConstants* poseidon_constants);
|
||||
cudaError_t bls12_381_init_optimized_poseidon_constants_cuda(int arity, DeviceContext* ctx, PoseidonConstants* constants);
|
||||
cudaError_t bls12_381_poseidon_create_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const scalar_t* round_constants,
|
||||
const scalar_t* mds_matrix,
|
||||
const scalar_t* non_sparse_matrix,
|
||||
const scalar_t* sparse_matrices,
|
||||
const scalar_t* domain_tag,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t bls12_381_poseidon_load_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t bls12_381_poseidon_hash_many_cuda(
|
||||
const PoseidonInst* poseidon,
|
||||
const scalar_t* inputs,
|
||||
scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
SpongeConfig* cfg);
|
||||
|
||||
cudaError_t bls12_381_poseidon_delete_cuda(PoseidonInst* poseidon);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -3,55 +3,85 @@ package poseidon
|
||||
// #cgo CFLAGS: -I./include/
|
||||
// #include "poseidon.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ingonyama-zk/icicle/v2/wrappers/golang/core"
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
bls12_381 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12381"
|
||||
)
|
||||
|
||||
func GetDefaultPoseidonConfig() core.PoseidonConfig {
|
||||
return core.GetDefaultPoseidonConfig()
|
||||
type PoseidonHandler = C.struct_PoseidonInst
|
||||
type Poseidon struct {
|
||||
width uint32
|
||||
handle *PoseidonHandler
|
||||
}
|
||||
|
||||
func PoseidonHash[T any](scalars, results core.HostOrDeviceSlice, numberOfStates int, cfg *core.PoseidonConfig, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
scalarsPointer, resultsPointer, cfgPointer := core.PoseidonCheck(scalars, results, cfg, constants, numberOfStates)
|
||||
func Create(arity uint32, alpha uint32, fullRoundsHalf uint32, partialRounds uint32, scalars core.HostOrDeviceSlice, mdsMatrix core.HostOrDeviceSlice, nonSparseMatrix core.HostOrDeviceSlice, sparseMatrices core.HostOrDeviceSlice, domainTag bls12_381.ScalarField, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cAlpha := (C.uint)(alpha)
|
||||
cFullRoundsHalf := (C.uint)(fullRoundsHalf)
|
||||
cPartialRounds := (C.uint)(partialRounds)
|
||||
cScalars := (*C.scalar_t)(scalars.AsUnsafePointer())
|
||||
cMdsMatrix := (*C.scalar_t)(mdsMatrix.AsUnsafePointer())
|
||||
cNonSparseMatrix := (*C.scalar_t)(nonSparseMatrix.AsUnsafePointer())
|
||||
cSparseMatrices := (*C.scalar_t)(sparseMatrices.AsUnsafePointer())
|
||||
cDomainTag := (*C.scalar_t)(unsafe.Pointer(&domainTag))
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.bls12_381_poseidon_create_cuda(&poseidon, cArity, cAlpha, cFullRoundsHalf, cPartialRounds, cScalars, cMdsMatrix, cNonSparseMatrix, cSparseMatrices, cDomainTag, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
cScalars := (*C.scalar_t)(scalarsPointer)
|
||||
cResults := (*C.scalar_t)(resultsPointer)
|
||||
cNumberOfStates := (C.int)(numberOfStates)
|
||||
cArity := (C.int)(constants.Arity)
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
cCfg := (*C.PoseidonConfig)(cfgPointer)
|
||||
func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.bls12_381_poseidon_load_cuda(&poseidon, cArity, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
__ret := C.bls12_381_poseidon_hash_cuda(cScalars, cResults, cNumberOfStates, cArity, cConstants, cCfg)
|
||||
func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError {
|
||||
core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx)
|
||||
core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx)
|
||||
|
||||
cInputs := (*C.scalar_t)(inputs.AsUnsafePointer())
|
||||
cOutput := (*C.scalar_t)(output.AsUnsafePointer())
|
||||
cNumberOfStates := (C.uint)(numberOfStates)
|
||||
cInputBlockLen := (C.uint)(inputBlockLen)
|
||||
cOutputLen := (C.uint)(outputLen)
|
||||
cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg))
|
||||
__ret := C.bls12_381_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func CreateOptimizedPoseidonConstants[T any](arity, fullRoundsHalfs, partialRounds int, constants core.HostOrDeviceSlice, ctx cr.DeviceContext, poseidonConstants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cFullRoundsHalfs := (C.int)(fullRoundsHalfs)
|
||||
cPartialRounds := (C.int)(partialRounds)
|
||||
cConstants := (*C.scalar_t)(constants.AsUnsafePointer())
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cPoseidonConstants := (*C.PoseidonConstants)(unsafe.Pointer(poseidonConstants))
|
||||
|
||||
__ret := C.bls12_381_create_optimized_poseidon_constants_cuda(cArity, cFullRoundsHalfs, cPartialRounds, cConstants, cCtx, cPoseidonConstants)
|
||||
func (poseidon *Poseidon) Delete() core.IcicleError {
|
||||
__ret := C.bls12_381_poseidon_delete_cuda(poseidon.handle)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func InitOptimizedPoseidonConstantsCuda[T any](arity int, ctx cr.DeviceContext, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
|
||||
__ret := C.bls12_381_init_optimized_poseidon_constants_cuda(cArity, cCtx, cConstants)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig {
|
||||
cfg := core.GetDefaultSpongeConfig()
|
||||
cfg.InputRate = poseidon.width - 1
|
||||
cfg.OutputRate = poseidon.width
|
||||
return cfg
|
||||
}
|
||||
|
||||
@@ -7,29 +7,19 @@ import (
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
bls12_381 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12381"
|
||||
poseidon "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bls12381/poseidon"
|
||||
|
||||
"fmt"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func formatOutput(x bls12_381.ScalarField) string {
|
||||
r := x.GetLimbs()
|
||||
return fmt.Sprintf("%08x%08x%08x%08x%08x%08x%08x%08x", r[7], r[6], r[5], r[4], r[3], r[2], r[1], r[0])
|
||||
}
|
||||
|
||||
func TestPoseidon(t *testing.T) {
|
||||
|
||||
arity := 2
|
||||
numberOfStates := 1
|
||||
|
||||
cfg := poseidon.GetDefaultPoseidonConfig()
|
||||
cfg.IsAsync = true
|
||||
stream, _ := cr.CreateStream()
|
||||
cfg.Ctx.Stream = &stream
|
||||
ctx, _ := cr.GetDefaultDeviceContext()
|
||||
p, err := poseidon.Load(uint32(arity), &ctx)
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
var constants core.PoseidonConstants[bls12_381.ScalarField]
|
||||
|
||||
poseidon.InitOptimizedPoseidonConstantsCuda(arity, cfg.Ctx, &constants) //generate constants
|
||||
cfg := p.GetDefaultSpongeConfig()
|
||||
|
||||
scalars := bls12_381.GenerateScalars(numberOfStates * arity)
|
||||
scalars[0] = scalars[0].Zero()
|
||||
@@ -38,18 +28,13 @@ func TestPoseidon(t *testing.T) {
|
||||
scalarsCopy := core.HostSliceFromElements(scalars[:numberOfStates*arity])
|
||||
|
||||
var deviceInput core.DeviceSlice
|
||||
scalarsCopy.CopyToDeviceAsync(&deviceInput, stream, true)
|
||||
scalarsCopy.CopyToDevice(&deviceInput, true)
|
||||
var deviceOutput core.DeviceSlice
|
||||
deviceOutput.MallocAsync(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement(), stream)
|
||||
deviceOutput.Malloc(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement())
|
||||
|
||||
poseidon.PoseidonHash(deviceInput, deviceOutput, numberOfStates, &cfg, &constants) //run Hash function
|
||||
err = p.HashMany(deviceInput, deviceOutput, uint32(numberOfStates), 1, 1, &cfg) //run Hash function
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
output := make(core.HostSlice[bls12_381.ScalarField], numberOfStates)
|
||||
output.CopyFromDeviceAsync(&deviceOutput, stream)
|
||||
|
||||
expectedString := "48fe0b1331196f6cdb33a7c6e5af61b76fd388e1ef1d3d418be5147f0e4613d4" //This result is from https://github.com/triplewz/poseidon
|
||||
outputString := formatOutput(output[0])
|
||||
|
||||
assert.Equal(t, outputString, expectedString, "Poseidon hash does not match expected result")
|
||||
|
||||
output.CopyFromDevice(&deviceOutput)
|
||||
}
|
||||
|
||||
@@ -9,14 +9,40 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct scalar_t scalar_t;
|
||||
typedef struct PoseidonConfig PoseidonConfig;
|
||||
typedef struct DeviceContext DeviceContext;
|
||||
typedef struct PoseidonConstants PoseidonConstants;
|
||||
typedef struct TreeBuilderConfig TreeBuilderConfig;
|
||||
typedef struct PoseidonInst PoseidonInst;
|
||||
typedef struct SpongeConfig SpongeConfig;
|
||||
|
||||
|
||||
cudaError_t bn254_poseidon_hash_cuda(const scalar_t* input, scalar_t* output, int number_of_states, int arity, PoseidonConstants* constants, PoseidonConfig* config);
|
||||
cudaError_t bn254_create_optimized_poseidon_constants_cuda(int arity, int full_rounds_halfs, int partial_rounds, const scalar_t* constants, DeviceContext* ctx, PoseidonConstants* poseidon_constants);
|
||||
cudaError_t bn254_init_optimized_poseidon_constants_cuda(int arity, DeviceContext* ctx, PoseidonConstants* constants);
|
||||
cudaError_t bn254_poseidon_create_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const scalar_t* round_constants,
|
||||
const scalar_t* mds_matrix,
|
||||
const scalar_t* non_sparse_matrix,
|
||||
const scalar_t* sparse_matrices,
|
||||
const scalar_t* domain_tag,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t bn254_poseidon_load_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t bn254_poseidon_hash_many_cuda(
|
||||
const PoseidonInst* poseidon,
|
||||
const scalar_t* inputs,
|
||||
scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
SpongeConfig* cfg);
|
||||
|
||||
cudaError_t bn254_poseidon_delete_cuda(PoseidonInst* poseidon);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -3,55 +3,85 @@ package poseidon
|
||||
// #cgo CFLAGS: -I./include/
|
||||
// #include "poseidon.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ingonyama-zk/icicle/v2/wrappers/golang/core"
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
bn254 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254"
|
||||
)
|
||||
|
||||
func GetDefaultPoseidonConfig() core.PoseidonConfig {
|
||||
return core.GetDefaultPoseidonConfig()
|
||||
// PoseidonHandler aliases the C struct PoseidonInst made available through the
// cgo include of poseidon.h.
type PoseidonHandler = C.struct_PoseidonInst
|
||||
// Poseidon pairs a C-side hasher handle with the width it was created for.
type Poseidon struct {
|
||||
// width is set to arity + 1 by Create and Load.
width uint32
|
||||
// handle is filled in by bn254_poseidon_create_cuda or bn254_poseidon_load_cuda
// and is what HashMany and Delete pass back to C.
handle *PoseidonHandler
|
||||
}
|
||||
|
||||
func PoseidonHash[T any](scalars, results core.HostOrDeviceSlice, numberOfStates int, cfg *core.PoseidonConfig, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
scalarsPointer, resultsPointer, cfgPointer := core.PoseidonCheck(scalars, results, cfg, constants, numberOfStates)
|
||||
func Create(arity uint32, alpha uint32, fullRoundsHalf uint32, partialRounds uint32, scalars core.HostOrDeviceSlice, mdsMatrix core.HostOrDeviceSlice, nonSparseMatrix core.HostOrDeviceSlice, sparseMatrices core.HostOrDeviceSlice, domainTag bn254.ScalarField, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cAlpha := (C.uint)(alpha)
|
||||
cFullRoundsHalf := (C.uint)(fullRoundsHalf)
|
||||
cPartialRounds := (C.uint)(partialRounds)
|
||||
cScalars := (*C.scalar_t)(scalars.AsUnsafePointer())
|
||||
cMdsMatrix := (*C.scalar_t)(mdsMatrix.AsUnsafePointer())
|
||||
cNonSparseMatrix := (*C.scalar_t)(nonSparseMatrix.AsUnsafePointer())
|
||||
cSparseMatrices := (*C.scalar_t)(sparseMatrices.AsUnsafePointer())
|
||||
cDomainTag := (*C.scalar_t)(unsafe.Pointer(&domainTag))
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.bn254_poseidon_create_cuda(&poseidon, cArity, cAlpha, cFullRoundsHalf, cPartialRounds, cScalars, cMdsMatrix, cNonSparseMatrix, cSparseMatrices, cDomainTag, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
cScalars := (*C.scalar_t)(scalarsPointer)
|
||||
cResults := (*C.scalar_t)(resultsPointer)
|
||||
cNumberOfStates := (C.int)(numberOfStates)
|
||||
cArity := (C.int)(constants.Arity)
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
cCfg := (*C.PoseidonConfig)(cfgPointer)
|
||||
func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.bn254_poseidon_load_cuda(&poseidon, cArity, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
__ret := C.bn254_poseidon_hash_cuda(cScalars, cResults, cNumberOfStates, cArity, cConstants, cCfg)
|
||||
func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError {
|
||||
core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx)
|
||||
core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx)
|
||||
|
||||
cInputs := (*C.scalar_t)(inputs.AsUnsafePointer())
|
||||
cOutput := (*C.scalar_t)(output.AsUnsafePointer())
|
||||
cNumberOfStates := (C.uint)(numberOfStates)
|
||||
cInputBlockLen := (C.uint)(inputBlockLen)
|
||||
cOutputLen := (C.uint)(outputLen)
|
||||
cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg))
|
||||
__ret := C.bn254_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func CreateOptimizedPoseidonConstants[T any](arity, fullRoundsHalfs, partialRounds int, constants core.HostOrDeviceSlice, ctx cr.DeviceContext, poseidonConstants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cFullRoundsHalfs := (C.int)(fullRoundsHalfs)
|
||||
cPartialRounds := (C.int)(partialRounds)
|
||||
cConstants := (*C.scalar_t)(constants.AsUnsafePointer())
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cPoseidonConstants := (*C.PoseidonConstants)(unsafe.Pointer(poseidonConstants))
|
||||
|
||||
__ret := C.bn254_create_optimized_poseidon_constants_cuda(cArity, cFullRoundsHalfs, cPartialRounds, cConstants, cCtx, cPoseidonConstants)
|
||||
func (poseidon *Poseidon) Delete() core.IcicleError {
|
||||
__ret := C.bn254_poseidon_delete_cuda(poseidon.handle)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func InitOptimizedPoseidonConstantsCuda[T any](arity int, ctx cr.DeviceContext, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
|
||||
__ret := C.bn254_init_optimized_poseidon_constants_cuda(cArity, cCtx, cConstants)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig {
|
||||
cfg := core.GetDefaultSpongeConfig()
|
||||
cfg.InputRate = poseidon.width - 1
|
||||
cfg.OutputRate = poseidon.width
|
||||
return cfg
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
bn254 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254"
|
||||
poseidon "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bn254/poseidon"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestPoseidon(t *testing.T) {
|
||||
@@ -14,14 +15,11 @@ func TestPoseidon(t *testing.T) {
|
||||
arity := 2
|
||||
numberOfStates := 1
|
||||
|
||||
cfg := poseidon.GetDefaultPoseidonConfig()
|
||||
cfg.IsAsync = true
|
||||
stream, _ := cr.CreateStream()
|
||||
cfg.Ctx.Stream = &stream
|
||||
ctx, _ := cr.GetDefaultDeviceContext()
|
||||
p, err := poseidon.Load(uint32(arity), &ctx)
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
var constants core.PoseidonConstants[bn254.ScalarField]
|
||||
|
||||
poseidon.InitOptimizedPoseidonConstantsCuda(arity, cfg.Ctx, &constants) //generate constants
|
||||
cfg := p.GetDefaultSpongeConfig()
|
||||
|
||||
scalars := bn254.GenerateScalars(numberOfStates * arity)
|
||||
scalars[0] = scalars[0].Zero()
|
||||
@@ -30,13 +28,13 @@ func TestPoseidon(t *testing.T) {
|
||||
scalarsCopy := core.HostSliceFromElements(scalars[:numberOfStates*arity])
|
||||
|
||||
var deviceInput core.DeviceSlice
|
||||
scalarsCopy.CopyToDeviceAsync(&deviceInput, stream, true)
|
||||
scalarsCopy.CopyToDevice(&deviceInput, true)
|
||||
var deviceOutput core.DeviceSlice
|
||||
deviceOutput.MallocAsync(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement(), stream)
|
||||
deviceOutput.Malloc(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement())
|
||||
|
||||
poseidon.PoseidonHash(deviceInput, deviceOutput, numberOfStates, &cfg, &constants) //run Hash function
|
||||
err = p.HashMany(deviceInput, deviceOutput, uint32(numberOfStates), 1, 1, &cfg) //run Hash function
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
output := make(core.HostSlice[bn254.ScalarField], numberOfStates)
|
||||
output.CopyFromDeviceAsync(&deviceOutput, stream)
|
||||
|
||||
output.CopyFromDevice(&deviceOutput)
|
||||
}
|
||||
|
||||
@@ -9,14 +9,40 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct scalar_t scalar_t;
|
||||
typedef struct PoseidonConfig PoseidonConfig;
|
||||
typedef struct DeviceContext DeviceContext;
|
||||
typedef struct PoseidonConstants PoseidonConstants;
|
||||
typedef struct TreeBuilderConfig TreeBuilderConfig;
|
||||
typedef struct PoseidonInst PoseidonInst;
|
||||
typedef struct SpongeConfig SpongeConfig;
|
||||
|
||||
|
||||
cudaError_t bw6_761_poseidon_hash_cuda(const scalar_t* input, scalar_t* output, int number_of_states, int arity, PoseidonConstants* constants, PoseidonConfig* config);
|
||||
cudaError_t bw6_761_create_optimized_poseidon_constants_cuda(int arity, int full_rounds_halfs, int partial_rounds, const scalar_t* constants, DeviceContext* ctx, PoseidonConstants* poseidon_constants);
|
||||
cudaError_t bw6_761_init_optimized_poseidon_constants_cuda(int arity, DeviceContext* ctx, PoseidonConstants* constants);
|
||||
cudaError_t bw6_761_poseidon_create_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const scalar_t* round_constants,
|
||||
const scalar_t* mds_matrix,
|
||||
const scalar_t* non_sparse_matrix,
|
||||
const scalar_t* sparse_matrices,
|
||||
const scalar_t* domain_tag,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t bw6_761_poseidon_load_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t bw6_761_poseidon_hash_many_cuda(
|
||||
const PoseidonInst* poseidon,
|
||||
const scalar_t* inputs,
|
||||
scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
SpongeConfig* cfg);
|
||||
|
||||
cudaError_t bw6_761_poseidon_delete_cuda(PoseidonInst* poseidon);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -3,55 +3,85 @@ package poseidon
|
||||
// #cgo CFLAGS: -I./include/
|
||||
// #include "poseidon.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ingonyama-zk/icicle/v2/wrappers/golang/core"
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
bw6_761 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bw6761"
|
||||
)
|
||||
|
||||
func GetDefaultPoseidonConfig() core.PoseidonConfig {
|
||||
return core.GetDefaultPoseidonConfig()
|
||||
// PoseidonHandler aliases the C struct PoseidonInst made available through the
// cgo include of poseidon.h.
type PoseidonHandler = C.struct_PoseidonInst
|
||||
// Poseidon pairs a C-side hasher handle with the width it was created for.
type Poseidon struct {
|
||||
// width is set to arity + 1 by Create and Load.
width uint32
|
||||
// handle is filled in by bw6_761_poseidon_create_cuda or
// bw6_761_poseidon_load_cuda and is what HashMany and Delete pass back to C.
handle *PoseidonHandler
|
||||
}
|
||||
|
||||
func PoseidonHash[T any](scalars, results core.HostOrDeviceSlice, numberOfStates int, cfg *core.PoseidonConfig, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
scalarsPointer, resultsPointer, cfgPointer := core.PoseidonCheck(scalars, results, cfg, constants, numberOfStates)
|
||||
func Create(arity uint32, alpha uint32, fullRoundsHalf uint32, partialRounds uint32, scalars core.HostOrDeviceSlice, mdsMatrix core.HostOrDeviceSlice, nonSparseMatrix core.HostOrDeviceSlice, sparseMatrices core.HostOrDeviceSlice, domainTag bw6_761.ScalarField, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cAlpha := (C.uint)(alpha)
|
||||
cFullRoundsHalf := (C.uint)(fullRoundsHalf)
|
||||
cPartialRounds := (C.uint)(partialRounds)
|
||||
cScalars := (*C.scalar_t)(scalars.AsUnsafePointer())
|
||||
cMdsMatrix := (*C.scalar_t)(mdsMatrix.AsUnsafePointer())
|
||||
cNonSparseMatrix := (*C.scalar_t)(nonSparseMatrix.AsUnsafePointer())
|
||||
cSparseMatrices := (*C.scalar_t)(sparseMatrices.AsUnsafePointer())
|
||||
cDomainTag := (*C.scalar_t)(unsafe.Pointer(&domainTag))
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.bw6_761_poseidon_create_cuda(&poseidon, cArity, cAlpha, cFullRoundsHalf, cPartialRounds, cScalars, cMdsMatrix, cNonSparseMatrix, cSparseMatrices, cDomainTag, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
cScalars := (*C.scalar_t)(scalarsPointer)
|
||||
cResults := (*C.scalar_t)(resultsPointer)
|
||||
cNumberOfStates := (C.int)(numberOfStates)
|
||||
cArity := (C.int)(constants.Arity)
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
cCfg := (*C.PoseidonConfig)(cfgPointer)
|
||||
func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.bw6_761_poseidon_load_cuda(&poseidon, cArity, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
__ret := C.bw6_761_poseidon_hash_cuda(cScalars, cResults, cNumberOfStates, cArity, cConstants, cCfg)
|
||||
func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError {
|
||||
core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx)
|
||||
core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx)
|
||||
|
||||
cInputs := (*C.scalar_t)(inputs.AsUnsafePointer())
|
||||
cOutput := (*C.scalar_t)(output.AsUnsafePointer())
|
||||
cNumberOfStates := (C.uint)(numberOfStates)
|
||||
cInputBlockLen := (C.uint)(inputBlockLen)
|
||||
cOutputLen := (C.uint)(outputLen)
|
||||
cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg))
|
||||
__ret := C.bw6_761_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func CreateOptimizedPoseidonConstants[T any](arity, fullRoundsHalfs, partialRounds int, constants core.HostOrDeviceSlice, ctx cr.DeviceContext, poseidonConstants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cFullRoundsHalfs := (C.int)(fullRoundsHalfs)
|
||||
cPartialRounds := (C.int)(partialRounds)
|
||||
cConstants := (*C.scalar_t)(constants.AsUnsafePointer())
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cPoseidonConstants := (*C.PoseidonConstants)(unsafe.Pointer(poseidonConstants))
|
||||
|
||||
__ret := C.bw6_761_create_optimized_poseidon_constants_cuda(cArity, cFullRoundsHalfs, cPartialRounds, cConstants, cCtx, cPoseidonConstants)
|
||||
func (poseidon *Poseidon) Delete() core.IcicleError {
|
||||
__ret := C.bw6_761_poseidon_delete_cuda(poseidon.handle)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func InitOptimizedPoseidonConstantsCuda[T any](arity int, ctx cr.DeviceContext, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
|
||||
__ret := C.bw6_761_init_optimized_poseidon_constants_cuda(cArity, cCtx, cConstants)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig {
|
||||
cfg := core.GetDefaultSpongeConfig()
|
||||
cfg.InputRate = poseidon.width - 1
|
||||
cfg.OutputRate = poseidon.width
|
||||
return cfg
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
bw6_761 "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bw6761"
|
||||
poseidon "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/bw6761/poseidon"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestPoseidon(t *testing.T) {
|
||||
@@ -14,14 +15,11 @@ func TestPoseidon(t *testing.T) {
|
||||
arity := 2
|
||||
numberOfStates := 1
|
||||
|
||||
cfg := poseidon.GetDefaultPoseidonConfig()
|
||||
cfg.IsAsync = true
|
||||
stream, _ := cr.CreateStream()
|
||||
cfg.Ctx.Stream = &stream
|
||||
ctx, _ := cr.GetDefaultDeviceContext()
|
||||
p, err := poseidon.Load(uint32(arity), &ctx)
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
var constants core.PoseidonConstants[bw6_761.ScalarField]
|
||||
|
||||
poseidon.InitOptimizedPoseidonConstantsCuda(arity, cfg.Ctx, &constants) //generate constants
|
||||
cfg := p.GetDefaultSpongeConfig()
|
||||
|
||||
scalars := bw6_761.GenerateScalars(numberOfStates * arity)
|
||||
scalars[0] = scalars[0].Zero()
|
||||
@@ -30,13 +28,13 @@ func TestPoseidon(t *testing.T) {
|
||||
scalarsCopy := core.HostSliceFromElements(scalars[:numberOfStates*arity])
|
||||
|
||||
var deviceInput core.DeviceSlice
|
||||
scalarsCopy.CopyToDeviceAsync(&deviceInput, stream, true)
|
||||
scalarsCopy.CopyToDevice(&deviceInput, true)
|
||||
var deviceOutput core.DeviceSlice
|
||||
deviceOutput.MallocAsync(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement(), stream)
|
||||
deviceOutput.Malloc(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement())
|
||||
|
||||
poseidon.PoseidonHash(deviceInput, deviceOutput, numberOfStates, &cfg, &constants) //run Hash function
|
||||
err = p.HashMany(deviceInput, deviceOutput, uint32(numberOfStates), 1, 1, &cfg) //run Hash function
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
output := make(core.HostSlice[bw6_761.ScalarField], numberOfStates)
|
||||
output.CopyFromDeviceAsync(&deviceOutput, stream)
|
||||
|
||||
output.CopyFromDevice(&deviceOutput)
|
||||
}
|
||||
|
||||
@@ -9,14 +9,40 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct scalar_t scalar_t;
|
||||
typedef struct PoseidonConfig PoseidonConfig;
|
||||
typedef struct DeviceContext DeviceContext;
|
||||
typedef struct PoseidonConstants PoseidonConstants;
|
||||
typedef struct TreeBuilderConfig TreeBuilderConfig;
|
||||
typedef struct PoseidonInst PoseidonInst;
|
||||
typedef struct SpongeConfig SpongeConfig;
|
||||
|
||||
|
||||
cudaError_t grumpkin_poseidon_hash_cuda(const scalar_t* input, scalar_t* output, int number_of_states, int arity, PoseidonConstants* constants, PoseidonConfig* config);
|
||||
cudaError_t grumpkin_create_optimized_poseidon_constants_cuda(int arity, int full_rounds_halfs, int partial_rounds, const scalar_t* constants, DeviceContext* ctx, PoseidonConstants* poseidon_constants);
|
||||
cudaError_t grumpkin_init_optimized_poseidon_constants_cuda(int arity, DeviceContext* ctx, PoseidonConstants* constants);
|
||||
cudaError_t grumpkin_poseidon_create_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const scalar_t* round_constants,
|
||||
const scalar_t* mds_matrix,
|
||||
const scalar_t* non_sparse_matrix,
|
||||
const scalar_t* sparse_matrices,
|
||||
const scalar_t* domain_tag,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t grumpkin_poseidon_load_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t grumpkin_poseidon_hash_many_cuda(
|
||||
const PoseidonInst* poseidon,
|
||||
const scalar_t* inputs,
|
||||
scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
SpongeConfig* cfg);
|
||||
|
||||
cudaError_t grumpkin_poseidon_delete_cuda(PoseidonInst* poseidon);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -3,55 +3,85 @@ package poseidon
|
||||
// #cgo CFLAGS: -I./include/
|
||||
// #include "poseidon.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ingonyama-zk/icicle/v2/wrappers/golang/core"
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
grumpkin "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/grumpkin"
|
||||
)
|
||||
|
||||
func GetDefaultPoseidonConfig() core.PoseidonConfig {
|
||||
return core.GetDefaultPoseidonConfig()
|
||||
// PoseidonHandler aliases the C struct PoseidonInst made available through the
// cgo include of poseidon.h.
type PoseidonHandler = C.struct_PoseidonInst
|
||||
// Poseidon pairs a C-side hasher handle with the width it was created for.
type Poseidon struct {
|
||||
// width is set to arity + 1 by Create and Load.
width uint32
|
||||
// handle is filled in by grumpkin_poseidon_create_cuda or
// grumpkin_poseidon_load_cuda and is what HashMany and Delete pass back to C.
handle *PoseidonHandler
|
||||
}
|
||||
|
||||
func PoseidonHash[T any](scalars, results core.HostOrDeviceSlice, numberOfStates int, cfg *core.PoseidonConfig, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
scalarsPointer, resultsPointer, cfgPointer := core.PoseidonCheck(scalars, results, cfg, constants, numberOfStates)
|
||||
func Create(arity uint32, alpha uint32, fullRoundsHalf uint32, partialRounds uint32, scalars core.HostOrDeviceSlice, mdsMatrix core.HostOrDeviceSlice, nonSparseMatrix core.HostOrDeviceSlice, sparseMatrices core.HostOrDeviceSlice, domainTag grumpkin.ScalarField, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cAlpha := (C.uint)(alpha)
|
||||
cFullRoundsHalf := (C.uint)(fullRoundsHalf)
|
||||
cPartialRounds := (C.uint)(partialRounds)
|
||||
cScalars := (*C.scalar_t)(scalars.AsUnsafePointer())
|
||||
cMdsMatrix := (*C.scalar_t)(mdsMatrix.AsUnsafePointer())
|
||||
cNonSparseMatrix := (*C.scalar_t)(nonSparseMatrix.AsUnsafePointer())
|
||||
cSparseMatrices := (*C.scalar_t)(sparseMatrices.AsUnsafePointer())
|
||||
cDomainTag := (*C.scalar_t)(unsafe.Pointer(&domainTag))
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.grumpkin_poseidon_create_cuda(&poseidon, cArity, cAlpha, cFullRoundsHalf, cPartialRounds, cScalars, cMdsMatrix, cNonSparseMatrix, cSparseMatrices, cDomainTag, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
cScalars := (*C.scalar_t)(scalarsPointer)
|
||||
cResults := (*C.scalar_t)(resultsPointer)
|
||||
cNumberOfStates := (C.int)(numberOfStates)
|
||||
cArity := (C.int)(constants.Arity)
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
cCfg := (*C.PoseidonConfig)(cfgPointer)
|
||||
func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.grumpkin_poseidon_load_cuda(&poseidon, cArity, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
__ret := C.grumpkin_poseidon_hash_cuda(cScalars, cResults, cNumberOfStates, cArity, cConstants, cCfg)
|
||||
func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError {
|
||||
core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx)
|
||||
core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx)
|
||||
|
||||
cInputs := (*C.scalar_t)(inputs.AsUnsafePointer())
|
||||
cOutput := (*C.scalar_t)(output.AsUnsafePointer())
|
||||
cNumberOfStates := (C.uint)(numberOfStates)
|
||||
cInputBlockLen := (C.uint)(inputBlockLen)
|
||||
cOutputLen := (C.uint)(outputLen)
|
||||
cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg))
|
||||
__ret := C.grumpkin_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func CreateOptimizedPoseidonConstants[T any](arity, fullRoundsHalfs, partialRounds int, constants core.HostOrDeviceSlice, ctx cr.DeviceContext, poseidonConstants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cFullRoundsHalfs := (C.int)(fullRoundsHalfs)
|
||||
cPartialRounds := (C.int)(partialRounds)
|
||||
cConstants := (*C.scalar_t)(constants.AsUnsafePointer())
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cPoseidonConstants := (*C.PoseidonConstants)(unsafe.Pointer(poseidonConstants))
|
||||
|
||||
__ret := C.grumpkin_create_optimized_poseidon_constants_cuda(cArity, cFullRoundsHalfs, cPartialRounds, cConstants, cCtx, cPoseidonConstants)
|
||||
func (poseidon *Poseidon) Delete() core.IcicleError {
|
||||
__ret := C.grumpkin_poseidon_delete_cuda(poseidon.handle)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func InitOptimizedPoseidonConstantsCuda[T any](arity int, ctx cr.DeviceContext, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
|
||||
__ret := C.grumpkin_init_optimized_poseidon_constants_cuda(cArity, cCtx, cConstants)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig {
|
||||
cfg := core.GetDefaultSpongeConfig()
|
||||
cfg.InputRate = poseidon.width - 1
|
||||
cfg.OutputRate = poseidon.width
|
||||
return cfg
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
grumpkin "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/grumpkin"
|
||||
poseidon "github.com/ingonyama-zk/icicle/v2/wrappers/golang/curves/grumpkin/poseidon"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestPoseidon(t *testing.T) {
|
||||
@@ -14,14 +15,11 @@ func TestPoseidon(t *testing.T) {
|
||||
arity := 2
|
||||
numberOfStates := 1
|
||||
|
||||
cfg := poseidon.GetDefaultPoseidonConfig()
|
||||
cfg.IsAsync = true
|
||||
stream, _ := cr.CreateStream()
|
||||
cfg.Ctx.Stream = &stream
|
||||
ctx, _ := cr.GetDefaultDeviceContext()
|
||||
p, err := poseidon.Load(uint32(arity), &ctx)
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
var constants core.PoseidonConstants[grumpkin.ScalarField]
|
||||
|
||||
poseidon.InitOptimizedPoseidonConstantsCuda(arity, cfg.Ctx, &constants) //generate constants
|
||||
cfg := p.GetDefaultSpongeConfig()
|
||||
|
||||
scalars := grumpkin.GenerateScalars(numberOfStates * arity)
|
||||
scalars[0] = scalars[0].Zero()
|
||||
@@ -30,13 +28,13 @@ func TestPoseidon(t *testing.T) {
|
||||
scalarsCopy := core.HostSliceFromElements(scalars[:numberOfStates*arity])
|
||||
|
||||
var deviceInput core.DeviceSlice
|
||||
scalarsCopy.CopyToDeviceAsync(&deviceInput, stream, true)
|
||||
scalarsCopy.CopyToDevice(&deviceInput, true)
|
||||
var deviceOutput core.DeviceSlice
|
||||
deviceOutput.MallocAsync(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement(), stream)
|
||||
deviceOutput.Malloc(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement())
|
||||
|
||||
poseidon.PoseidonHash(deviceInput, deviceOutput, numberOfStates, &cfg, &constants) //run Hash function
|
||||
err = p.HashMany(deviceInput, deviceOutput, uint32(numberOfStates), 1, 1, &cfg) //run Hash function
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
output := make(core.HostSlice[grumpkin.ScalarField], numberOfStates)
|
||||
output.CopyFromDeviceAsync(&deviceOutput, stream)
|
||||
|
||||
output.CopyFromDevice(&deviceOutput)
|
||||
}
|
||||
|
||||
@@ -3,55 +3,85 @@ package {{.PackageName}}
|
||||
// #cgo CFLAGS: -I./include/
|
||||
// #include "poseidon.h"
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ingonyama-zk/icicle/v2/wrappers/golang/core"
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
{{.Field}} "github.com/ingonyama-zk/icicle/v2/wrappers/golang/{{.BaseImportPath}}"
|
||||
)
|
||||
|
||||
func GetDefaultPoseidonConfig() core.PoseidonConfig {
|
||||
return core.GetDefaultPoseidonConfig()
|
||||
type PoseidonHandler = C.struct_PoseidonInst
|
||||
type Poseidon struct {
|
||||
width uint32
|
||||
handle *PoseidonHandler
|
||||
}
|
||||
|
||||
func PoseidonHash[T any](scalars, results core.HostOrDeviceSlice, numberOfStates int, cfg *core.PoseidonConfig, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
scalarsPointer, resultsPointer, cfgPointer := core.PoseidonCheck(scalars, results, cfg, constants, numberOfStates)
|
||||
func Create(arity uint32, alpha uint32, fullRoundsHalf uint32, partialRounds uint32, scalars core.HostOrDeviceSlice, mdsMatrix core.HostOrDeviceSlice, nonSparseMatrix core.HostOrDeviceSlice, sparseMatrices core.HostOrDeviceSlice, domainTag {{.Field}}.ScalarField, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cAlpha := (C.uint)(alpha)
|
||||
cFullRoundsHalf := (C.uint)(fullRoundsHalf)
|
||||
cPartialRounds := (C.uint)(partialRounds)
|
||||
cScalars := (*C.scalar_t)(scalars.AsUnsafePointer())
|
||||
cMdsMatrix := (*C.scalar_t)(mdsMatrix.AsUnsafePointer())
|
||||
cNonSparseMatrix := (*C.scalar_t)(nonSparseMatrix.AsUnsafePointer())
|
||||
cSparseMatrices := (*C.scalar_t)(sparseMatrices.AsUnsafePointer())
|
||||
cDomainTag := (*C.scalar_t)(unsafe.Pointer(&domainTag))
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.{{.Field}}_poseidon_create_cuda(&poseidon, cArity, cAlpha, cFullRoundsHalf, cPartialRounds, cScalars, cMdsMatrix, cNonSparseMatrix, cSparseMatrices, cDomainTag, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
cScalars := (*C.scalar_t)(scalarsPointer)
|
||||
cResults := (*C.scalar_t)(resultsPointer)
|
||||
cNumberOfStates := (C.int)(numberOfStates)
|
||||
cArity := (C.int)(constants.Arity)
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
cCfg := (*C.PoseidonConfig)(cfgPointer)
|
||||
func Load(arity uint32, ctx *cr.DeviceContext) (*Poseidon, core.IcicleError) {
|
||||
var poseidon *PoseidonHandler
|
||||
cArity := (C.uint)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(ctx))
|
||||
__ret := C.{{.Field}}_poseidon_load_cuda(&poseidon, cArity, cCtx)
|
||||
err := core.FromCudaError((cr.CudaError)(__ret))
|
||||
if err.IcicleErrorCode != core.IcicleSuccess {
|
||||
return nil, err
|
||||
}
|
||||
p := Poseidon{handle: poseidon, width: arity + 1}
|
||||
runtime.SetFinalizer(&p, func(p *Poseidon) {
|
||||
p.Delete()
|
||||
})
|
||||
return &p, err
|
||||
}
|
||||
|
||||
__ret := C.{{.Field}}_poseidon_hash_cuda(cScalars, cResults, cNumberOfStates, cArity, cConstants, cCfg)
|
||||
func (poseidon *Poseidon) HashMany(inputs core.HostOrDeviceSlice, output core.HostOrDeviceSlice, numberOfStates uint32, inputBlockLen uint32, outputLen uint32, cfg *core.SpongeConfig) core.IcicleError {
|
||||
core.SpongeInputCheck(inputs, numberOfStates, inputBlockLen, cfg.InputRate, &cfg.Ctx)
|
||||
core.SpongeOutputsCheck(output, numberOfStates, outputLen, poseidon.width, false, &cfg.Ctx)
|
||||
|
||||
cInputs := (*C.scalar_t)(inputs.AsUnsafePointer())
|
||||
cOutput := (*C.scalar_t)(output.AsUnsafePointer())
|
||||
cNumberOfStates := (C.uint)(numberOfStates)
|
||||
cInputBlockLen := (C.uint)(inputBlockLen)
|
||||
cOutputLen := (C.uint)(outputLen)
|
||||
cCfg := (*C.SpongeConfig)(unsafe.Pointer(cfg))
|
||||
__ret := C.{{.Field}}_poseidon_hash_many_cuda(poseidon.handle, cInputs, cOutput, cNumberOfStates, cInputBlockLen, cOutputLen, cCfg)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func CreateOptimizedPoseidonConstants[T any](arity, fullRoundsHalfs, partialRounds int, constants core.HostOrDeviceSlice, ctx cr.DeviceContext, poseidonConstants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cFullRoundsHalfs := (C.int)(fullRoundsHalfs)
|
||||
cPartialRounds := (C.int)(partialRounds)
|
||||
cConstants := (*C.scalar_t)(constants.AsUnsafePointer())
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cPoseidonConstants := (*C.PoseidonConstants)(unsafe.Pointer(poseidonConstants))
|
||||
|
||||
__ret := C.{{.Field}}_create_optimized_poseidon_constants_cuda(cArity, cFullRoundsHalfs, cPartialRounds, cConstants, cCtx, cPoseidonConstants)
|
||||
func (poseidon *Poseidon) Delete() core.IcicleError {
|
||||
__ret := C.{{.Field}}_poseidon_delete_cuda(poseidon.handle)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
}
|
||||
|
||||
func InitOptimizedPoseidonConstantsCuda[T any](arity int, ctx cr.DeviceContext, constants *core.PoseidonConstants[T]) core.IcicleError {
|
||||
|
||||
cArity := (C.int)(arity)
|
||||
cCtx := (*C.DeviceContext)(unsafe.Pointer(&ctx))
|
||||
cConstants := (*C.PoseidonConstants)(unsafe.Pointer(constants))
|
||||
|
||||
__ret := C.{{.Field}}_init_optimized_poseidon_constants_cuda(cArity, cCtx, cConstants)
|
||||
err := (cr.CudaError)(__ret)
|
||||
return core.FromCudaError(err)
|
||||
func (poseidon *Poseidon) GetDefaultSpongeConfig() core.SpongeConfig {
|
||||
cfg := core.GetDefaultSpongeConfig()
|
||||
cfg.InputRate = poseidon.width - 1
|
||||
cfg.OutputRate = poseidon.width
|
||||
return cfg
|
||||
}
|
||||
|
||||
@@ -9,14 +9,40 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct scalar_t scalar_t;
|
||||
typedef struct PoseidonConfig PoseidonConfig;
|
||||
typedef struct DeviceContext DeviceContext;
|
||||
typedef struct PoseidonConstants PoseidonConstants;
|
||||
typedef struct TreeBuilderConfig TreeBuilderConfig;
|
||||
typedef struct PoseidonInst PoseidonInst;
|
||||
typedef struct SpongeConfig SpongeConfig;
|
||||
|
||||
|
||||
cudaError_t {{.Field}}_poseidon_hash_cuda(const scalar_t* input, scalar_t* output, int number_of_states, int arity, PoseidonConstants* constants, PoseidonConfig* config);
|
||||
cudaError_t {{.Field}}_create_optimized_poseidon_constants_cuda(int arity, int full_rounds_halfs, int partial_rounds, const scalar_t* constants, DeviceContext* ctx, PoseidonConstants* poseidon_constants);
|
||||
cudaError_t {{.Field}}_init_optimized_poseidon_constants_cuda(int arity, DeviceContext* ctx, PoseidonConstants* constants);
|
||||
cudaError_t {{.Field}}_poseidon_create_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const scalar_t* round_constants,
|
||||
const scalar_t* mds_matrix,
|
||||
const scalar_t* non_sparse_matrix,
|
||||
const scalar_t* sparse_matrices,
|
||||
const scalar_t* domain_tag,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t {{.Field}}_poseidon_load_cuda(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
DeviceContext* ctx);
|
||||
|
||||
cudaError_t {{.Field}}_poseidon_hash_many_cuda(
|
||||
const PoseidonInst* poseidon,
|
||||
const scalar_t* inputs,
|
||||
scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
SpongeConfig* cfg);
|
||||
|
||||
cudaError_t {{.Field}}_poseidon_delete_cuda(PoseidonInst* poseidon);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -2,37 +2,24 @@ package tests
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
|
||||
core "github.com/ingonyama-zk/icicle/v2/wrappers/golang/core"
|
||||
cr "github.com/ingonyama-zk/icicle/v2/wrappers/golang/cuda_runtime"
|
||||
{{.Field}} "github.com/ingonyama-zk/icicle/v2/wrappers/golang/{{.BaseImportPath}}"
|
||||
poseidon "github.com/ingonyama-zk/icicle/v2/wrappers/golang/{{.BaseImportPath}}/poseidon"
|
||||
|
||||
{{if eq .Field "bls12_381"}}
|
||||
"fmt"
|
||||
"github.com/stretchr/testify/assert"
|
||||
{{end}}
|
||||
)
|
||||
{{if eq .Field "bls12_381"}}
|
||||
func formatOutput(x {{.Field}}.{{.FieldPrefix}}Field) string {
|
||||
r := x.GetLimbs()
|
||||
return fmt.Sprintf("%08x%08x%08x%08x%08x%08x%08x%08x", r[7], r[6], r[5], r[4], r[3], r[2], r[1], r[0])
|
||||
}
|
||||
{{end}}
|
||||
|
||||
func TestPoseidon(t *testing.T) {
|
||||
|
||||
arity := 2
|
||||
numberOfStates := 1
|
||||
|
||||
cfg := poseidon.GetDefaultPoseidonConfig()
|
||||
cfg.IsAsync = true
|
||||
stream, _ := cr.CreateStream()
|
||||
cfg.Ctx.Stream = &stream
|
||||
ctx, _ := cr.GetDefaultDeviceContext()
|
||||
p, err := poseidon.Load(uint32(arity), &ctx)
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
var constants core.PoseidonConstants[{{.Field}}.{{.FieldPrefix}}Field]
|
||||
|
||||
poseidon.InitOptimizedPoseidonConstantsCuda(arity, cfg.Ctx, &constants) //generate constants
|
||||
cfg := p.GetDefaultSpongeConfig()
|
||||
|
||||
scalars := {{.Field}}.GenerateScalars(numberOfStates * arity)
|
||||
scalars[0] = scalars[0].Zero()
|
||||
@@ -41,19 +28,13 @@ func TestPoseidon(t *testing.T) {
|
||||
scalarsCopy := core.HostSliceFromElements(scalars[:numberOfStates*arity])
|
||||
|
||||
var deviceInput core.DeviceSlice
|
||||
scalarsCopy.CopyToDeviceAsync(&deviceInput, stream, true)
|
||||
scalarsCopy.CopyToDevice(&deviceInput, true)
|
||||
var deviceOutput core.DeviceSlice
|
||||
deviceOutput.MallocAsync(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement(), stream)
|
||||
deviceOutput.Malloc(numberOfStates*scalarsCopy.SizeOfElement(), scalarsCopy.SizeOfElement())
|
||||
|
||||
poseidon.PoseidonHash(deviceInput, deviceOutput, numberOfStates, &cfg, &constants) //run Hash function
|
||||
err = p.HashMany(deviceInput, deviceOutput, uint32(numberOfStates), 1, 1, &cfg) //run Hash function
|
||||
assert.Equal(t, core.IcicleSuccess, err.IcicleErrorCode)
|
||||
|
||||
output := make(core.HostSlice[{{.Field}}.{{.FieldPrefix}}Field], numberOfStates)
|
||||
output.CopyFromDeviceAsync(&deviceOutput, stream)
|
||||
|
||||
{{if eq .Field "bls12_381"}}
|
||||
expectedString := "48fe0b1331196f6cdb33a7c6e5af61b76fd388e1ef1d3d418be5147f0e4613d4" //This result is from https://github.com/triplewz/poseidon
|
||||
outputString := formatOutput(output[0])
|
||||
|
||||
assert.Equal(t, outputString, expectedString, "Poseidon hash does not match expected result")
|
||||
{{end}}
|
||||
output := make(core.HostSlice[{{.Field}}.ScalarField], numberOfStates)
|
||||
output.CopyFromDevice(&deviceOutput)
|
||||
}
|
||||
|
||||
136
wrappers/rust/icicle-core/src/hash.rs
Normal file
136
wrappers/rust/icicle-core/src/hash.rs
Normal file
@@ -0,0 +1,136 @@
|
||||
use std::ffi::c_void;
|
||||
|
||||
use icicle_cuda_runtime::{
|
||||
device::check_device,
|
||||
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
|
||||
memory::HostOrDeviceSlice,
|
||||
};
|
||||
|
||||
use crate::ntt::IcicleResult;
|
||||
|
||||
/// Struct that encodes Sponge hash parameters.
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SpongeConfig<'a> {
|
||||
/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
|
||||
pub ctx: DeviceContext<'a>,
|
||||
pub(crate) are_inputs_on_device: bool,
|
||||
pub(crate) are_outputs_on_device: bool,
|
||||
pub input_rate: u32,
|
||||
pub output_rate: u32,
|
||||
pub offset: u32,
|
||||
|
||||
/// If true - input should be already aligned for poseidon permutation.
|
||||
/// Aligned format: [0, A, B, 0, C, D, ...] (as you might get by using loop_state)
|
||||
/// not aligned format: [A, B, 0, C, D, 0, ...] (as you might get from cudaMemcpy2D)
|
||||
pub recursive_squeeze: bool,
|
||||
|
||||
/// If true, hash results will also be copied in the input pointer in aligned format
|
||||
pub aligned: bool,
|
||||
/// Whether to run the sponge operations asynchronously. If set to `true`, the functions will be non-blocking and you'd need to synchronize
|
||||
/// it explicitly by running `stream.synchronize()`. If set to false, the functions will block the current CPU thread.
|
||||
pub is_async: bool,
|
||||
}
|
||||
|
||||
impl<'a> Default for SpongeConfig<'a> {
|
||||
fn default() -> Self {
|
||||
Self::default_for_device(DEFAULT_DEVICE_ID)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> SpongeConfig<'a> {
|
||||
pub(crate) fn default_for_device(device_id: usize) -> Self {
|
||||
SpongeConfig {
|
||||
ctx: DeviceContext::default_for_device(device_id),
|
||||
are_inputs_on_device: false,
|
||||
are_outputs_on_device: false,
|
||||
input_rate: 0,
|
||||
output_rate: 0,
|
||||
offset: 0,
|
||||
recursive_squeeze: false,
|
||||
aligned: false,
|
||||
is_async: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait SpongeHash<PreImage, Image> {
|
||||
fn hash_many(
|
||||
&self,
|
||||
inputs: &(impl HostOrDeviceSlice<PreImage> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<Image> + ?Sized),
|
||||
number_of_states: usize,
|
||||
input_block_len: usize,
|
||||
output_len: usize,
|
||||
cfg: &SpongeConfig,
|
||||
) -> IcicleResult<()>;
|
||||
|
||||
fn default_config<'a>(&self) -> SpongeConfig<'a>;
|
||||
|
||||
fn get_handle(&self) -> *const c_void;
|
||||
}
|
||||
|
||||
pub(crate) fn sponge_check_input<T>(
|
||||
inputs: &(impl HostOrDeviceSlice<T> + ?Sized),
|
||||
number_of_states: usize,
|
||||
input_block_len: usize,
|
||||
input_rate: usize,
|
||||
ctx: &DeviceContext,
|
||||
) {
|
||||
if input_block_len > input_rate {
|
||||
panic!(
|
||||
"input block len ({}) can't be greater than input rate ({})",
|
||||
input_block_len, input_rate
|
||||
);
|
||||
}
|
||||
|
||||
let inputs_size_expected = input_block_len * number_of_states;
|
||||
if inputs.len() < inputs_size_expected {
|
||||
panic!(
|
||||
"inputs len is {}; but needs to be at least {}",
|
||||
inputs.len(),
|
||||
inputs_size_expected,
|
||||
);
|
||||
}
|
||||
|
||||
let ctx_device_id = ctx.device_id;
|
||||
if let Some(device_id) = inputs.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in inputs and context are different"
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
}
|
||||
|
||||
pub(crate) fn sponge_check_outputs<T>(
|
||||
outputs: &(impl HostOrDeviceSlice<T> + ?Sized),
|
||||
number_of_states: usize,
|
||||
output_len: usize,
|
||||
width: usize,
|
||||
recursive: bool,
|
||||
ctx: &DeviceContext,
|
||||
) {
|
||||
let outputs_size_expected = if recursive {
|
||||
width * number_of_states
|
||||
} else {
|
||||
output_len * number_of_states
|
||||
};
|
||||
|
||||
if outputs.len() < outputs_size_expected {
|
||||
panic!(
|
||||
"outputs len is {}; but needs to be at least {}",
|
||||
outputs.len(),
|
||||
outputs_size_expected,
|
||||
);
|
||||
}
|
||||
|
||||
let ctx_device_id = ctx.device_id;
|
||||
if let Some(device_id) = outputs.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in outputs and context are different"
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
}
|
||||
@@ -1,7 +1,10 @@
|
||||
use std::ffi::c_void;
|
||||
|
||||
pub mod curve;
|
||||
pub mod ecntt;
|
||||
pub mod error;
|
||||
pub mod field;
|
||||
pub mod hash;
|
||||
pub mod msm;
|
||||
pub mod ntt;
|
||||
pub mod polynomials;
|
||||
@@ -18,3 +21,11 @@ where
|
||||
<Self::ScalarField as traits::FieldImpl>::Config: ntt::NTT<Self::ScalarField, Self::ScalarField>,
|
||||
{
|
||||
}
|
||||
|
||||
/// C-layout descriptor of a matrix: an untyped pointer to its values plus its dimensions.
///
/// The struct does not own the memory behind `values`.
/// NOTE(review): element type and row/column ordering are not visible here — confirm against the consumer.
#[repr(C)]
#[derive(Debug)]
pub struct Matrix {
    /// Untyped pointer to the first value.
    pub values: *const c_void,
    /// Matrix width.
    pub width: usize,
    /// Matrix height.
    pub height: usize,
}
|
||||
|
||||
@@ -1,212 +1,157 @@
|
||||
#[doc(hidden)]
|
||||
pub mod tests;
|
||||
|
||||
use icicle_cuda_runtime::{
|
||||
device::check_device,
|
||||
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
|
||||
memory::{DeviceSlice, HostOrDeviceSlice},
|
||||
use std::{ffi::c_void, marker::PhantomData};
|
||||
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, memory::HostOrDeviceSlice};
|
||||
|
||||
use crate::{
|
||||
error::IcicleResult,
|
||||
hash::{sponge_check_input, sponge_check_outputs, SpongeConfig, SpongeHash},
|
||||
traits::FieldImpl,
|
||||
};
|
||||
|
||||
use crate::{error::IcicleResult, traits::FieldImpl};
|
||||
|
||||
#[repr(C)]
|
||||
pub struct PoseidonConstants<'a, F: FieldImpl> {
|
||||
arity: u32,
|
||||
|
||||
partial_rounds: u32,
|
||||
|
||||
full_rounds_half: u32,
|
||||
|
||||
/// These should be pointers to data allocated on device
|
||||
round_constants: &'a DeviceSlice<F>,
|
||||
mds_matrix: &'a DeviceSlice<F>,
|
||||
non_sparse_matrix: &'a DeviceSlice<F>,
|
||||
sparse_matrices: &'a DeviceSlice<F>,
|
||||
|
||||
/// Domain tag is the first element in the Poseidon state.
|
||||
/// For the Merkle tree mode it should equal 2^arity - 1
|
||||
domain_tag: F,
|
||||
pub type PoseidonHandle = *const c_void;
|
||||
pub struct Poseidon<F>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: PoseidonImpl<F>,
|
||||
{
|
||||
width: usize,
|
||||
handle: PoseidonHandle,
|
||||
phantom: PhantomData<F>,
|
||||
}
|
||||
|
||||
/// Struct that encodes Poseidon parameters to be passed into the [poseidon_hash_many](poseidon_hash_many) function.
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PoseidonConfig<'a> {
|
||||
/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
|
||||
pub ctx: DeviceContext<'a>,
|
||||
|
||||
are_inputs_on_device: bool,
|
||||
|
||||
are_outputs_on_device: bool,
|
||||
|
||||
/// If true, input is considered to be a states vector, holding the preimages
|
||||
/// in aligned or not aligned format. Memory under the input pointer will be used for states
|
||||
/// If false, fresh states memory will be allocated and input will be copied into it
|
||||
pub input_is_a_state: bool,
|
||||
|
||||
/// If true - input should be already aligned for poseidon permutation.
|
||||
/// Aligned format: [0, A, B, 0, C, D, ...] (as you might get by using loop_state)
|
||||
/// not aligned format: [A, B, 0, C, D, 0, ...] (as you might get from cudaMemcpy2D)
|
||||
pub aligned: bool,
|
||||
|
||||
/// If true, hash results will also be copied in the input pointer in aligned format
|
||||
pub loop_state: bool,
|
||||
|
||||
/// Whether to run Poseidon asynchronously. If set to `true`, Poseidon will be non-blocking
|
||||
/// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
|
||||
/// If set to `false`, Poseidon will block the current CPU thread.
|
||||
pub is_async: bool,
|
||||
}
|
||||
|
||||
impl<'a> Default for PoseidonConfig<'a> {
|
||||
fn default() -> Self {
|
||||
Self::default_for_device(DEFAULT_DEVICE_ID)
|
||||
impl<F> Poseidon<F>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: PoseidonImpl<F>,
|
||||
{
|
||||
pub fn load(arity: usize, ctx: &DeviceContext) -> IcicleResult<Self> {
|
||||
<<F as FieldImpl>::Config as PoseidonImpl<F>>::load(arity as u32, ctx).and_then(|handle| {
|
||||
Ok(Self {
|
||||
width: arity + 1,
|
||||
handle,
|
||||
phantom: PhantomData,
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> PoseidonConfig<'a> {
|
||||
pub fn default_for_device(device_id: usize) -> Self {
|
||||
Self {
|
||||
ctx: DeviceContext::default_for_device(device_id),
|
||||
are_inputs_on_device: false,
|
||||
are_outputs_on_device: false,
|
||||
input_is_a_state: false,
|
||||
aligned: false,
|
||||
loop_state: false,
|
||||
is_async: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Poseidon<F: FieldImpl> {
|
||||
fn create_optimized_constants<'a>(
|
||||
arity: u32,
|
||||
pub fn new(
|
||||
arity: usize,
|
||||
alpha: u32,
|
||||
full_rounds_half: u32,
|
||||
partial_rounds: u32,
|
||||
constants: &mut [F],
|
||||
round_constants: &[F],
|
||||
mds_matrix: &[F],
|
||||
non_sparse_matrix: &[F],
|
||||
sparse_matrices: &[F],
|
||||
domain_tag: F,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<PoseidonConstants<'a, F>>;
|
||||
fn load_optimized_constants<'a>(arity: u32, ctx: &DeviceContext) -> IcicleResult<PoseidonConstants<'a, F>>;
|
||||
fn poseidon_unchecked(
|
||||
input: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
) -> IcicleResult<Self> {
|
||||
<<F as FieldImpl>::Config as PoseidonImpl<F>>::create(
|
||||
arity as u32,
|
||||
alpha,
|
||||
full_rounds_half,
|
||||
partial_rounds,
|
||||
round_constants,
|
||||
mds_matrix,
|
||||
non_sparse_matrix,
|
||||
sparse_matrices,
|
||||
domain_tag,
|
||||
ctx,
|
||||
)
|
||||
.and_then(|handle| {
|
||||
Ok(Self {
|
||||
width: arity + 1,
|
||||
handle,
|
||||
phantom: PhantomData,
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<F> SpongeHash<F, F> for Poseidon<F>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: PoseidonImpl<F>,
|
||||
{
|
||||
fn get_handle(&self) -> *const c_void {
|
||||
self.handle
|
||||
}
|
||||
|
||||
fn hash_many(
|
||||
&self,
|
||||
inputs: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: usize,
|
||||
input_block_len: usize,
|
||||
output_len: usize,
|
||||
cfg: &SpongeConfig,
|
||||
) -> IcicleResult<()> {
|
||||
sponge_check_input(inputs, number_of_states, input_block_len, self.width - 1, &cfg.ctx);
|
||||
sponge_check_outputs(output, number_of_states, output_len, self.width, false, &cfg.ctx);
|
||||
|
||||
let mut local_cfg = cfg.clone();
|
||||
local_cfg.are_inputs_on_device = inputs.is_on_device();
|
||||
local_cfg.are_outputs_on_device = output.is_on_device();
|
||||
|
||||
<<F as FieldImpl>::Config as PoseidonImpl<F>>::hash_many(
|
||||
inputs,
|
||||
output,
|
||||
number_of_states as u32,
|
||||
input_block_len as u32,
|
||||
output_len as u32,
|
||||
self.handle,
|
||||
&local_cfg,
|
||||
)
|
||||
}
|
||||
|
||||
fn default_config<'a>(&self) -> SpongeConfig<'a> {
|
||||
let mut cfg = SpongeConfig::default();
|
||||
cfg.input_rate = self.width as u32 - 1;
|
||||
cfg.output_rate = self.width as u32;
|
||||
cfg
|
||||
}
|
||||
}
|
||||
|
||||
impl<F> Drop for Poseidon<F>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: PoseidonImpl<F>,
|
||||
{
|
||||
fn drop(&mut self) {
|
||||
<<F as FieldImpl>::Config as PoseidonImpl<F>>::delete(self.handle).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
pub trait PoseidonImpl<F: FieldImpl> {
|
||||
fn create(
|
||||
arity: u32,
|
||||
alpha: u32,
|
||||
full_rounds_half: u32,
|
||||
partial_rounds: u32,
|
||||
round_constants: &[F],
|
||||
mds_matrix: &[F],
|
||||
non_sparse_matrix: &[F],
|
||||
sparse_matrices: &[F],
|
||||
domain_tag: F,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<PoseidonHandle>;
|
||||
|
||||
fn load(arity: u32, ctx: &DeviceContext) -> IcicleResult<PoseidonHandle>;
|
||||
|
||||
fn hash_many(
|
||||
inputs: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
arity: u32,
|
||||
constants: &PoseidonConstants<F>,
|
||||
config: &PoseidonConfig,
|
||||
input_block_len: u32,
|
||||
output_len: u32,
|
||||
poseidon: PoseidonHandle,
|
||||
cfg: &SpongeConfig,
|
||||
) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
/// Loads pre-calculated poseidon constants on the GPU.
|
||||
pub fn load_optimized_poseidon_constants<'a, F>(
|
||||
arity: u32,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<PoseidonConstants<'a, F>>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as Poseidon<F>>::load_optimized_constants(arity, ctx)
|
||||
}
|
||||
|
||||
/// Creates new instance of poseidon constants on the GPU.
|
||||
pub fn create_optimized_poseidon_constants<'a, F>(
|
||||
arity: u32,
|
||||
ctx: &DeviceContext,
|
||||
full_rounds_half: u32,
|
||||
partial_rounds: u32,
|
||||
constants: &mut [F],
|
||||
) -> IcicleResult<PoseidonConstants<'a, F>>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as Poseidon<F>>::create_optimized_constants(
|
||||
arity,
|
||||
full_rounds_half,
|
||||
partial_rounds,
|
||||
constants,
|
||||
ctx,
|
||||
)
|
||||
}
|
||||
|
||||
/// Computes the poseidon hashes for multiple preimages.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input` - a pointer to the input data. May point to a vector of preimages or a vector of states filled with preimages.
|
||||
///
|
||||
/// * `output` - a pointer to the output data. Must be at least of size [number_of_states](number_of_states)
|
||||
///
|
||||
/// * `number_of_states` - number of input blocks of size `arity`
|
||||
///
|
||||
/// * `arity` - the arity of the hash function (the size of 1 preimage)
|
||||
///
|
||||
/// * `constants` - Poseidon constants.
|
||||
///
|
||||
/// * `config` - config used to specify extra arguments of the Poseidon.
|
||||
pub fn poseidon_hash_many<F>(
|
||||
input: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
arity: u32,
|
||||
constants: &PoseidonConstants<F>,
|
||||
config: &PoseidonConfig,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon<F>,
|
||||
{
|
||||
let input_len_required = if config.input_is_a_state {
|
||||
number_of_states * (arity + 1)
|
||||
} else {
|
||||
number_of_states * arity
|
||||
};
|
||||
|
||||
if input.len() < input_len_required as usize {
|
||||
panic!(
|
||||
"input len is {}; but needs to be at least {}",
|
||||
input.len(),
|
||||
input_len_required
|
||||
);
|
||||
}
|
||||
|
||||
if output.len() < number_of_states as usize {
|
||||
panic!(
|
||||
"output len is {}; but needs to be at least {}",
|
||||
output.len(),
|
||||
number_of_states
|
||||
);
|
||||
}
|
||||
|
||||
let ctx_device_id = config
|
||||
.ctx
|
||||
.device_id;
|
||||
if let Some(device_id) = input.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in input and context are different"
|
||||
);
|
||||
}
|
||||
if let Some(device_id) = output.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in output and context are different"
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
let mut local_cfg = config.clone();
|
||||
local_cfg.are_inputs_on_device = input.is_on_device();
|
||||
local_cfg.are_outputs_on_device = output.is_on_device();
|
||||
|
||||
<<F as FieldImpl>::Config as Poseidon<F>>::poseidon_unchecked(
|
||||
input,
|
||||
output,
|
||||
number_of_states,
|
||||
arity,
|
||||
constants,
|
||||
&local_cfg,
|
||||
)
|
||||
fn delete(poseidon: PoseidonHandle) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
@@ -218,91 +163,110 @@ macro_rules! impl_poseidon {
|
||||
$field_config:ident
|
||||
) => {
|
||||
mod $field_prefix_ident {
|
||||
use crate::poseidon::{$field, $field_config, CudaError, DeviceContext, PoseidonConfig, PoseidonConstants};
|
||||
use crate::poseidon::{$field, $field_config, CudaError, DeviceContext, PoseidonHandle, SpongeConfig};
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "_create_optimized_poseidon_constants_cuda")]
|
||||
pub(crate) fn _create_optimized_constants(
|
||||
#[link_name = concat!($field_prefix, "_poseidon_create_cuda")]
|
||||
pub(crate) fn create(
|
||||
poseidon: *mut PoseidonHandle,
|
||||
arity: u32,
|
||||
alpha: u32,
|
||||
full_rounds_half: u32,
|
||||
partial_rounds: u32,
|
||||
constants: *mut $field,
|
||||
round_constants: *const $field,
|
||||
mds_matrix: *const $field,
|
||||
non_sparse_matrix: *const $field,
|
||||
sparse_matrices: *const $field,
|
||||
domain_tag: $field,
|
||||
ctx: &DeviceContext,
|
||||
poseidon_constants: *mut PoseidonConstants<$field>,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_init_optimized_poseidon_constants_cuda")]
|
||||
pub(crate) fn _load_optimized_constants(
|
||||
arity: u32,
|
||||
ctx: &DeviceContext,
|
||||
constants: *mut PoseidonConstants<$field>,
|
||||
) -> CudaError;
|
||||
#[link_name = concat!($field_prefix, "_poseidon_load_cuda")]
|
||||
pub(crate) fn load(poseidon: *mut PoseidonHandle, arity: u32, ctx: &DeviceContext) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_poseidon_hash_cuda")]
|
||||
#[link_name = concat!($field_prefix, "_poseidon_delete_cuda")]
|
||||
pub(crate) fn delete(poseidon: PoseidonHandle) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_poseidon_hash_many_cuda")]
|
||||
pub(crate) fn hash_many(
|
||||
input: *mut $field,
|
||||
poseidon: PoseidonHandle,
|
||||
inputs: *const $field,
|
||||
output: *mut $field,
|
||||
number_of_states: u32,
|
||||
arity: u32,
|
||||
constants: &PoseidonConstants<$field>,
|
||||
config: &PoseidonConfig,
|
||||
input_block_len: u32,
|
||||
output_len: u32,
|
||||
cfg: &SpongeConfig,
|
||||
) -> CudaError;
|
||||
}
|
||||
}
|
||||
|
||||
impl Poseidon<$field> for $field_config {
|
||||
fn create_optimized_constants<'a>(
|
||||
impl PoseidonImpl<$field> for $field_config {
|
||||
fn create(
|
||||
arity: u32,
|
||||
alpha: u32,
|
||||
full_rounds_half: u32,
|
||||
partial_rounds: u32,
|
||||
constants: &mut [$field],
|
||||
round_constants: &[$field],
|
||||
mds_matrix: &[$field],
|
||||
non_sparse_matrix: &[$field],
|
||||
sparse_matrices: &[$field],
|
||||
domain_tag: $field,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<PoseidonConstants<'a, $field>> {
|
||||
) -> IcicleResult<PoseidonHandle> {
|
||||
unsafe {
|
||||
let mut poseidon_constants = MaybeUninit::<PoseidonConstants<'a, $field>>::uninit();
|
||||
let err = $field_prefix_ident::_create_optimized_constants(
|
||||
let mut poseidon = MaybeUninit::<PoseidonHandle>::uninit();
|
||||
$field_prefix_ident::create(
|
||||
poseidon.as_mut_ptr(),
|
||||
arity,
|
||||
alpha,
|
||||
full_rounds_half,
|
||||
partial_rounds,
|
||||
constants as *mut _ as *mut $field,
|
||||
round_constants as *const _ as *const $field,
|
||||
mds_matrix as *const _ as *const $field,
|
||||
non_sparse_matrix as *const _ as *const $field,
|
||||
sparse_matrices as *const _ as *const $field,
|
||||
domain_tag,
|
||||
ctx,
|
||||
poseidon_constants.as_mut_ptr(),
|
||||
)
|
||||
.wrap();
|
||||
err.and(Ok(poseidon_constants.assume_init()))
|
||||
.wrap()
|
||||
.and(Ok(poseidon.assume_init()))
|
||||
}
|
||||
}
|
||||
|
||||
fn load_optimized_constants<'a>(
|
||||
arity: u32,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<PoseidonConstants<'a, $field>> {
|
||||
fn load(arity: u32, ctx: &DeviceContext) -> IcicleResult<PoseidonHandle> {
|
||||
unsafe {
|
||||
let mut constants = MaybeUninit::<PoseidonConstants<'a, $field>>::uninit();
|
||||
let err = $field_prefix_ident::_load_optimized_constants(arity, ctx, constants.as_mut_ptr()).wrap();
|
||||
err.and(Ok(constants.assume_init()))
|
||||
let mut poseidon = MaybeUninit::<PoseidonHandle>::uninit();
|
||||
$field_prefix_ident::load(poseidon.as_mut_ptr(), arity, ctx)
|
||||
.wrap()
|
||||
.and(Ok(poseidon.assume_init()))
|
||||
}
|
||||
}
|
||||
|
||||
fn poseidon_unchecked(
|
||||
input: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
fn hash_many(
|
||||
inputs: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
number_of_states: u32,
|
||||
arity: u32,
|
||||
constants: &PoseidonConstants<$field>,
|
||||
config: &PoseidonConfig,
|
||||
input_block_len: u32,
|
||||
output_len: u32,
|
||||
poseidon: PoseidonHandle,
|
||||
cfg: &SpongeConfig,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
$field_prefix_ident::hash_many(
|
||||
input.as_mut_ptr(),
|
||||
poseidon,
|
||||
inputs.as_ptr(),
|
||||
output.as_mut_ptr(),
|
||||
number_of_states,
|
||||
arity,
|
||||
constants,
|
||||
config,
|
||||
input_block_len,
|
||||
output_len,
|
||||
cfg,
|
||||
)
|
||||
.wrap()
|
||||
}
|
||||
}
|
||||
|
||||
fn delete(poseidon: PoseidonHandle) -> IcicleResult<()> {
|
||||
unsafe { $field_prefix_ident::delete(poseidon).wrap() }
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -318,18 +282,3 @@ macro_rules! impl_poseidon_tests {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_poseidon_custom_config_test {
|
||||
(
|
||||
$field:ident,
|
||||
$field_bytes:literal,
|
||||
$field_prefix:literal,
|
||||
$partial_rounds:literal
|
||||
) => {
|
||||
#[test]
|
||||
fn test_poseidon_custom_config() {
|
||||
check_poseidon_custom_config::<$field>($field_bytes, $field_prefix, $partial_rounds)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,105 +1,48 @@
|
||||
use crate::hash::SpongeHash;
|
||||
use crate::traits::FieldImpl;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::memory::{HostOrDeviceSlice, HostSlice};
|
||||
|
||||
use std::io::Read;
|
||||
use std::path::PathBuf;
|
||||
use std::{env, fs::File};
|
||||
use super::{Poseidon, PoseidonImpl};
|
||||
|
||||
use super::{
|
||||
create_optimized_poseidon_constants, load_optimized_poseidon_constants, poseidon_hash_many, Poseidon,
|
||||
PoseidonConfig, PoseidonConstants,
|
||||
};
|
||||
|
||||
pub fn init_poseidon<'a, F: FieldImpl>(arity: u32) -> PoseidonConstants<'a, F>
|
||||
pub fn init_poseidon<F: FieldImpl>(arity: usize) -> Poseidon<F>
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon<F>,
|
||||
<F as FieldImpl>::Config: PoseidonImpl<F>,
|
||||
{
|
||||
let ctx = DeviceContext::default();
|
||||
|
||||
load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap()
|
||||
Poseidon::load(arity, &ctx).unwrap()
|
||||
}
|
||||
|
||||
pub fn _check_poseidon_hash_many<F: FieldImpl>(constants: PoseidonConstants<F>) -> (F, F)
|
||||
pub fn _check_poseidon_hash_many<F: FieldImpl>(poseidon: Poseidon<F>)
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon<F>,
|
||||
<F as FieldImpl>::Config: PoseidonImpl<F>,
|
||||
{
|
||||
let test_size = 1 << 10;
|
||||
let arity = 2u32;
|
||||
let mut inputs = vec![F::one(); test_size * arity as usize];
|
||||
let arity = poseidon.width - 1;
|
||||
let mut inputs = vec![F::one(); test_size * arity];
|
||||
let mut outputs = vec![F::zero(); test_size];
|
||||
|
||||
let input_slice = HostSlice::from_mut_slice(&mut inputs);
|
||||
let output_slice = HostSlice::from_mut_slice(&mut outputs);
|
||||
|
||||
let config = PoseidonConfig::default();
|
||||
poseidon_hash_many::<F>(
|
||||
input_slice,
|
||||
output_slice,
|
||||
test_size as u32,
|
||||
arity as u32,
|
||||
&constants,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
let cfg = poseidon.default_config();
|
||||
poseidon
|
||||
.hash_many(input_slice, output_slice, test_size, arity, 1, &cfg)
|
||||
.unwrap();
|
||||
|
||||
let a1 = output_slice[0];
|
||||
let a2 = output_slice[output_slice.len() - 2];
|
||||
let a2 = output_slice[output_slice.len() - 1];
|
||||
|
||||
println!("first: {:?}, last: {:?}", a1, a2);
|
||||
assert_eq!(a1, a2);
|
||||
|
||||
(a1, a2)
|
||||
}
|
||||
|
||||
pub fn check_poseidon_hash_many<F: FieldImpl>()
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon<F>,
|
||||
<F as FieldImpl>::Config: PoseidonImpl<F>,
|
||||
{
|
||||
for arity in [2, 4] {
|
||||
let constants = init_poseidon::<F>(arity as u32);
|
||||
for arity in [2, 4, 8, 11] {
|
||||
let poseidon = init_poseidon::<F>(arity);
|
||||
|
||||
_check_poseidon_hash_many(constants);
|
||||
_check_poseidon_hash_many(poseidon);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_poseidon_custom_config<F: FieldImpl>(field_bytes: usize, field_prefix: &str, partial_rounds: u32)
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon<F>,
|
||||
{
|
||||
let arity = 2u32;
|
||||
let constants = init_poseidon::<F>(arity as u32);
|
||||
|
||||
let full_rounds_half = 4;
|
||||
|
||||
let ctx = DeviceContext::default();
|
||||
let cargo_manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||
let constants_file = PathBuf::from(cargo_manifest_dir)
|
||||
.join("tests")
|
||||
.join(format!("{}_constants.bin", field_prefix));
|
||||
let mut constants_buf = vec![];
|
||||
File::open(constants_file)
|
||||
.unwrap()
|
||||
.read_to_end(&mut constants_buf)
|
||||
.unwrap();
|
||||
|
||||
let mut custom_constants = vec![];
|
||||
for chunk in constants_buf.chunks(field_bytes) {
|
||||
custom_constants.push(F::from_bytes_le(chunk));
|
||||
}
|
||||
|
||||
let custom_constants = create_optimized_poseidon_constants::<F>(
|
||||
arity as u32,
|
||||
&ctx,
|
||||
full_rounds_half,
|
||||
partial_rounds,
|
||||
&mut custom_constants,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let (a1, a2) = _check_poseidon_hash_many(constants);
|
||||
let (b1, b2) = _check_poseidon_hash_many(custom_constants);
|
||||
|
||||
assert_eq!(a1, b1);
|
||||
assert_eq!(a2, b2);
|
||||
}
|
||||
|
||||
@@ -1,107 +1,66 @@
|
||||
#[doc(hidden)]
|
||||
pub mod tests;
|
||||
|
||||
use icicle_cuda_runtime::{
|
||||
device::check_device,
|
||||
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
|
||||
memory::{DeviceSlice, HostOrDeviceSlice},
|
||||
use std::{ffi::c_void, marker::PhantomData};
|
||||
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, memory::HostOrDeviceSlice};
|
||||
|
||||
use crate::{
|
||||
error::IcicleResult,
|
||||
hash::{sponge_check_input, sponge_check_outputs, SpongeConfig, SpongeHash},
|
||||
traits::FieldImpl,
|
||||
};
|
||||
|
||||
use crate::{error::IcicleResult, traits::FieldImpl};
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum DiffusionStrategy {
|
||||
Default,
|
||||
Montgomery,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum MdsType {
|
||||
Default,
|
||||
Plonky,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum PoseidonMode {
|
||||
Compression,
|
||||
Permutation,
|
||||
pub type Poseidon2Handle = *const c_void;
|
||||
pub struct Poseidon2<F>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F>,
|
||||
{
|
||||
width: usize,
|
||||
handle: Poseidon2Handle,
|
||||
phantom: PhantomData<F>,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct Poseidon2Constants<'a, F: FieldImpl> {
|
||||
width: u32,
|
||||
|
||||
alpha: u32,
|
||||
|
||||
internal_rounds: u32,
|
||||
|
||||
external_rounds: u32,
|
||||
|
||||
round_constants: &'a DeviceSlice<F>,
|
||||
|
||||
inernal_matrix_diag: &'a DeviceSlice<F>,
|
||||
|
||||
pub mds_type: MdsType,
|
||||
|
||||
pub diffusion: DiffusionStrategy,
|
||||
}
|
||||
|
||||
impl<F: FieldImpl> std::fmt::Debug for Poseidon2Constants<'_, F> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}, {}, {}, {}",
|
||||
self.width, self.alpha, self.internal_rounds, self.external_rounds
|
||||
)
|
||||
impl<F> Poseidon2<F>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F>,
|
||||
{
|
||||
pub fn load(
|
||||
width: usize,
|
||||
rate: usize,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<Self> {
|
||||
<<F as FieldImpl>::Config as Poseidon2Impl<F>>::load(width as u32, rate as u32, mds_type, diffusion, ctx)
|
||||
.and_then(|handle| {
|
||||
Ok(Self {
|
||||
width,
|
||||
handle,
|
||||
phantom: PhantomData,
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Struct that encodes Poseidon parameters to be passed into the [poseidon_hash_many](poseidon_hash_many) function.
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Poseidon2Config<'a> {
|
||||
/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
|
||||
pub ctx: DeviceContext<'a>,
|
||||
|
||||
are_states_on_device: bool,
|
||||
|
||||
are_outputs_on_device: bool,
|
||||
|
||||
pub mode: PoseidonMode,
|
||||
|
||||
pub output_index: u32,
|
||||
|
||||
/// Whether to run Poseidon asynchronously. If set to `true`, Poseidon will be non-blocking
|
||||
/// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
|
||||
/// If set to `false`, Poseidon will block the current CPU thread.
|
||||
pub is_async: bool,
|
||||
}
|
||||
|
||||
impl<'a> Default for Poseidon2Config<'a> {
|
||||
fn default() -> Self {
|
||||
Self::default_for_device(DEFAULT_DEVICE_ID)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Poseidon2Config<'a> {
|
||||
pub fn default_for_device(device_id: usize) -> Self {
|
||||
Self {
|
||||
ctx: DeviceContext::default_for_device(device_id),
|
||||
are_states_on_device: false,
|
||||
are_outputs_on_device: false,
|
||||
mode: PoseidonMode::Compression,
|
||||
output_index: 1,
|
||||
is_async: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Poseidon2<F: FieldImpl> {
|
||||
fn create_constants<'a>(
|
||||
width: u32,
|
||||
pub fn new(
|
||||
width: usize,
|
||||
rate: usize,
|
||||
alpha: u32,
|
||||
internal_rounds: u32,
|
||||
external_rounds: u32,
|
||||
@@ -110,191 +69,122 @@ pub trait Poseidon2<F: FieldImpl> {
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<Poseidon2Constants<'a, F>>;
|
||||
fn load_constants<'a>(
|
||||
) -> IcicleResult<Self> {
|
||||
<<F as FieldImpl>::Config as Poseidon2Impl<F>>::create(
|
||||
width as u32,
|
||||
rate as u32,
|
||||
alpha,
|
||||
internal_rounds,
|
||||
external_rounds,
|
||||
round_constants,
|
||||
internal_matrix_diag,
|
||||
mds_type,
|
||||
diffusion,
|
||||
ctx,
|
||||
)
|
||||
.and_then(|handle| {
|
||||
Ok(Self {
|
||||
width,
|
||||
handle,
|
||||
phantom: PhantomData,
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<F> SpongeHash<F, F> for Poseidon2<F>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F>,
|
||||
{
|
||||
fn get_handle(&self) -> *const c_void {
|
||||
self.handle
|
||||
}
|
||||
|
||||
fn hash_many(
|
||||
&self,
|
||||
inputs: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: usize,
|
||||
input_block_len: usize,
|
||||
output_len: usize,
|
||||
cfg: &SpongeConfig,
|
||||
) -> IcicleResult<()> {
|
||||
sponge_check_input(
|
||||
inputs,
|
||||
number_of_states,
|
||||
input_block_len,
|
||||
cfg.input_rate as usize,
|
||||
&cfg.ctx,
|
||||
);
|
||||
sponge_check_outputs(output, number_of_states, output_len, self.width, false, &cfg.ctx);
|
||||
|
||||
let mut local_cfg = cfg.clone();
|
||||
local_cfg.are_inputs_on_device = inputs.is_on_device();
|
||||
local_cfg.are_outputs_on_device = output.is_on_device();
|
||||
|
||||
<<F as FieldImpl>::Config as Poseidon2Impl<F>>::hash_many(
|
||||
inputs,
|
||||
output,
|
||||
number_of_states as u32,
|
||||
input_block_len as u32,
|
||||
output_len as u32,
|
||||
self.handle,
|
||||
&local_cfg,
|
||||
)
|
||||
}
|
||||
|
||||
fn default_config<'a>(&self) -> SpongeConfig<'a> {
|
||||
let mut cfg = SpongeConfig::default();
|
||||
cfg.input_rate = self.width as u32;
|
||||
cfg.output_rate = self.width as u32;
|
||||
cfg
|
||||
}
|
||||
}
|
||||
|
||||
impl<F> Drop for Poseidon2<F>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F>,
|
||||
{
|
||||
fn drop(&mut self) {
|
||||
<<F as FieldImpl>::Config as Poseidon2Impl<F>>::delete(self.handle).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Poseidon2Impl<F: FieldImpl> {
|
||||
fn create(
|
||||
width: u32,
|
||||
rate: u32,
|
||||
alpha: u32,
|
||||
internal_rounds: u32,
|
||||
external_rounds: u32,
|
||||
round_constants: &[F],
|
||||
internal_matrix_diag: &[F],
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<Poseidon2Constants<'a, F>>;
|
||||
fn poseidon_unchecked(
|
||||
states: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
) -> IcicleResult<Poseidon2Handle>;
|
||||
|
||||
fn load(
|
||||
width: u32,
|
||||
rate: u32,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<Poseidon2Handle>;
|
||||
|
||||
fn hash_many(
|
||||
inputs: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<F>,
|
||||
config: &Poseidon2Config,
|
||||
input_block_len: u32,
|
||||
output_len: u32,
|
||||
poseidon: Poseidon2Handle,
|
||||
cfg: &SpongeConfig,
|
||||
) -> IcicleResult<()>;
|
||||
fn poseidon_unchecked_inplace(
|
||||
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<F>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()>;
|
||||
fn release_constants(constants: &Poseidon2Constants<F>, ctx: &DeviceContext) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
/// Loads pre-calculated poseidon constants on the GPU.
|
||||
pub fn load_poseidon2_constants<'a, F>(
|
||||
width: u32,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<Poseidon2Constants<'a, F>>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::load_constants(width, mds_type, diffusion, ctx)
|
||||
}
|
||||
|
||||
/// Creates new instance of poseidon constants on the GPU.
|
||||
pub fn create_poseidon2_constants<'a, F>(
|
||||
width: u32,
|
||||
alpha: u32,
|
||||
ctx: &DeviceContext,
|
||||
internal_rounds: u32,
|
||||
external_rounds: u32,
|
||||
round_constants: &mut [F],
|
||||
internal_matrix_diag: &mut [F],
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
) -> IcicleResult<Poseidon2Constants<'a, F>>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::create_constants(
|
||||
width,
|
||||
alpha,
|
||||
internal_rounds,
|
||||
external_rounds,
|
||||
round_constants,
|
||||
internal_matrix_diag,
|
||||
mds_type,
|
||||
diffusion,
|
||||
ctx,
|
||||
)
|
||||
}
|
||||
|
||||
fn poseidon_checks<F>(
|
||||
states: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
config: &Poseidon2Config,
|
||||
) where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
if states.len() < (number_of_states * width) as usize {
|
||||
panic!(
|
||||
"input len is {}; but needs to be at least {}",
|
||||
states.len(),
|
||||
number_of_states * width
|
||||
);
|
||||
}
|
||||
if output.len() < number_of_states as usize {
|
||||
panic!(
|
||||
"output len is {}; but needs to be at least {}",
|
||||
output.len(),
|
||||
number_of_states
|
||||
);
|
||||
}
|
||||
|
||||
let ctx_device_id = config
|
||||
.ctx
|
||||
.device_id;
|
||||
if let Some(device_id) = states.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in input and context are different"
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(device_id) = output.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in output and context are different"
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
}
|
||||
|
||||
/// Computes the poseidon hashes for multiple preimages.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input` - a pointer to the input data. May point to a vector of preimages or a vector of states filled with preimages.
|
||||
///
|
||||
/// * `output` - a pointer to the output data. Must be at least of size [number_of_states](number_of_states)
|
||||
///
|
||||
/// * `number_of_states` - number of input blocks of size `arity`
|
||||
///
|
||||
/// * `arity` - the arity of the hash function (the size of 1 preimage)
|
||||
///
|
||||
/// * `constants` - Poseidon constants.
|
||||
///
|
||||
/// * `config` - config used to specify extra arguments of the Poseidon.
|
||||
pub fn poseidon2_hash_many<F>(
|
||||
states: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<F>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
poseidon_checks(states, output, number_of_states, width, config);
|
||||
let mut local_cfg = config.clone();
|
||||
local_cfg.are_states_on_device = states.is_on_device();
|
||||
local_cfg.are_outputs_on_device = output.is_on_device();
|
||||
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::poseidon_unchecked(
|
||||
states,
|
||||
output,
|
||||
number_of_states,
|
||||
width,
|
||||
constants,
|
||||
&local_cfg,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn poseidon2_hash_many_inplace<F>(
|
||||
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<F>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
poseidon_checks(states, states, number_of_states, width, config);
|
||||
let mut local_cfg = config.clone();
|
||||
local_cfg.are_states_on_device = states.is_on_device();
|
||||
local_cfg.are_outputs_on_device = states.is_on_device();
|
||||
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::poseidon_unchecked_inplace(
|
||||
states,
|
||||
number_of_states,
|
||||
width,
|
||||
constants,
|
||||
&local_cfg,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn release_poseidon2_constants<'a, F>(constants: &Poseidon2Constants<F>, ctx: &DeviceContext) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as Poseidon2<F>>::release_constants(constants, ctx)
|
||||
fn delete(poseidon: Poseidon2Handle) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
@@ -307,140 +197,125 @@ macro_rules! impl_poseidon2 {
|
||||
) => {
|
||||
mod $field_prefix_ident {
|
||||
use crate::poseidon2::{
|
||||
$field, $field_config, CudaError, DeviceContext, DiffusionStrategy, MdsType, Poseidon2Config,
|
||||
Poseidon2Constants,
|
||||
$field, $field_config, CudaError, DeviceContext, DiffusionStrategy, MdsType, Poseidon2Handle,
|
||||
SpongeConfig,
|
||||
};
|
||||
use icicle_core::error::IcicleError;
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "_create_poseidon2_constants_cuda")]
|
||||
pub(crate) fn _create_constants(
|
||||
#[link_name = concat!($field_prefix, "_poseidon2_create_cuda")]
|
||||
pub(crate) fn create(
|
||||
poseidon: *mut Poseidon2Handle,
|
||||
width: u32,
|
||||
rate: u32,
|
||||
alpha: u32,
|
||||
internal_rounds: u32,
|
||||
external_rounds: u32,
|
||||
constants: *mut $field,
|
||||
internal_matrix_diag: *mut $field,
|
||||
constants: *const $field,
|
||||
internal_matrix_diag: *const $field,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
poseidon_constants: *mut Poseidon2Constants<$field>,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_init_poseidon2_constants_cuda")]
|
||||
pub(crate) fn _load_constants(
|
||||
#[link_name = concat!($field_prefix, "_poseidon2_load_cuda")]
|
||||
pub(crate) fn load(
|
||||
poseidon: *mut Poseidon2Handle,
|
||||
width: u32,
|
||||
rate: u32,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
constants: *mut Poseidon2Constants<$field>,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_release_poseidon2_constants_cuda")]
|
||||
pub(crate) fn _release_constants(
|
||||
constants: &Poseidon2Constants<$field>,
|
||||
ctx: &DeviceContext,
|
||||
) -> CudaError;
|
||||
#[link_name = concat!($field_prefix, "_poseidon2_delete_cuda")]
|
||||
pub(crate) fn delete(poseidon: Poseidon2Handle) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_poseidon2_hash_cuda")]
|
||||
#[link_name = concat!($field_prefix, "_poseidon2_hash_many_cuda")]
|
||||
pub(crate) fn hash_many(
|
||||
states: *const $field,
|
||||
poseidon: Poseidon2Handle,
|
||||
inputs: *const $field,
|
||||
output: *mut $field,
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<$field>,
|
||||
config: &Poseidon2Config,
|
||||
input_block_len: u32,
|
||||
output_len: u32,
|
||||
cfg: &SpongeConfig,
|
||||
) -> CudaError;
|
||||
}
|
||||
}
|
||||
|
||||
impl Poseidon2<$field> for $field_config {
|
||||
fn create_constants<'a>(
|
||||
impl Poseidon2Impl<$field> for $field_config {
|
||||
fn create(
|
||||
width: u32,
|
||||
rate: u32,
|
||||
alpha: u32,
|
||||
internal_rounds: u32,
|
||||
external_rounds: u32,
|
||||
round_constants: &mut [$field],
|
||||
internal_matrix_diag: &mut [$field],
|
||||
round_constants: &[$field],
|
||||
internal_matrix_diag: &[$field],
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
|
||||
) -> IcicleResult<Poseidon2Handle> {
|
||||
unsafe {
|
||||
let mut poseidon_constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
|
||||
let err = $field_prefix_ident::_create_constants(
|
||||
let mut poseidon = MaybeUninit::<Poseidon2Handle>::uninit();
|
||||
$field_prefix_ident::create(
|
||||
poseidon.as_mut_ptr(),
|
||||
width,
|
||||
rate,
|
||||
alpha,
|
||||
internal_rounds,
|
||||
external_rounds,
|
||||
round_constants as *mut _ as *mut $field,
|
||||
internal_matrix_diag as *mut _ as *mut $field,
|
||||
round_constants as *const _ as *const $field,
|
||||
internal_matrix_diag as *const _ as *const $field,
|
||||
mds_type,
|
||||
diffusion,
|
||||
ctx,
|
||||
poseidon_constants.as_mut_ptr(),
|
||||
)
|
||||
.wrap();
|
||||
err.and(Ok(poseidon_constants.assume_init()))
|
||||
.wrap()
|
||||
.and(Ok(poseidon.assume_init()))
|
||||
}
|
||||
}
|
||||
|
||||
fn load_constants<'a>(
|
||||
fn load(
|
||||
width: u32,
|
||||
rate: u32,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
ctx: &DeviceContext,
|
||||
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
|
||||
) -> IcicleResult<Poseidon2Handle> {
|
||||
unsafe {
|
||||
let mut constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
|
||||
let err =
|
||||
$field_prefix_ident::_load_constants(width, mds_type, diffusion, ctx, constants.as_mut_ptr())
|
||||
.wrap();
|
||||
err.and(Ok(constants.assume_init()))
|
||||
let mut poseidon = MaybeUninit::<Poseidon2Handle>::uninit();
|
||||
$field_prefix_ident::load(poseidon.as_mut_ptr(), width, rate, mds_type, diffusion, ctx)
|
||||
.wrap()
|
||||
.and(Ok(poseidon.assume_init()))
|
||||
}
|
||||
}
|
||||
|
||||
fn poseidon_unchecked(
|
||||
states: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
fn hash_many(
|
||||
inputs: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<$field>,
|
||||
config: &Poseidon2Config,
|
||||
input_block_len: u32,
|
||||
output_len: u32,
|
||||
poseidon: Poseidon2Handle,
|
||||
cfg: &SpongeConfig,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
$field_prefix_ident::hash_many(
|
||||
states.as_ptr(),
|
||||
poseidon,
|
||||
inputs.as_ptr(),
|
||||
output.as_mut_ptr(),
|
||||
number_of_states,
|
||||
width,
|
||||
constants,
|
||||
config,
|
||||
input_block_len,
|
||||
output_len,
|
||||
cfg,
|
||||
)
|
||||
.wrap()
|
||||
}
|
||||
}
|
||||
|
||||
fn poseidon_unchecked_inplace(
|
||||
states: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
number_of_states: u32,
|
||||
width: u32,
|
||||
constants: &Poseidon2Constants<$field>,
|
||||
config: &Poseidon2Config,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
$field_prefix_ident::hash_many(
|
||||
states.as_ptr(),
|
||||
states.as_mut_ptr(),
|
||||
number_of_states,
|
||||
width,
|
||||
constants,
|
||||
config,
|
||||
)
|
||||
.wrap()
|
||||
}
|
||||
}
|
||||
|
||||
fn release_constants<'a>(constants: &Poseidon2Constants<$field>, ctx: &DeviceContext) -> IcicleResult<()> {
|
||||
unsafe { $field_prefix_ident::_release_constants(constants, ctx).wrap() }
|
||||
fn delete(poseidon: Poseidon2Handle) -> IcicleResult<()> {
|
||||
unsafe { $field_prefix_ident::delete(poseidon).wrap() }
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -466,42 +341,41 @@ pub mod bench {
|
||||
};
|
||||
|
||||
use crate::{
|
||||
hash::SpongeHash,
|
||||
ntt::FieldImpl,
|
||||
poseidon2::{load_poseidon2_constants, DiffusionStrategy, MdsType},
|
||||
poseidon2::{DiffusionStrategy, MdsType, Poseidon2, Poseidon2Impl},
|
||||
traits::GenerateRandom,
|
||||
vec_ops::VecOps,
|
||||
};
|
||||
|
||||
use super::{poseidon2_hash_many, Poseidon2, Poseidon2Config, Poseidon2Constants};
|
||||
|
||||
#[allow(unused)]
|
||||
fn poseidon2_for_bench<'a, F: FieldImpl>(
|
||||
fn poseidon2_for_bench<F: FieldImpl>(
|
||||
poseidon: &Poseidon2<F>,
|
||||
states: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
poseidon2_result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: usize,
|
||||
width: usize,
|
||||
constants: &Poseidon2Constants<'a, F>,
|
||||
config: &Poseidon2Config,
|
||||
ctx: &DeviceContext,
|
||||
_seed: u32,
|
||||
) where
|
||||
<F as FieldImpl>::Config: Poseidon2<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: VecOps<F>,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F> + GenerateRandom<F>,
|
||||
{
|
||||
poseidon2_hash_many(
|
||||
states,
|
||||
poseidon2_result,
|
||||
number_of_states as u32,
|
||||
width as u32,
|
||||
constants,
|
||||
config,
|
||||
)
|
||||
.unwrap();
|
||||
let cfg = poseidon.default_config();
|
||||
poseidon
|
||||
.hash_many(
|
||||
states,
|
||||
poseidon2_result,
|
||||
number_of_states,
|
||||
poseidon.width,
|
||||
poseidon.width,
|
||||
&cfg,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn benchmark_poseidon2<F: FieldImpl>(c: &mut Criterion)
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon2<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: VecOps<F>,
|
||||
{
|
||||
use criterion::SamplingMode;
|
||||
@@ -519,7 +393,7 @@ pub mod bench {
|
||||
.parse::<u32>()
|
||||
.unwrap_or(MAX_LOG2);
|
||||
|
||||
for test_size_log2 in 13u32..max_log2 + 1 {
|
||||
for test_size_log2 in 18u32..max_log2 + 1 {
|
||||
for t in [2, 3, 4, 8, 16, 20, 24] {
|
||||
let number_of_states = 1 << test_size_log2;
|
||||
let full_size = t * number_of_states;
|
||||
@@ -531,31 +405,27 @@ pub mod bench {
|
||||
let permutation_result_slice = HostSlice::from_mut_slice(&mut permutation_result);
|
||||
|
||||
let ctx = DeviceContext::default();
|
||||
let config = Poseidon2Config::default();
|
||||
for mds in [MdsType::Default, MdsType::Plonky] {
|
||||
for diffusion in [DiffusionStrategy::Default, DiffusionStrategy::Montgomery] {
|
||||
let constants =
|
||||
load_poseidon2_constants(t as u32, mds.clone(), diffusion.clone(), &ctx).unwrap();
|
||||
let bench_descr = format!(
|
||||
"Mds::{:?}; Diffusion::{:?}; Number of states: {}; Width: {}",
|
||||
mds, diffusion, number_of_states, t
|
||||
);
|
||||
group.bench_function(&bench_descr, |b| {
|
||||
b.iter(|| {
|
||||
poseidon2_for_bench::<F>(
|
||||
input,
|
||||
permutation_result_slice,
|
||||
number_of_states,
|
||||
t,
|
||||
&constants,
|
||||
&config,
|
||||
black_box(1),
|
||||
)
|
||||
})
|
||||
});
|
||||
|
||||
// }
|
||||
}
|
||||
for (mds, diffusion) in [
|
||||
(MdsType::Default, DiffusionStrategy::Default),
|
||||
(MdsType::Plonky, DiffusionStrategy::Montgomery),
|
||||
] {
|
||||
let poseidon = Poseidon2::<F>::load(t, t, mds, diffusion, &ctx).unwrap();
|
||||
let bench_descr = format!(
|
||||
"TestSize: 2**{}, Mds::{:?}, Diffusion::{:?}, Width: {}",
|
||||
test_size_log2, mds, diffusion, t
|
||||
);
|
||||
group.bench_function(&bench_descr, |b| {
|
||||
b.iter(|| {
|
||||
poseidon2_for_bench::<F>(
|
||||
&poseidon,
|
||||
input,
|
||||
permutation_result_slice,
|
||||
number_of_states,
|
||||
&ctx,
|
||||
black_box(1),
|
||||
)
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,27 +1,21 @@
|
||||
use crate::poseidon2::{MdsType, PoseidonMode};
|
||||
use crate::hash::SpongeHash;
|
||||
use crate::traits::FieldImpl;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::memory::{HostOrDeviceSlice, HostSlice};
|
||||
|
||||
use super::{
|
||||
load_poseidon2_constants, poseidon2_hash_many, DiffusionStrategy, Poseidon2, Poseidon2Config, Poseidon2Constants,
|
||||
};
|
||||
use super::{DiffusionStrategy, MdsType, Poseidon2, Poseidon2Impl};
|
||||
|
||||
pub fn init_poseidon<'a, F: FieldImpl>(
|
||||
width: u32,
|
||||
mds_type: MdsType,
|
||||
diffusion: DiffusionStrategy,
|
||||
) -> Poseidon2Constants<'a, F>
|
||||
pub fn init_poseidon<F: FieldImpl>(width: usize, mds_type: MdsType, diffusion: DiffusionStrategy) -> Poseidon2<F>
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F>,
|
||||
{
|
||||
let ctx = DeviceContext::default();
|
||||
load_poseidon2_constants::<F>(width, mds_type, diffusion, &ctx).unwrap()
|
||||
Poseidon2::load(width, width, mds_type, diffusion, &ctx).unwrap()
|
||||
}
|
||||
|
||||
fn _check_poseidon_hash_many<F: FieldImpl>(width: u32, constants: Poseidon2Constants<F>) -> (F, F)
|
||||
fn _check_poseidon_hash_many<F: FieldImpl>(width: usize, poseidon: &Poseidon2<F>) -> (F, F)
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F>,
|
||||
{
|
||||
let test_size = 1 << 10;
|
||||
let mut inputs = vec![F::one(); test_size * width as usize];
|
||||
@@ -30,16 +24,10 @@ where
|
||||
let input_slice = HostSlice::from_mut_slice(&mut inputs);
|
||||
let output_slice = HostSlice::from_mut_slice(&mut outputs);
|
||||
|
||||
let config = Poseidon2Config::default();
|
||||
poseidon2_hash_many::<F>(
|
||||
input_slice,
|
||||
output_slice,
|
||||
test_size as u32,
|
||||
width as u32,
|
||||
&constants,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
let cfg = poseidon.default_config();
|
||||
poseidon
|
||||
.hash_many(input_slice, output_slice, test_size, width, 1, &cfg)
|
||||
.unwrap();
|
||||
|
||||
let a1 = output_slice[0];
|
||||
let a2 = output_slice[output_slice.len() - 2];
|
||||
@@ -49,21 +37,22 @@ where
|
||||
(a1, a2)
|
||||
}
|
||||
|
||||
pub fn check_poseidon_hash_many<'a, F: FieldImpl + 'a>()
|
||||
pub fn check_poseidon_hash_many<F: FieldImpl>()
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F>,
|
||||
{
|
||||
let widths = [2, 3, 4, 8, 12, 16, 20, 24];
|
||||
let ctx = DeviceContext::default();
|
||||
for width in widths {
|
||||
let constants = init_poseidon::<'a, F>(width as u32, MdsType::Default, DiffusionStrategy::Default);
|
||||
let poseidon = Poseidon2::<F>::load(width, width, MdsType::Default, DiffusionStrategy::Default, &ctx).unwrap();
|
||||
|
||||
_check_poseidon_hash_many(width, constants);
|
||||
_check_poseidon_hash_many(width, &poseidon);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_poseidon_kats<'a, F: FieldImpl>(width: usize, kats: &[F], constants: &Poseidon2Constants<'a, F>)
|
||||
pub fn check_poseidon_kats<F: FieldImpl>(width: usize, kats: &[F], poseidon: &Poseidon2<F>)
|
||||
where
|
||||
<F as FieldImpl>::Config: Poseidon2<F>,
|
||||
<F as FieldImpl>::Config: Poseidon2Impl<F>,
|
||||
{
|
||||
assert_eq!(width, kats.len());
|
||||
|
||||
@@ -83,17 +72,11 @@ where
|
||||
let input_slice = HostSlice::from_mut_slice(&mut inputs);
|
||||
let output_slice = HostSlice::from_mut_slice(&mut outputs);
|
||||
|
||||
let mut config = Poseidon2Config::default();
|
||||
config.mode = PoseidonMode::Permutation;
|
||||
poseidon2_hash_many::<F>(
|
||||
input_slice,
|
||||
output_slice,
|
||||
batch_size as u32,
|
||||
width as u32,
|
||||
&constants,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
let cfg = poseidon.default_config();
|
||||
|
||||
poseidon
|
||||
.hash_many(input_slice, output_slice, batch_size, width, width, &cfg)
|
||||
.unwrap();
|
||||
|
||||
for (i, val) in output_slice
|
||||
.iter()
|
||||
|
||||
79
wrappers/rust/icicle-core/src/tree/mmcs.rs
Normal file
79
wrappers/rust/icicle-core/src/tree/mmcs.rs
Normal file
@@ -0,0 +1,79 @@
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
use crate::{error::IcicleResult, ntt::FieldImpl};
|
||||
use crate::{hash::SpongeHash, Matrix};
|
||||
|
||||
use super::TreeBuilderConfig;
|
||||
|
||||
pub trait FieldMmcs<F, Compression, Hasher>
|
||||
where
|
||||
F: FieldImpl,
|
||||
Compression: SpongeHash<F, F>,
|
||||
Hasher: SpongeHash<F, F>,
|
||||
{
|
||||
fn mmcs_commit(
|
||||
leaves: Vec<Matrix>,
|
||||
digests: &mut HostSlice<F>,
|
||||
hasher: &Hasher,
|
||||
compression: &Compression,
|
||||
config: &TreeBuilderConfig,
|
||||
) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
/// Generates the FFI binding and the [`FieldMmcs`] implementation for one field.
///
/// Arguments:
/// * `$field_prefix` - string literal prepended to `_mmcs_commit_cuda` to form
///   the linked symbol name.
/// * `$field_prefix_ident` - name of the private module that holds the extern
///   declaration.
/// * `$field` - the field element type.
/// * `$field_config` - accepted for signature parity with sibling macros; not
///   used by this expansion.
/// * `$mmcs` - name of the unit struct that receives the `FieldMmcs` impl.
///
/// The expansion site must have `Matrix`, `TreeBuilderConfig`, `FieldMmcs`,
/// `SpongeHash`, `HostSlice`, `IcicleResult`, `c_void` and the trait providing
/// `.wrap()` on `CudaError` in scope.
#[macro_export]
macro_rules! impl_mmcs {
    (
        $field_prefix:literal,
        $field_prefix_ident:ident,
        $field:ident,
        $field_config:ident,
        $mmcs:ident
    ) => {
        mod $field_prefix_ident {
            use super::*;
            use icicle_cuda_runtime::error::CudaError;

            extern "C" {
                #[link_name = concat!($field_prefix, "_mmcs_commit_cuda")]
                pub(crate) fn mmcs_commit_cuda(
                    leaves: *const Matrix,
                    number_of_inputs: u32,
                    digests: *mut $field,
                    hasher: *const c_void,
                    compression: *const c_void,
                    config: &TreeBuilderConfig,
                ) -> CudaError;
            }
        }

        struct $mmcs;

        impl<Compression, Hasher> FieldMmcs<$field, Compression, Hasher> for $mmcs
        where
            Compression: SpongeHash<$field, $field>,
            Hasher: SpongeHash<$field, $field>,
        {
            fn mmcs_commit(
                leaves: Vec<Matrix>,
                digests: &mut HostSlice<$field>,
                hasher: &Hasher,
                compression: &Compression,
                config: &TreeBuilderConfig,
            ) -> IcicleResult<()> {
                // SAFETY: `leaves` is owned by this function and outlives the
                // call, so the pointer/length pair describes a live slice;
                // `digests` is an exclusive borrow for the duration of the
                // call. The hasher handles are only as valid as the
                // `SpongeHash` implementations make them.
                unsafe {
                    $field_prefix_ident::mmcs_commit_cuda(
                        leaves
                            .as_slice()
                            .as_ptr(),
                        leaves.len() as u32,
                        digests.as_mut_ptr(),
                        // Handles are passed in the order the extern declares
                        // them (hasher first, then compression). Both are
                        // `*const c_void`, so swapping them would compile
                        // silently and hash with the wrong function per layer.
                        hasher.get_handle(),
                        compression.get_handle(),
                        config,
                    )
                    .wrap()
                }
            }
        }
    };
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user