Feat/roman/poseidon2 (#507)

This PR adds support for poseidon2 permutation function as described in
https://eprint.iacr.org/2023/323.pdf

Reference implementations used (and compared against):
https://github.com/HorizenLabs/poseidon2/tree/main
https://github.com/Plonky3/Plonky3/tree/main

Tasks:

- [x] Remove commented code and prints
- [ ] Add doc-comments to functions and structs
- [x] Fix possible issue with Plonky3 imports
- [x] Update NTT/Plonky3 test
- [x] Add Plonky3-bn254 test (impossible)
This commit is contained in:
ChickenLover
2024-05-09 15:13:43 +07:00
committed by GitHub
parent c30e333819
commit 094683d291
27 changed files with 6070 additions and 13 deletions

3
.gitignore vendored
View File

@@ -8,6 +8,7 @@
*.so
*.nsys-rep
*.ncu-rep
*.sage.py
**/target
**/.vscode
**/.*lock*csv#
@@ -17,5 +18,3 @@
**/icicle/build/
**/wrappers/rust/icicle-cuda-runtime/src/bindings.rs
**/build*
**/icicle/appUtils/large_ntt/work
icicle/appUtils/large_ntt/work/test_ntt

View File

@@ -125,6 +125,17 @@ public:
struct Wide {
ff_wide_storage limbs_storage;
static constexpr Wide HOST_DEVICE_INLINE from_field(const Field& xs)
{
Wide out{};
#ifdef __CUDA_ARCH__
UNROLL
#endif
for (unsigned i = 0; i < TLC; i++)
out.limbs_storage.limbs[i] = xs.limbs_storage.limbs[i];
return out;
}
static constexpr Field HOST_DEVICE_INLINE get_lower(const Wide& xs)
{
Field out{};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,705 @@
# Remark: This script contains functionality for GF(2^n), but currently works only over GF(p)! A few small adaptations are needed for GF(2^n).
from sage.rings.polynomial.polynomial_gf2x import GF2X_BuildIrred_list
from math import *
import itertools
CURVE_NAME = "bn254"
###########################################################################
# p = 18446744069414584321 # GoldiLocks
# p = 2013265921 # BabyBear
# p = 52435875175126190479447740508185965837690552500527637822603658699938581184513 # BLS12-381
p = 21888242871839275222246405745257275088548364400416034343698204186575808495617 # BN254/BN256
# p = 28948022309329048855892746252171976963363056481941560715954676764349967630337 # Pasta (Pallas)
# p = 28948022309329048855892746252171976963363056481941647379679742748393362948097 # Pasta (Vesta)
n = len(p.bits()) # bit
# t = 12 # GoldiLocks (t = 12 for sponge, t = 8 for compression)
# t = 16 # BabyBear (t = 24 for sponge, t = 16 for compression)
# t = 3 # BN254/BN256, BLS12-381, Pallas, Vesta (t = 3 for sponge, t = 2 for compression)
TS = [2, 3, 4, 8, 12, 16, 20, 24]
FIELD = 1
SBOX = 0
FIELD_SIZE = n
def get_alpha(p):
for alpha in range(3, p):
if gcd(alpha, p-1) == 1:
break
return alpha
alpha = get_alpha(p)
def get_sbox_cost(R_F, R_P, N, t):
return int(t * R_F + R_P)
def get_size_cost(R_F, R_P, N, t):
n = ceil(float(N) / t)
return int((N * R_F) + (n * R_P))
def poseidon_calc_final_numbers_fixed(p, t, alpha, M, security_margin):
# [Min. S-boxes] Find best possible for t and N
n = ceil(log(p, 2))
N = int(n * t)
cost_function = get_sbox_cost
ret_list = []
(R_F, R_P) = find_FD_round_numbers(p, t, alpha, M, cost_function, security_margin)
min_sbox_cost = cost_function(R_F, R_P, N, t)
ret_list.append(R_F)
ret_list.append(R_P)
ret_list.append(min_sbox_cost)
# [Min. Size] Find best possible for t and N
# Minimum number of S-boxes for fixed n results in minimum size also (round numbers are the same)!
min_size_cost = get_size_cost(R_F, R_P, N, t)
ret_list.append(min_size_cost)
return ret_list # [R_F, R_P, min_sbox_cost, min_size_cost]
def find_FD_round_numbers(p, t, alpha, M, cost_function, security_margin):
n = ceil(log(p, 2))
N = int(n * t)
sat_inequiv = sat_inequiv_alpha
R_P = 0
R_F = 0
min_cost = float("inf")
max_cost_rf = 0
# Brute-force approach
for R_P_t in range(1, 500):
for R_F_t in range(4, 100):
if R_F_t % 2 == 0:
if (sat_inequiv(p, t, R_F_t, R_P_t, alpha, M) == True):
if security_margin == True:
R_F_t += 2
R_P_t = int(ceil(float(R_P_t) * 1.075))
cost = cost_function(R_F_t, R_P_t, N, t)
if (cost < min_cost) or ((cost == min_cost) and (R_F_t < max_cost_rf)):
R_P = ceil(R_P_t)
R_F = ceil(R_F_t)
min_cost = cost
max_cost_rf = R_F
return (int(R_F), int(R_P))
def sat_inequiv_alpha(p, t, R_F, R_P, alpha, M):
N = int(FIELD_SIZE * NUM_CELLS)
if alpha > 0:
R_F_1 = 6 if M <= ((floor(log(p, 2) - ((alpha-1)/2.0))) * (t + 1)) else 10 # Statistical
R_F_2 = 1 + ceil(log(2, alpha) * min(M, FIELD_SIZE)) + ceil(log(t, alpha)) - R_P # Interpolation
R_F_3 = (log(2, alpha) * min(M, log(p, 2))) - R_P # Groebner 1
R_F_4 = t - 1 + log(2, alpha) * min(M / float(t + 1), log(p, 2) / float(2)) - R_P # Groebner 2
R_F_5 = (t - 2 + (M / float(2 * log(alpha, 2))) - R_P) / float(t - 1) # Groebner 3
R_F_max = max(ceil(R_F_1), ceil(R_F_2), ceil(R_F_3), ceil(R_F_4), ceil(R_F_5))
# Addition due to https://eprint.iacr.org/2023/537.pdf
r_temp = floor(t / 3.0)
over = (R_F - 1) * t + R_P + r_temp + r_temp * (R_F / 2.0) + R_P + alpha
under = r_temp * (R_F / 2.0) + R_P + alpha
binom_log = log(binomial(over, under), 2)
if binom_log == inf:
binom_log = M + 1
cost_gb4 = ceil(2 * binom_log) # Paper uses 2.3727, we are more conservative here
return ((R_F >= R_F_max) and (cost_gb4 >= M))
else:
print("Invalid value for alpha!")
exit(1)
def grain_sr_generator():
bit_sequence = INIT_SEQUENCE
for _ in range(0, 160):
new_bit = bit_sequence[62] ^^ bit_sequence[51] ^^ bit_sequence[38] ^^ bit_sequence[23] ^^ bit_sequence[13] ^^ bit_sequence[0]
bit_sequence.pop(0)
bit_sequence.append(new_bit)
while True:
new_bit = bit_sequence[62] ^^ bit_sequence[51] ^^ bit_sequence[38] ^^ bit_sequence[23] ^^ bit_sequence[13] ^^ bit_sequence[0]
bit_sequence.pop(0)
bit_sequence.append(new_bit)
while new_bit == 0:
new_bit = bit_sequence[62] ^^ bit_sequence[51] ^^ bit_sequence[38] ^^ bit_sequence[23] ^^ bit_sequence[13] ^^ bit_sequence[0]
bit_sequence.pop(0)
bit_sequence.append(new_bit)
new_bit = bit_sequence[62] ^^ bit_sequence[51] ^^ bit_sequence[38] ^^ bit_sequence[23] ^^ bit_sequence[13] ^^ bit_sequence[0]
bit_sequence.pop(0)
bit_sequence.append(new_bit)
new_bit = bit_sequence[62] ^^ bit_sequence[51] ^^ bit_sequence[38] ^^ bit_sequence[23] ^^ bit_sequence[13] ^^ bit_sequence[0]
bit_sequence.pop(0)
bit_sequence.append(new_bit)
yield new_bit
def grain_random_bits(num_bits):
random_bits = [next(grain_gen) for i in range(0, num_bits)]
# random_bits.reverse() ## Remove comment to start from least significant bit
random_int = int("".join(str(i) for i in random_bits), 2)
return random_int
def init_generator(field, sbox, n, t, R_F, R_P):
# Generate initial sequence based on parameters
bit_list_field = [_ for _ in (bin(FIELD)[2:].zfill(2))]
bit_list_sbox = [_ for _ in (bin(SBOX)[2:].zfill(4))]
bit_list_n = [_ for _ in (bin(FIELD_SIZE)[2:].zfill(12))]
bit_list_t = [_ for _ in (bin(NUM_CELLS)[2:].zfill(12))]
bit_list_R_F = [_ for _ in (bin(R_F)[2:].zfill(10))]
bit_list_R_P = [_ for _ in (bin(R_P)[2:].zfill(10))]
bit_list_1 = [1] * 30
global INIT_SEQUENCE
INIT_SEQUENCE = bit_list_field + bit_list_sbox + bit_list_n + bit_list_t + bit_list_R_F + bit_list_R_P + bit_list_1
INIT_SEQUENCE = [int(_) for _ in INIT_SEQUENCE]
def generate_constants(field, n, t, R_F, R_P, prime_number):
round_constants = []
# num_constants = (R_F + R_P) * t # Poseidon
num_constants = (R_F * t) + R_P # Poseidon2
if field == 0:
for i in range(0, num_constants):
random_int = grain_random_bits(n)
round_constants.append(random_int)
elif field == 1:
for i in range(0, num_constants):
random_int = grain_random_bits(n)
while random_int >= prime_number:
# print("[Info] Round constant is not in prime field! Taking next one.")
random_int = grain_random_bits(n)
round_constants.append(random_int)
# Add (t-1) zeroes for Poseidon2 if partial round
if i >= ((R_F/2) * t) and i < (((R_F/2) * t) + R_P):
round_constants.extend([0] * (t-1))
return round_constants
def print_round_constants(round_constants, n, field):
print("Number of round constants:", len(round_constants))
if field == 0:
print("Round constants for GF(2^n):")
elif field == 1:
print("Round constants for GF(p):")
hex_length = int(ceil(float(n) / 4)) + 2 # +2 for "0x"
print(["{0:#0{1}x}".format(entry, hex_length) for entry in round_constants])
def create_mds_p(n, t):
M = matrix(F, t, t)
# Sample random distinct indices and assign to xs and ys
while True:
flag = True
rand_list = [F(grain_random_bits(n)) for _ in range(0, 2*t)]
while len(rand_list) != len(set(rand_list)): # Check for duplicates
rand_list = [F(grain_random_bits(n)) for _ in range(0, 2*t)]
xs = rand_list[:t]
ys = rand_list[t:]
# xs = [F(ele) for ele in range(0, t)]
# ys = [F(ele) for ele in range(t, 2*t)]
for i in range(0, t):
for j in range(0, t):
if (flag == False) or ((xs[i] + ys[j]) == 0):
flag = False
else:
entry = (xs[i] + ys[j])^(-1)
M[i, j] = entry
if flag == False:
continue
return M
def generate_vectorspace(round_num, M, M_round, NUM_CELLS):
t = NUM_CELLS
s = 1
V = VectorSpace(F, t)
if round_num == 0:
return V
elif round_num == 1:
return V.subspace(V.basis()[s:])
else:
mat_temp = matrix(F)
for i in range(0, round_num-1):
add_rows = []
for j in range(0, s):
add_rows.append(M_round[i].rows()[j][s:])
mat_temp = matrix(mat_temp.rows() + add_rows)
r_k = mat_temp.right_kernel()
extended_basis_vectors = []
for vec in r_k.basis():
extended_basis_vectors.append(vector([0]*s + list(vec)))
S = V.subspace(extended_basis_vectors)
return S
def subspace_times_matrix(subspace, M, NUM_CELLS):
t = NUM_CELLS
V = VectorSpace(F, t)
subspace_basis = subspace.basis()
new_basis = []
for vec in subspace_basis:
new_basis.append(M * vec)
new_subspace = V.subspace(new_basis)
return new_subspace
# Returns True if the matrix is considered secure, False otherwise
def algorithm_1(M, NUM_CELLS):
t = NUM_CELLS
s = 1
r = floor((t - s) / float(s))
# Generate round matrices
M_round = []
for j in range(0, t+1):
M_round.append(M^(j+1))
for i in range(1, r+1):
mat_test = M^i
entry = mat_test[0, 0]
mat_target = matrix.circulant(vector([entry] + ([F(0)] * (t-1))))
if (mat_test - mat_target) == matrix.circulant(vector([F(0)] * (t))):
return [False, 1]
S = generate_vectorspace(i, M, M_round, t)
V = VectorSpace(F, t)
basis_vectors= []
for eigenspace in mat_test.eigenspaces_right(format='galois'):
if (eigenspace[0] not in F):
continue
vector_subspace = eigenspace[1]
intersection = S.intersection(vector_subspace)
basis_vectors += intersection.basis()
IS = V.subspace(basis_vectors)
if IS.dimension() >= 1 and IS != V:
return [False, 2]
for j in range(1, i+1):
S_mat_mul = subspace_times_matrix(S, M^j, t)
if S == S_mat_mul:
print("S.basis():\n", S.basis())
return [False, 3]
return [True, 0]
# Returns True if the matrix is considered secure, False otherwise
def algorithm_2(M, NUM_CELLS):
t = NUM_CELLS
s = 1
V = VectorSpace(F, t)
trail = [None, None]
test_next = False
I = range(0, s)
I_powerset = list(sage.misc.misc.powerset(I))[1:]
for I_s in I_powerset:
test_next = False
new_basis = []
for l in I_s:
new_basis.append(V.basis()[l])
IS = V.subspace(new_basis)
for i in range(s, t):
new_basis.append(V.basis()[i])
full_iota_space = V.subspace(new_basis)
for l in I_s:
v = V.basis()[l]
while True:
delta = IS.dimension()
v = M * v
IS = V.subspace(IS.basis() + [v])
if IS.dimension() == t or IS.intersection(full_iota_space) != IS:
test_next = True
break
if IS.dimension() <= delta:
break
if test_next == True:
break
if test_next == True:
continue
return [False, [IS, I_s]]
return [True, None]
# Returns True if the matrix is considered secure, False otherwise
def algorithm_3(M, NUM_CELLS):
t = NUM_CELLS
s = 1
V = VectorSpace(F, t)
l = 4*t
for r in range(2, l+1):
next_r = False
res_alg_2 = algorithm_2(M^r, t)
if res_alg_2[0] == False:
return [False, None]
# if res_alg_2[1] == None:
# continue
# IS = res_alg_2[1][0]
# I_s = res_alg_2[1][1]
# for j in range(1, r):
# IS = subspace_times_matrix(IS, M, t)
# I_j = []
# for i in range(0, s):
# new_basis = []
# for k in range(0, t):
# if k != i:
# new_basis.append(V.basis()[k])
# iota_space = V.subspace(new_basis)
# if IS.intersection(iota_space) != iota_space:
# single_iota_space = V.subspace([V.basis()[i]])
# if IS.intersection(single_iota_space) == single_iota_space:
# I_j.append(i)
# else:
# next_r = True
# break
# if next_r == True:
# break
# if next_r == True:
# continue
# return [False, [IS, I_j, r]]
return [True, None]
def check_minpoly_condition(M, NUM_CELLS):
max_period = 2*NUM_CELLS
all_fulfilled = True
M_temp = M
for i in range(1, max_period + 1):
if not ((M_temp.minimal_polynomial().degree() == NUM_CELLS) and (M_temp.minimal_polynomial().is_irreducible() == True)):
all_fulfilled = False
break
M_temp = M * M_temp
return all_fulfilled
def generate_matrix(FIELD, FIELD_SIZE, NUM_CELLS):
if FIELD == 0:
print("Matrix generation not implemented for GF(2^n).")
exit(1)
elif FIELD == 1:
mds_matrix = create_mds_p(FIELD_SIZE, NUM_CELLS)
result_1 = algorithm_1(mds_matrix, NUM_CELLS)
result_2 = algorithm_2(mds_matrix, NUM_CELLS)
result_3 = algorithm_3(mds_matrix, NUM_CELLS)
while result_1[0] == False or result_2[0] == False or result_3[0] == False:
mds_matrix = create_mds_p(FIELD_SIZE, NUM_CELLS)
result_1 = algorithm_1(mds_matrix, NUM_CELLS)
result_2 = algorithm_2(mds_matrix, NUM_CELLS)
result_3 = algorithm_3(mds_matrix, NUM_CELLS)
return mds_matrix
def generate_matrix_full(NUM_CELLS):
M = None
if t == 2:
M = matrix.circulant(vector([F(2), F(1)]))
elif t == 3:
M = matrix.circulant(vector([F(2), F(1), F(1)]))
elif t == 4:
M = matrix(F, [[F(5), F(7), F(1), F(3)], [F(4), F(6), F(1), F(1)], [F(1), F(3), F(5), F(7)], [F(1), F(1), F(4), F(6)]])
elif (t % 4) == 0:
M = matrix(F, t, t)
# M_small = matrix.circulant(vector([F(3), F(2), F(1), F(1)]))
M_small = matrix(F, [[F(5), F(7), F(1), F(3)], [F(4), F(6), F(1), F(1)], [F(1), F(3), F(5), F(7)], [F(1), F(1), F(4), F(6)]])
small_num = t // 4
for i in range(0, small_num):
for j in range(0, small_num):
if i == j:
M[i*4:(i+1)*4,j*4:(j+1)*4] = 2* M_small
else:
M[i*4:(i+1)*4,j*4:(j+1)*4] = M_small
else:
print("Error: No matrix for these parameters.")
exit()
return M
def generate_matrix_partial(FIELD, FIELD_SIZE, NUM_CELLS): ## TODO: Prioritize small entries
entry_max_bit_size = FIELD_SIZE
if FIELD == 0:
print("Matrix generation not implemented for GF(2^n).")
exit(1)
elif FIELD == 1:
M = None
if t == 2:
M = matrix(F, [[F(2), F(1)], [F(1), F(3)]])
elif t == 3:
M = matrix(F, [[F(2), F(1), F(1)], [F(1), F(2), F(1)], [F(1), F(1), F(3)]])
else:
M_circulant = matrix.circulant(vector([F(0)] + [F(1) for _ in range(0, NUM_CELLS - 1)]))
M_diagonal = matrix.diagonal([F(grain_random_bits(entry_max_bit_size)) for _ in range(0, NUM_CELLS)])
M = M_circulant + M_diagonal
# while algorithm_1(M, NUM_CELLS)[0] == False or algorithm_2(M, NUM_CELLS)[0] == False or algorithm_3(M, NUM_CELLS)[0] == False:
while check_minpoly_condition(M, NUM_CELLS) == False:
M_diagonal = matrix.diagonal([F(grain_random_bits(entry_max_bit_size)) for _ in range(0, NUM_CELLS)])
M = M_circulant + M_diagonal
if(algorithm_1(M, NUM_CELLS)[0] == False or algorithm_2(M, NUM_CELLS)[0] == False or algorithm_3(M, NUM_CELLS)[0] == False):
print("Error: Generated partial matrix is not secure w.r.t. subspace trails.")
exit()
return M
def generate_matrix_partial_small_entries(FIELD, FIELD_SIZE, NUM_CELLS):
if FIELD == 0:
print("Matrix generation not implemented for GF(2^n).")
exit(1)
elif FIELD == 1:
M_circulant = matrix.circulant(vector([F(0)] + [F(1) for _ in range(0, NUM_CELLS - 1)]))
combinations = list(itertools.product(range(2, 6), repeat=NUM_CELLS))
for entry in combinations:
M = M_circulant + matrix.diagonal(vector(F, list(entry)))
print(M)
# if M.is_invertible() == False or algorithm_1(M, NUM_CELLS)[0] == False or algorithm_2(M, NUM_CELLS)[0] == False or algorithm_3(M, NUM_CELLS)[0] == False:
if M.is_invertible() == False or check_minpoly_condition(M, NUM_CELLS) == False:
continue
return M
def matrix_partial_m_1(matrix_partial, NUM_CELLS):
M_circulant = matrix.identity(F, NUM_CELLS)
return matrix_partial - M_circulant
def print_linear_layer(M, n, t):
print("n:", n)
print("t:", t)
print("N:", (n * t))
print("Result Algorithm 1:\n", algorithm_1(M, NUM_CELLS))
print("Result Algorithm 2:\n", algorithm_2(M, NUM_CELLS))
print("Result Algorithm 3:\n", algorithm_3(M, NUM_CELLS))
hex_length = int(ceil(float(n) / 4)) + 2 # +2 for "0x"
print("Prime number:", "0x" + hex(PRIME_NUMBER))
matrix_string = "["
for i in range(0, t):
matrix_string += str(["{0:#0{1}x}".format(int(entry), hex_length) for entry in M[i]])
if i < (t-1):
matrix_string += ","
matrix_string += "]"
print("MDS matrix:\n", matrix_string)
def calc_equivalent_matrices(MDS_matrix_field):
# Following idea: Split M into M' * M'', where M'' is "cheap" and M' can move before the partial nonlinear layer
# The "previous" matrix layer is then M * M'. Due to the construction of M', the M[0,0] and v values will be the same for the new M' (and I also, obviously)
# Thus: Compute the matrices, store the w_hat and v_hat values
MDS_matrix_field_transpose = MDS_matrix_field.transpose()
w_hat_collection = []
v_collection = []
v = MDS_matrix_field_transpose[[0], list(range(1,t))]
M_mul = MDS_matrix_field_transpose
M_i = matrix(F, t, t)
for i in range(R_P_FIXED - 1, -1, -1):
M_hat = M_mul[list(range(1,t)), list(range(1,t))]
w = M_mul[list(range(1,t)), [0]]
v = M_mul[[0], list(range(1,t))]
v_collection.append(v.list())
w_hat = M_hat.inverse() * w
w_hat_collection.append(w_hat.list())
# Generate new M_i, and multiplication M * M_i for "previous" round
M_i = matrix.identity(t)
M_i[list(range(1,t)), list(range(1,t))] = M_hat
M_mul = MDS_matrix_field_transpose * M_i
return M_i, v_collection, w_hat_collection, MDS_matrix_field_transpose[0, 0]
def calc_equivalent_constants(constants, MDS_matrix_field):
constants_temp = [constants[index:index+t] for index in range(0, len(constants), t)]
MDS_matrix_field_transpose = MDS_matrix_field.transpose()
# Start moving round constants up
# Calculate c_i' = M^(-1) * c_(i+1)
# Split c_i': Add c_i'[0] AFTER the S-box, add the rest to c_i
# I.e.: Store c_i'[0] for each of the partial rounds, and make c_i = c_i + c_i' (where now c_i'[0] = 0)
num_rounds = R_F_FIXED + R_P_FIXED
R_f = R_F_FIXED / 2
for i in range(num_rounds - 2 - R_f, R_f - 1, -1):
inv_cip1 = list(vector(constants_temp[i+1]) * MDS_matrix_field_transpose.inverse())
constants_temp[i] = list(vector(constants_temp[i]) + vector([0] + inv_cip1[1:]))
constants_temp[i+1] = [inv_cip1[0]] + [0] * (t-1)
return constants_temp
def poseidon(input_words, matrix, round_constants):
R_f = int(R_F_FIXED / 2)
round_constants_counter = 0
state_words = list(input_words)
# First full rounds
for r in range(0, R_f):
# Round constants, nonlinear layer, matrix multiplication
for i in range(0, t):
state_words[i] = state_words[i] + round_constants[round_constants_counter]
round_constants_counter += 1
for i in range(0, t):
state_words[i] = (state_words[i])^alpha
state_words = list(matrix * vector(state_words))
# Middle partial rounds
for r in range(0, R_P_FIXED):
# Round constants, nonlinear layer, matrix multiplication
for i in range(0, t):
state_words[i] = state_words[i] + round_constants[round_constants_counter]
round_constants_counter += 1
state_words[0] = (state_words[0])^alpha
state_words = list(matrix * vector(state_words))
# Last full rounds
for r in range(0, R_f):
# Round constants, nonlinear layer, matrix multiplication
for i in range(0, t):
state_words[i] = state_words[i] + round_constants[round_constants_counter]
round_constants_counter += 1
for i in range(0, t):
state_words[i] = (state_words[i])^alpha
state_words = list(matrix * vector(state_words))
return state_words
def poseidon2(input_words, matrix_full, matrix_partial, round_constants):
R_f = int(R_F_FIXED / 2)
round_constants_counter = 0
state_words = list(input_words)
# First matrix mul
state_words = list(matrix_full * vector(state_words))
# First full rounds
for r in range(0, R_f):
# Round constants, nonlinear layer, matrix multiplication
for i in range(0, t):
state_words[i] = state_words[i] + round_constants[round_constants_counter]
round_constants_counter += 1
for i in range(0, t):
state_words[i] = (state_words[i])^alpha
state_words = list(matrix_full * vector(state_words))
# Middle partial rounds
for r in range(0, R_P_FIXED):
# Round constants, nonlinear layer, matrix multiplication
for i in range(0, t):
state_words[i] = state_words[i] + round_constants[round_constants_counter]
round_constants_counter += 1
state_words[0] = (state_words[0])^alpha
state_words = list(matrix_partial * vector(state_words))
# Last full rounds
for r in range(0, R_f):
# Round constants, nonlinear layer, matrix multiplication
for i in range(0, t):
state_words[i] = state_words[i] + round_constants[round_constants_counter]
round_constants_counter += 1
for i in range(0, t):
state_words[i] = (state_words[i])^alpha
state_words = list(matrix_full * vector(state_words))
return state_words
def to_bytes(value, indent):
l = len(hex(p - 1))
if l % 2 == 1:
l = l + 1
value = hex(int(value))[2:]
value = value.zfill(l - 2)
value_bytes = reversed(["0x" + value[i:i + 2] for i in range(0, len(value), 2)])
print(" " * indent + ", ".join(value_bytes) + ",")
print("#pragma once")
print(f"#ifndef {CURVE_NAME.upper()}_POSEIDON2_H")
print(f"#define {CURVE_NAME.upper()}_POSEIDON2_H")
print()
print(f"namespace poseidon2_constants_{CURVE_NAME} {{")
for t in TS:
NUM_CELLS = t
R_F_FIXED, R_P_FIXED, _, _ = poseidon_calc_final_numbers_fixed(p, t, alpha, 128, True)
INIT_SEQUENCE = []
PRIME_NUMBER = p
F = GF(PRIME_NUMBER)
grain_gen = grain_sr_generator()
# Init
init_generator(FIELD, SBOX, FIELD_SIZE, NUM_CELLS, R_F_FIXED, R_P_FIXED)
# Round constants
round_constants = generate_constants(FIELD, FIELD_SIZE, NUM_CELLS, R_F_FIXED, R_P_FIXED, PRIME_NUMBER)
# print_round_constants(round_constants, FIELD_SIZE, FIELD)
# Matrix
# MDS = generate_matrix(FIELD, FIELD_SIZE, NUM_CELLS)
MATRIX_FULL = generate_matrix_full(NUM_CELLS)
MATRIX_PARTIAL = generate_matrix_partial(FIELD, FIELD_SIZE, NUM_CELLS)
MATRIX_PARTIAL_DIAGONAL_M_1 = [matrix_partial_m_1(MATRIX_PARTIAL, NUM_CELLS)[i,i] for i in range(0, NUM_CELLS)]
print()
print(f" namespace t{t} {{")
print(f" int internal_rounds = {R_P_FIXED};")
print()
print(f" int alpha = {alpha};")
print()
# # MDS
# print("pub static ref MDS{}: Vec<Vec<Scalar>> = vec![".format(t))
# for vec in MDS:
# print("vec![", end="")
# for val in vec:
# to_hex(val)
# print("],")
# print("];")
# print()
# Efficient partial matrix (diagonal - 1)
print(" unsigned char mat_diag_m_1[] = {")
for val in MATRIX_PARTIAL_DIAGONAL_M_1:
to_bytes(val, 6)
print(" };")
print()
# # Efficient partial matrix (full)
# print(" unsigned char mat_internal[] = {")
# for vec in MATRIX_PARTIAL:
# for val in vec:
# to_bytes(val, 6)
# print()
# print(" };")
# print()
# Round constants
print(" unsigned char round_constants[] = {")
for (i,val) in enumerate(round_constants):
if (i % t == 0 or (i < (R_F_FIXED / 2) * t) or (i > (R_F_FIXED / 2 + R_P_FIXED) * t)):
to_bytes(val, 6)
print(" };")
print(" }")
print()
# state_in = vector([F(i) for i in range(t)])
# # state_out = poseidon(state_in, MDS, round_constants)
# state_out = poseidon2(state_in, MATRIX_FULL, MATRIX_PARTIAL, round_constants)
# for (i,val) in enumerate(state_in):
# if i % t == 0:
# print("vec![", end="")
# to_bytes(val)
# if i % t == t - 1:
# print("],")
# print("];")
# for (i,val) in enumerate(state_out):
# if i % t == 0:
# print("vec![", end="")
# to_bytes(val)
# if i % t == t - 1:
# print("],")
# print("];")
print("}")
print("#endif")

View File

@@ -0,0 +1,130 @@
#pragma once
#ifndef POSEIDON2_H
#define POSEIDON2_H
#include <cstdint>
#include <stdexcept>
#include "gpu-utils/device_context.cuh"
#include "gpu-utils/error_handler.cuh"
#include "utils/utils.h"
/**
* @namespace poseidon2
* Implementation of the [Poseidon2 hash function](https://eprint.iacr.org/2019/458.pdf)
* Specifically, the optimized [Filecoin version](https://spec.filecoin.io/algorithms/crypto/poseidon/)
*/
namespace poseidon2 {
/**
* For most of the Poseidon2 configurations this is the case
*/
const int EXTERNAL_ROUNDS_DEFAULT = 8;
enum DiffusionStrategy {
DEFAULT_DIFFUSION,
MONTGOMERY,
};
enum MdsType { DEFAULT_MDS, PLONKY };
enum PoseidonMode {
COMPRESSION,
PERMUTATION,
};
/**
* @struct Poseidon2Constants
* This constants are enough to define a Poseidon2 instantce
* @param round_constants A pointer to round constants allocated on the device
* @param mds_matrix A pointer to an mds matrix allocated on the device
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
* @param sparse_matrices A pointer to sparse matrices allocated on the device
*/
template <typename S>
struct Poseidon2Constants {
int width;
int alpha;
int internal_rounds;
int external_rounds;
S* round_constants = nullptr;
S* internal_matrix_diag = nullptr;
MdsType mds_type;
DiffusionStrategy diffusion;
};
/**
* @struct Poseidon2Config
* Struct that encodes various Poseidon2 parameters.
*/
struct Poseidon2Config {
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
bool are_states_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
PoseidonMode mode;
int output_index;
bool loop_state; /**< If true, hash results will also be copied in the input pointer in aligned format */
bool
is_async; /**< Whether to run the Poseidon2 asynchronously. If set to `true`, the poseidon_hash function will be
* non-blocking and you'd need to synchronize it explicitly by running
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the poseidon_hash
* function will block the current CPU thread. */
};
static Poseidon2Config default_poseidon2_config(
int t, const device_context::DeviceContext& ctx = device_context::get_default_device_context())
{
Poseidon2Config config = {
ctx, // ctx
false, // are_states_on_device
false, // are_outputs_on_device
PoseidonMode::COMPRESSION,
1, // output_index
false, // loop_state
false, // is_async
};
return config;
}
template <typename S>
cudaError_t create_optimized_poseidon2_constants(
int width,
int alpha,
int internal_rounds,
int external_rounds,
const S* round_constants,
const S* internal_matrix_diag,
MdsType mds_type,
DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
Poseidon2Constants<S>* poseidon_constants);
/**
* Loads pre-calculated optimized constants, moves them to the device
*/
template <typename S>
cudaError_t init_optimized_poseidon2_constants(
int width,
MdsType mds_type,
DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
Poseidon2Constants<S>* constants);
/**
* Compute the poseidon hash over a sequence of preimages.
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
* @param T size of the poseidon state, should be equal to {arity + 1}
* @param states a pointer to the input data. May be allocated on device or on host, regulated
* by the config. May point to a string of preimages or a string of states filled with preimages.
* @param output a pointer to the output data. May be allocated on device or on host, regulated
* by the config. Must be at least of size [number_of_states](@ref number_of_states)
* @param number_of_states number of input blocks of size T-1 (arity)
*/
template <typename S, int T>
cudaError_t poseidon2_hash(
S* states,
S* output,
size_t number_of_states,
const Poseidon2Constants<S>& constants,
const Poseidon2Config& config);
} // namespace poseidon2
#endif

View File

@@ -3,6 +3,7 @@ if (EXT_FIELD)
endif ()
SET(SUPPORTED_FIELDS_WITHOUT_NTT grumpkin)
SET(SUPPORTED_FIELDS_WITHOUT_POSEIDON2 bls12_381;bls12_377;grumpkin;bw6_761;stark252)
set(TARGET icicle_field)
@@ -27,6 +28,10 @@ if (DEFINED CURVE)
list(APPEND FIELD_SOURCE ${SRC}/poseidon/tree/merkle.cu)
endif()
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_POSEIDON2)
list(APPEND FIELD_SOURCE ${SRC}/poseidon2/extern.cu)
endif()
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_NTT)
list(APPEND FIELD_SOURCE ${SRC}/ntt/extern.cu)
list(APPEND FIELD_SOURCE ${SRC}/ntt/kernel_ntt.cu)

2
icicle/src/poseidon2/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
test_poseidon
test_poseidon_release

View File

@@ -0,0 +1,7 @@
test_poseidon: test.cu poseidon.cu kernels.cu constants.cu
nvcc -o test_poseidon -I../../include -DFIELD=bn254 -DFIELD_ID=1 -DCURVE_ID=1 -DDEVMODE -DDEBUG extern.cu test.cu
./test_poseidon
test_poseidon_release: test.cu poseidon.cu kernels.cu constants.cu
nvcc -o test_poseidon_release -I../../include -DFIELD=bn254 -DFIELD_ID=1 -DCURVE_ID=1 extern.cu test.cu
./test_poseidon_release

View File

@@ -0,0 +1,120 @@
#include "poseidon2/poseidon2.cuh"
/// These are pre-calculated constants for different curves
#include "fields/id.h"
#if FIELD_ID == BN254
#include "poseidon2/constants/bn254_poseidon2.h"
using namespace poseidon2_constants_bn254;
#elif FIELD_ID == BLS12_381
#include "poseidon2/constants/bls12_381_poseidon2.h"
using namespace poseidon2_constants_bls12_381;
#elif FIELD_ID == BLS12_377
#include "poseidon2/constants/bls12_377_poseidon2.h"
using namespace poseidon2_constants_bls12_377;
#elif FIELD_ID == BW6_761
#include "poseidon2/constants/bw6_761_poseidon2.h"
using namespace poseidon2_constants_bw6_761;
#elif FIELD_ID == GRUMPKIN
#include "poseidon2/constants/grumpkin_poseidon2.h"
using namespace poseidon2_constants_grumpkin;
#elif FIELD_ID == BABY_BEAR
#include "poseidon2/constants/babybear_poseidon2.h"
using namespace poseidon2_constants_babybear;
#endif
namespace poseidon2 {
template <typename S>
cudaError_t create_optimized_poseidon2_constants(
int width,
int alpha,
int internal_rounds,
int external_rounds,
const S* round_constants,
const S* internal_matrix_diag,
MdsType mds_type,
DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
Poseidon2Constants<S>* poseidon_constants)
{
cudaFree(nullptr); // Temporary solution
if (!(alpha == 3 || alpha == 5 || alpha == 7 || alpha == 11)) {
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Invalid alpha value");
}
if (external_rounds % 2) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Invalid external rounds"); }
CHK_INIT_IF_RETURN();
cudaStream_t& stream = ctx.stream;
int round_constants_len = width * (external_rounds) + internal_rounds;
int internal_matrix_len = width;
// Malloc memory for copying round constants and internal matrix
S* d_constants;
CHK_IF_RETURN(cudaMallocAsync(&d_constants, sizeof(S) * (internal_matrix_len + round_constants_len), stream));
S* d_internal_matrix = d_constants;
S* d_round_constants = d_constants + internal_matrix_len;
// Copy internal matrix
CHK_IF_RETURN(cudaMemcpyAsync(
d_internal_matrix, internal_matrix_diag, sizeof(S) * internal_matrix_len, cudaMemcpyHostToDevice, stream));
// Copy round constants
CHK_IF_RETURN(cudaMemcpyAsync(
d_round_constants, round_constants, sizeof(S) * round_constants_len, cudaMemcpyHostToDevice, stream));
// Make sure all the constants have been copied
CHK_IF_RETURN(cudaStreamSynchronize(stream));
*poseidon_constants = {width, alpha, internal_rounds, external_rounds, d_round_constants, d_internal_matrix,
mds_type, diffusion};
return CHK_LAST();
}
template <typename S>
cudaError_t init_optimized_poseidon2_constants(
int width,
MdsType mds_type,
DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
Poseidon2Constants<S>* poseidon2_constants)
{
cudaFree(nullptr); // Temporary solution
CHK_INIT_IF_RETURN();
#define P2_CONSTANTS_DEF(width) \
case width: \
internal_rounds = t##width::internal_rounds; \
round_constants = t##width::round_constants; \
internal_matrix = t##width::mat_diag_m_1; \
alpha = t##width::alpha; \
break;
int alpha;
int external_rounds = EXTERNAL_ROUNDS_DEFAULT;
int internal_rounds;
unsigned char* round_constants;
unsigned char* internal_matrix;
switch (width) {
P2_CONSTANTS_DEF(2)
P2_CONSTANTS_DEF(3)
P2_CONSTANTS_DEF(4)
P2_CONSTANTS_DEF(8)
P2_CONSTANTS_DEF(12)
P2_CONSTANTS_DEF(16)
P2_CONSTANTS_DEF(20)
P2_CONSTANTS_DEF(24)
default:
THROW_ICICLE_ERR(
IcicleError_t::InvalidArgument,
"init_optimized_poseidon2_constants: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
}
S* h_round_constants = reinterpret_cast<S*>(round_constants);
S* h_internal_matrix = reinterpret_cast<S*>(internal_matrix);
create_optimized_poseidon2_constants(
width, alpha, internal_rounds, external_rounds, h_round_constants, h_internal_matrix, mds_type, diffusion, ctx,
poseidon2_constants);
return CHK_LAST();
}
} // namespace poseidon2

View File

@@ -0,0 +1,63 @@
#include "utils/utils.h"
#include "fields/field_config.cuh"
using namespace field_config;
#include "poseidon.cu"
namespace poseidon2 {
extern "C" cudaError_t CONCAT_EXPAND(FIELD, create_optimized_poseidon2_constants_cuda)(
int width,
int alpha,
int internal_rounds,
int external_rounds,
const scalar_t* round_constants,
const scalar_t* internal_matrix_diag,
MdsType mds_type,
DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
Poseidon2Constants<scalar_t>* poseidon_constants)
{
return create_optimized_poseidon2_constants<scalar_t>(
width, alpha, internal_rounds, external_rounds, round_constants, internal_matrix_diag, mds_type, diffusion, ctx,
poseidon_constants);
}
extern "C" cudaError_t CONCAT_EXPAND(FIELD, init_optimized_poseidon2_constants_cuda)(
int width,
MdsType mds_type,
DiffusionStrategy diffusion,
device_context::DeviceContext& ctx,
Poseidon2Constants<scalar_t>* constants)
{
return init_optimized_poseidon2_constants<scalar_t>(width, mds_type, diffusion, ctx, constants);
}
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon2_hash_cuda)(
scalar_t* input,
scalar_t* output,
int number_of_states,
int width,
const Poseidon2Constants<scalar_t>* constants,
Poseidon2Config* config)
{
#define P2_HASH_T(width) \
case width: \
return poseidon2_hash<scalar_t, width>(input, output, number_of_states, *constants, *config);
switch (width) {
P2_HASH_T(2)
P2_HASH_T(3)
P2_HASH_T(4)
P2_HASH_T(8)
P2_HASH_T(12)
P2_HASH_T(16)
P2_HASH_T(20)
P2_HASH_T(24)
default:
THROW_ICICLE_ERR(
IcicleError_t::InvalidArgument, "PoseidonHash: #arity must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
}
return CHK_LAST();
}
} // namespace poseidon2

View File

@@ -0,0 +1,233 @@
#include "poseidon/poseidon.cuh"
#include "gpu-utils/modifiers.cuh"
namespace poseidon2 {
template <typename S>
DEVICE_INLINE S sbox_el(S element, const int alpha)
{
S result2 = S::sqr(element);
switch (alpha) {
case 3:
return result2 * element;
case 5:
return S::sqr(result2) * element;
case 7:
return S::sqr(result2) * result2 * element;
case 11:
S result8 = S::sqr(S::sqr(result2));
return result8 * result2 * element;
}
}
template <typename S, int T>
DEVICE_INLINE S sbox(S state[T], const int alpha)
{
for (int i = 0; i < T; i++) {
state[i] = sbox_el(state[i], alpha);
}
}
template <typename S, int T>
DEVICE_INLINE S add_rc(S state[T], size_t rc_offset, const S* rc)
{
for (int i = 0; i < T; i++) {
state[i] = state[i] + rc[rc_offset + i];
}
}
template <typename S>
__device__ S mds_light_4x4(S s[4])
{
S t0 = s[0] + s[1];
S t1 = s[2] + s[3];
S t2 = s[1] + s[1] + t1;
S t3 = s[3] + s[3] + t0;
S t4 = S::template mul_unsigned<4>(t1) + t3;
S t5 = S::template mul_unsigned<4>(t0) + t2;
s[0] = t3 + t5;
s[1] = t5;
s[2] = t2 + t4;
s[3] = t4;
}
// Multiply a 4-element vector x by:
// [ 2 3 1 1 ]
// [ 1 2 3 1 ]
// [ 1 1 2 3 ]
// [ 3 1 1 2 ].
// https://github.com/Plonky3/Plonky3/blob/main/poseidon2/src/matrix.rs#L36
template <typename S>
__device__ S mds_light_plonky_4x4(S s[4])
{
S t01 = s[0] + s[1];
S t23 = s[2] + s[3];
S t0123 = t01 + t23;
S t01123 = t0123 + s[1];
S t01233 = t0123 + s[3];
s[3] = t01233 + S::template mul_unsigned<2>(s[0]);
s[1] = t01123 + S::template mul_unsigned<2>(s[2]);
s[0] = t01123 + t01;
s[2] = t01233 + t23;
}
template <typename S, int T>
__device__ S mds_light(S state[T], MdsType mds)
{
S sum;
switch (T) {
case 2:
// Matrix circ(2, 1)
// [2, 1]
// [1, 2]
sum = state[0] + state[1];
state[0] = state[0] + sum;
state[1] = state[1] + sum;
break;
case 3:
// Matrix circ(2, 1, 1)
// [2, 1, 1]
// [1, 2, 1]
// [1, 1, 2]
sum = state[0] + state[1] + state[2];
state[0] = state[0] + sum;
state[1] = state[1] + sum;
state[2] = state[2] + sum;
break;
case 4:
case 8:
case 12:
case 16:
case 20:
case 24:
for (int i = 0; i < T; i += 4) {
switch (mds) {
case MdsType::DEFAULT_MDS:
mds_light_4x4(&state[i]);
break;
case MdsType::PLONKY:
mds_light_plonky_4x4(&state[i]);
}
}
S sums[4] = {state[0], state[1], state[2], state[3]};
for (int i = 4; i < T; i += 4) {
sums[0] = sums[0] + state[i];
sums[1] = sums[1] + state[i + 1];
sums[2] = sums[2] + state[i + 2];
sums[3] = sums[3] + state[i + 3];
}
for (int i = 0; i < T; i++) {
state[i] = state[i] + sums[i % 4];
}
break;
}
}
template <typename S, int T>
__device__ S internal_round(S state[T], size_t rc_offset, const Poseidon2Constants<S>& constants)
{
S element = state[0];
element = element + constants.round_constants[rc_offset];
element = sbox_el<S>(element, constants.alpha);
S sum;
switch (T) {
case 2:
// [2, 1]
// [1, 3]
sum = element + state[1];
state[0] = element + sum;
state[1] = S::template mul_unsigned<2>(state[1]) + sum;
break;
case 3:
// [2, 1, 1]
// [1, 2, 1]
// [1, 1, 3]
sum = element + state[1] + state[2];
state[0] = element + sum;
state[1] = state[1] + sum;
state[2] = S::template mul_unsigned<2>(state[2]) + sum;
break;
case 4:
case 8:
case 12:
case 16:
case 20:
case 24:
typename S::Wide wide_sum = S::Wide::from_field(element);
for (int i = 1; i < T; i++) {
wide_sum = wide_sum + S::Wide::from_field(state[i]);
}
sum = S::reduce(wide_sum);
switch (constants.diffusion) {
case DiffusionStrategy::DEFAULT_DIFFUSION:
state[0] = element * constants.internal_matrix_diag[0] + sum;
for (int i = 1; i < T; i++) {
state[i] = state[i] * constants.internal_matrix_diag[i] + sum;
}
break;
case DiffusionStrategy::MONTGOMERY:
state[0] = S::from_montgomery(element * constants.internal_matrix_diag[0] + sum);
for (int i = 1; i < T; i++) {
state[i] = S::from_montgomery(state[i] * constants.internal_matrix_diag[i] + sum);
}
break;
}
}
}
template <typename S, int T>
__global__ void
poseidon2_permutation_kernel(S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S> constants)
{
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) { return; }
S state[T];
UNROLL
for (int i = 0; i < T; i++) {
state[i] = states[idx * T + i];
}
unsigned int rn;
mds_light<S, T>(state, constants.mds_type);
size_t rc_offset = 0;
// External rounds
for (rn = 0; rn < constants.external_rounds / 2; rn++) {
add_rc<S, T>(state, rc_offset, constants.round_constants);
sbox<S, T>(state, constants.alpha);
mds_light<S, T>(state, constants.mds_type);
rc_offset += T;
}
// Internal rounds
for (; rn < constants.external_rounds / 2 + constants.internal_rounds; rn++) {
internal_round<S, T>(state, rc_offset, constants);
rc_offset++;
}
// External rounds
for (; rn < constants.external_rounds + constants.internal_rounds; rn++) {
add_rc<S, T>(state, rc_offset, constants.round_constants);
sbox<S, T>(state, constants.alpha);
mds_light<S, T>(state, constants.mds_type);
rc_offset += T;
}
UNROLL
for (int i = 0; i < T; i++) {
states_out[idx * T + i] = state[i];
}
}
// These function is just doing copy from the states to the output
template <typename S, int T>
__global__ void get_hash_results(S* states, size_t number_of_states, int index, S* out)
{
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) { return; }
out[idx] = states[idx * T + index];
}
} // namespace poseidon2

View File

@@ -0,0 +1,76 @@
#include "poseidon2/poseidon2.cuh"
#include "constants.cu"
#include "kernels.cu"
namespace poseidon2 {
static int poseidon_block_size = 128;
template <typename S, int T>
int poseidon_number_of_blocks(size_t number_of_states)
{
return number_of_states / poseidon_block_size + static_cast<bool>(number_of_states % poseidon_block_size);
}
template <typename S, int T>
cudaError_t permute_many(
S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S>& constants, cudaStream_t& stream)
{
poseidon2_permutation_kernel<S, T>
<<<poseidon_number_of_blocks<S, T>(number_of_states), poseidon_block_size, 0, stream>>>(
states, states_out, number_of_states, constants);
CHK_IF_RETURN(cudaPeekAtLastError());
return CHK_LAST();
}
template <typename S, int T>
cudaError_t poseidon2_hash(
S* states,
S* output,
size_t number_of_states,
const Poseidon2Constants<S>& constants,
const Poseidon2Config& config)
{
CHK_INIT_IF_RETURN();
cudaStream_t& stream = config.ctx.stream;
S* d_states;
if (config.are_states_on_device) {
d_states = states;
} else {
// allocate memory for {number_of_states} states of {t} scalars each
CHK_IF_RETURN(cudaMallocAsync(&d_states, number_of_states * T * sizeof(S), stream))
CHK_IF_RETURN(cudaMemcpyAsync(d_states, states, number_of_states * T * sizeof(S), cudaMemcpyHostToDevice, stream))
}
cudaError_t hash_error = permute_many<S, T>(d_states, d_states, number_of_states, constants, stream);
CHK_IF_RETURN(hash_error);
if (config.mode == PoseidonMode::COMPRESSION) {
S* output_device;
if (config.are_outputs_on_device) {
output_device = output;
} else {
CHK_IF_RETURN(cudaMallocAsync(&output_device, number_of_states * sizeof(S), stream))
}
get_hash_results<S, T><<<poseidon_number_of_blocks<S, T>(number_of_states), poseidon_block_size, 0, stream>>>(
d_states, number_of_states, config.output_index, output_device);
CHK_IF_RETURN(cudaPeekAtLastError());
if (!config.are_outputs_on_device) {
CHK_IF_RETURN(
cudaMemcpyAsync(output, output_device, number_of_states * sizeof(S), cudaMemcpyDeviceToHost, stream));
CHK_IF_RETURN(cudaFreeAsync(output_device, stream));
}
} else {
if (!config.are_states_on_device || !config.are_outputs_on_device) {
CHK_IF_RETURN(
cudaMemcpyAsync(output, d_states, number_of_states * T * sizeof(S), cudaMemcpyDeviceToHost, stream));
}
}
if (!config.are_states_on_device) CHK_IF_RETURN(cudaFreeAsync(d_states, stream));
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
return CHK_LAST();
}
} // namespace poseidon2

1101
icicle/src/poseidon2/test.cu Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -11,14 +11,13 @@ repository.workspace = true
[dependencies]
icicle-cuda-runtime = { workspace = true }
ark-ff = { version = "0.4.0", optional = true }
ark-ec = { version = "0.4.0", optional = true, features = ["parallel"] }
ark-poly = { version = "0.4.0", optional = true }
ark-std = { version = "0.4.0", optional = true }
rayon = "1.8.1"
hex = "0.4"
criterion = "0.3"
[dev-dependencies]

View File

@@ -3,6 +3,7 @@ use crate::traits::ArkConvertible;
use crate::traits::{FieldConfig, FieldImpl, MontgomeryConvertible};
#[cfg(feature = "arkworks")]
use ark_ff::{BigInteger, Field as ArkField, PrimeField};
use hex::FromHex;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::DeviceSlice;
@@ -82,6 +83,12 @@ impl<const NUM_LIMBS: usize, F: FieldConfig> FieldImpl for Field<NUM_LIMBS, F> {
Self::from(limbs)
}
fn from_hex(s: &str) -> Self {
let mut bytes = Vec::from_hex(if s.starts_with("0x") { &s[2..] } else { s }).expect("Invalid hex string");
bytes.reverse();
Self::from_bytes_le(&bytes)
}
fn zero() -> Self {
FieldImpl::from_u32(0)
}

View File

@@ -6,6 +6,7 @@ pub mod msm;
pub mod ntt;
pub mod polynomials;
pub mod poseidon;
pub mod poseidon2;
#[doc(hidden)]
pub mod tests;
pub mod traits;

View File

@@ -56,10 +56,11 @@ pub fn check_poseidon_hash_many<F: FieldImpl>()
where
<F as FieldImpl>::Config: Poseidon<F>,
{
let arity = 2u32;
let constants = init_poseidon::<F>(arity as u32);
for arity in [2, 4] {
let constants = init_poseidon::<F>(arity as u32);
_check_poseidon_hash_many(constants);
_check_poseidon_hash_many(constants);
}
}
pub fn check_poseidon_custom_config<F: FieldImpl>(field_bytes: usize, field_prefix: &str, partial_rounds: u32)

View File

@@ -0,0 +1,384 @@
#[doc(hidden)]
pub mod tests;
use icicle_cuda_runtime::{
device::check_device,
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
memory::{DeviceSlice, HostOrDeviceSlice},
};
use crate::{error::IcicleResult, traits::FieldImpl};
#[repr(C)]
#[derive(Debug, Clone)]
pub enum DiffusionStrategy {
Default,
Montgomery,
}
#[repr(C)]
#[derive(Debug, Clone)]
pub enum MdsType {
Default,
Plonky,
}
#[repr(C)]
#[derive(Debug, Clone)]
pub enum PoseidonMode {
Compression,
Permutation,
}
#[repr(C)]
pub struct Poseidon2Constants<'a, F: FieldImpl> {
width: u32,
alpha: u32,
internal_rounds: u32,
external_rounds: u32,
round_constants: &'a DeviceSlice<F>,
inernal_matrix_diag: &'a DeviceSlice<F>,
pub mds_type: MdsType,
pub diffusion: DiffusionStrategy,
}
impl<F: FieldImpl> std::fmt::Debug for Poseidon2Constants<'_, F> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}, {}, {}, {}",
self.width, self.alpha, self.internal_rounds, self.external_rounds
)
}
}
/// Struct that encodes Poseidon parameters to be passed into the [poseidon_hash_many](poseidon_hash_many) function.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct Poseidon2Config<'a> {
/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
pub ctx: DeviceContext<'a>,
are_states_on_device: bool,
are_outputs_on_device: bool,
pub mode: PoseidonMode,
pub output_index: u32,
/// If true, hash results will also be copied in the input pointer in aligned format
pub loop_state: bool,
/// Whether to run Poseidon asynchronously. If set to `true`, Poseidon will be non-blocking
/// and you'd need to synchronize it explicitly by running `cudaStreamSynchronize` or `cudaDeviceSynchronize`.
/// If set to `false`, Poseidon will block the current CPU thread.
pub is_async: bool,
}
impl<'a> Default for Poseidon2Config<'a> {
fn default() -> Self {
Self::default_for_device(DEFAULT_DEVICE_ID)
}
}
impl<'a> Poseidon2Config<'a> {
pub fn default_for_device(device_id: usize) -> Self {
Self {
ctx: DeviceContext::default_for_device(device_id),
are_states_on_device: false,
are_outputs_on_device: false,
mode: PoseidonMode::Compression,
output_index: 1,
loop_state: false,
is_async: false,
}
}
}
pub trait Poseidon2<F: FieldImpl> {
fn create_optimized_constants<'a>(
width: u32,
alpha: u32,
internal_rounds: u32,
external_rounds: u32,
round_constants: &mut [F],
internal_matrix_diag: &mut [F],
mds_type: MdsType,
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
) -> IcicleResult<Poseidon2Constants<'a, F>>;
fn load_optimized_constants<'a>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
) -> IcicleResult<Poseidon2Constants<'a, F>>;
fn poseidon_unchecked(
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>;
}
/// Loads pre-calculated poseidon constants on the GPU.
pub fn load_optimized_poseidon2_constants<'a, F>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
) -> IcicleResult<Poseidon2Constants<'a, F>>
where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
<<F as FieldImpl>::Config as Poseidon2<F>>::load_optimized_constants(width, mds_type, diffusion, ctx)
}
/// Creates new instance of poseidon constants on the GPU.
pub fn create_optimized_poseidon2_constants<'a, F>(
width: u32,
alpha: u32,
ctx: &DeviceContext,
internal_rounds: u32,
external_rounds: u32,
round_constants: &mut [F],
internal_matrix_diag: &mut [F],
mds_type: MdsType,
diffusion: DiffusionStrategy,
) -> IcicleResult<Poseidon2Constants<'a, F>>
where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
<<F as FieldImpl>::Config as Poseidon2<F>>::create_optimized_constants(
width,
alpha,
internal_rounds,
external_rounds,
round_constants,
internal_matrix_diag,
mds_type,
diffusion,
ctx,
)
}
/// Computes the poseidon hashes for multiple preimages.
///
/// # Arguments
///
/// * `input` - a pointer to the input data. May point to a vector of preimages or a vector of states filled with preimages.
///
/// * `output` - a pointer to the output data. Must be at least of size [number_of_states](number_of_states)
///
/// * `number_of_states` - number of input blocks of size `arity`
///
/// * `arity` - the arity of the hash function (the size of 1 preimage)
///
/// * `constants` - Poseidon constants.
///
/// * `config` - config used to specify extra arguments of the Poseidon.
pub fn poseidon_hash_many<F>(
states: &mut (impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<F>,
config: &Poseidon2Config,
) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: Poseidon2<F>,
{
if states.len() < (number_of_states * width) as usize {
panic!(
"input len is {}; but needs to be at least {}",
states.len(),
number_of_states * width
);
}
if output.len() < number_of_states as usize {
panic!(
"output len is {}; but needs to be at least {}",
output.len(),
number_of_states
);
}
let ctx_device_id = config
.ctx
.device_id;
if let Some(device_id) = states.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in input and context are different"
);
}
if let Some(device_id) = output.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in output and context are different"
);
}
check_device(ctx_device_id);
let mut local_cfg = config.clone();
local_cfg.are_states_on_device = states.is_on_device();
local_cfg.are_outputs_on_device = output.is_on_device();
<<F as FieldImpl>::Config as Poseidon2<F>>::poseidon_unchecked(
states,
output,
number_of_states,
width,
constants,
&local_cfg,
)
}
#[macro_export]
macro_rules! impl_poseidon2 {
(
$field_prefix:literal,
$field_prefix_ident:ident,
$field:ident,
$field_config:ident
) => {
mod $field_prefix_ident {
use crate::poseidon2::{
$field, $field_config, CudaError, DeviceContext, DiffusionStrategy, MdsType, Poseidon2Config,
Poseidon2Constants,
};
extern "C" {
#[link_name = concat!($field_prefix, "_create_optimized_poseidon2_constants_cuda")]
pub(crate) fn _create_optimized_constants(
width: u32,
alpha: u32,
internal_rounds: u32,
external_rounds: u32,
constants: *mut $field,
internal_matrix_diag: *mut $field,
mds_type: MdsType,
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
poseidon_constants: *mut Poseidon2Constants<$field>,
) -> CudaError;
#[link_name = concat!($field_prefix, "_init_optimized_poseidon2_constants_cuda")]
pub(crate) fn _load_optimized_constants(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
constants: *mut Poseidon2Constants<$field>,
) -> CudaError;
#[link_name = concat!($field_prefix, "_poseidon2_hash_cuda")]
pub(crate) fn hash_many(
states: *mut $field,
output: *mut $field,
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<$field>,
config: &Poseidon2Config,
) -> CudaError;
}
}
impl Poseidon2<$field> for $field_config {
fn create_optimized_constants<'a>(
width: u32,
alpha: u32,
internal_rounds: u32,
external_rounds: u32,
round_constants: &mut [$field],
internal_matrix_diag: &mut [$field],
mds_type: MdsType,
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
unsafe {
let mut poseidon_constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
let err = $field_prefix_ident::_create_optimized_constants(
width,
alpha,
internal_rounds,
external_rounds,
round_constants as *mut _ as *mut $field,
internal_matrix_diag as *mut _ as *mut $field,
mds_type,
diffusion,
ctx,
poseidon_constants.as_mut_ptr(),
)
.wrap();
err.and(Ok(poseidon_constants.assume_init()))
}
}
fn load_optimized_constants<'a>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
ctx: &DeviceContext,
) -> IcicleResult<Poseidon2Constants<'a, $field>> {
unsafe {
let mut constants = MaybeUninit::<Poseidon2Constants<'a, $field>>::uninit();
let err = $field_prefix_ident::_load_optimized_constants(
width,
mds_type,
diffusion,
ctx,
constants.as_mut_ptr(),
)
.wrap();
err.and(Ok(constants.assume_init()))
}
}
fn poseidon_unchecked(
states: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
number_of_states: u32,
width: u32,
constants: &Poseidon2Constants<$field>,
config: &Poseidon2Config,
) -> IcicleResult<()> {
unsafe {
$field_prefix_ident::hash_many(
states.as_mut_ptr(),
output.as_mut_ptr(),
number_of_states,
width,
constants,
config,
)
.wrap()
}
}
}
};
}
#[macro_export]
macro_rules! impl_poseidon2_tests {
(
$field:ident
) => {
#[test]
fn test_poseidon2_hash_many() {
check_poseidon_hash_many::<$field>()
}
};
}

View File

@@ -0,0 +1,105 @@
use crate::poseidon2::{MdsType, PoseidonMode};
use crate::traits::FieldImpl;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::{HostOrDeviceSlice, HostSlice};
use super::{
load_optimized_poseidon2_constants, poseidon_hash_many, DiffusionStrategy, Poseidon2, Poseidon2Config,
Poseidon2Constants,
};
pub fn init_poseidon<'a, F: FieldImpl>(
width: u32,
mds_type: MdsType,
diffusion: DiffusionStrategy,
) -> Poseidon2Constants<'a, F>
where
<F as FieldImpl>::Config: Poseidon2<F>,
{
let ctx = DeviceContext::default();
load_optimized_poseidon2_constants::<F>(width, mds_type, diffusion, &ctx).unwrap()
}
fn _check_poseidon_hash_many<F: FieldImpl>(width: u32, constants: Poseidon2Constants<F>) -> (F, F)
where
<F as FieldImpl>::Config: Poseidon2<F>,
{
let test_size = 1 << 10;
let mut inputs = vec![F::one(); test_size * width as usize];
let mut outputs = vec![F::zero(); test_size];
let input_slice = HostSlice::from_mut_slice(&mut inputs);
let output_slice = HostSlice::from_mut_slice(&mut outputs);
let config = Poseidon2Config::default();
poseidon_hash_many::<F>(
input_slice,
output_slice,
test_size as u32,
width as u32,
&constants,
&config,
)
.unwrap();
let a1 = output_slice[0];
let a2 = output_slice[output_slice.len() - 2];
assert_eq!(a1, a2);
(a1, a2)
}
pub fn check_poseidon_hash_many<'a, F: FieldImpl + 'a>()
where
<F as FieldImpl>::Config: Poseidon2<F>,
{
let widths = [2, 3, 4, 8, 12, 16, 20, 24];
for width in widths {
let constants = init_poseidon::<'a, F>(width as u32, MdsType::Default, DiffusionStrategy::Default);
_check_poseidon_hash_many(width, constants);
}
}
pub fn check_poseidon_kats<'a, F: FieldImpl>(width: usize, kats: &[F], constants: &Poseidon2Constants<'a, F>)
where
<F as FieldImpl>::Config: Poseidon2<F>,
{
assert_eq!(width, kats.len());
let batch_size = 1024;
let mut input = vec![F::one(); width];
let mut outputs = vec![F::zero(); width * batch_size];
for i in 0..width {
input[i] = F::from_u32(i as u32);
}
let mut inputs: Vec<F> = std::iter::repeat(input.clone())
.take(batch_size)
.flatten()
.collect();
let input_slice = HostSlice::from_mut_slice(&mut inputs);
let output_slice = HostSlice::from_mut_slice(&mut outputs);
let mut config = Poseidon2Config::default();
config.mode = PoseidonMode::Permutation;
poseidon_hash_many::<F>(
input_slice,
output_slice,
batch_size as u32,
width as u32,
&constants,
&config,
)
.unwrap();
for (i, val) in output_slice
.iter()
.enumerate()
{
assert_eq!(*val, kats[i % width]);
}
}

View File

@@ -27,6 +27,7 @@ pub trait FieldImpl:
fn to_bytes_le(&self) -> Vec<u8>;
fn from_bytes_le(bytes: &[u8]) -> Self;
fn from_hex(s: &str) -> Self;
fn zero() -> Self;
fn one() -> Self;
fn from_u32(val: u32) -> Self;

View File

@@ -1,9 +1,11 @@
pub mod curve;
pub mod ecntt;
pub mod msm;
pub mod ntt;
pub mod polynomials;
pub mod poseidon;
pub mod poseidon2;
pub mod tree;
pub mod vec_ops;

View File

@@ -0,0 +1,35 @@
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::impl_poseidon2;
use icicle_core::poseidon2::{DiffusionStrategy, MdsType, Poseidon2, Poseidon2Config, Poseidon2Constants};
use icicle_core::traits::IcicleResultWrap;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use core::mem::MaybeUninit;
impl_poseidon2!("bn254", bn254, ScalarField, ScalarCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::ScalarField;
use icicle_core::impl_poseidon2_tests;
use icicle_core::ntt::FieldImpl;
use icicle_core::poseidon2::{tests::*, DiffusionStrategy, MdsType};
impl_poseidon2_tests!(ScalarField);
#[test]
fn test_poseidon2_kats() {
let kats = [
ScalarField::from_hex("0x0bb61d24daca55eebcb1929a82650f328134334da98ea4f847f760054f4a3033"),
ScalarField::from_hex("0x303b6f7c86d043bfcbcc80214f26a30277a15d3f74ca654992defe7ff8d03570"),
ScalarField::from_hex("0x1ed25194542b12eef8617361c3ba7c52e660b145994427cc86296242cf766ec8"),
];
let constants = init_poseidon::<ScalarField>(3, MdsType::Default, DiffusionStrategy::Default);
check_poseidon_kats(3, &kats, &constants);
}
}

View File

@@ -17,10 +17,13 @@ cmake = "0.1.50"
[dev-dependencies]
risc0-core = "0.21.0"
risc0-zkp = "0.21.0"
p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
p3-field = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
p3-dft = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
p3-matrix = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
p3-symmetric = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
p3-mds = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
p3-poseidon2 = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
p3-field = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
p3-dft = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
p3-matrix = { git = "https://github.com/Plonky3/Plonky3", rev = "1e87146ebfaedc2150b635b10a096b733795fdce" }
serial_test = "3.0.0"
[features]

View File

@@ -1,4 +1,5 @@
pub mod field;
pub mod ntt;
pub mod polynomials;
pub mod poseidon2;
pub mod vec_ops;

View File

@@ -120,7 +120,7 @@ pub(crate) mod tests {
ntt_cfg.columns_batch = true;
ntt_inplace(HostSlice::from_mut_slice(&mut scalars[..]), NTTDir::kForward, &ntt_cfg).unwrap();
let result_p3 = Radix2Dit.dft_batch(matrix_p3);
let result_p3 = Radix2Dit::default().dft_batch(matrix_p3);
for i in 0..nrows {
for j in 0..ntt_size {
@@ -156,7 +156,7 @@ pub(crate) mod tests {
)
.unwrap();
let ext_result_p3 = Radix2Dit.dft_batch(ext_matrix_p3);
let ext_result_p3 = Radix2Dit::default().dft_batch(ext_matrix_p3);
for i in 0..nrows {
for j in 0..ntt_size {

View File

@@ -0,0 +1,633 @@
use crate::field::{ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::impl_poseidon2;
use icicle_core::poseidon2::{DiffusionStrategy, MdsType, Poseidon2, Poseidon2Config, Poseidon2Constants};
use icicle_core::traits::IcicleResultWrap;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use core::mem::MaybeUninit;
impl_poseidon2!("babybear", babybear, ScalarField, ScalarCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::field::ScalarField;
use icicle_core::impl_poseidon2_tests;
use icicle_core::poseidon2::{create_optimized_poseidon2_constants, tests::*, DiffusionStrategy, MdsType};
use icicle_core::traits::FieldImpl;
use icicle_cuda_runtime::device_context::DeviceContext;
use p3_baby_bear::BabyBear;
use p3_baby_bear::DiffusionMatrixBabyBear;
use p3_field::{AbstractField, PrimeField32};
use p3_poseidon2::{Poseidon2, Poseidon2ExternalMatrixGeneral};
use p3_symmetric::Permutation;
impl_poseidon2_tests!(ScalarField);
#[test]
fn test_poseidon2_kats() {
let kats = [
ScalarField::from_hex("0x2ed3e23d"),
ScalarField::from_hex("0x12921fb0"),
ScalarField::from_hex("0x0e659e79"),
ScalarField::from_hex("0x61d81dc9"),
ScalarField::from_hex("0x32bae33b"),
ScalarField::from_hex("0x62486ae3"),
ScalarField::from_hex("0x1e681b60"),
ScalarField::from_hex("0x24b91325"),
ScalarField::from_hex("0x2a2ef5b9"),
ScalarField::from_hex("0x50e8593e"),
ScalarField::from_hex("0x5bc818ec"),
ScalarField::from_hex("0x10691997"),
ScalarField::from_hex("0x35a14520"),
ScalarField::from_hex("0x2ba6a3c5"),
ScalarField::from_hex("0x279d47ec"),
ScalarField::from_hex("0x55014e81"),
ScalarField::from_hex("0x5953a67f"),
ScalarField::from_hex("0x2f403111"),
ScalarField::from_hex("0x6b8828ff"),
ScalarField::from_hex("0x1801301f"),
ScalarField::from_hex("0x2749207a"),
ScalarField::from_hex("0x3dc9cf21"),
ScalarField::from_hex("0x3c985ba2"),
ScalarField::from_hex("0x57a99864"),
];
let constants = init_poseidon::<ScalarField>(24, MdsType::Default, DiffusionStrategy::Default);
check_poseidon_kats(24, &kats, &constants);
}
#[test]
fn test_poseidon2_plonky3_t16() {
let rounds_p = 13;
let rounds_f = 8;
const ALPHA: u64 = 7;
const WIDTH: usize = 16;
let cnv = BabyBear::from_canonical_u32;
let external_constants: Vec<[BabyBear; 16]> = vec![
[
cnv(1774958255),
cnv(1185780729),
cnv(1621102414),
cnv(1796380621),
cnv(588815102),
cnv(1932426223),
cnv(1925334750),
cnv(747903232),
cnv(89648862),
cnv(360728943),
cnv(977184635),
cnv(1425273457),
cnv(256487465),
cnv(1200041953),
cnv(572403254),
cnv(448208942),
],
[
cnv(1215789478),
cnv(944884184),
cnv(953948096),
cnv(547326025),
cnv(646827752),
cnv(889997530),
cnv(1536873262),
cnv(86189867),
cnv(1065944411),
cnv(32019634),
cnv(333311454),
cnv(456061748),
cnv(1963448500),
cnv(1827584334),
cnv(1391160226),
cnv(1348741381),
],
[
cnv(88424255),
cnv(104111868),
cnv(1763866748),
cnv(79691676),
cnv(1988915530),
cnv(1050669594),
cnv(359890076),
cnv(573163527),
cnv(222820492),
cnv(159256268),
cnv(669703072),
cnv(763177444),
cnv(889367200),
cnv(256335831),
cnv(704371273),
cnv(25886717),
],
[
cnv(51754520),
cnv(1833211857),
cnv(454499742),
cnv(1384520381),
cnv(777848065),
cnv(1053320300),
cnv(1851729162),
cnv(344647910),
cnv(401996362),
cnv(1046925956),
cnv(5351995),
cnv(1212119315),
cnv(754867989),
cnv(36972490),
cnv(751272725),
cnv(506915399),
],
[
cnv(1922082829),
cnv(1870549801),
cnv(1502529704),
cnv(1990744480),
cnv(1700391016),
cnv(1702593455),
cnv(321330495),
cnv(528965731),
cnv(183414327),
cnv(1886297254),
cnv(1178602734),
cnv(1923111974),
cnv(744004766),
cnv(549271463),
cnv(1781349648),
cnv(542259047),
],
[
cnv(1536158148),
cnv(715456982),
cnv(503426110),
cnv(340311124),
cnv(1558555932),
cnv(1226350925),
cnv(742828095),
cnv(1338992758),
cnv(1641600456),
cnv(1843351545),
cnv(301835475),
cnv(43203215),
cnv(386838401),
cnv(1520185679),
cnv(1235297680),
cnv(904680097),
],
[
cnv(1491801617),
cnv(1581784677),
cnv(913384905),
cnv(247083962),
cnv(532844013),
cnv(107190701),
cnv(213827818),
cnv(1979521776),
cnv(1358282574),
cnv(1681743681),
cnv(1867507480),
cnv(1530706910),
cnv(507181886),
cnv(695185447),
cnv(1172395131),
cnv(1250800299),
],
[
cnv(1503161625),
cnv(817684387),
cnv(498481458),
cnv(494676004),
cnv(1404253825),
cnv(108246855),
cnv(59414691),
cnv(744214112),
cnv(890862029),
cnv(1342765939),
cnv(1417398904),
cnv(1897591937),
cnv(1066647396),
cnv(1682806907),
cnv(1015795079),
cnv(1619482808),
],
];
let internal_constants: Vec<BabyBear> = vec![
cnv(1518359488),
cnv(1765533241),
cnv(945325693),
cnv(422793067),
cnv(311365592),
cnv(1311448267),
cnv(1629555936),
cnv(1009879353),
cnv(190525218),
cnv(786108885),
cnv(557776863),
cnv(212616710),
cnv(605745517),
];
let poseidon2: Poseidon2<BabyBear, Poseidon2ExternalMatrixGeneral, DiffusionMatrixBabyBear, WIDTH, ALPHA> =
Poseidon2::new(
rounds_f,
external_constants.clone(),
Poseidon2ExternalMatrixGeneral::default(),
rounds_p,
internal_constants.clone(),
DiffusionMatrixBabyBear::default(),
);
let mut input: [BabyBear; WIDTH] = [BabyBear::zero(); WIDTH];
for i in 0..WIDTH {
input[i] = BabyBear::from_canonical_u32(i as u32);
}
let output = poseidon2.permute(input);
let mut kats: [ScalarField; WIDTH] = [ScalarField::zero(); WIDTH];
for i in 0..WIDTH {
kats[i] = ScalarField::from_u32(output[i].as_canonical_u32());
}
let ctx = DeviceContext::default();
let mut round_constants = vec![ScalarField::zero(); rounds_f * WIDTH + rounds_p];
let external_constants_flattened: Vec<ScalarField> = external_constants
.into_iter()
.flatten()
.map(|c| ScalarField::from_u32(c.as_canonical_u32()))
.collect();
let internal_constants_icicle: Vec<ScalarField> = internal_constants
.iter()
.map(|&c| ScalarField::from_u32(c.as_canonical_u32()))
.collect();
(&mut round_constants[..rounds_f / 2 * WIDTH])
.copy_from_slice(&external_constants_flattened[..rounds_f / 2 * WIDTH]);
(&mut round_constants[rounds_f / 2 * WIDTH..rounds_f / 2 * WIDTH + rounds_p])
.copy_from_slice(&internal_constants_icicle[..]);
(&mut round_constants[rounds_p + rounds_f / 2 * WIDTH..])
.copy_from_slice(&external_constants_flattened[rounds_f / 2 * WIDTH..]);
let mut internal_matrix_diag = vec![
ScalarField::from_u32(0x78000001 - 2),
ScalarField::from_u32(1),
ScalarField::from_u32(1 << 1),
ScalarField::from_u32(1 << 2),
ScalarField::from_u32(1 << 3),
ScalarField::from_u32(1 << 4),
ScalarField::from_u32(1 << 5),
ScalarField::from_u32(1 << 6),
ScalarField::from_u32(1 << 7),
ScalarField::from_u32(1 << 8),
ScalarField::from_u32(1 << 9),
ScalarField::from_u32(1 << 10),
ScalarField::from_u32(1 << 11),
ScalarField::from_u32(1 << 12),
ScalarField::from_u32(1 << 13),
ScalarField::from_u32(1 << 15),
];
let mut constants = create_optimized_poseidon2_constants(
WIDTH as u32,
ALPHA as u32,
&ctx,
rounds_p as u32,
rounds_f as u32,
&mut round_constants,
&mut internal_matrix_diag,
MdsType::Plonky,
DiffusionStrategy::Montgomery,
)
.unwrap();
check_poseidon_kats(WIDTH, &kats, &constants);
}
#[test]
fn test_poseidon2_plonky3_t24() {
let rounds_p = 21;
let rounds_f = 8;
const ALPHA: u64 = 7;
const WIDTH: usize = 24;
let cnv = BabyBear::from_canonical_u32;
let external_constants: Vec<[BabyBear; 24]> = vec![
[
cnv(262278199),
cnv(127253399),
cnv(314968988),
cnv(246143118),
cnv(157582794),
cnv(118043943),
cnv(454905424),
cnv(815798990),
cnv(1004040026),
cnv(1773108264),
cnv(1066694495),
cnv(1930780904),
cnv(1180307149),
cnv(1464793095),
cnv(1660766320),
cnv(1389166148),
cnv(343354132),
cnv(1307439985),
cnv(638242172),
cnv(525458520),
cnv(1964135730),
cnv(1751797115),
cnv(1421525369),
cnv(831813382),
],
[
cnv(695835963),
cnv(1845603984),
cnv(540703332),
cnv(1333667262),
cnv(1917861751),
cnv(1170029417),
cnv(1989924532),
cnv(1518763784),
cnv(1339793538),
cnv(622609176),
cnv(686842369),
cnv(1737016378),
cnv(1282239129),
cnv(897025192),
cnv(716894289),
cnv(1997503974),
cnv(395622276),
cnv(1201063290),
cnv(1917549072),
cnv(1150912935),
cnv(1687379185),
cnv(1507936940),
cnv(241306552),
cnv(989176635),
],
[
cnv(1147522062),
cnv(27129487),
cnv(1257820264),
cnv(142102402),
cnv(217046702),
cnv(1664590951),
cnv(855276054),
cnv(1215259350),
cnv(946500736),
cnv(552696906),
cnv(1424297384),
cnv(538103555),
cnv(1608853840),
cnv(162510541),
cnv(623051854),
cnv(1549062383),
cnv(1908416316),
cnv(1622328571),
cnv(1079030649),
cnv(1584033957),
cnv(1099252725),
cnv(1910423126),
cnv(447555988),
cnv(862495875),
],
[
cnv(128479034),
cnv(1587822577),
cnv(608401422),
cnv(1290028279),
cnv(342857858),
cnv(825405577),
cnv(427731030),
cnv(1718628547),
cnv(588764636),
cnv(204228775),
cnv(1454563174),
cnv(1740472809),
cnv(1338899225),
cnv(1269493554),
cnv(53007114),
cnv(1647670797),
cnv(306391314),
cnv(172614232),
cnv(51256176),
cnv(1221257987),
cnv(1239734761),
cnv(273790406),
cnv(1781980094),
cnv(1291790245),
],
[
cnv(53041581),
cnv(723038058),
cnv(1439947916),
cnv(1136469704),
cnv(205609311),
cnv(1883820770),
cnv(14387587),
cnv(720724951),
cnv(1854174607),
cnv(1629316321),
cnv(530151394),
cnv(1679178250),
cnv(1549779579),
cnv(48375137),
cnv(976057819),
cnv(463976218),
cnv(875839332),
cnv(1946596189),
cnv(434078361),
cnv(1878280202),
cnv(1363837384),
cnv(1470845646),
cnv(1792450386),
cnv(1040977421),
],
[
cnv(1209164052),
cnv(714957516),
cnv(390340387),
cnv(1213686459),
cnv(790726260),
cnv(117294666),
cnv(140621810),
cnv(993455846),
cnv(1889603648),
cnv(78845751),
cnv(925018226),
cnv(708123747),
cnv(1647665372),
cnv(1649953458),
cnv(942439428),
cnv(1006235079),
cnv(238616145),
cnv(930036496),
cnv(1401020792),
cnv(989618631),
cnv(1545325389),
cnv(1715719711),
cnv(755691969),
cnv(150307788),
],
[
cnv(1567618575),
cnv(1663353317),
cnv(1950429111),
cnv(1891637550),
cnv(192082241),
cnv(1080533265),
cnv(1463323727),
cnv(890243564),
cnv(158646617),
cnv(1402624179),
cnv(59510015),
cnv(1198261138),
cnv(1065075039),
cnv(1150410028),
cnv(1293938517),
cnv(76770019),
cnv(1478577620),
cnv(1748789933),
cnv(457372011),
cnv(1841795381),
cnv(760115692),
cnv(1042892522),
cnv(1507649755),
cnv(1827572010),
],
[
cnv(1206940496),
cnv(1896271507),
cnv(1003792297),
cnv(738091882),
cnv(1124078057),
cnv(1889898),
cnv(813674331),
cnv(228520958),
cnv(1832911930),
cnv(781141772),
cnv(459826664),
cnv(202271745),
cnv(1296144415),
cnv(1111203133),
cnv(1090783436),
cnv(641665156),
cnv(1393671120),
cnv(1303271640),
cnv(809508074),
cnv(162506101),
cnv(1262312258),
cnv(1672219447),
cnv(1608891156),
cnv(1380248020),
],
];
let internal_constants: Vec<BabyBear> = vec![
cnv(497520322),
cnv(1930103076),
cnv(1052077299),
cnv(1540960371),
cnv(924863639),
cnv(1365519753),
cnv(1726563304),
cnv(440300254),
cnv(1891545577),
cnv(822033215),
cnv(1111544260),
cnv(308575117),
cnv(1708681573),
cnv(1240419708),
cnv(1199068823),
cnv(1186174623),
cnv(1551596046),
cnv(1886977120),
cnv(1327682690),
cnv(1210751726),
cnv(1810596765),
];
let poseidon2: Poseidon2<BabyBear, Poseidon2ExternalMatrixGeneral, DiffusionMatrixBabyBear, WIDTH, ALPHA> =
Poseidon2::new(
rounds_f,
external_constants.clone(),
Poseidon2ExternalMatrixGeneral::default(),
rounds_p,
internal_constants.clone(),
DiffusionMatrixBabyBear::default(),
);
let mut input: [BabyBear; WIDTH] = [BabyBear::zero(); WIDTH];
for i in 0..WIDTH {
input[i] = BabyBear::from_canonical_u32(i as u32);
}
let output = poseidon2.permute(input);
let mut kats: [ScalarField; WIDTH] = [ScalarField::zero(); WIDTH];
for i in 0..WIDTH {
kats[i] = ScalarField::from_u32(output[i].as_canonical_u32());
}
let ctx = DeviceContext::default();
let mut round_constants = vec![ScalarField::zero(); rounds_f * WIDTH + rounds_p];
let external_constants_flattened: Vec<ScalarField> = external_constants
.into_iter()
.flatten()
.map(|c| ScalarField::from_u32(c.as_canonical_u32()))
.collect();
let internal_constants_icicle: Vec<ScalarField> = internal_constants
.iter()
.map(|&c| ScalarField::from_u32(c.as_canonical_u32()))
.collect();
(&mut round_constants[..rounds_f / 2 * WIDTH])
.copy_from_slice(&external_constants_flattened[..rounds_f / 2 * WIDTH]);
(&mut round_constants[rounds_f / 2 * WIDTH..rounds_f / 2 * WIDTH + rounds_p])
.copy_from_slice(&internal_constants_icicle[..]);
(&mut round_constants[rounds_p + rounds_f / 2 * WIDTH..])
.copy_from_slice(&external_constants_flattened[rounds_f / 2 * WIDTH..]);
let mut internal_matrix_diag = vec![
ScalarField::from_u32(0x78000001 - 2),
ScalarField::from_u32(1),
ScalarField::from_u32(1 << 1),
ScalarField::from_u32(1 << 2),
ScalarField::from_u32(1 << 3),
ScalarField::from_u32(1 << 4),
ScalarField::from_u32(1 << 5),
ScalarField::from_u32(1 << 6),
ScalarField::from_u32(1 << 7),
ScalarField::from_u32(1 << 8),
ScalarField::from_u32(1 << 9),
ScalarField::from_u32(1 << 10),
ScalarField::from_u32(1 << 11),
ScalarField::from_u32(1 << 12),
ScalarField::from_u32(1 << 13),
ScalarField::from_u32(1 << 14),
ScalarField::from_u32(1 << 15),
ScalarField::from_u32(1 << 16),
ScalarField::from_u32(1 << 18),
ScalarField::from_u32(1 << 19),
ScalarField::from_u32(1 << 20),
ScalarField::from_u32(1 << 21),
ScalarField::from_u32(1 << 22),
ScalarField::from_u32(1 << 23),
];
let mut constants = create_optimized_poseidon2_constants(
WIDTH as u32,
ALPHA as u32,
&ctx,
rounds_p as u32,
rounds_f as u32,
&mut round_constants,
&mut internal_matrix_diag,
MdsType::Plonky,
DiffusionStrategy::Montgomery,
)
.unwrap();
check_poseidon_kats(WIDTH, &kats, &constants);
}
}