mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-13 01:17:57 -05:00
Compare commits
13 Commits
mini-cours
...
examples/m
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1ae9c560b5 | ||
|
|
ea71faf1fa | ||
|
|
7fd9ed1b49 | ||
|
|
2d4059c61f | ||
|
|
73cd4c0a99 | ||
|
|
5516320ad7 | ||
|
|
a4b1eb3de9 | ||
|
|
31083463be | ||
|
|
b908053c0c | ||
|
|
29da36d7be | ||
|
|
4fef542346 | ||
|
|
f812f071fa | ||
|
|
2b07513310 |
2
.github/workflows/cpp_cuda.yml
vendored
2
.github/workflows/cpp_cuda.yml
vendored
@@ -73,6 +73,8 @@ jobs:
|
||||
build_args: -DEXT_FIELD=ON
|
||||
- name: stark252
|
||||
build_args: -DEXT_FIELD=OFF
|
||||
- name: m31
|
||||
build_args: -DEXT_FIELD=ON
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
11
.github/workflows/rust.yml
vendored
11
.github/workflows/rust.yml
vendored
@@ -62,8 +62,8 @@ jobs:
|
||||
# We need to limit the number of threads to avoid running out of memory on weaker machines
|
||||
# ignored tests are polynomial tests. Since they conflict with NTT tests, they are executed separately
|
||||
run: |
|
||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --release --verbose --features=g2 -- --test-threads=2 --ignored
|
||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --release --verbose --features=g2 -- --test-threads=2
|
||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --exclude icicle-m31 --release --verbose --features=g2 -- --test-threads=2 --ignored
|
||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --exclude icicle-m31 --release --verbose --features=g2 -- --test-threads=2
|
||||
|
||||
- name: Run baby bear tests
|
||||
working-directory: ./wrappers/rust/icicle-fields/icicle-babybear
|
||||
@@ -79,6 +79,13 @@ jobs:
|
||||
cargo test --release --verbose -- --ignored
|
||||
cargo test --release --verbose
|
||||
|
||||
- name: Run m31 tests
|
||||
working-directory: ./wrappers/rust/icicle-fields/icicle-m31
|
||||
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: |
|
||||
cargo test --release --verbose -- --ignored
|
||||
cargo test --release --verbose
|
||||
|
||||
# build-windows:
|
||||
# name: Build on Windows
|
||||
# runs-on: windows-2022
|
||||
|
||||
@@ -25,7 +25,7 @@ func main() {
|
||||
input := createHostSliceFromHexString("1725b6")
|
||||
outHost256 := make(core.HostSlice[uint8], 32)
|
||||
|
||||
cfg := keccak.GetDefaultKeccakConfig()
|
||||
cfg := keccak.GetDefaultHashConfig()
|
||||
e := keccak.Keccak256(input, int32(input.Len()), 1, outHost256, &cfg)
|
||||
if e.CudaErrorCode != cr.CudaSuccess {
|
||||
panic("Keccak256 hashing failed")
|
||||
@@ -49,8 +49,8 @@ func main() {
|
||||
## Keccak Methods
|
||||
|
||||
```go
|
||||
func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError
|
||||
func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError
|
||||
func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError
|
||||
func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError
|
||||
```
|
||||
|
||||
### Parameters
|
||||
@@ -59,18 +59,18 @@ func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int3
|
||||
- **`inputBlockSize`**: An integer specifying the size of the input data for a single hash.
|
||||
- **`numberOfBlocks`**: An integer specifying the number of results in the hash batch.
|
||||
- **`output`**: A slice where the resulting hash will be stored. This slice can be in host or device memory.
|
||||
- **`config`**: A pointer to a `KeccakConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
- **`config`**: A pointer to a `HashConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
|
||||
### Return Value
|
||||
|
||||
- **`IcicleError`**: Returns a `core.IcicleError` whose `CudaErrorCode` field indicates the success or failure of the Keccak256/Keccak512 operation.
|
||||
|
||||
## KeccakConfig
|
||||
## HashConfig
|
||||
|
||||
The `KeccakConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
The `HashConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
|
||||
```go
|
||||
type KeccakConfig struct {
|
||||
type HashConfig struct {
|
||||
Ctx cr.DeviceContext
|
||||
areInputsOnDevice bool
|
||||
areOutputsOnDevice bool
|
||||
@@ -87,8 +87,8 @@ type KeccakConfig struct {
|
||||
|
||||
### Default Configuration
|
||||
|
||||
Use `GetDefaultKeccakConfig` to obtain a default configuration, which can then be customized as needed.
|
||||
Use `GetDefaultHashConfig` to obtain a default configuration, which can then be customized as needed.
|
||||
|
||||
```go
|
||||
func GetDefaultKeccakConfig() KeccakConfig
|
||||
func GetDefaultHashConfig() HashConfig
|
||||
```
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
```rust
|
||||
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
|
||||
use icicle_hash::keccak::{keccak256, KeccakConfig};
|
||||
use icicle_hash::keccak::{keccak256, HashConfig};
|
||||
use rand::{self, Rng};
|
||||
|
||||
fn main() {
|
||||
@@ -14,7 +14,7 @@ fn main() {
|
||||
let input = HostSlice::<u8>::from_slice(initial_data.as_slice());
|
||||
let mut output = DeviceVec::<u8>::cuda_malloc(32).unwrap();
|
||||
|
||||
let mut config = KeccakConfig::default();
|
||||
let mut config = HashConfig::default();
|
||||
keccak256(input, initial_data.len() as i32, 1, &mut output[..], &mut config).expect("Failed to execute keccak256 hashing");
|
||||
|
||||
let mut output_host = vec![0_u8; 32];
|
||||
@@ -32,7 +32,7 @@ pub fn keccak256(
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
config: &mut KeccakConfig,
|
||||
config: &mut HashConfig,
|
||||
) -> IcicleResult<()>
|
||||
|
||||
pub fn keccak512(
|
||||
@@ -40,7 +40,7 @@ pub fn keccak512(
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
config: &mut KeccakConfig,
|
||||
config: &mut HashConfig,
|
||||
) -> IcicleResult<()>
|
||||
```
|
||||
|
||||
@@ -50,18 +50,18 @@ pub fn keccak512(
|
||||
- **`input_block_size`**: An integer specifying the size of the input data for a single hash.
|
||||
- **`number_of_blocks`**: An integer specifying the number of results in the hash batch.
|
||||
- **`output`**: A slice where the resulting hash will be stored. This slice can be in host or device memory.
|
||||
- **`config`**: A pointer to a `KeccakConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
- **`config`**: A mutable reference to a `HashConfig` object, which contains various configuration options for the Keccak256/Keccak512 operation.
|
||||
|
||||
### Return Value
|
||||
|
||||
- **`IcicleResult`**: Returns a CUDA error code indicating the success or failure of the Keccak256/Keccak512 operation.
|
||||
|
||||
## KeccakConfig
|
||||
## HashConfig
|
||||
|
||||
The `KeccakConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
The `HashConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
|
||||
```rust
|
||||
pub struct KeccakConfig<'a> {
|
||||
pub struct HashConfig<'a> {
|
||||
pub ctx: DeviceContext<'a>,
|
||||
pub are_inputs_on_device: bool,
|
||||
pub are_outputs_on_device: bool,
|
||||
@@ -81,7 +81,7 @@ pub struct KeccakConfig<'a> {
|
||||
Example initialization with default settings:
|
||||
|
||||
```rust
|
||||
let default_config = KeccakConfig::default();
|
||||
let default_config = HashConfig::default();
|
||||
```
|
||||
|
||||
Customizing the configuration:
|
||||
|
||||
@@ -19,5 +19,5 @@ add_executable(
|
||||
example.cu
|
||||
)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_curve_bn254.a)
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_curve_bn254.a ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_curve_bls12_377.a)
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
@@ -10,6 +10,9 @@ mkdir -p build/icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DG2=ON
|
||||
cmake --build build/icicle
|
||||
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DCURVE=bls12_377 -DG2=ON
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
@@ -3,9 +3,23 @@
|
||||
#include <iomanip>
|
||||
|
||||
#include "api/bn254.h"
|
||||
using namespace bn254;
|
||||
#include "api/bls12_377.h"
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
// using namespace bn254;
|
||||
typedef bn254::scalar_t scalar_bn254;
|
||||
typedef bn254::affine_t affine_bn254;
|
||||
typedef bn254::g2_affine_t g2_affine_bn254;
|
||||
typedef bn254::projective_t projective_bn254;
|
||||
typedef bn254::g2_projective_t g2_projective_bn254;
|
||||
|
||||
typedef bls12_377::scalar_t scalar_bls12377;
|
||||
typedef bls12_377::affine_t affine_bls12377;
|
||||
typedef bls12_377::g2_affine_t g2_affine_bls12377;
|
||||
typedef bls12_377::projective_t projective_bls12377;
|
||||
typedef bls12_377::g2_projective_t g2_projective_bls12377;
|
||||
|
||||
|
||||
int msm_bn254(int argc, char* argv[])
|
||||
{
|
||||
std::cout << "Icicle example: Muli-Scalar Multiplication (MSM)" << std::endl;
|
||||
std::cout << "Example parameters" << std::endl;
|
||||
@@ -18,11 +32,11 @@ int main(int argc, char* argv[])
|
||||
std::cout << "Part I: use G1 points" << std::endl;
|
||||
|
||||
std::cout << "Generating random inputs on-host" << std::endl;
|
||||
scalar_t* scalars = new scalar_t[N];
|
||||
affine_t* points = new affine_t[N];
|
||||
projective_t result;
|
||||
scalar_t::rand_host_many(scalars, N);
|
||||
projective_t::rand_host_many_affine(points, N);
|
||||
scalar_bn254* scalars = new scalar_bn254[N];
|
||||
affine_bn254* points = new affine_bn254[N];
|
||||
projective_bn254 result;
|
||||
scalar_bn254::rand_host_many(scalars, N);
|
||||
projective_bn254::rand_host_many_affine(points, N);
|
||||
|
||||
std::cout << "Using default MSM configuration with on-host inputs" << std::endl;
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
@@ -48,17 +62,17 @@ int main(int argc, char* argv[])
|
||||
cudaStream_t stream = config.ctx.stream;
|
||||
// Execute the MSM kernel
|
||||
bn254_msm_cuda(scalars, points, msm_size, config, &result);
|
||||
std::cout << projective_t::to_affine(result) << std::endl;
|
||||
std::cout << projective_bn254::to_affine(result) << std::endl;
|
||||
|
||||
std::cout << "Copying inputs on-device" << std::endl;
|
||||
scalar_t* scalars_d;
|
||||
affine_t* points_d;
|
||||
projective_t* result_d;
|
||||
cudaMalloc(&scalars_d, sizeof(scalar_t) * N);
|
||||
cudaMalloc(&points_d, sizeof(affine_t) * N);
|
||||
cudaMalloc(&result_d, sizeof(projective_t));
|
||||
cudaMemcpy(scalars_d, scalars, sizeof(scalar_t) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(points_d, points, sizeof(affine_t) * N, cudaMemcpyHostToDevice);
|
||||
scalar_bn254* scalars_d;
|
||||
affine_bn254* points_d;
|
||||
projective_bn254* result_d;
|
||||
cudaMalloc(&scalars_d, sizeof(scalar_bn254) * N);
|
||||
cudaMalloc(&points_d, sizeof(affine_bn254) * N);
|
||||
cudaMalloc(&result_d, sizeof(projective_bn254));
|
||||
cudaMemcpy(scalars_d, scalars, sizeof(scalar_bn254) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(points_d, points, sizeof(affine_bn254) * N, cudaMemcpyHostToDevice);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-device inputs" << std::endl;
|
||||
config.are_results_on_device = true;
|
||||
@@ -70,9 +84,9 @@ int main(int argc, char* argv[])
|
||||
bn254_msm_cuda(scalars_d, points_d, msm_size, config, result_d);
|
||||
|
||||
// Copy the result back to the host
|
||||
cudaMemcpy(&result, result_d, sizeof(projective_t), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(&result, result_d, sizeof(projective_bn254), cudaMemcpyDeviceToHost);
|
||||
// Print the result
|
||||
std::cout << projective_t::to_affine(result) << std::endl;
|
||||
std::cout << projective_bn254::to_affine(result) << std::endl;
|
||||
// Free the device memory
|
||||
cudaFree(scalars_d);
|
||||
cudaFree(points_d);
|
||||
@@ -84,25 +98,25 @@ int main(int argc, char* argv[])
|
||||
|
||||
std::cout << "Generating random inputs on-host" << std::endl;
|
||||
// use the same scalars
|
||||
g2_affine_t* g2_points = new g2_affine_t[N];
|
||||
g2_projective_t::rand_host_many_affine(g2_points, N);
|
||||
g2_affine_bn254* g2_points = new g2_affine_bn254[N];
|
||||
g2_projective_bn254::rand_host_many_affine(g2_points, N);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-host inputs" << std::endl;
|
||||
config.are_results_on_device = false;
|
||||
config.are_scalars_on_device = false;
|
||||
config.are_points_on_device = false;
|
||||
g2_projective_t g2_result;
|
||||
g2_projective_bn254 g2_result;
|
||||
bn254_g2_msm_cuda(scalars, g2_points, msm_size, config, &g2_result);
|
||||
std::cout << g2_projective_t::to_affine(g2_result) << std::endl;
|
||||
std::cout << g2_projective_bn254::to_affine(g2_result) << std::endl;
|
||||
|
||||
std::cout << "Copying inputs on-device" << std::endl;
|
||||
g2_affine_t* g2_points_d;
|
||||
g2_projective_t* g2_result_d;
|
||||
cudaMalloc(&scalars_d, sizeof(scalar_t) * N);
|
||||
cudaMalloc(&g2_points_d, sizeof(g2_affine_t) * N);
|
||||
cudaMalloc(&g2_result_d, sizeof(g2_projective_t));
|
||||
cudaMemcpy(scalars_d, scalars, sizeof(scalar_t) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(g2_points_d, g2_points, sizeof(g2_affine_t) * N, cudaMemcpyHostToDevice);
|
||||
g2_affine_bn254* g2_points_d;
|
||||
g2_projective_bn254* g2_result_d;
|
||||
cudaMalloc(&scalars_d, sizeof(scalar_bn254) * N);
|
||||
cudaMalloc(&g2_points_d, sizeof(g2_affine_bn254) * N);
|
||||
cudaMalloc(&g2_result_d, sizeof(g2_projective_bn254));
|
||||
cudaMemcpy(scalars_d, scalars, sizeof(scalar_bn254) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(g2_points_d, g2_points, sizeof(g2_affine_bn254) * N, cudaMemcpyHostToDevice);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-device inputs" << std::endl;
|
||||
config.are_results_on_device = true;
|
||||
@@ -111,14 +125,140 @@ int main(int argc, char* argv[])
|
||||
|
||||
std::cout << "Running MSM kernel with on-device inputs" << std::endl;
|
||||
bn254_g2_msm_cuda(scalars_d, g2_points_d, msm_size, config, g2_result_d);
|
||||
cudaMemcpy(&g2_result, g2_result_d, sizeof(g2_projective_t), cudaMemcpyDeviceToHost);
|
||||
std::cout << g2_projective_t::to_affine(g2_result) << std::endl;
|
||||
cudaMemcpy(&g2_result, g2_result_d, sizeof(g2_projective_bn254), cudaMemcpyDeviceToHost);
|
||||
std::cout << g2_projective_bn254::to_affine(g2_result) << std::endl;
|
||||
|
||||
cudaFree(scalars_d);
|
||||
cudaFree(g2_points_d);
|
||||
cudaFree(g2_result_d);
|
||||
delete[] g2_points;
|
||||
delete[] scalars;
|
||||
cudaStreamDestroy(stream);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int msm_bls12_377(int argc, char* argv[])
|
||||
{
|
||||
std::cout << "Icicle example: Muli-Scalar Multiplication (MSM)" << std::endl;
|
||||
std::cout << "Example parameters" << std::endl;
|
||||
int batch_size = 1;
|
||||
std::cout << "Batch size: " << batch_size << std::endl;
|
||||
unsigned msm_size = 1048576;
|
||||
std::cout << "MSM size: " << msm_size << std::endl;
|
||||
int N = batch_size * msm_size;
|
||||
|
||||
std::cout << "Part I: use G1 points" << std::endl;
|
||||
|
||||
std::cout << "Generating random inputs on-host" << std::endl;
|
||||
scalar_bls12377* scalars = new scalar_bls12377[N];
|
||||
affine_bls12377* points = new affine_bls12377[N];
|
||||
projective_bls12377 result;
|
||||
scalar_bls12377::rand_host_many(scalars, N);
|
||||
projective_bls12377::rand_host_many_affine(points, N);
|
||||
|
||||
std::cout << "Using default MSM configuration with on-host inputs" << std::endl;
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
msm::MSMConfig config = {
|
||||
ctx, // ctx
|
||||
0, // points_size
|
||||
1, // precompute_factor
|
||||
0, // c
|
||||
0, // bitsize
|
||||
10, // large_bucket_factor
|
||||
1, // batch_size
|
||||
false, // are_scalars_on_device
|
||||
false, // are_scalars_montgomery_form
|
||||
false, // are_points_on_device
|
||||
false, // are_points_montgomery_form
|
||||
false, // are_results_on_device
|
||||
false, // is_big_triangle
|
||||
false, // is_async
|
||||
};
|
||||
config.batch_size = batch_size;
|
||||
|
||||
std::cout << "Running MSM kernel with on-host inputs" << std::endl;
|
||||
cudaStream_t stream = config.ctx.stream;
|
||||
cudaStreamCreate(&stream);
|
||||
// Execute the MSM kernel
|
||||
bls12_377_msm_cuda(scalars, points, msm_size, config, &result);
|
||||
std::cout << projective_bls12377::to_affine(result) << std::endl;
|
||||
|
||||
std::cout << "Copying inputs on-device" << std::endl;
|
||||
scalar_bls12377* scalars_d_bls;
|
||||
affine_bls12377* points_d_bls;
|
||||
projective_bls12377* result_d_bls;
|
||||
cudaMalloc(&scalars_d_bls, sizeof(scalar_bls12377) * N);
|
||||
cudaMalloc(&points_d_bls, sizeof(affine_bls12377) * N);
|
||||
cudaMalloc(&result_d_bls, sizeof(projective_bls12377));
|
||||
cudaMemcpy(scalars_d_bls, scalars, sizeof(scalar_bls12377) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(points_d_bls, points, sizeof(affine_bls12377) * N, cudaMemcpyHostToDevice);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-device inputs" << std::endl;
|
||||
config.are_results_on_device = true;
|
||||
config.are_scalars_on_device = true;
|
||||
config.are_points_on_device = true;
|
||||
|
||||
std::cout << "Running MSM kernel with on-device inputs" << std::endl;
|
||||
// Execute the MSM kernel
|
||||
bls12_377_msm_cuda(scalars_d_bls, points_d_bls, msm_size, config, result_d_bls);
|
||||
|
||||
// Copy the result back to the host
|
||||
cudaMemcpy(&result, result_d_bls, sizeof(projective_bls12377), cudaMemcpyDeviceToHost);
|
||||
// Print the result
|
||||
std::cout << projective_bls12377::to_affine(result) << std::endl;
|
||||
// Free the device memory
|
||||
cudaFree(scalars_d_bls);
|
||||
cudaFree(points_d_bls);
|
||||
cudaFree(result_d_bls);
|
||||
// Free the host memory, keep scalars for G2 example
|
||||
delete[] points;
|
||||
|
||||
std::cout << "Part II: use G2 points" << std::endl;
|
||||
|
||||
std::cout << "Generating random inputs on-host" << std::endl;
|
||||
// use the same scalars
|
||||
g2_affine_bls12377* g2_points = new g2_affine_bls12377[N];
|
||||
g2_projective_bls12377::rand_host_many_affine(g2_points, N);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-host inputs" << std::endl;
|
||||
config.are_results_on_device = false;
|
||||
config.are_scalars_on_device = false;
|
||||
config.are_points_on_device = false;
|
||||
g2_projective_bls12377 g2_result;
|
||||
bls12_377_g2_msm_cuda(scalars, g2_points, msm_size, config, &g2_result);
|
||||
std::cout << g2_projective_bls12377::to_affine(g2_result) << std::endl;
|
||||
|
||||
std::cout << "Copying inputs on-device" << std::endl;
|
||||
g2_affine_bls12377* g2_points_d;
|
||||
g2_projective_bls12377* g2_result_d;
|
||||
cudaMalloc(&scalars_d_bls, sizeof(scalar_bls12377) * N);
|
||||
cudaMalloc(&g2_points_d, sizeof(g2_affine_bls12377) * N);
|
||||
cudaMalloc(&g2_result_d, sizeof(g2_projective_bls12377));
|
||||
cudaMemcpy(scalars_d_bls, scalars, sizeof(scalar_bls12377) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(g2_points_d, g2_points, sizeof(g2_affine_bls12377) * N, cudaMemcpyHostToDevice);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-device inputs" << std::endl;
|
||||
config.are_results_on_device = true;
|
||||
config.are_scalars_on_device = true;
|
||||
config.are_points_on_device = true;
|
||||
|
||||
std::cout << "Running MSM kernel with on-device inputs" << std::endl;
|
||||
bls12_377_g2_msm_cuda(scalars_d_bls, g2_points_d, msm_size, config, g2_result_d);
|
||||
cudaMemcpy(&g2_result, g2_result_d, sizeof(g2_projective_bn254), cudaMemcpyDeviceToHost);
|
||||
std::cout << g2_projective_bls12377::to_affine(g2_result) << std::endl;
|
||||
|
||||
cudaFree(scalars_d_bls);
|
||||
cudaFree(g2_points_d);
|
||||
cudaFree(g2_result_d);
|
||||
delete[] g2_points;
|
||||
delete[] scalars;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
std::cout << "Starting BN254 MSM" << std::endl;
|
||||
msm_bn254(argc, argv);
|
||||
std::cout << "Starting BLS12-377 MSM" << std::endl;
|
||||
msm_bls12_377(argc, argv);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
#include "api/bn254.h"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
using namespace poseidon;
|
||||
using namespace bn254;
|
||||
|
||||
@@ -20,31 +23,20 @@ void checkCudaError(cudaError_t error)
|
||||
// these global constants go into template calls
|
||||
const int size_col = 11;
|
||||
|
||||
// this function executes the Poseidon thread
|
||||
void threadPoseidon(
|
||||
device_context::DeviceContext ctx,
|
||||
unsigned size_partition,
|
||||
scalar_t* layers,
|
||||
scalar_t* column_hashes,
|
||||
PoseidonConstants<scalar_t>* constants)
|
||||
Poseidon<scalar_t> * poseidon)
|
||||
{
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
// CHK_IF_RETURN(); I can't use it in a standard thread function
|
||||
PoseidonConfig column_config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputes_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // input_is_a_state
|
||||
false, // aligned
|
||||
false, // loop_state
|
||||
false, // is_async
|
||||
};
|
||||
cudaError_t err =
|
||||
bn254_poseidon_hash_cuda(layers, column_hashes, (size_t)size_partition, size_col, *constants, column_config);
|
||||
HashConfig column_config = default_hash_config(ctx);
|
||||
cudaError_t err = poseidon->hash_many(layers, column_hashes, (size_t) size_partition, size_col, 1, column_config);
|
||||
checkCudaError(err);
|
||||
}
|
||||
|
||||
@@ -59,6 +51,11 @@ using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::p
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
// Aborts the program when an allocation returned a null pointer.
//
// Prints "Memory allocation for '<expr>' failed." to stderr, where <expr> is
// the stringified macro argument, then exits with EXIT_FAILURE.
//
// The body is wrapped in do { ... } while (0) so that `CHECK_ALLOC(p);`
// expands to exactly one statement and can be used safely as the unbraced
// body of an if/else. Call sites must end the invocation with a semicolon.
#define CHECK_ALLOC(ptr)                                                       \
  do {                                                                         \
    if ((ptr) == nullptr) {                                                    \
      std::cerr << "Memory allocation for '" #ptr "' failed." << std::endl;    \
      exit(EXIT_FAILURE);                                                      \
    }                                                                          \
  } while (0)
|
||||
|
||||
int main()
|
||||
{
|
||||
const unsigned size_row = (1 << 30);
|
||||
@@ -116,19 +113,18 @@ int main()
|
||||
scalar_t* column_hash1 = static_cast<scalar_t*>(malloc(size_partition * sizeof(scalar_t)));
|
||||
CHECK_ALLOC(column_hash1);
|
||||
|
||||
PoseidonConstants<scalar_t> column_constants0, column_constants1;
|
||||
bn254_init_optimized_poseidon_constants_cuda(size_col, ctx0, &column_constants0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
bn254_init_optimized_poseidon_constants_cuda(size_col, ctx1, &column_constants1);
|
||||
Poseidon<scalar_t> column_poseidon0(size_col, ctx0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
Poseidon<scalar_t> column_poseidon1(size_col, ctx1);
|
||||
|
||||
std::cout << "Parallel execution of Poseidon threads" << std::endl;
|
||||
START_TIMER(parallel);
|
||||
std::thread thread0(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_constants0);
|
||||
std::thread thread1(threadPoseidon, ctx1, size_partition, layers1, column_hash1, &column_constants1);
|
||||
std::thread thread0(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_poseidon0);
|
||||
std::thread thread1(threadPoseidon, ctx1, size_partition, layers1, column_hash1, &column_poseidon1);
|
||||
|
||||
// Wait for the threads to finish
|
||||
thread0.join();
|
||||
@@ -141,9 +137,9 @@ int main()
|
||||
|
||||
std::cout << "Sequential execution of Poseidon threads" << std::endl;
|
||||
START_TIMER(sequential);
|
||||
std::thread thread2(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_constants0);
|
||||
std::thread thread2(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_poseidon0);
|
||||
thread2.join();
|
||||
std::thread thread3(threadPoseidon, ctx0, size_partition, layers1, column_hash1, &column_constants0);
|
||||
std::thread thread3(threadPoseidon, ctx0, size_partition, layers1, column_hash1, &column_poseidon0);
|
||||
thread3.join();
|
||||
END_TIMER(sequential, "1 GPU");
|
||||
std::cout << "Output Data from Thread 2: ";
|
||||
|
||||
@@ -18,7 +18,7 @@ add_executable(
|
||||
example.cu
|
||||
)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a)
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bls12_377.a)
|
||||
find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ )
|
||||
target_link_libraries(example ${NVML_LIBRARY})
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
@@ -7,9 +7,13 @@ mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DMSM=OFF -DCMAKE_BUILD_TYPE=Release -DCURVE=bls12_377
|
||||
cmake --build build/icicle
|
||||
rm build/icicle/CMakeCache.txt
|
||||
cmake -S ../../../icicle/ -B build/icicle -DMSM=OFF -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
|
||||
cmake --build build/icicle
|
||||
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
@@ -4,14 +4,17 @@
|
||||
#include <nvml.h>
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include "api/bls12_377.h"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
using namespace vec_ops;
|
||||
using namespace bn254;
|
||||
// using namespace bn254;
|
||||
typedef bn254::scalar_t T;
|
||||
|
||||
typedef scalar_t T;
|
||||
typedef bls12_377::scalar_t T_bls;
|
||||
|
||||
int vector_mult(T* vec_b, T* vec_a, T* vec_result, size_t n_elments, device_context::DeviceContext ctx)
|
||||
|
||||
int vector_mult_bn254(T* vec_b, T* vec_a, T* vec_result, size_t n_elments, device_context::DeviceContext ctx)
|
||||
{
|
||||
vec_ops::VecOpsConfig config = vec_ops::DefaultVecOpsConfig();
|
||||
config.is_a_on_device = true;
|
||||
@@ -25,10 +28,24 @@ int vector_mult(T* vec_b, T* vec_a, T* vec_result, size_t n_elments, device_cont
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Element-wise multiplication of two BLS12-377 scalar vectors via
// bls12_377_mul_cuda, configured with both inputs and the result marked as
// device-resident.
//
//   vec_b, vec_a - input vectors (passed to the kernel as a = vec_a, b = vec_b)
//   vec_result   - output vector
//   n_elments    - element count forwarded to the kernel
//   ctx          - accepted but not read by this function
//
// A failure is reported on stderr. The function returns 0 whether or not the
// multiplication succeeded.
int vector_mult_bls12377(T_bls* vec_b, T_bls* vec_a, T_bls* vec_result, size_t n_elments, device_context::DeviceContext ctx)
{
  vec_ops::VecOpsConfig mul_config = vec_ops::DefaultVecOpsConfig();
  mul_config.is_a_on_device = true;
  mul_config.is_b_on_device = true;
  mul_config.is_result_on_device = true;

  const cudaError_t status = bls12_377_mul_cuda(vec_a, vec_b, n_elments, mul_config, vec_result);
  if (status != cudaSuccess) {
    std::cerr << "Failed to multiply vectors - " << cudaGetErrorString(status) << std::endl;
  }
  // Success and failure paths both yield 0.
  return 0;
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const unsigned vector_size = 1 << 15;
|
||||
const unsigned repetitions = 1 << 15;
|
||||
const unsigned repetitions = 1 ;
|
||||
|
||||
cudaError_t err;
|
||||
nvmlInit();
|
||||
@@ -61,41 +78,53 @@ int main(int argc, char** argv)
|
||||
// host data
|
||||
T* host_in1 = (T*)malloc(vector_size * sizeof(T));
|
||||
T* host_in2 = (T*)malloc(vector_size * sizeof(T));
|
||||
T_bls* host_in1_bls12377 = (T_bls*)malloc(vector_size * sizeof(T_bls));
|
||||
T_bls* host_in2_bls12377 = (T_bls*)malloc(vector_size * sizeof(T_bls));
|
||||
std::cout << "Initializing vectors with random data" << std::endl;
|
||||
T::rand_host_many(host_in1, vector_size);
|
||||
T::rand_host_many(host_in2, vector_size);
|
||||
T_bls::rand_host_many(host_in1_bls12377, vector_size);
|
||||
T_bls::rand_host_many(host_in2_bls12377, vector_size);
|
||||
// device data
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
T* device_in1;
|
||||
T* device_in2;
|
||||
T* device_out;
|
||||
T* device_in1_bn254;
|
||||
T* device_in2_bn254;
|
||||
T* device_out_bn254;
|
||||
T_bls* device_in1_bls12377;
|
||||
T_bls* device_in2_bls12377;
|
||||
T_bls* device_out_bls12377;
|
||||
|
||||
err = cudaMalloc((void**)&device_in1, vector_size * sizeof(T));
|
||||
err = cudaMalloc((void**)&device_in1_bn254, vector_size * sizeof(T));
|
||||
err = cudaMalloc((void**)&device_in1_bls12377, vector_size * sizeof(T_bls));
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to allocate device memory - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = cudaMalloc((void**)&device_in2, vector_size * sizeof(T));
|
||||
err = cudaMalloc((void**)&device_in2_bn254, vector_size * sizeof(T));
|
||||
err = cudaMalloc((void**)&device_in2_bls12377, vector_size * sizeof(T_bls));
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to allocate device memory - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = cudaMalloc((void**)&device_out, vector_size * sizeof(T));
|
||||
err = cudaMalloc((void**)&device_out_bn254, vector_size * sizeof(T));
|
||||
err = cudaMalloc((void**)&device_out_bls12377, vector_size * sizeof(T_bls));
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to allocate device memory - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// copy from host to device
|
||||
err = cudaMemcpy(device_in1, host_in1, vector_size * sizeof(T), cudaMemcpyHostToDevice);
|
||||
err = cudaMemcpy(device_in1_bn254, host_in1, vector_size * sizeof(T), cudaMemcpyHostToDevice);
|
||||
err = cudaMemcpy(device_in1_bls12377, host_in1_bls12377, vector_size * sizeof(T_bls), cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to copy data from host to device - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = cudaMemcpy(device_in2, host_in2, vector_size * sizeof(T), cudaMemcpyHostToDevice);
|
||||
err = cudaMemcpy(device_in2_bn254, host_in2, vector_size * sizeof(T), cudaMemcpyHostToDevice);
|
||||
err = cudaMemcpy(device_in2_bls12377, host_in2_bls12377, vector_size * sizeof(T_bls), cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to copy data from host to device - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
@@ -104,7 +133,10 @@ int main(int argc, char** argv)
|
||||
std::cout << "Starting warm-up" << std::endl;
|
||||
// Warm-up loop
|
||||
for (int i = 0; i < repetitions; i++) {
|
||||
vector_mult(device_in1, device_in2, device_out, vector_size, ctx);
|
||||
std::cout << "bn254 mult" << std::endl;
|
||||
vector_mult_bn254(device_in1_bn254, device_in2_bn254, device_out_bn254, vector_size, ctx);
|
||||
std::cout << "bls12-377 mult" << std::endl;
|
||||
vector_mult_bls12377(device_in1_bls12377, device_in2_bls12377, device_out_bls12377, vector_size, ctx);
|
||||
}
|
||||
|
||||
std::cout << "Starting benchmarking" << std::endl;
|
||||
@@ -122,7 +154,7 @@ int main(int argc, char** argv)
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
// Benchmark loop
|
||||
for (int i = 0; i < repetitions; i++) {
|
||||
vector_mult(device_in1, device_in2, device_out, vector_size, ctx);
|
||||
vector_mult_bn254(device_in1_bn254, device_in2_bn254, device_out_bn254, vector_size, ctx);
|
||||
}
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
|
||||
@@ -146,7 +178,7 @@ int main(int argc, char** argv)
|
||||
// Optional: validate multiplication
|
||||
T* host_out = (T*)malloc(vector_size * sizeof(T));
|
||||
|
||||
cudaMemcpy(host_out, device_out, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(host_out, device_out_bn254, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
|
||||
|
||||
// validate multiplication here...
|
||||
|
||||
@@ -154,9 +186,9 @@ int main(int argc, char** argv)
|
||||
free(host_in1);
|
||||
free(host_in2);
|
||||
free(host_out);
|
||||
cudaFree(device_in1);
|
||||
cudaFree(device_in2);
|
||||
cudaFree(device_out);
|
||||
cudaFree(device_in1_bn254);
|
||||
cudaFree(device_in2_bn254);
|
||||
cudaFree(device_out_bn254);
|
||||
nvmlShutdown();
|
||||
return 0;
|
||||
}
|
||||
@@ -3,13 +3,11 @@
|
||||
#include "polynomials/polynomials.h"
|
||||
#include "polynomials/cuda_backend/polynomial_cuda_backend.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include <chrono>
|
||||
|
||||
// using namespace field_config;
|
||||
using namespace polynomials;
|
||||
using namespace merkle;
|
||||
using namespace bn254;
|
||||
|
||||
// define the polynomial type
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
using namespace poseidon;
|
||||
using namespace bn254;
|
||||
|
||||
@@ -14,13 +16,12 @@ inline uint32_t tree_index(uint32_t level, uint32_t offset) { return (1 << level
|
||||
|
||||
// We assume the tree has leaves already set, compute all other levels
|
||||
void build_tree(
|
||||
const uint32_t tree_height, scalar_t* tree, PoseidonConstants<scalar_t>* constants, PoseidonConfig config)
|
||||
const uint32_t tree_height, scalar_t* tree, Poseidon<scalar_t> &poseidon, HashConfig &config)
|
||||
{
|
||||
for (uint32_t level = tree_height - 1; level > 0; level--) {
|
||||
const uint32_t next_level = level - 1;
|
||||
const uint32_t next_level_width = 1 << next_level;
|
||||
bn254_poseidon_hash_cuda(
|
||||
&tree[tree_index(level, 0)], &tree[tree_index(next_level, 0)], next_level_width, 2, *constants, config);
|
||||
poseidon.hash_many(&tree[tree_index(level, 0)], &tree[tree_index(next_level, 0)], next_level_width, 2, 1, config);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,8 +66,8 @@ uint32_t validate_proof(
|
||||
const uint32_t tree_height,
|
||||
const uint32_t* proof_lr,
|
||||
const scalar_t* proof_hash,
|
||||
PoseidonConstants<scalar_t>* constants,
|
||||
PoseidonConfig config)
|
||||
Poseidon<scalar_t> &poseidon,
|
||||
HashConfig &config)
|
||||
{
|
||||
scalar_t hashes_in[2], hash_out[1], level_hash;
|
||||
level_hash = hash;
|
||||
@@ -79,7 +80,7 @@ uint32_t validate_proof(
|
||||
hashes_in[1] = level_hash;
|
||||
}
|
||||
// next level hash
|
||||
bn254_poseidon_hash_cuda(hashes_in, hash_out, 1, 2, *constants, config);
|
||||
poseidon.hash_many(hashes_in, hash_out, 1, 2, 1, config);
|
||||
level_hash = hash_out[0];
|
||||
}
|
||||
return proof_hash[0] == level_hash;
|
||||
@@ -109,16 +110,15 @@ int main(int argc, char* argv[])
|
||||
d = d + scalar_t::one();
|
||||
}
|
||||
std::cout << "Hashing blocks into tree leaves..." << std::endl;
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
bn254_init_optimized_poseidon_constants_cuda(data_arity, ctx, &constants);
|
||||
PoseidonConfig config = default_poseidon_config(data_arity + 1);
|
||||
bn254_poseidon_hash_cuda(data, &tree[tree_index(leaf_level, 0)], tree_width, 4, constants, config);
|
||||
|
||||
Poseidon<scalar_t> poseidon(data_arity, ctx);
|
||||
HashConfig config = default_hash_config(ctx);
|
||||
poseidon.hash_many(data, &tree[tree_index(leaf_level, 0)], tree_width, data_arity, 1, config);
|
||||
|
||||
std::cout << "3. Building Merkle tree" << std::endl;
|
||||
PoseidonConstants<scalar_t> tree_constants;
|
||||
bn254_init_optimized_poseidon_constants_cuda(tree_arity, ctx, &tree_constants);
|
||||
PoseidonConfig tree_config = default_poseidon_config(tree_arity + 1);
|
||||
build_tree(tree_height, tree, &tree_constants, tree_config);
|
||||
Poseidon<scalar_t> tree_poseidon(tree_arity, ctx);
|
||||
HashConfig tree_config = default_hash_config(ctx);
|
||||
build_tree(tree_height, tree, tree_poseidon, tree_config);
|
||||
|
||||
std::cout << "4. Generate membership proof" << std::endl;
|
||||
uint32_t position = tree_width - 1;
|
||||
@@ -133,13 +133,13 @@ int main(int argc, char* argv[])
|
||||
std::cout << "5. Validate the hash membership" << std::endl;
|
||||
uint32_t validated;
|
||||
const scalar_t hash = tree[tree_index(leaf_level, query_position)];
|
||||
validated = validate_proof(hash, tree_height, proof_lr, proof_hash, &tree_constants, tree_config);
|
||||
validated = validate_proof(hash, tree_height, proof_lr, proof_hash, tree_poseidon, tree_config);
|
||||
std::cout << "Validated: " << validated << std::endl;
|
||||
|
||||
std::cout << "6. Tamper the hash" << std::endl;
|
||||
const scalar_t tampered_hash = hash + scalar_t::one();
|
||||
validated = validate_proof(tampered_hash, tree_height, proof_lr, proof_hash, &tree_constants, tree_config);
|
||||
|
||||
validated = validate_proof(tampered_hash, tree_height, proof_lr, proof_hash, tree_poseidon, tree_config);
|
||||
|
||||
std::cout << "7. Invalidate tamper hash membership" << std::endl;
|
||||
std::cout << "Validated: " << validated << std::endl;
|
||||
return 0;
|
||||
|
||||
28
examples/c++/risc0/CMakeLists.txt
Normal file
28
examples/c++/risc0/CMakeLists.txt
Normal file
@@ -0,0 +1,28 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
|
||||
endif ()
|
||||
project(example LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -DFIELD_ID=1001")
|
||||
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
|
||||
|
||||
add_executable(
|
||||
example
|
||||
example.cu
|
||||
)
|
||||
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
|
||||
# can link to another curve/field by changing the following lib and FIELD_ID
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_babybear.a)
|
||||
# target_compile_definitions(example PUBLIC FIELD_ID babybear)
|
||||
44
examples/c++/risc0/README.md
Normal file
44
examples/c++/risc0/README.md
Normal file
@@ -0,0 +1,44 @@
|
||||
# ICICLE example: RISC0's Fibonacci sequence proof using Polynomial API
|
||||
|
||||
## Why RISC0?
|
||||
|
||||
[RISC0 Protocol](https://www.risczero.com/) creates computational integrity proofs (a.k.a. Zero Knowledge Proofs) for programs executing on RISC-V architecture.
|
||||
The proofs are created for sequences of values in RISC-V registers, called execution traces.
|
||||
This approach is transparent to developers and enables the use of general purpose languages.
|
||||
|
||||
## Best-Practices
|
||||
|
||||
This example builds on the [ICICLE Polynomial API](../polynomial-api/README.md) example, so we recommend running that example first.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
RISC0 encodes execution traces into very large polynomials and commits them using Merkle trees.
|
||||
FRI speeds up validation of such commitments by recursively generating smaller polynomials (and trees) from larger ones.
|
||||
The key enabler for *recursion* is the *redundancy* of polynomial commitments, hence the use of Reed-Solomon codes.
|
||||
|
||||
## Running the example
|
||||
|
||||
To run the example, from the project root directory:
|
||||
|
||||
```sh
|
||||
cd examples/c++/risc0
|
||||
./compile.sh
|
||||
./run.sh
|
||||
```
|
||||
|
||||
## What's in the example
|
||||
|
||||
The example follows [STARK by Hand](https://dev.risczero.com/proof-system/stark-by-hand), structured in the following Lessons:
|
||||
|
||||
1. The Execution Trace
|
||||
2. Rule checks to validate a computation
|
||||
3. Padding the Trace
|
||||
4. Constructing Trace Polynomials
|
||||
5. ZK Commitments of the Trace Data
|
||||
6. Constraint Polynomials
|
||||
7. Mixing Constraint Polynomials
|
||||
8. The Core of the RISC Zero STARK
|
||||
9. The DEEP Technique
|
||||
10. Mixing (Batching) for FRI
|
||||
11. FRI Protocol (Commit Phase)
|
||||
12. FRI Protocol (Query Phase)
|
||||
15
examples/c++/risc0/compile.sh
Executable file
15
examples/c++/risc0/compile.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DFIELD=babybear
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
275
examples/c++/risc0/example.cu
Normal file
275
examples/c++/risc0/example.cu
Normal file
@@ -0,0 +1,275 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
#include "polynomials/polynomials.h"
|
||||
#include "polynomials/cuda_backend/polynomial_cuda_backend.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
|
||||
using namespace polynomials;
|
||||
|
||||
// define the polynomial type
|
||||
typedef Polynomial<scalar_t> Polynomial_t;
|
||||
|
||||
// RISC-V register type
|
||||
typedef int64_t rv_t;
|
||||
|
||||
// Convert RISC-V registers to Finite Fields
|
||||
void to_ff(rv_t* rv, scalar_t* s, size_t n) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
s[i] = scalar_t::from(rv[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Print the evaluations of polynomial *p on a (possibly shifted) domain.
//   p:      polynomial to evaluate
//   logn:   log2 of the number of points to print
//   shift:  first evaluation point; subsequent points are shift * omega^i,
//           where omega = scalar_t::omega(logn)
//   header: text printed on its own line before the values
void p_print(Polynomial_t * p, int logn, scalar_t shift, std::string header = "Print Vector") {
  std::cout << header << std::endl;
  const auto count = 1 << logn;
  const auto generator = scalar_t::omega(logn);
  auto point = shift;
  int row = 0;
  while (row < count) {
    std::cout << row << ": " << (*p)(point) << std::endl;
    point = point * generator; // advance to the next domain point
    ++row;
  }
}
|
||||
|
||||
// value to polynomial
|
||||
Polynomial_t p_value(scalar_t value) {
|
||||
auto p_value = Polynomial_t::from_coefficients(&value , 1);
|
||||
return p_value;
|
||||
}
|
||||
|
||||
// Build a polynomial whose evaluations on the size-2^logn roots-of-unity domain
// are those of *p cyclically rotated right by one position:
//   result_eval[i] = p_eval[i-1] for i >= 1, and result_eval[0] = p_eval[n-1].
// *p itself is only evaluated, not modified by this function.
Polynomial_t p_rotate(Polynomial_t* p, int logn) {
  const auto n = 1 << logn;
  auto evals = std::make_unique<scalar_t[]>(n);
  p->evaluate_on_rou_domain(logn, evals.get());
  // Write every evaluation one slot to the right, wrapping the last one to slot 0.
  auto rotated = std::make_unique<scalar_t[]>(n);
  for (int i = 0; i < n; ++i) {
    rotated[(i + 1) % n] = evals[i];
  }
  return Polynomial_t::from_rou_evaluations(rotated.get(), n);
}
|
||||
|
||||
// mix polynomials (c.f. mix polynomial evaluations)
|
||||
Polynomial_t p_mix(Polynomial_t* in[], size_t nmix, scalar_t mix_parameter) {
|
||||
scalar_t factor = mix_parameter;
|
||||
Polynomial_t out = in[0]->clone();
|
||||
for (int i = 1; i < nmix; ++i) {
|
||||
out += factor * (*in[i]);
|
||||
factor = factor * mix_parameter;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// Find the line through the points (xa, ya) and (xb, yb).
// On return coeffs[0] holds the constant term and coeffs[1] the slope, so the
// line is coeffs[0] + coeffs[1] * x. The caller must provide room for two
// elements. xa and xb are expected to differ, since (xa - xb) is inverted.
void solve_linear(scalar_t xa, scalar_t ya, scalar_t xb, scalar_t yb, scalar_t * coeffs) {
  scalar_t slope = (ya - yb) * scalar_t::inverse(xa - xb);
  scalar_t intercept = ya - slope * xa;
  coeffs[0] = intercept;
  coeffs[1] = slope;
}
|
||||
|
||||
// Evaluate polynomial *p on a domain of n points by zero-padding its
// coefficients to length n and running a forward NTT with coset generator `shift`.
//   p:     polynomial to evaluate (only read)
//   n:     size of the target domain; must be larger than the degree of *p
//   shift: coset generator written into the NTT config (default: one, i.e. no shift)
// Returns a host buffer with the n evaluations, or nullptr (after reporting on
// std::cerr) if the polynomial does not fit into n coefficients or the NTT fails.
std::unique_ptr<scalar_t[]> InterpolateOnLargerDomain(Polynomial_t * p, int n, scalar_t shift = scalar_t::one()) {
  const int deg = p->degree();
  // deg + 1 coefficients are copied below; refuse to write past the n-element buffer.
  if (deg >= n) {
    std::cerr << "InterpolateOnLargerDomain: polynomial of degree " << deg
              << " does not fit into a domain of size " << n << std::endl;
    return nullptr;
  }
  auto input = std::make_unique<scalar_t[]>(n);
  // TBD: check if scalar_t constructor initializes to zero
  for (int i = 0; i < n; ++i) {
    input[i] = scalar_t::zero();
  }
  // NOTE(review): guards against a negative degree, which some polynomial
  // implementations report for the zero polynomial - confirm for Polynomial_t.
  if (deg >= 0) {
    p->copy_coeffs(input.get(), 0/*start*/, deg);
  }
  auto ntt_config = ntt::default_ntt_config<scalar_t>();
  ntt_config.coset_gen = shift;
  auto evals_h = std::make_unique<scalar_t[]>(n);
  const auto err = ntt::ntt(input.get(), n, ntt::NTTDir::kForward, ntt_config, evals_h.get());
  // Do not hand back a buffer of unspecified content when the transform failed.
  if (err != cudaSuccess) {
    std::cerr << "InterpolateOnLargerDomain: NTT failed - " << cudaGetErrorString(err) << std::endl;
    return nullptr;
  }
  return evals_h;
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
std::cout << "This is an ICICLE C++ implementation of the STARK by Hand Explainer." << std::endl;
|
||||
std::cout << "https://dev.risczero.com/proof-system/stark-by-hand" << std::endl;
|
||||
|
||||
const int logn=3;
|
||||
const int n = 1 << logn;
|
||||
|
||||
std::cout << "Initializing NTT" << std::endl;
|
||||
static const int MAX_NTT_LOG_SIZE = 24;
|
||||
auto ntt_config = ntt::default_ntt_config<scalar_t>();
|
||||
const scalar_t basic_root = scalar_t::omega(MAX_NTT_LOG_SIZE);
|
||||
ntt::init_domain(basic_root, ntt_config.ctx);
|
||||
std::cout << "Initializing Polynomials" << std::endl;
|
||||
// Virtual factory design pattern: initializing polynomimals factory for CUDA backend
|
||||
Polynomial_t::initialize(std::make_unique<CUDAPolynomialFactory<>>());
|
||||
|
||||
std::cout << std::endl << "Lesson 1: The Execution Trace" << std::endl;
|
||||
// Trace: Data Columns
|
||||
rv_t rv_d1_trace[] = {24, 30, 54, 84, 78, 15, 29, 50};
|
||||
rv_t rv_d2_trace[] = {30, 54, 84, 138, 2, 77, 21, 36};
|
||||
rv_t rv_d3_trace[] = {54, 84, 138, 222, 71, 17, 92, 33};
|
||||
auto d1_trace = std::make_unique<scalar_t[]>(n);
|
||||
auto d2_trace = std::make_unique<scalar_t[]>(n);
|
||||
auto d3_trace = std::make_unique<scalar_t[]>(n);
|
||||
to_ff(rv_d1_trace, d1_trace.get(), n);
|
||||
to_ff(rv_d2_trace, d2_trace.get(), n);
|
||||
to_ff(rv_d3_trace, d3_trace.get(), n);
|
||||
// Trace: Control Columns
|
||||
// Init steps are flagged in c1_trace
|
||||
// Computation steps are flagged in c2_trace
|
||||
// Termination step is flagged in c3_trace
|
||||
// 0s at the end of each control column correspond to the padding of the trace
|
||||
rv_t rv_c1_trace[] = {1, 0, 0, 0, 0, 0, 0, 0};
|
||||
rv_t rv_c2_trace[] = {0, 1, 1, 1, 0, 0, 0, 0};
|
||||
rv_t rv_c3_trace[] = {0, 0, 0, 1, 0, 0, 0, 0};
|
||||
auto c1_trace = std::make_unique<scalar_t[]>(n);
|
||||
auto c2_trace = std::make_unique<scalar_t[]>(n);
|
||||
auto c3_trace = std::make_unique<scalar_t[]>(n);
|
||||
to_ff(rv_c1_trace, c1_trace.get(), n);
|
||||
to_ff(rv_c2_trace, c2_trace.get(), n);
|
||||
to_ff(rv_c3_trace, c3_trace.get(), n);
|
||||
|
||||
std::cout << "Lesson 2: Rule checks to validate a computation" << std::endl;
|
||||
std::cout << "We use rule-checking polynomials." << std::endl;
|
||||
|
||||
std::cout << "Lesson 3: Padding the Trace" << std::endl;
|
||||
// The trace is padded to a power of 2 size to allow for efficient NTT operations.
|
||||
// we already did this in the initialization of the trace data
|
||||
// We will construct a zero-knowledge proof that:
|
||||
// this trace represents a program that satisfies these 6 rules:
|
||||
// 1) Fibonacci words here
|
||||
// 2) d1_trace[0] == 24 (init 1 constraint)
|
||||
// 3) d2_trace[0] == 30 (init 2 constraint)
|
||||
// 4) d3_trace[3] == 28 (termination constraint)
|
||||
// 5) if c2_trace[i] == 1, then d2_trace[i] == d1_trace[i+1]
|
||||
// 6) if c2_trace[i] == 1, then d3_trace[i] == d2_trace[i+1}
|
||||
|
||||
std::cout << "Lesson 4: Constructing Trace Polynomials" << std::endl;
|
||||
auto p_d1 = Polynomial_t::from_rou_evaluations(d1_trace.get(), n);
|
||||
auto p_d2 = Polynomial_t::from_rou_evaluations(d2_trace.get(), n);
|
||||
auto p_d3 = Polynomial_t::from_rou_evaluations(d3_trace.get(), n);
|
||||
auto p_c1 = Polynomial_t::from_rou_evaluations(c1_trace.get(), n);
|
||||
auto p_c2 = Polynomial_t::from_rou_evaluations(c2_trace.get(), n);
|
||||
auto p_c3 = Polynomial_t::from_rou_evaluations(c3_trace.get(), n);
|
||||
|
||||
std::cout << "Lesson 5: ZK Commitments of the Trace Data" << std::endl;
|
||||
std::cout << "To maintain a zk protocol, the trace polynomials are evaluated over a zk commitment domain" << std::endl;
|
||||
std::cout << "zk commitment domain is a coset of Reed Solomon domain shifted by a basic root of unity" << std::endl;
|
||||
scalar_t xzk = basic_root;
|
||||
p_print(&p_d1, logn, xzk, "ZK commitment for d1 polynomial");
|
||||
std::cout << "Build Merkle Tree for ZK commitments (outside the scope of this example)" << std::endl;
|
||||
|
||||
std::cout << "Lesson 6: Constraint Polynomials" << std::endl;
|
||||
std::cout << "The constraints are used to check the correctness of the trace. In this example, we check 6 rules to establish the validity of the trace." << std::endl;
|
||||
auto p_fib_constraint = (p_d3 - p_d2 - p_d1) * (p_c1 + p_c2 + p_c3);
|
||||
auto fib_constraint_zkcommitment = InterpolateOnLargerDomain(&p_fib_constraint, 4*n, xzk);
|
||||
|
||||
auto p_init1_constraint = (p_d1 - p_value(scalar_t::from(24))) * p_c1;
|
||||
// sanity checks printing
|
||||
p_print(&p_init1_constraint, logn+2, scalar_t::one(), "Reed-Solomon constraint polynomial gives 0s in every 4th row");
|
||||
p_print(&p_init1_constraint, logn+2, xzk, "ZK Commitment constraint polynomial gives no 0s");
|
||||
auto p_init2_constraint = (p_d2 - p_value(scalar_t::from(30))) * p_c1;
|
||||
auto p_termination_constraint = (p_d3 - p_value(scalar_t::from(222))) * p_c3;
|
||||
auto p_recursion_constraint1 = (p_d1 - p_rotate(&p_d2, logn)) * p_c2;
|
||||
auto p_recursion_constraint2 = (p_d2 - p_rotate(&p_d3, logn)) * p_c2;
|
||||
|
||||
std::cout << std::endl << "Lesson 7: Mixing Constraint Polynomials" << std::endl;
|
||||
Polynomial_t * p_all_constraints[] = {&p_fib_constraint, &p_init1_constraint, &p_init2_constraint, &p_termination_constraint, &p_recursion_constraint1, &p_recursion_constraint2};
|
||||
const size_t nmix = sizeof(p_all_constraints) / sizeof(p_all_constraints[0]);
|
||||
auto p_mixed_constraints = p_mix(p_all_constraints, nmix, scalar_t::from(5));
|
||||
std::cout << "All constraint polynomials are low-degree:" << std::endl;
|
||||
for( int i = 0; i < nmix; ++i) {
|
||||
std::cout << i << ": " << p_all_constraints[i]->degree() << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Lesson 8: The Core of the RISC Zero STARK" << std::endl;
|
||||
std::cout << "Degree of the mixed constraints polynomial: " << p_mixed_constraints.degree() << std::endl;
|
||||
auto p_validity = p_mixed_constraints.divide_by_vanishing_polynomial(n);
|
||||
std::cout << "Degree of the validity polynomial: " << p_validity.degree() << std::endl;
|
||||
std::cout << "The Verifier should provide the Merke commitment for the above" << std::endl;
|
||||
|
||||
std::cout << "Lesson 9: The DEEP Technique" << std::endl;
|
||||
std::cout << "The DEEP technique improves the security of a single query by sampling outside of the commitment domain." << std::endl;
|
||||
// In the original STARK protocol, the Verifier tests validity polynomial at a number of test points;
|
||||
// the soundness of the protocol depends on the number of tests.
|
||||
// The DEEP-ALI technique allows us to achieve a high degree of soundness with a single test.
|
||||
// The details of DEEP are described in the following lesson.
|
||||
|
||||
auto DEEP_point = scalar_t::from(93);
|
||||
std::cout << "The prover convinces the verifier that V=C/Z at the DEEP_test_point, " << DEEP_point << std::endl;
|
||||
const scalar_t coeffs1[2] = {scalar_t::zero()-DEEP_point, scalar_t::one()};
|
||||
auto denom_DEEP1 = Polynomial_t::from_coefficients(coeffs1, 2);
|
||||
auto [p_d1_DEEP, r] = (p_d1 - p_value(DEEP_point)).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP d1 degree is: " << p_d1_DEEP.degree() << std::endl;
|
||||
// d2, d3 use recursion constraints and need the point corresponding to the previous state (clock cycle)
|
||||
auto omega = scalar_t::omega(logn);
|
||||
auto DEEP_prev_point = DEEP_point*scalar_t::inverse(omega);
|
||||
auto coeffs2 = std::make_unique<scalar_t[]>(2);
|
||||
coeffs2[0] = scalar_t::zero() - DEEP_prev_point;
|
||||
coeffs2[1] = scalar_t::one();
|
||||
auto denom_DEEP2 = Polynomial_t::from_coefficients(coeffs2.get(), 2);
|
||||
|
||||
auto coeffs_d2bar = std::make_unique<scalar_t[]>(2);
|
||||
solve_linear(DEEP_point, p_d2(DEEP_point), DEEP_prev_point, p_d2(DEEP_prev_point), coeffs_d2bar.get());
|
||||
auto d2bar = Polynomial_t::from_coefficients(coeffs_d2bar.get(), 2);
|
||||
auto [p_d2_DEEP, r2] = (p_d2 - d2bar).divide(denom_DEEP1*denom_DEEP2);
|
||||
std::cout << "The DEEP d2 degree is: " << p_d2_DEEP.degree() << std::endl;
|
||||
|
||||
auto coeffs_d3bar = std::make_unique<scalar_t[]>(2);
|
||||
solve_linear(DEEP_point, p_d3(DEEP_point), DEEP_prev_point, p_d3(DEEP_prev_point), coeffs_d3bar.get());
|
||||
auto d3bar = Polynomial_t::from_coefficients(coeffs_d3bar.get(), 2);
|
||||
auto [p_d3_DEEP, r3] = (p_d3 - d3bar).divide(denom_DEEP1*denom_DEEP2);
|
||||
std::cout << "The DEEP d3 degree is: " << p_d3_DEEP.degree() << std::endl;
|
||||
|
||||
// DEEP c{1,2,3} polynomials
|
||||
const scalar_t coeffs_c1bar[1] = {p_c1(DEEP_point)};
|
||||
auto c1bar = Polynomial_t::from_coefficients(coeffs_c1bar, 1);
|
||||
auto [p_c1_DEEP, r_c1] = (p_c1 - c1bar).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP c1 degree is: " << p_c1_DEEP.degree() << std::endl;
|
||||
const scalar_t coeffs_c2bar[1] = {p_c2(DEEP_point)};
|
||||
auto c2bar = Polynomial_t::from_coefficients(coeffs_c2bar, 1);
|
||||
auto [p_c2_DEEP, r_c2] = (p_c2 - c2bar).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP c2 degree is: " << p_c2_DEEP.degree() << std::endl;
|
||||
const scalar_t coeffs_c3bar[1] = {p_c3(DEEP_point)};
|
||||
auto c3bar = Polynomial_t::from_coefficients(coeffs_c3bar, 1);
|
||||
auto [p_c3_DEEP, r_c3] = (p_c3 - c3bar).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP c3 degree is: " << p_c3_DEEP.degree() << std::endl;
|
||||
// DEEP validity polynomial
|
||||
const scalar_t coeffs_vbar[1] = {p_validity(DEEP_point)};
|
||||
auto vbar = Polynomial_t::from_coefficients(coeffs_vbar, 1);
|
||||
auto [v_DEEP, r_v] = (p_validity - vbar).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP validity polynomial degree is: " << v_DEEP.degree() << std::endl;
|
||||
std::cout << "The Prover sends DEEP polynomials to the Verifier" << std::endl;
|
||||
|
||||
std::cout << "Lesson 10: Mixing (Batching) for FRI" << std::endl;
|
||||
std::cout << "The initial FRI polynomial is the mix of the 7 DEEP polynomials." << std::endl;
|
||||
Polynomial_t* all_DEEP[] = {&p_d1_DEEP, &p_d2_DEEP, &p_d3_DEEP, &p_c1_DEEP, &p_c2_DEEP, &p_c3_DEEP, &v_DEEP};
|
||||
Polynomial_t fri_input = p_mix(all_DEEP, 7, scalar_t::from(99));
|
||||
std::cout << "The degree of the mixed DEEP polynomial is: " << fri_input.degree() << std::endl;
|
||||
|
||||
std::cout << "Lesson 11: FRI Protocol (Commit Phase)" << std::endl;
|
||||
std::cout << "The prover provides information to convince the verifier that the DEEP polynomials are low-degree." << std::endl;
|
||||
int nof_rounds = 3;
|
||||
Polynomial_t feven[nof_rounds], fodd[nof_rounds], fri[nof_rounds+1];
|
||||
scalar_t rfri[nof_rounds];
|
||||
fri[0] = fri_input.clone();
|
||||
for (int i = 0; i < nof_rounds; ++i) {
|
||||
feven[i] = fri[i].even();
|
||||
fodd[i] = fri[i].odd();
|
||||
rfri[i] = scalar_t::rand_host();
|
||||
fri[i+1] = feven[i] + rfri[i]*fodd[i];
|
||||
std::cout << "The degree of the Round " << i << " polynomial is: " << fri[i+1].degree() << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Lesson 12: FRI Protocol (Query Phase)" << std::endl;
|
||||
// We use Polynomial API to evaluate the FRI polynomials
|
||||
// In practice, verifier will use Merkle commitments
|
||||
auto xp = scalar_t::rand_host();
|
||||
auto xm = scalar_t::zero() - xp;
|
||||
scalar_t lhs[nof_rounds], rhs[nof_rounds];
|
||||
for (int i = 0; i < nof_rounds; ++i) {
|
||||
rhs[i] = (rfri[i]+xp)*fri[i](xp)*scalar_t::inverse(scalar_t::from(2)*xp) + (rfri[i]+xm)*fri[i](xm)*scalar_t::inverse(scalar_t::from(2)*xm);
|
||||
lhs[i] = fri[i+1](xp*xp);
|
||||
std::cout << "Round " << i << std::endl << "rhs: " << rhs[i] << std::endl << "lhs: " << lhs[i] << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
2
examples/c++/risc0/run.sh
Executable file
2
examples/c++/risc0/run.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
@@ -2,7 +2,8 @@ use icicle_bls12_381::curve::ScalarField as F;
|
||||
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
|
||||
use icicle_core::poseidon::{load_optimized_poseidon_constants, poseidon_hash_many, PoseidonConfig};
|
||||
use icicle_core::hash::{SpongeHash, HashConfig};
|
||||
use icicle_core::poseidon::Poseidon;
|
||||
use icicle_core::traits::FieldImpl;
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
@@ -24,14 +25,14 @@ fn main() {
|
||||
let test_size = 1 << size;
|
||||
|
||||
println!("Running Icicle Examples: Rust Poseidon Hash");
|
||||
let arity = 2u32;
|
||||
let arity = 2;
|
||||
println!(
|
||||
"---------------------- Loading optimized Poseidon constants for arity={} ------------------------",
|
||||
arity
|
||||
);
|
||||
let ctx = DeviceContext::default();
|
||||
let constants = load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap();
|
||||
let config = PoseidonConfig::default();
|
||||
let poseidon = Poseidon::load(arity, &ctx).unwrap();
|
||||
let config = HashConfig::default();
|
||||
|
||||
println!(
|
||||
"---------------------- Input size 2^{}={} ------------------------",
|
||||
@@ -45,12 +46,12 @@ fn main() {
|
||||
println!("Executing BLS12-381 Poseidon Hash on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
poseidon_hash_many::<F>(
|
||||
poseidon.hash_many(
|
||||
input_slice,
|
||||
output_slice,
|
||||
test_size as u32,
|
||||
arity as u32,
|
||||
&constants,
|
||||
test_size,
|
||||
arity,
|
||||
1,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
function(check_field)
|
||||
set(SUPPORTED_FIELDS babybear;stark252)
|
||||
set(SUPPORTED_FIELDS babybear;stark252;m31)
|
||||
|
||||
set(IS_FIELD_SUPPORTED FALSE)
|
||||
set(I 1000)
|
||||
|
||||
@@ -9,46 +9,67 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/babybear.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" cudaError_t babybear_extension_ntt_cuda(
|
||||
const babybear::extension_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::extension_t* output);
|
||||
|
||||
extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
extern "C" cudaError_t babybear_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<babybear::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const babybear::scalar_t* round_constants,
|
||||
const babybear::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t babybear_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
extern "C" cudaError_t babybear_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<babybear::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t babybear_poseidon2_hash_cuda(
|
||||
const babybear::scalar_t* input,
|
||||
extern "C" cudaError_t babybear_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<babybear::scalar_t>* poseidon,
|
||||
const babybear::scalar_t* inputs,
|
||||
babybear::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<babybear::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t
|
||||
babybear_poseidon2_delete_cuda(poseidon2::Poseidon2<babybear::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t babybear_build_merkle_tree(
|
||||
const babybear::scalar_t* leaves,
|
||||
babybear::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* compression,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t babybear_mmcs_commit_cuda(
|
||||
const matrix::Matrix<babybear::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
babybear::scalar_t* digests,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* hasher,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t babybear_mul_cuda(
|
||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||
@@ -72,10 +93,8 @@ extern "C" cudaError_t babybear_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t babybear_bit_reverse_cuda(
|
||||
const babybear::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
babybear::scalar_t* output);
|
||||
const babybear::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);
|
||||
|
||||
@@ -101,6 +120,9 @@ extern "C" cudaError_t babybear_extension_mul_cuda(
|
||||
extern "C" cudaError_t babybear_extension_add_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_accumulate_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_sub_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
@@ -113,4 +135,8 @@ extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_bit_reverse_cuda(
|
||||
const babybear::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::extension_t* output);
|
||||
|
||||
|
||||
#endif
|
||||
@@ -9,12 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bls12_377.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_precompute_msm_bases_cuda(
|
||||
bls12_377::g2_affine_t* bases,
|
||||
@@ -65,32 +66,52 @@ extern "C" cudaError_t bls12_377_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bls12_377_projective_convert_montgomery(
|
||||
bls12_377::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bls12_377::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bls12_377::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bls12_377_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_377::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_hash_cuda(
|
||||
bls12_377::scalar_t* input,
|
||||
bls12_377::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t bls12_377_build_merkle_tree(
|
||||
const bls12_377::scalar_t* leaves,
|
||||
bls12_377::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* compression,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bls12_377::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bls12_377::scalar_t* digests,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* hasher,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bls12_377::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bls12_377::scalar_t* round_constants,
|
||||
const bls12_377::scalar_t* mds_matrix,
|
||||
const bls12_377::scalar_t* non_sparse_matrix,
|
||||
const bls12_377::scalar_t* sparse_matrices,
|
||||
const bls12_377::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bls12_377::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bls12_377::scalar_t>* poseidon,
|
||||
const bls12_377::scalar_t* inputs,
|
||||
bls12_377::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bls12_377_poseidon_delete_cuda(poseidon::Poseidon<bls12_377::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bls12_377_mul_cuda(
|
||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
||||
@@ -114,10 +135,8 @@ extern "C" cudaError_t bls12_377_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t bls12_377_bit_reverse_cuda(
|
||||
const bls12_377::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
bls12_377::scalar_t* output);
|
||||
const bls12_377::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_377::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -9,12 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bls12_381.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_precompute_msm_bases_cuda(
|
||||
bls12_381::g2_affine_t* bases,
|
||||
@@ -65,32 +66,52 @@ extern "C" cudaError_t bls12_381_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bls12_381_projective_convert_montgomery(
|
||||
bls12_381::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bls12_381::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bls12_381::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bls12_381_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_381::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_hash_cuda(
|
||||
bls12_381::scalar_t* input,
|
||||
bls12_381::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t bls12_381_build_merkle_tree(
|
||||
const bls12_381::scalar_t* leaves,
|
||||
bls12_381::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* compression,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bls12_381::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bls12_381::scalar_t* digests,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* hasher,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bls12_381::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bls12_381::scalar_t* round_constants,
|
||||
const bls12_381::scalar_t* mds_matrix,
|
||||
const bls12_381::scalar_t* non_sparse_matrix,
|
||||
const bls12_381::scalar_t* sparse_matrices,
|
||||
const bls12_381::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bls12_381::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bls12_381::scalar_t>* poseidon,
|
||||
const bls12_381::scalar_t* inputs,
|
||||
bls12_381::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bls12_381_poseidon_delete_cuda(poseidon::Poseidon<bls12_381::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bls12_381_mul_cuda(
|
||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
||||
@@ -114,10 +135,8 @@ extern "C" cudaError_t bls12_381_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t bls12_381_bit_reverse_cuda(
|
||||
const bls12_381::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
bls12_381::scalar_t* output);
|
||||
const bls12_381::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_381::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -9,12 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" cudaError_t bn254_g2_precompute_msm_bases_cuda(
|
||||
@@ -66,63 +67,87 @@ extern "C" cudaError_t bn254_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bn254_projective_convert_montgomery(
|
||||
bn254::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
extern "C" cudaError_t bn254_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<bn254::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t bn254_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
extern "C" cudaError_t bn254_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<bn254::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon2_hash_cuda(
|
||||
const bn254::scalar_t* input,
|
||||
extern "C" cudaError_t bn254_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<bn254::scalar_t>* poseidon,
|
||||
const bn254::scalar_t* inputs,
|
||||
bn254::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<bn254::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t bn254_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t
|
||||
bn254_poseidon2_delete_cuda(poseidon2::Poseidon2<bn254::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bn254::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bn254::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bn254_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bn254::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_hash_cuda(
|
||||
bn254::scalar_t* input,
|
||||
bn254::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bn254_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t bn254_build_merkle_tree(
|
||||
const bn254::scalar_t* leaves,
|
||||
bn254::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* compression,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bn254_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bn254::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bn254::scalar_t* digests,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* hasher,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bn254::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* mds_matrix,
|
||||
const bn254::scalar_t* non_sparse_matrix,
|
||||
const bn254::scalar_t* sparse_matrices,
|
||||
const bn254::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bn254::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bn254::scalar_t>* poseidon,
|
||||
const bn254::scalar_t* inputs,
|
||||
bn254::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bn254_poseidon_delete_cuda(poseidon::Poseidon<bn254::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bn254_mul_cuda(
|
||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
||||
@@ -146,10 +171,8 @@ extern "C" cudaError_t bn254_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t bn254_bit_reverse_cuda(
|
||||
const bn254::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
bn254::scalar_t* output);
|
||||
const bn254::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bn254::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -9,12 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bw6_761.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_precompute_msm_bases_cuda(
|
||||
bw6_761::g2_affine_t* bases,
|
||||
@@ -65,32 +66,52 @@ extern "C" cudaError_t bw6_761_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bw6_761_projective_convert_montgomery(
|
||||
bw6_761::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bw6_761::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bw6_761::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bw6_761_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bw6_761::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_hash_cuda(
|
||||
bw6_761::scalar_t* input,
|
||||
bw6_761::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t bw6_761_build_merkle_tree(
|
||||
const bw6_761::scalar_t* leaves,
|
||||
bw6_761::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* compression,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bw6_761::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bw6_761::scalar_t* digests,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* hasher,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bw6_761::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bw6_761::scalar_t* round_constants,
|
||||
const bw6_761::scalar_t* mds_matrix,
|
||||
const bw6_761::scalar_t* non_sparse_matrix,
|
||||
const bw6_761::scalar_t* sparse_matrices,
|
||||
const bw6_761::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bw6_761::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bw6_761::scalar_t>* poseidon,
|
||||
const bw6_761::scalar_t* inputs,
|
||||
bw6_761::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bw6_761_poseidon_delete_cuda(poseidon::Poseidon<bw6_761::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bw6_761_mul_cuda(
|
||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
||||
@@ -114,10 +135,8 @@ extern "C" cudaError_t bw6_761_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t bw6_761_bit_reverse_cuda(
|
||||
const bw6_761::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
bw6_761::scalar_t* output);
|
||||
const bw6_761::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bw6_761::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -9,11 +9,12 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/grumpkin.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t grumpkin_precompute_msm_bases_cuda(
|
||||
grumpkin::affine_t* bases,
|
||||
@@ -38,32 +39,52 @@ extern "C" cudaError_t grumpkin_affine_convert_montgomery(
|
||||
extern "C" cudaError_t grumpkin_projective_convert_montgomery(
|
||||
grumpkin::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const grumpkin::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<grumpkin::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t grumpkin_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<grumpkin::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_hash_cuda(
|
||||
grumpkin::scalar_t* input,
|
||||
grumpkin::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t grumpkin_build_merkle_tree(
|
||||
const grumpkin::scalar_t* leaves,
|
||||
grumpkin::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* compression,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_mmcs_commit_cuda(
|
||||
const matrix::Matrix<grumpkin::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
grumpkin::scalar_t* digests,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* hasher,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_create_cuda(
|
||||
poseidon::Poseidon<grumpkin::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const grumpkin::scalar_t* round_constants,
|
||||
const grumpkin::scalar_t* mds_matrix,
|
||||
const grumpkin::scalar_t* non_sparse_matrix,
|
||||
const grumpkin::scalar_t* sparse_matrices,
|
||||
const grumpkin::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_load_cuda(
|
||||
poseidon::Poseidon<grumpkin::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<grumpkin::scalar_t>* poseidon,
|
||||
const grumpkin::scalar_t* inputs,
|
||||
grumpkin::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
grumpkin_poseidon_delete_cuda(poseidon::Poseidon<grumpkin::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t grumpkin_mul_cuda(
|
||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
||||
@@ -87,10 +108,8 @@ extern "C" cudaError_t grumpkin_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t grumpkin_bit_reverse_cuda(
|
||||
const grumpkin::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
grumpkin::scalar_t* output);
|
||||
const grumpkin::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, grumpkin::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -6,11 +6,25 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::KeccakConfig& config);
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::HashConfig& config);
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::KeccakConfig& config);
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::HashConfig& config);
|
||||
|
||||
extern "C" cudaError_t build_keccak256_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t build_keccak512_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
#endif
|
||||
94
icicle/include/api/m31.h
Normal file
94
icicle/include/api/m31.h
Normal file
@@ -0,0 +1,94 @@
|
||||
// WARNING: This file is auto-generated by a script.
|
||||
// Any changes made to this file may be overwritten.
|
||||
// Please modify the code generation script instead.
|
||||
// Path to the code generation script: scripts/gen_c_api.py
|
||||
|
||||
#pragma once
|
||||
#ifndef M31_API_H
|
||||
#define M31_API_H
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/m31.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
extern "C" cudaError_t m31_build_merkle_tree(
|
||||
const m31::scalar_t* leaves,
|
||||
m31::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* compression,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t m31_mmcs_commit_cuda(
|
||||
const matrix::Matrix<m31::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
m31::scalar_t* digests,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* hasher,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t m31_mul_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_add_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_accumulate_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t m31_sub_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_transpose_matrix_cuda(
|
||||
const m31::scalar_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
m31::scalar_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t m31_bit_reverse_cuda(
|
||||
const m31::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void m31_generate_scalars(m31::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_scalar_convert_montgomery(
|
||||
m31::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void m31_extension_generate_scalars(m31::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_extension_scalar_convert_montgomery(
|
||||
m31::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t m31_extension_mul_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_add_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_accumulate_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t m31_extension_sub_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_transpose_matrix_cuda(
|
||||
const m31::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
m31::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t m31_extension_bit_reverse_cuda(
|
||||
const m31::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::extension_t* output);
|
||||
|
||||
|
||||
#endif
|
||||
@@ -9,10 +9,29 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/stark252.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
extern "C" cudaError_t stark252_build_merkle_tree(
|
||||
const stark252::scalar_t* leaves,
|
||||
stark252::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* compression,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t stark252_mmcs_commit_cuda(
|
||||
const matrix::Matrix<stark252::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
stark252::scalar_t* digests,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* hasher,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t stark252_mul_cuda(
|
||||
stark252::scalar_t* vec_a, stark252::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, stark252::scalar_t* result);
|
||||
|
||||
@@ -35,10 +54,8 @@ extern "C" cudaError_t stark252_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t stark252_bit_reverse_cuda(
|
||||
const stark252::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
stark252::scalar_t* output);
|
||||
const stark252::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, stark252::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void stark252_generate_scalars(stark252::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -1,26 +1,29 @@
|
||||
extern "C" cudaError_t ${FIELD}_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const ${FIELD}::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_create_cuda(
|
||||
poseidon::Poseidon<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const ${FIELD}::scalar_t* round_constants,
|
||||
const ${FIELD}::scalar_t* mds_matrix,
|
||||
const ${FIELD}::scalar_t* non_sparse_matrix,
|
||||
const ${FIELD}::scalar_t* sparse_matrices,
|
||||
const ${FIELD}::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<${FIELD}::scalar_t>* constants);
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_load_cuda(
|
||||
poseidon::Poseidon<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_hash_cuda(
|
||||
${FIELD}::scalar_t* input,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<${FIELD}::scalar_t>* poseidon,
|
||||
const ${FIELD}::scalar_t* inputs,
|
||||
${FIELD}::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<${FIELD}::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_build_poseidon_merkle_tree(
|
||||
const ${FIELD}::scalar_t* leaves,
|
||||
${FIELD}::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<${FIELD}::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t
|
||||
${FIELD}_poseidon_delete_cuda(poseidon::Poseidon<${FIELD}::scalar_t>* poseidon);
|
||||
@@ -1,30 +1,34 @@
|
||||
extern "C" cudaError_t ${FIELD}_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const ${FIELD}::scalar_t* round_constants,
|
||||
const ${FIELD}::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_hash_cuda(
|
||||
const ${FIELD}::scalar_t* input,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<${FIELD}::scalar_t>* poseidon,
|
||||
const ${FIELD}::scalar_t* inputs,
|
||||
${FIELD}::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<${FIELD}::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t
|
||||
${FIELD}_poseidon2_delete_cuda(poseidon2::Poseidon2<${FIELD}::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
16
icicle/include/api/templates/fields/tree.h
Normal file
16
icicle/include/api/templates/fields/tree.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// Template prototype (`${FIELD}` is substituted per field when the API header is generated).
// Takes read-only `leaves`, a writable `digests` buffer, the tree `height`, an `input_block_len`, two hashers
// (`compression` and `bottom_layer`) and a `merkle_tree::TreeBuilderConfig`; reports status as a `cudaError_t`.
// NOTE(review): presumably `bottom_layer` hashes the leaves and `compression` combines child digests on the
// upper layers — confirm against the merkle-tree implementation, along with the unit of `input_block_len`.
extern "C" cudaError_t ${FIELD}_build_merkle_tree(
|
||||
const ${FIELD}::scalar_t* leaves,
|
||||
${FIELD}::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
// Template prototype (`${FIELD}` is substituted per field when the API header is generated).
// Takes a pointer to `matrix::Matrix` inputs together with `number_of_inputs`, a writable `digests` buffer,
// a `hasher`, a `compression` hasher and a `merkle_tree::TreeBuilderConfig`; reports status as a `cudaError_t`.
// NOTE(review): `leaves` is presumably an array of `number_of_inputs` matrices committed into one tree — confirm.
extern "C" cudaError_t ${FIELD}_mmcs_commit_cuda(
|
||||
const matrix::Matrix<${FIELD}::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
${FIELD}::scalar_t* digests,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* hasher,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
@@ -17,4 +17,7 @@ extern "C" cudaError_t ${FIELD}_transpose_matrix_cuda(
|
||||
${FIELD}::scalar_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_bit_reverse_cuda(
|
||||
const ${FIELD}::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, ${FIELD}::scalar_t* output);
|
||||
|
||||
@@ -17,4 +17,7 @@ extern "C" cudaError_t ${FIELD}_extension_transpose_matrix_cuda(
|
||||
${FIELD}::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_extension_bit_reverse_cuda(
|
||||
const ${FIELD}::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, ${FIELD}::extension_t* output);
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
|
||||
#define G2_CURVE_DEFINITIONS \
|
||||
typedef ExtensionField<fq_config> g2_point_field_t; \
|
||||
typedef ExtensionField<fq_config, point_field_t> g2_point_field_t; \
|
||||
static constexpr g2_point_field_t g2_generator_x = \
|
||||
g2_point_field_t{point_field_t{g2_gen_x_re}, point_field_t{g2_gen_x_im}}; \
|
||||
static constexpr g2_point_field_t g2_generator_y = \
|
||||
|
||||
@@ -44,7 +44,7 @@ public:
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Field from(uint32_t value)
|
||||
{
|
||||
storage<TLC> scalar;
|
||||
storage<TLC> scalar{};
|
||||
scalar.limbs[0] = value;
|
||||
for (int i = 1; i < TLC; i++) {
|
||||
scalar.limbs[i] = 0;
|
||||
@@ -58,8 +58,10 @@ public:
|
||||
|
||||
if (logn > CONFIG::omegas_count) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Field: Invalid omega index"); }
|
||||
|
||||
storage_array<CONFIG::omegas_count, TLC> const omega = CONFIG::omega;
|
||||
return Field{omega.storages[logn - 1]};
|
||||
Field omega = Field{CONFIG::rou};
|
||||
for (int i = 0; i < CONFIG::omegas_count - logn; i++)
|
||||
omega = sqr(omega);
|
||||
return omega;
|
||||
}
|
||||
|
||||
static HOST_INLINE Field omega_inv(uint32_t logn)
|
||||
@@ -70,8 +72,10 @@ public:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Field: Invalid omega_inv index");
|
||||
}
|
||||
|
||||
storage_array<CONFIG::omegas_count, TLC> const omega_inv = CONFIG::omega_inv;
|
||||
return Field{omega_inv.storages[logn - 1]};
|
||||
Field omega = inverse(Field{CONFIG::rou});
|
||||
for (int i = 0; i < CONFIG::omegas_count - logn; i++)
|
||||
omega = sqr(omega);
|
||||
return omega;
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Field inv_log_size(uint32_t logn)
|
||||
@@ -182,7 +186,7 @@ public:
|
||||
if (REDUCTION_SIZE == 0) return xs;
|
||||
const ff_wide_storage modulus = get_modulus_squared<REDUCTION_SIZE>();
|
||||
Wide rs = {};
|
||||
return sub_limbs<true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
return sub_limbs<2 * TLC, true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
@@ -190,24 +194,24 @@ public:
|
||||
{
|
||||
const ff_wide_storage modulus = get_modulus_squared<MODULUS_MULTIPLE>();
|
||||
Wide rs = {};
|
||||
sub_limbs<false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
sub_limbs<2 * TLC, false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys)
|
||||
{
|
||||
Wide rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
add_limbs<2 * TLC, false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return sub_modulus_squared<1>(rs);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys)
|
||||
{
|
||||
Wide rs = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
uint32_t carry = sub_limbs<2 * TLC, true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
if (carry == 0) return rs;
|
||||
const ff_wide_storage modulus = get_modulus_squared<1>();
|
||||
add_limbs<false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
add_limbs<2 * TLC, false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
};
|
||||
@@ -228,12 +232,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned MULTIPLIER = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ff_wide_storage modulus_wide()
|
||||
{
|
||||
return CONFIG::modulus_wide;
|
||||
}
|
||||
|
||||
// return m
|
||||
static constexpr HOST_DEVICE_INLINE ff_storage get_m() { return CONFIG::m; }
|
||||
|
||||
@@ -253,12 +251,11 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t
|
||||
add_sub_u32_device(const uint32_t* x, const uint32_t* y, uint32_t* r, size_t n = (TLC >> 1))
|
||||
template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t add_sub_u32_device(const uint32_t* x, const uint32_t* y, uint32_t* r)
|
||||
{
|
||||
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
|
||||
for (unsigned i = 1; i < n; i++)
|
||||
for (unsigned i = 1; i < NLIMBS; i++)
|
||||
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
|
||||
if (!CARRY_OUT) {
|
||||
ptx::addc(0, 0);
|
||||
@@ -267,71 +264,35 @@ public:
|
||||
return SUBTRACT ? ptx::subc(0, 0) : ptx::addc(0, 0);
|
||||
}
|
||||
|
||||
// add or subtract limbs
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t
|
||||
add_sub_limbs_device(const ff_storage& xs, const ff_storage& ys, ff_storage& rs)
|
||||
add_sub_limbs_device(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
return add_sub_u32_device<SUBTRACT, CARRY_OUT>(x, y, r, TLC);
|
||||
return add_sub_u32_device<NLIMBS, SUBTRACT, CARRY_OUT>(x, y, r);
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t
|
||||
add_sub_limbs_device(const ff_wide_storage& xs, const ff_wide_storage& ys, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
return add_sub_u32_device<SUBTRACT, CARRY_OUT>(x, y, r, 2 * TLC);
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr HOST_INLINE uint32_t add_sub_limbs_host(const ff_storage& xs, const ff_storage& ys, ff_storage& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
uint32_t carry = 0;
|
||||
host_math::carry_chain<TLC, false, CARRY_OUT> chain;
|
||||
for (unsigned i = 0; i < TLC; i++)
|
||||
r[i] = SUBTRACT ? chain.sub(x[i], y[i], carry) : chain.add(x[i], y[i], carry);
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr HOST_INLINE uint32_t
|
||||
add_sub_limbs_host(const ff_wide_storage& xs, const ff_wide_storage& ys, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
uint32_t carry = 0;
|
||||
host_math::carry_chain<2 * TLC, false, CARRY_OUT> chain;
|
||||
for (unsigned i = 0; i < 2 * TLC; i++)
|
||||
r[i] = SUBTRACT ? chain.sub(x[i], y[i], carry) : chain.add(x[i], y[i], carry);
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
template <bool CARRY_OUT, typename T>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t add_limbs(const T& xs, const T& ys, T& rs)
|
||||
template <unsigned NLIMBS, bool CARRY_OUT>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t
|
||||
add_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return add_sub_limbs_device<false, CARRY_OUT>(xs, ys, rs);
|
||||
return add_sub_limbs_device<NLIMBS, false, CARRY_OUT>(xs, ys, rs);
|
||||
#else
|
||||
return add_sub_limbs_host<false, CARRY_OUT>(xs, ys, rs);
|
||||
return host_math::template add_sub_limbs<NLIMBS, false, CARRY_OUT>(xs, ys, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <bool CARRY_OUT, typename T>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t sub_limbs(const T& xs, const T& ys, T& rs)
|
||||
template <unsigned NLIMBS, bool CARRY_OUT>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t
|
||||
sub_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return add_sub_limbs_device<true, CARRY_OUT>(xs, ys, rs);
|
||||
return add_sub_limbs_device<NLIMBS, true, CARRY_OUT>(xs, ys, rs);
|
||||
#else
|
||||
return add_sub_limbs_host<true, CARRY_OUT>(xs, ys, rs);
|
||||
return host_math::template add_sub_limbs<NLIMBS, true, CARRY_OUT>(xs, ys, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -531,7 +492,7 @@ public:
|
||||
// are necessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to
|
||||
// less efficient SASS generated by nvcc, so this case needed separate handling.
|
||||
if (b[0] == UINT32_MAX) {
|
||||
add_sub_u32_device<true, false>(c, a, even, TLC);
|
||||
add_sub_u32_device<TLC, true, false>(c, a, even);
|
||||
for (i = 0; i < TLC - 1; i++)
|
||||
odd[i] = a[i];
|
||||
} else {
|
||||
@@ -639,17 +600,18 @@ public:
|
||||
__align__(16) uint32_t diffs[TLC];
|
||||
// Differences of halves \f$ a_{hi} - a_{lo}; b_{lo} - b_{hi} \f$ are written into `diffs`, signs written to
|
||||
// `carry1` and `carry2`.
|
||||
uint32_t carry1 = add_sub_u32_device<true, true>(&a[TLC >> 1], a, diffs);
|
||||
uint32_t carry2 = add_sub_u32_device<true, true>(b, &b[TLC >> 1], &diffs[TLC >> 1]);
|
||||
uint32_t carry1 = add_sub_u32_device<(TLC >> 1), true, true>(&a[TLC >> 1], a, diffs);
|
||||
uint32_t carry2 = add_sub_u32_device<(TLC >> 1), true, true>(b, &b[TLC >> 1], &diffs[TLC >> 1]);
|
||||
// Compute the "middle part" of Karatsuba: \f$ a_{lo} \cdot b_{hi} + b_{lo} \cdot a_{hi} \f$.
|
||||
// This is where the assumption about unset high bit of `a` and `b` is relevant.
|
||||
multiply_and_add_short_raw_device(diffs, &diffs[TLC >> 1], middle_part, r, &r[TLC]);
|
||||
// Corrections that need to be performed when differences are negative.
|
||||
// Again, carry doesn't need to be propagated due to unset high bits of `a` and `b`.
|
||||
if (carry1) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], &diffs[TLC >> 1], &middle_part[TLC >> 1]);
|
||||
if (carry2) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], diffs, &middle_part[TLC >> 1]);
|
||||
if (carry1)
|
||||
add_sub_u32_device<(TLC >> 1), true, false>(&middle_part[TLC >> 1], &diffs[TLC >> 1], &middle_part[TLC >> 1]);
|
||||
if (carry2) add_sub_u32_device<(TLC >> 1), true, false>(&middle_part[TLC >> 1], diffs, &middle_part[TLC >> 1]);
|
||||
// Now that middle part is fully correct, it can be added to the result.
|
||||
add_sub_u32_device<false, true>(&r[TLC >> 1], middle_part, &r[TLC >> 1], TLC);
|
||||
add_sub_u32_device<TLC, false, true>(&r[TLC >> 1], middle_part, &r[TLC >> 1]);
|
||||
|
||||
// Carry from adding middle part has to be propagated to the highest limb.
|
||||
for (size_t i = TLC + (TLC >> 1); i < 2 * TLC; i++)
|
||||
@@ -673,25 +635,12 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
static HOST_INLINE void multiply_raw_host(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
for (unsigned i = 0; i < TLC; i++) {
|
||||
uint32_t carry = 0;
|
||||
for (unsigned j = 0; j < TLC; j++)
|
||||
r[j + i] = host_math::madc_cc(a[j], b[i], r[j + i], carry);
|
||||
r[TLC + i] = carry;
|
||||
}
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE void multiply_raw(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return multiply_raw_device(as, bs, rs);
|
||||
#else
|
||||
return multiply_raw_host(as, bs, rs);
|
||||
return host_math::template multiply_raw<TLC>(as, bs, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -702,9 +651,9 @@ public:
|
||||
return multiply_and_add_lsb_neg_modulus_raw_device(as, cs, rs);
|
||||
#else
|
||||
Wide r_wide = {};
|
||||
multiply_raw_host(as, get_neg_modulus(), r_wide.limbs_storage);
|
||||
host_math::template multiply_raw<TLC>(as, get_neg_modulus(), r_wide.limbs_storage);
|
||||
Field r = Wide::get_lower(r_wide);
|
||||
add_limbs<false>(cs, r.limbs_storage, rs);
|
||||
add_limbs<TLC, false>(cs, r.limbs_storage, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -713,7 +662,7 @@ public:
|
||||
#ifdef __CUDA_ARCH__
|
||||
return multiply_msb_raw_device(as, bs, rs);
|
||||
#else
|
||||
return multiply_raw_host(as, bs, rs);
|
||||
return host_math::template multiply_raw<TLC>(as, bs, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -759,7 +708,7 @@ public:
|
||||
if (REDUCTION_SIZE == 0) return xs;
|
||||
const ff_storage modulus = get_modulus<REDUCTION_SIZE>();
|
||||
Field rs = {};
|
||||
return sub_limbs<true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
return sub_limbs<TLC, true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const Field& xs)
|
||||
@@ -778,17 +727,17 @@ public:
|
||||
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys)
|
||||
{
|
||||
Field rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
add_limbs<TLC, false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return sub_modulus<1>(rs);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys)
|
||||
{
|
||||
Field rs = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
uint32_t carry = sub_limbs<TLC, true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
if (carry == 0) return rs;
|
||||
const ff_storage modulus = get_modulus<1>();
|
||||
add_limbs<false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
add_limbs<TLC, false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
@@ -838,15 +787,23 @@ public:
|
||||
uint32_t carry;
|
||||
// As mentioned, either 2 or 1 reduction can be performed depending on the field in question.
|
||||
if (num_of_reductions() == 2) {
|
||||
carry = sub_limbs<true>(r.limbs_storage, get_modulus<2>(), r_reduced);
|
||||
carry = sub_limbs<TLC, true>(r.limbs_storage, get_modulus<2>(), r_reduced);
|
||||
if (carry == 0) r = Field{r_reduced};
|
||||
}
|
||||
carry = sub_limbs<true>(r.limbs_storage, get_modulus<1>(), r_reduced);
|
||||
carry = sub_limbs<TLC, true>(r.limbs_storage, get_modulus<1>(), r_reduced);
|
||||
if (carry == 0) r = Field{r_reduced};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE Field& operator=(Field const& other)
|
||||
{
|
||||
for (int i = 0; i < TLC; i++) {
|
||||
this->limbs_storage.limbs[i] = other.limbs_storage.limbs[i];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator*(const Field& xs, const Field& ys)
|
||||
{
|
||||
Wide xy = mul_wide(xs, ys); // full mult
|
||||
@@ -933,7 +890,7 @@ public:
|
||||
{
|
||||
const ff_storage modulus = get_modulus<MODULUS_MULTIPLE>();
|
||||
Field rs = {};
|
||||
sub_limbs<false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
sub_limbs<TLC, false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
@@ -963,7 +920,7 @@ public:
|
||||
static constexpr HOST_DEVICE_INLINE bool lt(const Field& xs, const Field& ys)
|
||||
{
|
||||
ff_storage dummy = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, dummy);
|
||||
uint32_t carry = sub_limbs<TLC, true>(xs.limbs_storage, ys.limbs_storage, dummy);
|
||||
return carry;
|
||||
}
|
||||
|
||||
@@ -983,12 +940,12 @@ public:
|
||||
while (!(u == one) && !(v == one)) {
|
||||
while (is_even(u)) {
|
||||
u = div2(u);
|
||||
if (is_odd(b)) add_limbs<false>(b.limbs_storage, modulus, b.limbs_storage);
|
||||
if (is_odd(b)) add_limbs<TLC, false>(b.limbs_storage, modulus, b.limbs_storage);
|
||||
b = div2(b);
|
||||
}
|
||||
while (is_even(v)) {
|
||||
v = div2(v);
|
||||
if (is_odd(c)) add_limbs<false>(c.limbs_storage, modulus, c.limbs_storage);
|
||||
if (is_odd(c)) add_limbs<TLC, false>(c.limbs_storage, modulus, c.limbs_storage);
|
||||
c = div2(c);
|
||||
}
|
||||
if (lt(v, u)) {
|
||||
|
||||
@@ -33,6 +33,9 @@ namespace field_config = babybear;
|
||||
#elif FIELD_ID == STARK_252
|
||||
#include "fields/stark_fields/stark252.cuh"
|
||||
namespace field_config = stark252;
|
||||
#elif FIELD_ID == M31
|
||||
#include "fields/stark_fields/m31.cuh"
|
||||
namespace field_config = m31;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -5,25 +5,29 @@
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "storage.cuh"
|
||||
|
||||
namespace host_math {
|
||||
|
||||
// return x + y with uint32_t operands
|
||||
static __host__ uint32_t add(const uint32_t x, const uint32_t y) { return x + y; }
|
||||
static constexpr __host__ uint32_t add(const uint32_t x, const uint32_t y) { return x + y; }
|
||||
|
||||
// return x + y + carry with uint32_t operands
|
||||
static __host__ uint32_t addc(const uint32_t x, const uint32_t y, const uint32_t carry) { return x + y + carry; }
|
||||
static constexpr __host__ uint32_t addc(const uint32_t x, const uint32_t y, const uint32_t carry)
|
||||
{
|
||||
return x + y + carry;
|
||||
}
|
||||
|
||||
// return x + y and carry out with uint32_t operands
|
||||
static __host__ uint32_t add_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
static constexpr __host__ uint32_t add_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
uint32_t result;
|
||||
result = x + y;
|
||||
uint32_t result = x + y;
|
||||
carry = x > result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x + y + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t addc_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
static constexpr __host__ uint32_t addc_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
const uint32_t result = x + y + carry;
|
||||
carry = carry && x >= result || !carry && x > result;
|
||||
@@ -31,22 +35,24 @@ namespace host_math {
|
||||
}
|
||||
|
||||
// return x - y with uint32_t operands
|
||||
static __host__ uint32_t sub(const uint32_t x, const uint32_t y) { return x - y; }
|
||||
static constexpr __host__ uint32_t sub(const uint32_t x, const uint32_t y) { return x - y; }
|
||||
|
||||
// return x - y - borrow with uint32_t operands
|
||||
static __host__ uint32_t subc(const uint32_t x, const uint32_t y, const uint32_t borrow) { return x - y - borrow; }
|
||||
|
||||
// return x - y and borrow out with uint32_t operands
|
||||
static __host__ uint32_t sub_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
// return x - y - borrow with uint32_t operands
|
||||
static constexpr __host__ uint32_t subc(const uint32_t x, const uint32_t y, const uint32_t borrow)
|
||||
{
|
||||
uint32_t result;
|
||||
result = x - y;
|
||||
return x - y - borrow;
|
||||
}
|
||||
|
||||
// return x - y and borrow out with uint32_t operands
|
||||
static constexpr __host__ uint32_t sub_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
{
|
||||
uint32_t result = x - y;
|
||||
borrow = x < result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x - y - borrow and borrow out with uint32_t operands
|
||||
static __host__ uint32_t subc_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
// return x - y - borrow and borrow out with uint32_t operands
|
||||
static constexpr __host__ uint32_t subc_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
{
|
||||
const uint32_t result = x - y - borrow;
|
||||
borrow = borrow && x <= result || !borrow && x < result;
|
||||
@@ -54,12 +60,11 @@ namespace host_math {
|
||||
}
|
||||
|
||||
// return x * y + z + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t madc_cc(const uint32_t x, const uint32_t y, const uint32_t z, uint32_t& carry)
|
||||
static constexpr __host__ uint32_t madc_cc(const uint32_t x, const uint32_t y, const uint32_t z, uint32_t& carry)
|
||||
{
|
||||
uint32_t result;
|
||||
uint64_t r = static_cast<uint64_t>(x) * y + z + carry;
|
||||
carry = (uint32_t)(r >> 32);
|
||||
result = r & 0xffffffff;
|
||||
uint32_t result = r & 0xffffffff;
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -69,7 +74,7 @@ namespace host_math {
|
||||
|
||||
constexpr HOST_INLINE carry_chain() : index(0) {}
|
||||
|
||||
HOST_INLINE uint32_t add(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
constexpr HOST_INLINE uint32_t add(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
@@ -82,7 +87,7 @@ namespace host_math {
|
||||
return host_math::addc(x, y, carry);
|
||||
}
|
||||
|
||||
HOST_INLINE uint32_t sub(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
constexpr HOST_INLINE uint32_t sub(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
@@ -95,6 +100,89 @@ namespace host_math {
|
||||
return host_math::subc(x, y, carry);
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned NLIMBS_A, unsigned NLIMBS_B = NLIMBS_A>
|
||||
static constexpr HOST_INLINE void
|
||||
multiply_raw(const storage<NLIMBS_A>& as, const storage<NLIMBS_B>& bs, storage<NLIMBS_A + NLIMBS_B>& rs)
|
||||
{
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
for (unsigned i = 0; i < NLIMBS_B; i++) {
|
||||
uint32_t carry = 0;
|
||||
for (unsigned j = 0; j < NLIMBS_A; j++)
|
||||
r[j + i] = host_math::madc_cc(a[j], b[i], r[j + i], carry);
|
||||
r[NLIMBS_A + i] = carry;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr HOST_INLINE uint32_t
|
||||
add_sub_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
uint32_t carry = 0;
|
||||
carry_chain<NLIMBS, false, CARRY_OUT> chain;
|
||||
for (unsigned i = 0; i < NLIMBS; i++)
|
||||
r[i] = SUBTRACT ? chain.sub(x[i], y[i], carry) : chain.add(x[i], y[i], carry);
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned BITS>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> left_shift(const storage<NLIMBS>& xs)
|
||||
{
|
||||
if constexpr (BITS == 0)
|
||||
return xs;
|
||||
else {
|
||||
constexpr unsigned BITS32 = BITS % 32;
|
||||
constexpr unsigned LIMBS_GAP = BITS / 32;
|
||||
storage<NLIMBS> out{};
|
||||
if constexpr (LIMBS_GAP < NLIMBS) {
|
||||
out.limbs[LIMBS_GAP] = xs.limbs[0] << BITS32;
|
||||
for (unsigned i = 1; i < NLIMBS - LIMBS_GAP; i++)
|
||||
out.limbs[i + LIMBS_GAP] = (xs.limbs[i] << BITS32) + (xs.limbs[i - 1] >> (32 - BITS32));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned BITS>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> right_shift(const storage<NLIMBS>& xs)
|
||||
{
|
||||
if constexpr (BITS == 0)
|
||||
return xs;
|
||||
else {
|
||||
constexpr unsigned BITS32 = BITS % 32;
|
||||
constexpr unsigned LIMBS_GAP = BITS / 32;
|
||||
storage<NLIMBS> out{};
|
||||
if constexpr (LIMBS_GAP < NLIMBS - 1) {
|
||||
for (unsigned i = 0; i < NLIMBS - LIMBS_GAP - 1; i++)
|
||||
out.limbs[i] = (xs.limbs[i + LIMBS_GAP] >> BITS32) + (xs.limbs[i + LIMBS_GAP + 1] << (32 - BITS32));
|
||||
}
|
||||
if constexpr (LIMBS_GAP < NLIMBS) out.limbs[NLIMBS - LIMBS_GAP - 1] = (xs.limbs[NLIMBS - 1] >> BITS32);
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS_NUM, unsigned NLIMBS_DENOM, unsigned NLIMBS_Q = (NLIMBS_NUM - NLIMBS_DENOM)>
|
||||
static constexpr HOST_INLINE void integer_division(
|
||||
const storage<NLIMBS_NUM>& num, const storage<NLIMBS_DENOM>& denom, storage<NLIMBS_Q>& q, storage<NLIMBS_DENOM>& r)
|
||||
{
|
||||
storage<NLIMBS_DENOM> temp = {};
|
||||
for (int limb_idx = NLIMBS_NUM - 1; limb_idx >= 0; limb_idx--) {
|
||||
for (int bit_idx = 31; bit_idx >= 0; bit_idx--) {
|
||||
r = left_shift<NLIMBS_DENOM, 1>(r);
|
||||
r.limbs[0] |= ((num.limbs[limb_idx] >> bit_idx) & 1);
|
||||
uint32_t c = add_sub_limbs<NLIMBS_DENOM, true, true>(r, denom, temp);
|
||||
if (limb_idx < NLIMBS_Q & !c) {
|
||||
r = temp;
|
||||
q.limbs[limb_idx] |= 1 << bit_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace host_math
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@@ -10,5 +10,6 @@
|
||||
|
||||
#define BABY_BEAR 1001
|
||||
#define STARK_252 1002
|
||||
#define M31 1003
|
||||
|
||||
#endif
|
||||
129
icicle/include/fields/params_gen.cuh
Normal file
129
icicle/include/fields/params_gen.cuh
Normal file
@@ -0,0 +1,129 @@
|
||||
#pragma once
|
||||
#ifndef PARAMS_GEN_H
|
||||
#define PARAMS_GEN_H
|
||||
|
||||
#include "storage.cuh"
|
||||
#include "host_math.cuh"
|
||||
|
||||
namespace params_gen {
|
||||
template <unsigned NLIMBS, unsigned BIT_SHIFT>
|
||||
static constexpr HOST_INLINE storage<2 * NLIMBS> get_square(const storage<NLIMBS>& xs)
|
||||
{
|
||||
storage<2 * NLIMBS> rs = {};
|
||||
host_math::template multiply_raw<NLIMBS>(xs, xs, rs);
|
||||
return host_math::template left_shift<2 * NLIMBS, BIT_SHIFT>(rs);
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS>
|
||||
static constexpr HOST_INLINE storage<NLIMBS>
|
||||
get_difference_no_carry(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys)
|
||||
{
|
||||
storage<NLIMBS> rs = {};
|
||||
host_math::template add_sub_limbs<NLIMBS, true, false>(xs, ys, rs);
|
||||
return rs;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned EXP>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> get_m(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
storage<NLIMBS> rs = {};
|
||||
storage<NLIMBS> qs = {};
|
||||
storage<2 * NLIMBS> wide_one = {1};
|
||||
storage<2 * NLIMBS> pow_of_2 = host_math::template left_shift<2 * NLIMBS, EXP>(wide_one);
|
||||
host_math::template integer_division<2 * NLIMBS, NLIMBS>(pow_of_2, modulus, qs, rs);
|
||||
return qs;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, bool INV>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> get_montgomery_constant(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
storage<NLIMBS> rs = {1};
|
||||
for (int i = 0; i < 32 * NLIMBS; i++) {
|
||||
if (INV) {
|
||||
if (rs.limbs[0] & 1) host_math::template add_sub_limbs<NLIMBS, false, false>(rs, modulus, rs);
|
||||
rs = host_math::template right_shift<NLIMBS, 1>(rs);
|
||||
} else {
|
||||
rs = host_math::template left_shift<NLIMBS, 1>(rs);
|
||||
storage<NLIMBS> temp = {};
|
||||
rs = host_math::template add_sub_limbs<NLIMBS, true, true>(rs, modulus, temp) ? rs : temp;
|
||||
}
|
||||
}
|
||||
return rs;
|
||||
}
|
||||
|
||||
/**
 * Returns floor(log2(x)), i.e. the index of the highest set bit of `x`.
 *
 * log2(0) is undefined; 0 is returned for x == 0 so the function always
 * terminates (the recursive form never reached its `x == 1` base case there).
 */
constexpr unsigned floorlog2(uint32_t x)
{
  unsigned result = 0;
  // Count how many times x can be halved before it reaches zero.
  while (x >>= 1)
    ++result;
  return result;
}
|
||||
|
||||
// Estimates a reduction count from `modulus` and the precomputed constant `m`.
// NOTE(review): presumably the number of conditional subtractions needed after
// a Barrett-style reduction; confirm against the callers of this value.
// The result is the truncated floating-point error estimate plus one.
template <unsigned NLIMBS, unsigned NBITS>
|
||||
constexpr unsigned num_of_reductions(const storage<NLIMBS>& modulus, const storage<NLIMBS>& m)
|
||||
{
|
||||
storage<2 * NLIMBS> x1 = {};
|
||||
storage<3 * NLIMBS> x2 = {};
|
||||
storage<3 * NLIMBS> x3 = {};
|
||||
// x1 <- multiply_raw(modulus, m); multiply_raw is defined elsewhere, presumably a full-width product.
host_math::template multiply_raw<NLIMBS>(modulus, m, x1);
|
||||
// x2 <- multiply_raw(modulus, x1), held in 3 * NLIMBS limbs.
host_math::template multiply_raw<NLIMBS, 2 * NLIMBS>(modulus, x1, x2);
|
||||
storage<2 * NLIMBS> one = {1};
|
||||
// pow_of_2 <- 1 << NBITS in a 2 * NLIMBS-limb value.
storage<2 * NLIMBS> pow_of_2 = host_math::template left_shift<2 * NLIMBS, NBITS>(one);
|
||||
// x3 <- multiply_raw(modulus, pow_of_2).
host_math::template multiply_raw<NLIMBS, 2 * NLIMBS>(modulus, pow_of_2, x3);
|
||||
// x2 <- x3 - x2, with the final borrow discarded (CARRY_OUT = false).
host_math::template add_sub_limbs<3 * NLIMBS, true, false>(x3, x2, x2);
|
||||
// Ratio of limb 2 * NLIMBS - 1 of the difference to the same limb of pow_of_2.
// NOTE(review): that limb of pow_of_2 is zero when NBITS < 32 * (2 * NLIMBS - 1),
// which would make this a division by zero; confirm callers never hit that.
double err = (double)x2.limbs[2 * NLIMBS - 1] / pow_of_2.limbs[2 * NLIMBS - 1];
|
||||
// Add the top limb of m scaled by 1 / 0xffffffff.
err += (double)m.limbs[NLIMBS - 1] / 0xffffffff;
|
||||
// Add NLIMBS / 2^31.
err += (double)NLIMBS / 0x80000000;
|
||||
// Truncate toward zero and add one.
return unsigned(err) + 1;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS>
|
||||
constexpr unsigned two_adicity(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
unsigned two_adicity = 1;
|
||||
storage<NLIMBS> temp = host_math::template right_shift<NLIMBS, 1>(modulus);
|
||||
while (!(temp.limbs[0] & 1)) {
|
||||
temp = host_math::template right_shift<NLIMBS, 1>(temp);
|
||||
two_adicity++;
|
||||
}
|
||||
return two_adicity;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned TWO_ADICITY>
|
||||
constexpr storage_array<TWO_ADICITY, NLIMBS> get_invs(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
storage_array<TWO_ADICITY, NLIMBS> invs = {};
|
||||
storage<NLIMBS> rs = {1};
|
||||
for (int i = 0; i < TWO_ADICITY; i++) {
|
||||
if (rs.limbs[0] & 1) host_math::template add_sub_limbs<NLIMBS, false, false>(rs, modulus, rs);
|
||||
rs = host_math::template right_shift<NLIMBS, 1>(rs);
|
||||
invs.storages[i] = rs;
|
||||
}
|
||||
return invs;
|
||||
}
|
||||
} // namespace params_gen
|
||||
|
||||
// PARAMS(modulus): expands to a set of `static constexpr` members derived from
// `modulus` at compile time, intended for use inside a field config struct:
//   limbs_count, modulus_bit_count, zero, one,
//   modulus_2 / modulus_4           (modulus shifted left by 1 and 2 bits),
//   neg_modulus                     (0 - modulus with the borrow discarded),
//   modulus_squared / _2 / _4       (double-width square and its left shifts),
//   m                               (get_m with exponent 2 * modulus_bit_count),
//   montgomery_r / montgomery_r_inv (get_montgomery_constant, INV = false / true),
//   num_of_reductions.
// `modulus` must be a constexpr storage object exposing `.LC` and `.limbs`.
#define PARAMS(modulus) \
|
||||
static constexpr unsigned limbs_count = modulus.LC; \
|
||||
static constexpr unsigned modulus_bit_count = \
|
||||
32 * (limbs_count - 1) + params_gen::floorlog2(modulus.limbs[limbs_count - 1]) + 1; \
|
||||
static constexpr storage<limbs_count> zero = {}; \
|
||||
static constexpr storage<limbs_count> one = {1}; \
|
||||
static constexpr storage<limbs_count> modulus_2 = host_math::template left_shift<limbs_count, 1>(modulus); \
|
||||
static constexpr storage<limbs_count> modulus_4 = host_math::template left_shift<limbs_count, 1>(modulus_2); \
|
||||
static constexpr storage<limbs_count> neg_modulus = \
|
||||
params_gen::template get_difference_no_carry<limbs_count>(zero, modulus); \
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = \
|
||||
params_gen::template get_square<limbs_count, 0>(modulus); \
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = \
|
||||
host_math::template left_shift<2 * limbs_count, 1>(modulus_squared); \
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = \
|
||||
host_math::template left_shift<2 * limbs_count, 1>(modulus_squared_2); \
|
||||
static constexpr storage<limbs_count> m = params_gen::template get_m<limbs_count, 2 * modulus_bit_count>(modulus); \
|
||||
static constexpr storage<limbs_count> montgomery_r = \
|
||||
params_gen::template get_montgomery_constant<limbs_count, false>(modulus); \
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = \
|
||||
params_gen::template get_montgomery_constant<limbs_count, true>(modulus); \
|
||||
static constexpr unsigned num_of_reductions = \
|
||||
params_gen::template num_of_reductions<limbs_count, 2 * modulus_bit_count>(modulus, m);
|
||||
|
||||
// TWIDDLES(modulus, rou): expands to `omegas_count` (params_gen::two_adicity of
// `modulus`) and `inv` (the params_gen::get_invs table with omegas_count entries).
// It relies on `limbs_count` already being declared in the enclosing scope.
// NOTE(review): the `rou` argument is not referenced by this expansion.
#define TWIDDLES(modulus, rou) \
|
||||
static constexpr unsigned omegas_count = params_gen::template two_adicity<limbs_count>(modulus); \
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = \
|
||||
params_gen::template get_invs<limbs_count, omegas_count>(modulus);
|
||||
|
||||
#endif
|
||||
@@ -4,13 +4,13 @@
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
|
||||
template <typename CONFIG>
|
||||
template <typename CONFIG, class T>
|
||||
class ExtensionField
|
||||
{
|
||||
private:
|
||||
friend Field<CONFIG>;
|
||||
friend T;
|
||||
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
typedef typename T::Wide FWide;
|
||||
|
||||
struct ExtensionWide {
|
||||
FWide real;
|
||||
@@ -28,7 +28,7 @@ private:
|
||||
};
|
||||
|
||||
public:
|
||||
typedef Field<CONFIG> FF;
|
||||
typedef T FF;
|
||||
static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
|
||||
|
||||
FF real;
|
||||
@@ -196,11 +196,11 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <class CONFIG>
|
||||
struct SharedMemory<ExtensionField<CONFIG>> {
|
||||
__device__ ExtensionField<CONFIG>* getPointer()
|
||||
template <typename CONFIG, class T>
|
||||
struct SharedMemory<ExtensionField<CONFIG, T>> {
|
||||
__device__ ExtensionField<CONFIG, T>* getPointer()
|
||||
{
|
||||
extern __shared__ ExtensionField<CONFIG> s_ext2_scalar_[];
|
||||
extern __shared__ ExtensionField<CONFIG, T> s_ext2_scalar_[];
|
||||
return s_ext2_scalar_;
|
||||
}
|
||||
};
|
||||
@@ -4,11 +4,11 @@
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
|
||||
template <typename CONFIG>
|
||||
template <typename CONFIG, class T>
|
||||
class ExtensionField
|
||||
{
|
||||
private:
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
typedef typename T::Wide FWide;
|
||||
|
||||
struct ExtensionWide {
|
||||
FWide real;
|
||||
@@ -28,7 +28,7 @@ private:
|
||||
};
|
||||
|
||||
public:
|
||||
typedef Field<CONFIG> FF;
|
||||
typedef T FF;
|
||||
static constexpr unsigned TLC = 4 * CONFIG::limbs_count;
|
||||
|
||||
FF real;
|
||||
@@ -49,15 +49,14 @@ public:
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField to_montgomery(const ExtensionField& xs)
|
||||
{
|
||||
return ExtensionField{
|
||||
xs.real * FF{CONFIG::montgomery_r}, xs.im1 * FF{CONFIG::montgomery_r}, xs.im2 * FF{CONFIG::montgomery_r},
|
||||
xs.im3 * FF{CONFIG::montgomery_r}};
|
||||
FF::to_montgomery(xs.real), FF::to_montgomery(xs.im1), FF::to_montgomery(xs.im2), FF::to_montgomery(xs.im3)};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField from_montgomery(const ExtensionField& xs)
|
||||
{
|
||||
return ExtensionField{
|
||||
xs.real * FF{CONFIG::montgomery_r_inv}, xs.im1 * FF{CONFIG::montgomery_r_inv},
|
||||
xs.im2 * FF{CONFIG::montgomery_r_inv}, xs.im3 * FF{CONFIG::montgomery_r_inv}};
|
||||
FF::from_montgomery(xs.real), FF::from_montgomery(xs.im1), FF::from_montgomery(xs.im2),
|
||||
FF::from_montgomery(xs.im3)};
|
||||
}
|
||||
|
||||
static HOST_INLINE ExtensionField rand_host()
|
||||
@@ -247,11 +246,11 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <class CONFIG>
|
||||
struct SharedMemory<ExtensionField<CONFIG>> {
|
||||
__device__ ExtensionField<CONFIG>* getPointer()
|
||||
template <class CONFIG, class T>
|
||||
struct SharedMemory<ExtensionField<CONFIG, T>> {
|
||||
__device__ ExtensionField<CONFIG, T>* getPointer()
|
||||
{
|
||||
extern __shared__ ExtensionField<CONFIG> s_ext4_scalar_[];
|
||||
extern __shared__ ExtensionField<CONFIG, T> s_ext4_scalar_[];
|
||||
return s_ext4_scalar_;
|
||||
}
|
||||
};
|
||||
@@ -3,337 +3,17 @@
|
||||
#define BLS12_377_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_377 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr unsigned omegas_count = 48;
|
||||
static constexpr unsigned modulus_bit_count = 377;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44,
|
||||
0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
|
||||
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88,
|
||||
0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6,
|
||||
0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510,
|
||||
0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc,
|
||||
0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0x7af73fff, 0xcfffffff, 0xe8f4a2bb,
|
||||
0x45f6b7ff, 0xe10c9dd0, 0xff0aec70, 0xe5dd260c,
|
||||
0x935eb6c4, 0x39c4fa3f, 0xe83aef15, 0xfe51c5b9};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
|
||||
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2,
|
||||
0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af,
|
||||
0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4,
|
||||
0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f,
|
||||
0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48,
|
||||
0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be,
|
||||
0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
|
||||
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488,
|
||||
0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffff68, 0x02cdffff, 0x7fffffb1, 0x51409f83,
|
||||
0x8a7d3ff2, 0x9f7db3a9, 0x6e7c6305, 0x7b4e97b7,
|
||||
0x803c84e8, 0x4cf495bf, 0xe2fdf49a, 0x008d6661};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x451269e8, 0xef129093, 0xe65839f5, 0x6e20bbcd,
|
||||
0xa5582c93, 0x852e3c88, 0xf7f2e657, 0xeeaaf41d,
|
||||
0xa4c49351, 0xeb89746c, 0x436b0736, 0x014212fc};
|
||||
static constexpr storage<12> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f,
|
||||
0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b,
|
||||
0xc63b05c0, 0x17c510ea, 0x01ae3a46},
|
||||
{0xf1391c63, 0x6e76d5ec, 0xbff27d8e, 0x99588459, 0x436b0f62, 0xbce649cf, 0x0ad1dec1, 0x400398f5, 0x1a79beb1,
|
||||
0xc0c534db, 0x796537ca, 0x01680a40},
|
||||
{0x554c85ba, 0x6cbff0e3, 0x0be8ff9d, 0xc07c7a91, 0x9dde4fa2, 0xc3c79f67, 0xb5726bde, 0x44bc6d1a, 0x76d6d607,
|
||||
0xad812919, 0x95e8fd0e, 0x001bc0c2},
|
||||
{0x6d5db237, 0xb8c206b0, 0xcabde6ba, 0x08fed85d, 0xcd92eb6f, 0xf2f54ffc, 0xe39c1788, 0xee81121f, 0x88e82edb,
|
||||
0x852def4d, 0xb95fdb80, 0x00bf1268},
|
||||
{0x192bf14f, 0x3663c26a, 0xe6351854, 0x99c859be, 0x159361b8, 0xf9430828, 0xfbe33d7d, 0x478ed715, 0xdb79c984,
|
||||
0x41e220cf, 0xd961f2be, 0x00cedb38},
|
||||
{0xcc724685, 0xb99caa69, 0x1388a46d, 0xc24087ba, 0x08f03491, 0xeb13a05a, 0x98fb0ff7, 0x558ab21e, 0x86bbd802,
|
||||
0x0166d08d, 0xf5b5728a, 0x00d1dec9},
|
||||
{0x92db32a2, 0x2e3951fe, 0x6014b201, 0x8f5a16c9, 0xa91fbb38, 0xa9e942b9, 0x17b4dbd2, 0xf7bf5b43, 0x81325c7d,
|
||||
0x57f3934a, 0x615ad019, 0x012be78e},
|
||||
{0xdce33f04, 0xb42b84a2, 0x0db0b91c, 0x7a0c1423, 0x88d9f8c8, 0xaed11a0c, 0xd484c501, 0x712d6bc0, 0xfa3f7633,
|
||||
0x50aca1e5, 0xb90f34d0, 0x01002f29},
|
||||
{0xf012f6a0, 0xbc3db054, 0x0d332ea7, 0x00d66897, 0xfd416167, 0x8278ef44, 0x20268e84, 0x1a1a3c4d, 0x4b11d215,
|
||||
0x7c976aa6, 0x63b6e925, 0x00949581},
|
||||
{0x339637c6, 0x9d73cf29, 0xa5642677, 0x8257d1a2, 0xcafd597c, 0xcb48f07f, 0x081435a3, 0x7a505010, 0xacbb9c39,
|
||||
0xaaa45ce1, 0x7431b9c8, 0x013f2b13},
|
||||
{0xd4710c0b, 0x9ef8bddb, 0x85047671, 0xb4c73188, 0x134695ba, 0x87a51d65, 0x022416dd, 0x67f3bc43, 0xcb2a157b,
|
||||
0x21d965b2, 0x5ce4195d, 0x013a57e4},
|
||||
{0xd2461368, 0xf2db3a9f, 0x3802aef2, 0x0595c232, 0x5ea85bd6, 0xa53d621a, 0xa34ee943, 0xce930fbc, 0x6b372bee,
|
||||
0x1d216665, 0xa4535740, 0x009f0159},
|
||||
{0x656bf68d, 0x73cf953a, 0xeac5c1d7, 0x50a5a5b5, 0xaa5355a9, 0x2697b2e1, 0x08de37d2, 0x6be70306, 0x44c5afab,
|
||||
0x907f6976, 0xd4ec46b1, 0x0155cfa2},
|
||||
{0x090e3e20, 0x034160c4, 0xf77a6fbb, 0xbc73cc59, 0x188e54f6, 0x437cd23b, 0x17e42614, 0x5a788edd, 0xebdc8eae,
|
||||
0xf1ad4f54, 0x2f129bcd, 0x005d1440},
|
||||
{0x4e269ee5, 0x5626c031, 0x0d1501ec, 0x5f97673e, 0x86d31c18, 0x4fe089bd, 0x62d1259a, 0x3e9fffcb, 0x1ff89d01,
|
||||
0xe1898f32, 0x59d01a38, 0x00fa1331},
|
||||
{0x38d427b1, 0xda80661b, 0xa814f14b, 0x1913027d, 0xcda4061d, 0xd3f61e24, 0x5da8fcb2, 0x9509e69d, 0x1f05e6d3,
|
||||
0x0e7493a5, 0xa5c6bd06, 0x00dcb8db},
|
||||
{0x61cff9ed, 0x88499d0a, 0x53718444, 0x0b317da2, 0x4b7eec5f, 0xc1624bfd, 0x5af10e6f, 0x6ffc3241, 0xd6c66ff2,
|
||||
0x27d0edf3, 0x73ab0f4a, 0x013019b5},
|
||||
{0x06027b24, 0x42dc7673, 0x3341b9e7, 0x018f8bbd, 0xa435f7e2, 0xd3b389d9, 0xea031176, 0x279739a5, 0x74c35801,
|
||||
0x3555ca51, 0x049dcf87, 0x00748c30},
|
||||
{0x81fe14de, 0x731b16f0, 0x333cc61a, 0x528d6ada, 0x5736dc15, 0x7ae87278, 0xc8bfd40c, 0xa94b9fd2, 0x299b0487,
|
||||
0x714dd8ed, 0xf1a53233, 0x00642b62},
|
||||
{0x5bc45170, 0x31270ddf, 0x7f72c758, 0x7efb6b06, 0xcf4973a8, 0x2eb9f2aa, 0xe556d234, 0xdcb534c9, 0x0e043fef,
|
||||
0xf0b1a210, 0x54dda04e, 0x00e79c44},
|
||||
{0x2d5f1bc2, 0x213b3f52, 0xfd933428, 0x9e115ba7, 0x434c9e2a, 0x7f77d57e, 0xcdb944ef, 0x47a78418, 0x699aa559,
|
||||
0x8cb01cbb, 0xb064c4d7, 0x0075bf81},
|
||||
{0x3fbfc66c, 0x0b6c2e65, 0x6fcab2f8, 0x7bece031, 0xb79dcd4d, 0x2ba7e325, 0xa5c6881b, 0x8c18f66a, 0x7283805a,
|
||||
0x4d893e5a, 0xfc296bfe, 0x0107d3c5},
|
||||
{0x948c881a, 0x53fbdbb4, 0x16803d18, 0xf27a9c14, 0xeddfafef, 0x8490f6c5, 0x3e57fa15, 0xfe068e1d, 0xd26b296b,
|
||||
0xbe923119, 0x9fa377a1, 0x00d56016},
|
||||
{0x6f5b2ad1, 0xb3bbaeb3, 0x11886a1c, 0x0efd4ba9, 0xdedb7083, 0x5911498f, 0x5bd0a90f, 0x0921fe19, 0x83d379cb,
|
||||
0x38e05d4e, 0xb7ba3c73, 0x006b39e2},
|
||||
{0xa55550ba, 0x61b560e4, 0xe7288461, 0xd9ac545b, 0xc6e3e282, 0xde8d2826, 0x7e49dd2c, 0x9e87a310, 0xc43080b7,
|
||||
0xf2edfc44, 0x95b7d300, 0x012b4875},
|
||||
{0x27591e60, 0x4048ddc3, 0xc5d21791, 0xb77c9738, 0x49826bea, 0xf2f82033, 0x42f97e95, 0xf60bb703, 0x5966139d,
|
||||
0xef8f6f16, 0xc0e95e39, 0x00327618},
|
||||
{0x441e395f, 0xf9059c8f, 0xbd087238, 0x29eab35f, 0x7dee5ff1, 0x5d4abeff, 0x771e60e9, 0x7222499b, 0x7ac324a2,
|
||||
0xb70c1ea3, 0x0da51ce8, 0x015b3af9},
|
||||
{0xe9a70026, 0xf7aa576b, 0x01c4a126, 0xb28733ef, 0xa3307647, 0x06b8e768, 0xe12588ce, 0x115500e1, 0x6c9f9b1d,
|
||||
0x7e8dd6b9, 0x6ec020b3, 0x014d091e},
|
||||
{0x8e5bbc8d, 0xd318265d, 0x141bee9b, 0x70b460ba, 0x1aa9df5b, 0x145dd6a6, 0xe3478cb3, 0xd9da2548, 0x7b509387,
|
||||
0x47250509, 0xe967973c, 0x00de53d3},
|
||||
{0xd2aa57b8, 0x5ff4399c, 0xa6ae9b07, 0x90360194, 0x6cfcdb7a, 0x68979991, 0x64e56abb, 0xf517467c, 0xad7a6573,
|
||||
0x44227491, 0xa35ebf55, 0x0001da0b},
|
||||
{0x4d80f6da, 0xd8b22d5a, 0x10ee1a06, 0x6e7b2bfb, 0x17faeac0, 0xac8d97e5, 0x7a12c923, 0x8b75540b, 0x5b42ce02,
|
||||
0xa2787368, 0xe98d9998, 0x008d30a5},
|
||||
{0x9dc292bb, 0xee29c02a, 0xc5b7e1c9, 0x9e7ea016, 0x9a908e5f, 0x62daf95d, 0x3e98eae9, 0x80a71c61, 0xfdda3bba,
|
||||
0x2d514723, 0x068ef829, 0x00f65844},
|
||||
{0x185b1ad6, 0xf62fdfa4, 0xf90ccbe6, 0x2ae7f104, 0x972ce78e, 0xfa435fb6, 0x45e59f91, 0x53a75d3c, 0x2f320b7a,
|
||||
0x7290cac2, 0xe7cb5108, 0x01a2022a},
|
||||
{0xd59dda24, 0xcf0a15be, 0xf2ec72b4, 0xbc77f6d4, 0x96c31202, 0xa8df0caf, 0xbb4f8842, 0xb95429c0, 0xd0087306,
|
||||
0xb989b210, 0x5571e9f0, 0x002b1694},
|
||||
{0x67ae536e, 0x7e84d4b5, 0xc8fb9b80, 0x3a920871, 0x1948ee86, 0x1a82df2b, 0xb3c66ed3, 0xdef79467, 0xef64d05a,
|
||||
0x58fd84f2, 0xd999f400, 0x00c6d5b7},
|
||||
{0x81ee0d53, 0x7639f9a2, 0xb5747565, 0x8ade807d, 0xe6235609, 0xfd9d6266, 0x53730f18, 0xea1948a3, 0xd890142e,
|
||||
0xa356108a, 0xe3e8a723, 0x00a48ac6},
|
||||
{0xd0ca5e04, 0x531c4b83, 0x2ba0a328, 0xff35ced6, 0xa4e563aa, 0x01613079, 0x1442dcd1, 0x6f52b3a3, 0x9e19b0a6,
|
||||
0x813b4616, 0x9536db26, 0x004828c5},
|
||||
{0x0bce1b4e, 0x8a9321a9, 0xae85d6ff, 0xb9759dbe, 0x5cb206e0, 0x1ce1d522, 0x35a1607a, 0x87df044f, 0x94e1329a,
|
||||
0x2ebabee7, 0x73586cc9, 0x01a73170},
|
||||
{0x3dd667f3, 0x69824754, 0x28fd63a2, 0x61a081a7, 0x99499385, 0x0b9f6d2e, 0x5c253e16, 0x6d45622b, 0x765a7f5f,
|
||||
0xcd672e4d, 0x7150d847, 0x01182798},
|
||||
{0x2742d2f6, 0x0af0bfd2, 0x3a02631d, 0x93616956, 0xac8a2203, 0x32dae751, 0x85cf4e2d, 0xea4ffbe7, 0x7dba6eb9,
|
||||
0x673424f4, 0x61f4060d, 0x002ec230},
|
||||
{0x5a5b5c2b, 0x226293ca, 0x0684dbc9, 0xbc0ca23e, 0x7d637c4f, 0x4510cf3a, 0x9b2f4a52, 0x7869c488, 0x2fd73a53,
|
||||
0xec009b90, 0xa8c99cca, 0x003499d6},
|
||||
{0xfd745afc, 0x9da60b0a, 0x41c5362e, 0xff0769ec, 0xfa9fd8ee, 0x487621e9, 0xab04558f, 0x138910d1, 0xc1ed03ce,
|
||||
0x870903cf, 0xed3ffb51, 0x002c1cfa},
|
||||
{0x42870c46, 0x271b1ff3, 0x13b4b491, 0x1e0a9cd1, 0x3c55c65e, 0x2d58cb1a, 0x74756f6e, 0xa6e12c32, 0x2e313bc4,
|
||||
0xf774a43d, 0xcc386ffc, 0x00ca156d},
|
||||
{0x4a67741c, 0x588f79b6, 0xc3590b63, 0xc0ae78b5, 0xc3576385, 0xad0bb97d, 0xb8473137, 0x0583dd49, 0x515d8604,
|
||||
0xb31d9631, 0xd3ba3b12, 0x015337bc},
|
||||
{0x8a458e8c, 0x976a14f5, 0xc3a26ae8, 0xc90809b4, 0x089acf15, 0x270a1575, 0x5013d4b1, 0x614a0d25, 0x6d09901e,
|
||||
0x1314e076, 0xf208945e, 0x0022f414},
|
||||
{0xc563b9a1, 0x7eca603c, 0x06fe0bc3, 0x06df0a43, 0x0ddff8c6, 0xb44d994a, 0x4512a3d4, 0x40fbe05b, 0x8aeffc9b,
|
||||
0x30f15248, 0x05198a80, 0x0036a92e}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b,
|
||||
0xc63b05c0, 0x17c510ea, 0x01ae3a46},
|
||||
{0x0ec6e39e, 0x1691ea13, 0x700d8272, 0x7db2d8ea, 0x769e389d, 0x620d1860, 0xf62334cd, 0xda1f40fd, 0x52278a89,
|
||||
0x0575d0e5, 0x9e5fd920, 0x00463005},
|
||||
{0x93997f11, 0x9403412c, 0xdfb2323f, 0x845557b3, 0x2d50c7fc, 0x66f2eaaa, 0xc103f92f, 0x992358fb, 0x5d7a3179,
|
||||
0x01d60217, 0xd2af5da0, 0x0077b354},
|
||||
{0xc1000ea4, 0x7ac2ca7a, 0x7f8d9495, 0x937db751, 0x0de62931, 0x401b3873, 0x980129ba, 0x59be7529, 0xa545a303,
|
||||
0x2ba8f85d, 0xb6705512, 0x00573e3a},
|
||||
{0x2c1b22e6, 0xb55712f9, 0x0f91cddd, 0x66cfc0f3, 0x8bb345d8, 0x8d5fcd42, 0x86c0abc3, 0x61e4cf98, 0x432fe8f3,
|
||||
0x93556354, 0xad005fb6, 0x00ff87d5},
|
||||
{0x7aba560e, 0x05065a97, 0x7918b9db, 0x333ff005, 0xdf6be708, 0x03938ae1, 0x7410a77b, 0x922d3376, 0x03a15063,
|
||||
0xa5aeaa56, 0x4aea89e5, 0x01542cb6},
|
||||
{0xe4d6a772, 0x61a6a2d6, 0x6e6239a7, 0xc18c9ef7, 0x04cac70f, 0x8772bb3f, 0x16c5916b, 0x8bbb4185, 0x46335dc0,
|
||||
0x4aa656e2, 0x842c1664, 0x008187ac},
|
||||
{0xdd4e93c5, 0xa002ea0a, 0x07458704, 0xb40a45e8, 0xbaa65f2a, 0xee9ee3ea, 0x8f3b8a87, 0xeffa4f9e, 0x95b5feba,
|
||||
0xb6e03897, 0x81751c63, 0x003c41de},
|
||||
{0x13043a4a, 0x50221a3b, 0xda73331a, 0x6537fca8, 0x8e85077c, 0x8b74cef4, 0x0e5bbe67, 0x65705341, 0xefa22d23,
|
||||
0xf0f56caa, 0xd1865d98, 0x001f8eb5},
|
||||
{0x3e26a605, 0xd9af8944, 0x6970166f, 0xad0efb6e, 0x2c7464ec, 0xc16d7972, 0xf788281b, 0xe0de4b04, 0xaa878b0e,
|
||||
0x0c049e55, 0x63e2e7cd, 0x0135383a},
|
||||
{0x6f6893f7, 0x6b12c42e, 0x44bbbf63, 0x831f38c0, 0x191be6c9, 0xa57797d4, 0x447475cb, 0x6af7f695, 0x4b8be189,
|
||||
0x3295e9e7, 0x350d0aad, 0x00a9a32b},
|
||||
{0x7656ef1d, 0xc2243f86, 0xf4211219, 0x3e4c3bc3, 0x3c9a3d21, 0xaa4db6e0, 0xe8a4c946, 0x29ac638a, 0xa4cf856e,
|
||||
0x21449f8b, 0x7d4c9c67, 0x018cf097},
|
||||
{0x6a8e0139, 0x18e472a2, 0xd6b1c835, 0xcc7c80fd, 0x6546fc0a, 0x1f760883, 0x4ea3417c, 0x5bcfc1fb, 0xe9acb8b0,
|
||||
0x52c9a29b, 0xd9f265a2, 0x01a6d8b2},
|
||||
{0xebb83ac0, 0x95eb1dc8, 0x9f390cf2, 0x1e8d70f5, 0xb0d85145, 0xf9e4955d, 0x89720ee1, 0xe9690d30, 0x50fc879f,
|
||||
0x629972a5, 0x69ccd670, 0x00456e23},
|
||||
{0x83f38be4, 0xfbfb11a1, 0x388e6726, 0xb90a19b9, 0xc860d62c, 0x3fc10bc7, 0xc3c4e575, 0xc9fe043e, 0x7396d780,
|
||||
0x67aeff74, 0x01cadaee, 0x019059fa},
|
||||
{0xfd581be8, 0x43506d6e, 0x018b1b76, 0xf09563e6, 0xe87f9d80, 0x5cd193b2, 0x0a933402, 0x18ba3260, 0x50524c77,
|
||||
0x4de839d9, 0xd90315ce, 0x0018c2ed},
|
||||
{0xa737701d, 0xf900eb81, 0x995e6672, 0x6874c90e, 0xa495900b, 0x69ade94a, 0xd07bd4b1, 0xd5f358e7, 0x6f88e8e4,
|
||||
0xbd437e9d, 0x1d6b88cf, 0x0130d706},
|
||||
{0xfc29b95f, 0x064629bd, 0xb546585c, 0x0a897bff, 0x54a80d9a, 0x856c8d4f, 0x944568ff, 0x85410cc4, 0x59fc4370,
|
||||
0xc1978c65, 0xc668dc52, 0x017c86c8},
|
||||
{0xf6109131, 0x65cecd55, 0x7d2f52e5, 0x6d7e892e, 0xb90b2403, 0xe9a09007, 0xae0a060d, 0x92ca9aac, 0xa22b1e96,
|
||||
0x5ce1cc4f, 0x45201e6f, 0x012eb33c},
|
||||
{0x20d1aac5, 0x9d2cb4cf, 0xded22997, 0x3e4a1e77, 0x07fae2e2, 0x09d692f7, 0xd49bdcbe, 0x6a6aa4f8, 0x09c01cab,
|
||||
0xa8e21ead, 0x6b03b72e, 0x01a19e81},
|
||||
{0x935650ca, 0xf3d94623, 0x2ffd937e, 0x4a688a46, 0xa622b139, 0xf55fd53a, 0x7a1a1e40, 0x227406aa, 0x9a3fea60,
|
||||
0x40dd4504, 0x1edbb584, 0x00fc2332},
|
||||
{0xf28db3fc, 0x9707402f, 0xc28593f1, 0x3d898bd7, 0xb30effcd, 0xcaee2dfd, 0x4fb6ec9d, 0xff1b0790, 0x09ed1120,
|
||||
0x9cb0597e, 0xb78d15e9, 0x005c73a5},
|
||||
{0xb0a8a3b9, 0x739a4c2e, 0xc57196ae, 0x083bde21, 0xba602f29, 0x247eb070, 0x1c2c7132, 0x4ba1dd6a, 0xe2187c6c,
|
||||
0x4ce59fb6, 0x606880b1, 0x0014a7b5},
|
||||
{0x484baf56, 0xdd0eccab, 0x4541b101, 0xe6c80eaf, 0xf7964f64, 0x35b8a558, 0xc50ccf94, 0xb3b824d4, 0x21c71aeb,
|
||||
0xe1f6b4c8, 0x23031df0, 0x01a8a647},
|
||||
{0x592a9620, 0x5338dc01, 0xd94a401b, 0xb217f96d, 0xf830b00e, 0xfefb6601, 0xafd3dee4, 0x1ec061b5, 0x05a199bd,
|
||||
0x0d5d4d3c, 0xc8489913, 0x0196c768},
|
||||
{0x1f980ca0, 0x4acb430e, 0x71c6821c, 0x8973a3cc, 0xb3e9aa75, 0x74414c20, 0x0c13f042, 0x79212a5f, 0x375c705b,
|
||||
0x5c44d226, 0x29439af2, 0x000a2fdd},
|
||||
{0xa387b60c, 0xf01901e6, 0x4561ff3d, 0xa7b1b7dc, 0x0558e085, 0x5d82d374, 0xf2bc1d29, 0x519298e5, 0x3d332207,
|
||||
0x0ad719a8, 0xea19a807, 0x0150a138},
|
||||
{0x9deb8e06, 0x7c6b3eb1, 0x28206b6c, 0x3a8f53c4, 0x7fed1065, 0x039f575f, 0x40c1f898, 0x31be74ba, 0x790ac003,
|
||||
0x76db938e, 0x5508c5e4, 0x0096d5e1},
|
||||
{0xb83f8358, 0x3e940e0e, 0x372a4b8b, 0x204d80e0, 0xa820b2ec, 0x956454b2, 0x2cc8078c, 0x8e2cb3d4, 0xc6f81363,
|
||||
0xdd0d3e12, 0x49041a64, 0x0052f327},
|
||||
{0x2aec0be2, 0x37ca2eb7, 0x555cc652, 0x05093570, 0xd2588d31, 0xe62f1adb, 0x798be240, 0x2fd2518e, 0x0ff6b579,
|
||||
0x9302d4e3, 0x6ee95e5d, 0x0025ca57},
|
||||
{0x233eed68, 0xcc664858, 0xece3a327, 0x600ca1ac, 0x93a2e34f, 0x330d1102, 0xdb5e3bb4, 0xc84ab55f, 0xe4d5576e,
|
||||
0x5179c101, 0x0938f714, 0x00efb20e},
|
||||
{0xfdddaf5c, 0x907f96e7, 0x1ffe49da, 0x348dab77, 0xc14ab779, 0x3eca44ad, 0x4cdc5d98, 0xe9b10b2e, 0xa95c5a36,
|
||||
0x65a25d16, 0x6e616518, 0x00c9f759},
|
||||
{0x7a5aff62, 0x9497d331, 0xb57cd01d, 0x21896195, 0x6c7ba745, 0xe09e22f7, 0x5a7acff0, 0xcc9f1064, 0xc93c46b0,
|
||||
0x7b867cdf, 0x23eba5ae, 0x01a05dcb},
|
||||
{0x4dcc71f4, 0xa56a8e33, 0xcbebdba2, 0xc480b083, 0x36ea43af, 0x748448fa, 0xe7859f3c, 0xee9b4b0e, 0x5af41919,
|
||||
0x9ab2bb09, 0x65caa0ea, 0x0127262d},
|
||||
{0x352a05cc, 0x77c7d12f, 0xdc7160c9, 0xb91ca5be, 0x5a3feda0, 0x245106da, 0x7669f7cd, 0xfd45012d, 0xdc5489fa,
|
||||
0xc4774629, 0x2872daa0, 0x00241273},
|
||||
{0x0d3e0b0b, 0x1838ae6f, 0xff67fc2c, 0x7fcc9b21, 0x23956100, 0xaedca59e, 0x1e79aa4b, 0x572ed634, 0xc7f0673c,
|
||||
0xaeeda160, 0xc8047256, 0x00360e2c},
|
||||
{0xe05044f9, 0xec5e4514, 0x7ec9b4ef, 0xe915b7e7, 0x9c4bec48, 0x9fb78cd8, 0xa38d95a3, 0xd7b84113, 0xb86fd119,
|
||||
0x7be64440, 0xe4f9e70a, 0x009e3a60},
|
||||
{0xc7435591, 0xc61cc546, 0xe5e94dc4, 0xea99a96f, 0xdb8ff17d, 0x5b10e2b4, 0x3dd0ff10, 0x13f8fb9d, 0xe118b9e9,
|
||||
0xcbb1c0ce, 0x7ebf8a0d, 0x00b37258},
|
||||
{0xce5943e7, 0xd44fdb9d, 0x79fa927a, 0xcb7d41ea, 0xdcee72ca, 0x9a4bcebf, 0x11634905, 0x2317799d, 0x584055ac,
|
||||
0x3f1c302e, 0xdc2d0017, 0x013ef021},
|
||||
{0xa78a1578, 0x345cb052, 0x5961b8fe, 0x1ed4d48a, 0x74a5e2af, 0x5858e93c, 0x0fd17e9f, 0xaf643f0a, 0x79d94009,
|
||||
0x61530753, 0xde7b2f53, 0x010a3393},
|
||||
{0x813925df, 0x548b1d28, 0xca3e79b6, 0xabab3a4e, 0x7e51071a, 0xb3c9c068, 0x6c5fcedb, 0x8014e879, 0x95d9facc,
|
||||
0x3ba5db77, 0x7f5c3d2f, 0x0105c419},
|
||||
{0x26bc1104, 0xbb9cbd28, 0xe03cc852, 0x27f09abb, 0x22e5be61, 0x02763b4a, 0xb94fa254, 0xa3940542, 0xff34c35f,
|
||||
0xcf058850, 0x1482533c, 0x019f538f},
|
||||
{0xb3f42de9, 0xf2126047, 0xbeb0a1b8, 0xdb0451c4, 0x9aabc291, 0x1a945bc0, 0x7fe3a6f2, 0x13d08312, 0x390e1c07,
|
||||
0xd8fb13f1, 0x6b30562b, 0x005a41c4},
|
||||
{0xe8b3d5dd, 0x1c60fcc5, 0x75b3a464, 0x5d7babba, 0xf3989910, 0x0d9f52c7, 0x9beec571, 0x464a2840, 0x79689d4b,
|
||||
0x139c496f, 0x099e64c4, 0x0022c6a3},
|
||||
{0x023e0cd1, 0x9df6c2d5, 0xa6b747de, 0x8e23def9, 0x90da6876, 0x7bc83eee, 0xc88bb007, 0xdaeac352, 0x68bb6a7f,
|
||||
0x45cabb6f, 0x94697b34, 0x001e7154},
|
||||
{0x0203d905, 0xffcee91d, 0xc99df56d, 0xd878ee01, 0x210d754c, 0xa0e882f9, 0x7d0aec6a, 0x26c96db8, 0x8ff7afe4,
|
||||
0x46e2e145, 0x54749283, 0x015cd1b0}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x42846000, 0x18000000, 0x0b85aea2, 0xdd04a400, 0x8f79b117, 0x807a89c7, 0x8d116cf9, 0x3650a49d,
|
||||
0x631d82e0, 0x0be28875, 0x00d71d23},
|
||||
{0x00000001, 0x63c69000, 0x24000000, 0x114885f3, 0xcb86f600, 0x573689a3, 0x40b7ceab, 0x539a2376, 0x5178f6ec,
|
||||
0x14ac4450, 0x91d3ccb0, 0x0142abb4},
|
||||
{0x00000001, 0x7467a800, 0xaa000000, 0x1429f19b, 0xc2c81f00, 0x3b14f5e9, 0xa0d6711d, 0xb6de7eb4, 0x5f0d2013,
|
||||
0x6d73a508, 0x54cc6ecd, 0x017872fd},
|
||||
{0x00000001, 0x7cb83400, 0xed000000, 0x159aa76f, 0xbe68b380, 0x2d042c0c, 0xd0e5c256, 0x6880ac53, 0x65d734a7,
|
||||
0x19d75564, 0xb648bfdc, 0x019356a1},
|
||||
{0x00000001, 0x80e07a00, 0x0e800000, 0x1653025a, 0x3c38fdc0, 0xa5fbc71e, 0x68ed6af2, 0x4151c323, 0x693c3ef1,
|
||||
0x70092d92, 0xe706e863, 0x01a0c873},
|
||||
{0x00000001, 0x82f49d00, 0x1f400000, 0x16af2fcf, 0xfb2122e0, 0xe27794a6, 0x34f13f40, 0x2dba4e8b, 0x6aeec416,
|
||||
0x1b2219a9, 0xff65fca7, 0x01a7815c},
|
||||
{0x00000001, 0x83feae80, 0xa7a00000, 0x16dd4689, 0x5a953570, 0x00b57b6b, 0x1af32968, 0xa3ee943f, 0xebc806a8,
|
||||
0xf0ae8fb4, 0x8b9586c8, 0x01aaddd1},
|
||||
{0x00000001, 0x8483b740, 0xebd00000, 0x16f451e6, 0x8a4f3eb8, 0x8fd46ecd, 0x0df41e7b, 0xdf08b719, 0xac34a7f1,
|
||||
0xdb74caba, 0xd1ad4bd9, 0x01ac8c0b},
|
||||
{0x00000001, 0x84c63ba0, 0x8de80000, 0x16ffd795, 0xa22c435c, 0x5763e87e, 0x07749905, 0x7c95c886, 0x8c6af896,
|
||||
0x50d7e83d, 0xf4b92e62, 0x01ad6328},
|
||||
{0x00000001, 0x84e77dd0, 0xdef40000, 0x17059a6c, 0x2e1ac5ae, 0x3b2ba557, 0x8434d64a, 0xcb5c513c, 0xfc8620e8,
|
||||
0x8b8976fe, 0x863f1fa6, 0x01adceb7},
|
||||
{0x00000001, 0x84f81ee8, 0x877a0000, 0x17087bd8, 0x741206d7, 0xad0f83c3, 0xc294f4ec, 0xf2bf9597, 0xb493b511,
|
||||
0xa8e23e5f, 0xcf021848, 0x01ae047e},
|
||||
{0x00000001, 0x85006f74, 0x5bbd0000, 0x9709ec8e, 0x970da76b, 0xe60172f9, 0x61c5043d, 0x867137c5, 0x109a7f26,
|
||||
0xb78ea210, 0x73639499, 0x01ae1f62},
|
||||
{0x00000001, 0x850497ba, 0x45de8000, 0xd70aa4e9, 0xa88b77b5, 0x827a6a94, 0x315d0be6, 0xd04a08dc, 0x3e9de430,
|
||||
0x3ee4d3e8, 0x459452c2, 0x01ae2cd4},
|
||||
{0x00000001, 0x8506abdd, 0xbaef4000, 0xf70b0116, 0x314a5fda, 0xd0b6e662, 0x99290fba, 0xf5367167, 0x559f96b5,
|
||||
0x828fecd4, 0x2eacb1d6, 0x01ae338d},
|
||||
{0x80000001, 0x8507b5ee, 0x7577a000, 0x870b2f2d, 0xf5a9d3ed, 0xf7d52448, 0x4d0f11a4, 0x87aca5ad, 0x61206ff8,
|
||||
0xa465794a, 0xa338e160, 0x01ae36e9},
|
||||
{0x40000001, 0x85083af7, 0xd2bbd000, 0xcf0b4638, 0x57d98df6, 0x0b64433c, 0x2702129a, 0xd0e7bfd0, 0x66e0dc99,
|
||||
0xb5503f85, 0xdd7ef925, 0x01ae3897},
|
||||
{0xa0000001, 0x85087d7b, 0x815de800, 0x730b51be, 0x08f16afb, 0x952bd2b6, 0x93fb9314, 0x75854ce1, 0xe9c112ea,
|
||||
0x3dc5a2a2, 0xfaa20508, 0x01ae396e},
|
||||
{0xd0000001, 0x85089ebd, 0x58aef400, 0xc50b5781, 0xe17d597d, 0xda0f9a72, 0x4a785351, 0xc7d4136a, 0xab312e12,
|
||||
0x82005431, 0x89338af9, 0x01ae39da},
|
||||
{0xe8000001, 0x8508af5e, 0xc4577a00, 0xee0b5a62, 0x4dc350be, 0x7c817e51, 0xa5b6b370, 0xf0fb76ae, 0x0be93ba6,
|
||||
0x241dacf9, 0x507c4df2, 0x01ae3a10},
|
||||
{0x74000001, 0x8508b7af, 0x7a2bbd00, 0x828b5bd3, 0x83e64c5f, 0xcdba7040, 0xd355e37f, 0x058f2850, 0xbc454271,
|
||||
0x752c595c, 0x3420af6e, 0x01ae3a2b},
|
||||
{0xba000001, 0x8508bbd7, 0xd515de80, 0xcccb5c8b, 0x1ef7ca2f, 0x7656e938, 0xea257b87, 0x0fd90121, 0x947345d6,
|
||||
0x9db3af8e, 0xa5f2e02c, 0x01ae3a38},
|
||||
{0xdd000001, 0x8508bdeb, 0x028aef40, 0xf1eb5ce8, 0xec808917, 0x4aa525b3, 0x758d478b, 0x94fded8a, 0x808a4788,
|
||||
0xb1f75aa7, 0x5edbf88b, 0x01ae3a3f},
|
||||
{0xee800001, 0x8508bef5, 0x194577a0, 0x047b5d16, 0xd344e88c, 0x34cc43f1, 0xbb412d8d, 0xd79063be, 0xf695c861,
|
||||
0x3c193033, 0xbb5084bb, 0x01ae3a42},
|
||||
{0xf7400001, 0x8508bf7a, 0x24a2bbd0, 0x0dc35d2d, 0xc6a71846, 0x29dfd310, 0xde1b208e, 0x78d99ed8, 0x319b88ce,
|
||||
0x012a1afa, 0x698acad3, 0x01ae3a44},
|
||||
{0x7ba00001, 0x8508bfbd, 0xaa515de8, 0x12675d38, 0x40583023, 0xa4699aa0, 0xef881a0e, 0xc97e3c65, 0x4f1e6904,
|
||||
0xe3b2905d, 0x40a7edde, 0x01ae3a45},
|
||||
{0xbdd00001, 0x8508bfde, 0x6d28aef4, 0x94b95d3e, 0xfd30bc11, 0xe1ae7e67, 0x783e96ce, 0xf1d08b2c, 0xdddfd91f,
|
||||
0xd4f6cb0e, 0xac367f64, 0x01ae3a45},
|
||||
{0x5ee80001, 0x8508bfef, 0x4e94577a, 0xd5e25d41, 0xdb9d0208, 0x0050f04b, 0xbc99d52f, 0x85f9b28f, 0xa540912d,
|
||||
0xcd98e867, 0xe1fdc827, 0x01ae3a45},
|
||||
{0xaf740001, 0x8508bff7, 0xbf4a2bbd, 0x7676dd42, 0xcad32504, 0x0fa2293d, 0x5ec7745f, 0x500e4641, 0x08f0ed34,
|
||||
0x49e9f714, 0xfce16c89, 0x01ae3a45},
|
||||
{0xd7ba0001, 0x0508bffb, 0x77a515df, 0x46c11d43, 0xc26e3682, 0x174ac5b6, 0x2fde43f7, 0xb518901a, 0x3ac91b37,
|
||||
0x08127e6a, 0x0a533eba, 0x01ae3a46},
|
||||
{0xebdd0001, 0xc508bffd, 0xd3d28aef, 0x2ee63d43, 0x3e3bbf41, 0x1b1f13f3, 0x9869abc3, 0x679db506, 0x53b53239,
|
||||
0x6726c215, 0x110c27d2, 0x01ae3a46},
|
||||
{0xf5ee8001, 0x2508bffe, 0x01e94578, 0xa2f8cd44, 0x7c2283a0, 0x1d093b11, 0xccaf5fa9, 0x40e0477c, 0xe02b3dba,
|
||||
0x96b0e3ea, 0x14689c5e, 0x01ae3a46},
|
||||
{0x7af74001, 0x5508bfff, 0x18f4a2bc, 0x5d021544, 0x9b15e5d0, 0x1dfe4ea0, 0xe6d2399c, 0xad8190b7, 0xa666437a,
|
||||
0xae75f4d5, 0x1616d6a4, 0x01ae3a46},
|
||||
{0xbd7ba001, 0x6d08bfff, 0x247a515e, 0x3a06b944, 0x2a8f96e8, 0x9e78d868, 0x73e3a695, 0xe3d23555, 0x0983c65a,
|
||||
0xba587d4b, 0x16edf3c7, 0x01ae3a46},
|
||||
{0xdebdd001, 0x7908bfff, 0x2a3d28af, 0x28890b44, 0xf24c6f74, 0x5eb61d4b, 0x3a6c5d12, 0xfefa87a4, 0xbb1287ca,
|
||||
0x4049c185, 0x17598259, 0x01ae3a46},
|
||||
{0xef5ee801, 0xff08bfff, 0x2d1e9457, 0x1fca3444, 0xd62adbba, 0xbed4bfbd, 0x9db0b850, 0x0c8eb0cb, 0x13d9e883,
|
||||
0x034263a3, 0x178f49a2, 0x01ae3a46},
|
||||
{0xf7af7401, 0x4208bfff, 0x2e8f4a2c, 0x1b6ac8c4, 0xc81a11dd, 0xeee410f6, 0x4f52e5ef, 0x1358c55f, 0xc03d98df,
|
||||
0x64beb4b1, 0x17aa2d46, 0x01ae3a46},
|
||||
{0xfbd7ba01, 0x6388bfff, 0x2f47a516, 0x993b1304, 0x4111acee, 0x86ebb993, 0x2823fcbf, 0x16bdcfa9, 0x166f710d,
|
||||
0x957cdd39, 0x17b79f18, 0x01ae3a46},
|
||||
{0xfdebdd01, 0x7448bfff, 0x2fa3d28b, 0x58233824, 0x7d8d7a77, 0x52ef8de1, 0x148c8827, 0x187054ce, 0xc1885d24,
|
||||
0xaddbf17c, 0x17be5801, 0x01ae3a46},
|
||||
{0xfef5ee81, 0xfca8bfff, 0x2fd1e945, 0xb7974ab4, 0x9bcb613b, 0x38f17808, 0x8ac0cddb, 0x99499760, 0x9714d32f,
|
||||
0x3a0b7b9e, 0x17c1b476, 0x01ae3a46},
|
||||
{0xff7af741, 0x40d8bfff, 0x2fe8f4a3, 0xe75153fc, 0x2aea549d, 0x2bf26d1c, 0xc5daf0b5, 0x59b638a9, 0x81db0e35,
|
||||
0x802340af, 0x17c362b0, 0x01ae3a46},
|
||||
{0xffbd7ba1, 0xe2f0bfff, 0x2ff47a51, 0xff2e58a0, 0xf279ce4e, 0x2572e7a5, 0x63680222, 0x39ec894e, 0xf73e2bb8,
|
||||
0xa32f2337, 0x17c439cd, 0x01ae3a46},
|
||||
{0xffdebdd1, 0x33fcbfff, 0x2ffa3d29, 0x8b1cdaf2, 0xd6418b27, 0xa23324ea, 0xb22e8ad8, 0xaa07b1a0, 0x31efba79,
|
||||
0x34b5147c, 0x17c4a55c, 0x01ae3a46},
|
||||
{0xffef5ee9, 0xdc82bfff, 0x2ffd1e94, 0xd1141c1b, 0x48256993, 0xe093438d, 0xd991cf33, 0x621545c9, 0x4f4881da,
|
||||
0x7d780d1e, 0x17c4db23, 0x01ae3a46},
|
||||
{0xfff7af75, 0xb0c5bfff, 0xaffe8f4a, 0xf40fbcaf, 0x811758c9, 0x7fc352de, 0x6d437161, 0xbe1c0fde, 0x5df4e58a,
|
||||
0x21d9896f, 0x17c4f607, 0x01ae3a46},
|
||||
{0xfffbd7bb, 0x9ae73fff, 0xefff47a5, 0x058d8cf9, 0x1d905065, 0x4f5b5a87, 0xb71c4278, 0xec1f74e8, 0xe54b1762,
|
||||
0xf40a4797, 0x17c50378, 0x01ae3a46},
|
||||
{0xfffdebde, 0x0ff7ffff, 0x0fffa3d3, 0x8e4c751f, 0x6bcccc32, 0xb7275e5b, 0xdc08ab03, 0x0321276d, 0x28f6304f,
|
||||
0xdd22a6ac, 0x17c50a31, 0x01ae3a46}}};
|
||||
static constexpr storage<12> rou = {0xc563b9a1, 0x7eca603c, 0x06fe0bc3, 0x06df0a43, 0x0ddff8c6, 0xb44d994a,
|
||||
0x4512a3d4, 0x40fbe05b, 0x8aeffc9b, 0x30f15248, 0x05198a80, 0x0036a92e};
|
||||
TWIDDLES(modulus, rou)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 5;
|
||||
|
||||
@@ -4,193 +4,17 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_377 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 47;
|
||||
static constexpr unsigned modulus_bit_count = 253;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<8> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd,
|
||||
0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb,
|
||||
0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0xf5ee7fff, 0x2ffffffe, 0xa6558901,
|
||||
0xa3c84ffe, 0x9f4bb2e1, 0x65d35aa9, 0xed549aa1};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3,
|
||||
0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7,
|
||||
0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f,
|
||||
0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b,
|
||||
0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffff3, 0x7d1c7fff, 0x6ffffff2, 0x7257f50f,
|
||||
0x512c0fee, 0x16d81575, 0x2bbb9a9d, 0x0d4bda32};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x1beeec02, 0x4122dd1a, 0x74fee875, 0xbd1eae95,
|
||||
0x27b28e2f, 0x838557e2, 0x2290c02c, 0x07b30191};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000},
|
||||
{0xfbfa0a01, 0x0f830f7e, 0xd75769a0, 0x20f8b46c, 0xf05d5033, 0x7108bd18, 0x0788de01, 0x07405e08},
|
||||
{0x60b9bdae, 0xc78085a6, 0x789094f5, 0x3116ec22, 0xce87d660, 0x0a02a81d, 0xc2a94856, 0x0ead8236},
|
||||
{0x3e83a7cc, 0x6ffc39d9, 0x958a0a74, 0x117d996e, 0x0b92e8c9, 0xc242289d, 0x29d977d6, 0x0484efb4},
|
||||
{0x0111ec3f, 0x15455b00, 0xc5f6be6f, 0x6b62d7af, 0x337f2d07, 0xfcba0365, 0x43fccd26, 0x0f151842},
|
||||
{0xc31ec69b, 0x57951b2e, 0x2a37ce1f, 0x3e0a4be7, 0xcf3b198a, 0x960aeb4a, 0x341fd5cd, 0x04fb0673},
|
||||
{0xa921851f, 0x71c1b78e, 0x7808f239, 0x3c26340c, 0x976fb990, 0xbcc8f69b, 0xe880dc71, 0x06a5edb2},
|
||||
{0xc0f5679e, 0x7619eab5, 0x0dc0b9cd, 0x1f4cd10e, 0xbf6a480a, 0x7e1b70aa, 0x7f5461bb, 0x0ffc66da},
|
||||
{0xec5cbab2, 0x8159806d, 0x498264a3, 0x14ea1333, 0xe3abfaa6, 0x56bbe1d8, 0x02aa031f, 0x09d2b5c4},
|
||||
{0xc010c48a, 0xd2aa9562, 0x3b004b60, 0x447e5c11, 0x11e243bb, 0xd5a21c13, 0x0ab418b1, 0x01eab23e},
|
||||
{0xacff6986, 0x08715ee8, 0xa93924d0, 0xab01878a, 0x6e9ae5c4, 0xbfbc5e71, 0x26b08d6e, 0x0f8000bf},
|
||||
{0x3ddbc679, 0x06bc13b0, 0x615256ce, 0x7269a1f1, 0x1f5221a2, 0xf7716fbf, 0x8c66c14f, 0x0fa1f02c},
|
||||
{0x906f531f, 0xdd40f131, 0x30728eff, 0xb06b29c7, 0x88839294, 0xc891fd19, 0x646978e8, 0x04e88447},
|
||||
{0x6e259cdc, 0xb1e4b769, 0x00514e5e, 0xbcb0b709, 0x05113e7f, 0x74edb7c0, 0xe92e22af, 0x10c88511},
|
||||
{0x240ede5b, 0xebb2e898, 0x42cd84c6, 0xc2639185, 0x9408f956, 0xf79e8391, 0x94e87a7d, 0x06872fa1},
|
||||
{0x260678ff, 0xf8522249, 0xa8de9973, 0x6148cb16, 0x5a4e8d56, 0x5750f3f4, 0xbaeaf0c3, 0x0e805156},
|
||||
{0x3d766f80, 0x1b4b71cf, 0x1069012d, 0x47d21195, 0x9151ebec, 0x5635235f, 0x2b13c808, 0x093f7d91},
|
||||
{0x4637701d, 0x0848f958, 0x4c8353af, 0x8a750076, 0x0ef6174a, 0x485f4e4f, 0xf38db632, 0x078d97a1},
|
||||
{0x66a16869, 0x50c487c1, 0xd1fd4525, 0x380a66ab, 0x265e8539, 0xd455a01a, 0x064b5334, 0x0cd62875},
|
||||
{0x3358eb25, 0xdbc547bc, 0x722037db, 0x8909d398, 0x5e705b6d, 0x8b7075b5, 0x9bdaf407, 0x02694bb2},
|
||||
{0xf45b9621, 0x102fbfb0, 0xf04faac0, 0xe80f4241, 0x7ca61177, 0x0b830bfd, 0x7033169d, 0x10521892},
|
||||
{0xcc943028, 0xed2576ad, 0xfa4c6090, 0x846e49bc, 0x0049d8e6, 0xc74c1865, 0x665d7be5, 0x0e9c5a12},
|
||||
{0xafeb494b, 0x97319dcd, 0x1d78404c, 0xab30c83e, 0xf26ffe90, 0x452d8a48, 0xa36452c7, 0x0bfc2e92},
|
||||
{0xedc626c3, 0xf30e312d, 0xcf1f3a94, 0x8367a7ca, 0x917a1b28, 0x621e15e1, 0xf2e93b82, 0x07cd59f8},
|
||||
{0xf02ba42c, 0x553085d9, 0x1119b10d, 0x59662159, 0x6b8ea03f, 0xaa670958, 0x7ce92983, 0x066f6f5f},
|
||||
{0x4dd87a5e, 0xf423a283, 0xd9a4c364, 0x1fe46601, 0xbfdc7e9b, 0xda4addbf, 0x3bf94b2b, 0x0a7f2bd8},
|
||||
{0xe5f8848a, 0x270a2326, 0xa727567d, 0x97d14afa, 0x48746fc7, 0x1a3a5a4e, 0xa42f077a, 0x0044e4b1},
|
||||
{0x20b7298a, 0xd7652451, 0x65013b06, 0xc7c9a0b7, 0xad0d8457, 0x479b82a9, 0x0c99f5ce, 0x0bef1e5a},
|
||||
{0x1912f7fa, 0x77d7da1d, 0x299fd7d6, 0xbcb7a5b2, 0x142a4480, 0x705e45dd, 0xb492dbd8, 0x0dc835fd},
|
||||
{0xa0234d2d, 0xe943054c, 0xe5f5be5e, 0x673b0ee0, 0x5048a19a, 0xcdd48e41, 0xabc3cb99, 0x0997d277},
|
||||
{0xa9966ac4, 0x1ae0ea67, 0xda83fb3b, 0x4e2dbb1c, 0x0b51380e, 0xf77cf749, 0xb28a7670, 0x048b4b0e},
|
||||
{0xb14361d4, 0x7f1db43f, 0x25ab6d51, 0x7927e578, 0x383bf21e, 0xb43e52a5, 0xd27fa99f, 0x077595e9},
|
||||
{0xa90a2740, 0xfe3ca4f0, 0x512a7c7a, 0xd259ff36, 0xb41fe696, 0xbca3176a, 0xf33132ce, 0x05bd5ea3},
|
||||
{0xf284f768, 0xdeee484b, 0xe26a0475, 0x2a02e015, 0x88d968c2, 0xf0eb4925, 0x82a391c9, 0x0620ce9e},
|
||||
{0xbd83a3da, 0xd3b69b29, 0xe02ce197, 0x9543950f, 0xc2f87783, 0x80799665, 0xc15be215, 0x11ce8199},
|
||||
{0x1b29736e, 0x8f267f19, 0x1d5a0c3a, 0xa2e04d58, 0x1ae99514, 0x76803064, 0x57f7c806, 0x12129439},
|
||||
{0xf32d6bac, 0xa0b973d4, 0xf0d81b72, 0xae951889, 0x2e2daa0a, 0x51dbe098, 0x40d9af8f, 0x04679474},
|
||||
{0x22df9f13, 0x56313de8, 0x599e7536, 0xe2e75200, 0x6d163e50, 0xa1b4fce7, 0xc8111763, 0x0aec2172},
|
||||
{0x355dd694, 0x4258374d, 0x44c76a20, 0x5c31e8ac, 0xaa5fd062, 0x9b473969, 0x1a37b6b4, 0x0a693d77},
|
||||
{0x44ddbbdc, 0xbafb92a6, 0x26b01974, 0x63c7a02d, 0x5f28a274, 0x0ff86e13, 0x867f2e29, 0x0a7b462a},
|
||||
{0xd5fba57b, 0x90684fea, 0xe0defe98, 0xed237883, 0x030ae924, 0xc502b692, 0xe7a1ec2c, 0x08aa58e8},
|
||||
{0x5e9020dd, 0xade9d4b4, 0x87db8813, 0x489259d2, 0x25051238, 0x5ddce740, 0xb5bc4d11, 0x0c775db1},
|
||||
{0x293f8481, 0xd52cc17a, 0x6f133205, 0x041178fb, 0xb2961832, 0xbbc70d18, 0x481760cd, 0x073d34d1},
|
||||
{0xfdacff58, 0x8215b91d, 0x98331645, 0xd8d9177d, 0x439e803c, 0xe85223ad, 0xcca42c1f, 0x04aa8ef0},
|
||||
{0x01ab3a4d, 0x006f60fa, 0x814ba450, 0xe6600e15, 0xdf9eb147, 0xbde4df36, 0x33760d7b, 0x055d58fa},
|
||||
{0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3, 0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e},
|
||||
{0xd60fb046, 0xc9fa190c, 0xc5b4674e, 0xdb5c179b, 0xbc7b8726, 0x2b2bce0b, 0xbf6e69bf, 0x0e4eb338},
|
||||
{0x8ffc4ed5, 0x74732d1f, 0xb7f2eefc, 0x42d9f590, 0xa24dd4dd, 0xf70461e5, 0xef64676f, 0x03b6eba4},
|
||||
{0x102bbab0, 0x5a21f98a, 0x8d8e2efb, 0xa6a147a9, 0x7612906f, 0x0eb4f005, 0x47d8d2e3, 0x0e1a5481},
|
||||
{0xd01e5aa8, 0x6e509add, 0x6e3f123d, 0xe1582468, 0x8274db24, 0xbd6313ee, 0xd173a634, 0x05d5836e},
|
||||
{0xe975c0cf, 0x6aab3344, 0x6f1dc38e, 0xca362e0e, 0x1dd1743a, 0x2fe72cda, 0xc1b4c4c2, 0x0c1c956e},
|
||||
{0xec89a64f, 0x59fe97a0, 0xe8de5d4c, 0x579617d7, 0xc9c1ea7b, 0x256a305b, 0x53fa131b, 0x01ffae4e},
|
||||
{0x29bcb088, 0x463a73ff, 0xe1438e80, 0xee9e9a5e, 0x3c9369e4, 0x2a00951f, 0x80a32052, 0x09711183},
|
||||
{0x4bec8dd2, 0xa36899db, 0x96393687, 0x2946872e, 0x842df3c8, 0xd4b5734f, 0x5f5cd8fb, 0x0834098f},
|
||||
{0xe3c711b9, 0x4bc485f6, 0x648d1d7e, 0xf43a2598, 0xee88abaa, 0x7f981a0e, 0xec6a3f27, 0x0c88c9c3},
|
||||
{0x49046b52, 0x42bcc6c2, 0x56ab9ecc, 0xcc77294a, 0xe4df3ddd, 0x02ecb41a, 0x67f76726, 0x0e567d22},
|
||||
{0x91c64fc2, 0x1cc56cc3, 0xd16a490b, 0x8cb71e65, 0x14fac366, 0x984be37e, 0xa25d7ba5, 0x0a08e032},
|
||||
{0xd4f5941e, 0x966d9739, 0xe5772a73, 0x5805deb6, 0x5c1f970c, 0xe4eb0d33, 0xbdf35409, 0x039715db},
|
||||
{0xcc6518ac, 0x8419686c, 0x9c7a2366, 0x96dec3a8, 0x71724384, 0xefbfcac6, 0xaf34c239, 0x0c44b99a},
|
||||
{0xc18ff4fd, 0xcb66fe1b, 0x86c8d586, 0x588e18b3, 0x1dfab57c, 0xc6e6d2a3, 0x7d7d4efd, 0x10918ad2},
|
||||
{0x97a18f58, 0x56d6cf22, 0xd0d7abd9, 0x11710758, 0x5eb7a9c5, 0xd1a6608b, 0xc4937e38, 0x04059bdb},
|
||||
{0x4b1b63a9, 0x12998cbc, 0xcf420c9f, 0x0f780c6c, 0x129289ad, 0xa5e48723, 0x240a141d, 0x0a3a1223},
|
||||
{0x00db2b48, 0xa43c0e02, 0x933d10ee, 0x76585489, 0xc0ba6a80, 0x12d64af1, 0x2fad8d8e, 0x01940f43},
|
||||
{0x1d75bec9, 0xe29ef6c0, 0xd4b0183b, 0xead287a2, 0xedfd3795, 0x75a017cf, 0x64427c8e, 0x107f8d0f},
|
||||
{0xa26c8c12, 0xa6f4e1d1, 0xf6610f7e, 0x13571553, 0x56701caf, 0xd95e5df6, 0x2263d69d, 0x050e7b89},
|
||||
{0xc161761f, 0x271d7caf, 0xc369a371, 0xf1001d6f, 0x00e60f51, 0x65286415, 0xb74d14b8, 0x00b918f9},
|
||||
{0x03ad3139, 0x01d3f431, 0xa137ce16, 0xe56f6002, 0x1deb42e8, 0x97f53369, 0xaa37cddd, 0x033fa9ac},
|
||||
{0x60cf1330, 0x840f913b, 0x1df5ed87, 0x5610cde6, 0x72b36ddf, 0x858381b0, 0x6f64e0b7, 0x109bf66c},
|
||||
{0x930cee0b, 0x432d3626, 0xf26e8ba3, 0x55ed3efb, 0x14c5457f, 0x802eebcc, 0xe2310f22, 0x00d300e3},
|
||||
{0x4b9ac952, 0x3d29f5ba, 0xc8ea8f94, 0x7c7f2662, 0xcefc3052, 0x736ccb63, 0x0981f3cb, 0x04bfce2f},
|
||||
{0x5d4e643c, 0x3da791ea, 0x85bff013, 0xb6a956ef, 0xd73de6a3, 0x86c629a8, 0x6b8c48a9, 0x0a5a5f55},
|
||||
{0x49c6284a, 0x9ba6aa00, 0xeacbdc63, 0x0b8429fb, 0xedafdf37, 0x9b9c6c5b, 0xad0c78c6, 0x009907e8},
|
||||
{0x3e47b53f, 0x50380ce2, 0x3a9613fc, 0x6ea3c2d3, 0x4c87ab50, 0xfe743105, 0xd192221c, 0x07871979},
|
||||
{0xe978594b, 0x4ddd3320, 0x3abe3f79, 0xe5f36fbe, 0xe4dcff8e, 0x5dba9ef2, 0x7105148f, 0x0bfc27e2},
|
||||
{0x498fb549, 0xd5993cd5, 0x09da9272, 0x718adcee, 0x72bd5bc0, 0x9e03cbb4, 0xc592813f, 0x07206942},
|
||||
{0x78fd3239, 0xaf29730b, 0x40c3e723, 0xbd907ac9, 0x77f214f7, 0x5dcc0aad, 0xb05fb3a1, 0x02d958da},
|
||||
{0xdf80223d, 0x55f432c9, 0x11a2fed9, 0x23daf2f6, 0x41ae8c34, 0x9e43e003, 0x95f22373, 0x0d51533b},
|
||||
{0x7998b62c, 0xbb53132b, 0x22c9b4aa, 0x064a9186, 0x71d61334, 0xd56de253, 0x04e416f6, 0x10fcf25f},
|
||||
{0xdddb58ec, 0x41f8042f, 0x10886d85, 0x7dd54384, 0x622ff4b4, 0x19544f90, 0x050cc539, 0x02f0b49a},
|
||||
{0xa39b02a3, 0x8a3de898, 0xdc94422c, 0x068b2992, 0xf493db31, 0x1c5f019a, 0x11b0f668, 0x066b1790},
|
||||
{0x78500f1a, 0x98310dd7, 0x735ccb27, 0x1c6050bf, 0xb2081df4, 0x07b6fa7f, 0xfa0f1e20, 0x003edf24},
|
||||
{0x89b0ca6f, 0xb4d938e2, 0x2c897570, 0x0214eb59, 0x2d4cf27a, 0x56c45327, 0x3ed546a4, 0x10a2f358},
|
||||
{0xef01ed78, 0xf2828212, 0xf103c9ca, 0xa66094ac, 0x7a2d5573, 0xdceb481d, 0x8af46aab, 0x0190fcde},
|
||||
{0x526bf9fc, 0x023031cc, 0x79c209ba, 0x0e4136c0, 0x3ec42e5c, 0xe5234df1, 0x1d455234, 0x00cb9592},
|
||||
{0x33bf2a1c, 0x842b0c9c, 0xa29b9236, 0x1fd43c95, 0xc06795d3, 0x6b37a603, 0x0c1b712a, 0x00017b17},
|
||||
{0xaf858193, 0x2b955be2, 0x5fb5e378, 0xa513d8be, 0xa326aeb9, 0x88c4ebeb, 0xf3d45990, 0x00c378e2},
|
||||
{0x6464580f, 0x33e6c8c0, 0x3c4aa09f, 0x9d560eb3, 0xcc98f404, 0xb3f1a899, 0x8ca24b48, 0x012c1ea5},
|
||||
{0xe3b4dc56, 0xa0594a67, 0x91b698e1, 0xc8e6b582, 0x8df78057, 0x711cadbf, 0x396466f8, 0x0049abdf},
|
||||
{0x4ffa086a, 0xecc89610, 0xca06afc6, 0x4db82291, 0x8f3a6426, 0x9ae7c68c, 0x2a874432, 0x0b3dae8c},
|
||||
{0x3b3625b6, 0x1e62401f, 0x28471e5a, 0xd0692164, 0x5cad6b77, 0xb85aa9ec, 0xaa95acf2, 0x063e4b66},
|
||||
{0xb9112c51, 0x2542c2b2, 0x6e23b3ce, 0x36ead8da, 0x76476754, 0x9a268d13, 0xa1ad7cf1, 0x121f44ad}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af},
|
||||
{0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06},
|
||||
{0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2},
|
||||
{0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08},
|
||||
{0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33},
|
||||
{0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9},
|
||||
{0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93},
|
||||
{0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9},
|
||||
{0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab},
|
||||
{0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85},
|
||||
{0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1},
|
||||
{0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8},
|
||||
{0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003},
|
||||
{0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1},
|
||||
{0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007},
|
||||
{0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3},
|
||||
{0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08},
|
||||
{0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3},
|
||||
{0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309},
|
||||
{0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433},
|
||||
{0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9},
|
||||
{0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513},
|
||||
{0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539},
|
||||
{0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b},
|
||||
{0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555},
|
||||
{0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559},
|
||||
{0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c},
|
||||
{0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d},
|
||||
{0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e},
|
||||
{0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e},
|
||||
{0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e},
|
||||
{0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e},
|
||||
{0x7af74001, 0xa2117fff, 0x232ac481, 0x2b8e9efe, 0x2bdd8972, 0x139dfa73, 0x90d6f2a7, 0x12ab655e},
|
||||
{0xbd7ba001, 0x56117fff, 0x79956241, 0xc29c8afe, 0xc40a9cb9, 0xba2923c8, 0x9581cbfe, 0x12ab655e},
|
||||
{0xdebdd001, 0x30117fff, 0xa4cab121, 0x8e2380fe, 0x9021265d, 0x8d6eb873, 0x97d738aa, 0x12ab655e},
|
||||
{0xef5ee801, 0x1d117fff, 0xba655891, 0x73e6fbfe, 0xf62c6b2f, 0x771182c8, 0x9901ef00, 0x12ab655e},
|
||||
{0xf7af7401, 0x13917fff, 0xc532ac49, 0x66c8b97e, 0xa9320d98, 0x6be2e7f3, 0x99974a2b, 0x12ab655e},
|
||||
{0xfbd7ba01, 0x0ed17fff, 0xca995625, 0xe039983e, 0x02b4decc, 0xe64b9a89, 0x99e1f7c0, 0x12ab655e},
|
||||
{0xfdebdd01, 0x0c717fff, 0xcd4cab13, 0x1cf2079e, 0xaf764767, 0xa37ff3d3, 0x9a074e8b, 0x12ab655e},
|
||||
{0xfef5ee81, 0x0b417fff, 0xcea6558a, 0x3b4e3f4e, 0x05d6fbb4, 0x021a2079, 0x9a19f9f1, 0x12ab655e},
|
||||
{0xff7af741, 0x8aa97fff, 0xcf532ac5, 0xca7c5b26, 0xb10755da, 0xb16736cb, 0x9a234fa3, 0x12ab655e},
|
||||
{0xffbd7ba1, 0x4a5d7fff, 0xcfa99563, 0x12136912, 0x069f82ee, 0x090dc1f5, 0x9a27fa7d, 0x12ab655e},
|
||||
{0xffdebdd1, 0x2a377fff, 0xcfd4cab2, 0xb5def008, 0xb16b9977, 0xb4e10789, 0x9a2a4fe9, 0x12ab655e},
|
||||
{0xffef5ee9, 0x9a247fff, 0xcfea6559, 0x87c4b383, 0x06d1a4bc, 0x0acaaa54, 0x9a2b7aa0, 0x12ab655e},
|
||||
{0xfff7af75, 0x521affff, 0x4ff532ad, 0xf0b79541, 0x3184aa5e, 0x35bf7bb9, 0x9a2c0ffb, 0x12ab655e},
|
||||
{0xfffbd7bb, 0x2e163fff, 0x0ffa9957, 0x25310620, 0xc6de2d30, 0xcb39e46b, 0x9a2c5aa8, 0x12ab655e},
|
||||
{0xfffdebde, 0x1c13dfff, 0x6ffd4cac, 0xbf6dbe8f, 0x118aee98, 0x95f718c5, 0x9a2c7fff, 0x12ab655e}}};
|
||||
static constexpr storage<8> rou = {0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3,
|
||||
0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,54 +3,14 @@
|
||||
#define BLS12_381_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_381 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr unsigned modulus_bit_count = 381;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe,
|
||||
0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd,
|
||||
0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709,
|
||||
0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa,
|
||||
0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13,
|
||||
0x0d2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x00005555, 0x46010000, 0x4eac0000, 0xe1540001,
|
||||
0x094f09db, 0x98cf2d5f, 0x0c7aed40, 0x9b88b47b,
|
||||
0xbcb45328, 0xb4e45849, 0xc6801965, 0xe5feee15};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7,
|
||||
0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x0002fffd, 0x76090000, 0xc40c0002, 0xebf4000b,
|
||||
0x53c758ba, 0x5f489857, 0x70525745, 0x77ce5853,
|
||||
0xa256ec6d, 0x5c071a97, 0xfa80e493, 0x15f65ec3};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x380b4820, 0xf4d38259, 0xd898fafb, 0x7fe11274,
|
||||
0x14956dc8, 0x343ea979, 0x58a88de9, 0x1797ab14,
|
||||
0x3c4f538b, 0xed5e6427, 0xe8fb0ce9, 0x14fec701};
|
||||
static constexpr storage<12> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0,
|
||||
0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
PARAMS(modulus)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 1;
|
||||
// true if nonresidue is negative
|
||||
@@ -58,4 +18,4 @@ namespace bls12_381 {
|
||||
};
|
||||
} // namespace bls12_381
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@@ -4,148 +4,17 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_381 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 32;
|
||||
static constexpr unsigned modulus_bit_count = 255;
|
||||
static constexpr unsigned num_of_reductions = 2;
|
||||
static constexpr storage<8> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805,
|
||||
0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0xfffffffc, 0xfff96ffb, 0x4ef6900b,
|
||||
0x26876015, 0xcce76020, 0xa675f520, 0xcfb69d4c};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0x00000000, 0x0001a401, 0xac425bfd,
|
||||
0xf65e27fa, 0xccc627f7, 0xd66282b7, 0x8c1258ac};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0xfffffff8, 0xfff2dffb, 0x9dfa401f, 0x736ec016, 0xa62b8008, 0x50cfdee1, 0x22a90579,
|
||||
0x2aa71985, 0x09847dbd, 0x664d2877, 0x3a564fe5, 0xbcb3083c, 0x48304f6f, 0xd2f09de1, 0xd1fd83cf};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad,
|
||||
0xc1f823b4, 0x0e2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffffe, 0x00000001, 0x00034802, 0x5884b7fa,
|
||||
0xecbc4ff5, 0x998c4fef, 0xacc5056f, 0x1824b159};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0xfe75c040, 0x13f75b69, 0x09dc705f, 0xab6fca8f,
|
||||
0x4f77266a, 0x7204078a, 0x30009d57, 0x1bbe8693};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753},
|
||||
{0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000},
|
||||
{0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d},
|
||||
{0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e},
|
||||
{0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb},
|
||||
{0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac},
|
||||
{0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802},
|
||||
{0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59},
|
||||
{0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667},
|
||||
{0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098},
|
||||
{0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b},
|
||||
{0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0},
|
||||
{0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8},
|
||||
{0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8},
|
||||
{0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911},
|
||||
{0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd},
|
||||
{0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333},
|
||||
{0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db},
|
||||
{0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83},
|
||||
{0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f},
|
||||
{0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5},
|
||||
{0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3},
|
||||
{0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd},
|
||||
{0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc},
|
||||
{0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd},
|
||||
{0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580},
|
||||
{0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d},
|
||||
{0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d},
|
||||
{0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f},
|
||||
{0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b},
|
||||
{0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72},
|
||||
{0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753},
|
||||
{0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753},
|
||||
{0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e},
|
||||
{0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4},
|
||||
{0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c},
|
||||
{0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee},
|
||||
{0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d},
|
||||
{0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25},
|
||||
{0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e},
|
||||
{0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508},
|
||||
{0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d},
|
||||
{0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63},
|
||||
{0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7},
|
||||
{0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1},
|
||||
{0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac},
|
||||
{0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003},
|
||||
{0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c},
|
||||
{0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7},
|
||||
{0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950},
|
||||
{0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2},
|
||||
{0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6},
|
||||
{0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5},
|
||||
{0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9},
|
||||
{0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960},
|
||||
{0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6},
|
||||
{0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf},
|
||||
{0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f},
|
||||
{0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533},
|
||||
{0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff},
|
||||
{0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287},
|
||||
{0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6},
|
||||
{0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9},
|
||||
{0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e},
|
||||
{0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268},
|
||||
{0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd},
|
||||
{0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18},
|
||||
{0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5},
|
||||
{0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04},
|
||||
{0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab},
|
||||
{0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f},
|
||||
{0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9},
|
||||
{0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e},
|
||||
{0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878},
|
||||
{0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5},
|
||||
{0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c},
|
||||
{0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77},
|
||||
{0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365},
|
||||
{0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c},
|
||||
{0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57},
|
||||
{0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5},
|
||||
{0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014},
|
||||
{0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3},
|
||||
{0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583},
|
||||
{0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b},
|
||||
{0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df},
|
||||
{0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719},
|
||||
{0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736},
|
||||
{0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744},
|
||||
{0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b},
|
||||
{0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f},
|
||||
{0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751},
|
||||
{0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752},
|
||||
{0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}}};
|
||||
static constexpr storage<8> rou = {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2,
|
||||
0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,42 +3,14 @@
|
||||
#define BN254_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bn254 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522,
|
||||
0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45,
|
||||
0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x278302b9, 0xc3df73e9, 0x978e3572, 0x687e956e,
|
||||
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95,
|
||||
0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a,
|
||||
0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55,
|
||||
0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17,
|
||||
0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xc58f0d9d, 0xd35d438d, 0xf5c70b3d, 0x0a78eb28,
|
||||
0x7879462c, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x014afa37, 0xed84884a, 0x0278edf8, 0xeb202285,
|
||||
0xb74492d9, 0xcf63e9cf, 0x59e5c639, 0x2e671571};
|
||||
static constexpr storage<8> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
PARAMS(modulus)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 1;
|
||||
// true if nonresidue is negative
|
||||
|
||||
@@ -4,136 +4,17 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bn254 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 28;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<8> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090,
|
||||
0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121,
|
||||
0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x0fffffff, 0xbc1e0a6c, 0x86468f6e, 0xd7cc17b7,
|
||||
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975,
|
||||
0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb,
|
||||
0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7,
|
||||
0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520,
|
||||
0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x4ffffffb, 0xac96341c, 0x9f60cd29, 0x36fc7695,
|
||||
0x7879462e, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x6db1194e, 0xdc5ba005, 0xe111ec87, 0x090ef5a9,
|
||||
0xaeb85d5d, 0xc8260de4, 0x82c5551c, 0x15ebf951};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72},
|
||||
{0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1},
|
||||
{0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2},
|
||||
{0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6},
|
||||
{0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d},
|
||||
{0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd},
|
||||
{0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561},
|
||||
{0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e},
|
||||
{0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1},
|
||||
{0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584},
|
||||
{0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596},
|
||||
{0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49},
|
||||
{0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651},
|
||||
{0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f},
|
||||
{0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb},
|
||||
{0xce8f58e5, 0x276e5858, 0x5655210e, 0x0512eca9, 0xe70e61f3, 0xc3708cc6, 0xa7d74902, 0x1bf82deb},
|
||||
{0x7dcdc0e0, 0x84c6bfa5, 0x13f4d1bd, 0xc57088ff, 0xb5b95e4d, 0x5c0176fb, 0x3a8d46c1, 0x19ddbcaf},
|
||||
{0x613f6cbd, 0x5c1d597f, 0x8357473a, 0x30525841, 0x968e4915, 0x51829353, 0x844bca52, 0x2260e724},
|
||||
{0x53337857, 0x53422da9, 0xdbed349f, 0xac616632, 0x06d1e303, 0x27508aba, 0x0a0ed063, 0x26125da1},
|
||||
{0xfcd0b523, 0xb2c87885, 0xca5a5ce3, 0x58f50577, 0x8598fc8c, 0x4222150e, 0xae2bdd1a, 0x1ded8980},
|
||||
{0xa219447e, 0xa76dde56, 0x359eebbb, 0xec1a1f05, 0x8be08215, 0xcda0ceb6, 0xb1f8d9a7, 0x1ad92f46},
|
||||
{0xab80c59d, 0xb54d4506, 0x22dd991f, 0x5680c640, 0xbc23a139, 0x6b7bcf70, 0x5ab4c74d, 0x0210fe63},
|
||||
{0xe32b045b, 0x1c25f1e3, 0x2e832696, 0x145e0db8, 0x71c6441f, 0x852e2a03, 0x845d50d2, 0x0c9fabc7},
|
||||
{0xb878331a, 0xeccd4f3e, 0x8dc6d26e, 0x7b26b748, 0xd9130cd4, 0xa19b0361, 0x326341ef, 0x2a734ebb},
|
||||
{0x2f4e9212, 0x1c79bd57, 0x3d68f9ae, 0x605b52b6, 0xb8d89d4a, 0x0113eff9, 0xf1ff73b2, 0x1067569a},
|
||||
{0x80928c44, 0x034afc45, 0xf6437da2, 0xb4823532, 0x6dc6e364, 0x5f256a9f, 0xb363ebe8, 0x049ae702},
|
||||
{0x725b19f0, 0x9bd61b6e, 0x41112ed4, 0x402d111e, 0x8ef62abc, 0x00e0a7eb, 0xa58a7e85, 0x2a3c09f0}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000},
|
||||
{0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711},
|
||||
{0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf},
|
||||
{0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136},
|
||||
{0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2},
|
||||
{0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053},
|
||||
{0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327},
|
||||
{0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821},
|
||||
{0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7},
|
||||
{0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21},
|
||||
{0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2},
|
||||
{0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58},
|
||||
{0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f},
|
||||
{0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766},
|
||||
{0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7},
|
||||
{0xd96f0d22, 0x20e75251, 0x6bd4e8c9, 0xc01c7f08, 0xf9dd50c4, 0x37d8b00b, 0xc43ca872, 0x244cf010},
|
||||
{0x66c5174c, 0x7a823174, 0x22d5ad70, 0x7dbe118c, 0x111119c5, 0xf8d7c71d, 0x83780e87, 0x036853f0},
|
||||
{0xca535321, 0xd98f9924, 0xe66e6c81, 0x22dbc0ef, 0x664ae1b7, 0xa15cf806, 0xa314fb67, 0x06e402c0},
|
||||
{0xe26c91f3, 0x0852a8fd, 0x3baca626, 0x521f45cb, 0x2c51bfca, 0xab6473bc, 0x2100895f, 0x100c332d},
|
||||
{0xa376d0f0, 0xf5fac783, 0x940797d3, 0x50fd246e, 0x145f5278, 0xab14ecc1, 0x41091b14, 0x19c6dfb8},
|
||||
{0x7faa1396, 0x43dc52e2, 0x4beced23, 0xd437be9d, 0x6d3c38c3, 0xecc11e9c, 0x0c74a876, 0x2eb58439},
|
||||
{0xd69ca83b, 0x811b03e7, 0xa1a6eadf, 0x126a786b, 0x4e2b8e61, 0x1dd75c9f, 0xbda6792b, 0x2165a1a5},
|
||||
{0x110b737b, 0x02e1d4d1, 0xb323a164, 0x7be1488d, 0x9cd06163, 0xa334d317, 0xdb50e9cd, 0x2710c370},
|
||||
{0x9550fe47, 0x45d2f3cb, 0xf6a8efc4, 0x5f43327b, 0xe993ee18, 0x5bcd0d50, 0xb21de952, 0x27f035bd},
|
||||
{0x232e3983, 0x1d63cbae, 0xaa1b58e2, 0xac815161, 0x6aeb019e, 0x531f42a5, 0x03ca2ef5, 0x2dcd51d9},
|
||||
{0x980db869, 0xa8b64ba8, 0xc9718f6c, 0x4c787f72, 0x15d27ced, 0x7746a25a, 0x435a46e9, 0x110bf78f},
|
||||
{0x9d18157e, 0x72394277, 0xfd399d5d, 0xec9d51f8, 0x49d5387f, 0x6117635d, 0x9c229cd5, 0x01b77519}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739},
|
||||
{0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6},
|
||||
{0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4},
|
||||
{0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b},
|
||||
{0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff},
|
||||
{0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39},
|
||||
{0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5},
|
||||
{0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24},
|
||||
{0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b},
|
||||
{0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f},
|
||||
{0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9},
|
||||
{0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d},
|
||||
{0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50},
|
||||
{0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1},
|
||||
{0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa},
|
||||
{0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e},
|
||||
{0xf5360801, 0x8b9953a2, 0x859533b4, 0x7c05542e, 0x5ea6179c, 0xe83b698e, 0xb9f82f90, 0x30643640},
|
||||
{0x729b0401, 0xe7bda49b, 0x7fa75222, 0xd21c9e3b, 0x7013b7fc, 0x5045d7a2, 0xcd94e7dd, 0x30644259},
|
||||
{0xb14d8201, 0x15cfcd17, 0xfcb0615a, 0xfd284341, 0x78ca882c, 0x844b0eac, 0x57634403, 0x30644866},
|
||||
{0xd0a6c101, 0xacd8e155, 0x3b34e8f5, 0x12ae15c5, 0x7d25f045, 0x9e4daa31, 0x9c4a7216, 0x30644b6c},
|
||||
{0xe0536081, 0x785d6b74, 0xda772cc3, 0x1d70ff06, 0xff53a451, 0x2b4ef7f3, 0xbebe0920, 0x30644cef},
|
||||
{0x6829b041, 0x5e1fb084, 0xaa184eaa, 0x22d273a7, 0x406a7e57, 0xf1cf9ed5, 0x4ff7d4a4, 0x30644db1},
|
||||
{0x2c14d821, 0xd100d30c, 0x11e8df9d, 0x25832df8, 0xe0f5eb5a, 0x550ff245, 0x1894ba67, 0x30644e12},
|
||||
{0x0e0a6c11, 0x8a716450, 0x45d12817, 0xa6db8b20, 0x313ba1db, 0x86b01bfe, 0x7ce32d48, 0x30644e42},
|
||||
{0xff053609, 0x6729acf1, 0x5fc54c54, 0x6787b9b4, 0x595e7d1c, 0x1f8030da, 0xaf0a66b9, 0x30644e5a},
|
||||
{0xf7829b05, 0xd585d142, 0x6cbf5e72, 0xc7ddd0fe, 0x6d6feabc, 0x6be83b48, 0xc81e0371, 0x30644e66},
|
||||
{0x73c14d83, 0x0cb3e36b, 0x733c6782, 0xf808dca3, 0x7778a18c, 0x921c407f, 0xd4a7d1cd, 0x30644e6c},
|
||||
{0xb1e0a6c2, 0xa84aec7f, 0xf67aec09, 0x101e6275, 0xfc7cfcf5, 0xa536431a, 0xdaecb8fb, 0x30644e6f}}};
|
||||
static constexpr storage<8> rou = {0x725b19f0, 0x9bd61b6e, 0x41112ed4, 0x402d111e,
|
||||
0x8ef62abc, 0x00e0a7eb, 0xa58a7e85, 0x2a3c09f0};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,76 +3,15 @@
|
||||
#define BW6_761_BASE_BASE_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bw6_761 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 24;
|
||||
static constexpr unsigned modulus_bit_count = 761;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {
|
||||
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
|
||||
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
|
||||
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
|
||||
static constexpr storage<limbs_count> modulus_2 = {
|
||||
0x00000116, 0xe93a0000, 0xe0000105, 0xcd227cd0, 0xd5e1486f, 0x2c19f15d, 0xaccf51f0, 0x31422d84,
|
||||
0xe7d7fe5d, 0xe3b9a7b8, 0x25f3fb20, 0x0d1391da, 0x4b684609, 0x079d75fe, 0xcb09d232, 0xe0f74c71,
|
||||
0x010f7c82, 0xa504ebdf, 0x03a28d10, 0x724c30d5, 0x09f5fe7d, 0xa30f9280, 0xf7079c15, 0x0245d049};
|
||||
static constexpr storage<limbs_count> modulus_4 = {
|
||||
0x0000022c, 0xd2740000, 0xc000020b, 0x9a44f9a1, 0xabc290df, 0x5833e2bb, 0x599ea3e0, 0x62845b09,
|
||||
0xcfaffcba, 0xc7734f71, 0x4be7f641, 0x1a2723b4, 0x96d08c12, 0x0f3aebfc, 0x9613a464, 0xc1ee98e3,
|
||||
0x021ef905, 0x4a09d7be, 0x07451a21, 0xe49861aa, 0x13ebfcfa, 0x461f2500, 0xee0f382b, 0x048ba093};
|
||||
static constexpr storage<limbs_count> neg_modulus = {
|
||||
0xffffff75, 0x0b62ffff, 0x8fffff7d, 0x196ec197, 0x150f5bc8, 0xe9f30751, 0xa9985707, 0x675ee93d,
|
||||
0x8c1400d1, 0x8e232c23, 0xed06026f, 0x79763712, 0xda4bdcfb, 0xfc314500, 0x1a7b16e6, 0x8f8459c7,
|
||||
0x7f7841be, 0xad7d8a10, 0x7e2eb977, 0x46d9e795, 0xfb0500c1, 0x2e7836bf, 0x047c31f5, 0xfedd17db};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
|
||||
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
|
||||
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00004b79, 0xa27e0000, 0xa0008e35, 0xbae96db2, 0x82ebf7b1, 0x4aaf1d22, 0x7224cb3d, 0x7908fd92,
|
||||
0x29b17ed1, 0x6fe68290, 0xafc968db, 0xfe1b7282, 0x9028bbf0, 0xe1e548cb, 0x3a8ffc03, 0x09094ed6,
|
||||
0x61e9cf95, 0xd63ea631, 0x54918abf, 0xe834ca62, 0x52aa651e, 0xe52594ed, 0xb4c46a4f, 0xe2423252,
|
||||
0x6c09aae4, 0xa8cf17d8, 0xc5f5cee5, 0x2d80ffb0, 0x55bbc10d, 0x2dede100, 0xe2360382, 0x1f4e7a7c,
|
||||
0xae2fe433, 0x586c3847, 0x78eadae1, 0x915c56e1, 0x69a5ce00, 0xa35b2945, 0x767c08ca, 0x9d66e7fe,
|
||||
0xd8b88c77, 0x7e44cf6a, 0x67c9c873, 0xb29bfc93, 0xbbc80af9, 0x6a24005a, 0xc64ce3d5, 0x00014a92};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x000096f2, 0x44fc0000, 0x40011c6b, 0x75d2db65, 0x05d7ef63, 0x955e3a45, 0xe449967a, 0xf211fb24,
|
||||
0x5362fda2, 0xdfcd0520, 0x5f92d1b6, 0xfc36e505, 0x205177e1, 0xc3ca9197, 0x751ff807, 0x12129dac,
|
||||
0xc3d39f2a, 0xac7d4c62, 0xa923157f, 0xd06994c4, 0xa554ca3d, 0xca4b29da, 0x6988d49f, 0xc48464a5,
|
||||
0xd81355c9, 0x519e2fb0, 0x8beb9dcb, 0x5b01ff61, 0xab77821a, 0x5bdbc200, 0xc46c0704, 0x3e9cf4f9,
|
||||
0x5c5fc866, 0xb0d8708f, 0xf1d5b5c2, 0x22b8adc2, 0xd34b9c01, 0x46b6528a, 0xecf81195, 0x3acdcffc,
|
||||
0xb17118ef, 0xfc899ed5, 0xcf9390e6, 0x6537f926, 0x779015f3, 0xd44800b5, 0x8c99c7aa, 0x00029525};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00012de4, 0x89f80000, 0x800238d6, 0xeba5b6ca, 0x0bafdec6, 0x2abc748a, 0xc8932cf5, 0xe423f649,
|
||||
0xa6c5fb45, 0xbf9a0a40, 0xbf25a36d, 0xf86dca0a, 0x40a2efc3, 0x8795232e, 0xea3ff00f, 0x24253b58,
|
||||
0x87a73e54, 0x58fa98c5, 0x52462aff, 0xa0d32989, 0x4aa9947b, 0x949653b5, 0xd311a93f, 0x8908c94a,
|
||||
0xb026ab93, 0xa33c5f61, 0x17d73b96, 0xb603fec3, 0x56ef0434, 0xb7b78401, 0x88d80e08, 0x7d39e9f3,
|
||||
0xb8bf90cc, 0x61b0e11e, 0xe3ab6b85, 0x45715b85, 0xa6973802, 0x8d6ca515, 0xd9f0232a, 0x759b9ff9,
|
||||
0x62e231de, 0xf9133dab, 0x9f2721cd, 0xca6ff24d, 0xef202be6, 0xa890016a, 0x19338f55, 0x00052a4b};
|
||||
static constexpr storage<limbs_count> m = {0x2507e899, 0x11629ccd, 0x2e4424dd, 0xab1eef5b, 0x481d2cfa, 0xb82146a9,
|
||||
0x34e4227b, 0xf3182afa, 0xbeb25621, 0xf615fdb5, 0xccc261d6, 0xc4d8988c,
|
||||
0xaaf4fab0, 0x3590d652, 0x2ab9ff30, 0x9c5d0a04, 0x6ec3f460, 0xf6e8534f,
|
||||
0x88075ab4, 0xe8d78b06, 0x6f3fc8fe, 0xa8d3675b, 0x7bc5cd4b, 0x03852086};
|
||||
static constexpr storage<limbs_count> one = {
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {
|
||||
0xffff85d5, 0x0202ffff, 0x8fff8ce7, 0x5a582635, 0x827faade, 0x9e996e43, 0x0ee47df4, 0xda6aff32,
|
||||
0x1d94b80b, 0xece9cb3e, 0x5248240b, 0xc0e667a2, 0xdcad3905, 0xa74da5bf, 0x462f2103, 0x2352e7fe,
|
||||
0x08b1c87c, 0x7b565880, 0xe711022f, 0x45848a63, 0x9f65a9df, 0xd7a81ebb, 0xf127e87d, 0x0051f77e};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {
|
||||
0x181fa3f1, 0x27c2b2a0, 0x25a0e1b8, 0x7d9ca9f9, 0x0a004a5d, 0x35a910f0, 0xdb6b8539, 0x54655b3f,
|
||||
0x7695ef18, 0x5e763565, 0x4fae56bb, 0x226022c2, 0xb70d7652, 0x80e7f067, 0x72116b89, 0x435a8b4a,
|
||||
0x5d84e0d4, 0xac258fd6, 0x4427c7b2, 0x47ee8ac5, 0xd04e621b, 0x478c4048, 0x2add3e93, 0x00e0aa7d};
|
||||
static constexpr storage<24> modulus = {0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae,
|
||||
0x5667a8f8, 0x98a116c2, 0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed,
|
||||
0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638, 0x8087be41, 0x528275ef,
|
||||
0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
|
||||
PARAMS(modulus)
|
||||
};
|
||||
} // namespace bw6_761
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/snark_fields/bls12_377_base.cuh"
|
||||
|
||||
namespace bw6_761 {
|
||||
|
||||
@@ -3,46 +3,15 @@
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quartic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace babybear {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 1;
|
||||
static constexpr unsigned omegas_count = 28;
|
||||
static constexpr unsigned modulus_bit_count = 31;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<1> modulus = {0x78000001};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x78000001};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xf0000002};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x87ffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x78000001, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {0xf0000001, 0x38400000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {0xe0000002, 0x70800001};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {0xc0000004, 0xe1000003};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x88888887};
|
||||
static constexpr storage<limbs_count> one = {0x00000001};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffffe};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x38400000};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x78000000}, {0x10faa3e0}, {0x6b615c47}, {0x21ceed5a}, {0x2c1c3348}, {0x36c54c86}, {0x701dd01c},
|
||||
{0x56a9a28e}, {0x03e4cabf}, {0x5bacde79}, {0x1eb53838}, {0x1cd781af}, {0x0961a0b7}, {0x65098a87},
|
||||
{0x77851a0b}, {0x5bcba331}, {0x053fc0f5}, {0x5bf816e5}, {0x4bb124ab}, {0x571e9d4e}, {0x313732cb},
|
||||
{0x28aca172}, {0x4e319b52}, {0x45692d95}, {0x14ff4ba1}, {0x00004951}, {0x00000089}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x78000000}, {0x67055c21}, {0x5ee99486}, {0x0bb4c4e4}, {0x4ab33b27}, {0x044b4497}, {0x410e23aa},
|
||||
{0x08a7ee2b}, {0x563cb93d}, {0x3d70b4b7}, {0x77d999f1}, {0x6ceb65b5}, {0x49e7f635}, {0x0eae3a8c},
|
||||
{0x238b8a78}, {0x70d71b0a}, {0x0eaacc45}, {0x5af0f193}, {0x47303308}, {0x573cbfad}, {0x29ff72c0},
|
||||
{0x05af9dac}, {0x00ef24df}, {0x26985530}, {0x22d1ce4b}, {0x08359375}, {0x2cabe994}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x3c000001}, {0x5a000001}, {0x69000001}, {0x70800001}, {0x74400001}, {0x76200001}, {0x77100001},
|
||||
{0x77880001}, {0x77c40001}, {0x77e20001}, {0x77f10001}, {0x77f88001}, {0x77fc4001}, {0x77fe2001},
|
||||
{0x77ff1001}, {0x77ff8801}, {0x77ffc401}, {0x77ffe201}, {0x77fff101}, {0x77fff881}, {0x77fffc41},
|
||||
{0x77fffe21}, {0x77ffff11}, {0x77ffff89}, {0x77ffffc5}, {0x77ffffe3}, {0x77fffff2}}};
|
||||
static constexpr storage<1> rou = {0x00000089};
|
||||
TWIDDLES(modulus, rou)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 11;
|
||||
@@ -58,5 +27,5 @@ namespace babybear {
|
||||
/**
|
||||
* Extension field of `scalar_t`; enabled when the `-DEXT_FIELD` build option is set.
|
||||
*/
|
||||
typedef ExtensionField<fp_config> extension_t;
|
||||
typedef ExtensionField<fp_config, scalar_t> extension_t;
|
||||
} // namespace babybear
|
||||
|
||||
224
icicle/include/fields/stark_fields/m31.cuh
Normal file
224
icicle/include/fields/stark_fields/m31.cuh
Normal file
@@ -0,0 +1,224 @@
|
||||
#pragma once
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quartic_extension.cuh"
|
||||
|
||||
namespace m31 {
|
||||
template <class CONFIG>
|
||||
class MersenneField : public Field<CONFIG>
|
||||
{
|
||||
public:
|
||||
HOST_DEVICE_INLINE MersenneField(const MersenneField& other) : Field<CONFIG>(other) {}
|
||||
HOST_DEVICE_INLINE MersenneField(const uint32_t& x = 0) : Field<CONFIG>({x}) {}
|
||||
HOST_DEVICE_INLINE MersenneField(storage<CONFIG::limbs_count> x) : Field<CONFIG>{x} {}
|
||||
HOST_DEVICE_INLINE MersenneField(const Field<CONFIG>& other) : Field<CONFIG>(other) {}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField zero() { return MersenneField(CONFIG::zero); }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField one() { return MersenneField(CONFIG::one.limbs[0]); }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField from(uint32_t value) { return MersenneField(value); }
|
||||
|
||||
static HOST_INLINE MersenneField rand_host() { return MersenneField(Field<CONFIG>::rand_host()); }
|
||||
|
||||
static void rand_host_many(MersenneField* out, int size)
|
||||
{
|
||||
for (int i = 0; i < size; i++)
|
||||
out[i] = rand_host();
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE MersenneField& operator=(const Field<CONFIG>& other)
|
||||
{
|
||||
if (this != &other) { Field<CONFIG>::operator=(other); }
|
||||
return *this;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE uint32_t get_limb() const { return this->limbs_storage.limbs[0]; }
|
||||
|
||||
// The `Wide` struct represents a redundant 32-bit form of the Mersenne Field.
|
||||
struct Wide {
|
||||
uint32_t storage;
|
||||
static constexpr HOST_DEVICE_INLINE Wide from_field(const MersenneField& xs)
|
||||
{
|
||||
Wide out{};
|
||||
out.storage = xs.get_limb();
|
||||
return out;
|
||||
}
|
||||
static constexpr HOST_DEVICE_INLINE Wide from_number(const uint32_t& xs)
|
||||
{
|
||||
Wide out{};
|
||||
out.storage = xs;
|
||||
return out;
|
||||
}
|
||||
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys)
|
||||
{
|
||||
uint64_t tmp = (uint64_t)xs.storage + ys.storage; // max: 2^33 - 2 = 2^32(1) + (2^32 - 2)
|
||||
tmp = ((tmp >> 32) << 1) + (uint32_t)(tmp); // 2(1)+(2^32-2) = 2^32(1)+(0)
|
||||
return from_number((uint32_t)((tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(1) + 0 = 2
|
||||
}
|
||||
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys)
|
||||
{
|
||||
uint64_t tmp = CONFIG::modulus_3 + xs.storage -
|
||||
ys.storage; // max: 3(2^31-1) + 2^32-1 - 0 = 2^33 + 2^31-4 = 2^32(2) + (2^31-4)
|
||||
return from_number(((uint32_t)(tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(2)+(2^31-4) = 2^31
|
||||
}
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE Wide neg(const Wide& xs)
|
||||
{
|
||||
uint64_t tmp = CONFIG::modulus_3 - xs.storage; // max: 3(2^31-1) - 0 = 2^32(1) + (2^31 - 3)
|
||||
return from_number(((uint32_t)(tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(1)+(2^31-3) = 2^31 - 1
|
||||
}
|
||||
friend HOST_DEVICE_INLINE Wide operator*(Wide xs, const Wide& ys)
|
||||
{
|
||||
uint64_t t1 = (uint64_t)xs.storage * ys.storage; // max: 2^64 - 2^33+1 = 2^32(2^32 - 2) + 1
|
||||
t1 = ((t1 >> 32) << 1) + (uint32_t)(t1); // max: 2(2^32 - 2) + 1 = 2^32(1) + (2^32 - 3)
|
||||
return from_number((((uint32_t)(t1 >> 32)) << 1) + (uint32_t)(t1)); // max: 2(1) - (2^32 - 3) = 2^32 - 1
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField div2(const MersenneField& xs, const uint32_t& power = 1)
|
||||
{
|
||||
uint32_t t = xs.get_limb();
|
||||
return MersenneField{{((t >> power) | (t << (31 - power))) & MersenneField::get_modulus().limbs[0]}};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField neg(const MersenneField& xs)
|
||||
{
|
||||
uint32_t t = xs.get_limb();
|
||||
return MersenneField{{t == 0 ? t : MersenneField::get_modulus().limbs[0] - t}};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField reduce(Wide xs)
|
||||
{
|
||||
const uint32_t modulus = MersenneField::get_modulus().limbs[0];
|
||||
uint32_t tmp = (xs.storage >> 31) + (xs.storage & modulus); // max: 1 + 2^31-1 = 2^31
|
||||
tmp = (xs.storage >> 31) + (xs.storage & modulus); // max: 1 + 0 = 1
|
||||
return MersenneField{{tmp == modulus ? 0 : tmp}};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField inverse(const MersenneField& x)
|
||||
{
|
||||
uint32_t xs = x.limbs_storage.limbs[0];
|
||||
if (xs <= 1) return xs;
|
||||
uint32_t a = 1, b = 0, y = xs, z = MersenneField::get_modulus().limbs[0], e, m = z;
|
||||
while (1) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
e = __ffs(y) - 1;
|
||||
#else
|
||||
e = __builtin_ctz(y);
|
||||
#endif
|
||||
y >>= e;
|
||||
if (a >= m) {
|
||||
a = (a & m) + (a >> 31);
|
||||
if (a == m) a = 0;
|
||||
}
|
||||
a = ((a >> e) | (a << (31 - e))) & m;
|
||||
if (y == 1) return a;
|
||||
e = a + b;
|
||||
b = a;
|
||||
a = e;
|
||||
e = y + z;
|
||||
z = y;
|
||||
y = e;
|
||||
}
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE MersenneField operator+(MersenneField xs, const MersenneField& ys)
|
||||
{
|
||||
uint32_t m = MersenneField::get_modulus().limbs[0];
|
||||
uint32_t t = xs.get_limb() + ys.get_limb();
|
||||
if (t > m) t = (t & m) + (t >> 31);
|
||||
if (t == m) t = 0;
|
||||
return MersenneField{{t}};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE MersenneField operator-(MersenneField xs, const MersenneField& ys)
|
||||
{
|
||||
return xs + neg(ys);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE MersenneField operator*(MersenneField xs, const MersenneField& ys)
|
||||
{
|
||||
uint64_t x = (uint64_t)(xs.get_limb()) * ys.get_limb();
|
||||
uint32_t t = ((x >> 31) + (x & MersenneField::get_modulus().limbs[0]));
|
||||
uint32_t m = MersenneField::get_modulus().limbs[0];
|
||||
if (t > m) t = (t & m) + (t >> 31);
|
||||
if (t > m) t = (t & m) + (t >> 31);
|
||||
if (t == m) t = 0;
|
||||
return MersenneField{{t}};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Wide mul_wide(const MersenneField& xs, const MersenneField& ys)
|
||||
{
|
||||
return Wide::from_field(xs) * Wide::from_field(ys);
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE Wide sqr_wide(const MersenneField& xs)
|
||||
{
|
||||
return mul_wide(xs, xs);
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField sqr(const MersenneField& xs) { return xs * xs; }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField to_montgomery(const MersenneField& xs) { return xs; }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField from_montgomery(const MersenneField& xs) { return xs; }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField pow(MersenneField base, int exp)
|
||||
{
|
||||
MersenneField res = one();
|
||||
while (exp > 0) {
|
||||
if (exp & 1) res = res * base;
|
||||
base = base * base;
|
||||
exp >>= 1;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
};
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 1;
|
||||
static constexpr unsigned omegas_count = 1;
|
||||
static constexpr unsigned modulus_bit_count = 31;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x7fffffff};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xfffffffe};
|
||||
static constexpr uint64_t modulus_3 = 0x17ffffffd;
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffffffc};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x87ffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x7fffffff, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {0x00000001, 0x3fffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {0x00000002, 0x7ffffffe};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {0x00000004, 0xfffffffc};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x80000001};
|
||||
static constexpr storage<limbs_count> one = {0x00000001};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x00000001};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x00000001};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {{{0x7ffffffe}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {{{0x7ffffffe}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {{{0x40000000}}};
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 11;
|
||||
// true if nonresidue is negative.
|
||||
static constexpr bool nonresidue_is_negative = false;
|
||||
};
|
||||
|
||||
/**
|
||||
* Scalar field. Is always a prime field.
|
||||
*/
|
||||
typedef MersenneField<fp_config> scalar_t;
|
||||
|
||||
/**
|
||||
* Extension field of `scalar_t`; enabled when the `-DEXT_FIELD` build option is set.
|
||||
*/
|
||||
typedef ExtensionField<fp_config, scalar_t> extension_t;
|
||||
} // namespace m31
|
||||
@@ -2,626 +2,18 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
// modulus = 3618502788666131213697322783095070105623107215331596699973092056135872020481 (2^251+17*2^192+1)
|
||||
namespace stark252 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned modulus_bit_count = 252;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr unsigned omegas_count = 192;
|
||||
static constexpr storage<8> modulus = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000011, 0x08000000};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000011, 0x08000000};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000022, 0x10000000};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000044, 0x20000000};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffee, 0xf7ffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x08000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000022, 0x10000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000121, 0x10000000, 0x00000001, 0x00400000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000044, 0x20000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000242, 0x20000000, 0x00000002, 0x00800000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000088, 0x40000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000484, 0x40000000, 0x00000004, 0x01000000};
|
||||
static constexpr storage<limbs_count> m = {0x8c81fffb, 0x00000002, 0xfeccf000, 0xffffffff,
|
||||
0x0000907f, 0x00000000, 0xffffffbc, 0x1fffffff};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffffe1, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xfffffdf0, 0x07ffffff};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000121, 0x10000000, 0x00000001, 0x00400000};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x08000000},
|
||||
{0xf41337e3, 0x2a616626, 0xac8320da, 0xc5268e56, 0x4329f8c7, 0x53312066, 0x29a2995b, 0x06250239},
|
||||
{0xee6feebb, 0x3ada5e1d, 0xe4412e87, 0x98c62155, 0x2f9c676e, 0xc90adb1e, 0x0de874d9, 0x063365fe},
|
||||
{0x6021e539, 0x8337c45f, 0xbbf30245, 0xb0bdf467, 0x514425f3, 0x4537602d, 0x88826aba, 0x05ec467b},
|
||||
{0x9b48a8ab, 0x2225638f, 0x1a8e7981, 0x26da375d, 0xce6246af, 0xfcdca219, 0x9ecd5c85, 0x0789ad45},
|
||||
{0xb2703765, 0xd6871506, 0xf9e225ec, 0xd09bd064, 0x10826800, 0x5e869a07, 0xe82b2bb5, 0x0128f0fe},
|
||||
{0xdd4af20f, 0xfdab65db, 0x56f9ddbc, 0xefa66822, 0x1b03a097, 0x587781ce, 0x9556f9b8, 0x000fcad1},
|
||||
{0xff0cb347, 0x9f1bc8d7, 0xd0e87cd5, 0xc4d78992, 0xdd51a717, 0xbc7924d5, 0xfd121b58, 0x00c92ecb},
|
||||
{0xc13a1d0b, 0xcc4074a0, 0xe3bc8e32, 0xa1f811a9, 0x6d4b9bd4, 0x0234b46e, 0x7880b4dc, 0x011d07d9},
|
||||
{0xec89c4f1, 0xa206c054, 0xdc125289, 0x653d9e35, 0x711825f5, 0x72406af6, 0x46a03edd, 0x0659d839},
|
||||
{0x0fa30710, 0x45391692, 0x11b54c6c, 0xd439f572, 0xa3492c1e, 0xed5ebbf4, 0xb5d9a6de, 0x010f4d91},
|
||||
{0x7afd187f, 0x9273dbbc, 0x91ee171f, 0xdb5375bc, 0x6749ae3d, 0xc061f425, 0x6ec477cf, 0x003d14df},
|
||||
{0x3112b02d, 0x8171e1da, 0xadf9bf78, 0x5c4564eb, 0x5689b232, 0x68c34184, 0x6538624f, 0x0363d70a},
|
||||
{0x606082e1, 0x3e5a42f0, 0x76fc314a, 0x5edd09f0, 0x0f673d7c, 0xd650df25, 0x34832dba, 0x0393a32b},
|
||||
{0x13a77460, 0xe3efc75d, 0x62ef8a01, 0x93898bc8, 0x8bdbd9b3, 0x1c3a6e5c, 0x611b7206, 0x034b5d5d},
|
||||
{0x309d9da9, 0x80ee9837, 0xf51eddbc, 0x1646d633, 0x4901fab8, 0xb9d2cd85, 0x9978ee09, 0x01eb6d84},
|
||||
{0x2755bfac, 0xa7b1f98c, 0xeb7aa1c1, 0x9ec8116c, 0x3109e611, 0x0eeadedd, 0xc9761a8a, 0x06a6f98d},
|
||||
{0x9745a046, 0xce7b0a8b, 0xe411ee63, 0x7ff61841, 0x635f8799, 0x34f67453, 0xef852560, 0x04768803},
|
||||
{0xbffaa9db, 0x1727fce0, 0xf973dc22, 0x858f5918, 0x223f6558, 0x3e277fa0, 0xf71614e3, 0x02d25658},
|
||||
{0x8574e81f, 0xe3d47b99, 0x7fc4c648, 0xc727c9af, 0xee93dc85, 0x581d81ca, 0xca8a00d9, 0x0594beaf},
|
||||
{0x0e5ffcb8, 0x00654744, 0xe7c1b2fd, 0x030530a6, 0xecbf157b, 0x27e46d76, 0xbeea04f1, 0x01f4c2bf},
|
||||
{0x3e3a2f4b, 0xead33145, 0xd6482f17, 0xd841544d, 0x8d24a344, 0x9822fb10, 0x31eeac7c, 0x03e43835},
|
||||
{0xb40bdbe8, 0x01af11c3, 0xb32a3b23, 0xd7c9c0a1, 0xcd0be360, 0x81cb2e43, 0xafb3df1a, 0x01054544},
|
||||
{0x77156db2, 0xf6b13488, 0xddc0f211, 0x1ad6f3be, 0xd664f4da, 0xe643d3ea, 0x174a8e80, 0x071a47b8},
|
||||
{0x4ca88ffc, 0xb86b03a4, 0x8ef9a25a, 0x6e3398e6, 0xf5fa4665, 0xce9a0d37, 0x5c437763, 0x06e8e769},
|
||||
{0x4586dbc3, 0x32609f1d, 0xaa2da684, 0x03148f22, 0x4795d346, 0xa679e36b, 0x9e51225c, 0x03d8d2c7},
|
||||
{0xea5f81cf, 0xeac5be9e, 0x64c12e72, 0x102e16b2, 0xfee282e4, 0xce0bc0d9, 0xa93b28f3, 0x01f05206},
|
||||
{0xbb6422f9, 0x258e96d2, 0x617c5468, 0x751615d8, 0x6056f032, 0x27145cb6, 0x81c06d84, 0x057a7971},
|
||||
{0xb030713c, 0xf42231bb, 0x3a96c59e, 0xae9c3f9a, 0xf1ee840c, 0x5397e8e2, 0xf2b87657, 0x05e7deca},
|
||||
{0xf81f58b4, 0x209745aa, 0x91af248d, 0x74a64310, 0xc04b00b7, 0xe566a8e1, 0x80fb4cea, 0x022bde40},
|
||||
{0x5de74517, 0x8265b62b, 0xb9b9f2c9, 0x6a788149, 0xa9565d98, 0x6fec2239, 0x573f0c28, 0x060ac0c4},
|
||||
{0xd3ce8992, 0xc129d0f1, 0x81c43de5, 0x719252eb, 0x48221e1a, 0xfea566de, 0x0be8ced2, 0x050732ed},
|
||||
{0x2216f1c8, 0x9aae0db3, 0xd7220015, 0x95e231ac, 0x6340df6f, 0xbd6ae160, 0x16a6e39c, 0x0166c8e2},
|
||||
{0x76b0a92e, 0x3ccd9d2b, 0x7d671a9d, 0x1feb39d7, 0x2109fd56, 0x3c49a630, 0x5d4ec292, 0x07badc4b},
|
||||
{0x5dd8c4c3, 0x081c3166, 0xec14ba21, 0x9dca12d8, 0xcf93b2e5, 0xf58069e2, 0x571ddc34, 0x02399005},
|
||||
{0x08a616fc, 0x65a19cf4, 0x8aea6ff7, 0x860d442c, 0x6896a559, 0x4f24ab19, 0x3d7f5ae6, 0x0685db92},
|
||||
{0x622478c4, 0x051093f0, 0x3fab8962, 0x5c200627, 0x21254c39, 0x2aa7ae1b, 0x7b116fb9, 0x0100fff9},
|
||||
{0x00637050, 0x2693b834, 0x22440235, 0x3fef7c1b, 0x3481c4fe, 0x31150ac1, 0xf261b6de, 0x0772cb7a},
|
||||
{0xd990d491, 0x6966804c, 0xc7505f35, 0x46aba1bc, 0xaceeb7f7, 0x4f696cba, 0x6474b8f0, 0x02b73cad},
|
||||
{0xf39cd3e8, 0x7d13e948, 0x62a1db76, 0xd5c33593, 0x4d1be159, 0x7fd3b59b, 0x3676644e, 0x066d3f61},
|
||||
{0xb3bd8b7e, 0x5a896ef3, 0xba5762ab, 0x2319450a, 0x1a545f8b, 0x226f0a07, 0x55446d35, 0x02760973},
|
||||
{0x140e5623, 0x38eaa186, 0x94be15ba, 0x5a48d469, 0xad75d32a, 0xe4f1f15b, 0x2f14e2f1, 0x039ccdaa},
|
||||
{0xe6fcfdb2, 0xad7108d3, 0x9c9f7f04, 0xfadfc050, 0x9df95366, 0xdbb20071, 0xe555c739, 0x02c4d3fa},
|
||||
{0xc3111bcb, 0xb640956f, 0xbb11fb86, 0xcd942bbd, 0xa3db81cd, 0xa4b4eb09, 0x684fdb65, 0x041ed5ed},
|
||||
{0xdd5ca525, 0x462b41fa, 0x153c3d28, 0xbcc17ccd, 0x6b06db5c, 0x8a81d137, 0x4a050358, 0x05f5cf39},
|
||||
{0xcc60fb85, 0x374012a6, 0x34d1905d, 0x978f9785, 0x4e17ff38, 0x713383d4, 0x1055c25d, 0x07f3796f},
|
||||
{0x0643771f, 0x852ba56e, 0x86781a31, 0xadfa956c, 0xb26a3811, 0x2ee2fccf, 0xdbd56ba7, 0x009214ce},
|
||||
{0x68bc148c, 0xe2bf6c4b, 0x01c203ce, 0xd38dbf38, 0x97923b55, 0x27f73df4, 0x5081f7d9, 0x030a2e81},
|
||||
{0xf11422a0, 0xbe23b78f, 0x99cdc2e0, 0xd4f3510d, 0xaa13ffe5, 0xcb05b3da, 0xc724e0c5, 0x028d98a5},
|
||||
{0x96934000, 0x15277271, 0x588c8a51, 0x8013dd5e, 0x9ed55af8, 0x77772f7c, 0x03549e60, 0x020895f8},
|
||||
{0x34db29f8, 0xc0cc8556, 0x67455b5d, 0x5582a9ff, 0x8a9a38b5, 0x12862a43, 0xa59fd242, 0x059655bc},
|
||||
{0x94ceaf98, 0x39bc5131, 0xc71ccc0d, 0x99f4d1a0, 0x54acb87c, 0xc565794d, 0xc33590ef, 0x0593fcef},
|
||||
{0xe97bf51c, 0xa2922d09, 0x3200d367, 0xdbb866a2, 0x4ad9302d, 0x05849ed8, 0xdf93f2b5, 0x000c447e},
|
||||
{0x850fb317, 0x2755d6c2, 0xd45eb3f5, 0x36feeeea, 0xdfbc1d97, 0x4f4471d7, 0x4e3003f8, 0x07ec8926},
|
||||
{0xb6a791f1, 0x38b8dc2a, 0x27a1bbb1, 0x79d6de48, 0xcad54cf2, 0x78c40b06, 0xa43bc898, 0x036dd150},
|
||||
{0x1cc4133c, 0xefa72477, 0x477d39be, 0x5327d617, 0x2c5db3a4, 0xfd1de1f9, 0xc9a18a1c, 0x0147819b},
|
||||
{0xf8133966, 0x275e6b02, 0x87969b48, 0x82bc79b9, 0x5d1e2f0e, 0x85b1f9bd, 0xc819531b, 0x00f9ea29},
|
||||
{0x120edfab, 0x9e0392a5, 0xe3681a15, 0x07403ad4, 0x8a1c3817, 0xa8d469d8, 0x89f15c6f, 0x0395e7fc},
|
||||
{0x641826ac, 0x7f405a9f, 0x6861e2ce, 0xa566e755, 0xba82a050, 0x8a3a08ba, 0xea63598d, 0x071dd923},
|
||||
{0x5f65c188, 0x1d2b7538, 0xd6fc9625, 0xcb704d0f, 0xf59deccc, 0x18729111, 0x52fe1979, 0x07595020},
|
||||
{0x8a08756f, 0x0175aa1c, 0x7fa7c6c4, 0x9a76a312, 0x6e93f6f3, 0x0bfa523a, 0x258c2f23, 0x03d70de4},
|
||||
{0x8229376d, 0x8a0b9d02, 0x2c65c94e, 0x08421430, 0xd34b0aa6, 0x1160b441, 0xbbfb9491, 0x03b9eb75},
|
||||
{0x827caf53, 0x91874856, 0x37e8a006, 0xdfdcae7a, 0x04e3af6b, 0x6dcfc3f2, 0xba66ff37, 0x0592823d},
|
||||
{0x72fb8b0d, 0xb0a6628d, 0xa72b1f03, 0x7d3eef8b, 0x8dd54dbe, 0x5be965ba, 0x96d1fe4c, 0x0114a278},
|
||||
{0x06051d55, 0x0256d8e6, 0xb9fa9dcc, 0xbf152353, 0x44140d6e, 0x6ef2c68c, 0xc9c0fea6, 0x015f291a},
|
||||
{0xed992efc, 0xa1826724, 0x771da991, 0x9a58fd99, 0xd0b370a1, 0xce51a153, 0x826df846, 0x03c53bf5},
|
||||
{0xcc7bf8c3, 0x3909aad7, 0xb08ddfa2, 0xd408ae7d, 0xff94d9fc, 0x2e9ab5d6, 0xf11cbcf6, 0x0020a1b2},
|
||||
{0x3e257b43, 0x448fff07, 0x5fd9edca, 0x00f4a128, 0x7b429f71, 0x6f8987e3, 0x0fc8b522, 0x013336c1},
|
||||
{0x062bd860, 0xef78ac4c, 0xf5d787d2, 0x6539ee52, 0xbb65576e, 0x113b6071, 0x9f3d7f85, 0x0160e952},
|
||||
{0xf966d24e, 0x0c4e7c07, 0x318277e8, 0x011853d8, 0x7c287f58, 0x93bae650, 0xf64289f7, 0x00b974a1},
|
||||
{0x30408cb9, 0x66d19420, 0x0430b017, 0x709ca6c6, 0x23d95951, 0xb174ad46, 0x111f4192, 0x030762f8},
|
||||
{0xf246c901, 0xb9d70015, 0x57a1cdec, 0xd3616cb1, 0x0d732fdb, 0x61aab25e, 0x12d620d8, 0x0712858b},
|
||||
{0x16334e1a, 0x8ec7e113, 0xa96aeeab, 0x0021a55b, 0xfd639175, 0x8f4c1366, 0x69bc866a, 0x07acdde9},
|
||||
{0x23088fc7, 0x1fb24e5e, 0x92a88089, 0xcacd65df, 0x17343c48, 0x103ec3c8, 0xc387a3b5, 0x03d296b9},
|
||||
{0xcd9fedee, 0xae703c5b, 0x7853b30d, 0xd0c3e0c6, 0x12abaef5, 0xc1e326b3, 0x5d57bb23, 0x04f42d7f},
|
||||
{0x1824b92c, 0x19cd1b4e, 0x81ebc117, 0xc5daaff4, 0xb8183a1d, 0xeeedaa59, 0xe28baf8a, 0x069d8f0c},
|
||||
{0x9dc50729, 0x9733e8df, 0xf1b9f411, 0xd7e0dbb9, 0x50edf7ea, 0x59e4dbd2, 0x4059cb5f, 0x002259fe},
|
||||
{0xb79a92b1, 0x5e3197fc, 0x59086db1, 0xbfddf5c5, 0xdbea4a69, 0x234d8639, 0x4d0a367d, 0x05dd79b0},
|
||||
{0xa86eec0c, 0x8cc1d845, 0x573b44d7, 0x3cac8839, 0x7b0de880, 0x8b8d8735, 0x68c99722, 0x01c5ef12},
|
||||
{0xc2ba0f23, 0x12680395, 0x471f947e, 0xd43bcf85, 0xcc9d9b24, 0x19935b68, 0x108eec6a, 0x06263e1e},
|
||||
{0x5b7be972, 0x29617bad, 0xc55b1b68, 0x0ab73eef, 0x2544381e, 0x07f12359, 0x63a080a0, 0x0161444d},
|
||||
{0x312f9080, 0x07a4b921, 0x2f530413, 0x64c25a07, 0x7d71ca2f, 0x3f6903d7, 0x04838ba1, 0x06917cab},
|
||||
{0x10bdb6cc, 0xec7cfc1f, 0x3bcf85c7, 0x7046910d, 0x7bc3ff5f, 0x7ef09e22, 0x385306d4, 0x004b0b60},
|
||||
{0x3a41158a, 0x82d06d78, 0xaa690d1f, 0x37c4a361, 0x7117c44a, 0x700766e1, 0xab40d7e4, 0x031261d0},
|
||||
{0x91b88258, 0x384c5e8b, 0x009b84dc, 0xd777abd5, 0xe7eed224, 0x02102b55, 0xdbefe5e9, 0x03b22830},
|
||||
{0x8770a4be, 0xec982f60, 0x961f56ad, 0x4b92533d, 0xf428c4b9, 0x7df85fbb, 0x2d9291a4, 0x057e4876},
|
||||
{0xf4910a60, 0x6ace9477, 0x9fc63b7f, 0xdb5a705f, 0x72328369, 0x4cc157b4, 0xc282db6f, 0x05b8acbc},
|
||||
{0x57269216, 0x4c69edd9, 0xbfee24ac, 0xd04f1eeb, 0x2a069b18, 0xacda8418, 0x5990b523, 0x03761a4f},
|
||||
{0xc608d246, 0x7f2e2048, 0x4664959b, 0xd4f52ed2, 0x11c1d565, 0x354e3bf7, 0x457eabd3, 0x0156d837},
|
||||
{0xd455f483, 0xea8cbefd, 0x5d940684, 0x33cd5725, 0x8091a287, 0x2d89a777, 0x939b3ef3, 0x06159e4a},
|
||||
{0x4fa405aa, 0xe43439f1, 0xdbe5763d, 0xa258cfc7, 0x78d7b607, 0x9491173a, 0x9ad23eac, 0x01775d66},
|
||||
{0xd772d637, 0x2413e92c, 0x5eac4588, 0x22c99c9f, 0x71a0cdd2, 0xa2bd1d06, 0xfdd73a36, 0x05e88acb},
|
||||
{0xb2bfa1ad, 0x68886b35, 0x35d2dfb6, 0x7a969b62, 0x9767a44a, 0x359ddb45, 0x52e5da6d, 0x00f1a46e},
|
||||
{0x1c5a4861, 0x4ef9fe94, 0x1c841a89, 0x1540cf67, 0xa9bed4f5, 0x8b51336f, 0xf63c32ab, 0x0240fc41},
|
||||
{0x87086e50, 0x7f5c626d, 0x049c46e2, 0x38ec0386, 0x0c597ea7, 0x30b003fd, 0x6660a912, 0x07a8faa1},
|
||||
{0x7dac5d19, 0x2810d2b4, 0x80339f39, 0x040470c4, 0xc946ab30, 0x30d97769, 0x52667151, 0x019fa1f9},
|
||||
{0x5e7c57a2, 0x00e13c8e, 0x2a0fb7bd, 0x95490ca0, 0x08451e35, 0x6af2b76d, 0xcf78c579, 0x04c3a3a1},
|
||||
{0x55e39071, 0xa848b2f2, 0xf132ce21, 0x6831da1d, 0xe080e2ec, 0x439bdda4, 0xadd19a7d, 0x06680f09},
|
||||
{0x6be27786, 0xfebd2a8b, 0x093a5a7f, 0x2cdd8f78, 0xdcb004b3, 0xbc0746a1, 0xd12450ed, 0x005f950a},
|
||||
{0x39759f39, 0xe1462ca6, 0x7bbe087d, 0x0c37dca2, 0x0c8661cb, 0x198de347, 0x7e531b52, 0x03602655},
|
||||
{0x66d7eb25, 0xaf24ead2, 0x5ee6eb03, 0x27cea560, 0x4f6267c7, 0xe9aa6d50, 0xe5dd28e0, 0x00c962b1},
|
||||
{0xb11706c9, 0x3c3407a5, 0xcf0e1b88, 0x44370686, 0x9fbda5e3, 0x5d0e7af0, 0x41cf0a6b, 0x010d235f},
|
||||
{0x358cfcc2, 0x1fbc42a3, 0xc78f7dac, 0x5a2e6ea2, 0xa12773f2, 0x33e089ca, 0xed7788c1, 0x04bef156},
|
||||
{0xbea42f88, 0xdb150649, 0x5f3fb72a, 0x71329f69, 0x86b82de7, 0x7aa46ad0, 0xc6093912, 0x07913b17},
|
||||
{0xb3b67067, 0xb2b074ae, 0xc55f4455, 0x4f17674d, 0xdeb0740d, 0x9a112816, 0x316cc0d3, 0x06bd0cde},
|
||||
{0x1a264ab3, 0x962ceb6b, 0xd99f7159, 0xd5930255, 0x24a4096e, 0x7db961b0, 0x3e50dfed, 0x050c8e5c},
|
||||
{0x443af109, 0xc3eebe54, 0x86946633, 0x2ca03fcb, 0x04badff6, 0x6e6eef04, 0x82210754, 0x05d92ab7},
|
||||
{0xa5c0dca4, 0xcbadd8ad, 0x5ac103a0, 0x4cf688cf, 0x26e5d435, 0x571dbdb9, 0x220fc7db, 0x074ffc4d},
|
||||
{0x88740c3e, 0x70b80432, 0x03821aa8, 0x4a959d50, 0xe4df06d8, 0x3eb8c3a0, 0xcac57496, 0x025a425b},
|
||||
{0x55205413, 0xdcadfd29, 0x90b17b01, 0xda7456d2, 0x73696a28, 0x437c2fda, 0x329f6855, 0x00a8a188},
|
||||
{0xa828431e, 0x3cde2cdd, 0x9ed29340, 0x60e6c362, 0x7c13e145, 0xef00dfa9, 0xba288c0b, 0x04159bec},
|
||||
{0x9065f8ee, 0x41d351cd, 0xa4845868, 0x4e2e298f, 0xbdb3834a, 0xbcba6ac1, 0xea85f2ec, 0x042c8871},
|
||||
{0x1fda880f, 0xc4dc0d20, 0x26fc2d5c, 0x4f0f9dc4, 0x86839de7, 0x2c555343, 0xf698dd8f, 0x04d12da8},
|
||||
{0x21bd655a, 0x3a6299bd, 0x8cfd772f, 0x2e4aea22, 0xd2c2590d, 0x09716ad9, 0xb298587d, 0x053b143c},
|
||||
{0xa95e3cbf, 0xd35f3e32, 0x04eac3cf, 0xe380dee7, 0x0f7e3e6b, 0x27e6570a, 0xbed46774, 0x008cd288},
|
||||
{0x9583f023, 0xe42676b0, 0x75cfaa7e, 0x39d57dd6, 0x4f0bb727, 0x10d4a8d0, 0x27c81bdd, 0x016b03c9},
|
||||
{0x4decc603, 0x89b394f7, 0xd24690f4, 0xd7322ee9, 0x947a00fd, 0xbbc12961, 0x82e8fa75, 0x00886d23},
|
||||
{0xeb0faad4, 0x7b48a33b, 0x60e0b0c8, 0x4c11ef26, 0x36f0f791, 0x4163a401, 0xa4074faf, 0x07986fea},
|
||||
{0x31d9587e, 0x96044919, 0x9049fd2d, 0xb1cab341, 0x9c0eea09, 0xf28c83c9, 0x5c6620aa, 0x033b74dd},
|
||||
{0x13ee028c, 0xde558d16, 0x5d4233b0, 0x4dcf3932, 0x2e422803, 0x7bd46887, 0xe1261bff, 0x04b4757d},
|
||||
{0xd48e9b00, 0x6c80848f, 0x10b6a121, 0x937c1e6e, 0xe9f2008c, 0x7782f8b8, 0x2bc7171c, 0x00217358},
|
||||
{0x324228d8, 0xba523265, 0x682ee17c, 0x4ebe5506, 0x3be009f9, 0x6c646fe8, 0x8594b924, 0x046de7bc},
|
||||
{0x3b50645a, 0x270aa33a, 0x2a9c6282, 0x28fd23fd, 0xcfe96515, 0x5b2fa771, 0x3f812377, 0x063039de},
|
||||
{0xaba4060a, 0xa1da52b0, 0x0374be67, 0x7f191fd6, 0x0d7d2126, 0x14c64d05, 0xf7f77381, 0x00419cb7},
|
||||
{0xe4b19319, 0x07eda692, 0x0fef654e, 0x6190d3f6, 0x0b21ca7e, 0x893b0916, 0x073c48b4, 0x0367a3c7},
|
||||
{0xc520e3ea, 0x8fd405b2, 0x487e93c9, 0x73b4f714, 0xd5142cff, 0x70b7ee88, 0xa320eca2, 0x058fb800},
|
||||
{0x72ef3623, 0x3b5a8740, 0xaff370fd, 0xbff4af42, 0xe338258e, 0x64c137b0, 0xc7afafca, 0x05ac9917},
|
||||
{0x82ccc89a, 0x99c46a0d, 0x9ff87868, 0x05ae3209, 0xa489481f, 0x6249b2a4, 0xbaead348, 0x0056c235},
|
||||
{0xba0ea95e, 0x5a0640f3, 0xc03af976, 0x518db5cd, 0x5a250a06, 0x1c3223aa, 0xbc3442eb, 0x0397b942},
|
||||
{0xacf14a4f, 0x164f0705, 0x33eb6c0e, 0x386c2325, 0xd7264573, 0xdfaceff6, 0xd1e22f80, 0x00e94509},
|
||||
{0x9ff51bc7, 0x8964ee48, 0x57bbca04, 0x3e0f5037, 0x6510630c, 0xe78d6c8d, 0xdf0a61c1, 0x041d6351},
|
||||
{0x45aa1b58, 0x47892f3b, 0x915c1c70, 0x5a1787ba, 0x67f20d25, 0xbaa23359, 0x0c4bc4be, 0x00e1919f},
|
||||
{0xb9975332, 0x2a87c37a, 0xcdecebc9, 0x95db523f, 0x1d0db226, 0x703949ee, 0x4c3842dd, 0x03152c1d},
|
||||
{0xecfb6f72, 0x0eff7e6a, 0x9493a628, 0xb3a83455, 0xd596cd51, 0xced58dd1, 0x25ee51ff, 0x033dee78},
|
||||
{0x72a30547, 0x1f4047ca, 0xd40b6d0f, 0x9feefa06, 0x94db1b38, 0x836ffd80, 0xa0992ed5, 0x037c79f6},
|
||||
{0xceb3dffd, 0x7ffa095d, 0x768e2cb3, 0x23097a65, 0x373f6222, 0xd228b1f9, 0xc57feea2, 0x06309a6b},
|
||||
{0xecd4c6f7, 0x7a5bead4, 0x7e70f7de, 0xab92043c, 0x220db8d8, 0xf78f890e, 0x2865a07e, 0x052eeb98},
|
||||
{0xdf253531, 0x8e9a6336, 0xbafa937b, 0xb24b664a, 0x303b1f5a, 0xc89f660e, 0x876bd8c7, 0x07ea9749},
|
||||
{0x1d4c3fec, 0xd958e726, 0x06fbef31, 0xa5eb368f, 0xba6a027d, 0x0c911679, 0x5f80f992, 0x06321b51},
|
||||
{0x046b49b2, 0x3ca61d9e, 0x6aa9c29a, 0x616a47d6, 0x9e9462dc, 0x27a7ffeb, 0x8971b70e, 0x0794ed38},
|
||||
{0x9f47496f, 0xdb259a57, 0xa6b0481c, 0x7f3e3f90, 0x4afab47a, 0x76f42726, 0xc5a79505, 0x07b9da96},
|
||||
{0x57e7aeed, 0x908e6450, 0x81648127, 0xe86db2fb, 0x8dd76882, 0x53f3c573, 0x72327da6, 0x02b37324},
|
||||
{0x73a220ec, 0x82a941c9, 0x7f25beea, 0xb4cbecb7, 0xbfb061d6, 0x746ded71, 0x641b3f3d, 0x00f7af27},
|
||||
{0xcbd4ba67, 0x69b8f4df, 0x3d526981, 0x5ee3ac6f, 0x145cef8c, 0x9372af4e, 0x72a31ef1, 0x05cc1cc6},
|
||||
{0x62d1ba57, 0xce898b0d, 0xee3fa47e, 0x86ba0504, 0x4395b70d, 0xc68233b1, 0x80eb8d60, 0x024cfa58},
|
||||
{0x74d51c41, 0x8fa83850, 0x60f8f9da, 0x5824a285, 0xaf1bea48, 0xa7a2067e, 0x5455acc3, 0x04ba49f2},
|
||||
{0x324c6039, 0x0a1e223e, 0x7b18a9d0, 0x28312228, 0x88b6ecda, 0xb60c1f93, 0x687ba365, 0x053097d8},
|
||||
{0xa7dae551, 0x5604b398, 0xe2e11609, 0x51f02e33, 0xe58e2094, 0x0b51a085, 0x3a3ecc28, 0x078679d6},
|
||||
{0x92d52444, 0xe24b5528, 0x33d0fa70, 0xf77e35ad, 0x9bcbfb57, 0x8af5a7b7, 0x022748d2, 0x015c5f15},
|
||||
{0xc993b168, 0xc002185c, 0x293ad856, 0x5586addb, 0x8ec50726, 0x69c1bfcf, 0x5fd97ea1, 0x00d514fc},
|
||||
{0x8866c747, 0x52d7a9a2, 0x01d6ee05, 0x9bd77465, 0xc3a87a88, 0x576adf96, 0xfa69f0ec, 0x0693e89a},
|
||||
{0x05903be3, 0xcfe50d90, 0xcf739179, 0xbe651dd1, 0x2ae70678, 0xba80ffda, 0xb55b06cc, 0x051dbe40},
|
||||
{0x5585a6f0, 0x4adb5947, 0x9fa37e68, 0x14634b99, 0xa2a910a8, 0x27da5fbf, 0xa99c704d, 0x022a91ce},
|
||||
{0xe2ddaacd, 0xfabab7b8, 0x60cf9603, 0x1edf6a83, 0xbfadddd3, 0x20b04218, 0xa81dbffa, 0x03e0ddb6},
|
||||
{0xda25c9fd, 0xf9c1e3a3, 0xac57ece3, 0x41ff4e1e, 0xdd684055, 0x9ba50868, 0x46d8156a, 0x01b30314},
|
||||
{0xab76a462, 0x30e067cc, 0x08f1b99b, 0x2d84c4c2, 0x73edc56f, 0x6b399ae0, 0x62cfacb2, 0x02f187e1},
|
||||
{0x34fc5356, 0xb085758e, 0xf805fedf, 0xbafe9a1c, 0x95272d01, 0x0bcf423c, 0x1feca651, 0x01df4a81},
|
||||
{0x4c264e97, 0xd3bd9833, 0xc08b1798, 0xc0b192be, 0xdc3ed49e, 0x42724e80, 0xbaee9a58, 0x04100303},
|
||||
{0xe49749c9, 0xb653c919, 0x09f8e2fc, 0x07dbe557, 0xca71e551, 0xbb172d28, 0x7989c8fd, 0x07f5f801},
|
||||
{0xdf1d9004, 0x9412a9f3, 0xbe90d67e, 0xddcf6d66, 0x4692f803, 0x1dbfd679, 0x524c2944, 0x04f4fae1},
|
||||
{0x5707d134, 0xd413afdf, 0x887fd7e9, 0xf8a339cf, 0x84883580, 0xf74544f4, 0x851739e0, 0x0554f72a},
|
||||
{0x59824907, 0xe3827564, 0x421182c9, 0x352eab2a, 0x8f8530f2, 0x19138257, 0x20275950, 0x04e3bf44},
|
||||
{0x33f928b7, 0xef7660f9, 0xf5952362, 0xb7cb0619, 0xf17eb8d7, 0x5b24913b, 0x8e8b8082, 0x00f4804c},
|
||||
{0x5bd84f3e, 0xe7020613, 0x736a1659, 0x7ee777e1, 0x0795844b, 0x34ca7cb6, 0x7503ddc3, 0x07ce12e4},
|
||||
{0x6d8408a5, 0xbbbafb3f, 0x519dadca, 0xe0f02915, 0x0670f5d4, 0x5acba199, 0x4a93340f, 0x0056db45},
|
||||
{0xe404f6c5, 0x73f8a435, 0x01731858, 0x68cd3f7a, 0xd01f3de9, 0x214d3134, 0xd5d75a88, 0x05fb76be},
|
||||
{0xf976eb41, 0x3a66ad86, 0xcd08787a, 0x6401b6d3, 0x7d1e82a8, 0x575950f3, 0x55ee9d49, 0x00e34b33},
|
||||
{0x0cc5cbf4, 0xbff2f4e6, 0xec205dcd, 0x5a6b430d, 0xc94862af, 0xa8114ab3, 0x2fe8be1f, 0x0247ecf5},
|
||||
{0x8b98bf40, 0xded3bc57, 0xe26b66b3, 0xb658c8c4, 0x8d4220db, 0x8bd91c55, 0x94d2adea, 0x00d109f2},
|
||||
{0xedeaec42, 0x0fbfd336, 0x5d407ae8, 0xd94f928d, 0x727e74b5, 0xe5e4a16b, 0xc8c22dd8, 0x06a550df},
|
||||
{0x135e0ee9, 0xe378a012, 0x856a1aef, 0x5be86512, 0xd8febe77, 0x7de04ce2, 0xea43d59b, 0x03ddeed6},
|
||||
{0x005a1d86, 0xc04dc48c, 0x6f29053d, 0x64f4bbd2, 0x9be0aef5, 0x10b1b3db, 0xcc625a0b, 0x03745ca5},
|
||||
{0x1f4f0e85, 0x6c72bd40, 0xc2069cba, 0x4234afd0, 0xb99395f4, 0xc25b262f, 0xae0874e2, 0x0605f6a2},
|
||||
{0xdd756b6d, 0x9513e0d4, 0xf0c137cd, 0x5127a167, 0x7f01c538, 0x1a12a425, 0x00a4483b, 0x068b3aaf},
|
||||
{0x79bc6c86, 0x7a5b3e70, 0x375dc240, 0x5a337909, 0xe111d6ce, 0x46d6fe3c, 0x2ff2ca50, 0x02708b05},
|
||||
{0x1524ad8c, 0x1181eb95, 0x52294490, 0xd0744ddc, 0x848605cf, 0x88ed5b7b, 0xb478c12a, 0x04b9cb49},
|
||||
{0x27105dae, 0x98cb2411, 0xed5c1361, 0x3efa8fae, 0xd498e337, 0x6fa736a5, 0x1e369b4f, 0x038e3b07},
|
||||
{0x98c8db7f, 0xbc5915ae, 0x50425ae8, 0x1f3c8f96, 0xfa86658a, 0x77d60416, 0x28ec2dda, 0x02bc8b30},
|
||||
{0xb94bc10e, 0xad6794f2, 0x7e80093a, 0x7463b3f3, 0x90db4c79, 0x7bf5af53, 0x965c0cc4, 0x031531c6},
|
||||
{0x7cc1083d, 0x66425289, 0xa45d785f, 0x778ba471, 0xbbc94c16, 0xe3f5c599, 0x9b92e036, 0x02606413},
|
||||
{0xcf287faf, 0x191a2ea9, 0x823ddf07, 0xe6406a78, 0xaabe912b, 0xabcf2825, 0x7c48649a, 0x021dab44},
|
||||
{0x65375f6c, 0x9465d77c, 0x65370520, 0x924e189c, 0x918f0105, 0x8be0ca5f, 0xb1925509, 0x07586d27},
|
||||
{0x9302ac44, 0xe4fa93cb, 0xbf87d840, 0xf381ebbd, 0x44793049, 0x5027e7d9, 0xd3f09392, 0x0230b5c3},
|
||||
{0x31d48a82, 0x123e992e, 0x729d40e2, 0xef2990c6, 0x0f331903, 0x946813e3, 0x112a2c4d, 0x022f575e},
|
||||
{0xd4ee8cf7, 0x4b44764e, 0xdb576ebc, 0x4d44cff8, 0x0ab93ba1, 0xc6185d3a, 0x7e3f1e78, 0x0520c2d3},
|
||||
{0xbc46b8b4, 0xd9446736, 0x91e2ede1, 0xc7776293, 0x87689930, 0x0323845f, 0x379293ae, 0x061e359f},
|
||||
{0xb49b3a0a, 0x767a1747, 0x2b58f45e, 0x17e69346, 0x1425ad98, 0x10820519, 0x1b487ae5, 0x0367f384},
|
||||
{0x92f8ac25, 0xe0407696, 0x2beb71a6, 0x9ca9d269, 0x2f0c2471, 0x914017ea, 0xf421a10d, 0x07709cc3},
|
||||
{0xc3bb6a8f, 0x2c8ed622, 0xa2a1a8f2, 0x31c57cb6, 0x4bf6c316, 0x053924d5, 0x09563089, 0x0727b76a},
|
||||
{0x09dc6b5c, 0x567be37f, 0x9476eb5d, 0x57e36f45, 0xee5be5b6, 0xf68488dd, 0x2884c2d7, 0x05ac1ff1},
|
||||
{0x04173760, 0x0fc5b934, 0xda828f00, 0xe43272df, 0x2fad6e9c, 0x7e2ab5fe, 0x0a4995b3, 0x00e0a5eb},
|
||||
{0x42f8ef94, 0x6070024f, 0xe11a6161, 0xad187148, 0x9c8b0fa5, 0x3f046451, 0x87529cfa, 0x005282db}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x08000000},
|
||||
{0x0becc81e, 0xd59e99d9, 0x537cdf25, 0x3ad971a9, 0xbcd60738, 0xaccedf99, 0xd65d66b5, 0x01dafdc6},
|
||||
{0x4bc9ca34, 0xc8e6df6f, 0x5397aaca, 0xab8bfbc5, 0x94813e6e, 0xb5ea6773, 0xe295dda2, 0x0446ed3c},
|
||||
{0x8145aa75, 0xd7981c5b, 0x3d174c52, 0xb14011ea, 0xe4721c1e, 0x647c9ba3, 0x6f6ac6dd, 0x05c3ed0c},
|
||||
{0x6e0bef41, 0x9de8c5cf, 0xcee1b9b0, 0xec349cbb, 0x2121589c, 0xfe72ab05, 0x24c7669c, 0x03b1c96a},
|
||||
{0x246766d8, 0xb878549e, 0xb5a03ab4, 0x8c5d8531, 0x7f1ec75e, 0x334a83ab, 0x46b146d7, 0x01342b29},
|
||||
{0x31055652, 0x8c71bd50, 0x6081f8c3, 0x2eedac49, 0xab013740, 0x25164a76, 0xbca84bf7, 0x05c0a717},
|
||||
{0xd0a6b4f5, 0x1ad37af3, 0x8ca50294, 0x6dc49fe3, 0x5d9529c3, 0x8357a7ff, 0xcefe8efe, 0x02c161bc},
|
||||
{0x296fbf1c, 0x90a5fa7f, 0xc977b113, 0x18226a39, 0xc178262e, 0x9362d5c9, 0x40d28de5, 0x03a362d3},
|
||||
{0x125ca33a, 0x04eeb1c0, 0x8437c604, 0xaa47a4c0, 0xa4d6bafe, 0x064426a2, 0xb8cc76db, 0x00ffbb44},
|
||||
{0x179e2ebe, 0xecf0daf8, 0x2574403b, 0x942e643e, 0x6bf06f7c, 0x684d31aa, 0x244c675c, 0x003b2bde},
|
||||
{0xfeccfccc, 0x96bc19dc, 0x269130b4, 0xbb26f74e, 0xd511649f, 0x15d57a9f, 0x7dcde3c3, 0x02d852a4},
|
||||
{0x44ad0610, 0xb4a47f4c, 0x06fa1b55, 0xdc2f028f, 0xd25979ac, 0xd73ddcd4, 0x076e7f5d, 0x06ba7cbe},
|
||||
{0x349eea63, 0xb0f43dd2, 0x3e64660d, 0x5e64466c, 0xc3bb94ce, 0x7206f426, 0xed4327aa, 0x036cb7c6},
|
||||
{0xf248b36c, 0x6503e80b, 0xe36060ec, 0xb93dd56f, 0x95c2c067, 0x6d3b2763, 0x155023a7, 0x038e7d59},
|
||||
{0xcdf92351, 0x140437ad, 0x2a5ab630, 0xb7a6e1b4, 0xd48175a5, 0xaa80b742, 0xd4afae89, 0x06a50046},
|
||||
{0xaea51997, 0xe8cde2cd, 0x417e3754, 0x612806f6, 0xb940adf4, 0xe40a4a07, 0xa33929b2, 0x063f5efa},
|
||||
{0x0c07573f, 0x0c0926df, 0xd8d4bee3, 0xa84e9027, 0x6bcd79ea, 0xf3776dfa, 0x523f55a8, 0x043a8517},
|
||||
{0x66984d05, 0x5b7e4e45, 0xdb8c30c4, 0xb9381de7, 0xae86e4f6, 0xd7c15128, 0x809daae7, 0x0718f1ad},
|
||||
{0xc1eae1a6, 0xe4fb0a7d, 0xa90a0813, 0xe5484134, 0x895df525, 0x24cca8f9, 0x1cedd2ee, 0x035fd390},
|
||||
{0x82e87775, 0x0a87a942, 0x971f450b, 0x9f2b4b62, 0x8eae6f09, 0x1dc5aecd, 0x1c5686a6, 0x07547fa3},
|
||||
{0x2e35511a, 0x785975cc, 0xa085c456, 0x4266bc82, 0x3abd5bfd, 0x45cf52e1, 0x7bd95ece, 0x019e8e43},
|
||||
{0xae580194, 0xfad72a75, 0x2989ac16, 0xf2bb5a00, 0x55f2b4d0, 0x53fee728, 0x9c7a91e5, 0x02b9f95d},
|
||||
{0x71200963, 0xb0062d2c, 0x1ac57a23, 0xe16e9f91, 0xc4bd9d3e, 0xaae7b169, 0x7f505f35, 0x07462151},
|
||||
{0x57e31913, 0xcf7bd10e, 0x6a4d0ee4, 0x1a360a91, 0x31869e35, 0xb2ba4914, 0x18005db4, 0x07a62d5c},
|
||||
{0xb4344711, 0x431f11e2, 0x6192c47e, 0x0cc3049c, 0xeb9c1bc3, 0x375dff93, 0x42071ee8, 0x03a75790},
|
||||
{0x9ed81498, 0x4eb14251, 0x98b804ef, 0x5852dbc5, 0x56d7f20c, 0xe0c1be13, 0x20d69181, 0x023e7f68},
|
||||
{0xe34f2d55, 0xf2eeb9b5, 0x2aad6f84, 0x63459f16, 0xbe37dbea, 0xf12099e7, 0x11b1a0fd, 0x06e45493},
|
||||
{0x0d6c93ed, 0x63032f6a, 0x5a04829f, 0xd99cbcc8, 0x89608b5e, 0x80f20416, 0x9df329f4, 0x00bf4231},
|
||||
{0x2710f927, 0xc7fc3d1b, 0x90d8503e, 0xc72d19af, 0x9940e689, 0xa9dcd3b8, 0x2da77ac9, 0x06fd386e},
|
||||
{0x08b27bc2, 0xc800035f, 0x4dfacc03, 0xd98987cf, 0x1256e525, 0x24f8fdbf, 0x1f104273, 0x04c575f1},
|
||||
{0x256c604a, 0x68b16e90, 0x6eba097d, 0x7f51023a, 0x1aeba9c8, 0x52c7629c, 0x4809d8da, 0x0575e850},
|
||||
{0x4ac81249, 0x7439d2f9, 0x4fc31ff2, 0x351e4a62, 0xb3906ded, 0x68fb8313, 0x08507a35, 0x007d43d8},
|
||||
{0x98859a12, 0xa87902b8, 0x73af55b3, 0x2f0d13e0, 0x1b9783c2, 0x5a46c66a, 0x2f5f71d4, 0x01045b06},
|
||||
{0x604fce1e, 0x0c379595, 0x7fccc2b4, 0x20ab6eb8, 0xf1820ae7, 0xac0bc709, 0x93fb2b07, 0x07e7654f},
|
||||
{0x246c4bf0, 0xa0e40811, 0x816b15e0, 0xe12accf5, 0x17938138, 0xee417239, 0x2c9a34fb, 0x004e092e},
|
||||
{0xad2cd984, 0x6304351b, 0x4bf1aafc, 0x38546ca6, 0xf310e99f, 0x1fb81192, 0xb5376275, 0x07e89896},
|
||||
{0x7b2d141d, 0xe4376a0b, 0x6dac220c, 0xea1795e5, 0xb19e1901, 0xd778ab50, 0xa94c274f, 0x077df905},
|
||||
{0x16fcd6c7, 0x7039bab1, 0xa6ea1c94, 0x8eececb7, 0x0f122046, 0x84d26ab5, 0x22fd55a1, 0x053c5d48},
|
||||
{0x72f11f65, 0xd43eb7bb, 0xb2a566d6, 0xfb538785, 0x3f35cbf5, 0xccc2cdc6, 0x7112504a, 0x06df5a9e},
|
||||
{0x60ce9c30, 0x75efb55c, 0x3c541437, 0x991873ed, 0xdf0cbb3b, 0x37eaedcb, 0xb04c2858, 0x0278d7f0},
|
||||
{0x1a06866b, 0x5757dd4e, 0x6570fa7f, 0x15c176b1, 0xafe89a1d, 0x9981b57f, 0xee0cb14c, 0x03c57f4d},
|
||||
{0x503c31cd, 0x3438cd66, 0xc0736d4b, 0x34437e52, 0x2a9d1b28, 0xe825b769, 0x73c06ee7, 0x06955a3a},
|
||||
{0x5c5e530e, 0xbbf0995a, 0x6569a2f9, 0xdee304b3, 0x5bd1a886, 0x3b9c993c, 0xc9cd050a, 0x00f66017},
|
||||
{0xee755737, 0x3666e752, 0x74d0e317, 0xa13bfafc, 0x01d2f1bf, 0x17ab672a, 0x0778f525, 0x079dde3a},
|
||||
{0xed8a25e9, 0x96a003c2, 0x8f347cec, 0x45d258fe, 0x96ea14ac, 0x68ff148d, 0xe148eda9, 0x058f4ec7},
|
||||
{0xe2a700ab, 0x23baf732, 0x5202a945, 0x6434725a, 0x2e693363, 0xa19a338d, 0xbf2f39c6, 0x01d0ea7a},
|
||||
{0x3ab52589, 0x5e571cad, 0x92240361, 0xe2916bb2, 0xdff5e354, 0xe6f8897b, 0x2ffa4707, 0x02a62880},
|
||||
{0xef649a85, 0xaf446c62, 0xed4e461f, 0x14d8072f, 0x59993efa, 0x5a07f4e5, 0x72a3a652, 0x00dc28b6},
|
||||
{0xf21511df, 0x139299d7, 0x4854ebc3, 0x8914e707, 0xbfd102a9, 0x9f3b5913, 0x3a5af894, 0x009dc24f},
|
||||
{0x1f4ba4fa, 0x650e1d91, 0x1977bff0, 0x6ba67806, 0xaa9bbc1b, 0xffbdc531, 0x997408aa, 0x057b69b2},
|
||||
{0x65fb1a91, 0x25c03e81, 0x7fd22618, 0x8682f98b, 0xf46cb453, 0xcad67f13, 0x5a80e5c6, 0x060ca599},
|
||||
{0x94188f2a, 0xa7978a90, 0xdbb9338e, 0xd5fc8f0b, 0xcbdd84f0, 0xf8387e6d, 0xbbc743a3, 0x073ae131},
|
||||
{0x0415bbcc, 0xafd00c46, 0x0df4a52a, 0x1a00eb6c, 0x0b96b594, 0x1ec67c64, 0x8e26b699, 0x01cb82a5},
|
||||
{0x7f740f93, 0xf56319fb, 0x2e2f6ed7, 0xb40d559b, 0x75e19784, 0x63f96f04, 0xc31ba061, 0x06406929},
|
||||
{0xfa5a3239, 0x22349e8b, 0xb9ca6bf9, 0xe1236395, 0x9b0017a4, 0x76ae5a8b, 0x17b7af03, 0x06cfb4ce},
|
||||
{0xb51abfe6, 0x34938785, 0x1249edb6, 0x21f54c80, 0xab038972, 0x3bd1cc16, 0xa4a57a81, 0x0636b37f},
|
||||
{0xf88717cf, 0xfda4a9a1, 0xee19d402, 0xf8fcba35, 0x47c9ba1b, 0x1ac940f6, 0xdd991440, 0x013c0ab3},
|
||||
{0x3743adf4, 0x5082318a, 0x22440f94, 0x3293bae1, 0x8dd2d761, 0x4c2e6d7f, 0xcdc38c82, 0x07124118},
|
||||
{0x76198779, 0xb031f8b7, 0x1b6c1944, 0x6742f602, 0x894a6134, 0xa18290db, 0xaba037dc, 0x035289d8},
|
||||
{0x9f8a9b07, 0x4579e855, 0x4dca3764, 0x1e580662, 0xb8c8ef49, 0xda92152e, 0x8b54508a, 0x0444085a},
|
||||
{0x34696648, 0x7f670ce1, 0xc05768d9, 0x2f00108f, 0x390fb519, 0x2d00a444, 0x1cd6f914, 0x015c468b},
|
||||
{0xfe46c5f2, 0x00666cbf, 0x9f7174d6, 0xca4051c5, 0x8e4277f4, 0x1629882a, 0x6ee002a3, 0x00b3f261},
|
||||
{0xc1dbb4f6, 0x418a2b86, 0x9a6ca270, 0x9f453ccc, 0x1d457b20, 0x1966471f, 0x80fd1319, 0x00b4d831},
|
||||
{0x1c76c8b1, 0xa12f86a8, 0xc0125e48, 0x2772e424, 0x1459dfb8, 0x8d650644, 0xad06d01c, 0x02128e5c},
|
||||
{0x3472799c, 0xcc8cc7f6, 0x2f511cae, 0xfbd97f95, 0x5ebbff71, 0xadd8818b, 0x09af0983, 0x00520540},
|
||||
{0x8ec654cc, 0xcaab5dd4, 0x17ba15a9, 0xc05ad0a7, 0x36300a00, 0x4bda7469, 0x41bb0610, 0x02e486cd},
|
||||
{0x2d6be8b5, 0x077ba983, 0xfe89eb7d, 0xdd5e728f, 0x63f9c51f, 0xe3c872fb, 0xce639995, 0x01f2f7a8},
|
||||
{0xaa2ea7eb, 0xd82b1599, 0xa16489e0, 0x1be5d254, 0x173d3219, 0x19cb236a, 0x1fe63b23, 0x007dd45f},
|
||||
{0x19dba628, 0xa27cc4d3, 0x5fd2e061, 0xf04ac441, 0x9307a758, 0xc7405333, 0x28c40fe4, 0x0103c707},
|
||||
{0x54662aab, 0xb5129fd1, 0x59158f32, 0x2ec5b69b, 0x12c44eec, 0x6c7e6492, 0xe527abb2, 0x046e7c11},
|
||||
{0xe32d46fe, 0xb9bf4936, 0xb08ef006, 0xf23ae18c, 0xe6a5179e, 0x5352cc59, 0x5bf7c0b8, 0x0753a621},
|
||||
{0x9318db3a, 0x19f65bc2, 0x7e3d0014, 0x93ff3f79, 0x6beb580d, 0xf7f93c7f, 0xddd72603, 0x04fdb898},
|
||||
{0xe184a935, 0xf7e1f88f, 0x1ad510f0, 0x82a0f047, 0x4c9ab6ca, 0xce0f7c44, 0x5104a95a, 0x0552304e},
|
||||
{0x985bba5c, 0x06615580, 0xf487a1fb, 0x8ccd29a8, 0xeecf758d, 0xb3e15ed0, 0x857ce648, 0x05328783},
|
||||
{0x6cb042b0, 0x5d1d5a22, 0x0277083c, 0x64375cf4, 0x5fa82215, 0xe8947dab, 0x86932495, 0x05e72829},
|
||||
{0x8c3e2849, 0x5bf6f46a, 0x4924c8f4, 0x7e40314c, 0xdffd6118, 0x3c74a4ba, 0x2f8de20a, 0x05247cdd},
|
||||
{0xd0042d11, 0x25a418c5, 0x2f7da60c, 0x1b60ee9f, 0x02c0b69f, 0x61c041ad, 0x15670214, 0x0632d33a},
|
||||
{0x90e05a92, 0x32b03a5e, 0x78d1e8d6, 0xfb12a1b1, 0x5bc2f5d5, 0xb8af534e, 0xa032918a, 0x05ab4772},
|
||||
{0x0a711a9d, 0x096878a8, 0x6b083c8c, 0x87d070da, 0x87d06afb, 0x77931578, 0xf3104057, 0x03705277},
|
||||
{0xdf993e46, 0x502d2374, 0x35baf646, 0xc1cd2868, 0xe30aa213, 0xa61b54b6, 0xbce34b74, 0x02511017},
|
||||
{0x90a6b9b9, 0xcfb6c51a, 0x8be6ade8, 0x4e0b29ef, 0xd3832d74, 0xa8292467, 0x41ca1e45, 0x02ce7977},
|
||||
{0x3e672d5b, 0x25ee10aa, 0x28597504, 0xb0e60c63, 0xe263c827, 0x4a8d0567, 0xfadefeba, 0x01f4ec42},
|
||||
{0xa5a26158, 0x8b4b15e0, 0x88a71cf2, 0xa59b2df9, 0x5d734341, 0xde44f2e7, 0x4db8d2e8, 0x007a18a0},
|
||||
{0xb4d18100, 0x30fcf001, 0xf8ae0b4f, 0xcdaa5334, 0xe325615a, 0x67017b2b, 0xf0ccbf57, 0x016c6d47},
|
||||
{0xba937732, 0x66afc115, 0xc20be386, 0x917d4890, 0xa017c59d, 0x5dadccff, 0x986c39c1, 0x043fa44e},
|
||||
{0x08baa72a, 0xc57ec886, 0x052364ed, 0xe65a4680, 0x85f9a523, 0x0536b505, 0xfe744ee2, 0x03580609},
|
||||
{0x1bab1ab8, 0x88109415, 0x62f0fa74, 0x02244b19, 0x915618e0, 0x837fcd10, 0x942f12d2, 0x061b83d0},
|
||||
{0x687b7798, 0x823d0bba, 0x84a49784, 0x5f93174a, 0x2574af37, 0xcfd64159, 0xe108057c, 0x0290722e},
|
||||
{0x58a66036, 0x900a7031, 0x6153c2ae, 0xcb443378, 0xa6ccdffe, 0x4c48b8dd, 0xa06e955a, 0x049a9211},
|
||||
{0xea0b9dd9, 0x1b034532, 0x638c79ec, 0x11cba08f, 0x7c5b2d15, 0x16d00728, 0xbb9a759c, 0x05abcbcd},
|
||||
{0x1552d6af, 0x21b4f60e, 0xbed54865, 0x2f7ea9d2, 0x738befdb, 0x39378802, 0x97845360, 0x02adf76c},
|
||||
{0x4026bb92, 0x6e5eb2ca, 0xcbed5570, 0x18f3d8bf, 0xb655ac26, 0x2a5fc8cd, 0x3809a1c5, 0x0031cd25},
|
||||
{0x0ef5e011, 0x2d698950, 0xc018b82d, 0xc0668c45, 0xf520d325, 0xd180ff47, 0xa38122b1, 0x046714c7},
|
||||
{0x12df2cc7, 0x8dec8a4b, 0x963031f8, 0x5eb84a1b, 0x88525708, 0xb75ad701, 0x07df57bd, 0x02054a99},
|
||||
{0x82b2f616, 0xe0013d43, 0x7b385914, 0x2ad34c97, 0x11108f4b, 0xc9969223, 0x9c9fad59, 0x0183f639},
|
||||
{0x06b4dc38, 0xaca9dfbc, 0x962d5774, 0x85596bbc, 0x22f1cd7d, 0xd7023923, 0x2067b180, 0x04d3c939},
|
||||
{0xe4004173, 0x6d13e6ab, 0xaafe8726, 0x3495d095, 0x33dc3303, 0xa22d3e4a, 0x776d2e14, 0x0276dbb2},
|
||||
{0x68c539b6, 0xa03f83cb, 0x7b42a06e, 0xfd3fa839, 0xe8d45ac3, 0xea0f1f15, 0xa414b012, 0x061adb94},
|
||||
{0xb33fb188, 0xd22fc6e3, 0xf723dc18, 0xbebc7978, 0xf6c99f34, 0xa874b584, 0xf67ff454, 0x049beb53},
|
||||
{0x754bed16, 0x7c247948, 0xe50eac10, 0x4a84bcfb, 0xade97580, 0xc00d65df, 0xca79c5ae, 0x0763d73c},
|
||||
{0x7aadbe1a, 0x696e27af, 0x9d8e2a1f, 0x113535e0, 0x4c011766, 0x6953003f, 0xbb52558c, 0x0498a75f},
|
||||
{0x6e09cee7, 0xcf26e897, 0x299b63c7, 0x813a76f2, 0x0939904c, 0x67c02fa7, 0x7e0b9483, 0x045c41a9},
|
||||
{0x4af5adcc, 0xad979914, 0xc2c7c068, 0x7d9267f9, 0x21b4a0a7, 0xda4fa3f8, 0x3386c423, 0x03f4bcc9},
|
||||
{0xd1228595, 0xe5fcd634, 0x12fc8b7c, 0x5571b994, 0x244857f8, 0xd50dcd33, 0x263b93f0, 0x060dc1d6},
|
||||
{0xfee59c89, 0x7040a236, 0x78ceb168, 0x91a4301b, 0x19cdb36a, 0x973b55bd, 0x71008400, 0x06a1c58e},
|
||||
{0x6af1f351, 0x1d3c7ad7, 0xe8ad24dc, 0x8493c0c1, 0x48d5ffd9, 0x076f9dea, 0x5931555f, 0x00b9b2bf},
|
||||
{0xeaa5731c, 0xa3d54d89, 0xba84ee02, 0xfcc41a45, 0xcc1cdac8, 0x7c828f73, 0x5bfe9d23, 0x009c426b},
|
||||
{0x3f1f352c, 0x36fb314c, 0x9feb1120, 0x750a2a5f, 0xd7b06171, 0x3a2f19e8, 0x3b550cd9, 0x06de1885},
|
||||
{0xb69183f6, 0xefc03237, 0x979ee075, 0xb5a14fc3, 0x2dcb1d51, 0xbf114125, 0xb8eca2d3, 0x062364f7},
|
||||
{0x95375861, 0x575f1ea7, 0x80cc8dba, 0x30608586, 0xcf7a8f9f, 0x2beca9f5, 0x5fe60da4, 0x00dfc078},
|
||||
{0x0f86ded5, 0x312928eb, 0xb9c4f0cc, 0x646f5d3e, 0x2fbf14dd, 0x23c69382, 0xc44caa0e, 0x023aae90},
|
||||
{0x13e16243, 0xa7c92faf, 0x92efd5fc, 0x035a3e75, 0x86a744ea, 0x32f44d08, 0x1ea28333, 0x05b45217},
|
||||
{0xc41fdf22, 0xb557d203, 0x4bbc8f76, 0x9697570c, 0x81eaf742, 0x3a6a2cb5, 0xb0d03a0f, 0x07f2c08a},
|
||||
{0x2a18b73a, 0xca806385, 0xdb6a953d, 0xf2015d6d, 0xba5f67b9, 0x51d21a8e, 0x14807dd6, 0x051439d5},
|
||||
{0xf75051de, 0x7b6e0c13, 0x14dd1aa0, 0x114681fb, 0x0fd95a37, 0x72a1cccc, 0xa39e5bb8, 0x02f29d4c},
|
||||
{0x116529cd, 0x4808a0de, 0x5b941d1c, 0x1cf38580, 0xd70796f7, 0xc96a451e, 0x3f24e64f, 0x016d083f},
|
||||
{0x3cf155ee, 0xc71b78d0, 0x0c361b67, 0x0c04a134, 0x7756e4a9, 0xdb546edc, 0x2988eb2c, 0x03474404},
|
||||
{0xf30cef17, 0x1a0b3585, 0x864abd80, 0x63c1de29, 0xc0687c8e, 0x0c171d6e, 0xc9763a97, 0x0353aec8},
|
||||
{0x94192fb8, 0x0a2c9cff, 0x1a7f5bbf, 0x27320b93, 0xe5ceeb75, 0x465d2f9f, 0xd78f1cc3, 0x07ce6f99},
|
||||
{0xe8d1b26d, 0x0f899233, 0xb87a2984, 0xed4b44d2, 0x0bd6354a, 0x0c0712c6, 0xc7032f5c, 0x01eb2a31},
|
||||
{0x46b03b57, 0xc4c03fbd, 0x785ebbe8, 0x989b0ff3, 0x7f0bcb19, 0x5cada62a, 0xa97557c9, 0x01426410},
|
||||
{0x96fb0a26, 0xf1d2e82b, 0x1edb9ce3, 0xe270bc10, 0xfc7aaed8, 0x9549cfd0, 0xd90d7c9c, 0x03e8256c},
|
||||
{0x43ac9984, 0x14eef0ee, 0xa16d6770, 0x2903ff22, 0xa38fbfc0, 0xc66c2690, 0x8755440e, 0x0032a202},
|
||||
{0xf3601782, 0x46a07cf2, 0xaa71d137, 0x79f410f9, 0x8bcabc59, 0xc320c6f1, 0xf8ab64d8, 0x00a706cf},
|
||||
{0x8dbd8d4f, 0x8848a9f0, 0x0085061d, 0xeff89e69, 0xfee62fbe, 0x90e634a7, 0x2ffb456b, 0x03983046},
|
||||
{0xb272ed5c, 0x91ec28a8, 0xdc0cbb77, 0xf8529918, 0x3648d2c5, 0x8f896ddb, 0x74edaf19, 0x0668a86c},
|
||||
{0x128c9bd9, 0x341d5fc8, 0x6b3241c5, 0x592f87d8, 0xb2cc3c97, 0xf8cba6f2, 0x03f396ed, 0x03463bf1},
|
||||
{0xafd9d239, 0xcf3ae525, 0xea20b753, 0x06b8b7b9, 0x3408a993, 0xb2be1e49, 0x9f47063f, 0x02bcb200},
|
||||
{0xa0bd0bc8, 0x7ca02722, 0xb862774d, 0xce8b32ee, 0x5f8da059, 0x424ba5f0, 0x3bb422a0, 0x05c81961},
|
||||
{0x32fd8907, 0x137dad8c, 0xc95a3a5d, 0x301d5119, 0x8937ac08, 0x144b38c3, 0x39338de7, 0x00e66f0e},
|
||||
{0xcfc10885, 0xe68b8875, 0x96147e68, 0x4f24d49a, 0x43032c15, 0x5da9e6fd, 0x9bf25e12, 0x061ab0e6},
|
||||
{0x455c65ad, 0xeab29bbd, 0x2448be64, 0x1c7da0e7, 0x8eedfa1f, 0x8c2c1bcd, 0x698c1197, 0x0400e2d2},
|
||||
{0x04549c13, 0x335d3e9e, 0xd31585cc, 0x546f0d82, 0xe16dbbac, 0x350d5ed5, 0x113c53fd, 0x05f77544},
|
||||
{0x7d8f3b7e, 0x6aa75c04, 0x10a641ae, 0xc70851dd, 0x9a0750fe, 0x4d33edd4, 0xcd1b230f, 0x022802cf},
|
||||
{0xef8170e3, 0x59fa1903, 0x62995788, 0x464a73ef, 0x13369717, 0x338be7fd, 0x52d21278, 0x02e97589},
|
||||
{0x4856ddd5, 0x3f2deca8, 0xfced10e2, 0x969b10e2, 0x52860ee7, 0x09620dde, 0xb620fa3f, 0x04a169bf},
|
||||
{0xa03b49f1, 0xd9beb712, 0xe9af606e, 0x0798af09, 0x63e70b9a, 0xe37f9aea, 0xb35abd7c, 0x02542a44},
|
||||
{0xf6e78973, 0x335d4000, 0x76f1bb23, 0x7bc28fde, 0x1b30e9ca, 0x6cfdc907, 0x0400b651, 0x03ff88aa},
|
||||
{0x36433eaf, 0xfb862981, 0x4111cfa3, 0x15fdc659, 0xeab2909d, 0x569574b9, 0x3cd80f84, 0x01442360},
|
||||
{0xe85c4af3, 0xa8ed8f31, 0xe6aaf3da, 0xf7680fee, 0xc5c1772c, 0x2240e931, 0xaebeeb70, 0x04f44f6f},
|
||||
{0x8846e0af, 0x29de323f, 0x42c25319, 0x33f91593, 0x6cbadd58, 0x863099c1, 0xfd83e5b3, 0x06a603cf},
|
||||
{0x86c77703, 0x1bdd17f3, 0xe02db671, 0x8cee8e78, 0x0b6dffce, 0xed1627af, 0xa0d9b3cc, 0x04491984},
|
||||
{0xcb583661, 0x177f8f9c, 0x73d05bfc, 0x54122d0c, 0xebe37b4a, 0xa9231660, 0xd4826038, 0x06e885db},
|
||||
{0x13c253b9, 0x64cde875, 0x2fbc98a9, 0x8484bccb, 0x4885a9af, 0xbad877c5, 0x0cbc33b6, 0x03007c90},
|
||||
{0x47cfa357, 0x41eb9173, 0x325309ad, 0xb3f06289, 0xaa85421b, 0x029da7c1, 0x84de4bd4, 0x07b7eb0d},
|
||||
{0x56b831e2, 0x2c459a80, 0x321aba19, 0x2b99d098, 0xea73c0e1, 0x96237364, 0xe25ed0ed, 0x02f2c638},
|
||||
{0x9b388bf4, 0xfc8c3228, 0x82cd081d, 0xa4c371e4, 0xc85f75df, 0x11239026, 0x8892896e, 0x01f01c5e},
|
||||
{0x73457917, 0xce1dde59, 0x16dd8b49, 0xdfdaeb19, 0xbfd17b1e, 0x4289a976, 0xc842870a, 0x05e2cf7e},
|
||||
{0xc7705532, 0x72faa825, 0x8f7fe8c2, 0xd24bf942, 0xb695e31b, 0xb7403e13, 0xfc85a0c6, 0x02eac9e7},
|
||||
{0x1ddb2dff, 0xc47638e3, 0x799bb649, 0x78b91a13, 0x552588ed, 0x001800de, 0x9cd9425c, 0x01d0640c},
|
||||
{0xfb431e10, 0x159891e7, 0xa012b461, 0x2f2fb29a, 0xb3333e5d, 0xc1dca804, 0x9a47200d, 0x05b918ec},
|
||||
{0x2d5ce760, 0x379119b5, 0xda2ccdab, 0xf9911f75, 0x47b5c054, 0x92b09490, 0x7298d065, 0x0742a31e},
|
||||
{0x4a73d1f1, 0xe2a1046b, 0xc6ab4d9c, 0xbc85a747, 0xba0701f8, 0x79b0e699, 0xeebc6762, 0x05e5c2cb},
|
||||
{0xe0c0db50, 0xdc644b37, 0x2b8444d2, 0x26f7f083, 0x63479a84, 0x90acf2e7, 0x90ffe372, 0x0590d880},
|
||||
{0x83c0fc9c, 0x3dd1aba4, 0xcfb43020, 0x30a1051f, 0xaf5be716, 0x7d1ca380, 0x1ed8aed9, 0x01d56947},
|
||||
{0x0fa23690, 0x657df8c4, 0x32111be3, 0x61a12fe4, 0xe78236c9, 0xd6cc9942, 0x85e66191, 0x01709635},
|
||||
{0xc6a054f0, 0x96bf35ed, 0x004113cc, 0x9d1e411a, 0x1ac7a3ec, 0xccdb9bc3, 0xd08016b8, 0x07362425},
|
||||
{0x9721b035, 0x72744cce, 0x0beb72e3, 0xb87eb606, 0x60870c2e, 0x00c5e70c, 0x685d7c14, 0x029fa4d3},
|
||||
{0x86e52af4, 0x06d3a7a3, 0x70020878, 0x7b1c814a, 0x52e68007, 0x44373cb7, 0xe403540f, 0x041cf8c0},
|
||||
{0x76a27949, 0xd5dbc8bf, 0x27d9cd12, 0xb41449bc, 0xa7a667a1, 0x93740020, 0x0fbb4e77, 0x000bf807},
|
||||
{0x9969cfe9, 0x274ce281, 0x259ec27c, 0x3234d283, 0xe0b44f04, 0x9ff85b71, 0xffcc1006, 0x0298d060},
|
||||
{0x68ab54f8, 0x5cd8b289, 0x437eaab8, 0x42e3877f, 0x9318bd3e, 0x6490dc61, 0x4e54d968, 0x075b01f3},
|
||||
{0x7b64243c, 0x73100d65, 0x5c802f82, 0x692378be, 0x88184c0c, 0x00283dbb, 0xab6f4f0e, 0x0442efad},
|
||||
{0x72015722, 0xbe83b708, 0xe1cdcf0e, 0x2035319f, 0x398347da, 0x2b1b3351, 0x1a14b8dc, 0x061823d8},
|
||||
{0x378d9803, 0x1090948c, 0x4725c64b, 0x61a558cc, 0x7d7fcd91, 0x9e5bd3b5, 0x57ebda25, 0x061e02a0},
|
||||
{0xf8324dc8, 0x166b4a3c, 0x38133fda, 0xa25b9d11, 0x917171a5, 0x9d602950, 0x417d104e, 0x0632e48b},
|
||||
{0x6a61d5e0, 0x03b9f1b9, 0xe59cfbb7, 0xd906b740, 0x7892fbe4, 0x99a93267, 0xad1b8171, 0x06ddc2a6},
|
||||
{0x67fc3874, 0x6ae4355d, 0xb1ada695, 0x4fa456d8, 0x9f91ac43, 0x4e234065, 0x829d173e, 0x028da309},
|
||||
{0xfc695c2c, 0x1e08dd18, 0xfa687112, 0x1c0a2fad, 0xffd6302a, 0xeb5ebf01, 0xfd1d10f5, 0x012fd387},
|
||||
{0x236e65c9, 0x0b907f2e, 0xb1281d54, 0x92ba7a15, 0xc13f1d75, 0x07f0a6ad, 0xcd6d1e9c, 0x05dfe4e3},
|
||||
{0xc45f33f8, 0xd99cc41a, 0xd373165c, 0xc1c10a71, 0x2ce2936a, 0x6c809230, 0xa0498cf5, 0x018dc832},
|
||||
{0x7b222ad8, 0x8e881eab, 0xb6194efb, 0xc8b48774, 0x963c6b6b, 0x38452dfd, 0xe4c4e0f8, 0x02847f5a},
|
||||
{0x2bf4ad95, 0x2950bb4a, 0xdc39ffb0, 0x37f42c9b, 0x101253a8, 0x3814fa42, 0xb67f2ca5, 0x04d4a34c},
|
||||
{0xa9684ba0, 0x6c40fece, 0x3b13bca4, 0xc7108aad, 0xe7bff9be, 0x98ccc7ea, 0xe9b3b316, 0x048b3a6a},
|
||||
{0x08390a2b, 0x4d908260, 0x74b070bc, 0xd5a641d0, 0x910015c5, 0xc3b19274, 0xd5a998a7, 0x02ac8e74},
|
||||
{0x9698d605, 0x8de03acc, 0xa4c9137f, 0x3b8b720c, 0x354faf46, 0x5bbad6e4, 0xfd9e842f, 0x0054c120},
|
||||
{0xd65aead5, 0x305fa33f, 0x0fe296f9, 0xba02b164, 0x708efc94, 0x64cba43c, 0x8ad7f0ef, 0x034b9ffe},
|
||||
{0x13c2e8f4, 0x59e1179e, 0xc572f8a8, 0x5d823d59, 0x74003bce, 0x0cfdb6ee, 0x011c179e, 0x00763941},
|
||||
{0xa47999a8, 0x29b692ee, 0xbfcd80d8, 0x6436c3f1, 0x959768d7, 0x553444f3, 0x583896d4, 0x01d45a26},
|
||||
{0xc150b3f8, 0x0ce0791d, 0xf493c135, 0x7d3a0c1f, 0x5ede0712, 0x4d37cc23, 0x34fbae9c, 0x036a6a38},
|
||||
{0x2ca1eb78, 0xa8ee8204, 0x66d8b759, 0xc713a1dc, 0xac061800, 0x1813508d, 0x3b1f0da2, 0x05725ca0},
|
||||
{0xf2f391c1, 0xbe6826df, 0x232878f0, 0xeb85b046, 0xf7e1d662, 0xf5a96510, 0xe38c2b64, 0x0419a43b},
|
||||
{0xe69e791b, 0x4b54889b, 0xb5c95ea5, 0xb371eeb0, 0x0b2f26a3, 0x9f53ccca, 0x66f45f71, 0x0040592d},
|
||||
{0xad2e5d5b, 0x4ced12db, 0x0987b849, 0x5f57b16d, 0xd9ec045b, 0xcab0e2e9, 0x6cfbf4df, 0x03e4e405},
|
||||
{0x3ecb72a4, 0xd71a1eee, 0x03a13fb7, 0x6bd9f7ec, 0x5877c6c7, 0xb74a54c8, 0xa28236a5, 0x0377689b},
|
||||
{0x74b3354c, 0x6f558a20, 0x3f776b18, 0xb67f6d10, 0x01165ed8, 0x8c447df2, 0xf3889308, 0x056b8991},
|
||||
{0x0d306b7a, 0x9482eb10, 0xd441cd03, 0xdd738e0f, 0x2de5dfd7, 0x6d186de5, 0x75fd1833, 0x00781b3e},
|
||||
{0x77ec28e5, 0xdbc14748, 0xd26e050c, 0x02ceee41, 0x18457c96, 0x8e5aef74, 0x1823c60f, 0x0461a6e2},
|
||||
{0x2be17c8b, 0x172e551d, 0x49c6a7b8, 0x90e25fa2, 0xa1b3478f, 0x6219e63e, 0xd063a517, 0x00c412f8},
|
||||
{0x65a9b68e, 0xb136b848, 0x673c6cbc, 0x9a9b7169, 0xf8ec7473, 0x15fa1875, 0x3033a5d6, 0x022d72f6}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x80000000, 0x00000008, 0x04000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc0000000, 0x0000000c, 0x06000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xe0000000, 0x0000000e, 0x07000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xf0000000, 0x0000000f, 0x07800000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x78000000, 0x00000010, 0x07c00000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xbc000000, 0x00000010, 0x07e00000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xde000000, 0x00000010, 0x07f00000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xef000000, 0x00000010, 0x07f80000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xf7800000, 0x00000010, 0x07fc0000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfbc00000, 0x00000010, 0x07fe0000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfde00000, 0x00000010, 0x07ff0000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfef00000, 0x00000010, 0x07ff8000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff780000, 0x00000010, 0x07ffc000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffbc0000, 0x00000010, 0x07ffe000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffde0000, 0x00000010, 0x07fff000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffef0000, 0x00000010, 0x07fff800},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfff78000, 0x00000010, 0x07fffc00},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffbc000, 0x00000010, 0x07fffe00},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffde000, 0x00000010, 0x07ffff00},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffef000, 0x00000010, 0x07ffff80},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffff7800, 0x00000010, 0x07ffffc0},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffbc00, 0x00000010, 0x07ffffe0},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffde00, 0x00000010, 0x07fffff0},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffef00, 0x00000010, 0x07fffff8},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffff780, 0x00000010, 0x07fffffc},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffffbc0, 0x00000010, 0x07fffffe},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffffde0, 0x00000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffffef0, 0x80000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffff78, 0xc0000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffbc, 0xe0000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffde, 0xf0000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffef, 0xf8000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x80000000, 0xfffffff7, 0xfc000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xc0000000, 0xfffffffb, 0xfe000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xe0000000, 0xfffffffd, 0xff000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xf0000000, 0xfffffffe, 0xff800010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x78000000, 0xffffffff, 0xffc00010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xbc000000, 0xffffffff, 0xffe00010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xde000000, 0xffffffff, 0xfff00010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xef000000, 0xffffffff, 0xfff80010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xf7800000, 0xffffffff, 0xfffc0010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfbc00000, 0xffffffff, 0xfffe0010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfde00000, 0xffffffff, 0xffff0010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfef00000, 0xffffffff, 0xffff8010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xff780000, 0xffffffff, 0xffffc010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffbc0000, 0xffffffff, 0xffffe010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffde0000, 0xffffffff, 0xfffff010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffef0000, 0xffffffff, 0xfffff810, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfff78000, 0xffffffff, 0xfffffc10, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffbc000, 0xffffffff, 0xfffffe10, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffde000, 0xffffffff, 0xffffff10, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffef000, 0xffffffff, 0xffffff90, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffff7800, 0xffffffff, 0xffffffd0, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffbc00, 0xffffffff, 0xfffffff0, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffde00, 0xffffffff, 0x00000000, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffef00, 0xffffffff, 0x00000008, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffff780, 0xffffffff, 0x0000000c, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffffbc0, 0xffffffff, 0x0000000e, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffffde0, 0xffffffff, 0x0000000f, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffffef0, 0x7fffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffff78, 0xbfffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffbc, 0xdfffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffde, 0xefffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffef, 0xf7ffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x80000000, 0xfffffff7, 0xfbffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xc0000000, 0xfffffffb, 0xfdffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xe0000000, 0xfffffffd, 0xfeffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xf0000000, 0xfffffffe, 0xff7fffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x78000000, 0xffffffff, 0xffbfffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xbc000000, 0xffffffff, 0xffdfffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xde000000, 0xffffffff, 0xffefffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xef000000, 0xffffffff, 0xfff7ffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xf7800000, 0xffffffff, 0xfffbffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfbc00000, 0xffffffff, 0xfffdffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfde00000, 0xffffffff, 0xfffeffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfef00000, 0xffffffff, 0xffff7fff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xff780000, 0xffffffff, 0xffffbfff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffbc0000, 0xffffffff, 0xffffdfff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffde0000, 0xffffffff, 0xffffefff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffef0000, 0xffffffff, 0xfffff7ff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfff78000, 0xffffffff, 0xfffffbff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffbc000, 0xffffffff, 0xfffffdff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffde000, 0xffffffff, 0xfffffeff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffef000, 0xffffffff, 0xffffff7f, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffff7800, 0xffffffff, 0xffffffbf, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffbc00, 0xffffffff, 0xffffffdf, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffde00, 0xffffffff, 0xffffffef, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffef00, 0xffffffff, 0xfffffff7, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffff780, 0xffffffff, 0xfffffffb, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffffbc0, 0xffffffff, 0xfffffffd, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffffde0, 0xffffffff, 0xfffffffe, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffffef0, 0x7fffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffff78, 0xbfffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffffbc, 0xdfffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffffde, 0xefffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffffef, 0xf7ffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x80000000, 0xfffffff7, 0xfbffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xc0000000, 0xfffffffb, 0xfdffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xe0000000, 0xfffffffd, 0xfeffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xf0000000, 0xfffffffe, 0xff7fffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x78000000, 0xffffffff, 0xffbfffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xbc000000, 0xffffffff, 0xffdfffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xde000000, 0xffffffff, 0xffefffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xef000000, 0xffffffff, 0xfff7ffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xf7800000, 0xffffffff, 0xfffbffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfbc00000, 0xffffffff, 0xfffdffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfde00000, 0xffffffff, 0xfffeffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfef00000, 0xffffffff, 0xffff7fff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xff780000, 0xffffffff, 0xffffbfff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffbc0000, 0xffffffff, 0xffffdfff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffde0000, 0xffffffff, 0xffffefff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffef0000, 0xffffffff, 0xfffff7ff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfff78000, 0xffffffff, 0xfffffbff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffbc000, 0xffffffff, 0xfffffdff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffde000, 0xffffffff, 0xfffffeff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffef000, 0xffffffff, 0xffffff7f, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffff7800, 0xffffffff, 0xffffffbf, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffbc00, 0xffffffff, 0xffffffdf, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffde00, 0xffffffff, 0xffffffef, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffef00, 0xffffffff, 0xfffffff7, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffff780, 0xffffffff, 0xfffffffb, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffffbc0, 0xffffffff, 0xfffffffd, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffffde0, 0xffffffff, 0xfffffffe, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffffef0, 0x7fffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffff78, 0xbfffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffffbc, 0xdfffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffffde, 0xefffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffffef, 0xf7ffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x80000000, 0xfffffff7, 0xfbffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xc0000000, 0xfffffffb, 0xfdffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xe0000000, 0xfffffffd, 0xfeffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xf0000000, 0xfffffffe, 0xff7fffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x78000000, 0xffffffff, 0xffbfffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xbc000000, 0xffffffff, 0xffdfffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xde000000, 0xffffffff, 0xffefffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xef000000, 0xffffffff, 0xfff7ffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xf7800000, 0xffffffff, 0xfffbffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfbc00000, 0xffffffff, 0xfffdffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfde00000, 0xffffffff, 0xfffeffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfef00000, 0xffffffff, 0xffff7fff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xff780000, 0xffffffff, 0xffffbfff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffbc0000, 0xffffffff, 0xffffdfff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffde0000, 0xffffffff, 0xffffefff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffef0000, 0xffffffff, 0xfffff7ff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfff78000, 0xffffffff, 0xfffffbff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffbc000, 0xffffffff, 0xfffffdff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffde000, 0xffffffff, 0xfffffeff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffef000, 0xffffffff, 0xffffff7f, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffff7800, 0xffffffff, 0xffffffbf, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffbc00, 0xffffffff, 0xffffffdf, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffde00, 0xffffffff, 0xffffffef, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffef00, 0xffffffff, 0xfffffff7, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffff780, 0xffffffff, 0xfffffffb, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffffbc0, 0xffffffff, 0xfffffffd, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffffde0, 0xffffffff, 0xfffffffe, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffffef0, 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffff78, 0xbfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffffbc, 0xdfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffffde, 0xefffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffffef, 0xf7ffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x80000001, 0xfffffff7, 0xfbffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xc0000001, 0xfffffffb, 0xfdffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xe0000001, 0xfffffffd, 0xfeffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xf0000001, 0xfffffffe, 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x78000001, 0xffffffff, 0xffbfffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xbc000001, 0xffffffff, 0xffdfffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xde000001, 0xffffffff, 0xffefffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xef000001, 0xffffffff, 0xfff7ffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xf7800001, 0xffffffff, 0xfffbffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfbc00001, 0xffffffff, 0xfffdffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfde00001, 0xffffffff, 0xfffeffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfef00001, 0xffffffff, 0xffff7fff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xff780001, 0xffffffff, 0xffffbfff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffbc0001, 0xffffffff, 0xffffdfff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffde0001, 0xffffffff, 0xffffefff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffef0001, 0xffffffff, 0xfffff7ff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfff78001, 0xffffffff, 0xfffffbff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffbc001, 0xffffffff, 0xfffffdff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffde001, 0xffffffff, 0xfffffeff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffef001, 0xffffffff, 0xffffff7f, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffff7801, 0xffffffff, 0xffffffbf, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffbc01, 0xffffffff, 0xffffffdf, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffde01, 0xffffffff, 0xffffffef, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffef01, 0xffffffff, 0xfffffff7, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffff781, 0xffffffff, 0xfffffffb, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffbc1, 0xffffffff, 0xfffffffd, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffde1, 0xffffffff, 0xfffffffe, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffef1, 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffff79, 0xbfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffffbd, 0xdfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffffdf, 0xefffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffff0, 0xf7ffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000}}};
|
||||
static constexpr storage<8> rou = {0x42f8ef94, 0x6070024f, 0xe11a6161, 0xad187148,
|
||||
0x9c8b0fa5, 0x3f046451, 0x87529cfa, 0x005282db};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#define DEVICE_CONTEXT_H
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
namespace device_context {
|
||||
|
||||
@@ -30,6 +31,28 @@ namespace device_context {
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace device_context
|
||||
// checking whether a pointer is on host or device and asserts device matches provided device
|
||||
static bool is_host_ptr(const void* p, int device_id = 0)
|
||||
{
|
||||
cudaPointerAttributes attributes;
|
||||
CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
|
||||
const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
|
||||
attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
|
||||
const bool is_on_cur_device = !is_on_host && attributes.device == device_id;
|
||||
const bool is_valid_ptr = is_on_host || is_on_cur_device;
|
||||
if (!is_valid_ptr) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Invalid ptr"); }
|
||||
|
||||
return is_on_host;
|
||||
}
|
||||
|
||||
static int get_cuda_device(const void* p)
|
||||
{
|
||||
cudaPointerAttributes attributes;
|
||||
CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
|
||||
const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
|
||||
attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
|
||||
return is_on_host ? -1 : attributes.device;
|
||||
}
|
||||
|
||||
} // namespace device_context
|
||||
#endif
|
||||
174
icicle/include/hash/hash.cuh
Normal file
174
icicle/include/hash/hash.cuh
Normal file
@@ -0,0 +1,174 @@
|
||||
#pragma once
|
||||
#ifndef HASH_H
|
||||
#define HASH_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include <cassert>
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace hash
|
||||
* Includes classes and methods for describing hash functions.
|
||||
*/
|
||||
namespace hash {
|
||||
|
||||
/**
|
||||
* @struct HashConfig
|
||||
* Encodes hash operations parameters.
|
||||
*/
|
||||
struct HashConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool
|
||||
are_outputs_on_device; /**< True if outputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the hash operations asynchronously. If set to `true`, the functions will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false,
|
||||
* functions will block the current CPU thread. */
|
||||
};
|
||||
|
||||
/**
|
||||
* A function that returns the default value of [HashConfig](@ref HashConfig) for the [Hasher](@ref
|
||||
* Hasher) class.
|
||||
* @return Default value of [HashConfig](@ref HashConfig).
|
||||
*/
|
||||
static HashConfig
|
||||
default_hash_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
HashConfig config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputs_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @class Hasher
|
||||
*
|
||||
* An interface containing methods for hashing
|
||||
*
|
||||
* @tparam PreImage type of inputs elements
|
||||
* @tparam Image type of state elements. Also used to describe the type of hash output
|
||||
*/
|
||||
template <typename PreImage, typename Image>
|
||||
class Hasher
|
||||
{
|
||||
public:
|
||||
/// @brief the width of permutation state
|
||||
const unsigned int width;
|
||||
|
||||
/// @brief how many elements a state can fit per 1 permutation. Used with domain separation.
|
||||
const unsigned int preimage_max_length;
|
||||
|
||||
/// @brief portion of the state to absorb input into, or squeeze output from
|
||||
const unsigned int rate;
|
||||
|
||||
/// @brief start squeezing from this offset. Used with domain separation.
|
||||
const unsigned int offset;
|
||||
|
||||
Hasher(unsigned int width, unsigned int preimage_max_length, unsigned int rate, unsigned int offset)
|
||||
: width(width), preimage_max_length(preimage_max_length), rate(rate), offset(offset)
|
||||
{
|
||||
assert(
|
||||
rate * sizeof(PreImage) <= preimage_max_length * sizeof(Image) &&
|
||||
"Input rate can not be bigger than preimage max length");
|
||||
}
|
||||
|
||||
virtual cudaError_t hash_2d(
|
||||
const Matrix<PreImage>* inputs,
|
||||
Image* states,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int output_len,
|
||||
uint64_t number_of_rows,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Absorb 2d is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
};
|
||||
|
||||
virtual cudaError_t compress_and_inject(
|
||||
const Matrix<PreImage>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
uint64_t number_of_rows,
|
||||
const Image* prev_layer,
|
||||
Image* next_layer,
|
||||
unsigned int digest_elements,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Compress and inject is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
}
|
||||
|
||||
/// @param input pointer to input allocated on-device
|
||||
/// @param out pointer to output allocated on-device
|
||||
cudaError_t compress_many(
|
||||
const Image* input,
|
||||
Image* out,
|
||||
unsigned int number_of_states,
|
||||
unsigned int output_len,
|
||||
const HashConfig& cfg) const
|
||||
{
|
||||
return hash_many((const PreImage*)input, out, number_of_states, width, output_len, cfg);
|
||||
}
|
||||
|
||||
virtual cudaError_t run_hash_many_kernel(
|
||||
const PreImage* input,
|
||||
Image* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Hash many kernel is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
};
|
||||
|
||||
cudaError_t hash_many(
|
||||
const PreImage* input,
|
||||
Image* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const HashConfig& cfg) const
|
||||
{
|
||||
const PreImage* d_input;
|
||||
PreImage* d_alloc_input;
|
||||
Image* d_output;
|
||||
if (!cfg.are_inputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_input, number_of_states * input_len * sizeof(PreImage), cfg.ctx.stream));
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_alloc_input, input, number_of_states * input_len * sizeof(PreImage), cudaMemcpyHostToDevice,
|
||||
cfg.ctx.stream));
|
||||
d_input = d_alloc_input;
|
||||
} else {
|
||||
d_input = input;
|
||||
}
|
||||
|
||||
if (!cfg.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_output, number_of_states * output_len * sizeof(Image), cfg.ctx.stream));
|
||||
} else {
|
||||
d_output = output;
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(run_hash_many_kernel(d_input, d_output, number_of_states, input_len, output_len, cfg.ctx));
|
||||
|
||||
if (!cfg.are_inputs_on_device) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_input, cfg.ctx.stream)); }
|
||||
if (!cfg.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
output, d_output, number_of_states * output_len * sizeof(Image), cudaMemcpyDeviceToHost, cfg.ctx.stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(d_output, cfg.ctx.stream));
|
||||
}
|
||||
|
||||
if (!cfg.is_async) CHK_IF_RETURN(cudaStreamSynchronize(cfg.ctx.stream));
|
||||
|
||||
return CHK_LAST();
|
||||
};
|
||||
};
|
||||
} // namespace hash
|
||||
|
||||
#endif
|
||||
@@ -6,51 +6,24 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
using namespace hash;
|
||||
|
||||
namespace keccak {
|
||||
/**
|
||||
* @struct KeccakConfig
|
||||
* Struct that encodes various Keccak parameters.
|
||||
*/
|
||||
struct KeccakConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the Keccak asynchronously. If set to `true`, the keccak_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, keccak_hash
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
KeccakConfig default_keccak_config()
|
||||
class Keccak : public Hasher<uint8_t, uint64_t>
|
||||
{
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
KeccakConfig config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputes_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
public:
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const uint8_t* input,
|
||||
uint64_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const override;
|
||||
|
||||
/**
|
||||
* Compute the keccak hash over a sequence of preimages.
|
||||
* Takes {number_of_blocks * input_block_size} u64s of input and computes {number_of_blocks} outputs, each of size {D
|
||||
* / 64} u64
|
||||
* @tparam C - number of bits of capacity (c = b - r = 1600 - r). Only multiples of 64 are supported.
|
||||
* @tparam D - number of bits of output. Only multiples of 64 are supported.
|
||||
* @param input a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be of size [input_block_size](@ref input_block_size) * [number_of_blocks](@ref
|
||||
* number_of_blocks)}.
|
||||
* @param input_block_size - size of each input block in bytes. Should be divisible by 8.
|
||||
* @param number_of_blocks number of input and output blocks. One GPU thread processes one block
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be of size [output_block_size](@ref output_block_size) * [number_of_blocks](@ref
|
||||
* number_of_blocks)}
|
||||
*/
|
||||
template <int C, int D>
|
||||
cudaError_t
|
||||
keccak_hash(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config);
|
||||
Keccak(unsigned int rate) : Hasher<uint8_t, uint64_t>(25, 25, rate, 0) {}
|
||||
};
|
||||
} // namespace keccak
|
||||
|
||||
#endif
|
||||
14
icicle/include/matrix/matrix.cuh
Normal file
14
icicle/include/matrix/matrix.cuh
Normal file
@@ -0,0 +1,14 @@
|
||||
#pragma once
#ifndef MATRIX_H
#define MATRIX_H

#include <stddef.h> // size_t -- keeps this header self-contained

namespace matrix {
  /**
   * @struct Matrix
   * Plain, non-owning description of a 2D block of elements: a pointer plus its dimensions.
   * The struct performs no allocation or deallocation of `values`.
   *
   * @tparam T element type
   */
  template <typename T>
  struct Matrix {
    T* values;     ///< pointer to the elements (element layout is defined by the users of this struct)
    size_t width;  ///< matrix width, in elements
    size_t height; ///< matrix height, in elements
  };
} // namespace matrix

#endif
|
||||
130
icicle/include/merkle-tree/merkle.cuh
Normal file
130
icicle/include/merkle-tree/merkle.cuh
Normal file
@@ -0,0 +1,130 @@
|
||||
#pragma once
|
||||
#ifndef MERKLE_H
|
||||
#define MERKLE_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace hash;
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace merkle_tree
|
||||
* Implementation of the [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) builder,
|
||||
* parallelized for the use on GPU
|
||||
*/
|
||||
namespace merkle_tree {
|
||||
static constexpr size_t GIGA = 1024 * 1024 * 1024;
|
||||
|
||||
/// Bytes per stream
|
||||
static constexpr uint64_t STREAM_CHUNK_SIZE = GIGA;
|
||||
|
||||
/// Flattens the tree digests and sums them up to get the number of elements
/// needed to contain all the digests.
///
/// The result is `digest_elements * (arity^0 + arity^1 + ... + arity^height)`,
/// i.e. one row per level `0..=height`, each row `arity` times longer than the previous one.
///
/// @param height          height of the tree; `height + 1` rows are accumulated
/// @param arity           factor by which the row length grows from one level to the next
/// @param digest_elements length of the first (shortest) row
/// @return total number of elements over all rows (a count of elements, not bytes)
static size_t get_digests_len(uint32_t height, uint32_t arity, uint32_t digest_elements)
{
  // Row 0 is always present, so it seeds the accumulator.
  size_t row_length = digest_elements;
  size_t digests_len = row_length;

  // The counter has the same unsigned type as `height` and the bound is a strict `<`,
  // so there is no signed/unsigned comparison and the counter cannot overflow,
  // whatever value `height` holds.
  for (uint32_t level = 0; level < height; ++level) {
    row_length *= arity;
    digests_len += row_length;
  }

  return digests_len;
}
|
||||
|
||||
/// Exchanges the two pointers stored at `r` and `s`.
/// Only the pointer values are swapped; the pointed-to objects are not touched.
template <typename T>
void swap(T** r, T** s)
{
  T* const previous_s = *s;
  *s = *r;
  *r = previous_s;
}
|
||||
|
||||
/// Returns how many times `number_of_elements` can be halved (shifted right by one)
/// before it becomes zero, not counting the final step: floor(log2(n)) for n >= 1,
/// and 0 for n == 0.
static unsigned int get_height(uint64_t number_of_elements)
{
  unsigned int height = 0;
  for (uint64_t remaining = number_of_elements >> 1; remaining != 0; remaining >>= 1) {
    height++;
  }
  return height;
}
|
||||
|
||||
/**
 * @struct TreeBuilderConfig
 * Struct that encodes various Tree builder parameters.
 * Field order matters: the struct is filled positionally by `default_merkle_config`.
 */
|
||||
struct TreeBuilderConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
unsigned int arity; /**< Arity of the Merkle tree. Default value: 2. */
|
||||
unsigned int
|
||||
keep_rows; /**< How many rows of the Merkle tree should be written to output. '0' means all of them. */
|
||||
unsigned int
|
||||
digest_elements; /**< The size of output for each bottom layer hash and compression.
                  * Will also be equal to the size of the root of the tree. Default value: 1. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool
|
||||
are_outputs_on_device; /**< True if outputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the tree builder asynchronously. If set to `true`, the build_merkle_tree
                * function will be non-blocking and you'd need to synchronize it explicitly by running
                * `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
                * function will block the current CPU thread. Default value: false. */
|
||||
};
|
||||
|
||||
/**
 * Creates a TreeBuilderConfig populated with the default values:
 * binary tree (arity 2), all rows kept (keep_rows 0), single-element digests,
 * inputs and outputs on the host, synchronous execution.
 *
 * @param ctx device context stored in the config; defaults to the default device context
 * @return the populated configuration, returned by value
 */
static TreeBuilderConfig
default_merkle_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
{
  // Positional aggregate initialization: the order below must follow the field order of TreeBuilderConfig.
  return TreeBuilderConfig{
    ctx,   // ctx
    2,     // arity
    0,     // keep_rows
    1,     // digest_elements
    false, // are_inputs_on_device
    false, // are_outputs_on_device
    false, // is_async
  };
}
|
||||
|
||||
/**
 * Builds the Merkle tree
 *
 * @param inputs a pointer to the leaves layer. May be allocated on device or on host, regulated by the config
 * Expected to have arity ^ (height) * input_block_len elements
 * @param digests a pointer to the digests storage. May only be allocated on the host
 * Expected to have `sum(digests_len * (arity ^ (i))) for i in [0..keep_rows]`
 * @param height the height of the merkle tree
 * @param input_block_len the size of input vectors at the bottom layer of the tree
 * @param compression hasher passed for the compression step
 * (presumably applied to the layers above the bottom one - TODO confirm against the implementation)
 * @param bottom_layer hasher passed for the bottom layer
 * (presumably applied to the input blocks to produce the first row of digests - TODO confirm)
 * @param config tree builder parameters, see TreeBuilderConfig
 * @return a cudaError_t status (NOTE(review): the exact error contract is defined by the implementation, not visible here)
 * # Algorithm
 * The function will split a large tree into many subtrees of a size that will fit `STREAM_CHUNK_SIZE`.
 * Each subtree is built in its own stream (there is a maximum number of streams)
 * After all subtrees are constructed - the function will combine the resulting sub-digests into the final top-tree
 */
|
||||
template <typename Leaf, typename Digest>
|
||||
cudaError_t build_merkle_tree(
|
||||
const Leaf* inputs,
|
||||
Digest* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const Hasher<Leaf, Digest>& compression,
|
||||
const Hasher<Leaf, Digest>& bottom_layer,
|
||||
const TreeBuilderConfig& config);
|
||||
|
||||
/**
 * Commits to a set of matrices (NOTE(review): "mmcs" presumably stands for
 * mixed matrix commitment scheme - only the declaration is visible here, confirm against the implementation).
 *
 * @param inputs pointer to the matrix descriptors to commit to
 * (presumably an array of `number_of_inputs` entries - TODO confirm)
 * @param number_of_inputs number of matrices referenced by `inputs`
 * @param digests a pointer to the digests storage that receives the output
 * @param hasher hasher passed for hashing the matrix data (exact usage defined by the implementation)
 * @param compression hasher passed for the compression step (exact usage defined by the implementation)
 * @param tree_config tree builder parameters, see TreeBuilderConfig
 * @return a cudaError_t status
 */
template <typename Leaf, typename Digest>
|
||||
cudaError_t mmcs_commit(
|
||||
const Matrix<Leaf>* inputs,
|
||||
const unsigned int number_of_inputs,
|
||||
Digest* digests,
|
||||
const Hasher<Leaf, Digest>& hasher,
|
||||
const Hasher<Leaf, Digest>& compression,
|
||||
const TreeBuilderConfig& tree_config);
|
||||
} // namespace merkle_tree
|
||||
|
||||
#endif
|
||||
114
icicle/include/poseidon/constants.cuh
Normal file
114
icicle/include/poseidon/constants.cuh
Normal file
@@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
#ifndef POSEIDON_CONSTANTS_H
|
||||
#define POSEIDON_CONSTANTS_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace poseidon {
|
||||
// Boolean selectors naming the first and the second group of full rounds.
// NOTE(review): their consumers are not visible in this header - presumably passed as a
// flag to the full-rounds routine to tell the two groups apart; confirm at the call sites.
#define FIRST_FULL_ROUNDS true
|
||||
#define SECOND_FULL_ROUNDS false
|
||||
|
||||
/**
 * Default full rounds number.
 * For most of the Poseidon configurations this is the case
 * NOTE(review): 4 is presumably the count per group of full rounds (cf. `full_rounds_half`) - confirm.
 * TODO: Add support for different full rounds numbers
 */
|
||||
const int FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
/**
 * @struct PoseidonConstants
 * These constants are enough to define a Poseidon instance.
 *
 * The struct only stores the pointers; it does not allocate or free the memory they refer to,
 * so copies share the same underlying buffers.
 *
 * @param arity arity of the hash function this instance is configured for
 * @param alpha the `alpha` parameter of the instance (presumably the S-box exponent - TODO confirm)
 * @param partial_rounds number of partial rounds
 * @param full_rounds_half number of full rounds in one half (the name suggests the total is split in two)
 * @param round_constants A pointer to round constants allocated on the device
 * @param mds_matrix A pointer to an mds matrix allocated on the device
 * @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
 * @param sparse_matrices A pointer to sparse matrices allocated on the device
 * @param domain_tag domain tag value; defaults to `S::zero()`
 */
template <typename S>
struct PoseidonConstants {
  // Scalars are zero-initialized so that a default-constructed instance never exposes
  // indeterminate values (the pointers and the domain tag already had initializers).
  unsigned int arity = 0;
  unsigned int alpha = 0;
  unsigned int partial_rounds = 0;
  unsigned int full_rounds_half = 0;
  S* round_constants = nullptr;
  S* mds_matrix = nullptr;
  S* non_sparse_matrix = nullptr;
  S* sparse_matrices = nullptr;
  S domain_tag = S::zero();

  PoseidonConstants() = default;
  PoseidonConstants(const PoseidonConstants& other) = default;

  // Memberwise copy of every field, which is exactly what the compiler generates;
  // defaulting it keeps the assignment in sync with the member list automatically.
  PoseidonConstants<S>& operator=(PoseidonConstants<S> const& other) = default;
};
|
||||
|
||||
/**
 * @class PoseidonKernelsConfiguration
 * Describes the logic of deriving CUDA kernels parameters
 * such as the number of threads and the number of blocks
 *
 * All `width` arguments must be in the range [1, 256]: `number_of_threads` divides by
 * `width`, and yields 0 for any `width` above 256, which the other helpers then divide by.
 */
class PoseidonKernelsConfiguration
{
public:
  // The logic behind this is that 1 thread only works on 1 element
  // We have {width} elements in each state, and {number_of_states} states total
  // Returns the largest multiple of `width` that does not exceed 256.
  static int number_of_threads(unsigned int width) { return 256 / width * width; }

  // The partial rounds operates on the whole state, so we define
  // the parallelism params for processing a single hash preimage per thread
  static const int singlehash_block_size = 128;

  // Number of whole states that fit into one block of `number_of_threads(width)` threads.
  static int hashes_per_block(unsigned int width) { return number_of_threads(width) / width; }

  // Number of blocks of `number_of_threads(width)` threads needed to cover
  // `number_of_states * width` threads, rounded up.
  static int number_of_full_blocks(unsigned int width, size_t number_of_states)
  {
    // The product is kept in size_t: narrowing it to int before dividing silently
    // truncated it for large batches and produced a wrong block count.
    const size_t threads_per_block = static_cast<size_t>(number_of_threads(width));
    const size_t total_number_of_threads = number_of_states * width;
    const size_t full_blocks = total_number_of_threads / threads_per_block;
    const size_t has_partial_block = (total_number_of_threads % threads_per_block != 0) ? 1 : 0;
    return static_cast<int>(full_blocks + has_partial_block);
  }

  // Number of blocks of `singlehash_block_size` threads needed for one thread per state, rounded up.
  static int number_of_singlehash_blocks(size_t number_of_states)
  {
    const size_t full_blocks = number_of_states / singlehash_block_size;
    const size_t has_partial_block = (number_of_states % singlehash_block_size != 0) ? 1 : 0;
    return static_cast<int>(full_blocks + has_partial_block);
  }
};
|
||||
|
||||
using PKC = PoseidonKernelsConfiguration;
|
||||
|
||||
/**
 * Fills `poseidon_constants` from explicitly supplied Poseidon parameters and constant arrays.
 * NOTE(review): only the declaration is visible here. Whether the arrays are expected on the
 * host and copied to the device (PoseidonConstants documents its pointers as device pointers)
 * must be confirmed against the implementation.
 *
 * @param arity arity of the hash function
 * @param alpha the `alpha` parameter of the instance
 * @param partial_rounds number of partial rounds
 * @param full_rounds_half number of full rounds in one half
 * @param round_constants pointer to the round constants
 * @param mds_matrix pointer to the mds matrix
 * @param non_sparse_matrix pointer to the non sparse matrix
 * @param sparse_matrices pointer to the sparse matrices
 * @param domain_tag domain tag value
 * @param poseidon_constants output: the constants struct to populate
 * @param ctx device context to operate in
 * @return a cudaError_t status
 */
template <typename S>
|
||||
cudaError_t create_optimized_poseidon_constants(
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
PoseidonConstants<S>* poseidon_constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
/**
 * Loads pre-calculated optimized constants, moves them to the device
 *
 * @param arity arity of the hash function whose pre-calculated constants should be loaded
 * (NOTE(review): the set of supported arities is defined by the implementation - confirm there)
 * @param ctx device context to operate in
 * @param constants output: the constants struct to populate
 * @return a cudaError_t status
 */
|
||||
template <typename S>
|
||||
cudaError_t
|
||||
init_optimized_poseidon_constants(int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* constants);
|
||||
|
||||
/**
 * Releases the resources referenced by `constants`.
 * NOTE(review): only the declaration is visible here - presumably frees the device buffers the
 * struct points to; confirm against the implementation before relying on the pointers afterwards.
 *
 * @param constants the constants struct to release
 * @param ctx device context to operate in
 * @return a cudaError_t status
 */
template <typename S>
|
||||
cudaError_t release_optimized_poseidon_constants(PoseidonConstants<S>* constants, device_context::DeviceContext& ctx);
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -8,17 +8,18 @@ import numpy as np
|
||||
from poseidon import round_constants as rc, round_numbers as rn
|
||||
|
||||
# Modify these
|
||||
arity = 11
|
||||
p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 # grumpkin
|
||||
arity = 2
|
||||
p = 2 ** 31 - 1 # m31 (Mersenne prime 2^31 - 1)
|
||||
# p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 # grumpkin
|
||||
# p = 0x73EDA753299D7D483339D80809A1D80553BDA402FFFE5BFEFFFFFFFF00000001 # bls12-381
|
||||
# p = 0x12ab655e9a2ca55660b44d1e5c37b00159aa76fed00000010a11800000000001 # bls12-377
|
||||
# p = 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001 # bn254
|
||||
# p = 0x1ae3a4617c510eac63b05c06ca1493b1a22d9f300f5138f1ef3622fba094800170b5d44300000008508c00000000001 # bw6-761
|
||||
prime_bit_len = 255
|
||||
field_bytes = 32
|
||||
prime_bit_len = 31
|
||||
field_bytes = 4
|
||||
|
||||
# leave set to -1 if not sure
|
||||
full_round = -1
|
||||
full_round = 8
|
||||
half_full_round = full_round // 2
|
||||
# leave set to -1 if not sure
|
||||
partial_round = -1
|
||||
@@ -31,12 +32,12 @@ security_level = 128
|
||||
# F = GF(p)
|
||||
# F.primitive_element()
|
||||
#
|
||||
# primitive_element = None
|
||||
primitive_element = None
|
||||
# primitive_element = 7 # bls12-381
|
||||
# primitive_element = 22 # bls12-377
|
||||
# primitive_element = 5 # bn254
|
||||
# primitive_element = 15 # bw6-761
|
||||
primitive_element = 3 # grumpkin
|
||||
# primitive_element = 3 # grumpkin
|
||||
|
||||
# currently we only support alpha 5, if you need alpha other than 5 - feal free to reach out
|
||||
alpha = 5
|
||||
|
||||
508
icicle/include/poseidon/constants/m31_poseidon.h
Normal file
508
icicle/include/poseidon/constants/m31_poseidon.h
Normal file
@@ -0,0 +1,508 @@
|
||||
#pragma once
|
||||
#ifndef M31_POSEIDON_H
|
||||
#define M31_POSEIDON_H
|
||||
|
||||
namespace poseidon_constants_m31 {
|
||||
/**
|
||||
* This inner namespace contains optimized constants for running Poseidon.
|
||||
* These constants were generated using an algorithm defined at
|
||||
* https://spec.filecoin.io/algorithms/crypto/poseidon/
|
||||
* The number in the name corresponds to the arity of hash function
|
||||
* Each array contains:
|
||||
* RoundConstants | MDSMatrix | Non-sparse matrix | Sparse matrices
|
||||
*/
|
||||
|
||||
int partial_rounds_2 = 7;
|
||||
|
||||
int partial_rounds_4 = 11;
|
||||
|
||||
int partial_rounds_8 = 12;
|
||||
|
||||
int partial_rounds_11 = 12;
|
||||
|
||||
unsigned char poseidon_constants_2[] = {
|
||||
0x33, 0x8b, 0x6d, 0x47, 0xbb, 0x97, 0x11, 0x67, 0x92, 0x9d, 0x55, 0x2d,
|
||||
0xee, 0x1e, 0x2e, 0x45, 0xfe, 0x35, 0x0e, 0x25, 0x7e, 0xc3, 0x4f, 0x70,
|
||||
0x4d, 0x0a, 0x8c, 0x18, 0xd9, 0x43, 0xa4, 0x61, 0xfb, 0x14, 0xd9, 0x14,
|
||||
0x99, 0x13, 0xb9, 0x30, 0xec, 0x3b, 0x8c, 0x16, 0xcc, 0xb2, 0x0b, 0x2e,
|
||||
0x9e, 0x18, 0xbf, 0x26, 0xb6, 0xb7, 0x2a, 0x44, 0x61, 0x29, 0xdb, 0x21,
|
||||
0x18, 0x84, 0x03, 0x4e, 0xef, 0x95, 0xf9, 0x45, 0xe3, 0xd8, 0xf2, 0x46,
|
||||
0x82, 0xb4, 0xc9, 0x5e, 0x5f, 0xf3, 0xb2, 0x4f, 0x61, 0x80, 0x50, 0x0f,
|
||||
0x0d, 0x7f, 0xe3, 0x1b, 0x23, 0xbd, 0x05, 0x2f, 0x0f, 0xb1, 0x60, 0x67,
|
||||
0xd8, 0x85, 0xdf, 0x57, 0x0c, 0x8c, 0xdf, 0x50, 0x9e, 0x65, 0x3c, 0x58,
|
||||
0x07, 0xbd, 0x29, 0x7e, 0xc5, 0xe5, 0xa7, 0x5a, 0x5a, 0x4b, 0x0c, 0x29,
|
||||
0x89, 0x9d, 0x14, 0x11, 0x8c, 0x20, 0xcb, 0x76, 0x4d, 0x56, 0x2d, 0x4a,
|
||||
0x10, 0xda, 0xaf, 0x0a, 0x65, 0x9d, 0x98, 0x3e, 0xa1, 0xac, 0x57, 0x46,
|
||||
0xcb, 0xe8, 0xfc, 0x5b, 0xd4, 0x43, 0x4b, 0x63, 0x1b, 0x13, 0x4b, 0x1f,
|
||||
0xed, 0xac, 0xbf, 0x30, 0x27, 0x15, 0xac, 0x53, 0x4b, 0x27, 0x61, 0x3e,
|
||||
0x37, 0xc3, 0x65, 0x74, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x00, 0x20,
|
||||
0x33, 0x33, 0x33, 0x33, 0x00, 0x00, 0x00, 0x20, 0x33, 0x33, 0x33, 0x33,
|
||||
0xaa, 0xaa, 0xaa, 0x6a, 0x33, 0x33, 0x33, 0x33, 0xaa, 0xaa, 0xaa, 0x6a,
|
||||
0x6d, 0xdb, 0xb6, 0x6d, 0x55, 0x55, 0x55, 0x55, 0xc0, 0x72, 0x8d, 0x36,
|
||||
0x2c, 0xe5, 0xc0, 0x51, 0x00, 0x00, 0x00, 0x20, 0x0b, 0xd5, 0x67, 0x6c,
|
||||
0x6c, 0x67, 0x2c, 0x13, 0x33, 0x33, 0x33, 0x33, 0x6c, 0x67, 0x2c, 0x13,
|
||||
0xe6, 0xb8, 0x2c, 0x62, 0x55, 0x55, 0x55, 0x55, 0x15, 0x1f, 0xaf, 0x6a,
|
||||
0xd9, 0xa8, 0x14, 0x44, 0xae, 0xb0, 0x38, 0x4b, 0x17, 0x76, 0xd9, 0x39,
|
||||
0x55, 0x55, 0x55, 0x55, 0x28, 0xef, 0x9d, 0x4f, 0xc7, 0x3b, 0xa6, 0x24,
|
||||
0x84, 0x5b, 0x79, 0x6f, 0xde, 0x4f, 0x8f, 0x3d, 0x55, 0x55, 0x55, 0x55,
|
||||
0x54, 0xc2, 0xb2, 0x00, 0x5a, 0xed, 0x68, 0x0c, 0xeb, 0xd4, 0xc4, 0x61,
|
||||
0x02, 0x8c, 0x85, 0x27, 0x55, 0x55, 0x55, 0x55, 0xe4, 0xc5, 0xbd, 0x0a,
|
||||
0xf6, 0xec, 0x75, 0x26, 0xe0, 0xdb, 0xd8, 0x52, 0xdf, 0x28, 0xff, 0x33,
|
||||
0x55, 0x55, 0x55, 0x55, 0xac, 0x68, 0x06, 0x00, 0xc9, 0xff, 0x91, 0x19,
|
||||
0xb1, 0x12, 0x2b, 0x19, 0xa2, 0xdd, 0x47, 0x39, 0x55, 0x55, 0x55, 0x55,
|
||||
0xd5, 0x03, 0x00, 0x00, 0x45, 0xc8, 0xcc, 0x4c, 0x55, 0x55, 0x55, 0x35,
|
||||
0x8d, 0xd6, 0x68, 0x3d, 0x55, 0x55, 0x55, 0x55, 0x03, 0x00, 0x00, 0x00,
|
||||
0x64, 0x66, 0x66, 0x26, 0x00, 0x00, 0x00, 0x20, 0x33, 0x33, 0x33, 0x33
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_4[] = {
|
||||
0xdb, 0x64, 0xa5, 0x32, 0xd6, 0x3d, 0x12, 0x6e, 0x65, 0x66, 0x46, 0x59,
|
||||
0x2a, 0x64, 0x51, 0x3b, 0xaf, 0xbe, 0x72, 0x0b, 0x66, 0x5f, 0x5c, 0x6c,
|
||||
0x66, 0x11, 0x8c, 0x61, 0x99, 0x24, 0x99, 0x14, 0x1d, 0x5f, 0x67, 0x0a,
|
||||
0x4d, 0xab, 0xc4, 0x1e, 0x43, 0xb2, 0x09, 0x58, 0xc0, 0x27, 0x4c, 0x5b,
|
||||
0xf0, 0x0c, 0xf5, 0x12, 0xc9, 0x2f, 0x88, 0x4f, 0x59, 0x52, 0x5b, 0x6a,
|
||||
0x73, 0x90, 0x55, 0x5b, 0xaf, 0x47, 0x55, 0x0d, 0xa7, 0xc2, 0x0c, 0x6e,
|
||||
0xe6, 0xd6, 0x4e, 0x30, 0x9e, 0x75, 0x47, 0x12, 0xca, 0x93, 0xd1, 0x5b,
|
||||
0x64, 0x27, 0xfc, 0x60, 0x6c, 0x16, 0x52, 0x20, 0xf5, 0xe0, 0x01, 0x15,
|
||||
0x27, 0xf9, 0x96, 0x7f, 0xa0, 0x38, 0xad, 0x3c, 0x95, 0xd3, 0xe4, 0x32,
|
||||
0x57, 0x95, 0x5a, 0x6b, 0x12, 0xcc, 0xdc, 0x18, 0x2b, 0xdd, 0xa4, 0x66,
|
||||
0xbf, 0xe7, 0x96, 0x15, 0x85, 0x87, 0x6a, 0x1f, 0x15, 0x19, 0x9c, 0x65,
|
||||
0xef, 0x24, 0xaa, 0x2c, 0x3f, 0x6b, 0xbc, 0x6b, 0x54, 0x24, 0x2c, 0x17,
|
||||
0xf1, 0x7a, 0x8d, 0x57, 0x90, 0xa4, 0xd4, 0x4a, 0x12, 0x06, 0x77, 0x6a,
|
||||
0xe8, 0x6b, 0xd9, 0x51, 0x80, 0x72, 0xa1, 0x31, 0xce, 0xa8, 0x59, 0x10,
|
||||
0x0c, 0x90, 0xd4, 0x10, 0x8e, 0x60, 0x54, 0x1c, 0xe7, 0xfd, 0x42, 0x3a,
|
||||
0x73, 0xc1, 0xcc, 0x4f, 0x58, 0xbb, 0x99, 0x7c, 0xd2, 0x51, 0xda, 0x43,
|
||||
0xea, 0x6e, 0xe8, 0x16, 0xb2, 0x51, 0x53, 0x61, 0x7e, 0x68, 0x44, 0x3c,
|
||||
0x33, 0x33, 0x33, 0x33, 0xaa, 0xaa, 0xaa, 0x6a, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71, 0xaa, 0xaa, 0xaa, 0x6a,
|
||||
0x6d, 0xdb, 0xb6, 0x6d, 0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x99, 0x99, 0x99, 0x59, 0x6d, 0xdb, 0xb6, 0x6d, 0x00, 0x00, 0x00, 0x10,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0x33, 0x33, 0x33, 0x33, 0xae, 0x9d, 0xba, 0x61,
|
||||
0x09, 0xf2, 0xee, 0x53, 0x5e, 0x5c, 0xe8, 0x61, 0x8e, 0x1a, 0x60, 0x6c,
|
||||
0xaa, 0xaa, 0xaa, 0x6a, 0xff, 0x1a, 0xb7, 0x09, 0x1d, 0x84, 0x75, 0x5e,
|
||||
0x88, 0x5e, 0x36, 0x25, 0x6b, 0xd4, 0xdd, 0x65, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x1d, 0x84, 0x75, 0x5e, 0x10, 0x9d, 0x2d, 0x63, 0xa7, 0x62, 0xfc, 0x1f,
|
||||
0xe2, 0x43, 0x63, 0x14, 0x00, 0x00, 0x00, 0x10, 0x88, 0x5e, 0x36, 0x25,
|
||||
0xa7, 0x62, 0xfc, 0x1f, 0x47, 0xa0, 0x19, 0x6f, 0x48, 0x1f, 0x4e, 0x22,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x6b, 0xd4, 0xdd, 0x65, 0xe2, 0x43, 0x63, 0x14,
|
||||
0x48, 0x1f, 0x4e, 0x22, 0xb7, 0x4e, 0x73, 0x01, 0x33, 0x33, 0x33, 0x33,
|
||||
0x84, 0xdd, 0xf7, 0x08, 0x6f, 0xc5, 0x14, 0x63, 0xb6, 0x22, 0x01, 0x3d,
|
||||
0xcd, 0xab, 0x7d, 0x62, 0xac, 0x7e, 0x61, 0x57, 0x40, 0x6b, 0xc5, 0x45,
|
||||
0x77, 0xbc, 0x02, 0x18, 0x8c, 0x66, 0xda, 0x74, 0x33, 0x33, 0x33, 0x33,
|
||||
0x01, 0x9d, 0x33, 0x55, 0xed, 0x7d, 0x75, 0x63, 0x41, 0x92, 0x33, 0x76,
|
||||
0x6b, 0xd5, 0x10, 0x23, 0x1a, 0xc4, 0x49, 0x5b, 0x0c, 0x86, 0x5a, 0x60,
|
||||
0x23, 0xe5, 0xd8, 0x1c, 0x43, 0xe9, 0xe2, 0x0d, 0x33, 0x33, 0x33, 0x33,
|
||||
0x1b, 0x68, 0xec, 0x17, 0x0e, 0x3f, 0x34, 0x1a, 0xb0, 0x28, 0xe9, 0x6c,
|
||||
0xc0, 0xf7, 0x3e, 0x79, 0xdc, 0x08, 0x9e, 0x32, 0x45, 0xde, 0xea, 0x73,
|
||||
0x7a, 0xc4, 0xb4, 0x0d, 0x65, 0xb6, 0x61, 0x04, 0x33, 0x33, 0x33, 0x33,
|
||||
0x41, 0x01, 0x02, 0x6b, 0xd8, 0x62, 0x6b, 0x47, 0x47, 0xd9, 0x7e, 0x72,
|
||||
0x4f, 0x80, 0x31, 0x54, 0x8b, 0x5e, 0x3e, 0x26, 0x64, 0x16, 0xe2, 0x51,
|
||||
0xf4, 0xa6, 0xed, 0x35, 0xc3, 0xe9, 0xc5, 0x41, 0x33, 0x33, 0x33, 0x33,
|
||||
0xd5, 0x3f, 0xed, 0x11, 0xf5, 0x0f, 0x56, 0x41, 0xf6, 0x0d, 0xf3, 0x78,
|
||||
0xb0, 0x78, 0xa1, 0x7d, 0x5d, 0x33, 0xc4, 0x5e, 0xa6, 0xd9, 0x47, 0x4c,
|
||||
0x07, 0xc3, 0x30, 0x5a, 0x91, 0x10, 0x31, 0x20, 0x33, 0x33, 0x33, 0x33,
|
||||
0xa5, 0xec, 0xe5, 0x25, 0xe6, 0xa7, 0x4e, 0x01, 0xee, 0x3a, 0xe7, 0x62,
|
||||
0x02, 0xfd, 0xf9, 0x08, 0xdd, 0x91, 0x3f, 0x2d, 0xca, 0xbc, 0xb5, 0x2c,
|
||||
0x54, 0x9e, 0xd4, 0x78, 0x6b, 0x18, 0x94, 0x21, 0x33, 0x33, 0x33, 0x33,
|
||||
0xe6, 0xb3, 0xd2, 0x2e, 0x49, 0xdb, 0xa8, 0x52, 0x5f, 0x6a, 0x75, 0x59,
|
||||
0xd5, 0x45, 0x5c, 0x73, 0x40, 0xe4, 0xd8, 0x2a, 0x8c, 0xe6, 0xda, 0x50,
|
||||
0x5f, 0x4f, 0x18, 0x5d, 0xf4, 0xa4, 0xf4, 0x46, 0x33, 0x33, 0x33, 0x33,
|
||||
0x3e, 0x90, 0x5b, 0x3a, 0x55, 0x96, 0x22, 0x7c, 0xd9, 0x64, 0x36, 0x4e,
|
||||
0x0b, 0xec, 0x66, 0x65, 0xac, 0x55, 0xa9, 0x19, 0x50, 0x87, 0x49, 0x1a,
|
||||
0x1f, 0x78, 0x89, 0x36, 0x25, 0x2a, 0x06, 0x55, 0x33, 0x33, 0x33, 0x33,
|
||||
0x6b, 0xf1, 0x61, 0x67, 0x67, 0x00, 0xc5, 0x24, 0x9e, 0xd1, 0x94, 0x6f,
|
||||
0xbf, 0x8b, 0xaf, 0x2d, 0x69, 0x9c, 0xb7, 0x62, 0xf8, 0x0a, 0x43, 0x13,
|
||||
0x3c, 0xc0, 0x48, 0x3e, 0x9f, 0x3f, 0xa8, 0x2c, 0x33, 0x33, 0x33, 0x33,
|
||||
0x9d, 0x5b, 0xb2, 0x2b, 0x62, 0x05, 0x39, 0x20, 0x52, 0x1f, 0xe8, 0x05,
|
||||
0x1b, 0x24, 0xc0, 0x13, 0x11, 0x11, 0x11, 0x11, 0x9c, 0x6a, 0x35, 0x45,
|
||||
0xf6, 0x7f, 0x5c, 0x4c, 0x9f, 0xc4, 0x8f, 0x1f, 0x33, 0x33, 0x33, 0x33,
|
||||
0xb1, 0xaa, 0xaa, 0x2a, 0xcb, 0xb6, 0x6d, 0x5b, 0x34, 0x49, 0x92, 0x24,
|
||||
0x90, 0x65, 0x59, 0x56, 0xaa, 0xaa, 0xaa, 0x6a, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_8[] = {
|
||||
0x90, 0xaf, 0x71, 0x3e, 0xa3, 0xbe, 0x5a, 0x30, 0xd4, 0x1b, 0x6f, 0x5d,
|
||||
0xeb, 0x36, 0x6b, 0x53, 0x14, 0xc0, 0x30, 0x13, 0xd5, 0xf8, 0x0b, 0x1c,
|
||||
0xa8, 0x66, 0xf1, 0x3c, 0xbd, 0x64, 0xa3, 0x6c, 0x06, 0x5e, 0x95, 0x7c,
|
||||
0xee, 0xc4, 0x0a, 0x0f, 0x37, 0x03, 0xba, 0x6d, 0x20, 0x85, 0xf1, 0x2c,
|
||||
0xee, 0x59, 0x21, 0x11, 0x42, 0xae, 0xb7, 0x3c, 0x73, 0xb4, 0xd6, 0x71,
|
||||
0x6a, 0x29, 0x40, 0x03, 0x86, 0xd8, 0x32, 0x68, 0x61, 0x62, 0x62, 0x32,
|
||||
0x44, 0x5d, 0xcc, 0x38, 0x76, 0x0f, 0xbc, 0x1f, 0xc9, 0x6e, 0x67, 0x1d,
|
||||
0x95, 0x35, 0x10, 0x79, 0x45, 0xaa, 0x0f, 0x7c, 0x73, 0xfa, 0x5d, 0x3f,
|
||||
0x53, 0xf2, 0xdc, 0x21, 0x37, 0xfa, 0x15, 0x04, 0xfd, 0x31, 0x3d, 0x5d,
|
||||
0x5d, 0xe6, 0x1d, 0x4a, 0xb3, 0x2b, 0xa2, 0x07, 0x2d, 0x48, 0x07, 0x2b,
|
||||
0x92, 0x1c, 0x31, 0x52, 0x6c, 0xd3, 0x32, 0x2f, 0x0f, 0xdd, 0x82, 0x7d,
|
||||
0x41, 0x0e, 0x81, 0x7e, 0x60, 0xfb, 0x49, 0x7b, 0xe5, 0x39, 0x3d, 0x75,
|
||||
0x6d, 0xcf, 0x02, 0x77, 0x0d, 0xf6, 0xf8, 0x0c, 0x43, 0xae, 0x62, 0x5e,
|
||||
0x26, 0x36, 0x9e, 0x3a, 0x10, 0xe3, 0x59, 0x4b, 0x3a, 0x59, 0x49, 0x73,
|
||||
0x31, 0x20, 0xb9, 0x40, 0x39, 0xed, 0xaf, 0x37, 0x6d, 0x5c, 0x4c, 0x6a,
|
||||
0xce, 0xca, 0xc4, 0x33, 0x53, 0x96, 0x92, 0x1d, 0xb2, 0xa1, 0xac, 0x65,
|
||||
0xbb, 0x43, 0xc4, 0x16, 0xf9, 0x38, 0x10, 0x67, 0x3d, 0xbb, 0x28, 0x7a,
|
||||
0x2b, 0x1e, 0x65, 0x36, 0x07, 0x14, 0x36, 0x3c, 0xcb, 0xdf, 0x03, 0x6b,
|
||||
0x03, 0x7b, 0xe6, 0x67, 0x79, 0x2a, 0x08, 0x47, 0xb7, 0x8f, 0x9c, 0x7e,
|
||||
0x54, 0xde, 0x08, 0x0a, 0xf8, 0x99, 0x24, 0x6f, 0x64, 0x78, 0x80, 0x5f,
|
||||
0x43, 0x76, 0x77, 0x40, 0x12, 0x62, 0x71, 0x10, 0x35, 0xf5, 0xdd, 0x0a,
|
||||
0x06, 0xff, 0x9b, 0x7b, 0xd8, 0x1a, 0xf3, 0x50, 0x1d, 0xc3, 0x8c, 0x60,
|
||||
0xe0, 0x61, 0xf5, 0x3d, 0xf9, 0xbf, 0xe4, 0x38, 0x78, 0xbf, 0x59, 0x0e,
|
||||
0xed, 0xc9, 0x4d, 0x0b, 0xb1, 0x7a, 0x10, 0x2b, 0x84, 0x27, 0x07, 0x70,
|
||||
0x5d, 0xc0, 0xa4, 0x7e, 0x9c, 0xf0, 0xf6, 0x69, 0x89, 0x6c, 0xc5, 0x39,
|
||||
0x4a, 0x7d, 0x5e, 0x26, 0x2f, 0x08, 0x9d, 0x05, 0xdc, 0x71, 0xec, 0x08,
|
||||
0x2b, 0xca, 0x68, 0x14, 0x42, 0xf6, 0xe6, 0x0a, 0x2f, 0xa5, 0x34, 0x6d,
|
||||
0x95, 0xaa, 0x80, 0x55, 0x23, 0x0f, 0x5f, 0x20, 0xbe, 0x4d, 0x0b, 0x20,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x50, 0x05, 0xd7, 0x30, 0x09, 0x94, 0x4f, 0x13,
|
||||
0x11, 0x86, 0x4b, 0x61, 0x74, 0x8b, 0x94, 0x0e, 0x7e, 0x5d, 0x93, 0x27,
|
||||
0xeb, 0xb6, 0x4b, 0x61, 0x90, 0x3f, 0x9b, 0x7d, 0x10, 0xe9, 0x16, 0x06,
|
||||
0x99, 0x99, 0x99, 0x59, 0x4f, 0xf6, 0x15, 0x6b, 0x84, 0x8c, 0xe0, 0x5f,
|
||||
0x88, 0x9e, 0xb2, 0x08, 0x32, 0x36, 0xe3, 0x25, 0x64, 0x0a, 0xf5, 0x6f,
|
||||
0x80, 0xff, 0x8e, 0x6f, 0xcd, 0xb5, 0x72, 0x12, 0x90, 0xa2, 0x7a, 0x09,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x84, 0x8c, 0xe0, 0x5f, 0xf5, 0x67, 0x02, 0x2d,
|
||||
0x71, 0x83, 0xf0, 0x55, 0x81, 0xa2, 0x81, 0x4b, 0xec, 0xff, 0xb0, 0x6b,
|
||||
0x17, 0x41, 0xd6, 0x36, 0xf3, 0x16, 0x58, 0x23, 0x49, 0x90, 0xa2, 0x17,
|
||||
0x55, 0x55, 0x55, 0x35, 0x88, 0x9e, 0xb2, 0x08, 0x71, 0x83, 0xf0, 0x55,
|
||||
0x27, 0x2a, 0xb0, 0x29, 0x0b, 0xe4, 0x53, 0x70, 0x7f, 0xeb, 0x60, 0x74,
|
||||
0xb9, 0x92, 0xa9, 0x4b, 0x51, 0x41, 0x0e, 0x56, 0x1b, 0xe4, 0x67, 0x43,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0x32, 0x36, 0xe3, 0x25, 0x81, 0xa2, 0x81, 0x4b,
|
||||
0x0b, 0xe4, 0x53, 0x70, 0x73, 0x99, 0xf0, 0x02, 0x1a, 0xf7, 0xe1, 0x40,
|
||||
0x18, 0xc4, 0x58, 0x3a, 0xcc, 0xf5, 0x0b, 0x18, 0xf0, 0x39, 0xab, 0x7a,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x64, 0x0a, 0xf5, 0x6f, 0xec, 0xff, 0xb0, 0x6b,
|
||||
0x7f, 0xeb, 0x60, 0x74, 0x1a, 0xf7, 0xe1, 0x40, 0xf7, 0xfc, 0xbe, 0x7f,
|
||||
0xbf, 0x63, 0xc5, 0x05, 0x15, 0x3c, 0x9f, 0x2b, 0x9b, 0x77, 0xb0, 0x44,
|
||||
0x11, 0x11, 0x11, 0x11, 0x80, 0xff, 0x8e, 0x6f, 0x17, 0x41, 0xd6, 0x36,
|
||||
0xb9, 0x92, 0xa9, 0x4b, 0x18, 0xc4, 0x58, 0x3a, 0xbf, 0x63, 0xc5, 0x05,
|
||||
0x2f, 0x5c, 0x3c, 0x09, 0x25, 0xaf, 0xdf, 0x11, 0x21, 0x7d, 0x95, 0x58,
|
||||
0x00, 0x00, 0x00, 0x08, 0xcd, 0xb5, 0x72, 0x12, 0xf3, 0x16, 0x58, 0x23,
|
||||
0x51, 0x41, 0x0e, 0x56, 0xcc, 0xf5, 0x0b, 0x18, 0x15, 0x3c, 0x9f, 0x2b,
|
||||
0x25, 0xaf, 0xdf, 0x11, 0x38, 0x50, 0xe9, 0x16, 0x12, 0xb8, 0xc8, 0x17,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x90, 0xa2, 0x7a, 0x09, 0x49, 0x90, 0xa2, 0x17,
|
||||
0x1b, 0xe4, 0x67, 0x43, 0xf0, 0x39, 0xab, 0x7a, 0x9b, 0x77, 0xb0, 0x44,
|
||||
0x21, 0x7d, 0x95, 0x58, 0x12, 0xb8, 0xc8, 0x17, 0x5a, 0xfc, 0xf7, 0x5c,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0xdb, 0x50, 0x89, 0x38, 0x5f, 0x88, 0xe3, 0x32,
|
||||
0x8b, 0xb4, 0x3b, 0x6c, 0x95, 0x0a, 0xf1, 0x41, 0xe6, 0x0a, 0x52, 0x7d,
|
||||
0xd1, 0x0d, 0xb1, 0x57, 0x9b, 0xd2, 0xf4, 0x1d, 0x80, 0x17, 0xb2, 0x42,
|
||||
0x9c, 0x40, 0x6e, 0x2f, 0x63, 0xa7, 0x42, 0x77, 0xf9, 0x37, 0xd1, 0x43,
|
||||
0x98, 0xd1, 0xec, 0x50, 0x91, 0x26, 0xfa, 0x4e, 0x0c, 0x9e, 0xcc, 0x31,
|
||||
0x52, 0xf4, 0x20, 0x5d, 0x2a, 0x20, 0xeb, 0x1b, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x54, 0x29, 0xf4, 0x4a, 0xde, 0x91, 0xf6, 0x54, 0x8b, 0xed, 0x18, 0x26,
|
||||
0x71, 0x24, 0x22, 0x34, 0xb7, 0xaf, 0x61, 0x27, 0x7a, 0x0a, 0x21, 0x7f,
|
||||
0x9f, 0xfe, 0xa1, 0x53, 0x26, 0x97, 0x6b, 0x5b, 0xf4, 0xea, 0xef, 0x4a,
|
||||
0x4b, 0x03, 0xa0, 0x7c, 0xe6, 0x64, 0x69, 0x47, 0x76, 0xf7, 0x2d, 0x0b,
|
||||
0x6f, 0xd5, 0x2c, 0x45, 0x52, 0xc1, 0x5c, 0x46, 0x25, 0x38, 0xab, 0x79,
|
||||
0x64, 0xed, 0xe7, 0x57, 0x71, 0x1c, 0xc7, 0x71, 0x94, 0xc2, 0xb7, 0x7f,
|
||||
0xaf, 0x0d, 0x61, 0x4c, 0xa3, 0x86, 0x8e, 0x45, 0xdc, 0x73, 0xe3, 0x77,
|
||||
0x71, 0xed, 0x21, 0x7d, 0x4b, 0x8e, 0xc7, 0x52, 0x39, 0x5d, 0x49, 0x1d,
|
||||
0x75, 0x35, 0xed, 0x09, 0xc6, 0x02, 0x3b, 0x22, 0xb8, 0x91, 0x07, 0x13,
|
||||
0x7f, 0xbf, 0x15, 0x7f, 0xb5, 0xbe, 0x0a, 0x5c, 0xbc, 0x75, 0x54, 0x61,
|
||||
0x6c, 0x2f, 0x28, 0x5f, 0xff, 0xf0, 0x7b, 0x67, 0x11, 0x8e, 0x70, 0x29,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0xe6, 0xfc, 0x29, 0x07, 0xbd, 0x0c, 0x4d, 0x5f,
|
||||
0x57, 0xb7, 0x87, 0x41, 0xec, 0x48, 0xda, 0x18, 0x78, 0x41, 0xb8, 0x6d,
|
||||
0xde, 0x7e, 0x47, 0x5a, 0x13, 0x03, 0xc5, 0x52, 0x2e, 0xee, 0xf3, 0x3f,
|
||||
0x06, 0xd0, 0xcd, 0x48, 0x77, 0x2a, 0xcd, 0x7e, 0x35, 0xee, 0x74, 0x63,
|
||||
0x3e, 0x26, 0x65, 0x64, 0x37, 0xa1, 0xfb, 0x7a, 0x03, 0x44, 0xa8, 0x70,
|
||||
0x2f, 0x03, 0x27, 0x1e, 0xb3, 0x02, 0x3e, 0x4a, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0xfd, 0xe1, 0xfe, 0x3c, 0x88, 0x1c, 0x36, 0x53, 0x36, 0x31, 0x5a, 0x32,
|
||||
0x88, 0x7b, 0xa6, 0x17, 0x40, 0x31, 0xe4, 0x0a, 0xb3, 0x70, 0x8f, 0x4f,
|
||||
0xc3, 0xa2, 0xd7, 0x06, 0x34, 0x9d, 0x4a, 0x71, 0x5b, 0xfa, 0x79, 0x25,
|
||||
0xe8, 0x6f, 0x05, 0x65, 0xc1, 0x4a, 0xee, 0x5c, 0x9a, 0xb2, 0x83, 0x05,
|
||||
0xb0, 0x89, 0x77, 0x2e, 0xc1, 0x56, 0x34, 0x08, 0x50, 0xf5, 0xde, 0x12,
|
||||
0xae, 0x68, 0xc2, 0x1b, 0x71, 0x1c, 0xc7, 0x71, 0xb3, 0x84, 0x6e, 0x4f,
|
||||
0xae, 0x74, 0x57, 0x4f, 0x56, 0xf3, 0xfc, 0x48, 0xfa, 0x73, 0xd7, 0x0e,
|
||||
0x8a, 0xc5, 0x35, 0x4d, 0xf6, 0x26, 0x15, 0x2a, 0xcf, 0xb5, 0x2d, 0x64,
|
||||
0xd1, 0x2a, 0x84, 0x43, 0xab, 0xc0, 0xec, 0x60, 0xa9, 0xbc, 0x09, 0x11,
|
||||
0xfd, 0x06, 0xea, 0x1e, 0xba, 0x29, 0x77, 0x6c, 0xb1, 0x37, 0xa5, 0x42,
|
||||
0x1c, 0x9b, 0x58, 0x37, 0xa8, 0xb7, 0xae, 0x3e, 0x6a, 0xf8, 0x63, 0x25,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x22, 0xa0, 0x75, 0x4e, 0x17, 0x33, 0x99, 0x7c,
|
||||
0x97, 0x97, 0x30, 0x04, 0xbc, 0x22, 0x6d, 0x7c, 0xb3, 0xd7, 0xd9, 0x56,
|
||||
0x4e, 0xef, 0x40, 0x5e, 0x02, 0x05, 0x51, 0x1e, 0x0c, 0x32, 0xb7, 0x06,
|
||||
0x41, 0x16, 0x80, 0x33, 0xc2, 0xdd, 0x8f, 0x18, 0x65, 0xa3, 0xe1, 0x4a,
|
||||
0xdb, 0xb4, 0x5d, 0x78, 0xf3, 0x99, 0x48, 0x3e, 0x04, 0x5b, 0xb9, 0x09,
|
||||
0xd2, 0x3d, 0x14, 0x05, 0x69, 0x50, 0xe9, 0x57, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x0d, 0x72, 0x37, 0x6c, 0xe3, 0xd1, 0x57, 0x2f, 0x9e, 0xb7, 0xe1, 0x30,
|
||||
0x22, 0xce, 0xe5, 0x66, 0x45, 0x7b, 0x06, 0x0e, 0x06, 0x66, 0xdd, 0x11,
|
||||
0xef, 0xdf, 0x61, 0x52, 0x7d, 0xb9, 0xcf, 0x1e, 0x97, 0xbe, 0x55, 0x00,
|
||||
0x94, 0xcb, 0x50, 0x7c, 0xa0, 0x83, 0x1c, 0x57, 0xf3, 0x72, 0x8c, 0x40,
|
||||
0x07, 0x32, 0x39, 0x54, 0xe8, 0x5a, 0x10, 0x7b, 0x09, 0xc2, 0x02, 0x58,
|
||||
0xb0, 0xeb, 0x23, 0x51, 0x71, 0x1c, 0xc7, 0x71, 0xf0, 0xfd, 0x78, 0x2c,
|
||||
0xe7, 0xa8, 0x53, 0x7c, 0xdd, 0xf6, 0xa3, 0x2b, 0xa9, 0x51, 0xf4, 0x33,
|
||||
0x1d, 0x4d, 0x13, 0x0e, 0x53, 0x6b, 0xde, 0x6b, 0x48, 0x46, 0xa0, 0x01,
|
||||
0xbf, 0x74, 0xf2, 0x14, 0xe5, 0x99, 0x3d, 0x72, 0x37, 0x8e, 0xa9, 0x44,
|
||||
0x61, 0xed, 0xdd, 0x3b, 0x7c, 0x11, 0x28, 0x12, 0xd5, 0xd6, 0x27, 0x78,
|
||||
0x4e, 0xf8, 0xe4, 0x3d, 0xdc, 0x5c, 0x92, 0x0c, 0xea, 0x5b, 0xe2, 0x44,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x64, 0x55, 0xb2, 0x0d, 0x54, 0x7f, 0x64, 0x72,
|
||||
0x8e, 0xe1, 0x7b, 0x52, 0xf5, 0xe4, 0x20, 0x13, 0xd1, 0xd4, 0x5d, 0x4c,
|
||||
0x33, 0x3d, 0xb6, 0x55, 0x26, 0xed, 0xb0, 0x75, 0xa0, 0xf2, 0x72, 0x51,
|
||||
0x6b, 0xc5, 0x37, 0x23, 0x0d, 0x1d, 0xf5, 0x6f, 0xa6, 0x83, 0x5f, 0x3e,
|
||||
0x1e, 0xb5, 0x18, 0x23, 0xc8, 0x40, 0xae, 0x63, 0x68, 0x79, 0x8e, 0x56,
|
||||
0xb0, 0x33, 0x43, 0x08, 0x5b, 0xac, 0x52, 0x39, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x9d, 0xf2, 0x00, 0x73, 0xf8, 0x96, 0xbb, 0x43, 0x5b, 0x59, 0xce, 0x07,
|
||||
0xbb, 0x11, 0xc8, 0x43, 0xde, 0xea, 0xb7, 0x34, 0x51, 0xbf, 0xa7, 0x2d,
|
||||
0x33, 0x35, 0xc2, 0x40, 0x1c, 0x81, 0x60, 0x63, 0x60, 0x0b, 0xb6, 0x60,
|
||||
0xbf, 0xb9, 0x38, 0x0c, 0x02, 0x54, 0x53, 0x20, 0xd9, 0xf9, 0xeb, 0x2f,
|
||||
0x7e, 0x5b, 0xdf, 0x58, 0x4b, 0x99, 0x8e, 0x04, 0x27, 0xb4, 0x18, 0x78,
|
||||
0xd6, 0x37, 0x16, 0x60, 0x71, 0x1c, 0xc7, 0x71, 0x74, 0x66, 0x66, 0x66,
|
||||
0xb2, 0xf1, 0x94, 0x20, 0xad, 0x2f, 0xba, 0x68, 0x6a, 0x33, 0xfe, 0x6e,
|
||||
0xa5, 0x51, 0xec, 0x44, 0xab, 0x05, 0x7e, 0x60, 0x48, 0x6b, 0xa5, 0x56,
|
||||
0x38, 0x3d, 0xc7, 0x24, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_11[] = {
|
||||
0xb0, 0xf1, 0x1f, 0x2e, 0xf8, 0x8b, 0xb5, 0x07, 0x8d, 0xc4, 0xe1, 0x46,
|
||||
0x99, 0x23, 0x9f, 0x06, 0xcc, 0x64, 0x13, 0x45, 0x9e, 0xb1, 0xdf, 0x5f,
|
||||
0xfa, 0x8e, 0x0f, 0x6f, 0x33, 0xd8, 0xfe, 0x19, 0x0a, 0x25, 0x8b, 0x20,
|
||||
0xe1, 0x2c, 0xcc, 0x36, 0x17, 0x3f, 0x03, 0x05, 0xe1, 0x13, 0xce, 0x35,
|
||||
0xd4, 0xc9, 0xe7, 0x65, 0x1f, 0x7f, 0x2c, 0x7a, 0x93, 0x9f, 0x34, 0x19,
|
||||
0x4d, 0x22, 0xf2, 0x7f, 0x8e, 0xa8, 0xb0, 0x51, 0x22, 0x8c, 0x91, 0x30,
|
||||
0xa5, 0x9c, 0xff, 0x31, 0x0e, 0x04, 0xc9, 0x19, 0x69, 0x60, 0xee, 0x0f,
|
||||
0xc5, 0xa5, 0xeb, 0x6b, 0xb0, 0xa4, 0xaa, 0x5d, 0x1c, 0x4e, 0xeb, 0x73,
|
||||
0xec, 0x94, 0xb7, 0x15, 0xce, 0x64, 0x1c, 0x60, 0x3e, 0xa3, 0x6b, 0x4a,
|
||||
0x87, 0x7a, 0x25, 0x2f, 0xfc, 0xc3, 0x17, 0x20, 0x06, 0xb6, 0x22, 0x7d,
|
||||
0xca, 0xea, 0x8b, 0x3b, 0xf9, 0xca, 0xa4, 0x32, 0xd2, 0xb7, 0x2e, 0x01,
|
||||
0x4f, 0x31, 0xc9, 0x2f, 0x10, 0xbf, 0x41, 0x4c, 0xe6, 0xfe, 0xba, 0x49,
|
||||
0xe5, 0x89, 0xbb, 0x77, 0x7e, 0xe8, 0x83, 0x1c, 0x72, 0xe7, 0x26, 0x58,
|
||||
0x24, 0x90, 0x9d, 0x1e, 0xb3, 0x20, 0xc8, 0x64, 0x84, 0xa3, 0x21, 0x5d,
|
||||
0x06, 0x64, 0x30, 0x4b, 0x19, 0x35, 0x96, 0x1e, 0xd1, 0x86, 0x57, 0x4a,
|
||||
0xb3, 0x8e, 0xd6, 0x7d, 0xaf, 0xd1, 0xde, 0x3f, 0xa2, 0x2c, 0x32, 0x0a,
|
||||
0xbb, 0xea, 0x4a, 0x46, 0x64, 0x1b, 0x72, 0x14, 0x75, 0x85, 0x1b, 0x4d,
|
||||
0x11, 0x02, 0x5f, 0x6f, 0x06, 0xdd, 0xd3, 0x6f, 0xbc, 0xcc, 0x77, 0x2e,
|
||||
0xb7, 0x43, 0xf4, 0x19, 0x9d, 0x2c, 0x4b, 0x2b, 0x0c, 0x41, 0xb9, 0x02,
|
||||
0xdc, 0x14, 0x5a, 0x67, 0xd4, 0x56, 0xca, 0x45, 0x65, 0xd2, 0x7d, 0x17,
|
||||
0xcd, 0x91, 0xdd, 0x45, 0xd8, 0xa8, 0xd8, 0x4b, 0xc9, 0x2b, 0xf2, 0x35,
|
||||
0xc1, 0x81, 0x6c, 0x33, 0xbc, 0xf4, 0x4d, 0x04, 0xfd, 0xb0, 0x91, 0x2b,
|
||||
0xcf, 0xad, 0x39, 0x45, 0x35, 0xb2, 0xac, 0x2e, 0x2f, 0x13, 0xe3, 0x0b,
|
||||
0x40, 0x59, 0x33, 0x07, 0xe3, 0xa5, 0xa1, 0x4d, 0x0e, 0x79, 0x05, 0x4c,
|
||||
0x36, 0x9b, 0xf1, 0x7f, 0x90, 0x50, 0x46, 0x25, 0x87, 0x10, 0x24, 0x3f,
|
||||
0x52, 0x5d, 0xff, 0x18, 0xad, 0xed, 0x78, 0x52, 0x00, 0x9c, 0xfe, 0x66,
|
||||
0x22, 0x24, 0xe0, 0x62, 0x13, 0xe2, 0x6f, 0x67, 0xd9, 0xe3, 0x6c, 0x64,
|
||||
0x6b, 0xa6, 0xea, 0x53, 0x61, 0x56, 0x8a, 0x33, 0x81, 0x35, 0xe5, 0x0f,
|
||||
0x35, 0xc9, 0xf3, 0x59, 0xc2, 0xa8, 0x92, 0x73, 0x69, 0x66, 0x05, 0x70,
|
||||
0xa1, 0x5f, 0xec, 0x4e, 0x3d, 0x6b, 0xc0, 0x78, 0xa4, 0xcb, 0xfc, 0x7e,
|
||||
0x44, 0x8c, 0xc4, 0x1b, 0x25, 0x70, 0x8f, 0x27, 0x87, 0x76, 0x2d, 0x4f,
|
||||
0x70, 0xb0, 0xea, 0x7a, 0x92, 0x43, 0x8c, 0x00, 0xed, 0xfd, 0x3b, 0x23,
|
||||
0x69, 0x71, 0x8e, 0x49, 0x83, 0xc3, 0x4e, 0x37, 0xab, 0x18, 0xd9, 0x30,
|
||||
0x4d, 0x48, 0x5e, 0x7e, 0xbc, 0x5a, 0x1a, 0x24, 0x34, 0xed, 0x19, 0x57,
|
||||
0xf4, 0xf4, 0x0d, 0x02, 0x0c, 0x57, 0xde, 0x6d, 0x40, 0x39, 0x1f, 0x71,
|
||||
0x9c, 0xa1, 0xb0, 0x28, 0x2d, 0x05, 0xb9, 0x6b, 0x85, 0x7a, 0x4c, 0x47,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11,
|
||||
0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b, 0x00, 0x00, 0x00, 0x04,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48,
|
||||
0xbd, 0xf7, 0xde, 0x7b, 0x00, 0x00, 0x00, 0x04, 0xc1, 0x07, 0x1f, 0x7c,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b,
|
||||
0x00, 0x00, 0x00, 0x04, 0xc1, 0x07, 0x1f, 0x7c, 0x87, 0x87, 0x87, 0x47,
|
||||
0x55, 0x55, 0x55, 0x35, 0x7c, 0xec, 0xe8, 0x54, 0x5f, 0xc4, 0x1c, 0x7e,
|
||||
0x02, 0x38, 0x4e, 0x55, 0x86, 0x80, 0x6d, 0x71, 0xc3, 0xa8, 0x98, 0x4a,
|
||||
0x2b, 0xaa, 0x86, 0x63, 0x60, 0xd7, 0x4f, 0x2e, 0xb4, 0xac, 0xce, 0x78,
|
||||
0xbd, 0x1c, 0x4f, 0x55, 0x6b, 0x2c, 0x33, 0x64, 0x8c, 0x56, 0x30, 0x43,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xdd, 0x29, 0xc3, 0x15, 0x02, 0x15, 0x5b, 0x4f,
|
||||
0xdc, 0xb9, 0x0c, 0x03, 0x9a, 0x8d, 0x4d, 0x53, 0x6e, 0xf2, 0x33, 0x15,
|
||||
0xed, 0x3f, 0x16, 0x06, 0x43, 0xab, 0x59, 0x54, 0x1a, 0x62, 0xcd, 0x3a,
|
||||
0xda, 0x77, 0xa8, 0x51, 0x42, 0x58, 0x05, 0x55, 0x39, 0xeb, 0xd1, 0x45,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x02, 0x15, 0x5b, 0x4f, 0xb9, 0x5a, 0x8c, 0x36,
|
||||
0x9a, 0x63, 0x3e, 0x3c, 0xe6, 0x28, 0x72, 0x36, 0x51, 0x89, 0xdb, 0x3b,
|
||||
0xfa, 0xe0, 0x07, 0x07, 0x30, 0xb3, 0x56, 0x39, 0x91, 0x42, 0x86, 0x38,
|
||||
0xda, 0xd2, 0x8f, 0x67, 0x75, 0xca, 0x3e, 0x69, 0xe9, 0xd8, 0x07, 0x6f,
|
||||
0x11, 0x11, 0x11, 0x11, 0xdc, 0xb9, 0x0c, 0x03, 0x9a, 0x63, 0x3e, 0x3c,
|
||||
0x54, 0xdc, 0x52, 0x1f, 0xf3, 0xc8, 0xb6, 0x6b, 0x96, 0x31, 0xf8, 0x1b,
|
||||
0x20, 0xee, 0x0b, 0x07, 0x4c, 0x37, 0x80, 0x4b, 0x31, 0x99, 0xd0, 0x09,
|
||||
0xb8, 0xa5, 0x62, 0x5f, 0xa2, 0x72, 0xfb, 0x33, 0x11, 0xd8, 0x0e, 0x65,
|
||||
0x00, 0x00, 0x00, 0x08, 0x9a, 0x8d, 0x4d, 0x53, 0xe6, 0x28, 0x72, 0x36,
|
||||
0xf3, 0xc8, 0xb6, 0x6b, 0xef, 0x80, 0xab, 0x77, 0x4d, 0x49, 0x25, 0x2b,
|
||||
0x7e, 0x10, 0x08, 0x1b, 0x70, 0x22, 0x72, 0x66, 0x8b, 0xe6, 0x06, 0x3a,
|
||||
0x58, 0xb9, 0x7e, 0x02, 0x97, 0xf4, 0xc2, 0x4f, 0x6b, 0x9a, 0x68, 0x53,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x6e, 0xf2, 0x33, 0x15, 0x51, 0x89, 0xdb, 0x3b,
|
||||
0x96, 0x31, 0xf8, 0x1b, 0x4d, 0x49, 0x25, 0x2b, 0xe2, 0xe0, 0x5c, 0x64,
|
||||
0xb6, 0x1d, 0x73, 0x13, 0x38, 0x1b, 0xfd, 0x49, 0xe1, 0x2c, 0xce, 0x5d,
|
||||
0x2a, 0x6b, 0xb4, 0x17, 0x7e, 0xa9, 0x6e, 0x72, 0x2f, 0x77, 0x47, 0x79,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0xed, 0x3f, 0x16, 0x06, 0xfa, 0xe0, 0x07, 0x07,
|
||||
0x20, 0xee, 0x0b, 0x07, 0x7e, 0x10, 0x08, 0x1b, 0xb6, 0x1d, 0x73, 0x13,
|
||||
0xca, 0x4a, 0x44, 0x68, 0x1c, 0x93, 0xbc, 0x37, 0xfa, 0x14, 0x8b, 0x55,
|
||||
0xae, 0xe0, 0xac, 0x31, 0xcb, 0x04, 0x09, 0x46, 0x27, 0x8f, 0x96, 0x07,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0x43, 0xab, 0x59, 0x54, 0x30, 0xb3, 0x56, 0x39,
|
||||
0x4c, 0x37, 0x80, 0x4b, 0x70, 0x22, 0x72, 0x66, 0x38, 0x1b, 0xfd, 0x49,
|
||||
0x1c, 0x93, 0xbc, 0x37, 0xfb, 0xdd, 0xff, 0x41, 0x73, 0x22, 0xa8, 0x31,
|
||||
0xd4, 0xc3, 0x26, 0x2b, 0xe7, 0x8c, 0xce, 0x35, 0x03, 0x29, 0x9c, 0x43,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x1a, 0x62, 0xcd, 0x3a, 0x91, 0x42, 0x86, 0x38,
|
||||
0x31, 0x99, 0xd0, 0x09, 0x8b, 0xe6, 0x06, 0x3a, 0xe1, 0x2c, 0xce, 0x5d,
|
||||
0xfa, 0x14, 0x8b, 0x55, 0x73, 0x22, 0xa8, 0x31, 0xaf, 0x9f, 0x0d, 0x2d,
|
||||
0xd8, 0xf1, 0xd2, 0x43, 0x41, 0x60, 0x7a, 0x48, 0xca, 0xa1, 0x4c, 0x7c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xda, 0x77, 0xa8, 0x51, 0xda, 0xd2, 0x8f, 0x67,
|
||||
0xb8, 0xa5, 0x62, 0x5f, 0x58, 0xb9, 0x7e, 0x02, 0x2a, 0x6b, 0xb4, 0x17,
|
||||
0xae, 0xe0, 0xac, 0x31, 0xd4, 0xc3, 0x26, 0x2b, 0xd8, 0xf1, 0xd2, 0x43,
|
||||
0x38, 0xc4, 0xc5, 0x55, 0x39, 0x3d, 0x1f, 0x4c, 0x81, 0xa8, 0x99, 0x14,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0x42, 0x58, 0x05, 0x55, 0x75, 0xca, 0x3e, 0x69,
|
||||
0xa2, 0x72, 0xfb, 0x33, 0x97, 0xf4, 0xc2, 0x4f, 0x7e, 0xa9, 0x6e, 0x72,
|
||||
0xcb, 0x04, 0x09, 0x46, 0xe7, 0x8c, 0xce, 0x35, 0x41, 0x60, 0x7a, 0x48,
|
||||
0x39, 0x3d, 0x1f, 0x4c, 0xc3, 0x27, 0xbb, 0x1a, 0x86, 0xb4, 0x97, 0x00,
|
||||
0xc8, 0x42, 0x16, 0x32, 0x39, 0xeb, 0xd1, 0x45, 0xe9, 0xd8, 0x07, 0x6f,
|
||||
0x11, 0xd8, 0x0e, 0x65, 0x6b, 0x9a, 0x68, 0x53, 0x2f, 0x77, 0x47, 0x79,
|
||||
0x27, 0x8f, 0x96, 0x07, 0x03, 0x29, 0x9c, 0x43, 0xca, 0xa1, 0x4c, 0x7c,
|
||||
0x81, 0xa8, 0x99, 0x14, 0x86, 0xb4, 0x97, 0x00, 0x0c, 0xd8, 0x29, 0x37,
|
||||
0x55, 0x55, 0x55, 0x35, 0xcc, 0xab, 0xe7, 0x58, 0x82, 0xaa, 0xb7, 0x06,
|
||||
0x3c, 0x2a, 0x3d, 0x61, 0x45, 0xbd, 0xcc, 0x4b, 0xa9, 0x83, 0x44, 0x56,
|
||||
0x16, 0xe6, 0x58, 0x6e, 0x70, 0x4b, 0x3a, 0x44, 0xe2, 0x3b, 0x37, 0x60,
|
||||
0xf0, 0x3b, 0x41, 0x1e, 0x44, 0x40, 0x84, 0x5a, 0x63, 0x5d, 0x4d, 0x78,
|
||||
0x22, 0x80, 0xb3, 0x0f, 0xe0, 0x85, 0xec, 0x77, 0xe5, 0x3d, 0xda, 0x27,
|
||||
0x55, 0xf9, 0xfd, 0x44, 0x38, 0xa7, 0x0f, 0x0a, 0x2f, 0xec, 0xda, 0x34,
|
||||
0x24, 0xef, 0x00, 0x40, 0x54, 0x9a, 0x0b, 0x27, 0xf9, 0x85, 0xf4, 0x16,
|
||||
0x14, 0x1f, 0x17, 0x30, 0x1d, 0xb0, 0xdf, 0x31, 0x55, 0x55, 0x55, 0x35,
|
||||
0x98, 0x36, 0x7e, 0x31, 0xd0, 0xda, 0x0a, 0x16, 0xae, 0xb0, 0x6a, 0x00,
|
||||
0x0e, 0x7a, 0x7e, 0x6d, 0x93, 0x81, 0x4d, 0x21, 0x45, 0x5a, 0x4d, 0x20,
|
||||
0x42, 0x5d, 0xfd, 0x49, 0x28, 0xc5, 0xe2, 0x75, 0x45, 0x85, 0x03, 0x2c,
|
||||
0xfc, 0x78, 0x72, 0x15, 0x98, 0x9c, 0x88, 0x0b, 0xed, 0x8f, 0x6f, 0x2b,
|
||||
0x55, 0x75, 0x17, 0x5f, 0xe5, 0xed, 0x21, 0x52, 0x5a, 0x34, 0x10, 0x7d,
|
||||
0x42, 0x25, 0x57, 0x6a, 0xa4, 0xb2, 0xe6, 0x2e, 0x05, 0xa8, 0xc4, 0x17,
|
||||
0xff, 0x9c, 0x7f, 0x6f, 0x23, 0x64, 0x17, 0x44, 0x85, 0xa9, 0x6b, 0x46,
|
||||
0x66, 0x58, 0x1b, 0x3b, 0x55, 0x55, 0x55, 0x35, 0x55, 0xf6, 0xca, 0x06,
|
||||
0x68, 0x75, 0xa9, 0x55, 0x54, 0x44, 0x4f, 0x61, 0x65, 0x3b, 0x96, 0x37,
|
||||
0xa9, 0x89, 0xb6, 0x47, 0x70, 0x8a, 0x8d, 0x74, 0x09, 0x53, 0x9e, 0x5e,
|
||||
0x92, 0x56, 0x2b, 0x34, 0x3e, 0x9d, 0x12, 0x0a, 0x54, 0x98, 0xf8, 0x29,
|
||||
0xde, 0xa0, 0xdd, 0x11, 0x46, 0x3e, 0x0f, 0x70, 0xff, 0xee, 0x0d, 0x7c,
|
||||
0x48, 0xe0, 0xe1, 0x6d, 0xb6, 0x5a, 0x2f, 0x7c, 0xb1, 0xb2, 0xf7, 0x2f,
|
||||
0xda, 0x64, 0x33, 0x7e, 0x87, 0x48, 0x48, 0x7e, 0x95, 0x6c, 0xd5, 0x5c,
|
||||
0x26, 0x8f, 0xc9, 0x3e, 0xf9, 0x5e, 0x99, 0x38, 0xf5, 0x32, 0xc2, 0x66,
|
||||
0x55, 0x55, 0x55, 0x35, 0x7f, 0xb1, 0x0f, 0x47, 0xac, 0x5d, 0xec, 0x76,
|
||||
0xba, 0x59, 0xc4, 0x7f, 0xfb, 0xdc, 0x32, 0x46, 0xe8, 0x83, 0xe0, 0x0a,
|
||||
0xf4, 0xb8, 0x56, 0x36, 0x07, 0x4f, 0x7f, 0x29, 0x31, 0xb8, 0xf4, 0x2c,
|
||||
0x7e, 0x42, 0xbd, 0x3e, 0xf1, 0x9d, 0x40, 0x73, 0x51, 0xf1, 0xce, 0x31,
|
||||
0x35, 0x7b, 0x0e, 0x48, 0x9e, 0xb9, 0x6e, 0x3b, 0x37, 0x00, 0x57, 0x0c,
|
||||
0x15, 0x25, 0x74, 0x64, 0xdd, 0x39, 0x64, 0x5c, 0x0a, 0x5d, 0x08, 0x2b,
|
||||
0xf5, 0xe6, 0x0c, 0x3f, 0xe6, 0xce, 0x30, 0x2d, 0x27, 0xc4, 0x07, 0x19,
|
||||
0x82, 0xfb, 0x44, 0x08, 0x7b, 0x94, 0x23, 0x69, 0x55, 0x55, 0x55, 0x35,
|
||||
0xc7, 0xbe, 0xaf, 0x49, 0xa6, 0x9a, 0x26, 0x30, 0x7c, 0xb2, 0x66, 0x35,
|
||||
0xe4, 0x83, 0x46, 0x62, 0xe3, 0x1c, 0x23, 0x07, 0x36, 0x2e, 0xd3, 0x00,
|
||||
0xe2, 0x65, 0xc8, 0x51, 0x0c, 0x09, 0x5c, 0x74, 0x13, 0x94, 0xf9, 0x67,
|
||||
0x4e, 0x07, 0x26, 0x03, 0xba, 0xb4, 0x3a, 0x7f, 0x38, 0xb4, 0x7c, 0x6a,
|
||||
0x44, 0x7a, 0x1c, 0x7b, 0xeb, 0xf9, 0x8b, 0x0b, 0x16, 0xf8, 0x23, 0x36,
|
||||
0x7b, 0x89, 0x79, 0x44, 0x80, 0xfe, 0x33, 0x2a, 0x7d, 0x59, 0xe2, 0x1b,
|
||||
0x7b, 0xe1, 0xb0, 0x15, 0x21, 0xcb, 0x47, 0x77, 0x23, 0x1a, 0xc0, 0x14,
|
||||
0x5b, 0x86, 0x06, 0x2d, 0x55, 0x55, 0x55, 0x35, 0x04, 0xb5, 0x47, 0x27,
|
||||
0x1d, 0xb7, 0x22, 0x44, 0xcc, 0x9e, 0xce, 0x7d, 0xf2, 0x75, 0x78, 0x78,
|
||||
0x7b, 0x98, 0x99, 0x12, 0xbd, 0x34, 0xe4, 0x43, 0xf0, 0x0a, 0x96, 0x43,
|
||||
0xf1, 0x50, 0x1d, 0x0b, 0x86, 0x78, 0xc9, 0x59, 0xc7, 0x78, 0xec, 0x16,
|
||||
0x71, 0xaa, 0x0c, 0x56, 0xbf, 0x92, 0xe2, 0x3a, 0xb5, 0x6e, 0x2d, 0x18,
|
||||
0xe2, 0xc7, 0x31, 0x67, 0x10, 0xab, 0x9f, 0x27, 0x27, 0x1e, 0xf3, 0x69,
|
||||
0xaf, 0x57, 0x42, 0x4c, 0x4f, 0xb4, 0x30, 0x35, 0x00, 0x54, 0xb0, 0x4a,
|
||||
0xa2, 0x00, 0x2a, 0x4a, 0x3d, 0x49, 0x58, 0x73, 0xf9, 0x16, 0xb0, 0x01,
|
||||
0x55, 0x55, 0x55, 0x35, 0xe4, 0xd5, 0x3f, 0x2e, 0xee, 0x84, 0x47, 0x51,
|
||||
0x3f, 0x84, 0xb9, 0x6b, 0x49, 0xb9, 0xae, 0x57, 0x32, 0x5a, 0x04, 0x02,
|
||||
0xe1, 0x6a, 0xf1, 0x4b, 0x30, 0x53, 0xf1, 0x05, 0x29, 0x74, 0x75, 0x76,
|
||||
0x4a, 0x15, 0x5b, 0x5d, 0xe1, 0xaa, 0x15, 0x1b, 0x62, 0xf5, 0xe8, 0x76,
|
||||
0x03, 0xc1, 0xaa, 0x06, 0x13, 0x59, 0xc8, 0x40, 0x84, 0x49, 0xc8, 0x1f,
|
||||
0x85, 0x98, 0x55, 0x6b, 0xed, 0x38, 0x45, 0x17, 0xb8, 0xc7, 0xf7, 0x69,
|
||||
0xc3, 0x87, 0xd0, 0x17, 0x0a, 0x93, 0xb7, 0x35, 0xc2, 0x45, 0x75, 0x34,
|
||||
0x7a, 0x78, 0xff, 0x51, 0x26, 0xd2, 0x59, 0x13, 0x55, 0x55, 0x55, 0x35,
|
||||
0x48, 0x38, 0xf7, 0x6e, 0x4f, 0x7d, 0xc7, 0x70, 0x32, 0x5d, 0x5b, 0x7a,
|
||||
0x85, 0x35, 0x9c, 0x07, 0x40, 0x08, 0x30, 0x5c, 0x64, 0x69, 0x27, 0x7a,
|
||||
0x07, 0x34, 0x90, 0x6c, 0x6e, 0xa6, 0x8e, 0x70, 0xd4, 0xf2, 0xf7, 0x59,
|
||||
0x0f, 0x13, 0x17, 0x5d, 0xa8, 0xa9, 0x01, 0x29, 0xad, 0xfd, 0x9a, 0x77,
|
||||
0x3c, 0x77, 0xc7, 0x67, 0xd0, 0x43, 0xb1, 0x3f, 0x97, 0x76, 0xe4, 0x72,
|
||||
0xd4, 0x82, 0x9a, 0x25, 0xec, 0xef, 0xc3, 0x03, 0xdc, 0xf9, 0x94, 0x3f,
|
||||
0xa4, 0x76, 0x88, 0x5a, 0xb8, 0x0f, 0x03, 0x76, 0x58, 0x87, 0x42, 0x11,
|
||||
0x28, 0xb7, 0xb0, 0x1d, 0x55, 0x55, 0x55, 0x35, 0x2f, 0xe6, 0x44, 0x75,
|
||||
0xf3, 0x0b, 0xe8, 0x68, 0x59, 0x72, 0x1f, 0x16, 0x8c, 0xd0, 0xe3, 0x3c,
|
||||
0xcc, 0xfc, 0x77, 0x05, 0xd6, 0x4b, 0x48, 0x78, 0x51, 0x88, 0x4c, 0x5f,
|
||||
0x30, 0x43, 0x9c, 0x2f, 0x49, 0x72, 0xba, 0x01, 0xba, 0xae, 0xfe, 0x0b,
|
||||
0x94, 0x3f, 0xe7, 0x71, 0x9d, 0xfa, 0x37, 0x06, 0xfc, 0xa2, 0x99, 0x6f,
|
||||
0xe2, 0x0d, 0xcf, 0x4b, 0x63, 0x76, 0xec, 0x49, 0xa8, 0xb5, 0x84, 0x0b,
|
||||
0x84, 0xa3, 0x75, 0x4f, 0x5e, 0x56, 0xdd, 0x37, 0x1a, 0x7d, 0x6e, 0x34,
|
||||
0x95, 0x39, 0x80, 0x1e, 0x58, 0x2e, 0x22, 0x50, 0xd3, 0x46, 0x93, 0x1e,
|
||||
0x55, 0x55, 0x55, 0x35, 0xf5, 0x96, 0x5a, 0x5f, 0x9b, 0xc8, 0x58, 0x50,
|
||||
0x3e, 0x03, 0xab, 0x16, 0xd5, 0xc6, 0x4c, 0x7f, 0x3f, 0x82, 0xf6, 0x34,
|
||||
0x1c, 0x29, 0x22, 0x16, 0x40, 0xdb, 0xe7, 0x71, 0x8b, 0x8a, 0x4b, 0x55,
|
||||
0x45, 0xbf, 0xd1, 0x68, 0x4c, 0xbb, 0xe3, 0x43, 0x1b, 0x96, 0x28, 0x3d,
|
||||
0x36, 0x4f, 0xdb, 0x58, 0xa8, 0x39, 0xac, 0x38, 0xd3, 0xeb, 0x90, 0x18,
|
||||
0x2f, 0xb7, 0x06, 0x1a, 0x5a, 0x82, 0x53, 0x13, 0x77, 0xaf, 0xe0, 0x4d,
|
||||
0x9e, 0xe9, 0x39, 0x79, 0xb7, 0xf6, 0xa2, 0x3c, 0x41, 0x9d, 0x14, 0x59,
|
||||
0x01, 0x33, 0x36, 0x20, 0x15, 0xe0, 0xe4, 0x15, 0x55, 0x55, 0x55, 0x35,
|
||||
0x58, 0x48, 0x07, 0x36, 0x3f, 0x43, 0x1e, 0x05, 0x33, 0x9e, 0x14, 0x45,
|
||||
0x69, 0xc8, 0x16, 0x63, 0x5f, 0xab, 0x77, 0x26, 0xf4, 0x08, 0xb0, 0x2e,
|
||||
0xf8, 0x31, 0x79, 0x29, 0x37, 0xc9, 0x37, 0x28, 0x55, 0x62, 0xcc, 0x43,
|
||||
0xeb, 0x6b, 0xe4, 0x03, 0xfe, 0x82, 0x50, 0x20, 0x2d, 0xdf, 0xf2, 0x7d,
|
||||
0xba, 0x07, 0xe2, 0x0e, 0x88, 0x1e, 0x82, 0x2b, 0x87, 0x54, 0x26, 0x39,
|
||||
0xdd, 0xee, 0x3e, 0x0b, 0xdc, 0xbf, 0x93, 0x1a, 0x8a, 0xce, 0xa6, 0x39,
|
||||
0x5b, 0xaf, 0x8f, 0x00, 0x7a, 0xad, 0x27, 0x71, 0x1e, 0x76, 0xd8, 0x58,
|
||||
0x96, 0x36, 0xa3, 0x14, 0x55, 0x55, 0x55, 0x35, 0x76, 0x27, 0x76, 0x62,
|
||||
0xa4, 0x9f, 0x05, 0x5a, 0x41, 0x28, 0x49, 0x12, 0x24, 0x18, 0x49, 0x12,
|
||||
0x4f, 0xc2, 0xa5, 0x25, 0x0e, 0x0e, 0x3c, 0x3c, 0x01, 0xa7, 0x65, 0x00,
|
||||
0x92, 0x9e, 0x17, 0x36, 0xa1, 0x7a, 0x92, 0x27, 0xcf, 0x74, 0xba, 0x4d,
|
||||
0xcb, 0x6f, 0x66, 0x68, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32
|
||||
};
|
||||
} // namespace poseidon_constants
|
||||
#endif
|
||||
@@ -1,9 +1,13 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#pragma once
|
||||
#ifndef POSEIDON_KERNELS_H
|
||||
#define POSEIDON_KERNELS_H
|
||||
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
|
||||
namespace poseidon {
|
||||
template <typename S, int T>
|
||||
__global__ void prepare_poseidon_states(S* states, size_t number_of_states, S domain_tag, bool aligned)
|
||||
__global__ void prepare_poseidon_states(const S* input, S* states, unsigned int number_of_states, const S domain_tag)
|
||||
{
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int state_number = idx / T;
|
||||
@@ -16,27 +20,27 @@ namespace poseidon {
|
||||
if (element_number == 0) {
|
||||
prepared_element = domain_tag;
|
||||
} else {
|
||||
if (aligned) {
|
||||
prepared_element = states[idx];
|
||||
} else {
|
||||
prepared_element = states[idx - 1];
|
||||
}
|
||||
prepared_element = input[idx - state_number - 1];
|
||||
}
|
||||
|
||||
// We need __syncthreads here if the state is not aligned
|
||||
// because then we need to shift the vector [A, B, 0] -> [D, A, B]
|
||||
if (!aligned) { __syncthreads(); }
|
||||
|
||||
// Store element in state
|
||||
states[idx] = prepared_element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
DEVICE_INLINE S sbox_alpha_five(S element)
|
||||
DEVICE_INLINE S sbox_el(S element, const int alpha)
|
||||
{
|
||||
S result = S::sqr(element);
|
||||
result = S::sqr(result);
|
||||
return result * element;
|
||||
S result2 = S::sqr(element);
|
||||
switch (alpha) {
|
||||
case 3:
|
||||
return result2 * element;
|
||||
case 5:
|
||||
return S::sqr(result2) * element;
|
||||
case 7:
|
||||
return S::sqr(result2) * result2 * element;
|
||||
case 11:
|
||||
return S::sqr(S::sqr(result2)) * result2 * element;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
@@ -71,7 +75,7 @@ namespace poseidon {
|
||||
element = element + constants.round_constants[rc_offset + element_number];
|
||||
rc_offset += T;
|
||||
}
|
||||
element = sbox_alpha_five(element);
|
||||
element = sbox_el(element, constants.alpha);
|
||||
if (!skip_rc) { element = element + constants.round_constants[rc_offset + element_number]; }
|
||||
|
||||
// Multiply all the states by mds matrix
|
||||
@@ -111,7 +115,7 @@ namespace poseidon {
|
||||
__device__ S partial_round(S state[T], size_t rc_offset, int round_number, const PoseidonConstants<S>& constants)
|
||||
{
|
||||
S element = state[0];
|
||||
element = sbox_alpha_five(element);
|
||||
element = sbox_el(element, constants.alpha);
|
||||
element = element + constants.round_constants[rc_offset];
|
||||
|
||||
S* sparse_matrix = &constants.sparse_matrices[(T * 2 - 1) * round_number];
|
||||
@@ -155,22 +159,58 @@ namespace poseidon {
|
||||
}
|
||||
}
|
||||
|
||||
// This function just copies elements from the states to the output
|
||||
template <typename S, int T>
|
||||
__global__ void get_hash_results(S* states, size_t number_of_states, S* out)
|
||||
__global__ void
|
||||
squeeze_states_kernel(const S* states, unsigned int number_of_states, unsigned int rate, unsigned int offset, S* out)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
out[idx] = states[idx * T + 1];
|
||||
for (int i = 0; i < rate; i++) {
|
||||
out[idx * rate + i] = states[idx * T + offset + i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void copy_recursive(S* state, size_t number_of_states, S* out)
|
||||
cudaError_t poseidon_permutation_kernel(
|
||||
const S* input,
|
||||
S* out,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const PoseidonConstants<S>& constants,
|
||||
cudaStream_t& stream)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
S* states;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states, number_of_states * T * sizeof(S), stream));
|
||||
|
||||
state[(idx / (T - 1) * T) + (idx % (T - 1)) + 1] = out[idx];
|
||||
prepare_poseidon_states<S, T>
|
||||
<<<PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T), 0, stream>>>(
|
||||
input, states, number_of_states, constants.domain_tag);
|
||||
|
||||
size_t rc_offset = 0;
|
||||
full_rounds<S, T><<<
|
||||
PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T),
|
||||
sizeof(S) * PKC::hashes_per_block(T) * T, stream>>>(
|
||||
states, number_of_states, rc_offset, FIRST_FULL_ROUNDS, constants);
|
||||
rc_offset += T * (constants.full_rounds_half + 1);
|
||||
|
||||
partial_rounds<S, T><<<PKC::number_of_singlehash_blocks(number_of_states), PKC::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, rc_offset, constants);
|
||||
rc_offset += constants.partial_rounds;
|
||||
|
||||
full_rounds<S, T><<<
|
||||
PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T),
|
||||
sizeof(S) * PKC::hashes_per_block(T) * T, stream>>>(
|
||||
states, number_of_states, rc_offset, SECOND_FULL_ROUNDS, constants);
|
||||
|
||||
squeeze_states_kernel<S, T>
|
||||
<<<PKC::number_of_singlehash_blocks(number_of_states), PKC::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, output_len, 1, out);
|
||||
|
||||
CHK_IF_RETURN(cudaFreeAsync(states, stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -8,132 +8,87 @@
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "poseidon/kernels.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
using namespace hash;
|
||||
|
||||
/**
|
||||
* @namespace poseidon
|
||||
* Implementation of the [Poseidon hash function](https://eprint.iacr.org/2019/458.pdf)
|
||||
* Specifically, the optimized [Filecoin version](https://spec.filecoin.io/algorithms/crypto/poseidon/)
|
||||
*/
|
||||
namespace poseidon {
|
||||
#define FIRST_FULL_ROUNDS true
|
||||
#define SECOND_FULL_ROUNDS false
|
||||
|
||||
/**
|
||||
* For most of the Poseidon configurations this is the case
|
||||
* TODO: Add support for different full rounds numbers
|
||||
*/
|
||||
const int FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConstants
|
||||
* These constants are enough to define a Poseidon instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
|
||||
struct PoseidonConstants {
|
||||
int arity;
|
||||
int partial_rounds;
|
||||
int full_rounds_half;
|
||||
S* round_constants = nullptr;
|
||||
S* mds_matrix = nullptr;
|
||||
S* non_sparse_matrix = nullptr;
|
||||
S* sparse_matrices = nullptr;
|
||||
S domain_tag;
|
||||
};
|
||||
|
||||
/**
|
||||
* @class PoseidonKernelsConfiguration
|
||||
* Describes the logic of deriving CUDA kernels parameters
|
||||
* such as the number of threads and the number of blocks
|
||||
*/
|
||||
template <int T>
|
||||
class PoseidonKernelsConfiguration
|
||||
class Poseidon : public Hasher<S, S>
|
||||
{
|
||||
public:
|
||||
// The logic behind this is that 1 thread only works on 1 element
|
||||
// We have {T} elements in each state, and {number_of_states} states total
|
||||
static const int number_of_threads = 256 / T * T;
|
||||
const std::size_t device_id;
|
||||
PoseidonConstants<S> constants;
|
||||
|
||||
// The partial rounds operates on the whole state, so we define
|
||||
// the parallelism params for processing a single hash preimage per thread
|
||||
static const int singlehash_block_size = 128;
|
||||
|
||||
static const int hashes_per_block = number_of_threads / T;
|
||||
|
||||
static int number_of_full_blocks(size_t number_of_states)
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
int total_number_of_threads = number_of_states * T;
|
||||
return total_number_of_threads / number_of_threads +
|
||||
static_cast<bool>(total_number_of_threads % number_of_threads);
|
||||
cudaError_t permutation_error;
|
||||
#define P_PERM_T(width) \
|
||||
case width: \
|
||||
permutation_error = poseidon_permutation_kernel<S, width>( \
|
||||
input, output, number_of_states, input_len, output_len, this->constants, ctx.stream); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P_PERM_T(3)
|
||||
P_PERM_T(5)
|
||||
P_PERM_T(9)
|
||||
P_PERM_T(12)
|
||||
default:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [3, 5, 9, 12]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(permutation_error);
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
static int number_of_singlehash_blocks(size_t number_of_states)
|
||||
Poseidon(
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
device_context::DeviceContext& ctx)
|
||||
: Hasher<S, S>(arity + 1, arity, arity, 1), device_id(ctx.device_id)
|
||||
{
|
||||
return number_of_states / singlehash_block_size + static_cast<bool>(number_of_states % singlehash_block_size);
|
||||
PoseidonConstants<S> constants;
|
||||
CHK_STICKY(create_optimized_poseidon_constants(
|
||||
arity, alpha, partial_rounds, full_rounds_half, round_constants, mds_matrix, non_sparse_matrix, sparse_matrices,
|
||||
domain_tag, &constants, ctx));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
Poseidon(int arity, device_context::DeviceContext& ctx)
|
||||
: Hasher<S, S>(arity + 1, arity, arity, 1), device_id(ctx.device_id)
|
||||
{
|
||||
PoseidonConstants<S> constants{};
|
||||
CHK_STICKY(init_optimized_poseidon_constants(arity, ctx, &constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
~Poseidon()
|
||||
{
|
||||
auto ctx = device_context::get_default_device_context();
|
||||
ctx.device_id = this->device_id;
|
||||
CHK_STICKY(release_optimized_poseidon_constants<S>(&this->constants, ctx));
|
||||
}
|
||||
};
|
||||
|
||||
template <int T>
|
||||
using PKC = PoseidonKernelsConfiguration<T>;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConfig
|
||||
* Struct that encodes various Poseidon parameters.
|
||||
*/
|
||||
struct PoseidonConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
bool input_is_a_state; /**< If true, input is considered to be a states vector, holding the preimages
|
||||
* in aligned or not aligned format. Memory under the input pointer will be used for states
|
||||
* If false, fresh states memory will be allocated and input will be copied into it */
|
||||
bool aligned; /**< If true - input should be already aligned for poseidon permutation.
|
||||
* Aligned format: [0, A, B, 0, C, D, ...] (as you might get by using loop_state)
|
||||
* not aligned format: [A, B, 0, C, D, 0, ...] (as you might get from cudaMemcpy2D) */
|
||||
bool loop_state; /**< If true, hash results will also be copied in the input pointer in aligned format */
|
||||
bool is_async; /**< Whether to run the Poseidon asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the poseidon_hash
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static PoseidonConfig default_poseidon_config(
|
||||
int t, const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
PoseidonConfig config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputs_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // input_is_a_state
|
||||
false, // aligned
|
||||
false, // loop_state
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t
|
||||
init_optimized_poseidon_constants(int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* constants);
|
||||
|
||||
/**
|
||||
* Compute the poseidon hash over a sequence of preimages.
|
||||
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
|
||||
* @param T size of the poseidon state, should be equal to {arity + 1}
|
||||
* @param input a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. May point to a string of preimages or a string of states filled with preimages.
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be at least of size [number_of_states](@ref number_of_states)
|
||||
* @param number_of_states number of input blocks of size T-1 (arity)
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon_hash(
|
||||
S* input, S* output, size_t number_of_states, const PoseidonConstants<S>& constants, const PoseidonConfig& config);
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -1,74 +0,0 @@
|
||||
#pragma once
|
||||
#ifndef MERKLE_H
|
||||
#define MERKLE_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace poseidon;
|
||||
|
||||
/**
|
||||
* @namespace merkle
|
||||
* Implementation of the [Poseidon](@ref poseidon) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) builder,
|
||||
* parallelized for the use on GPU
|
||||
*/
|
||||
namespace merkle {
|
||||
static constexpr size_t GIGA = 1024 * 1024 * 1024;
|
||||
|
||||
/// Bytes per stream
|
||||
static constexpr size_t STREAM_CHUNK_SIZE = 1024 * 1024 * 1024;
|
||||
|
||||
/**
|
||||
* @struct TreeBuilderConfig
|
||||
* Struct that encodes various Tree builder parameters.
|
||||
*/
|
||||
struct TreeBuilderConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
int keep_rows; /**< How many rows of the Merkle tree should be written to output. '0' means all of them */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the tree builder asynchronously. If set to `true`, the build_merkle_tree
|
||||
* function will be non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static TreeBuilderConfig
|
||||
default_merkle_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
TreeBuilderConfig config = {
|
||||
ctx, // ctx
|
||||
0, // keep_rows
|
||||
false, // are_inputs_on_device
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the Poseidon Merkle tree
|
||||
*
|
||||
* @param leaves a pointer to the leaves layer. May be allocated on device or on host, regulated by the config
|
||||
* Expected to have arity ^ (height - 1) elements
|
||||
* @param digests a pointer to the digests storage. May only be allocated on the host
|
||||
* Expected to have `sum(arity ^ (i)) for i in [0..height-1]`
|
||||
* @param height the height of the merkle tree
|
||||
* # Algorithm
|
||||
* The function will split large tree into many subtrees of size that will fit `STREAM_CHUNK_SIZE`.
|
||||
* Each subtree is built in its own stream (there is a maximum number of streams)
|
||||
* After all subtrees are constructed - the function will combine the resulting sub-digests into the final top-tree
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t build_merkle_tree(
|
||||
const S* leaves,
|
||||
S* digests,
|
||||
uint32_t height,
|
||||
const PoseidonConstants<S>& poseidon,
|
||||
const TreeBuilderConfig& config);
|
||||
} // namespace merkle
|
||||
|
||||
#endif
|
||||
65
icicle/include/poseidon2/constants.cuh
Normal file
65
icicle/include/poseidon2/constants.cuh
Normal file
@@ -0,0 +1,65 @@
|
||||
#pragma once
|
||||
#ifndef POSEIDON2_CONSTANTS_H
|
||||
#define POSEIDON2_CONSTANTS_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
namespace poseidon2 {
|
||||
/**
|
||||
* For most of the Poseidon2 configurations this is the case
|
||||
*/
|
||||
const int EXTERNAL_ROUNDS_DEFAULT = 8;
|
||||
|
||||
enum DiffusionStrategy {
|
||||
DEFAULT_DIFFUSION,
|
||||
MONTGOMERY,
|
||||
};
|
||||
|
||||
enum MdsType { DEFAULT_MDS, PLONKY };
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Constants
|
||||
* These constants are enough to define a Poseidon2 instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
|
||||
struct Poseidon2Constants {
|
||||
int width;
|
||||
int alpha;
|
||||
int internal_rounds;
|
||||
int external_rounds;
|
||||
S* round_constants = nullptr;
|
||||
S* internal_matrix_diag = nullptr;
|
||||
MdsType mds_type;
|
||||
DiffusionStrategy diffusion;
|
||||
};
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon_constants);
|
||||
|
||||
template <typename S>
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon2_constants);
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
1077
icicle/include/poseidon2/constants/m31_poseidon2.h
Normal file
1077
icicle/include/poseidon2/constants/m31_poseidon2.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,13 +3,14 @@ from sage.rings.polynomial.polynomial_gf2x import GF2X_BuildIrred_list
|
||||
from math import *
|
||||
import itertools
|
||||
|
||||
CURVE_NAME = "bn254"
|
||||
CURVE_NAME = "m31"
|
||||
|
||||
###########################################################################
|
||||
# p = 18446744069414584321 # GoldiLocks
|
||||
# p = 2013265921 # BabyBear
|
||||
p = 2**31 - 1 # M31
|
||||
# p = 52435875175126190479447740508185965837690552500527637822603658699938581184513 # BLS12-381
|
||||
p = 21888242871839275222246405745257275088548364400416034343698204186575808495617 # BN254/BN256
|
||||
# p = 21888242871839275222246405745257275088548364400416034343698204186575808495617 # BN254/BN256
|
||||
# p = 28948022309329048855892746252171976963363056481941560715954676764349967630337 # Pasta (Pallas)
|
||||
# p = 28948022309329048855892746252171976963363056481941647379679742748393362948097 # Pasta (Vesta)
|
||||
|
||||
@@ -617,6 +618,8 @@ print(f"namespace poseidon2_constants_{CURVE_NAME} {{")
|
||||
for t in TS:
|
||||
NUM_CELLS = t
|
||||
R_F_FIXED, R_P_FIXED, _, _ = poseidon_calc_final_numbers_fixed(p, t, alpha, 128, True)
|
||||
if t == 16:
|
||||
R_P_FIXED = 14
|
||||
|
||||
INIT_SEQUENCE = []
|
||||
|
||||
|
||||
@@ -1,7 +1,28 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#pragma once
|
||||
#ifndef POSEIDON2_KERNELS_H
|
||||
#define POSEIDON2_KERNELS_H
|
||||
|
||||
#include "utils/utils.h"
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "poseidon2/constants.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace poseidon2 {
|
||||
static DEVICE_INLINE unsigned int d_next_pow_of_two(unsigned int v)
|
||||
{
|
||||
v--;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
v++;
|
||||
return v;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
DEVICE_INLINE S sbox_el(S element, const int alpha)
|
||||
{
|
||||
@@ -19,7 +40,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
DEVICE_INLINE S sbox(S state[T], const int alpha)
|
||||
DEVICE_INLINE void sbox(S state[T], const int alpha)
|
||||
{
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = sbox_el(state[i], alpha);
|
||||
@@ -27,7 +48,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
DEVICE_INLINE S add_rc(S state[T], size_t rc_offset, const S* rc)
|
||||
DEVICE_INLINE void add_rc(S state[T], size_t rc_offset, const S* rc)
|
||||
{
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = state[i] + rc[rc_offset + i];
|
||||
@@ -35,7 +56,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S mds_light_4x4(S s[4])
|
||||
__device__ void mds_light_4x4(S s[4])
|
||||
{
|
||||
S t0 = s[0] + s[1];
|
||||
S t1 = s[2] + s[3];
|
||||
@@ -56,7 +77,7 @@ namespace poseidon2 {
|
||||
// [ 3 1 1 2 ].
|
||||
// https://github.com/Plonky3/Plonky3/blob/main/poseidon2/src/matrix.rs#L36
|
||||
template <typename S>
|
||||
__device__ S mds_light_plonky_4x4(S s[4])
|
||||
__device__ void mds_light_plonky_4x4(S s[4])
|
||||
{
|
||||
S t01 = s[0] + s[1];
|
||||
S t23 = s[2] + s[3];
|
||||
@@ -70,7 +91,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ S mds_light(S state[T], MdsType mds)
|
||||
__device__ void mds_light(S state[T], MdsType mds)
|
||||
{
|
||||
S sum;
|
||||
switch (T) {
|
||||
@@ -123,7 +144,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ S internal_round(S state[T], size_t rc_offset, const Poseidon2Constants<S>& constants)
|
||||
__device__ void internal_round(S state[T], size_t rc_offset, const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
S element = state[0];
|
||||
element = element + constants.round_constants[rc_offset];
|
||||
@@ -176,17 +197,8 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void poseidon2_permutation_kernel(
|
||||
const S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S> constants)
|
||||
__device__ void permute_state(S state[T], const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
S state[T];
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = states[idx * T + i];
|
||||
}
|
||||
unsigned int rn;
|
||||
|
||||
mds_light<S, T>(state, constants.mds_type);
|
||||
@@ -213,6 +225,22 @@ namespace poseidon2 {
|
||||
mds_light<S, T>(state, constants.mds_type);
|
||||
rc_offset += T;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void permutation_kernel(
|
||||
const S* states, S* states_out, unsigned int number_of_states, const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
S state[T];
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = states[idx * T + i];
|
||||
}
|
||||
|
||||
permute_state<S, T>(state, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
@@ -220,13 +248,120 @@ namespace poseidon2 {
|
||||
}
|
||||
}
|
||||
|
||||
// This function just copies from the states to the output
|
||||
template <typename S, int T>
|
||||
__global__ void get_hash_results(const S* states, size_t number_of_states, int index, S* out)
|
||||
__global__ void hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
uint64_t number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
uint64_t idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
out[idx] = states[idx * T + index];
|
||||
S state[T] = {0};
|
||||
UNROLL
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
state[i] = input[idx * input_len + i];
|
||||
}
|
||||
|
||||
permute_state<S, T>(state, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
output[idx * output_len + i] = state[i];
|
||||
}
|
||||
}
|
||||
} // namespace poseidon2
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ void absorb_2d_state(
|
||||
const Matrix<S>* inputs,
|
||||
S state[T],
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int rate,
|
||||
uint64_t row_idx,
|
||||
const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
unsigned int index = 0;
|
||||
for (int i = 0; i < number_of_inputs; i++) {
|
||||
const Matrix<S>* input = inputs + i;
|
||||
for (int j = 0; j < input->width; j++) {
|
||||
state[index] = input->values[row_idx * input->width + j];
|
||||
index++;
|
||||
if (index == rate) {
|
||||
permute_state<S, T>(state, constants);
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (index) { permute_state<S, T>(state, constants); }
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void hash_2d_kernel(
|
||||
const Matrix<S>* inputs,
|
||||
S* output,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int rate,
|
||||
unsigned int output_len,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
uint64_t idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= inputs[0].height) { return; }
|
||||
|
||||
S state[T] = {0};
|
||||
|
||||
absorb_2d_state<S, T>(inputs, state, number_of_inputs, rate, idx, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
output[idx * output_len + i] = state[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void compress_and_inject_kernel(
|
||||
const Matrix<S>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
const S* prev_layer,
|
||||
S* next_layer,
|
||||
unsigned int rate,
|
||||
unsigned int digest_elements,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
uint64_t number_of_rows = d_next_pow_of_two(matrices_to_inject[0].height);
|
||||
if (idx >= number_of_rows) { return; }
|
||||
|
||||
size_t next_layer_len = matrices_to_inject[0].height;
|
||||
S state_to_compress[T] = {S::zero()};
|
||||
|
||||
for (int i = 0; i < digest_elements * 2; i++) {
|
||||
state_to_compress[i] = prev_layer[idx * 2 * digest_elements + i];
|
||||
}
|
||||
permute_state<S, T>(state_to_compress, constants);
|
||||
|
||||
S injected_state[T] = {S::zero()};
|
||||
if (idx < next_layer_len) {
|
||||
absorb_2d_state<S, T>(matrices_to_inject, injected_state, number_of_inputs, rate, idx, constants);
|
||||
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
injected_state[digest_elements + i] = injected_state[i];
|
||||
injected_state[i] = state_to_compress[i];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
injected_state[i] = state_to_compress[i];
|
||||
}
|
||||
}
|
||||
permute_state<S, T>(injected_state, constants);
|
||||
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
next_layer[idx * digest_elements + i] = injected_state[i];
|
||||
}
|
||||
}
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
@@ -8,124 +8,172 @@
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
|
||||
#include "poseidon2/constants.cuh"
|
||||
#include "poseidon2/kernels.cuh"
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace poseidon2
|
||||
* Implementation of the [Poseidon2 hash function](https://eprint.iacr.org/2023/323.pdf)
|
||||
* Specifically, the optimized [Filecoin version](https://spec.filecoin.io/algorithms/crypto/poseidon/)
|
||||
*/
|
||||
namespace poseidon2 {
|
||||
/**
|
||||
* For most of the Poseidon2 configurations this is the case
|
||||
*/
|
||||
const int EXTERNAL_ROUNDS_DEFAULT = 8;
|
||||
|
||||
enum DiffusionStrategy {
|
||||
DEFAULT_DIFFUSION,
|
||||
MONTGOMERY,
|
||||
};
|
||||
|
||||
enum MdsType { DEFAULT_MDS, PLONKY };
|
||||
|
||||
enum PoseidonMode {
|
||||
COMPRESSION,
|
||||
PERMUTATION,
|
||||
};
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Constants
|
||||
* These constants are enough to define a Poseidon2 instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
|
||||
struct Poseidon2Constants {
|
||||
int width;
|
||||
int alpha;
|
||||
int internal_rounds;
|
||||
int external_rounds;
|
||||
S* round_constants = nullptr;
|
||||
S* internal_matrix_diag = nullptr;
|
||||
MdsType mds_type;
|
||||
DiffusionStrategy diffusion;
|
||||
};
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Config
|
||||
* Struct that encodes various Poseidon2 parameters.
|
||||
*/
|
||||
struct Poseidon2Config {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_states_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
PoseidonMode mode;
|
||||
int output_index;
|
||||
bool
|
||||
is_async; /**< Whether to run the Poseidon2 asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the poseidon_hash
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static Poseidon2Config default_poseidon2_config(
|
||||
int t, const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
class Poseidon2 : public hash::Hasher<S, S>
|
||||
{
|
||||
Poseidon2Config config = {
|
||||
ctx, // ctx
|
||||
false, // are_states_on_device
|
||||
false, // are_outputs_on_device
|
||||
PoseidonMode::COMPRESSION,
|
||||
1, // output_index
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
static const int POSEIDON_BLOCK_SIZE = 32;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon_constants);
|
||||
static inline int poseidon_number_of_blocks(size_t number_of_states)
|
||||
{
|
||||
return number_of_states / POSEIDON_BLOCK_SIZE + static_cast<bool>(number_of_states % POSEIDON_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* constants);
|
||||
public:
|
||||
const std::size_t device_id;
|
||||
Poseidon2Constants<S> constants;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
|
||||
cudaError_t hash_2d(
|
||||
const Matrix<S>* inputs,
|
||||
S* output,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int output_len,
|
||||
uint64_t number_of_rows,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
#define P2_HASH_2D_T(width) \
|
||||
case width: \
|
||||
hash_2d_kernel<S, width><<<poseidon_number_of_blocks(number_of_rows), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>( \
|
||||
inputs, output, number_of_inputs, this->rate, output_len, this->constants); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P2_HASH_2D_T(2)
|
||||
P2_HASH_2D_T(3)
|
||||
P2_HASH_2D_T(4)
|
||||
P2_HASH_2D_T(8)
|
||||
P2_HASH_2D_T(12)
|
||||
P2_HASH_2D_T(16)
|
||||
P2_HASH_2D_T(20)
|
||||
P2_HASH_2D_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonAbsorb2d: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
#define P2_HASH_MANY_T(width) \
|
||||
case width: \
|
||||
hash_many_kernel<S, width><<<poseidon_number_of_blocks(number_of_states), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>( \
|
||||
input, output, number_of_states, input_len, output_len, this->constants); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P2_HASH_MANY_T(2)
|
||||
P2_HASH_MANY_T(3)
|
||||
P2_HASH_MANY_T(4)
|
||||
P2_HASH_MANY_T(8)
|
||||
P2_HASH_MANY_T(12)
|
||||
P2_HASH_MANY_T(16)
|
||||
P2_HASH_MANY_T(20)
|
||||
P2_HASH_MANY_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
cudaError_t compress_and_inject(
|
||||
const Matrix<S>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
uint64_t number_of_rows,
|
||||
const S* prev_layer,
|
||||
S* next_layer,
|
||||
unsigned int digest_elements,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
#define P2_COMPRESS_AND_INJECT_T(width) \
|
||||
case width: \
|
||||
compress_and_inject_kernel<S, width> \
|
||||
<<<poseidon_number_of_blocks(number_of_rows), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>( \
|
||||
matrices_to_inject, number_of_inputs, prev_layer, next_layer, this->rate, digest_elements, this->constants); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P2_COMPRESS_AND_INJECT_T(2)
|
||||
P2_COMPRESS_AND_INJECT_T(3)
|
||||
P2_COMPRESS_AND_INJECT_T(4)
|
||||
P2_COMPRESS_AND_INJECT_T(8)
|
||||
P2_COMPRESS_AND_INJECT_T(12)
|
||||
P2_COMPRESS_AND_INJECT_T(16)
|
||||
P2_COMPRESS_AND_INJECT_T(20)
|
||||
P2_COMPRESS_AND_INJECT_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
Poseidon2(
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx)
|
||||
: hash::Hasher<S, S>(width, width, rate, 0), device_id(ctx.device_id)
|
||||
{
|
||||
Poseidon2Constants<S> constants;
|
||||
CHK_STICKY(create_poseidon2_constants(
|
||||
width, alpha, internal_rounds, external_rounds, round_constants, internal_matrix_diag, mds_type, diffusion, ctx,
|
||||
&constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
Poseidon2(
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx)
|
||||
: hash::Hasher<S, S>(width, width, rate, 0), device_id(ctx.device_id)
|
||||
{
|
||||
Poseidon2Constants<S> constants;
|
||||
CHK_STICKY(init_poseidon2_constants(width, mds_type, diffusion, ctx, &constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
~Poseidon2()
|
||||
{
|
||||
auto ctx = device_context::get_default_device_context();
|
||||
ctx.device_id = this->device_id;
|
||||
CHK_STICKY(release_poseidon2_constants<S>(&this->constants, ctx));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Compute the poseidon hash over a sequence of preimages.
|
||||
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
|
||||
* @param T size of the poseidon state, should be equal to {arity + 1}
|
||||
* @param states a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. May point to a string of preimages or a string of states filled with preimages.
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be at least of size [number_of_states](@ref number_of_states)
|
||||
* @param number_of_states number of input blocks of size T-1 (arity)
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon2_hash(
|
||||
const S* states,
|
||||
S* output,
|
||||
size_t number_of_states,
|
||||
const Poseidon2Constants<S>& constants,
|
||||
const Poseidon2Config& config);
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
@@ -5,4 +5,15 @@
|
||||
#define CONCAT_DIRECT(a, b) a##_##b
|
||||
#define CONCAT_EXPAND(a, b) CONCAT_DIRECT(a, b) // expand a,b before concatenation
|
||||
|
||||
static unsigned int next_pow_of_two(unsigned int v) {
|
||||
v--;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
v++;
|
||||
return v;
|
||||
}
|
||||
|
||||
#endif // ICICLE_UTILS_H
|
||||
@@ -105,12 +105,12 @@ namespace vec_ops {
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
template <typename E>
|
||||
cudaError_t transpose_batch(
|
||||
cudaError_t transpose_matrix(
|
||||
const E* mat_in,
|
||||
E* mat_out,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
device_context::DeviceContext& ctx,
|
||||
const device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@ if (EXT_FIELD)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DEXT_FIELD")
|
||||
endif ()
|
||||
|
||||
SET(SUPPORTED_FIELDS_WITHOUT_NTT grumpkin)
|
||||
SET(SUPPORTED_FIELDS_WITHOUT_POSEIDON2 bls12_381;bls12_377;grumpkin;bw6_761;stark252)
|
||||
SET(SUPPORTED_FIELDS_WITHOUT_NTT grumpkin;m31)
|
||||
SET(SUPPORTED_FIELDS_WITHOUT_POSEIDON2 bls12_381;bls12_377;grumpkin;bw6_761;stark252;m31)
|
||||
|
||||
set(TARGET icicle_field)
|
||||
|
||||
@@ -11,9 +11,14 @@ set(SRC ${CMAKE_SOURCE_DIR}/src)
|
||||
|
||||
set(FIELD_SOURCE ${SRC}/fields/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/merkle-tree/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/merkle-tree/extern_mmcs.cu)
|
||||
|
||||
if(EXT_FIELD)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/fields/extern_extension.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/ntt/extern_extension.cu)
|
||||
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_NTT)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/ntt/extern_extension.cu)
|
||||
endif()
|
||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern_extension.cu)
|
||||
endif()
|
||||
|
||||
@@ -25,8 +30,6 @@ set(POLYNOMIAL_SOURCE_FILES
|
||||
# TODO: impl poseidon for small fields. note that it needs to be defined over the extension field!
|
||||
if (DEFINED CURVE)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/poseidon.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/tree/merkle.cu)
|
||||
endif()
|
||||
|
||||
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_POSEIDON2)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
set(TARGET icicle_hash)
|
||||
|
||||
add_library(${TARGET} STATIC keccak/keccak.cu)
|
||||
add_library(${TARGET} STATIC keccak/extern.cu)
|
||||
target_include_directories(${TARGET} PUBLIC ${CMAKE_SOURCE_DIR}/include/)
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "ingo_hash")
|
||||
2
icicle/src/hash/keccak/.gitignore
vendored
Normal file
2
icicle/src/hash/keccak/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
test_keccak
|
||||
test_keccak_tree
|
||||
@@ -1,2 +1,10 @@
|
||||
test_keccak_tree: test_tree.cu keccak.cu ../../merkle-tree/merkle.cu
|
||||
nvcc -DMERKLE_DEBUG -o test_keccak_tree -I../../../include test_tree.cu
|
||||
./test_keccak_tree
|
||||
|
||||
test_keccak: test.cu keccak.cu
|
||||
nvcc -o test_keccak -I. -I../.. test.cu
|
||||
nvcc -o test_keccak -I../../../include test.cu
|
||||
./test_keccak
|
||||
|
||||
clear:
|
||||
rm test_keccak test_keccak_tree
|
||||
47
icicle/src/hash/keccak/extern.cu
Normal file
47
icicle/src/hash/keccak/extern.cu
Normal file
@@ -0,0 +1,47 @@
|
||||
#include "utils/utils.h"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
#include "keccak.cu"
|
||||
#include "../../merkle-tree/merkle.cu"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
namespace keccak {
|
||||
extern "C" cudaError_t
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
|
||||
{
|
||||
return Keccak(136).hash_many(input, (uint64_t*)output, number_of_blocks, input_block_size, 4, config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
|
||||
{
|
||||
return Keccak(72).hash_many(input, (uint64_t*)output, number_of_blocks, input_block_size, 8, config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t build_keccak256_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config)
|
||||
{
|
||||
Keccak keccak(136);
|
||||
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
|
||||
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t build_keccak512_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config)
|
||||
{
|
||||
Keccak keccak(72);
|
||||
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
|
||||
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
|
||||
}
|
||||
|
||||
} // namespace keccak
|
||||
@@ -1,6 +1,16 @@
|
||||
#include <cstdint>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
|
||||
using namespace hash;
|
||||
|
||||
namespace keccak {
|
||||
using u64 = uint64_t;
|
||||
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
|
||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) \
|
||||
@@ -144,16 +154,16 @@ namespace keccak {
|
||||
element ^= rc; \
|
||||
}
|
||||
|
||||
__device__ const uint64_t RC[24] = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000,
|
||||
0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009,
|
||||
0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003,
|
||||
0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a,
|
||||
0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008};
|
||||
__device__ const u64 RC[24] = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000,
|
||||
0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009,
|
||||
0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003,
|
||||
0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a,
|
||||
0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008};
|
||||
|
||||
__device__ void keccakf(uint64_t s[25])
|
||||
__device__ void keccakf(u64 s[25])
|
||||
{
|
||||
uint64_t t0, t1, t2, t3, t4;
|
||||
u64 t0, t1, t2, t3, t4;
|
||||
|
||||
for (int i = 0; i < 24; i++) {
|
||||
THETA(
|
||||
@@ -169,107 +179,78 @@ namespace keccak {
|
||||
}
|
||||
}
|
||||
|
||||
template <int C, int D>
|
||||
__global__ void keccak_hash_blocks(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output)
|
||||
template <const int R>
|
||||
__global__ void
|
||||
keccak_hash_blocks(const uint8_t* input, int input_block_size, int output_len, int number_of_blocks, uint64_t* output)
|
||||
{
|
||||
int bid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (bid >= number_of_blocks) { return; }
|
||||
int sid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (sid >= number_of_blocks) { return; }
|
||||
|
||||
const int r_bits = 1600 - C;
|
||||
const int r_bytes = r_bits / 8;
|
||||
const int d_bytes = D / 8;
|
||||
|
||||
uint8_t* b_input = input + bid * input_block_size;
|
||||
uint8_t* b_output = output + bid * d_bytes;
|
||||
const uint8_t* b_input = input + sid * input_block_size;
|
||||
uint64_t* b_output = output + sid * output_len;
|
||||
uint64_t state[25] = {}; // Initialize with zeroes
|
||||
|
||||
int input_len = input_block_size;
|
||||
|
||||
// absorb
|
||||
while (input_len >= r_bytes) {
|
||||
// #pragma unroll
|
||||
for (int i = 0; i < r_bytes; i += 8) {
|
||||
while (input_len >= R) {
|
||||
for (int i = 0; i < R; i += 8) {
|
||||
state[i / 8] ^= *(uint64_t*)(b_input + i);
|
||||
}
|
||||
keccakf(state);
|
||||
b_input += r_bytes;
|
||||
input_len -= r_bytes;
|
||||
b_input += R;
|
||||
input_len -= R;
|
||||
}
|
||||
|
||||
// last block (if any)
|
||||
uint8_t last_block[r_bytes];
|
||||
uint8_t last_block[R];
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
last_block[i] = b_input[i];
|
||||
}
|
||||
|
||||
// pad 10*1
|
||||
last_block[input_len] = 1;
|
||||
for (int i = 0; i < r_bytes - input_len - 1; i++) {
|
||||
for (int i = 0; i < R - input_len - 1; i++) {
|
||||
last_block[input_len + i + 1] = 0;
|
||||
}
|
||||
// last bit
|
||||
last_block[r_bytes - 1] |= 0x80;
|
||||
last_block[R - 1] |= 0x80;
|
||||
|
||||
// #pragma unroll
|
||||
for (int i = 0; i < r_bytes; i += 8) {
|
||||
for (int i = 0; i < R; i += 8) {
|
||||
state[i / 8] ^= *(uint64_t*)(last_block + i);
|
||||
}
|
||||
keccakf(state);
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < d_bytes; i += 8) {
|
||||
*(uint64_t*)(b_output + i) = state[i / 8];
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
b_output[i] = state[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <int C, int D>
|
||||
cudaError_t
|
||||
keccak_hash(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
cudaError_t Keccak::run_hash_many_kernel(
|
||||
const uint8_t* input,
|
||||
uint64_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = config.ctx.stream;
|
||||
int number_of_threads = 256;
|
||||
int number_of_gpu_blocks = (number_of_states - 1) / number_of_threads + 1;
|
||||
|
||||
uint8_t* input_device;
|
||||
if (config.are_inputs_on_device) {
|
||||
input_device = input;
|
||||
} else {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&input_device, number_of_blocks * input_block_size, stream));
|
||||
CHK_IF_RETURN(
|
||||
cudaMemcpyAsync(input_device, input, number_of_blocks * input_block_size, cudaMemcpyHostToDevice, stream));
|
||||
switch (rate) {
|
||||
case 136:
|
||||
keccak_hash_blocks<136><<<number_of_gpu_blocks, number_of_threads, 0, ctx.stream>>>(
|
||||
input, input_len, output_len, number_of_states, output);
|
||||
break;
|
||||
case 72:
|
||||
keccak_hash_blocks<72><<<number_of_gpu_blocks, number_of_threads, 0, ctx.stream>>>(
|
||||
input, input_len, output_len, number_of_states, output);
|
||||
break;
|
||||
default:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "KeccakHash: #rate must be one of [136, 72]");
|
||||
}
|
||||
|
||||
uint8_t* output_device;
|
||||
if (config.are_outputs_on_device) {
|
||||
output_device = output;
|
||||
} else {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&output_device, number_of_blocks * (D / 8), stream));
|
||||
}
|
||||
|
||||
int number_of_threads = 512;
|
||||
int number_of_gpu_blocks = (number_of_blocks - 1) / number_of_threads + 1;
|
||||
keccak_hash_blocks<C, D><<<number_of_gpu_blocks, number_of_threads, 0, stream>>>(
|
||||
input_device, input_block_size, number_of_blocks, output_device);
|
||||
|
||||
if (!config.are_inputs_on_device) CHK_IF_RETURN(cudaFreeAsync(input_device, stream));
|
||||
|
||||
if (!config.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(output, output_device, number_of_blocks * (D / 8), cudaMemcpyDeviceToHost, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(output_device, stream));
|
||||
}
|
||||
|
||||
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
{
|
||||
return keccak_hash<512, 256>(input, input_block_size, number_of_blocks, output, config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
{
|
||||
return keccak_hash<1024, 512>(input, input_block_size, number_of_blocks, output, config);
|
||||
}
|
||||
} // namespace keccak
|
||||
@@ -1,5 +1,5 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "keccak.cu"
|
||||
#include "extern.cu"
|
||||
|
||||
// #define DEBUG
|
||||
|
||||
@@ -50,7 +50,7 @@ int main(int argc, char* argv[])
|
||||
uint8_t* out_ptr = static_cast<uint8_t*>(malloc(number_of_blocks * (D / 8)));
|
||||
|
||||
START_TIMER(keccak_timer);
|
||||
KeccakConfig config = default_keccak_config();
|
||||
HashConfig config = default_hash_config();
|
||||
keccak256_cuda(in_ptr, input_block_size, number_of_blocks, out_ptr, config);
|
||||
END_TIMER(keccak_timer, "Keccak")
|
||||
|
||||
|
||||
Binary file not shown.
91
icicle/src/hash/keccak/test_tree.cu
Normal file
91
icicle/src/hash/keccak/test_tree.cu
Normal file
@@ -0,0 +1,91 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "extern.cu"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
using namespace keccak;
|
||||
|
||||
#define D 256
|
||||
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
/// Prints `size` bytes starting at `values` to stdout as a single line of
/// lowercase hexadecimal text, two digits per byte, followed by a newline.
void uint8_to_hex_string(const uint8_t* values, int size)
{
  static const char kHexDigits[] = "0123456789abcdef";

  std::string hex_line;
  for (int pos = 0; pos < size; ++pos) {
    const uint8_t byte = values[pos];
    hex_line += kHexDigits[byte >> 4];   // high nibble first
    hex_line += kHexDigits[byte & 0x0f]; // then low nibble
  }

  std::cout << hex_line << std::endl;
}
|
||||
|
||||
#define A 2
|
||||
|
||||
/// Benchmark driver: builds a Keccak-256 Merkle tree of arity `A` over synthetic leaves.
///
/// Usage: <binary> [tree_height = 10] [keep_rows = 7]
///
/// Every leaf is `input_block_len` bytes; leaf bytes are filled with the low 8 bits of
/// their index. The kept digests are printed one 64-bit value per line.
/// Returns 0 on success and 1 if a host allocation or the tree construction fails.
int main(int argc, char* argv[])
{
  // Referenced by the END_TIMER macro, which is expanded inside this function.
  using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;

  /// The bottom layer of a tree of height N and arity A holds A^N leaves
  uint32_t input_block_len = 136;
  uint32_t tree_height = argc > 1 ? atoi(argv[1]) : 10;
  uint32_t number_of_leaves = pow(A, tree_height);
  // Byte size of all leaves. Computed in 64 bits: the product overflows 32 bits
  // already for tree heights around 25.
  uint64_t total_number_of_leaves = (uint64_t)number_of_leaves * input_block_len;

  /// Use keep_rows to specify how many rows do you want to store
  int keep_rows = argc > 2 ? atoi(argv[2]) : 7;
  size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, A, 1);

  /// Fill the leaves buffer with a repeating byte pattern
  START_TIMER(timer_allocation);
  uint8_t* leaves = static_cast<uint8_t*>(malloc(total_number_of_leaves));
  if (leaves == nullptr) {
    std::cerr << "Failed to allocate " << total_number_of_leaves << " bytes for leaves" << std::endl;
    return 1;
  }
  for (uint64_t i = 0; i < total_number_of_leaves; i++) {
    leaves[i] = (uint8_t)i;
  }
  END_TIMER(timer_allocation, "Allocated memory for leaves: ");

  /// Allocate memory for digests of {keep_rows} rows of a tree
  START_TIMER(timer_digests);
  size_t digests_mem = digests_len * sizeof(uint64_t);
  uint64_t* digests = static_cast<uint64_t*>(malloc(digests_mem));
  if (digests == nullptr) {
    std::cerr << "Failed to allocate " << digests_mem << " bytes for digests" << std::endl;
    free(leaves);
    return 1;
  }
  END_TIMER(timer_digests, "Allocated memory for digests");

  std::cout << "Memory for leaves = " << total_number_of_leaves / 1024 / 1024 << " MB; "
            << total_number_of_leaves / 1024 / 1024 / 1024 << " GB" << std::endl;
  std::cout << "Number of leaves = " << number_of_leaves << std::endl;
  std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
  std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
            << " GB" << std::endl;
  std::cout << "Number of digest elements = " << digests_len << std::endl;

  std::cout << "Total RAM consumption = " << (digests_mem + total_number_of_leaves) / 1024 / 1024 << " MB; "
            << (digests_mem + total_number_of_leaves) / 1024 / 1024 / 1024 << " GB" << std::endl;

  merkle_tree::TreeBuilderConfig config = merkle_tree::default_merkle_config();
  config.arity = A;
  config.keep_rows = keep_rows;
  START_TIMER(keccak_timer);
  cudaError_t build_status = build_keccak256_merkle_tree_cuda(leaves, digests, tree_height, input_block_len, config);
  END_TIMER(keccak_timer, "Keccak")

  // Do not print the digest buffer if the build failed: its contents are not meaningful then.
  if (build_status != cudaSuccess) {
    std::cerr << "Merkle tree construction failed: " << cudaGetErrorString(build_status) << std::endl;
    free(digests);
    free(leaves);
    return 1;
  }

  for (size_t i = 0; i < digests_len; i++) {
    uint64_t root = digests[i];
    std::cout << root << std::endl;
    // assert(root == expected[i]);
  }
  free(digests);
  free(leaves);
  return 0;
}
|
||||
|
||||
#endif
|
||||
25
icicle/src/merkle-tree/extern.cu
Normal file
25
icicle/src/merkle-tree/extern.cu
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "merkle.cu"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
namespace merkle_tree {
  /// C ABI entry point for build_merkle_tree, instantiated for the configured field
  /// (`scalar_t` is used for both leaves and digests). The exported symbol name is
  /// produced by CONCAT_EXPAND(FIELD, build_merkle_tree).
  ///
  /// @param leaves_digests  input leaves, forwarded unchanged
  /// @param digests         output buffer for the kept tree rows, forwarded unchanged
  /// @param height          tree height, forwarded unchanged
  /// @param input_block_len number of elements per leaf, forwarded unchanged
  /// @param compression     hasher used for inner layers; must not be null
  /// @param bottom_layer    hasher used for the leaves; must not be null
  /// @param tree_config     tree builder configuration
  /// @return cudaErrorInvalidValue if either hasher pointer is null, otherwise the
  ///         result of build_merkle_tree.
  extern "C" cudaError_t CONCAT_EXPAND(FIELD, build_merkle_tree)(
    const scalar_t* leaves_digests,
    scalar_t* digests,
    unsigned int height,
    unsigned int input_block_len,
    const hash::Hasher<scalar_t, scalar_t>* compression,
    const hash::Hasher<scalar_t, scalar_t>* bottom_layer,
    const TreeBuilderConfig& tree_config)
  {
    // Both hashers are dereferenced below; reject null pointers coming across the FFI boundary
    // instead of invoking undefined behaviour.
    if (compression == nullptr || bottom_layer == nullptr) { return cudaErrorInvalidValue; }

    return build_merkle_tree<scalar_t, scalar_t>(
      leaves_digests, digests, height, input_block_len, *compression, *bottom_layer, tree_config);
  }
} // namespace merkle_tree
|
||||
26
icicle/src/merkle-tree/extern_mmcs.cu
Normal file
26
icicle/src/merkle-tree/extern_mmcs.cu
Normal file
@@ -0,0 +1,26 @@
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "mmcs.cu"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace merkle_tree {
  /// C ABI entry point for mmcs_commit, instantiated for the configured field
  /// (`scalar_t` is used for both leaves and digests). The exported symbol name is
  /// produced by CONCAT_EXPAND(FIELD, mmcs_commit_cuda).
  ///
  /// @param leaves           array of input matrices, forwarded unchanged
  /// @param number_of_inputs number of matrices in `leaves`
  /// @param digests          output buffer for the digests, forwarded unchanged
  /// @param hasher           hasher applied to the matrix rows; must not be null
  /// @param compression      hasher used to fold layers; must not be null
  /// @param tree_config      tree builder configuration
  /// @return cudaErrorInvalidValue if either hasher pointer is null, otherwise the
  ///         result of mmcs_commit.
  extern "C" cudaError_t CONCAT_EXPAND(FIELD, mmcs_commit_cuda)(
    const Matrix<scalar_t>* leaves,
    unsigned int number_of_inputs,
    scalar_t* digests,
    const hash::Hasher<scalar_t, scalar_t>* hasher,
    const hash::Hasher<scalar_t, scalar_t>* compression,
    const TreeBuilderConfig& tree_config)
  {
    // Both hashers are dereferenced below; reject null pointers coming across the FFI boundary
    // instead of invoking undefined behaviour.
    if (hasher == nullptr || compression == nullptr) { return cudaErrorInvalidValue; }

    return mmcs_commit<scalar_t, scalar_t>(leaves, number_of_inputs, digests, *hasher, *compression, tree_config);
  }
} // namespace merkle_tree
|
||||
331
icicle/src/merkle-tree/merkle.cu
Normal file
331
icicle/src/merkle-tree/merkle.cu
Normal file
@@ -0,0 +1,331 @@
|
||||
#include "hash/hash.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
namespace merkle_tree {
|
||||
/// Constructs merkle subtree without parallelization
|
||||
/// The digests are aligned sequentially per row
|
||||
/// Example:
|
||||
///
|
||||
/// Big tree:
|
||||
///
|
||||
/// 1 <- Root
|
||||
/// / \ <- Arity = 2
|
||||
/// 2 3 <- Digests
|
||||
/// / \ / \ <- Height = 2 (as the number of edges)
|
||||
/// 4 5 6 7 <- arity^height leaves
|
||||
/// | | | | <- Bottom layer hash 1 to 1
|
||||
/// a b c d <- Input vector 1x4
|
||||
///
|
||||
/// Subtree 1 Subtree 2
|
||||
/// 2 3
|
||||
/// / \ / \
|
||||
/// 4 5 6 7
|
||||
///
|
||||
/// Digests array for subtree 1:
|
||||
/// [4 5 . . 2 . .]
|
||||
/// | | |
|
||||
/// ----- V
|
||||
/// | Segment (offset = 4, subtree_idx = 0)
|
||||
/// v
|
||||
/// Segment (offset = 0, subtree_idx = 0)
|
||||
///
|
||||
/// Digests array for subtree 2:
|
||||
/// [. . 6 7 . 3 .]
|
||||
/// | |
|
||||
/// -----
|
||||
/// |
|
||||
/// v
|
||||
/// Segment (offset = 0, subtree_idx = 1)
|
||||
///
|
||||
/// Total digests array:
|
||||
/// [4 5 6 7 2 3 .]
|
||||
///
|
||||
/// Example for custom config:
|
||||
///
|
||||
/// arity = 2
|
||||
/// input_block_len = 2
|
||||
/// digest_elements = 2
|
||||
/// bottom_layer hash width = 4
|
||||
/// compression width = 4
|
||||
/// height = 2
|
||||
///
|
||||
/// [a, b] <- Root of the tree
|
||||
/// | |
|
||||
/// [a, b, c, d]
|
||||
/// / \ / \
|
||||
/// [i, j, m, n]
|
||||
/// ┌──┬──────┴──┴──┴──┴──────┬──┐
|
||||
/// | | | |
|
||||
/// [i, j, k, l] [m, n, o, p] <- compression states
|
||||
/// / \ / \ / \ / \ <- Running permutation
|
||||
/// [1, 2, 5, 6] [9, 1, 4, 5] <- compression states
|
||||
/// ┌──┬───┴──┴──┼──┤ ┌──┬───┴──┴──┼──┤
|
||||
/// | | | | | | | | <- digest_elements * arity^height leaves
|
||||
/// [1, 2, 3, 4] [5, 6, 7, 8] [9, 1, 2, 3] [4, 5, 6, 7] <- Permuted states
|
||||
/// / \ / \ / \ / \ / \ / \ / \ / \ <- Running permutation
|
||||
/// [a, b, 0, 0] [c, d, 0, 0] [e, f, 0, 0] [g, h, 0, 0] <- States of the bottom layer hash
|
||||
/// | | | | | | | | <- Bottom layer hash 2 to 2
|
||||
/// a b c d e f g h <- Input vector 2x4
|
||||
///
|
||||
/// Input matrix:
|
||||
/// ┌ ┐
|
||||
/// | a b |
|
||||
/// | c d |
|
||||
/// | e f |
|
||||
/// | g h |
|
||||
/// └ ┘
|
||||
|
||||
/// Builds one subtree on `ctx.stream` and copies the rows selected by `keep_rows`
/// into `big_tree_digests`.
///
/// Steps, all enqueued on `ctx.stream`:
///  1. If the inputs are on the host, copy this subtree's leaves into `d_leaves`.
///  2. Hash the leaves with `bottom_layer` into `digests`.
///  3. Repeatedly compress `arity` digests into one with `compression`, ping-ponging
///     between `digests` and `d_leaves` (the leaves buffer is reused as scratch space),
///     until a single state has been produced.
/// After each layer, the layer is copied out when `keep_rows` is 0 or the remaining
/// subtree height is below `keep_rows`.
///
/// @param leaves               this subtree's leaves (host or device, see tree_config.are_inputs_on_device)
/// @param d_leaves             device scratch buffer; receives host leaves and is reused for layer outputs
/// @param digests              device buffer receiving the bottom-layer digests
/// @param subtree_idx          index of this subtree inside the big tree
/// @param subtree_height       height of this subtree
/// @param big_tree_digests     destination for the kept rows
/// @param start_segment_size   number of elements in the first (widest) output segment of the big tree
/// @param start_segment_offset element offset of the first output segment in `big_tree_digests`
/// @param keep_rows            number of rows to keep; 0 keeps every row
/// @param input_block_len      number of elements per leaf
/// @param bottom_layer         hasher applied to the leaves
/// @param compression          hasher applied to inner layers
/// @param tree_config          arity, digest_elements and input location are read from here
/// @param ctx                  device context whose stream all work is enqueued on
/// @return the first failing CUDA status, otherwise CHK_LAST()
template <typename L, typename D>
cudaError_t build_merkle_subtree(
  const L* leaves,
  L* d_leaves,
  D* digests,
  size_t subtree_idx,
  size_t subtree_height,
  D* big_tree_digests,
  size_t start_segment_size,
  size_t start_segment_offset,
  uint64_t keep_rows,
  uint64_t input_block_len,
  const Hasher<L, D>& bottom_layer,
  const Hasher<L, D>& compression,
  const TreeBuilderConfig& tree_config,
  device_context::DeviceContext& ctx)
{
  uint64_t arity = tree_config.arity;

  HashConfig hash_config = default_hash_config(ctx);
  hash_config.are_inputs_on_device = true;
  hash_config.are_outputs_on_device = true;
  hash_config.is_async = true;

  size_t bottom_layer_states = pow(arity, subtree_height);

  if (!tree_config.are_inputs_on_device) {
    CHK_IF_RETURN(cudaMemcpyAsync(
      d_leaves, leaves, bottom_layer_states * input_block_len * sizeof(L), cudaMemcpyHostToDevice, ctx.stream));
  }

  // Propagate a failure of the bottom-layer hash instead of silently continuing.
  CHK_IF_RETURN(bottom_layer.hash_many(
    tree_config.are_inputs_on_device ? leaves : d_leaves, digests, bottom_layer_states, input_block_len,
    tree_config.digest_elements, hash_config));

  uint64_t number_of_states = bottom_layer_states / arity;
  size_t segment_size = start_segment_size;
  size_t segment_offset = start_segment_offset;

  if (!keep_rows || subtree_height < keep_rows) {
    // Each state contributes `digest_elements` values, so the per-subtree stride must be scaled
    // accordingly (as it is for the upper layers below). Without the factor, the regions written
    // by different subtrees overlap whenever digest_elements > 1.
    D* digests_with_offset =
      big_tree_digests + segment_offset + subtree_idx * bottom_layer_states * tree_config.digest_elements;
    CHK_IF_RETURN(cudaMemcpyAsync(
      digests_with_offset, digests, bottom_layer_states * tree_config.digest_elements * sizeof(D),
      cudaMemcpyDeviceToHost, ctx.stream));
    segment_offset += segment_size;
  }
  segment_size /= arity;
  subtree_height--;

  // The leaves are no longer needed once hashed, so their buffer doubles as the second
  // ping-pong buffer for the compression layers.
  D* prev_layer = digests;
  D* next_layer = (D*)d_leaves;

  while (number_of_states > 0) {
    CHK_IF_RETURN(compression.run_hash_many_kernel(
      (L*)prev_layer, next_layer, number_of_states, tree_config.digest_elements * tree_config.arity,
      tree_config.digest_elements, hash_config.ctx));

    if (!keep_rows || subtree_height < keep_rows) {
      D* digests_with_offset =
        big_tree_digests + segment_offset + subtree_idx * number_of_states * tree_config.digest_elements;
      CHK_IF_RETURN(cudaMemcpyAsync(
        digests_with_offset, next_layer, number_of_states * tree_config.digest_elements * sizeof(D),
        cudaMemcpyDeviceToHost, ctx.stream));
      segment_offset += segment_size;
    }
    swap<D>(&prev_layer, &next_layer);
    segment_size /= arity;
    subtree_height--;
    number_of_states /= arity;
  }

  return CHK_LAST();
}
|
||||
|
||||
/// Builds a Merkle tree of the given `height` over `leaves` and writes the kept rows to `digests`.
///
/// The tree is split into `number_of_subtrees` subtrees (a power of the arity) until the working
/// set of one subtree fits into `STREAM_CHUNK_SIZE` bytes. The subtrees are built by
/// build_merkle_subtree on a pool of CUDA streams that share pre-allocated device buffers.
/// If the tree was split, the remaining top `cap_height` layers are then compressed on
/// `tree_config.ctx.stream`. In "caps mode" (keep_rows is non-zero and not larger than
/// cap_height) the subtree roots are staged in a temporary host buffer instead of `digests`.
///
/// @param leaves          input leaves; `input_block_len` elements per leaf
/// @param digests         output buffer for the kept rows
/// @param height          tree height
/// @param input_block_len number of elements per leaf
/// @param compression     hasher used for inner layers; its preimage_max_length must be at least
///                        arity * digest_elements
/// @param bottom_layer    hasher used for the leaves
/// @param tree_config     arity, digest_elements, keep_rows, data location, stream and async flag
/// @return the first failing CUDA status; otherwise the result of synchronizing the main stream
///         (synchronous mode) or CHK_LAST() (asynchronous mode)
template <typename L, typename D>
cudaError_t build_merkle_tree(
  const L* leaves,
  D* digests,
  unsigned int height,
  unsigned int input_block_len,
  const Hasher<L, D>& compression,
  const Hasher<L, D>& bottom_layer,
  const TreeBuilderConfig& tree_config)
{
  CHK_INIT_IF_RETURN();
  cudaStream_t& stream = tree_config.ctx.stream;

  if (compression.preimage_max_length < tree_config.arity * tree_config.digest_elements)
    THROW_ICICLE_ERR(
      IcicleError_t::InvalidArgument,
      "Hash max preimage length does not match merkle tree arity multiplied by digest elements");

  uint64_t number_of_bottom_layer_states = pow(tree_config.arity, height);

  // This will determine how much splitting do we need to do
  // `number_of_streams` subtrees should fit in the device
  // This means each subtree should fit in `STREAM_CHUNK_SIZE` memory
  uint64_t number_of_subtrees = 1;
  uint64_t subtree_height = height;
  uint64_t subtree_bottom_layer_states = number_of_bottom_layer_states;
  uint64_t subtree_leaves_size = subtree_bottom_layer_states * input_block_len;
  uint64_t subtree_digests_size = subtree_bottom_layer_states * tree_config.digest_elements;

  // The leaves buffer is reused for layer outputs, so it must hold at least one compressed layer
  // even when the leaves themselves already live on the device.
  size_t subtree_d_leaves_memory = std::max(
    tree_config.are_inputs_on_device ? 0 : (sizeof(L) * subtree_leaves_size),
    subtree_digests_size * sizeof(D) / tree_config.arity);
  size_t subtree_memory_required = sizeof(D) * subtree_digests_size + subtree_d_leaves_memory;
  while (subtree_memory_required > STREAM_CHUNK_SIZE) {
    number_of_subtrees *= tree_config.arity;
    subtree_height--;
    subtree_bottom_layer_states /= tree_config.arity;
    subtree_digests_size /= tree_config.arity;
    subtree_leaves_size /= tree_config.arity;
    subtree_d_leaves_memory /= tree_config.arity;
    subtree_memory_required = sizeof(D) * subtree_digests_size + subtree_d_leaves_memory;
  }
  int cap_height = height - subtree_height;
  size_t caps_len = pow(tree_config.arity, cap_height) * tree_config.digest_elements;

  size_t available_memory, _total_memory;
  CHK_IF_RETURN(cudaMemGetInfo(&available_memory, &_total_memory));
  // Leave 128 MB just in case. Clamp at zero: an unsigned subtraction would wrap around to a
  // huge value when less than the reserve is free.
  const size_t reserved_memory = GIGA / 8;
  available_memory = available_memory > reserved_memory ? available_memory - reserved_memory : 0;

  // We can effectively parallelize memory copy with streams
  // as long as they don't operate on more than `STREAM_CHUNK_SIZE` bytes.
  // At least one stream is always used; zero streams would make the modulo below undefined.
  const size_t number_of_streams =
    std::max((uint64_t)1, std::min((uint64_t)(available_memory / STREAM_CHUNK_SIZE), number_of_subtrees));
  cudaStream_t* streams = static_cast<cudaStream_t*>(malloc(sizeof(cudaStream_t) * number_of_streams));
  if (streams == nullptr) { return cudaErrorMemoryAllocation; }
  for (size_t i = 0; i < number_of_streams; i++) {
    CHK_IF_RETURN(cudaStreamCreate(&streams[i]));
  }

  bool caps_mode = tree_config.keep_rows && tree_config.keep_rows <= cap_height;
  D* caps = nullptr;
  if (caps_mode) { caps = static_cast<D*>(malloc(caps_len * sizeof(D))); }

#ifdef MERKLE_DEBUG
  std::cout << "Available memory = " << available_memory / 1024 / 1024 << " MB" << std::endl;
  std::cout << "Number of streams = " << number_of_streams << std::endl;
  std::cout << "Number of subtrees = " << number_of_subtrees << std::endl;
  std::cout << "Height of a subtree = " << subtree_height << std::endl;
  std::cout << "Cutoff height = " << height - subtree_height << std::endl;
  std::cout << "Number of leaves in a subtree = " << subtree_bottom_layer_states << std::endl;
  std::cout << "Digest elements for a subtree = " << subtree_digests_size << std::endl;
  std::cout << "Size of 1 subtree digests = " << subtree_digests_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
  std::cout << "Cap height = " << cap_height << std::endl;
  std::cout << "Enabling caps mode? " << caps_mode << std::endl;
  std::cout << "Allocated " << subtree_d_leaves_memory << " bytes for d_leaves" << std::endl;
#endif

  // Allocate memory for the leaves and digests
  // These are shared by streams in a pool
  L* d_leaves_ptr;
  D* digests_ptr;
  CHK_IF_RETURN(cudaMallocAsync(&d_leaves_ptr, subtree_d_leaves_memory * number_of_streams, stream));
  CHK_IF_RETURN(cudaMallocAsync(&digests_ptr, subtree_digests_size * number_of_streams * sizeof(D), stream));
  // Wait for these allocations to finish
  CHK_IF_RETURN(cudaStreamSynchronize(stream));

  // Build subtrees in parallel. This for loop invokes kernels that can run in a pool of size `number_of_streams`
  for (size_t subtree_idx = 0; subtree_idx < number_of_subtrees; subtree_idx++) {
    size_t stream_idx = subtree_idx % number_of_streams;
    cudaStream_t subtree_stream = streams[stream_idx];

    const L* subtree_leaves = leaves + subtree_idx * subtree_bottom_layer_states * input_block_len;
    // subtree_d_leaves_memory is a byte count, hence the byte-wise pointer arithmetic
    L* subtree_d_leaves = (L*)((unsigned char*)d_leaves_ptr + stream_idx * subtree_d_leaves_memory);
    D* subtree_digests = digests_ptr + stream_idx * subtree_digests_size;

    int subtree_keep_rows = 0;
    if (tree_config.keep_rows) {
      int diff = tree_config.keep_rows - cap_height;
      subtree_keep_rows = std::max(1, diff);
    }
    device_context::DeviceContext subtree_context{subtree_stream, tree_config.ctx.device_id, tree_config.ctx.mempool};

    uint64_t start_segment_size = number_of_bottom_layer_states * tree_config.digest_elements;
    cudaError_t subtree_result = build_merkle_subtree<L, D>(
      subtree_leaves,             // leaves
      subtree_d_leaves,           // d_leaves
      subtree_digests,            // digests
      subtree_idx,                // subtree_idx
      subtree_height,             // subtree_height
      caps_mode ? caps : digests, // big_tree_digests
      start_segment_size,         // start_segment_size
      0,                          // start_segment_offset
      subtree_keep_rows,          // keep_rows
      input_block_len,            // input_block_len
      bottom_layer,               // bottom_layer
      compression,                // compression
      tree_config,                // tree_config
      subtree_context             // subtree_context
    );
    CHK_IF_RETURN(subtree_result);
  }

  for (size_t i = 0; i < number_of_streams; i++) {
    CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
  }

  // Finish the top-level tree if any
  if (cap_height > 0) {
    size_t start_segment_size = caps_len / tree_config.arity;
    size_t start_segment_offset = 0;
    if (!caps_mode) { // Calculate offset
      size_t keep_rows = tree_config.keep_rows ? tree_config.keep_rows : height + 1;
      size_t layer_size = pow(tree_config.arity, keep_rows - 1) * tree_config.digest_elements;
      for (int i = 0; i < keep_rows - cap_height; i++) {
        start_segment_offset += layer_size;
        layer_size /= tree_config.arity;
      }
    }
    // Load the subtree roots (the "caps") back onto the device as input of the top-level tree
    CHK_IF_RETURN(cudaMemcpyAsync(
      d_leaves_ptr, caps_mode ? caps : (digests + start_segment_offset - caps_len), caps_len * sizeof(D),
      (caps_mode || !tree_config.are_outputs_on_device) ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, stream));

    uint64_t number_of_states = caps_len / tree_config.arity / tree_config.digest_elements;

    D* prev_layer = (D*)d_leaves_ptr;
    D* next_layer = digests_ptr;

    size_t segment_size = start_segment_size;
    size_t segment_offset = start_segment_offset;
    while (number_of_states > 0) {
      CHK_IF_RETURN(compression.run_hash_many_kernel(
        (L*)prev_layer, next_layer, number_of_states, tree_config.digest_elements * tree_config.arity,
        tree_config.digest_elements, tree_config.ctx));
      if (!tree_config.keep_rows || cap_height < tree_config.keep_rows + (int)caps_mode) {
        D* digests_with_offset = digests + segment_offset;
        CHK_IF_RETURN(cudaMemcpyAsync(
          digests_with_offset, next_layer, number_of_states * tree_config.digest_elements * sizeof(D),
          cudaMemcpyDeviceToHost, stream));
        segment_offset += segment_size;
      }

      swap<D>(&prev_layer, &next_layer);

      segment_size /= tree_config.arity;
      cap_height--;
      number_of_states /= tree_config.arity;
    }
    if (caps_mode) { free(caps); }
  }

  CHK_IF_RETURN(cudaFreeAsync(d_leaves_ptr, stream));
  CHK_IF_RETURN(cudaFreeAsync(digests_ptr, stream));

  // Release the stream pool on every path. Previously the synchronous path returned before this
  // cleanup and leaked both the streams and the array holding them.
  for (size_t i = 0; i < number_of_streams; i++) {
    CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
    CHK_IF_RETURN(cudaStreamDestroy(streams[i]));
  }
  free(streams);

  if (!tree_config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
  return CHK_LAST();
}
|
||||
|
||||
} // namespace merkle_tree
|
||||
456
icicle/src/merkle-tree/mmcs.cu
Normal file
456
icicle/src/merkle-tree/mmcs.cu
Normal file
@@ -0,0 +1,456 @@
|
||||
#include "hash/hash.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace merkle_tree {
|
||||
|
||||
/// Hashes the rows of the given matrices into `digests` and zero-fills the digests of the
/// padding rows up to the next power of two of `number_of_rows`.
///
/// @param leaves           matrices passed to `hasher.hash_2d`
/// @param number_of_inputs number of matrices in `leaves`
/// @param number_of_rows   number of rows to hash
/// @param digests          output buffer; must have room for
///                         next_pow_of_two(number_of_rows) * digest_elements values
/// @param digest_elements  number of output elements per row
/// @param hasher           hasher providing hash_2d
/// @param ctx              device context whose stream the work is enqueued on
/// @return the first failing CUDA status, otherwise CHK_LAST()
template <typename L, typename D>
cudaError_t hash_leaves(
  const Matrix<L>* leaves,
  unsigned int number_of_inputs,
  uint64_t number_of_rows,
  D* digests,
  unsigned int digest_elements,
  const Hasher<L, D>& hasher,
  const device_context::DeviceContext& ctx)
{
  const uint64_t number_of_rows_padded = next_pow_of_two(number_of_rows);

  CHK_IF_RETURN(hasher.hash_2d(leaves, digests, number_of_inputs, digest_elements, number_of_rows, ctx));

  const uint64_t padding_rows = number_of_rows_padded - number_of_rows;
  if (padding_rows) {
    // Pad with default (all-zero) digests.
    // The padding starts after the last real digest, i.e. `number_of_rows * digest_elements`
    // values into the buffer; starting at `number_of_rows` would wipe real digests whenever
    // digest_elements > 1.
    // NOTE(review): assumes hash_2d writes `digest_elements` consecutive values per row - confirm.
    CHK_IF_RETURN(cudaMemsetAsync(
      (void*)(digests + number_of_rows * digest_elements), 0, padding_rows * digest_elements * sizeof(D),
      ctx.stream));
  }

  return CHK_LAST();
}
|
||||
|
||||
/// Mutable state shared by the MMCS subtree-building helpers.
/// The helpers update the row counts, segment bookkeeping and leaves offset as they
/// move from one layer to the next.
template <typename L, typename D>
struct SubtreeParams {
  // --- Tree shape ---
  unsigned int number_of_inputs; ///< How many input matrices there are
  unsigned int arity;            ///< Tree arity
  unsigned int digest_elements;  ///< Output elements produced by one hash
  size_t number_of_rows;         ///< Input rows processed at the current layer
  size_t number_of_rows_padded;  ///< Padded row count for the current layer (next power of arity of number_of_rows)
  size_t subtree_idx;            ///< Id of this subtree
  size_t number_of_subtrees;     ///< How many subtrees exist in total
  uint64_t subtree_height;       ///< Height of a single subtree

  // --- Output bookkeeping: one segment corresponds to one layer of output digests ---
  size_t segment_size;   ///< Size of the current segment
  size_t segment_offset; ///< Offset of the current segment

  // --- Leaf injection ---
  unsigned int leaves_offset;              ///< Position in the sorted list of input matrices
  unsigned int number_of_leaves_to_inject; ///< Matrices to inject at the current level

  // --- Configuration ---
  unsigned int keep_rows; ///< Number of rows to keep
  bool are_inputs_on_device;
  bool caps_mode;

  // --- Borrowed collaborators; null until assigned by the owner of this struct ---
  const Hasher<L, D>* hasher{nullptr};
  const Hasher<L, D>* compression{nullptr};
  const device_context::DeviceContext* ctx{nullptr};
};
|
||||
|
||||
/// Selects the input matrices that belong to the current layer, slices out the rows owned by
/// this subtree and publishes their descriptors to the device.
///
/// Starting at `params.leaves_offset`, matrices are consumed while their height rounded up to a
/// power of two is at least `number_of_rows_padded * number_of_subtrees`; those whose rounded
/// height equals that value are injected at this layer. `params.leaves_offset`,
/// `params.number_of_leaves_to_inject` and (if anything is injected) `params.number_of_rows`
/// are updated.
///
/// @param leaves        all input matrices (the caller's sorted list)
/// @param d_leaves      device buffer receiving the row slices when the inputs are on the host
/// @param d_leaves_info device array receiving one Matrix descriptor per injected matrix
/// @param params        subtree state; read and updated as described above
/// @return the first failing CUDA status, otherwise CHK_LAST()
template <typename L, typename D>
cudaError_t slice_and_copy_leaves(
  const std::vector<Matrix<L>>& leaves, L* d_leaves, Matrix<L>* d_leaves_info, SubtreeParams<L, D>& params)
{
  uint64_t target_height = params.number_of_rows_padded * params.number_of_subtrees;
  params.number_of_leaves_to_inject = 0;
  while (params.leaves_offset < params.number_of_inputs &&
         next_pow_of_two(leaves[params.leaves_offset].height) >= target_height) {
    if (next_pow_of_two(leaves[params.leaves_offset].height) == target_height) params.number_of_leaves_to_inject++;
    params.leaves_offset++;
  }

  if (params.number_of_leaves_to_inject == 0) return CHK_LAST();

  const size_t first_injected = params.leaves_offset - params.number_of_leaves_to_inject;
  size_t rows_offset = params.subtree_idx * params.number_of_rows_padded;
  size_t actual_layer_rows = leaves[first_injected].height;
  // A subtree that lies entirely in the padding region owns no real rows. The unguarded
  // unsigned subtraction would wrap around and make the copies below read past the matrix.
  // NOTE(review): callees now may see number_of_rows == 0 for such subtrees - confirm they accept it.
  params.number_of_rows =
    rows_offset < actual_layer_rows ? std::min(actual_layer_rows - rows_offset, params.number_of_rows_padded) : 0;

  // Host-side staging for the descriptors; owned by the vector so it is released on every path.
  std::vector<Matrix<L>> leaves_info;
  leaves_info.reserve(params.number_of_leaves_to_inject);

  L* d_leaves_ptr = d_leaves;
  for (unsigned int i = 0; i < params.number_of_leaves_to_inject; i++) {
    Matrix<L> leaf = leaves[first_injected + i];
    if (!params.are_inputs_on_device) {
      CHK_IF_RETURN(cudaMemcpyAsync(
        d_leaves_ptr, leaf.values + rows_offset * leaf.width, params.number_of_rows * leaf.width * sizeof(L),
        cudaMemcpyHostToDevice, params.ctx->stream));
    } else {
      // Inputs already live on the device: point straight into the caller's matrix.
      d_leaves_ptr = leaf.values + rows_offset * leaf.width;
    }

    leaves_info.push_back({d_leaves_ptr, leaf.width, params.number_of_rows});
    d_leaves_ptr += params.number_of_rows * leaf.width;
  }
  CHK_IF_RETURN(cudaMemcpyAsync(
    d_leaves_info, leaves_info.data(), params.number_of_leaves_to_inject * sizeof(Matrix<L>), cudaMemcpyHostToDevice,
    params.ctx->stream));

  return CHK_LAST();
}
|
||||
|
||||
/// Copies the current layer of `digests` into `big_tree_digests` when the layer has to be kept.
///
/// A layer is kept when `params.keep_rows` is 0 or when `params.subtree_height` is below
/// `params.keep_rows` (plus one in caps mode). The destination is addressed with the segment
/// model: `params.segment_offset` selects the layer and the subtree index selects the slice
/// inside it. After a copy, `params.segment_offset` is advanced by `params.segment_size`.
template <typename L, typename D>
cudaError_t maybe_copy_digests(D* digests, L* big_tree_digests, SubtreeParams<L, D>& params)
{
  const bool layer_is_kept =
    !params.keep_rows || params.subtree_height < params.keep_rows + (int)params.caps_mode;
  if (!layer_is_kept) return CHK_LAST();

  // Number of digest values this subtree contributes to the current layer
  const size_t layer_elements = params.number_of_rows_padded * params.digest_elements;

  D* destination = big_tree_digests + params.segment_offset + params.subtree_idx * layer_elements;
  CHK_IF_RETURN(
    cudaMemcpyAsync(destination, digests, layer_elements * sizeof(D), cudaMemcpyDeviceToHost, params.ctx->stream));
  params.segment_offset += params.segment_size;

  return CHK_LAST();
}
|
||||
|
||||
/// Produces the next layer of digests from `prev_layer`.
///
/// First selects the matrices to inject at this layer (slice_and_copy_leaves). If there are any,
/// the compression hasher's `compress_and_inject` combines them with `prev_layer`; otherwise
/// `prev_layer` is compressed with `run_hash_many_kernel`. The result is written to `next_layer`.
template <typename L, typename D>
cudaError_t fold_layer(
  const std::vector<Matrix<L>>& leaves,
  D* prev_layer,
  D* next_layer,
  L* aux_leaves_mem,
  Matrix<L>* d_leaves_info,
  SubtreeParams<L, D>& params)
{
  CHK_IF_RETURN(slice_and_copy_leaves<L>(leaves, aux_leaves_mem, d_leaves_info, params));

  const Hasher<L, D>& compressor = *params.compression;
  const device_context::DeviceContext& device_ctx = *params.ctx;

  // Nothing to inject at this layer: plain compression of the previous layer
  if (!params.number_of_leaves_to_inject) {
    CHK_IF_RETURN(compressor.run_hash_many_kernel(
      prev_layer, next_layer, params.number_of_rows_padded, compressor.width, params.digest_elements, device_ctx));
    return CHK_LAST();
  }

  CHK_IF_RETURN(compressor.compress_and_inject(
    d_leaves_info, params.number_of_leaves_to_inject, params.number_of_rows, prev_layer, next_layer,
    params.digest_elements, device_ctx));
  return CHK_LAST();
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t build_mmcs_subtree(
|
||||
const std::vector<Matrix<L>>& leaves,
|
||||
L* d_leaves,
|
||||
D* states,
|
||||
L* aux_leaves_mem,
|
||||
L* big_tree_digests,
|
||||
SubtreeParams<L, D>& params)
|
||||
{
|
||||
// Leaves info
|
||||
Matrix<L>* d_leaves_info;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_leaves_info, params.number_of_inputs * sizeof(Matrix<L>), params.ctx->stream));
|
||||
|
||||
CHK_IF_RETURN(slice_and_copy_leaves(leaves, d_leaves, d_leaves_info, params));
|
||||
|
||||
// Reuse leaves memory
|
||||
D* digests = (D*)d_leaves;
|
||||
|
||||
CHK_IF_RETURN(hash_leaves(
|
||||
d_leaves_info, params.number_of_leaves_to_inject, params.number_of_rows, states, params.digest_elements,
|
||||
*params.hasher, *params.ctx));
|
||||
|
||||
CHK_IF_RETURN(maybe_copy_digests(digests, big_tree_digests, params));
|
||||
|
||||
params.number_of_rows_padded /= params.arity;
|
||||
params.segment_size /= params.arity;
|
||||
params.subtree_height--;
|
||||
|
||||
D* prev_layer = states;
|
||||
D* next_layer = digests;
|
||||
while (params.number_of_rows_padded > 0) {
|
||||
CHK_IF_RETURN(fold_layer(leaves, prev_layer, next_layer, aux_leaves_mem, d_leaves_info, params));
|
||||
CHK_IF_RETURN(maybe_copy_digests(next_layer, big_tree_digests, params));
|
||||
swap<D>(&prev_layer, &next_layer);
|
||||
params.segment_size /= params.arity;
|
||||
params.subtree_height--;
|
||||
params.number_of_rows_padded /= params.arity;
|
||||
}
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t mmcs_commit(
|
||||
const Matrix<L>* inputs,
|
||||
const unsigned int number_of_inputs,
|
||||
D* digests,
|
||||
const Hasher<L, D>& hasher,
|
||||
const Hasher<L, D>& compression,
|
||||
const TreeBuilderConfig& tree_config)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = tree_config.ctx.stream;
|
||||
|
||||
if (number_of_inputs == 0) THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "No matrices provided");
|
||||
|
||||
if (compression.preimage_max_length < tree_config.arity * tree_config.digest_elements)
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"Hash max preimage length does not match merkle tree arity multiplied by digest elements");
|
||||
|
||||
std::vector<Matrix<L>> sorted_inputs(number_of_inputs);
|
||||
std::partial_sort_copy(
|
||||
inputs, inputs + number_of_inputs, sorted_inputs.begin(), sorted_inputs.end(),
|
||||
[](const Matrix<L>& left, const Matrix<L>& right) { return left.height > right.height; });
|
||||
|
||||
// Check that the height of any two given matrices either rounds up
|
||||
// to the same next power of two or otherwise equal
|
||||
for (unsigned int i = 0; i < number_of_inputs - 1; i++) {
|
||||
unsigned int left = sorted_inputs[i].height;
|
||||
unsigned int right = sorted_inputs[i + 1].height;
|
||||
|
||||
if (next_pow_of_two(left) == next_pow_of_two(right) && left != right)
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "Matrix heights that round up to the same power of two must be equal");
|
||||
}
|
||||
|
||||
uint64_t max_height = sorted_inputs[0].height;
|
||||
|
||||
// Calculate maximum additional memory needed for injected matrices
|
||||
uint64_t max_aux_total_elements = 0;
|
||||
uint64_t current_aux_total_elements = 0;
|
||||
uint64_t current_height = 0;
|
||||
uint64_t bottom_layer_leaves_elements = 0;
|
||||
if (!tree_config.are_inputs_on_device) {
|
||||
for (auto it = sorted_inputs.begin(); it < sorted_inputs.end(); it++) {
|
||||
if (it->height == max_height) {
|
||||
bottom_layer_leaves_elements += it->height * it->width;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (it->height != current_height) {
|
||||
current_height = it->height;
|
||||
current_aux_total_elements = 0;
|
||||
}
|
||||
|
||||
current_aux_total_elements += it->width * it->height;
|
||||
if (current_aux_total_elements > max_aux_total_elements) {
|
||||
max_aux_total_elements = current_aux_total_elements;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t number_of_bottom_layer_rows = next_pow_of_two(max_height);
|
||||
size_t leaves_info_memory = number_of_inputs * sizeof(Matrix<L>);
|
||||
|
||||
unsigned int tree_height = get_height(number_of_bottom_layer_rows);
|
||||
|
||||
// This will determine how much splitting do we need to do
|
||||
// `number_of_streams` subtrees should fit in the device
|
||||
// This means each subtree should fit in `STREAM_CHUNK_SIZE` memory
|
||||
uint64_t number_of_subtrees = 1;
|
||||
uint64_t subtree_height = tree_height;
|
||||
uint64_t subtree_bottom_layer_rows = number_of_bottom_layer_rows;
|
||||
uint64_t subtree_states_size = subtree_bottom_layer_rows * hasher.width;
|
||||
uint64_t subtree_digests_size = subtree_bottom_layer_rows * tree_config.digest_elements;
|
||||
uint64_t subtree_leaves_elements = 0;
|
||||
for (int i = 0; i < number_of_inputs && sorted_inputs[i].height == max_height; i++) {
|
||||
subtree_leaves_elements += sorted_inputs[i].width * sorted_inputs[i].height;
|
||||
}
|
||||
uint64_t subtree_aux_elements = max_aux_total_elements;
|
||||
|
||||
size_t subtree_leaves_memory = std::max(subtree_leaves_elements * sizeof(L), subtree_digests_size * sizeof(D));
|
||||
size_t subtree_memory_required =
|
||||
sizeof(D) * subtree_states_size + subtree_leaves_memory + subtree_aux_elements * sizeof(L) + leaves_info_memory;
|
||||
while (subtree_memory_required > STREAM_CHUNK_SIZE) {
|
||||
number_of_subtrees *= tree_config.arity;
|
||||
subtree_height--;
|
||||
subtree_bottom_layer_rows /= tree_config.arity;
|
||||
subtree_states_size /= tree_config.arity;
|
||||
subtree_digests_size /= tree_config.arity;
|
||||
subtree_leaves_elements /= tree_config.arity;
|
||||
subtree_aux_elements /= tree_config.arity;
|
||||
subtree_leaves_memory = std::max(subtree_leaves_elements * sizeof(L), subtree_digests_size * sizeof(D));
|
||||
subtree_memory_required =
|
||||
sizeof(D) * subtree_states_size + subtree_leaves_memory + subtree_aux_elements * sizeof(L) + leaves_info_memory;
|
||||
}
|
||||
unsigned int cap_height = tree_height - subtree_height;
|
||||
size_t caps_len = pow(tree_config.arity, cap_height) * tree_config.digest_elements;
|
||||
|
||||
size_t available_memory, _total_memory;
|
||||
CHK_IF_RETURN(cudaMemGetInfo(&available_memory, &_total_memory));
|
||||
if (available_memory < (GIGA / 8 + STREAM_CHUNK_SIZE)) {
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"Not enough GPU memory to build a tree. At least 1.125 GB of GPU memory required");
|
||||
}
|
||||
available_memory -= GIGA / 8; // Leave 128 MB just in case
|
||||
|
||||
// We can effectively parallelize memory copy with streams
|
||||
// as long as they don't operate on more than `STREAM_CHUNK_SIZE` bytes
|
||||
const size_t number_of_streams = std::min((uint64_t)(available_memory / STREAM_CHUNK_SIZE), number_of_subtrees);
|
||||
std::vector<cudaStream_t> streams(number_of_streams);
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamCreate(&streams[i]));
|
||||
}
|
||||
|
||||
// If keep_rows is smaller then the remaining top-tree height
|
||||
// we need to allocate additional memory to store the roots
|
||||
// of subtrees, in order to proceed from there
|
||||
bool caps_mode = tree_config.keep_rows && tree_config.keep_rows <= cap_height;
|
||||
D* caps;
|
||||
if (caps_mode) { caps = static_cast<D*>(malloc(caps_len * sizeof(D))); }
|
||||
|
||||
#ifdef MERKLE_DEBUG
|
||||
std::cout << "MMCS DEBUG" << std::endl;
|
||||
std::cout << "====================================" << std::endl;
|
||||
std::cout << "Available memory = " << available_memory / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Number of streams = " << number_of_streams << std::endl;
|
||||
std::cout << "Number of subtrees = " << number_of_subtrees << std::endl;
|
||||
std::cout << "Height of a subtree = " << subtree_height << std::endl;
|
||||
std::cout << "Cutoff height = " << tree_height - subtree_height << std::endl;
|
||||
std::cout << "Number of leaves in a subtree = " << subtree_bottom_layer_rows << std::endl;
|
||||
std::cout << "State of a subtree = " << subtree_states_size << std::endl;
|
||||
std::cout << "Digest elements for a subtree = " << subtree_digests_size << std::endl;
|
||||
std::cout << "Size of 1 subtree states = " << subtree_states_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Size of 1 subtree digests = " << subtree_digests_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Cap height = " << cap_height << std::endl;
|
||||
std::cout << "Enabling caps mode? " << caps_mode << std::endl;
|
||||
|
||||
std::cout << "Allocating " << subtree_states_size * number_of_streams << " elements for states" << std::endl;
|
||||
std::cout << "Allocating " << subtree_leaves_memory * number_of_streams << " bytes for leaves" << std::endl;
|
||||
std::cout << "Allocating " << subtree_aux_elements * number_of_streams << " elements for aux leaves" << std::endl;
|
||||
std::cout << std::endl;
|
||||
#endif
|
||||
|
||||
// Allocate memory for the states, injected leaves (aux) and digests
|
||||
// These are shared by streams in a pool
|
||||
D* states_ptr;
|
||||
L *aux_ptr, *leaves_ptr;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states_ptr, subtree_states_size * number_of_streams * sizeof(D), stream));
|
||||
CHK_IF_RETURN(cudaMemsetAsync(states_ptr, 0, subtree_states_size * number_of_streams * sizeof(D), stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(&leaves_ptr, subtree_leaves_memory * number_of_streams, stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(&aux_ptr, subtree_aux_elements * number_of_streams * sizeof(L), stream));
|
||||
// Wait for these allocations to finish
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(stream));
|
||||
|
||||
// Build subtrees in parallel. This for loop invokes kernels that can run in a pool of size `number_of_streams`
|
||||
for (size_t subtree_idx = 0; subtree_idx < number_of_subtrees; subtree_idx++) {
|
||||
size_t stream_idx = subtree_idx % number_of_streams;
|
||||
cudaStream_t subtree_stream = streams[stream_idx];
|
||||
|
||||
D* subtree_state = states_ptr + stream_idx * subtree_states_size;
|
||||
L* subtree_leaves = (L*)((unsigned char*)leaves_ptr + stream_idx * subtree_leaves_memory);
|
||||
L* subtree_aux = aux_ptr + stream_idx * subtree_aux_elements;
|
||||
|
||||
unsigned int subtree_keep_rows = 0;
|
||||
if (tree_config.keep_rows) {
|
||||
int diff = tree_config.keep_rows - cap_height;
|
||||
subtree_keep_rows = std::max(1, diff);
|
||||
}
|
||||
device_context::DeviceContext subtree_context{subtree_stream, tree_config.ctx.device_id, tree_config.ctx.mempool};
|
||||
|
||||
SubtreeParams<L, D> params = {};
|
||||
|
||||
params.number_of_inputs = number_of_inputs;
|
||||
params.arity = tree_config.arity;
|
||||
params.digest_elements = tree_config.digest_elements;
|
||||
params.number_of_rows = subtree_bottom_layer_rows;
|
||||
params.number_of_rows_padded = subtree_bottom_layer_rows;
|
||||
|
||||
params.subtree_idx = subtree_idx;
|
||||
params.subtree_height = subtree_height;
|
||||
params.number_of_subtrees = number_of_subtrees;
|
||||
|
||||
params.segment_size = number_of_bottom_layer_rows * tree_config.digest_elements;
|
||||
params.keep_rows = subtree_keep_rows;
|
||||
params.are_inputs_on_device = tree_config.are_inputs_on_device;
|
||||
params.hasher = &hasher;
|
||||
params.compression = &compression;
|
||||
params.ctx = &subtree_context;
|
||||
|
||||
cudaError_t subtree_result = build_mmcs_subtree<L, D>(
|
||||
sorted_inputs,
|
||||
subtree_leaves, // d_leaves
|
||||
subtree_state, // states
|
||||
subtree_aux, // aux_leaves_mem
|
||||
caps_mode ? caps : digests, // big_tree_digests
|
||||
params // params
|
||||
);
|
||||
CHK_IF_RETURN(subtree_result);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
|
||||
}
|
||||
|
||||
// Finish the top-level tree if any
|
||||
if (cap_height > 0) {
|
||||
D* digests_ptr = (D*)leaves_ptr;
|
||||
size_t start_segment_size = caps_len / tree_config.arity;
|
||||
size_t start_segment_offset = 0;
|
||||
if (!caps_mode) { // Calculate offset
|
||||
size_t keep_rows = tree_config.keep_rows ? tree_config.keep_rows : tree_height + 1;
|
||||
size_t layer_size = pow(tree_config.arity, keep_rows - 1) * tree_config.digest_elements;
|
||||
for (int i = 0; i < keep_rows - cap_height; i++) {
|
||||
start_segment_offset += layer_size;
|
||||
layer_size /= tree_config.arity;
|
||||
}
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
states_ptr, caps_mode ? caps : (digests + start_segment_offset - caps_len), caps_len * sizeof(D),
|
||||
(caps_mode || !tree_config.are_outputs_on_device) ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, stream));
|
||||
|
||||
uint64_t number_of_states = caps_len / tree_config.arity / tree_config.digest_elements;
|
||||
Matrix<L>* d_leaves_info;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_leaves_info, number_of_inputs * sizeof(Matrix<L>), tree_config.ctx.stream));
|
||||
|
||||
SubtreeParams<L, D> top_params = {};
|
||||
|
||||
top_params.number_of_inputs = number_of_inputs;
|
||||
top_params.arity = tree_config.arity;
|
||||
top_params.digest_elements = tree_config.digest_elements;
|
||||
top_params.number_of_rows = number_of_states;
|
||||
top_params.number_of_rows_padded = number_of_states;
|
||||
|
||||
top_params.subtree_height = cap_height;
|
||||
top_params.number_of_subtrees = 1;
|
||||
|
||||
top_params.segment_offset = start_segment_offset;
|
||||
top_params.segment_size = start_segment_size;
|
||||
top_params.keep_rows = tree_config.keep_rows;
|
||||
top_params.are_inputs_on_device = tree_config.are_inputs_on_device;
|
||||
top_params.caps_mode = caps_mode;
|
||||
top_params.hasher = &hasher;
|
||||
top_params.compression = &compression;
|
||||
top_params.ctx = &tree_config.ctx;
|
||||
|
||||
D* prev_layer = states_ptr;
|
||||
D* next_layer = digests_ptr;
|
||||
while (top_params.number_of_rows_padded > 0) {
|
||||
CHK_IF_RETURN(fold_layer(sorted_inputs, prev_layer, next_layer, aux_ptr, d_leaves_info, top_params));
|
||||
CHK_IF_RETURN(maybe_copy_digests(next_layer, digests, top_params));
|
||||
swap<D>(&prev_layer, &next_layer);
|
||||
top_params.segment_size /= top_params.arity;
|
||||
top_params.subtree_height--;
|
||||
top_params.number_of_rows_padded /= top_params.arity;
|
||||
}
|
||||
}
|
||||
|
||||
if (caps_mode) { free(caps); }
|
||||
CHK_IF_RETURN(cudaFreeAsync(states_ptr, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(leaves_ptr, stream));
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamDestroy(streams[i]));
|
||||
}
|
||||
if (!tree_config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
} // namespace merkle_tree
|
||||
7
icicle/src/merkle-tree/tests/merkle/.gitignore
vendored
Normal file
7
icicle/src/merkle-tree/tests/merkle/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
merkle.o
|
||||
poseidon2.o
|
||||
test_merkle_poseidon2
|
||||
merkle_bls.o
|
||||
poseidon.o
|
||||
test_merkle_poseidon
|
||||
test_merkle
|
||||
22
icicle/src/merkle-tree/tests/merkle/Makefile
Normal file
22
icicle/src/merkle-tree/tests/merkle/Makefile
Normal file
@@ -0,0 +1,22 @@
|
||||
# Builds and runs the Merkle tree tests.
#   test_merkle_poseidon - Poseidon over bls12_381 (default target)
#   test_merkle          - Poseidon2 over babybear
#   clear                - removes every object file and test binary produced here

.PHONY: clear

# The test source is a prerequisite so that editing it triggers a rebuild
test_merkle_poseidon: test.cu poseidon.o merkle_bls.o
	nvcc -o test_merkle_poseidon -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -DMERKLE_DEBUG poseidon.o merkle_bls.o test.cu
	./test_merkle_poseidon

merkle_bls.o: ../../extern.cu ../../merkle.cu
	nvcc -o merkle_bls.o -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -DMERKLE_DEBUG -c ../../extern.cu

poseidon.o: ../../../poseidon/extern.cu
	nvcc -o poseidon.o -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -c ../../../poseidon/extern.cu

test_merkle: test_poseidon2.cu poseidon2.o merkle.o
	nvcc -o test_merkle -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG poseidon2.o merkle.o test_poseidon2.cu
	./test_merkle

merkle.o: ../../extern.cu ../../merkle.cu
	nvcc -o merkle.o -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG -c ../../extern.cu

poseidon2.o: ../../../poseidon2/extern.cu
	nvcc -o poseidon2.o -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -c ../../../poseidon2/extern.cu

# Both test binaries are removed (test_merkle_poseidon used to be left behind)
clear:
	rm -f poseidon2.o merkle.o test_merkle merkle_bls.o poseidon.o test_merkle_poseidon
|
||||
@@ -1,10 +1,3 @@
|
||||
// #define DEBUG
|
||||
#define MERKLE_DEBUG
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "../poseidon.cu"
|
||||
#include "merkle.cu"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
@@ -12,15 +5,19 @@
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace poseidon;
|
||||
using namespace merkle;
|
||||
using namespace curve_config;
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define DEBUG
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#include "api/bls12_381.h"
|
||||
using namespace bls12_381;
|
||||
|
||||
// Arity
|
||||
#define A 2
|
||||
#define T (A + 1)
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
@@ -30,24 +27,24 @@ int main(int argc, char* argv[])
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
init_optimized_poseidon_constants<scalar_t>(A, ctx, &constants);
|
||||
poseidon::Poseidon<scalar_t> poseidon(A, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N-1 elements
|
||||
uint32_t tree_height = argc > 1 ? atoi(argv[1]) : 28;
|
||||
uint32_t number_of_leaves = pow(A, (tree_height - 1));
|
||||
uint32_t tree_height = argc > 1 ? atoi(argv[1]) : 26;
|
||||
uint32_t number_of_leaves = pow(A, tree_height);
|
||||
uint32_t total_number_of_leaves = number_of_leaves * A;
|
||||
|
||||
/// Use keep_rows to specify how many rows do you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 7;
|
||||
size_t digests_len = get_digests_len<scalar_t>(keep_rows + 1, A);
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, A, 1);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^{tree_height - 1} - 1]
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
scalar_t input = scalar_t::zero();
|
||||
size_t leaves_mem = number_of_leaves * sizeof(scalar_t);
|
||||
size_t leaves_mem = total_number_of_leaves * sizeof(scalar_t);
|
||||
scalar_t* leaves = static_cast<scalar_t*>(malloc(leaves_mem));
|
||||
for (uint32_t i = 0; i < number_of_leaves; i++) {
|
||||
for (uint32_t i = 0; i < total_number_of_leaves; i++) {
|
||||
leaves[i] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
@@ -62,6 +59,7 @@ int main(int argc, char* argv[])
|
||||
std::cout << "Memory for leaves = " << leaves_mem / 1024 / 1024 << " MB; " << leaves_mem / 1024 / 1024 / 1024 << " GB"
|
||||
<< std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
@@ -69,12 +67,17 @@ int main(int argc, char* argv[])
|
||||
std::cout << "Total RAM consumption = " << (digests_mem + leaves_mem) / 1024 / 1024 << " MB; "
|
||||
<< (digests_mem + leaves_mem) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
TreeBuilderConfig config = default_merkle_config();
|
||||
config.keep_rows = keep_rows;
|
||||
merkle_tree::TreeBuilderConfig tree_config = merkle_tree::default_merkle_config();
|
||||
tree_config.arity = 2;
|
||||
tree_config.keep_rows = keep_rows;
|
||||
START_TIMER(timer_merkle);
|
||||
build_merkle_tree<scalar_t, T>(leaves, digests, tree_height, constants, config);
|
||||
bls12_381_build_merkle_tree(leaves, digests, tree_height, A, &poseidon, &poseidon, tree_config);
|
||||
END_TIMER(timer_merkle, "Merkle tree built: ")
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
std::cout << digests[i] << std::endl;
|
||||
}
|
||||
|
||||
// Use this to generate test vectors
|
||||
// for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << "{";
|
||||
109
icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu
Normal file
109
icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu
Normal file
@@ -0,0 +1,109 @@
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
#include "api/babybear.h"
|
||||
using namespace babybear;
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N elements
|
||||
uint32_t tree_arity = 2;
|
||||
uint32_t width = 16;
|
||||
uint32_t input_block_len = 8;
|
||||
uint32_t digest_elements = 8;
|
||||
uint64_t tree_height = argc > 1 ? atoi(argv[1]) : 23;
|
||||
uint64_t number_of_leaves = pow(tree_arity, tree_height);
|
||||
uint64_t total_number_of_leaves = number_of_leaves * input_block_len;
|
||||
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
poseidon2::Poseidon2<scalar_t> poseidon(
|
||||
width, input_block_len, poseidon2::MdsType::DEFAULT_MDS, poseidon2::DiffusionStrategy::DEFAULT_DIFFUSION, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
/// Use keep_rows to specify how many rows do you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 3;
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, tree_arity, digest_elements);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
scalar_t input = scalar_t::zero();
|
||||
size_t leaves_mem = total_number_of_leaves * sizeof(scalar_t);
|
||||
scalar_t* leaves = static_cast<scalar_t*>(malloc(leaves_mem));
|
||||
for (uint64_t i = 0; i < total_number_of_leaves; i++) {
|
||||
leaves[i] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
END_TIMER(timer_allocation, "Allocated memory for leaves: ");
|
||||
|
||||
/// Allocate memory for digests of {keep_rows} rows of a tree
|
||||
START_TIMER(timer_digests);
|
||||
size_t digests_mem = digests_len * sizeof(scalar_t);
|
||||
scalar_t* digests = static_cast<scalar_t*>(malloc(digests_mem));
|
||||
END_TIMER(timer_digests, "Allocated memory for digests");
|
||||
|
||||
std::cout << "Memory for leaves = " << leaves_mem / 1024 / 1024 << " MB; " << leaves_mem / 1024 / 1024 / 1024 << " GB"
|
||||
<< std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
|
||||
std::cout << "Total RAM consumption = " << (digests_mem + leaves_mem) / 1024 / 1024 << " MB; "
|
||||
<< (digests_mem + leaves_mem) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
merkle_tree::TreeBuilderConfig tree_config = merkle_tree::default_merkle_config();
|
||||
tree_config.arity = tree_arity;
|
||||
tree_config.keep_rows = keep_rows;
|
||||
tree_config.digest_elements = digest_elements;
|
||||
START_TIMER(timer_merkle);
|
||||
babybear_build_merkle_tree(leaves, digests, tree_height, input_block_len, &poseidon, &poseidon, tree_config);
|
||||
END_TIMER(timer_merkle, "Merkle tree built: ")
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << digests[i] << std::endl;
|
||||
}
|
||||
|
||||
// Use this to generate test vectors
|
||||
// for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << "{";
|
||||
// for (int j = 0; j < 1; j++) {
|
||||
// std::cout << ((uint32_t*)&digests[i].limbs_storage)[j];
|
||||
// }
|
||||
// std::cout << "}," << std::endl;
|
||||
// }
|
||||
|
||||
scalar_t expected[64] = {
|
||||
{876845485}, {1982055884}, {1232961929}, {1502814326}, {1731913687}, {351564698}, {449044700}, {656218013},
|
||||
{1616800877}, {1324365320}, {651075613}, {1679193452}, {218302636}, {283697394}, {1141456517}, {253630808},
|
||||
{936036237}, {1020969125}, {597252945}, {32839064}, {957901845}, {1137914369}, {155933167}, {986924657},
|
||||
{1553746264}, {1007314324}, {1208763331}, {110389244}, {118704360}, {607471513}, {834479233}, {914998571},
|
||||
{1086906039}, {1673233108}, {431115765}, {233068973}, {1974449092}, {1296268875}, {538093590}, {104288129},
|
||||
{1011605567}, {53314351}, {1461404090}, {870754513}, {1212389386}, {1363519118}, {799527383}, {1258384762},
|
||||
{678820782}, {1940801563}, {887764924}, {1006362075}, {2003940909}, {1213396717}, {1332793191}, {440259232}};
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
scalar_t root = digests[i];
|
||||
// std::cout << root << std::endl;
|
||||
assert(root == expected[i]);
|
||||
}
|
||||
free(digests);
|
||||
free(leaves);
|
||||
}
|
||||
|
||||
#endif
|
||||
4
icicle/src/merkle-tree/tests/mmcs/.gitignore
vendored
Normal file
4
icicle/src/merkle-tree/tests/mmcs/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
mmcs.o
|
||||
poseidon2.o
|
||||
test_mmcs_poseidon2
|
||||
vec_ops.o
|
||||
15
icicle/src/merkle-tree/tests/mmcs/Makefile
Normal file
15
icicle/src/merkle-tree/tests/mmcs/Makefile
Normal file
@@ -0,0 +1,15 @@
|
||||
# Builds and runs the MMCS test (Poseidon2 over babybear).
#   test_merkle - builds test_mmcs_poseidon2 and runs it (default target)
#   clear       - removes every object file and test binary produced here

.PHONY: clear

# The target name differs from the produced binary (test_mmcs_poseidon2),
# so this rule is re-run on every invocation.
test_merkle: poseidon2.o mmcs.o vec_ops.o
	nvcc -o test_mmcs_poseidon2 -lineinfo -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG poseidon2.o vec_ops.o mmcs.o test_poseidon2.cu
	./test_mmcs_poseidon2

mmcs.o: ../../extern_mmcs.cu ../../mmcs.cu
	nvcc -o mmcs.o -I../../../../include -lineinfo -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG -c ../../extern_mmcs.cu

poseidon2.o: ../../../poseidon2/extern.cu
	nvcc -o poseidon2.o -I../../../../include -lineinfo -DFIELD=babybear -DFIELD_ID=1001 -c ../../../poseidon2/extern.cu

# The source is listed as a prerequisite so the object is rebuilt when it changes
vec_ops.o: ../../../vec_ops/extern.cu
	nvcc -o vec_ops.o -I../../../../include -lineinfo -DFIELD=babybear -DFIELD_ID=1001 -c ../../../vec_ops/extern.cu

clear:
	rm -f poseidon2.o mmcs.o vec_ops.o test_mmcs_poseidon2
|
||||
139
icicle/src/merkle-tree/tests/mmcs/test_poseidon2.cu
Normal file
139
icicle/src/merkle-tree/tests/mmcs/test_poseidon2.cu
Normal file
@@ -0,0 +1,139 @@
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
#include "api/babybear.h"
|
||||
using namespace babybear;
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N elements
|
||||
uint32_t tree_arity = 2;
|
||||
uint32_t width = 16;
|
||||
uint32_t input_block_len = 600;
|
||||
uint32_t rate = 8;
|
||||
uint32_t digest_elements = 8;
|
||||
uint32_t copied_matrices = 1;
|
||||
uint64_t tree_height = argc > 1 ? atoi(argv[1]) : 3;
|
||||
uint64_t number_of_leaves = pow(tree_arity, tree_height);
|
||||
uint64_t total_number_of_leaves = number_of_leaves * input_block_len;
|
||||
|
||||
bool are_inputs_on_device = true;
|
||||
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
poseidon2::Poseidon2<scalar_t> poseidon(
|
||||
width, rate, poseidon2::MdsType::PLONKY, poseidon2::DiffusionStrategy::MONTGOMERY, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
/// Use keep_rows to specify how many rows do you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 3;
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, tree_arity, digest_elements);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
scalar_t input = scalar_t::zero();
|
||||
|
||||
// unsigned int number_of_inputs = tree_height * copied_matrices;
|
||||
unsigned int number_of_inputs = 1;
|
||||
Matrix<scalar_t>* leaves = static_cast<Matrix<scalar_t>*>(malloc(number_of_inputs * sizeof(Matrix<scalar_t>)));
|
||||
uint64_t current_matrix_rows = number_of_leaves;
|
||||
for (int i = 0; i < number_of_inputs; i++) {
|
||||
uint64_t current_matrix_size = current_matrix_rows * input_block_len;
|
||||
for (int j = 0; j < copied_matrices; j++) {
|
||||
scalar_t* matrix = static_cast<scalar_t*>(malloc(current_matrix_size * sizeof(scalar_t)));
|
||||
|
||||
for (uint64_t k = 0; k < current_matrix_size; k++) {
|
||||
matrix[k] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
|
||||
scalar_t* d_matrix;
|
||||
if (are_inputs_on_device) {
|
||||
cudaMalloc(&d_matrix, current_matrix_size * sizeof(scalar_t));
|
||||
cudaMemcpy(d_matrix, matrix, current_matrix_size * sizeof(scalar_t), cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
leaves[i * copied_matrices + j] = {
|
||||
are_inputs_on_device ? d_matrix : matrix,
|
||||
input_block_len,
|
||||
current_matrix_rows,
|
||||
};
|
||||
}
|
||||
|
||||
current_matrix_rows /= tree_arity;
|
||||
}
|
||||
|
||||
END_TIMER(timer_allocation, "Allocated memory for leaves: ");
|
||||
|
||||
/// Allocate memory for digests of {keep_rows} rows of a tree
|
||||
START_TIMER(timer_digests);
|
||||
size_t digests_mem = digests_len * sizeof(scalar_t);
|
||||
scalar_t* digests = static_cast<scalar_t*>(malloc(digests_mem));
|
||||
END_TIMER(timer_digests, "Allocated memory for digests");
|
||||
|
||||
// std::cout << "Memory for leaves = " << total_number_of_leaves * sizeof(scalar_t) / 1024 / 1024 << " MB; " <<
|
||||
// leaves_mem / 1024 / 1024 / 1024 << " GB"
|
||||
// << std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
// std::cout << "Total RAM consumption = " << (digests_mem + leaves_mem) / 1024 / 1024 << " MB; "
|
||||
// << (digests_mem + leaves_mem) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
merkle_tree::TreeBuilderConfig tree_config = merkle_tree::default_merkle_config();
|
||||
tree_config.are_inputs_on_device = are_inputs_on_device;
|
||||
tree_config.arity = tree_arity;
|
||||
tree_config.keep_rows = keep_rows;
|
||||
tree_config.digest_elements = digest_elements;
|
||||
START_TIMER(timer_merkle);
|
||||
babybear_mmcs_commit_cuda(leaves, number_of_inputs, digests, &poseidon, &poseidon, tree_config);
|
||||
END_TIMER(timer_merkle, "Merkle tree built: ")
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
std::cout << digests[digests_len - i - 1] << std::endl;
|
||||
}
|
||||
|
||||
// Use this to generate test vectors
|
||||
// for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << "{";
|
||||
// for (int j = 0; j < 8; j++) {
|
||||
// std::cout << ((uint64_t*)&digests[i].limbs_storage)[j];
|
||||
// if (j != 7) { std::cout << ", "; }
|
||||
// }
|
||||
// std::cout << "}," << std::endl;
|
||||
// }
|
||||
|
||||
/// These scalars are digests of top-7 rows of a Merkle tree.
|
||||
/// Arity = 2, Tree height = 28, keep_rows = 7
|
||||
/// They are aligned in the following format:
|
||||
/// L-7 L-6 L-5 L-4 L-3 L-2 L-1
|
||||
/// [0..63, 64..95, 96..111, 112..119, 120..123, 124..125, 126]
|
||||
scalar_t expected[0] = {};
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
scalar_t root = digests[i];
|
||||
// assert(root == expected[i]);
|
||||
}
|
||||
free(digests);
|
||||
free(leaves);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,44 +0,0 @@
|
||||
# Build/run targets for the CUDA micro-benchmarks in this directory.
# Every binary is written to work/. The run_* targets depend on the matching
# build_* target instead of re-invoking make or repeating the compile command.

NVCC ?= nvcc
NVCCFLAGS := -std=c++17 -arch=sm_80 -I. -I../../include

# None of these targets produce a file with the target's own name.
.PHONY: build_test run_test build_perf run_perf build_mem run_mem \
	build_transpose run_transpose build_compute run_compute

build_test:
	mkdir -p work
	$(NVCC) -o work/test $(NVCCFLAGS) test.cu

run_test: build_test
	work/test

build_perf:
	mkdir -p work
	$(NVCC) -lineinfo -o work/perf $(NVCCFLAGS) perf_test.cu

run_perf: build_perf
	work/perf

build_mem:
	mkdir -p work
	$(NVCC) -lineinfo -o work/mem $(NVCCFLAGS) memory_test.cu

run_mem: build_mem
	work/mem

build_transpose:
	mkdir -p work
	$(NVCC) -lineinfo -o work/transpose $(NVCCFLAGS) transpose_test.cu

run_transpose: build_transpose
	work/transpose

build_compute:
	mkdir -p work
	$(NVCC) -lineinfo -o work/compute $(NVCCFLAGS) compute_test.cu

run_compute: build_compute
	work/compute
|
||||
@@ -1,130 +0,0 @@
|
||||
#include "fields/id.h"
|
||||
// #define FIELD_ID 1001
|
||||
#define CURVE_ID 3
|
||||
#include "curves/curve_config.cuh"
|
||||
// #include "fields/field_config.cuh"
|
||||
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <cub/device/device_radix_sort.cuh>
|
||||
|
||||
#include "fields/field.cuh"
|
||||
#include "curves/projective.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
#include "kernels.cu"
|
||||
|
||||
class Dummy_Scalar
|
||||
{
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
|
||||
unsigned x;
|
||||
unsigned p = 10;
|
||||
// unsigned p = 1<<30;
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar zero() { return {0}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar one() { return {1}; }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar)
|
||||
{
|
||||
os << scalar.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) const
|
||||
{
|
||||
return (x >> (digit_num * digit_width)) & ((1 << digit_width) - 1);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2)
|
||||
{
|
||||
return {(p1.x + p2.x) % p1.p};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) { return (p1.x == p2.x); }
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) { return (p1.x == p2); }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar& scalar) { return {scalar.p - scalar.x}; }
|
||||
static HOST_INLINE Dummy_Scalar rand_host()
|
||||
{
|
||||
return {(unsigned)rand() % 10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// typedef field_config::scalar_t test_scalar;
|
||||
typedef curve_config::scalar_t test_scalar;
|
||||
typedef curve_config::projective_t test_projective;
|
||||
typedef curve_config::affine_t test_affine;
|
||||
|
||||
// typedef uint32_t test_t;
|
||||
// typedef int4 test_t;
|
||||
// typedef Dummy_Scalar test_t;
|
||||
typedef test_projective test_t;
|
||||
// typedef test_scalar test_t;
|
||||
|
||||
#define REPS 8
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
cudaEvent_t start, stop;
|
||||
float kernel_time;
|
||||
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
|
||||
int N = 1<<22;
|
||||
// int N = 1<<25;
|
||||
|
||||
test_t* arr1_h = new test_t[N];
|
||||
test_t* arr2_h = new test_t[N];
|
||||
|
||||
test_t *arr1_d, *arr2_d;
|
||||
|
||||
cudaMalloc(&arr1_d, N*sizeof(test_t));
|
||||
cudaMalloc(&arr2_d, N*sizeof(test_t));
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
arr1_h[i] = i > 100? arr1_h[i-100] : test_t::rand_host();
|
||||
// arr1_h[i] = i > 100? arr1_h[i-100] : rand();
|
||||
}
|
||||
|
||||
cudaMemcpy(arr1_d, arr1_h, sizeof(test_t) * N, cudaMemcpyHostToDevice);
|
||||
|
||||
int THREADS = 128;
|
||||
int BLOCKS = (N + THREADS - 1)/THREADS;
|
||||
|
||||
//warm up
|
||||
add_many_times<test_t,16><<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
// multi_mult<test_t,8><<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
|
||||
cudaEventRecord(start, 0);
|
||||
|
||||
// add_many_times<test_t,REPS><<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
// multi_add<test_t,REPS><<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
// limb_mult_bench<REPS><<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
segment_sum<test_t,REPS><<<BLOCKS, THREADS>>>(arr1_d, N);
|
||||
// shmem_segment_sum<test_t,REPS><<<BLOCKS, THREADS>>>(arr1_d, N);
|
||||
// multi_mult<test_t,REPS><<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
// multi_ntt8<<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaStreamSynchronize(0);
|
||||
cudaEventElapsedTime(&kernel_time, start, stop);
|
||||
printf("kernel_time : %.3f ms.\n", kernel_time);
|
||||
// printf("normalized kernel_time : %.3f ms.\n", kernel_time/REPS);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,457 +0,0 @@
|
||||
|
||||
template <class T>
|
||||
__global__ void add_elements_kernel(const T* x, const T* y, T* result, const unsigned count)
|
||||
{
|
||||
const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (tid >= count) return;
|
||||
result[tid] = x[tid] + y[tid];
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void fake_ntt_kernel(const T* x, T* result, const unsigned thread_count)
|
||||
{
|
||||
extern __shared__ T shmem[];
|
||||
const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (tid >= thread_count) return;
|
||||
shmem[4*threadIdx.x] = x[4*tid] + x[4*tid+1];
|
||||
shmem[4*threadIdx.x+1] = x[4*tid] + T::neg(x[4*tid+1]);
|
||||
shmem[4*threadIdx.x+2] = x[4*tid+2] + x[4*tid+3];
|
||||
shmem[4*threadIdx.x+3] = x[4*tid+2] + T::neg(x[4*tid+3]);
|
||||
__syncthreads();
|
||||
result[4*tid] = shmem[2*threadIdx.x] + shmem[2*threadIdx.x + 4*blockDim.x/2];
|
||||
result[4*tid+1] = shmem[2*threadIdx.x] + T::neg(shmem[2*threadIdx.x + 4*blockDim.x/2]);
|
||||
result[4*tid+2] = shmem[2*threadIdx.x+1] + shmem[2*threadIdx.x + 4*blockDim.x/2+1];
|
||||
result[4*tid+3] = shmem[2*threadIdx.x+1] + T::neg(shmem[2*threadIdx.x + 4*blockDim.x/2+1]);
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
__global__ void bugged_add_elements_kernel(const T* x, const T* y, T* result, const unsigned count)
|
||||
{
|
||||
const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
// if (tid >= count) return;
|
||||
// printf("tid %d\n", tid);
|
||||
result[tid] = x[tid] + y[tid];
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void bugged_fake_ntt_kernel(const T* x, T* result, const unsigned thread_count)
|
||||
{
|
||||
extern __shared__ T shmem[];
|
||||
const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// if (tid >= thread_count) return;
|
||||
// if (tid == 0){
|
||||
// for (int i = 0; i < 8; i++)
|
||||
// {
|
||||
// shmem[i]=T::zero();
|
||||
// }
|
||||
// }
|
||||
|
||||
shmem[4*threadIdx.x] = x[4*tid] + x[4*tid+1];
|
||||
shmem[4*threadIdx.x+1] = x[4*tid] + T::neg(x[4*tid+1]);
|
||||
shmem[4*threadIdx.x+2] = x[4*tid+2] + x[4*tid+1];
|
||||
shmem[4*threadIdx.x+4] = x[4*tid+2] + T::neg(x[4*tid+1]);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// if (tid == 0){
|
||||
// for (int i = 0; i < 8; i++)
|
||||
// {
|
||||
// printf("%d ",shmem[i]);
|
||||
// }
|
||||
// printf("\n");
|
||||
// }
|
||||
|
||||
// printf("tid: %d, addr1: %d, addr2: %d\n", tid, 2*threadIdx.x, 2*threadIdx.x + 4*blockDim.x);
|
||||
result[4*tid] = shmem[2*threadIdx.x] + shmem[2*threadIdx.x + 4*blockDim.x]; // Incorrect offset
|
||||
result[4*tid+1] = shmem[2*threadIdx.x] + T::neg(shmem[2*threadIdx.x + 4*blockDim.x]); // Incorrect offset
|
||||
result[4*tid+2] = shmem[2*threadIdx.x+1] + shmem[2*threadIdx.x + 4*blockDim.x+1]; // Incorrect offset
|
||||
result[4*tid+3] = shmem[2*threadIdx.x+1] + T::neg(shmem[2*threadIdx.x +4*blockDim.x+1]); // Incorrect offset
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void bucket_acc_naive(T* buckets, unsigned* indices, unsigned* sizes, unsigned nof_buckets){
|
||||
int tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (tid >= nof_buckets) return;
|
||||
for (int i = 0; i < sizes[tid]; i++)
|
||||
{
|
||||
buckets[indices[tid]] = buckets[indices[tid]] + buckets[indices[tid]];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void bucket_acc_memory_baseline(T* buckets1, T* buckets2, unsigned* indices, unsigned nof_buckets){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= nof_buckets) return;
|
||||
buckets2[indices[tid]] = buckets1[indices[tid]];
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void bucket_acc_compute_baseline(T* buckets, unsigned* indices, unsigned* sizes, unsigned nof_buckets){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= nof_buckets) return;
|
||||
T bucket = buckets[indices[tid]];
|
||||
for (int j = 0; j < 100; j++)
|
||||
{
|
||||
for (int i = 0; i < sizes[tid]; i++)
|
||||
{
|
||||
bucket = bucket + bucket;
|
||||
}
|
||||
}
|
||||
buckets[indices[tid]] = bucket;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void bucket_acc_reg(T* buckets, unsigned* indices, unsigned* sizes, unsigned nof_buckets){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= nof_buckets) return;
|
||||
T bucket = buckets[indices[tid]];
|
||||
for (int i = 0; i < sizes[tid]; i++)
|
||||
{
|
||||
bucket = bucket + bucket;
|
||||
}
|
||||
buckets[indices[tid]] = bucket;
|
||||
}
|
||||
|
||||
|
||||
// #define NOF_TH 32*64
|
||||
|
||||
|
||||
template <class T, int SIZE_T>
|
||||
__global__ void device_memory_copy(void* arr1_raw, void* arr2_raw, unsigned size){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= size/SIZE_T) return;
|
||||
T* arr1=(T*)arr1_raw;
|
||||
T* arr2=(T*)arr2_raw;
|
||||
arr2[tid] = arr1[tid];
|
||||
}
|
||||
|
||||
template <class T, int SIZE_T>
|
||||
__global__ void segmented_memory_copy(void* arr1_raw, void* arr2_raw, unsigned size, unsigned read_segment_length, unsigned nof_write_segments){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
int nof_elements = size/SIZE_T;
|
||||
int write_segment_length = nof_elements / nof_write_segments;
|
||||
int r_segment_idx = tid / read_segment_length;
|
||||
int r_segment_tid = tid % read_segment_length;
|
||||
int w_segment_idx = r_segment_idx % nof_write_segments;
|
||||
int w_segment_tid = r_segment_idx / nof_write_segments;
|
||||
int addr = w_segment_idx * write_segment_length + w_segment_tid * read_segment_length + r_segment_tid;
|
||||
// if (tid < 50) printf("tid %d, addr %d\n", tid, addr);
|
||||
if (tid >= nof_elements) return;
|
||||
T* arr1=(T*)arr1_raw;
|
||||
T* arr2=(T*)arr2_raw;
|
||||
arr2[addr] = arr1[addr];
|
||||
}
|
||||
|
||||
|
||||
template <class T, int SIZE_T>
|
||||
__global__ void multi_memory_copy1(void* arr1_raw, void* arr2_raw, unsigned size, unsigned nof_elements_per_thread){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
int nof_elements = size/SIZE_T;
|
||||
int segment_length = nof_elements / nof_elements_per_thread;
|
||||
if (tid >= segment_length) return;
|
||||
T* arr1=(T*)arr1_raw;
|
||||
T* arr2=(T*)arr2_raw;
|
||||
for (int i = 0; i < nof_elements_per_thread; i++)
|
||||
{
|
||||
arr2[tid + i*segment_length] = arr1[tid + i*segment_length];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, int SIZE_T>
|
||||
__global__ void multi_memory_copy2(void* arr1_raw, void* arr2_raw, unsigned size, unsigned nof_elements_per_thread){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
int nof_elements = size/SIZE_T;
|
||||
int nof_threads = nof_elements / nof_elements_per_thread;
|
||||
if (tid >= nof_threads) return;
|
||||
T* arr1=(T*)arr1_raw;
|
||||
T* arr2=(T*)arr2_raw;
|
||||
for (int i = 0; i < nof_elements_per_thread; i++)
|
||||
{
|
||||
arr2[tid*nof_elements_per_thread + i] = arr1[tid*nof_elements_per_thread + i];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void simple_memory_copy(T* in, T* out, unsigned size){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= size) return;
|
||||
out[tid] = in[tid];
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void naive_transpose_write(T *in, T *out, int row_length){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= row_length * row_length) return;
|
||||
int row_id = tid / row_length;
|
||||
int col_id = tid % row_length;
|
||||
out[col_id * row_length + row_id] = in[tid];
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void naive_transpose_read(T *in, T *out, int row_length){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= row_length * row_length) return;
|
||||
int row_id = tid / row_length;
|
||||
int col_id = tid % row_length;
|
||||
out[tid] = in[col_id * row_length + row_id];
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
__global__ void shmem_transpose(T *in, T *out, int row_length){
|
||||
__shared__ T shmem[16][16];
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= row_length * row_length) return;
|
||||
int shmem_col_id = threadIdx.x / 16;
|
||||
int shmem_row_id = threadIdx.x % 16;
|
||||
int blocks_per_row = row_length / 16;
|
||||
int block_row_id = blockIdx.x / blocks_per_row;
|
||||
int block_col_id = blockIdx.x % blocks_per_row;
|
||||
// shmem[shmem_col_id][shmem_row_id] = in[block_row_id*row_length*16 + block_col_id*16 + shmem_col_id*row_length + shmem_row_id];
|
||||
shmem[shmem_row_id][shmem_col_id] = in[block_row_id*row_length*16 + block_col_id*16 + shmem_col_id*row_length + shmem_row_id];
|
||||
__syncthreads();
|
||||
// out[block_col_id*row_length*16 + block_row_id*16 + shmem_col_id*row_length + shmem_row_id] = shmem[shmem_row_id][shmem_col_id];
|
||||
out[block_col_id*row_length*16 + block_row_id*16 + shmem_col_id*row_length + shmem_row_id] = shmem[shmem_col_id][shmem_row_id];
|
||||
}
|
||||
|
||||
template <class T, int REPS>
|
||||
__global__ void add_many_times(T *in, T *out, int size){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= size) return;
|
||||
T temp;
|
||||
#pragma unroll
|
||||
for (int i = 0; i < REPS; i++)
|
||||
{
|
||||
temp = i? temp + temp : in[tid];
|
||||
}
|
||||
out[tid] = temp;
|
||||
}
|
||||
|
||||
|
||||
template <class T, int REPS>
|
||||
__global__ void multi_add(T *in, T *out, int size){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
int segment_length = size / REPS;
|
||||
if (tid >= segment_length) return;
|
||||
// #pragma unroll 1
|
||||
for (int i = 0; i < REPS; i++)
|
||||
{
|
||||
out[tid + i*segment_length] = in[tid + i*segment_length] + in[tid + i*segment_length];
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, int SEG_SIZE>
|
||||
__global__ void segment_sum(T *inout, int size){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
int nof_segments = size / SEG_SIZE;
|
||||
if (tid >= nof_segments) return;
|
||||
T sum = T::zero();
|
||||
T sums_sum = T::zero();
|
||||
for (int i = 0; i < SEG_SIZE; i++)
|
||||
{
|
||||
sums_sum = sums_sum + sum;
|
||||
sum = sum + inout[tid * SEG_SIZE + i];
|
||||
}
|
||||
inout[tid * SEG_SIZE] = sums_sum;
|
||||
// inout[tid * SEG_SIZE] = sum;
|
||||
}
|
||||
|
||||
template <class T, int SEG_SIZE>
|
||||
__global__ void shmem_segment_sum(T *inout, int size){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
int nof_segments = size / SEG_SIZE;
|
||||
if (tid >= nof_segments) return;
|
||||
__shared__ T shmem[128*2];
|
||||
// T sum = T::zero();
|
||||
// T sums_sum = T::zero();
|
||||
shmem[2*threadIdx.x] = T::zero(); //sum
|
||||
shmem[2*threadIdx.x + 1] = T::zero(); //sums_sum
|
||||
for (int i = 0; i < SEG_SIZE; i++)
|
||||
{
|
||||
{T sum = shmem[2*threadIdx.x];
|
||||
T sums_sum = shmem[2*threadIdx.x + 1];
|
||||
shmem[2*threadIdx.x + 1] = sums_sum + sum;}
|
||||
// {T sum = shmem[2*(127-threadIdx.x)];
|
||||
// T sums_sum = shmem[2*(127-threadIdx.x) + 1];
|
||||
// shmem[2*(127-threadIdx.x) + 1] = sums_sum + sum;}
|
||||
// shmem[2*(127-threadIdx.x) + 1] = shmem[2*(127-threadIdx.x) + 1] + shmem[2*(127-threadIdx.x)];
|
||||
// shmem[2*threadIdx.x + 1] = shmem[2*threadIdx.x + 1] + shmem[2*threadIdx.x];
|
||||
// __syncthreads();
|
||||
{T sum = shmem[2*threadIdx.x];
|
||||
T sums_sum = inout[tid * SEG_SIZE + i];
|
||||
shmem[2*threadIdx.x] = sum + sums_sum;}
|
||||
// shmem[2*threadIdx.x] = shmem[2*threadIdx.x] + inout[tid * SEG_SIZE + i];
|
||||
// __syncthreads();
|
||||
}
|
||||
inout[tid * SEG_SIZE] = shmem[2*threadIdx.x + 1];
|
||||
// inout[tid * SEG_SIZE] = sum;
|
||||
}
|
||||
|
||||
template <class T, int REPS>
|
||||
__global__ void multi_mult(T *in, T *out, int size){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
int segment_length = size / REPS;
|
||||
if (tid >= segment_length) return;
|
||||
#pragma unroll 1
|
||||
for (int i = 0; i < REPS; i++)
|
||||
{
|
||||
out[tid + i*segment_length] = in[tid + i*segment_length] * in[tid + i*segment_length];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class E>
|
||||
DEVICE_INLINE void ntt8opt(E& X0, E& X1, E& X2, E& X3, E& X4, E& X5, E& X6, E& X7)
|
||||
{
|
||||
E T;
|
||||
|
||||
T = X3 - X7;
|
||||
X7 = X3 + X7;
|
||||
X3 = X1 - X5;
|
||||
X5 = X1 + X5;
|
||||
X1 = X2 + X6;
|
||||
X2 = X2 - X6;
|
||||
X6 = X0 + X4;
|
||||
X0 = X0 - X4;
|
||||
|
||||
X4 = X6 + X1;
|
||||
X6 = X6 - X1;
|
||||
X1 = X3 + T;
|
||||
X3 = X3 - T;
|
||||
T = X5 + X7;
|
||||
X5 = X5 - X7;
|
||||
X7 = X0 + X2;
|
||||
X0 = X0 - X2;
|
||||
|
||||
X2 = X6 + X5;
|
||||
X6 = X6 - X5;
|
||||
X5 = X7 - X1;
|
||||
X1 = X7 + X1;
|
||||
X7 = X0 - X3;
|
||||
X3 = X0 + X3;
|
||||
X0 = X4 + T;
|
||||
X4 = X4 - T;
|
||||
}
|
||||
|
||||
|
||||
template <class E>
|
||||
DEVICE_INLINE void ntt8(E& X0, E& X1, E& X2, E& X3, E& X4, E& X5, E& X6, E& X7)
|
||||
{
|
||||
E Y0,Y1,Y2,Y3,Y4,Y5,Y6,Y7;
|
||||
|
||||
Y0 = X0 + X4;
|
||||
Y1 = X0 - X4;
|
||||
Y2 = X1 - X5;
|
||||
Y3 = X1 + X5;
|
||||
Y4 = X2 + X6;
|
||||
Y5 = X2 - X6;
|
||||
Y6 = X3 - X7;
|
||||
Y7 = X3 + X7;
|
||||
|
||||
X0 = Y0 + Y2;
|
||||
X1 = Y0 - Y2;
|
||||
X2 = Y1 - Y3;
|
||||
X3 = Y1 + Y3;
|
||||
X4 = Y4 + Y6;
|
||||
X5 = Y4 - Y6;
|
||||
X6 = Y5 - Y7;
|
||||
X7 = Y5 + Y7;
|
||||
|
||||
Y0 = X0 + X1;
|
||||
Y1 = X0 - X1;
|
||||
Y2 = X2 - X3;
|
||||
Y3 = X2 + X3;
|
||||
Y4 = X4 + X5;
|
||||
Y5 = X4 - X5;
|
||||
Y6 = X6 - X7;
|
||||
Y7 = X6 + X7;
|
||||
|
||||
X0 = Y0;
|
||||
X1 = Y1;
|
||||
X2 = Y2;
|
||||
X3 = Y3;
|
||||
X4 = Y4;
|
||||
X5 = Y5;
|
||||
X6 = Y6;
|
||||
X7 = Y7;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <class T>
|
||||
__global__ void multi_ntt8(T *in, T *out, int size){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
int segment_length = size / 8;
|
||||
if (tid >= segment_length) return;
|
||||
T X[8];
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
X[i] = in[tid + i*segment_length];
|
||||
}
|
||||
// ntt8(X[0],X[1],X[2],X[3],X[4],X[5],X[6],X[7]);
|
||||
ntt8opt(X[0],X[1],X[2],X[3],X[4],X[5],X[6],X[7]);
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
out[tid + i*segment_length] = X[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Benchmark-only multiplication of two 2-limb (2 x 32-bit) operands into r[0..3], followed by a
 * fold of the four limbs into r[0] and r[1].
 *
 * The partial products are combined with plain mad_lo / mad_hi, and the two final additions are
 * independent add_cc calls.
 * NOTE(review): this does not look like an exact wide product (the partial-product carries do not
 * appear to be propagated) - treat it as an instruction-mix benchmark and confirm before relying
 * on the numeric result.
 *
 * `r` is written while `a` and `b` are still being read, and the visible caller passes the same
 * buffer for all three, so the statement order below is kept exactly.
 */
__device__ void mul_naive(uint32_t* a, uint32_t* b, uint32_t* r)
{
  r[0] = ptx::mul_lo(a[0], b[0]);
  r[1] = ptx::mul_hi(a[0], b[0]);
  r[1] = ptx::mad_lo(a[0], b[1], r[1]);
  r[1] = ptx::mad_lo(a[1], b[0], r[1]);
  r[2] = ptx::mul_lo(a[1], b[1]);
  r[2] = ptx::mad_hi(a[1], b[0], r[2]);
  r[2] = ptx::mad_hi(a[0], b[1], r[2]);
  r[3] = ptx::mul_hi(a[1], b[1]);

  // Fold the four limbs down to two.
  r[0] = ptx::add_cc(r[0], r[1]);
  r[1] = ptx::add_cc(r[2], r[3]);
}
|
||||
|
||||
/**
 * Benchmark multiplication of two 2-limb (2 x 32-bit) operands: the "even" products a0*b0 and
 * a1*b1 go straight into r[0..3], the cross products are accumulated in a 2-limb scratch buffer
 * and added into r[1..3] with a carry chain (add_cc / addc_cc / addc). Finally the four limbs are
 * folded into r[0] and r[1].
 *
 * `r` is written while `a` and `b` are still being read, so the statement order is significant
 * when the buffers alias.
 */
__device__ void mul_icicle(uint32_t* a, uint32_t* b, uint32_t* r)
{
  __align__(8) uint32_t cross[2];

  // Even products.
  r[0] = ptx::mul_lo(a[0], b[0]);
  r[1] = ptx::mul_hi(a[0], b[0]);
  r[2] = ptx::mul_lo(a[1], b[1]);
  r[3] = ptx::mul_hi(a[1], b[1]);

  // Cross products a0*b1 + a1*b0.
  cross[0] = ptx::mul_lo(a[0], b[1]);
  cross[1] = ptx::mul_hi(a[0], b[1]);
  cross[0] = ptx::mad_lo(a[1], b[0], cross[0]);
  cross[1] = ptx::mad_hi(a[1], b[0], cross[1]);

  // Add the cross products one limb up, propagating the carry.
  r[1] = ptx::add_cc(r[1], cross[0]);
  r[2] = ptx::addc_cc(r[2], cross[1]);
  r[3] = ptx::addc(r[3], 0);

  // Fold the four limbs down to two.
  r[0] = ptx::add_cc(r[0], r[1]);
  r[1] = ptx::add_cc(r[2], r[3]);
}
|
||||
|
||||
|
||||
|
||||
template <int REPS>
|
||||
__global__ void limb_mult_bench(uint32_t *in, uint32_t *out, int size){
|
||||
int tid = blockDim.x*blockIdx.x + threadIdx.x;
|
||||
if (tid >= size/2) return;
|
||||
uint32_t res[4];
|
||||
res[0] = in[tid];
|
||||
res[1] = in[tid + size/2];
|
||||
// typename T::Wide temp;
|
||||
for (int i = 0; i < REPS; i++)
|
||||
{
|
||||
mul_naive(res, res, res);
|
||||
// mul_icicle(res, res, res);
|
||||
// T::multiply_raw_device(res.limbs_storage, res.limbs_storage, res.limbs_storage);
|
||||
// temp = T::mul_wide(res, res);
|
||||
}
|
||||
// out[tid] = T::reduce(temp);
|
||||
out[tid] = res[0];
|
||||
out[tid + size/2] = res[1];
|
||||
}
|
||||
@@ -1,114 +0,0 @@
|
||||
#include "fields/id.h"
|
||||
// #define FIELD_ID 1
|
||||
#define CURVE_ID 3
|
||||
#include "curves/curve_config.cuh"
|
||||
// #include "fields/field_config.cuh"
|
||||
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <cub/device/device_radix_sort.cuh>
|
||||
|
||||
#include "fields/field.cuh"
|
||||
#include "curves/projective.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
#include "kernels.cu"
|
||||
|
||||
class Dummy_Scalar
|
||||
{
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
|
||||
unsigned x;
|
||||
unsigned p = 10;
|
||||
// unsigned p = 1<<30;
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar zero() { return {0}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar one() { return {1}; }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar)
|
||||
{
|
||||
os << scalar.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) const
|
||||
{
|
||||
return (x >> (digit_num * digit_width)) & ((1 << digit_width) - 1);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2)
|
||||
{
|
||||
return {(p1.x + p2.x) % p1.p};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) { return (p1.x == p2.x); }
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) { return (p1.x == p2); }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar& scalar) { return {scalar.p - scalar.x}; }
|
||||
static HOST_INLINE Dummy_Scalar rand_host()
|
||||
{
|
||||
return {(unsigned)rand() % 10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// typedef field_config::scalar_t test_scalar;
|
||||
typedef curve_config::scalar_t test_scalar;
|
||||
typedef curve_config::projective_t test_projective;
|
||||
typedef curve_config::affine_t test_affine;
|
||||
|
||||
typedef int test_t;
|
||||
// typedef int4 test_t;
|
||||
// typedef Dummy_Scalar test_t;
|
||||
// typedef test_projective test_t;
|
||||
// typedef test_scalar test_t;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
cudaEvent_t start, stop;
|
||||
float kernel_time;
|
||||
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
|
||||
int N = 1<<25;
|
||||
|
||||
void *arr1, *arr2;
|
||||
|
||||
cudaMalloc(&arr1, N);
|
||||
cudaMalloc(&arr2, N);
|
||||
|
||||
int THREADS = 256;
|
||||
int BLOCKS = (N/sizeof(test_t) + THREADS - 1)/THREADS;
|
||||
|
||||
//warm up
|
||||
device_memory_copy<test_t, sizeof(test_t)><<<BLOCKS, THREADS>>>(arr1, arr2, N);
|
||||
segmented_memory_copy<test_t, sizeof(test_t)><<<BLOCKS, THREADS>>>(arr1, arr2, N, 32, 1024);
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
|
||||
cudaEventRecord(start, 0);
|
||||
|
||||
device_memory_copy<test_t, sizeof(test_t)><<<BLOCKS, THREADS>>>(arr1, arr2, N);
|
||||
// segmented_memory_copy<test_t, sizeof(test_t)><<<BLOCKS, THREADS>>>(arr1, arr2, N, 2, 1024);
|
||||
// int elements_per_thread = 8;
|
||||
// BLOCKS = (N/sizeof(test_t)/elements_per_thread + THREADS - 1)/THREADS;
|
||||
// multi_memory_copy1<test_t, sizeof(test_t)><<<BLOCKS, THREADS>>>(arr1, arr2, N, elements_per_thread);
|
||||
// multi_memory_copy2<test_t, sizeof(test_t)><<<BLOCKS, THREADS>>>(arr1, arr2, N, elements_per_thread);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaStreamSynchronize(0);
|
||||
cudaEventElapsedTime(&kernel_time, start, stop);
|
||||
printf("kernel_time : %.3f ms.\n", kernel_time);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,199 +0,0 @@
|
||||
#include "fields/id.h"
|
||||
// #define FIELD_ID 1001
|
||||
#define CURVE_ID 3
|
||||
#include "curves/curve_config.cuh"
|
||||
// #include "fields/field_config.cuh"
|
||||
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <cub/device/device_radix_sort.cuh>
|
||||
|
||||
#include "fields/field.cuh"
|
||||
#include "curves/projective.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
#include "kernels.cu"
|
||||
|
||||
class Dummy_Scalar
|
||||
{
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
|
||||
unsigned x;
|
||||
unsigned p = 10;
|
||||
// unsigned p = 1<<30;
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar zero() { return {0}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar one() { return {1}; }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar)
|
||||
{
|
||||
os << scalar.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) const
|
||||
{
|
||||
return (x >> (digit_num * digit_width)) & ((1 << digit_width) - 1);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2)
|
||||
{
|
||||
return {(p1.x + p2.x) % p1.p};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) { return (p1.x == p2.x); }
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) { return (p1.x == p2); }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar& scalar) { return {scalar.p - scalar.x}; }
|
||||
static HOST_INLINE Dummy_Scalar rand_host()
|
||||
{
|
||||
return {(unsigned)rand() % 10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// typedef field_config::scalar_t test_scalar;
|
||||
typedef curve_config::scalar_t test_scalar;
|
||||
typedef curve_config::projective_t test_projective;
|
||||
typedef curve_config::affine_t test_affine;
|
||||
|
||||
// typedef int test_t;
|
||||
// typedef int4 test_t;
|
||||
// typedef Dummy_Scalar test_t;
|
||||
// typedef test_projective test_t;
|
||||
typedef test_scalar test_t;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
cudaEvent_t start, stop;
|
||||
float kernel_time;
|
||||
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
|
||||
int N = 1<<20;
|
||||
// int N = 1<<3;
|
||||
|
||||
test_t* buckets_h = new test_t[N];
|
||||
unsigned* indices_h = new unsigned[N];
|
||||
unsigned* sizes_h = new unsigned[N];
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
indices_h[i] = static_cast<unsigned>(i);
|
||||
sizes_h[i] = static_cast<unsigned>(std::rand())%20;
|
||||
// sizes_h[i] = 10;
|
||||
buckets_h[i] = i<100? test_t::rand_host() : buckets_h[i-100];
|
||||
// buckets_h[i] = i<100? rand() : buckets_h[i-100];
|
||||
// buckets_h[i].x = i<100? rand() : buckets_h[i-100].x;
|
||||
// buckets_h[i].y = i<100? rand() : buckets_h[i-100].y;
|
||||
// buckets_h[i].z = i<100? rand() : buckets_h[i-100].z;
|
||||
// buckets_h[i].w = i<100? rand() : buckets_h[i-100].w;
|
||||
// if (i<10) std::cout << indices_h[i] << " " << sizes_h[i] << " " << buckets_h[i] << std::endl;
|
||||
}
|
||||
|
||||
test_t *buckets_d, *buckets2_d;
|
||||
unsigned *sizes_d, *indices_d;
|
||||
|
||||
cudaMalloc(&buckets_d, sizeof(test_t) * N);
|
||||
cudaMalloc(&buckets2_d, sizeof(test_t) * N);
|
||||
cudaMalloc(&sizes_d, sizeof(unsigned) * N);
|
||||
cudaMalloc(&indices_d, sizeof(unsigned) * N);
|
||||
|
||||
cudaMemcpy(buckets_d, buckets_h, sizeof(test_t) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(sizes_d, sizes_h, sizeof(unsigned) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(indices_d, indices_h, sizeof(unsigned) * N, cudaMemcpyHostToDevice);
|
||||
|
||||
int THREADS = 256;
|
||||
int BLOCKS = (N + THREADS - 1)/THREADS;
|
||||
|
||||
//warm up
|
||||
bucket_acc_naive<<<BLOCKS, THREADS>>>(buckets_d, indices_d, sizes_d, N);
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
|
||||
cudaEventRecord(start, 0);
|
||||
|
||||
|
||||
// unsigned* sorted_sizes;
|
||||
// cudaMalloc(&sorted_sizes, sizeof(unsigned) * N);
|
||||
|
||||
// unsigned* sorted_indices;
|
||||
// cudaMalloc(&sorted_indices, sizeof(unsigned) * N);
|
||||
// unsigned* sort_indices_temp_storage{};
|
||||
// size_t sort_indices_temp_storage_bytes = 0;
|
||||
// cub::DeviceRadixSort::SortPairsDescending(
|
||||
// sort_indices_temp_storage, sort_indices_temp_storage_bytes, sizes_d,
|
||||
// sorted_sizes, indices_d, sorted_indices, N, 0);
|
||||
// cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
|
||||
// cub::DeviceRadixSort::SortPairsDescending(
|
||||
// sort_indices_temp_storage, sort_indices_temp_storage_bytes, sizes_d,
|
||||
// sorted_sizes, indices_d, sorted_indices, N, 0);
|
||||
// cudaFree(sort_indices_temp_storage);
|
||||
|
||||
// test_t* sorted_buckets;
|
||||
// cudaMalloc(&sorted_buckets, sizeof(test_t) * N);
|
||||
// unsigned* sort_buckets_temp_storage{};
|
||||
// size_t sort_buckets_temp_storage_bytes = 0;
|
||||
// cub::DeviceRadixSort::SortPairsDescending(
|
||||
// sort_buckets_temp_storage, sort_buckets_temp_storage_bytes, sizes_d,
|
||||
// sorted_sizes, buckets_d, sorted_buckets, N, 0);
|
||||
// cudaMalloc(&sort_buckets_temp_storage, sort_buckets_temp_storage_bytes);
|
||||
// cub::DeviceRadixSort::SortPairsDescending(
|
||||
// sort_buckets_temp_storage, sort_buckets_temp_storage_bytes, sizes_d,
|
||||
// sorted_sizes, buckets_d, sorted_buckets, N, 0);
|
||||
// cudaFree(sort_buckets_temp_storage);
|
||||
|
||||
// cudaEventRecord(start, 0);
|
||||
|
||||
bucket_acc_naive<<<BLOCKS, THREADS>>>(buckets_d, indices_d, sizes_d, N);
|
||||
// bucket_acc_compute_baseline<<<BLOCKS, THREADS>>>(buckets_d, indices_d, sizes_d, N);
|
||||
// bucket_acc_memory_baseline<<<BLOCKS, THREADS>>>(buckets_d, buckets2_d, indices_d, N);
|
||||
// bucket_acc_reg<<<BLOCKS, THREADS>>>(buckets_d, indices_d, sizes_d, N);
|
||||
// bucket_acc_reg<<<BLOCKS, THREADS>>>(buckets_d, sorted_indices, sorted_sizes, N);
|
||||
// bucket_acc_reg<<<BLOCKS, THREADS>>>(sorted_buckets, indices_d, sorted_sizes, N);
|
||||
|
||||
// simple_memory_copy<<<64, 32>>>(buckets_d, buckets2_d, N);
|
||||
// simple_memory_copy<<<BLOCKS, THREADS>>>(buckets_d, buckets2_d, N);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaStreamSynchronize(0);
|
||||
cudaEventElapsedTime(&kernel_time, start, stop);
|
||||
printf("kernel_time : %.3f ms.\n", kernel_time);
|
||||
|
||||
cudaMemcpy(buckets_h, buckets_d, sizeof(test_t) * N, cudaMemcpyDeviceToHost);
|
||||
// cudaMemcpy(buckets_h, sorted_buckets, sizeof(test_t) * N, cudaMemcpyDeviceToHost);
|
||||
// cudaMemcpy(sizes_h, sorted_indices, sizeof(unsigned) * N, cudaMemcpyDeviceToHost);
|
||||
|
||||
// printf("res:\n");
|
||||
// for (size_t i = 0; i < 8; i++)
|
||||
// {
|
||||
// std::cout << buckets_h[i] << "\n";
|
||||
// // std::cout << sizes_h[i] << "\n";
|
||||
// }
|
||||
// printf("\n");
|
||||
// printf("C test: ");
|
||||
// for (size_t i = 0; i < 8; i++)
|
||||
// {
|
||||
// std::cout << Cb_h[i] << ", ";
|
||||
// }
|
||||
// printf("\n");
|
||||
// printf("C ref: ");
|
||||
// for (size_t i = 0; i < 8; i++)
|
||||
// {
|
||||
// std::cout << C_d[i] << ", ";
|
||||
// // std::cout << C_h[i] << ", ";
|
||||
// }
|
||||
// printf("\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,199 +0,0 @@
|
||||
#include "fields/id.h"
|
||||
// #define FIELD_ID 2
|
||||
#define CURVE_ID 3
|
||||
#include "curves/curve_config.cuh"
|
||||
// #include "fields/field_config.cuh"
|
||||
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "fields/field.cuh"
|
||||
#include "curves/projective.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
#include "kernels.cu"
|
||||
|
||||
class Dummy_Scalar
|
||||
{
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
|
||||
unsigned x;
|
||||
unsigned p = 10;
|
||||
// unsigned p = 1<<30;
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar zero() { return {0}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar one() { return {1}; }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar)
|
||||
{
|
||||
os << scalar.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) const
|
||||
{
|
||||
return (x >> (digit_num * digit_width)) & ((1 << digit_width) - 1);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2)
|
||||
{
|
||||
return {(p1.x + p2.x) % p1.p};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) { return (p1.x == p2.x); }
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) { return (p1.x == p2); }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar& scalar) { return {scalar.p - scalar.x}; }
|
||||
static HOST_INLINE Dummy_Scalar rand_host()
|
||||
{
|
||||
return {(unsigned)rand() % 10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
typedef curve_config::scalar_t test_scalar;
|
||||
typedef curve_config::projective_t test_projective;
|
||||
typedef curve_config::affine_t test_affine;
|
||||
|
||||
// typedef Dummy_Scalar test_t;
|
||||
// typedef test_projective test_t;
|
||||
typedef test_scalar test_t;
|
||||
|
||||
void queryGPUProperties() {
|
||||
int deviceCount = 0;
|
||||
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
|
||||
|
||||
if (error_id != cudaSuccess) {
|
||||
std::cerr << "cudaGetDeviceCount returned " << static_cast<int>(error_id) << " -> " << cudaGetErrorString(error_id) << std::endl;
|
||||
std::cerr << "Result = FAIL" << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (deviceCount == 0) {
|
||||
std::cout << "There are no available device(s) that support CUDA." << std::endl;
|
||||
} else {
|
||||
std::cout << "Detected " << deviceCount << " CUDA Capable device(s)." << std::endl;
|
||||
}
|
||||
|
||||
for (int dev = 0; dev < deviceCount; ++dev) {
|
||||
cudaSetDevice(dev);
|
||||
|
||||
cudaDeviceProp deviceProp;
|
||||
cudaGetDeviceProperties(&deviceProp, dev);
|
||||
|
||||
std::cout << "Device " << dev << ": \"" << deviceProp.name << "\"" << std::endl;
|
||||
std::cout << " CUDA Capability Major/Minor version number: " << deviceProp.major << "." << deviceProp.minor << std::endl;
|
||||
std::cout << " Total amount of global memory: " << deviceProp.totalGlobalMem / (1024 * 1024) << " MB" << std::endl;
|
||||
std::cout << " Number of multiprocessors: " << deviceProp.multiProcessorCount << std::endl;
|
||||
std::cout << " Total amount of global memory: " << deviceProp.totalGlobalMem << " bytes" << std::endl;
|
||||
std::cout << " Total amount of shared memory per block: " << deviceProp.sharedMemPerBlock << " bytes" << std::endl;
|
||||
std::cout << " Total amount of shared memory per multiprocessor: " << deviceProp.sharedMemPerMultiprocessor << " bytes" << std::endl;
|
||||
std::cout << " Total number of registers available per block: " << deviceProp.regsPerBlock << std::endl;
|
||||
std::cout << " Total number of registers available per multiprocessor: " << deviceProp.regsPerMultiprocessor << std::endl;
|
||||
std::cout << " Warp size: " << deviceProp.warpSize << std::endl;
|
||||
std::cout << " Maximum number of threads per block: " << deviceProp.maxThreadsPerBlock << std::endl;
|
||||
std::cout << " Maximum number of threads per multiprocessor: " << deviceProp.maxThreadsPerMultiProcessor << std::endl;
|
||||
std::cout << " Maximum sizes of each dimension of a block: " << deviceProp.maxThreadsDim[0] << " x "
|
||||
<< deviceProp.maxThreadsDim[1] << " x " << deviceProp.maxThreadsDim[2] << std::endl;
|
||||
std::cout << " Maximum sizes of each dimension of a grid: " << deviceProp.maxGridSize[0] << " x "
|
||||
<< deviceProp.maxGridSize[1] << " x " << deviceProp.maxGridSize[2] << std::endl;
|
||||
std::cout << " Clock rate: " << deviceProp.clockRate / 1000 << " MHz" << std::endl;
|
||||
std::cout << " Memory clock rate: " << deviceProp.memoryClockRate / 1000 << " MHz" << std::endl;
|
||||
std::cout << " Memory bus width: " << deviceProp.memoryBusWidth << " bits" << std::endl;
|
||||
std::cout << " Peak memory bandwidth: "
|
||||
<< 2.0 * deviceProp.memoryClockRate * (deviceProp.memoryBusWidth / 8) / 1.0e6 << " GB/s" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
queryGPUProperties();
|
||||
|
||||
int N = 1<<20;
|
||||
// int N = 300;
|
||||
|
||||
test_t* A_h = new test_t[N];
|
||||
test_t* B_h = new test_t[N];
|
||||
test_t* C_h = new test_t[N];
|
||||
test_t* Cb_h = new test_t[N];
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
A_h[i] = i<100? test_t::rand_host() : A_h[i-100];
|
||||
B_h[i] = i<100? test_t::rand_host() : B_h[i-100];
|
||||
}
|
||||
|
||||
test_t *A_d,*B_d,*C_d;
|
||||
test_t *Cb_d;
|
||||
|
||||
|
||||
cudaMalloc(&A_d, sizeof(test_t) * N);
|
||||
cudaMalloc(&B_d, sizeof(test_t) * N);
|
||||
cudaMalloc(&C_d, sizeof(test_t) * N);
|
||||
cudaMalloc(&Cb_d, sizeof(test_t) * N);
|
||||
|
||||
cudaMemcpy(A_d, A_h, sizeof(test_t) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(B_d, B_h, sizeof(test_t) * N, cudaMemcpyHostToDevice);
|
||||
|
||||
// int THREADS = 256;
|
||||
// int BLOCKS = (N + THREADS - 1)/THREADS;
|
||||
// add_elements_kernel<<<BLOCKS, THREADS>>>(A_d, B_d, C_d, N);
|
||||
// cudaDeviceSynchronize();
|
||||
// // printf("cuda error %d\n", cudaGetLastError());
|
||||
// std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
|
||||
// THREADS = 256;
|
||||
// BLOCKS = (N + THREADS - 1)/THREADS;
|
||||
// bugged_add_elements_kernel<<<BLOCKS, THREADS>>>(A_d, B_d, Cb_d, N);
|
||||
// cudaDeviceSynchronize();
|
||||
// // printf("cuda error %d\n", cudaGetLastError());
|
||||
// std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
|
||||
// int THREADS = 128;
|
||||
// int BLOCKS = (N/4 + THREADS - 1)/THREADS;
|
||||
// // fake_ntt_kernel<<<BLOCKS, THREADS, sizeof(test_t)*THREADS>>>(A_d, C_d, N);
|
||||
// fake_ntt_kernel<<<BLOCKS, THREADS, sizeof(test_t)*THREADS*4>>>(A_d, C_d, N/4);
|
||||
// cudaDeviceSynchronize();
|
||||
// // printf("cuda error %d\n", cudaGetLastError());
|
||||
// std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
|
||||
// THREADS = 128;
|
||||
// BLOCKS = (N/4 + THREADS - 1)/THREADS;
|
||||
// // fake_ntt_kernel<<<BLOCKS, THREADS, sizeof(test_t)*THREADS>>>(A_d, C_d, N);
|
||||
// bugged_fake_ntt_kernel<<<BLOCKS, THREADS, sizeof(test_t)*THREADS*4>>>(A_d, Cb_d, N/4);
|
||||
// // bugged_fake_ntt_kernel<<<1, 1, sizeof(test_t)*THREADS*4>>>(A_d, Cb_d, N/4);
|
||||
// cudaDeviceSynchronize();
|
||||
// // printf("cuda error %d\n", cudaGetLastError());
|
||||
// std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
|
||||
cudaMemcpy(C_h, C_d, sizeof(test_t) * N, cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(Cb_h, Cb_d, sizeof(test_t) * N, cudaMemcpyDeviceToHost);
|
||||
|
||||
// printf("A: ");
|
||||
// for (size_t i = 0; i < 8; i++)
|
||||
// {
|
||||
// std::cout << A_h[i] << ", ";
|
||||
// }
|
||||
// printf("\n");
|
||||
// printf("C test: ");
|
||||
// for (size_t i = 0; i < 8; i++)
|
||||
// {
|
||||
// std::cout << Cb_h[i] << ", ";
|
||||
// }
|
||||
// printf("\n");
|
||||
// printf("C ref: ");
|
||||
// for (size_t i = 0; i < 8; i++)
|
||||
// {
|
||||
// std::cout << C_d[i] << ", ";
|
||||
// // std::cout << C_h[i] << ", ";
|
||||
// }
|
||||
// printf("\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,123 +0,0 @@
|
||||
#include "fields/id.h"
|
||||
#define FIELD_ID 1001
|
||||
// #define CURVE_ID 3
|
||||
// #include "curves/curve_config.cuh"
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <cub/device/device_radix_sort.cuh>
|
||||
|
||||
#include "fields/field.cuh"
|
||||
#include "curves/projective.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
#include "kernels.cu"
|
||||
|
||||
class Dummy_Scalar
|
||||
{
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
|
||||
unsigned x;
|
||||
unsigned p = 10;
|
||||
// unsigned p = 1<<30;
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar zero() { return {0}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar one() { return {1}; }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar)
|
||||
{
|
||||
os << scalar.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) const
|
||||
{
|
||||
return (x >> (digit_num * digit_width)) & ((1 << digit_width) - 1);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2)
|
||||
{
|
||||
return {(p1.x + p2.x) % p1.p};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) { return (p1.x == p2.x); }
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) { return (p1.x == p2); }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar& scalar) { return {scalar.p - scalar.x}; }
|
||||
static HOST_INLINE Dummy_Scalar rand_host()
|
||||
{
|
||||
return {(unsigned)rand() % 10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
typedef field_config::scalar_t test_scalar;
|
||||
// typedef curve_config::scalar_t test_scalar;
|
||||
// typedef curve_config::projective_t test_projective;
|
||||
// typedef curve_config::affine_t test_affine;
|
||||
|
||||
// typedef int test_t;
|
||||
// typedef int4 test_t;
|
||||
// typedef Dummy_Scalar test_t;
|
||||
// typedef test_projective test_t;
|
||||
typedef test_scalar test_t;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
cudaEvent_t start, stop;
|
||||
float kernel_time;
|
||||
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
|
||||
int N = 1<<11;
|
||||
int N2 = N*N;
|
||||
|
||||
test_t* arr1_h = new test_t[N2];
|
||||
test_t* arr2_h = new test_t[N2];
|
||||
|
||||
test_t *arr1_d, *arr2_d;
|
||||
|
||||
cudaMalloc(&arr1_d, N2*sizeof(test_t));
|
||||
cudaMalloc(&arr2_d, N2*sizeof(test_t));
|
||||
|
||||
for (int i = 0; i < N2; i++)
|
||||
{
|
||||
arr1_h[i] = i > 100? arr1_h[i-100] : test_t::rand_host();
|
||||
}
|
||||
|
||||
cudaMemcpy(arr1_d, arr1_h, sizeof(test_t) * N2, cudaMemcpyHostToDevice);
|
||||
|
||||
int THREADS = 256;
|
||||
int BLOCKS = (N2 + THREADS - 1)/THREADS;
|
||||
|
||||
//warm up
|
||||
simple_memory_copy<<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N2);
|
||||
shmem_transpose<<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
|
||||
cudaEventRecord(start, 0);
|
||||
|
||||
simple_memory_copy<<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N2);
|
||||
// naive_transpose_write<<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
// naive_transpose_read<<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
// shmem_transpose<<<BLOCKS, THREADS>>>(arr1_d, arr2_d, N);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "cuda err: " << cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||
cudaEventRecord(stop, 0);
|
||||
cudaStreamSynchronize(0);
|
||||
cudaEventElapsedTime(&kernel_time, start, stop);
|
||||
printf("kernel_time : %.3f ms.\n", kernel_time);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -88,6 +88,7 @@ namespace msm {
|
||||
__global__ void single_stage_multi_reduction_kernel(
|
||||
const P* v,
|
||||
P* v_r,
|
||||
unsigned orig_block_size,
|
||||
unsigned block_size,
|
||||
unsigned write_stride,
|
||||
unsigned buckets_per_bm,
|
||||
@@ -107,11 +108,11 @@ namespace msm {
|
||||
// only for write_phase=1 because of its read pattern.
|
||||
const int shifted_block_id = write_phase ? block_id + (block_id + step) / step : block_id;
|
||||
const int block_tid = shifted_tid % jump;
|
||||
const unsigned read_ind = block_size * shifted_block_id + block_tid;
|
||||
const unsigned read_ind = orig_block_size * shifted_block_id + block_tid;
|
||||
const unsigned write_ind = jump * shifted_block_id + block_tid;
|
||||
const unsigned v_r_key =
|
||||
write_stride ? ((write_ind / buckets_per_bm) * 2 + write_phase) * write_stride + write_ind % buckets_per_bm
|
||||
: write_ind;
|
||||
: read_ind;
|
||||
v_r[v_r_key] = v[read_ind] + v[read_ind + jump];
|
||||
}
|
||||
|
||||
@@ -745,7 +746,7 @@ namespace msm {
|
||||
NUM_BLOCKS = (nof_bms_in_batch + NUM_THREADS - 1) / NUM_THREADS;
|
||||
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, final_results, nof_bms_in_batch, c);
|
||||
} else {
|
||||
// the recursive reduction algorithm works with 2 types of reduction that can run on parallel streams
|
||||
// the iterative reduction algorithm works with 2 types of reduction that can run on parallel streams
|
||||
cudaStream_t stream_reduction;
|
||||
cudaEvent_t event_finished_reduction;
|
||||
CHK_IF_RETURN(cudaStreamCreate(&stream_reduction));
|
||||
@@ -766,10 +767,10 @@ namespace msm {
|
||||
const unsigned target_buckets_count = target_windows_count << target_bits_count; // new_bms*2^new_c
|
||||
CHK_IF_RETURN(cudaMallocAsync(&target_buckets, sizeof(P) * target_buckets_count * batch_size, stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(
|
||||
&temp_buckets1, sizeof(P) * source_buckets_count / 2 * batch_size,
|
||||
&temp_buckets1, sizeof(P) * source_buckets_count * batch_size,
|
||||
stream)); // for type1 reduction (strided, bottom window - evens)
|
||||
CHK_IF_RETURN(cudaMallocAsync(
|
||||
&temp_buckets2, sizeof(P) * source_buckets_count / 2 * batch_size,
|
||||
&temp_buckets2, sizeof(P) * source_buckets_count * batch_size,
|
||||
stream)); // for type2 reduction (serial, top window - odds)
|
||||
initialize_buckets_kernel<<<(target_buckets_count * batch_size + 255) / 256, 256>>>(
|
||||
target_buckets, target_buckets_count * batch_size); // initialization is needed for the odd c case
|
||||
@@ -788,9 +789,9 @@ namespace msm {
|
||||
if (!is_odd_c || !is_first_iter) { // skip if c is odd and it's the first iteration
|
||||
single_stage_multi_reduction_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(
|
||||
is_first_iter || (is_second_iter && is_odd_c) ? source_buckets : temp_buckets1,
|
||||
is_last_iter ? target_buckets : temp_buckets1, 1 << (source_bits_count - j + (is_odd_c ? 1 : 0)),
|
||||
is_last_iter ? 1 << target_bits_count : 0, 1 << target_bits_count, 0 /*=write_phase*/,
|
||||
(1 << target_bits_count) - 1, nof_threads);
|
||||
is_last_iter ? target_buckets : temp_buckets1, 1 << source_bits_count,
|
||||
1 << (source_bits_count - j + (is_odd_c ? 1 : 0)), is_last_iter ? 1 << target_bits_count : 0,
|
||||
1 << target_bits_count, 0 /*=write_phase*/, (1 << target_bits_count) - 1, nof_threads);
|
||||
}
|
||||
|
||||
nof_threads = (((source_windows_count << (source_bits_count - target_bits_count)) - source_windows_count)
|
||||
@@ -801,7 +802,7 @@ namespace msm {
|
||||
NUM_BLOCKS = (nof_threads + NUM_THREADS - 1) / NUM_THREADS;
|
||||
single_stage_multi_reduction_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_reduction>>>(
|
||||
is_first_iter ? source_buckets : temp_buckets2, is_last_iter ? target_buckets : temp_buckets2,
|
||||
1 << (target_bits_count - j), is_last_iter ? 1 << target_bits_count : 0,
|
||||
1 << target_bits_count, 1 << (target_bits_count - j), is_last_iter ? 1 << target_bits_count : 0,
|
||||
1 << (target_bits_count - (is_odd_c ? 1 : 0)), 1 /*=write_phase*/,
|
||||
(1 << (target_bits_count - (is_odd_c ? 1 : 0))) - 1, nof_threads);
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user