mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-09 15:37:58 -05:00
Merge remote-tracking branch 'origin/main' into aviad/blake2s
This commit is contained in:
4
.github/workflows/cpp_cuda.yml
vendored
4
.github/workflows/cpp_cuda.yml
vendored
@@ -55,7 +55,7 @@ jobs:
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: |
|
||||
mkdir -p build && rm -rf build/*
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON -DCURVE=${{ matrix.curve.name }} ${{ matrix.field.build_args }} -S . -B build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON -DCURVE=${{ matrix.curve.name }} ${{ matrix.curve.build_args }} -S . -B build
|
||||
cmake --build build -j
|
||||
- name: Run C++ curve Tests
|
||||
working-directory: ./icicle/build/tests
|
||||
@@ -73,6 +73,8 @@ jobs:
|
||||
build_args: -DEXT_FIELD=ON
|
||||
- name: stark252
|
||||
build_args: -DEXT_FIELD=OFF
|
||||
- name: m31
|
||||
build_args: -DEXT_FIELD=ON
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
11
.github/workflows/rust.yml
vendored
11
.github/workflows/rust.yml
vendored
@@ -62,8 +62,8 @@ jobs:
|
||||
# We need to limit the number of threads to avoid running out of memory on weaker machines
|
||||
# ignored tests are polynomial tests. Since they conflict with NTT tests, they are executed separately
|
||||
run: |
|
||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --release --verbose --features=g2 -- --test-threads=2 --ignored
|
||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --release --verbose --features=g2 -- --test-threads=2
|
||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --exclude icicle-m31 --release --verbose --features=g2 -- --test-threads=2 --ignored
|
||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --exclude icicle-m31 --release --verbose --features=g2 -- --test-threads=2
|
||||
|
||||
- name: Run baby bear tests
|
||||
working-directory: ./wrappers/rust/icicle-fields/icicle-babybear
|
||||
@@ -79,6 +79,13 @@ jobs:
|
||||
cargo test --release --verbose -- --ignored
|
||||
cargo test --release --verbose
|
||||
|
||||
- name: Run m31 tests
|
||||
working-directory: ./wrappers/rust/icicle-fields/icicle-m31
|
||||
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: |
|
||||
cargo test --release --verbose -- --ignored
|
||||
cargo test --release --verbose
|
||||
|
||||
# build-windows:
|
||||
# name: Build on Windows
|
||||
# runs-on: windows-2022
|
||||
|
||||
2
.github/workflows/test-deploy-docs.yml
vendored
2
.github/workflows/test-deploy-docs.yml
vendored
@@ -5,7 +5,7 @@ on:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- 'docs/*'
|
||||
- 'docs/**'
|
||||
|
||||
jobs:
|
||||
test-deploy:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Contributor's Guide
|
||||
|
||||
We welcome all contributions with open arms. At Ingonyama we take a village approach, believing it takes many hands and minds to build a ecosystem.
|
||||
We welcome all contributions with open arms. At Ingonyama we take a village approach, believing it takes many hands and minds to build an ecosystem.
|
||||
|
||||
## Contributing to ICICLE
|
||||
|
||||
@@ -14,9 +14,9 @@ We welcome all contributions with open arms. At Ingonyama we take a village appr
|
||||
When opening a [pull request](https://github.com/ingonyama-zk/icicle/pulls) please keep the following in mind.
|
||||
|
||||
- `Clear Purpose` - The pull request should solve a single issue and be clean of any unrelated changes.
|
||||
- `Clear description` - If the pull request is for a new feature describe what you built, why you added it and how its best that we test it. For bug fixes please describe the issue and the solution.
|
||||
- `Clear description` - If the pull request is for a new feature describe what you built, why you added it and how it's best that we test it. For bug fixes please describe the issue and the solution.
|
||||
- `Consistent style` - Rust and Golang code should be linted by the official linters (golang fmt and rust fmt) and maintain a proper style. For CUDA and C++ code we use [`clang-format`](https://github.com/ingonyama-zk/icicle/blob/main/.clang-format), [here](https://github.com/ingonyama-zk/icicle/blob/605c25f9d22135c54ac49683b710fe2ce06e2300/.github/workflows/main-format.yml#L46) you can see how we run it.
|
||||
- `Minimal Tests` - please add test which cover basic usage of your changes .
|
||||
- `Minimal Tests` - please add tests which cover basic usage of your changes.
|
||||
|
||||
## Questions?
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ type NTTConfig[T any] struct {
|
||||
- **`areInputsOnDevice`**: Indicates if input scalars are located on the device.
|
||||
- **`areOutputsOnDevice`**: Indicates if results are stored on the device.
|
||||
- **`IsAsync`**: Controls whether the NTT operation runs asynchronously.
|
||||
- **`NttAlgorithm`**: Explicitly select the NTT algorithm. ECNTT supports running on `Radix2` algoruithm.
|
||||
- **`NttAlgorithm`**: Explicitly select the NTT algorithm. ECNTT supports running on `Radix2` algorithm.
|
||||
|
||||
### Default Configuration
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ func main() {
|
||||
input := createHostSliceFromHexString("1725b6")
|
||||
outHost256 := make(core.HostSlice[uint8], 32)
|
||||
|
||||
cfg := keccak.GetDefaultKeccakConfig()
|
||||
cfg := keccak.GetDefaultHashConfig()
|
||||
e := keccak.Keccak256(input, int32(input.Len()), 1, outHost256, &cfg)
|
||||
if e.CudaErrorCode != cr.CudaSuccess {
|
||||
panic("Keccak256 hashing failed")
|
||||
@@ -49,8 +49,8 @@ func main() {
|
||||
## Keccak Methods
|
||||
|
||||
```go
|
||||
func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError
|
||||
func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError
|
||||
func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError
|
||||
func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError
|
||||
```
|
||||
|
||||
### Parameters
|
||||
@@ -59,18 +59,18 @@ func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int3
|
||||
- **`inputBlockSize`**: An integer specifying the size of the input data for a single hash.
|
||||
- **`numberOfBlocks`**: An integer specifying the number of results in the hash batch.
|
||||
- **`output`**: A slice where the resulting hash will be stored. This slice can be in host or device memory.
|
||||
- **`config`**: A pointer to a `KeccakConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
- **`config`**: A pointer to a `HashConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
|
||||
### Return Value
|
||||
|
||||
- **`CudaError`**: Returns a CUDA error code indicating the success or failure of the Keccak256/Keccak512 operation.
|
||||
|
||||
## KeccakConfig
|
||||
## HashConfig
|
||||
|
||||
The `KeccakConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
The `HashConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
|
||||
```go
|
||||
type KeccakConfig struct {
|
||||
type HashConfig struct {
|
||||
Ctx cr.DeviceContext
|
||||
areInputsOnDevice bool
|
||||
areOutputsOnDevice bool
|
||||
@@ -87,8 +87,8 @@ type KeccakConfig struct {
|
||||
|
||||
### Default Configuration
|
||||
|
||||
Use `GetDefaultKeccakConfig` to obtain a default configuration, which can then be customized as needed.
|
||||
Use `GetDefaultHashConfig` to obtain a default configuration, which can then be customized as needed.
|
||||
|
||||
```go
|
||||
func GetDefaultKeccakConfig() KeccakConfig
|
||||
func GetDefaultHashConfig() HashConfig
|
||||
```
|
||||
@@ -4,9 +4,9 @@ To understand the theory behind MSM pre computation technique refer to Niall Emm
|
||||
|
||||
## Core package
|
||||
|
||||
### MSM PrecomputeBases
|
||||
### MSM PrecomputePoints
|
||||
|
||||
`PrecomputeBases` and `G2PrecomputeBases` exists for all supported curves.
|
||||
`PrecomputePoints` and `G2PrecomputePoints` exist for all supported curves.
|
||||
|
||||
#### Description
|
||||
|
||||
@@ -14,21 +14,20 @@ This function extends each provided base point $(P)$ with its multiples $(2^lP,
|
||||
|
||||
The precomputation process is crucial for optimizing MSM operations, especially when dealing with large sets of points and scalars. By precomputing and storing multiples of the base points, the MSM function can more efficiently compute the scalar-point multiplications.
|
||||
|
||||
#### `PrecomputeBases`
|
||||
#### `PrecomputePoints`
|
||||
|
||||
Precomputes bases for MSM by extending each base point with its multiples.
|
||||
Precomputes points for MSM by extending each base point with its multiples.
|
||||
|
||||
```go
|
||||
func PrecomputeBases(points core.HostOrDeviceSlice, precomputeFactor int32, c int32, ctx *cr.DeviceContext, outputBases core.DeviceSlice) cr.CudaError
|
||||
func PrecomputePoints(points core.HostOrDeviceSlice, msmSize int, cfg *core.MSMConfig, outputBases core.DeviceSlice) cr.CudaError
|
||||
```
|
||||
|
||||
##### Parameters
|
||||
|
||||
- **`points`**: A slice of the original affine points to be extended with their multiples.
|
||||
- **`precomputeFactor`**: Determines the total number of points to precompute for each base point.
|
||||
- **`c`**: Currently unused; reserved for future compatibility.
|
||||
- **`ctx`**: CUDA device context specifying the execution environment.
|
||||
- **`outputBases`**: The device slice allocated for storing the extended bases.
|
||||
- **`msmSize`**: The size of a single msm in order to determine optimal parameters.
|
||||
- **`cfg`**: The MSM configuration parameters.
|
||||
- **`outputBases`**: The device slice allocated for storing the extended points.
|
||||
|
||||
##### Example
|
||||
|
||||
@@ -50,28 +49,27 @@ func main() {
|
||||
var precomputeOut core.DeviceSlice
|
||||
precomputeOut.Malloc(points[0].Size()*points.Len()*int(precomputeFactor), points[0].Size())
|
||||
|
||||
err := bn254.PrecomputeBases(points, precomputeFactor, 0, &cfg.Ctx, precomputeOut)
|
||||
err := bn254.PrecomputePoints(points, 1024, &cfg, precomputeOut)
|
||||
if err != cr.CudaSuccess {
|
||||
log.Fatalf("PrecomputePoints failed: %v", err)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### `G2PrecomputeBases`
|
||||
#### `G2PrecomputePoints`
|
||||
|
||||
This method is the same as `PrecomputeBases` but for G2 points. Extends each G2 curve base point with its multiples for optimized MSM computations.
|
||||
This method is the same as `PrecomputePoints` but for G2 points. Extends each G2 curve base point with its multiples for optimized MSM computations.
|
||||
|
||||
```go
|
||||
func G2PrecomputeBases(points core.HostOrDeviceSlice, precomputeFactor int32, c int32, ctx *cr.DeviceContext, outputBases core.DeviceSlice) cr.CudaError
|
||||
func G2PrecomputePoints(points core.HostOrDeviceSlice, msmSize int, cfg *core.MSMConfig, outputBases core.DeviceSlice) cr.CudaError
|
||||
```
|
||||
|
||||
##### Parameters
|
||||
|
||||
- **`points`**: A slice of G2 curve points to be extended.
|
||||
- **`precomputeFactor`**: The total number of points to precompute for each base.
|
||||
- **`c`**: Reserved for future use to ensure compatibility with MSM operations.
|
||||
- **`ctx`**: Specifies the CUDA device context for execution.
|
||||
- **`outputBases`**: Allocated device slice for the extended bases.
|
||||
- **`points`**: A slice of the original affine points to be extended with their multiples.
|
||||
- **`msmSize`**: The size of a single msm in order to determine optimal parameters.
|
||||
- **`cfg`**: The MSM configuration parameters.
|
||||
- **`outputBases`**: The device slice allocated for storing the extended points.
|
||||
|
||||
##### Example
|
||||
|
||||
@@ -93,20 +91,9 @@ func main() {
|
||||
var precomputeOut core.DeviceSlice
|
||||
precomputeOut.Malloc(points[0].Size()*points.Len()*int(precomputeFactor), points[0].Size())
|
||||
|
||||
err := g2.G2PrecomputeBases(points, precomputeFactor, 0, &cfg.Ctx, precomputeOut)
|
||||
err := g2.G2PrecomputePoints(points, 1024, &cfg, precomputeOut)
|
||||
if err != cr.CudaSuccess {
|
||||
log.Fatalf("G2PrecomputePoints failed: %v", err)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Benchmarks
|
||||
|
||||
Benchmarks where performed on a Nvidia RTX 3090Ti.
|
||||
|
||||
| Pre-computation factor | bn254 size `2^20` MSM, ms. | bn254 size `2^12` MSM, size `2^10` batch, ms. | bls12-381 size `2^20` MSM, ms. | bls12-381 size `2^12` MSM, size `2^10` batch, ms. |
|
||||
| ------------- | ------------- | ------------- | ------------- | ------------- |
|
||||
| 1 | 14.1 | 82.8 | 25.5 | 136.7 |
|
||||
| 2 | 11.8 | 76.6 | 20.3 | 123.8 |
|
||||
| 4 | 10.9 | 73.8 | 18.1 | 117.8 |
|
||||
| 8 | 10.6 | 73.7 | 17.2 | 116.0 |
|
||||
|
||||
@@ -122,7 +122,7 @@ func GetDefaultMSMConfig() MSMConfig
|
||||
|
||||
## How do I toggle between the supported algorithms?
|
||||
|
||||
When creating your MSM Config you may state which algorithm you wish to use. `cfg.Ctx.IsBigTriangle = true` will activate Large triangle accumulation and `cfg.Ctx.IsBigTriangle = false` will activate Bucket accumulation.
|
||||
When creating your MSM Config you may state which algorithm you wish to use. `cfg.Ctx.IsBigTriangle = true` will activate Large triangle reduction and `cfg.Ctx.IsBigTriangle = false` will activate iterative reduction.
|
||||
|
||||
```go
|
||||
...
|
||||
@@ -139,7 +139,7 @@ cfg.Ctx.IsBigTriangle = true
|
||||
|
||||
Toggling between MSM modes occurs automatically based on the number of results you are expecting from the `MSM` function.
|
||||
|
||||
The number of results is interpreted from the size of `var out core.DeviceSlice`. Thus its important when allocating memory for `var out core.DeviceSlice` to make sure that you are allocating `<number of results> X <size of a single point>`.
|
||||
The number of results is interpreted from the size of `var out core.DeviceSlice`. Thus it's important when allocating memory for `var out core.DeviceSlice` to make sure that you are allocating `<number of results> X <size of a single point>`.
|
||||
|
||||
```go
|
||||
...
|
||||
@@ -152,6 +152,10 @@ out.Malloc(batchSize*p.Size(), p.Size())
|
||||
...
|
||||
```
|
||||
|
||||
## Parameters for optimal performance
|
||||
|
||||
Please refer to the [primitive description](../primitives/msm#choosing-optimal-parameters)
|
||||
|
||||
## Support for G2 group
|
||||
|
||||
To activate G2 support first you must make sure you are building the static libraries with G2 feature enabled as described in the [Golang building instructions](../golang-bindings.md#using-icicle-golang-bindings-in-your-project).
|
||||
@@ -164,7 +168,7 @@ import (
|
||||
)
|
||||
```
|
||||
|
||||
This package include `G2Projective` and `G2Affine` points as well as a `G2Msm` method.
|
||||
This package includes `G2Projective` and `G2Affine` points as well as a `G2Msm` method.
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
# Polynomial API Overview
|
||||
|
||||
:::note
|
||||
Read our paper on the Polynomials API in ICICLE v2 by clicking [here](https://eprint.iacr.org/2024/973).
|
||||
:::
|
||||
|
||||
## Introduction
|
||||
|
||||
The Polynomial API offers a robust framework for polynomial operations within a computational environment. It's designed for flexibility and efficiency, supporting a broad range of operations like arithmetic, evaluation, and manipulation, all while abstracting from the computation and storage specifics. This enables adaptability to various backend technologies, employing modern C++ practices.
|
||||
@@ -167,7 +171,7 @@ Polynomial& add_monomial_inplace(Coeff monomial_coeff, uint64_t monomial = 0);
|
||||
Polynomial& sub_monomial_inplace(Coeff monomial_coeff, uint64_t monomial = 0);
|
||||
```
|
||||
|
||||
The ability to add or subtract monomials directly and in-place is an efficient way to manipualte polynomials.
|
||||
The ability to add or subtract monomials directly and in-place is an efficient way to manipulate polynomials.
|
||||
|
||||
Example:
|
||||
|
||||
|
||||
@@ -12,11 +12,68 @@ At its core, Keccak consists of a permutation function operating on a state arra
|
||||
- **Chi:** This step applies a nonlinear mixing operation to each lane of the state array.
|
||||
- **Iota:** This step introduces a round constant to the state array.
|
||||
|
||||
## Keccak vs Sha3
|
||||
|
||||
There is some [confusion](https://www.cybertest.com/blog/keccak-vs-sha3) between what is called `Keccak` and what is called `Sha3`. In ICICLE we support both. `Keccak256` refers to the original hash function used in Ethereum, and `Sha3-256` refers to the later standardized hash function.
|
||||
|
||||
## Using Keccak
|
||||
|
||||
ICICLE Keccak supports batch hashing, which can be utilized for constructing a merkle tree.
|
||||
ICICLE Keccak supports batch hashing, which can be utilized for constructing a merkle tree or running multiple hashes in parallel.
|
||||
|
||||
### Supported Bindings
|
||||
|
||||
- [Golang](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang/hash/keccak)
|
||||
- [Rust](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/rust/icicle-hash)
|
||||
- [Rust](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/rust/icicle-hash)
|
||||
|
||||
### Example usage
|
||||
|
||||
This is an example of running 1024 Keccak-256 hashes in parallel, where input strings are of size 136 bytes:
|
||||
|
||||
```rust
|
||||
use icicle_core::hash::HashConfig;
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
use icicle_hash::keccak::keccak256;
|
||||
|
||||
let config = HashConfig::default();
|
||||
let input_block_len = 136;
|
||||
let number_of_hashes = 1024;
|
||||
|
||||
let preimages = vec![1u8; number_of_hashes * input_block_len];
|
||||
let mut digests = vec![0u8; number_of_hashes * 32];
|
||||
|
||||
let preimages_slice = HostSlice::from_slice(&preimages);
|
||||
let digests_slice = HostSlice::from_mut_slice(&mut digests);
|
||||
|
||||
keccak256(
|
||||
preimages_slice,
|
||||
input_block_len as u32,
|
||||
number_of_hashes as u32,
|
||||
digests_slice,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
### Merkle Tree
|
||||
|
||||
You can build a keccak merkle tree using the corresponding functions:
|
||||
|
||||
```rust
|
||||
use icicle_core::tree::{merkle_tree_digests_len, TreeBuilderConfig};
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
use icicle_hash::keccak::build_keccak256_merkle_tree;
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
config.arity = 2;
|
||||
let height = 22;
|
||||
let input_block_len = 136;
|
||||
let leaves = vec![1u8; (1 << height) * input_block_len];
|
||||
let mut digests = vec![0u64; merkle_tree_digests_len((height + 1) as u32, 2, 1)];
|
||||
|
||||
let leaves_slice = HostSlice::from_slice(&leaves);
|
||||
let digests_slice = HostSlice::from_mut_slice(&mut digests);
|
||||
|
||||
build_keccak256_merkle_tree(leaves_slice, digests_slice, height, input_block_len, &config).unwrap();
|
||||
```
|
||||
|
||||
In the example above, a binary tree of height 22 is being built. Each leaf is considered to be a 136 byte long array. The leaves and digests are aligned in a flat array. You can also use keccak512 in `build_keccak512_merkle_tree` function.
|
||||
@@ -1,6 +1,6 @@
|
||||
# MSM - Multi scalar multiplication
|
||||
|
||||
MSM stands for Multi scalar multiplication, its defined as:
|
||||
MSM stands for Multi scalar multiplication. It's defined as:
|
||||
|
||||
<math xmlns="http://www.w3.org/1998/Math/MathML">
|
||||
<mi>M</mi>
|
||||
@@ -43,7 +43,7 @@ $a_0, \ldots, a_n$ - Scalars
|
||||
|
||||
$MSM(a, G) \in G$ - a single EC (elliptic curve) point
|
||||
|
||||
In words, MSM is the sum of scalar and EC point multiplications. We can see from this definition that the core operations occurring are Modular Multiplication and Elliptic curve point addition. Its obvious that multiplication can be computed in parallel and then the products summed, making MSM inherently parallelizable.
|
||||
In words, MSM is the sum of scalar and EC point multiplications. We can see from this definition that the core operations occurring are Modular Multiplication and Elliptic curve point addition. It's obvious that multiplication can be computed in parallel and then the products summed, making MSM inherently parallelizable.
|
||||
|
||||
Accelerating MSM is crucial to a ZK protocol's performance due to the [large percent of run time](https://hackmd.io/@0xMonia/SkQ6-oRz3#Hardware-acceleration-in-action) they take when generating proofs.
|
||||
|
||||
@@ -54,36 +54,142 @@ You can learn more about how MSMs work from this [video](https://www.youtube.com
|
||||
- [Golang](../golang-bindings/msm.md)
|
||||
- [Rust](../rust-bindings/msm.md)
|
||||
|
||||
## Supported algorithms
|
||||
## Algorithm description
|
||||
|
||||
Our MSM implementation supports two algorithms `Bucket accumulation` and `Large triangle accumulation`.
|
||||
We follow the bucket method algorithm. The GPU implementation consists of four phases:
|
||||
|
||||
### Bucket accumulation
|
||||
1. Preparation phase - The scalars are split into smaller scalars of `c` bits each. These are the bucket indices. The points are grouped according to their corresponding bucket index and the buckets are sorted by size.
|
||||
2. Accumulation phase - Each bucket accumulates all of its points using a single thread. More than one thread is assigned to large buckets, in proportion to their size. A bucket is considered large if its size is above the large bucket threshold that is determined by the `large_bucket_factor` parameter. The large bucket threshold is the expected average bucket size times the `large_bucket_factor` parameter.
|
||||
3. Buckets Reduction phase - bucket results are multiplied by their corresponding bucket number and each bucket module is reduced to a small number of final results. By default, this is done by an iterative algorithm which is highly parallel. Setting `is_big_triangle` to `true` will switch this phase to the running sum algorithm described in the above YouTube talk which is much less parallel.
|
||||
4. Final accumulation phase - The final results from the last phase are accumulated using the double-and-add algorithm.
|
||||
|
||||
The Bucket Accumulation algorithm is a method of dividing the overall MSM task into smaller, more manageable sub-tasks. It involves partitioning scalars and their corresponding points into different "buckets" based on the scalar values.
|
||||
## Batched MSM
|
||||
|
||||
Bucket Accumulation can be more parallel-friendly because it involves dividing the computation into smaller, independent tasks, distributing scalar-point pairs into buckets and summing points within each bucket. This division makes it well suited for parallel processing on GPUs.
|
||||
The MSM supports batch mode - running multiple MSMs in parallel. It's always better to use the batch mode instead of running single msms in serial as long as there is enough memory available. We support running a batch of MSMs that share the same points as well as a batch of MSMs that use different points.
|
||||
|
||||
#### When should I use Bucket accumulation?
|
||||
## MSM configuration
|
||||
|
||||
In scenarios involving large MSM computations with many scalar-point pairs, the ability to parallelize operations makes Bucket Accumulation more efficient. The larger the MSM task, the more significant the potential gains from parallelization.
|
||||
```cpp
|
||||
/**
|
||||
* @struct MSMConfig
|
||||
* Struct that encodes MSM parameters to be passed into the [MSM](@ref MSM) function. The intended use of this struct
|
||||
* is to create it using [default_msm_config](@ref default_msm_config) function and then you'll hopefully only need to
|
||||
* change a small number of default values for each of your MSMs.
|
||||
*/
|
||||
struct MSMConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
int points_size; /**< Number of points in the MSM. If a batch of MSMs needs to be computed, this should be
|
||||
* a number of different points. So, if each MSM re-uses the same set of points, this
|
||||
* variable is set equal to the MSM size. And if every MSM uses a distinct set of
|
||||
* points, it should be set to the product of MSM size and [batch_size](@ref
|
||||
* batch_size). Default value: 0 (meaning it's equal to the MSM size). */
|
||||
int precompute_factor; /**< The number of extra points to pre-compute for each point. See the
|
||||
* [precompute_msm_points](@ref precompute_msm_points) function, `precompute_factor` passed
|
||||
* there needs to be equal to the one used here. Larger values decrease the
|
||||
* number of computations to make, on-line memory footprint, but increase the static
|
||||
* memory footprint. Default value: 1 (i.e. don't pre-compute). */
|
||||
int c; /**< \f$ c \f$ value, or "window bitsize" which is the main parameter of the "bucket
|
||||
* method" that we use to solve the MSM problem. As a rule of thumb, larger value
|
||||
* means more on-line memory footprint but also more parallelism and less computational
|
||||
* complexity (up to a certain point). Currently pre-computation is independent of
|
||||
* \f$ c \f$, however in the future value of \f$ c \f$ here and the one passed into the
|
||||
* [precompute_msm_points](@ref precompute_msm_points) function will need to be identical.
|
||||
* Default value: 0 (the optimal value of \f$ c \f$ is chosen automatically). */
|
||||
int bitsize; /**< Number of bits of the largest scalar. Typically equals the bitsize of scalar field,
|
||||
* but if a different (better) upper bound is known, it should be reflected in this
|
||||
* variable. Default value: 0 (set to the bitsize of scalar field). */
|
||||
int large_bucket_factor; /**< Variable that controls how sensitive the algorithm is to the buckets that occur
|
||||
* very frequently. Useful for efficient treatment of non-uniform distributions of
|
||||
* scalars and "top windows" with few bits. Can be set to 0 to disable separate
|
||||
* treatment of large buckets altogether. Default value: 10. */
|
||||
int batch_size; /**< The number of MSMs to compute. Default value: 1. */
|
||||
bool are_scalars_on_device; /**< True if scalars are on device and false if they're on host. Default value:
|
||||
* false. */
|
||||
bool are_scalars_montgomery_form; /**< True if scalars are in Montgomery form and false otherwise. Default value:
|
||||
* true. */
|
||||
bool are_points_on_device; /**< True if points are on device and false if they're on host. Default value: false. */
|
||||
bool are_points_montgomery_form; /**< True if coordinates of points are in Montgomery form and false otherwise.
|
||||
* Default value: true. */
|
||||
bool are_results_on_device; /**< True if the results should be on device and false if they should be on host. If set
|
||||
* to false, `is_async` won't take effect because a synchronization is needed to
|
||||
* transfer results to the host. Default value: false. */
|
||||
bool is_big_triangle; /**< Whether to do "bucket reduction" serially. Decreases computational complexity
|
||||
* but also greatly decreases parallelism, so only suitable for large batches of MSMs.
|
||||
* Default value: false. */
|
||||
bool is_async; /**< Whether to run the MSM asynchronously. If set to true, the MSM function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the MSM
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
```
|
||||
|
||||
### Large triangle accumulation
|
||||
## Choosing optimal parameters
|
||||
|
||||
Large Triangle Accumulation is a method for optimizing MSM which focuses on reducing the number of point doublings in the computation. This algorithm is based on the observation that the number of point doublings can be minimized by structuring the computation in a specific manner.
|
||||
`is_big_triangle` should be `false` in almost all cases. It might provide better results only for very small MSMs (smaller than 2^8^) with a large batch (larger than 100) but this should be tested per scenario.
|
||||
Large buckets exist in two cases:
|
||||
1. When the scalar distribution isn't uniform.
|
||||
2. When `c` does not divide the scalar bit-size.
|
||||
|
||||
#### When should I use Large triangle accumulation?
|
||||
`large_bucket_factor` that is equal to 10 yields good results for most cases, but it's best to fine tune this parameter per `c` and per scalar distribution.
|
||||
The two most important parameters for performance are `c` and the `precompute_factor`. They affect the number of EC additions as well as the memory size. When the points are not known in advance we cannot use precomputation. In this case the best `c` value is usually around $log_2(msmSize) - 4$. However, in most protocols the points are known in advance and precomputation can be used unless limited by memory. Usually it's best to use maximum precomputation (such that we end up with only a single bucket module) combined with a `c` value around $log_2(msmSize) - 1$.
|
||||
|
||||
The Large Triangle Accumulation algorithm is more sequential in nature, as it builds upon each step sequentially (accumulating sums and then performing doubling). This structure can make it less suitable for parallelization but potentially more efficient for a **large batch of smaller MSM computations**.
|
||||
## Memory usage estimation
|
||||
|
||||
## MSM Modes
|
||||
The main memory requirements of the MSM are the following:
|
||||
|
||||
ICICLE MSM also supports two different modes `Batch MSM` and `Single MSM`
|
||||
- Scalars - `sizeof(scalar_t) * msm_size * batch_size`
|
||||
- Scalar indices - `~6 * sizeof(unsigned) * nof_bucket_modules * msm_size * batch_size`
|
||||
- Points - `sizeof(affine_t) * msm_size * precomp_factor * batch_size`
|
||||
- Buckets - `sizeof(projective_t) * nof_bucket_modules * 2^c * batch_size`
|
||||
|
||||
Batch MSM allows you to run many MSMs with a single API call while single MSM will launch a single MSM computation.
|
||||
where `nof_bucket_modules = ceil(ceil(bitsize / c) / precompute_factor)`
|
||||
|
||||
### Which mode should I use?
|
||||
During the MSM computation first the memory for scalars and scalar indices is allocated, then the indices are freed and points and buckets are allocated. This is why a good estimation for the required memory is the following formula:
|
||||
|
||||
This decision is highly dependent on your use case and design. However, if your design allows for it, using batch mode can significantly improve efficiency. Batch processing allows you to perform multiple MSMs simultaneously, leveraging the parallel processing capabilities of GPUs.
|
||||
$max(scalars + scalarIndices, scalars + points + buckets)$
|
||||
|
||||
Single MSM mode should be used when batching isn't possible or when you have to run a single MSM.
|
||||
This gives a good approximation within 10% of the actual required memory for most cases.
|
||||
|
||||
## Example parameters
|
||||
|
||||
Here is a useful table showing optimal parameters for different MSMs. They are optimal for BLS12-377 curve when running on NVIDIA GeForce RTX 3090 Ti. This is the configuration used:
|
||||
|
||||
```cpp
|
||||
msm::MSMConfig config = {
|
||||
ctx, // DeviceContext
|
||||
N, // points_size
|
||||
precomp_factor, // precompute_factor
|
||||
user_c, // c
|
||||
0, // bitsize
|
||||
10, // large_bucket_factor
|
||||
batch_size, // batch_size
|
||||
false, // are_scalars_on_device
|
||||
false, // are_scalars_montgomery_form
|
||||
true, // are_points_on_device
|
||||
false, // are_points_montgomery_form
|
||||
true, // are_results_on_device
|
||||
false, // is_big_triangle
|
||||
true // is_async
|
||||
};
|
||||
```
|
||||
|
||||
Here are the parameters and the results for the different cases:
|
||||
|
||||
| MSM size | Batch size | Precompute factor | c | Memory estimation (GB) | Actual memory (GB) | Single MSM time (ms) |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| 10 | 1 | 1 | 9 | 0.00227 | 0.00277 | 9.2 |
|
||||
| 10 | 1 | 23 | 11 | 0.00259 | 0.00272 | 1.76 |
|
||||
| 10 | 1000 | 1 | 7 | 0.94 | 1.09 | 0.051 |
|
||||
| 10 | 1000 | 23 | 11 | 2.59 | 2.74 | 0.025 |
|
||||
| 15 | 1 | 1 | 11 | 0.011 | 0.019 | 9.9 |
|
||||
| 15 | 1 | 16 | 16 | 0.061 | 0.065 | 2.4 |
|
||||
| 15 | 100 | 1 | 11 | 1.91 | 1.92 | 0.84 |
|
||||
| 15 | 100 | 19 | 14 | 6.32 | 6.61 | 0.56 |
|
||||
| 18 | 1 | 1 | 14 | 0.128 | 0.128 | 14.4 |
|
||||
| 18 | 1 | 15 | 17 | 0.40 | 0.42 | 5.9 |
|
||||
| 22 | 1 | 1 | 17 | 1.64 | 1.65 | 68 |
|
||||
| 22 | 1 | 13 | 21 | 5.67 | 5.94 | 54 |
|
||||
| 24 | 1 | 1 | 18 | 6.58 | 6.61 | 232 |
|
||||
| 24 | 1 | 7 | 21 | 12.4 | 13.4 | 199 |
|
||||
|
||||
The optimal values can vary per GPU and per curve. It is best to try a few combinations until you get the best results for your specific case.
|
||||
|
||||
@@ -56,7 +56,7 @@ Choosing an algorithm is heavily dependent on your use case. For example Cooley-
|
||||
|
||||
NTT also supports two different modes `Batch NTT` and `Single NTT`
|
||||
|
||||
Deciding weather to use `batch NTT` vs `single NTT` is highly dependent on your application and use case.
|
||||
Deciding whether to use `batch NTT` vs `single NTT` is highly dependent on your application and use case.
|
||||
|
||||
#### Single NTT
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Poseidon
|
||||
|
||||
[Poseidon](https://eprint.iacr.org/2019/458.pdf) is a popular hash in the ZK ecosystem primarily because its optimized to work over large prime fields, a common setting for ZK proofs, thereby minimizing the number of multiplicative operations required.
|
||||
[Poseidon](https://eprint.iacr.org/2019/458.pdf) is a popular hash in the ZK ecosystem primarily because it's optimized to work over large prime fields, a common setting for ZK proofs, thereby minimizing the number of multiplicative operations required.
|
||||
|
||||
Poseidon has also been specifically designed to be efficient when implemented within ZK circuits: it uses far fewer constraints than other hash functions like Keccak or SHA-256 in the context of ZK circuits.
|
||||
|
||||
@@ -16,7 +16,7 @@ Poseidon starts with the initialization of its internal state, which is composed
|
||||
|
||||
This is done to prevent collisions and to prevent certain cryptographic attacks by ensuring that the internal state is sufficiently mixed and unpredictable.
|
||||
|
||||

|
||||

|
||||
|
||||
## Applying full and partial rounds
|
||||
|
||||
@@ -26,9 +26,9 @@ To generate a secure hash output, the algorithm goes through a series of "full r
|
||||
|
||||
### Full rounds
|
||||
|
||||

|
||||

|
||||
|
||||
**Uniform Application of S-box:** In full rounds, the S-box (a non-linear transformation) is applied uniformly to every element of the hash function's internal state. This ensures a high degree of mixing and diffusion, contributing to the hash function's security. The functions S-box involves raising each element of the state to a certain power denoted by `α` a member of the finite field defined by the prime `p`; `α` can be different depending on the the implementation and user configuration.
|
||||
**Uniform Application of S-box:** In full rounds, the S-box (a non-linear transformation) is applied uniformly to every element of the hash function's internal state. This ensures a high degree of mixing and diffusion, contributing to the hash function's security. The S-box function involves raising each element of the state to a certain power denoted by `α`, a member of the finite field defined by the prime `p`; `α` can be different depending on the implementation and user configuration.
|
||||
|
||||
**Linear Transformation:** After applying the S-box, a linear transformation is performed on the state. This involves multiplying the state by an MDS (Maximum Distance Separable) matrix, which further diffuses the transformations applied by the S-box across the entire state.
|
||||
|
||||
@@ -36,13 +36,13 @@ To generate a secure hash output, the algorithm goes through a series of "full r
|
||||
|
||||
### Partial Rounds
|
||||
|
||||

|
||||
|
||||
**Selective Application of S-Box:** Partial rounds apply the S-box transformation to only one element of the internal state per round, rather than to all elements. This selective application significantly reduces the computational complexity of the hash function without compromising its security. The choice of which element to apply the S-box to can follow a specific pattern or be fixed, depending on the design of the hash function.
|
||||
|
||||
**Linear Transformation and Round Constants:** A linear transformation is performed and round constants are added. The linear transformation in partial rounds can be designed to be less computationally intensive (this is done by using a sparse matrix) than in full rounds, further optimizing the function's efficiency.
|
||||
|
||||
The user of Poseidon can often choose how many partial or full rounds he wishes to apply; more full rounds will increase security but degrade performance. The choice and balance is highly dependent on the use case.
|
||||
|
||||

|
||||
The user of Poseidon can often choose how many partial or full rounds he wishes to apply; more full rounds will increase security but degrade performance. The choice and balance are highly dependent on the use case.
|
||||
|
||||
## Using Poseidon
|
||||
|
||||
@@ -53,13 +53,14 @@ So for Poseidon of arity 2 and input of size 1024 * 2, we would expect 1024 elem
|
||||
|
||||
### Supported Bindings
|
||||
|
||||
[`Go`](https://github.com/ingonyama-zk/icicle/blob/main/wrappers/golang/curves/bn254/poseidon/poseidon.go)
|
||||
[`Rust`](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/rust/icicle-core/src/poseidon)
|
||||
|
||||
### Constants
|
||||
|
||||
Poseidon is extremely customizable and using different constants will produce different hashes, security levels and performance results.
|
||||
|
||||
We support pre-calculated and optimized constants for each of the [supported curves](#supported-curves).The constants can be found [here](https://github.com/ingonyama-zk/icicle/tree/main/icicle/include/poseidon/constants) and are labeled clearly per curve `<curve_name>_poseidon.h`.
|
||||
We support pre-calculated and optimized constants for each of the [supported curves](../core#supported-curves-and-operations). The constants can be found [here](https://github.com/ingonyama-zk/icicle/tree/main/icicle/include/poseidon/constants) and are labeled clearly per curve `<curve_name>_poseidon.h`.
|
||||
|
||||
If you wish to generate your own constants you can use our python script which can be found [here](https://github.com/ingonyama-zk/icicle/tree/main/icicle/include/poseidon/constants/generate_parameters.py).
|
||||
|
||||
@@ -91,8 +92,6 @@ primitive_element = 7 # bls12-381
|
||||
# primitive_element = 15 # bw6-761
|
||||
```
|
||||
|
||||
We only support `alpha = 5` so if you want to use another alpha for S-box please reach out on discord or open a github issue.
|
||||
|
||||
### Rust API
|
||||
|
||||
This is the most basic way to use the Poseidon API.
|
||||
@@ -101,71 +100,58 @@ This is the most basic way to use the Poseidon API.
|
||||
let test_size = 1 << 10;
|
||||
let arity = 2u32;
|
||||
let ctx = get_default_device_context();
|
||||
let constants = load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap();
|
||||
let config = PoseidonConfig::default();
|
||||
let poseidon = Poseidon::load(arity, &ctx).unwrap();
|
||||
let config = HashConfig::default();
|
||||
|
||||
let inputs = vec![F::one(); test_size * arity as usize];
|
||||
let outputs = vec![F::zero(); test_size];
|
||||
let mut input_slice = HostOrDeviceSlice::on_host(inputs);
|
||||
let mut output_slice = HostOrDeviceSlice::on_host(outputs);
|
||||
|
||||
poseidon_hash_many::<F>(
|
||||
poseidon.hash_many::<F>(
|
||||
&mut input_slice,
|
||||
&mut output_slice,
|
||||
test_size as u32,
|
||||
arity as u32,
|
||||
&constants,
|
||||
1, // Output length
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
The `PoseidonConfig::default()` can be modified, by default the inputs and outputs are set to be on `Host` for example.
|
||||
The `HashConfig` can be modified, by default the inputs and outputs are set to be on `Host` for example.
|
||||
|
||||
```rust
|
||||
impl<'a> Default for PoseidonConfig<'a> {
|
||||
impl<'a> Default for HashConfig<'a> {
|
||||
fn default() -> Self {
|
||||
let ctx = get_default_device_context();
|
||||
Self {
|
||||
ctx,
|
||||
are_inputs_on_device: false,
|
||||
are_outputs_on_device: false,
|
||||
input_is_a_state: false,
|
||||
aligned: false,
|
||||
loop_state: false,
|
||||
is_async: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
In the example above `load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap();` is used which will load the correct constants based on arity and curve. Its possible to [generate](#constants) your own constants and load them.
|
||||
In the example above `Poseidon::load(arity, &ctx).unwrap();` is used which will load the correct constants based on arity and curve. It's possible to [generate](#constants) your own constants and load them.
|
||||
|
||||
```rust
|
||||
let ctx = get_default_device_context();
|
||||
let cargo_manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||
let constants_file = PathBuf::from(cargo_manifest_dir)
|
||||
.join("tests")
|
||||
.join(format!("{}_constants.bin", field_prefix));
|
||||
let mut constants_buf = vec![];
|
||||
File::open(constants_file)
|
||||
.unwrap()
|
||||
.read_to_end(&mut constants_buf)
|
||||
.unwrap();
|
||||
|
||||
let mut custom_constants = vec![];
|
||||
for chunk in constants_buf.chunks(field_bytes) {
|
||||
custom_constants.push(F::from_bytes_le(chunk));
|
||||
}
|
||||
|
||||
let custom_constants = create_optimized_poseidon_constants::<F>(
|
||||
arity as u32,
|
||||
&ctx,
|
||||
full_rounds_half,
|
||||
partial_rounds,
|
||||
&mut custom_constants,
|
||||
)
|
||||
.unwrap();
|
||||
let custom_poseidon = Poseidon::new(
|
||||
arity, // The arity of poseidon hash. The width will be equal to arity + 1
|
||||
alpha, // The S-box power
|
||||
full_rounds_half,
|
||||
partial_rounds,
|
||||
round_constants,
|
||||
mds_matrix,
|
||||
non_sparse_matrix,
|
||||
sparse_matrices,
|
||||
domain_tag,
|
||||
ctx,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
## The Tree Builder
|
||||
@@ -175,21 +161,34 @@ The tree builder allows you to build Merkle trees using Poseidon.
|
||||
You can define both the tree's `height` and its `arity`. The tree `height` determines the number of layers in the tree, including the root and the leaf layer. The `arity` determines how many children each internal node can have.
|
||||
|
||||
```rust
|
||||
let height = 20;
|
||||
let arity = 2;
|
||||
let leaves = vec![F::one(); 1 << (height - 1)];
|
||||
let mut digests = vec![F::zero(); merkle_tree_digests_len(height, arity)];
|
||||
|
||||
let mut leaves_slice = HostOrDeviceSlice::on_host(leaves);
|
||||
|
||||
let ctx = get_default_device_context();
|
||||
let constants = load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap()
|
||||
use icicle_bn254::tree::Bn254TreeBuilder;
|
||||
use icicle_bn254::poseidon::Poseidon;
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
config.keep_rows = 1;
|
||||
build_poseidon_merkle_tree::<F>(&mut leaves_slice, &mut digests, height, arity, &constants, &config).unwrap();
|
||||
let arity = 2;
|
||||
config.arity = arity as u32;
|
||||
let input_block_len = arity;
|
||||
let leaves = vec![F::one(); (1 << height) * arity];
|
||||
let mut digests = vec![F::zero(); merkle_tree_digests_len((height + 1) as u32, arity as u32, 1)];
|
||||
|
||||
println!("Root: {:?}", digests[0..1][0]);
|
||||
let leaves_slice = HostSlice::from_slice(&leaves);
|
||||
let digests_slice = HostSlice::from_mut_slice(&mut digests);
|
||||
|
||||
let ctx = device_context::DeviceContext::default();
|
||||
let hash = Poseidon::load(2, &ctx).unwrap();
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
config.keep_rows = 5;
|
||||
Bn254TreeBuilder::build_merkle_tree(
|
||||
leaves_slice,
|
||||
digests_slice,
|
||||
height,
|
||||
input_block_len,
|
||||
&hash,
|
||||
&hash,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
Similar to Poseidon, you can also configure the Tree Builder `TreeBuilderConfig::default()`
|
||||
|
||||
88
docs/docs/icicle/primitives/poseidon2.md
Normal file
88
docs/docs/icicle/primitives/poseidon2.md
Normal file
@@ -0,0 +1,88 @@
|
||||
# Poseidon2
|
||||
|
||||
[Poseidon2](https://eprint.iacr.org/2023/323) is a recently released optimized version of Poseidon1. The two versions differ in two crucial points. First, Poseidon is a sponge hash function, while Poseidon2 can be either a sponge or a compression function depending on the use case. Secondly, Poseidon2 is instantiated by new and more efficient linear layers with respect to Poseidon. These changes decrease the number of multiplications in the linear layer by up to 90% and the number of constraints in Plonk circuits by up to 70%. This makes Poseidon2 currently the fastest arithmetization-oriented hash function without lookups.
|
||||
|
||||
|
||||
## Using Poseidon2
|
||||
|
||||
ICICLE Poseidon2 is implemented for GPU and parallelization is performed for each state.
|
||||
We calculate multiple hash-sums over multiple pre-images in parallel, rather than going block by block over the input vector.
|
||||
|
||||
For example, for Poseidon2 of width 16, input rate 8, output elements 8 and input of size 1024 * 8, we would expect 1024 * 8 elements of output. This means each input block would be of size 8, resulting in 1024 Poseidon2 hashes being performed.
|
||||
|
||||
### Supported Bindings
|
||||
|
||||
[`Rust`](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/rust/icicle-core/src/poseidon2)
|
||||
|
||||
### Constants
|
||||
|
||||
Poseidon2 is also extremely customizable and using different constants will produce different hashes, security levels and performance results.
|
||||
|
||||
We support pre-calculated constants for each of the [supported curves](../core#supported-curves-and-operations). The constants can be found [here](https://github.com/ingonyama-zk/icicle/tree/main/icicle/include/poseidon2/constants) and are labeled clearly per curve `<curve_name>_poseidon2.h`.
|
||||
|
||||
You can also use your own set of constants as shown [here](https://github.com/ingonyama-zk/icicle/blob/main/wrappers/rust/icicle-fields/icicle-babybear/src/poseidon2/mod.rs#L290)
|
||||
|
||||
### Rust API
|
||||
|
||||
This is the most basic way to use the Poseidon2 API.
|
||||
|
||||
```rust
|
||||
let test_size = 1 << 10;
|
||||
let width = 16;
|
||||
let rate = 8;
|
||||
let ctx = get_default_device_context();
|
||||
let poseidon = Poseidon2::load(width, rate, MdsType::Default, DiffusionStrategy::Default, &ctx).unwrap();
|
||||
let config = HashConfig::default();
|
||||
|
||||
let inputs = vec![F::one(); test_size * rate as usize];
|
||||
let outputs = vec![F::zero(); test_size];
|
||||
let mut input_slice = HostOrDeviceSlice::on_host(inputs);
|
||||
let mut output_slice = HostOrDeviceSlice::on_host(outputs);
|
||||
|
||||
poseidon.hash_many::<F>(
|
||||
&mut input_slice,
|
||||
&mut output_slice,
|
||||
test_size as u32,
|
||||
rate as u32,
|
||||
8, // Output length
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
In the example above `Poseidon2::load(width, rate, MdsType::Default, DiffusionStrategy::Default, &ctx).unwrap();` is used to load the correct constants based on width and curve. Here, the default MDS matrices and diffusion are used. If you want to get a Plonky3 compliant version, set them to `MdsType::Plonky` and `DiffusionStrategy::Montgomery` respectively.
|
||||
|
||||
## The Tree Builder
|
||||
|
||||
Similar to Poseidon1, you can use Poseidon2 in a tree builder.
|
||||
|
||||
```rust
|
||||
use icicle_bn254::tree::Bn254TreeBuilder;
|
||||
use icicle_bn254::poseidon2::Poseidon2;
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
let arity = 2;
|
||||
config.arity = arity as u32;
|
||||
let input_block_len = arity;
|
||||
let leaves = vec![F::one(); (1 << height) * arity];
|
||||
let mut digests = vec![F::zero(); merkle_tree_digests_len((height + 1) as u32, arity as u32, 1)];
|
||||
|
||||
let leaves_slice = HostSlice::from_slice(&leaves);
|
||||
let digests_slice = HostSlice::from_mut_slice(&mut digests);
|
||||
|
||||
let ctx = device_context::DeviceContext::default();
|
||||
let hash = Poseidon2::load(arity, arity, MdsType::Default, DiffusionStrategy::Default, &ctx).unwrap();
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
config.keep_rows = 5;
|
||||
Bn254TreeBuilder::build_merkle_tree(
|
||||
leaves_slice,
|
||||
digests_slice,
|
||||
height,
|
||||
input_block_len,
|
||||
&hash,
|
||||
&hash,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
```rust
|
||||
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
|
||||
use icicle_hash::keccak::{keccak256, KeccakConfig};
|
||||
use icicle_hash::keccak::{keccak256, HashConfig};
|
||||
use rand::{self, Rng};
|
||||
|
||||
fn main() {
|
||||
@@ -14,7 +14,7 @@ fn main() {
|
||||
let input = HostSlice::<u8>::from_slice(initial_data.as_slice());
|
||||
let mut output = DeviceVec::<u8>::cuda_malloc(32).unwrap();
|
||||
|
||||
let mut config = KeccakConfig::default();
|
||||
let mut config = HashConfig::default();
|
||||
keccak256(input, initial_data.len() as i32, 1, &mut output[..], &mut config).expect("Failed to execute keccak256 hashing");
|
||||
|
||||
let mut output_host = vec![0_u8; 32];
|
||||
@@ -32,7 +32,7 @@ pub fn keccak256(
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
config: &mut KeccakConfig,
|
||||
config: &mut HashConfig,
|
||||
) -> IcicleResult<()>
|
||||
|
||||
pub fn keccak512(
|
||||
@@ -40,7 +40,7 @@ pub fn keccak512(
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
config: &mut KeccakConfig,
|
||||
config: &mut HashConfig,
|
||||
) -> IcicleResult<()>
|
||||
```
|
||||
|
||||
@@ -50,18 +50,18 @@ pub fn keccak512(
|
||||
- **`input_block_size`**: An integer specifying the size of the input data for a single hash.
|
||||
- **`number_of_blocks`**: An integer specifying the number of results in the hash batch.
|
||||
- **`output`**: A slice where the resulting hash will be stored. This slice can be in host or device memory.
|
||||
- **`config`**: A pointer to a `KeccakConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
- **`config`**: A pointer to a `HashConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
|
||||
### Return Value
|
||||
|
||||
- **`IcicleResult`**: Returns a CUDA error code indicating the success or failure of the Keccak256/Keccak512 operation.
|
||||
|
||||
## KeccakConfig
|
||||
## HashConfig
|
||||
|
||||
The `KeccakConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
The `HashConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
|
||||
```rust
|
||||
pub struct KeccakConfig<'a> {
|
||||
pub struct HashConfig<'a> {
|
||||
pub ctx: DeviceContext<'a>,
|
||||
pub are_inputs_on_device: bool,
|
||||
pub are_outputs_on_device: bool,
|
||||
@@ -81,7 +81,7 @@ pub struct KeccakConfig<'a> {
|
||||
Example initialization with default settings:
|
||||
|
||||
```rust
|
||||
let default_config = KeccakConfig::default();
|
||||
let default_config = HashConfig::default();
|
||||
```
|
||||
|
||||
Customizing the configuration:
|
||||
|
||||
@@ -2,26 +2,24 @@
|
||||
|
||||
To understand the theory behind the MSM precomputation technique, refer to Niall Emmart's [talk](https://youtu.be/KAWlySN7Hm8?feature=shared&t=1734).
|
||||
|
||||
## `precompute_bases`
|
||||
## `precompute_points`
|
||||
|
||||
Precomputes bases for the multi-scalar multiplication (MSM) by extending each base point with its multiples, facilitating more efficient MSM calculations.
|
||||
|
||||
```rust
|
||||
pub fn precompute_bases<C: Curve + MSM<C>>(
|
||||
points: &HostOrDeviceSlice<Affine<C>>,
|
||||
precompute_factor: i32,
|
||||
_c: i32,
|
||||
ctx: &DeviceContext,
|
||||
output_bases: &mut HostOrDeviceSlice<Affine<C>>,
|
||||
pub fn precompute_points<C: Curve + MSM<C>>(
|
||||
points: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
|
||||
msm_size: i32,
|
||||
cfg: &MSMConfig,
|
||||
output_bases: &mut DeviceSlice<Affine<C>>,
|
||||
) -> IcicleResult<()>
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
- **`points`**: The original set of affine points (\(P_1, P_2, ..., P_n\)) to be used in the MSM. For batch MSM operations, this should include all unique points concatenated together.
|
||||
- **`precompute_factor`**: Specifies the total number of points to precompute for each base, including the base point itself. This parameter directly influences the memory requirements and the potential speedup of the MSM operation.
|
||||
- **`_c`**: Currently unused. Intended for future use to align with the `c` parameter in `MSMConfig`, ensuring the precomputation is compatible with the bucket method's window size used in MSM.
|
||||
- **`ctx`**: The device context specifying the device ID and stream for execution. This context determines where the precomputation is performed (e.g., on a specific GPU).
|
||||
- **`msm_size`**: The size of a single msm in order to determine optimal parameters.
|
||||
- **`cfg`**: The MSM configuration parameters.
|
||||
- **`output_bases`**: The output buffer for the extended bases. Its size must be `points.len() * precompute_factor`. This buffer should be allocated on the device for GPU computations.
|
||||
|
||||
#### Returns
|
||||
@@ -37,22 +35,11 @@ The precomputation process is crucial for optimizing MSM operations, especially
|
||||
#### Example Usage
|
||||
|
||||
```rust
|
||||
let device_context = DeviceContext::default_for_device(0); // Use the default device
|
||||
let cfg = MSMConfig::default();
|
||||
let precompute_factor = 4; // Number of points to precompute
|
||||
let mut extended_bases = HostOrDeviceSlice::cuda_malloc(expected_size).expect("Failed to allocate memory for extended bases");
|
||||
|
||||
// Precompute the bases using the specified factor
|
||||
precompute_bases(&points, precompute_factor, 0, &device_context, &mut extended_bases)
|
||||
precompute_points(&points, msm_size, &cfg, &mut extended_bases)
|
||||
.expect("Failed to precompute bases");
|
||||
```
|
||||
|
||||
### Benchmarks
|
||||
|
||||
Benchmarks were performed on an NVIDIA RTX 3090 Ti.
|
||||
|
||||
| Pre-computation factor | bn254 size `2^20` MSM, ms. | bn254 size `2^12` MSM, size `2^10` batch, ms. | bls12-381 size `2^20` MSM, ms. | bls12-381 size `2^12` MSM, size `2^10` batch, ms. |
|
||||
| ------------- | ------------- | ------------- | ------------- | ------------- |
|
||||
| 1 | 14.1 | 82.8 | 25.5 | 136.7 |
|
||||
| 2 | 11.8 | 76.6 | 20.3 | 123.8 |
|
||||
| 4 | 10.9 | 73.8 | 18.1 | 117.8 |
|
||||
| 8 | 10.6 | 73.7 | 17.2 | 116.0 |
|
||||
|
||||
@@ -100,7 +100,7 @@ When performing MSM operations, it's crucial to match the size of the `scalars`
|
||||
|
||||
## How do I toggle between the supported algorithms?
|
||||
|
||||
When creating your MSM Config you may state which algorithm you wish to use. `is_big_triangle=true` will activate Large triangle accumulation and `is_big_triangle=false` will activate Bucket accumulation.
|
||||
When creating your MSM Config you may state which algorithm you wish to use. `is_big_triangle=true` will activate Large triangle reduction and `is_big_triangle=false` will activate iterative reduction.
|
||||
|
||||
```rust
|
||||
...
|
||||
@@ -144,6 +144,10 @@ msm::msm(&scalars, &points, &cfg, &mut msm_results).unwrap();
|
||||
|
||||
Here is a [reference](https://github.com/ingonyama-zk/icicle/blob/77a7613aa21961030e4e12bf1c9a78a2dadb2518/wrappers/rust/icicle-core/src/msm/mod.rs#L108) to the code which automatically sets the batch size. For more MSM examples have a look [here](https://github.com/ingonyama-zk/icicle/blob/77a7613aa21961030e4e12bf1c9a78a2dadb2518/examples/rust/msm/src/main.rs#L1).
|
||||
|
||||
## Parameters for optimal performance
|
||||
|
||||
Please refer to the [primitive description](../primitives/msm#choosing-optimal-parameters)
|
||||
|
||||
## Support for G2 group
|
||||
|
||||
MSM also supports G2 group.
|
||||
|
||||
@@ -166,6 +166,14 @@ const config = {
|
||||
additionalLanguages: ['rust', 'go'],
|
||||
},
|
||||
image: 'img/logo.png',
|
||||
announcementBar: {
|
||||
id: 'announcement', // Any value that will identify this message.
|
||||
content:
|
||||
'<strong>🎉 Read our paper on the Polynomials API in ICICLE v2 by clicking <a target="_blank" rel="noopener noreferrer" href="https://eprint.iacr.org/2024/973">here</a>! 🎉</strong>',
|
||||
backgroundColor: '#ADD8E6', // Light blue background color.
|
||||
textColor: '#000000', // Black text color.
|
||||
isCloseable: true, // Defaults to `true`.
|
||||
},
|
||||
}),
|
||||
};
|
||||
|
||||
|
||||
19
docs/package-lock.json
generated
19
docs/package-lock.json
generated
@@ -3680,6 +3680,8 @@
|
||||
"version": "8.12.0",
|
||||
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.12.0.tgz",
|
||||
"integrity": "sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"fast-deep-equal": "^3.1.1",
|
||||
"json-schema-traverse": "^1.0.0",
|
||||
@@ -3694,7 +3696,9 @@
|
||||
"node_modules/ajv-formats/node_modules/json-schema-traverse": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/ajv-keywords": {
|
||||
"version": "3.5.2",
|
||||
@@ -16340,14 +16344,13 @@
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-2.1.1.tgz",
|
||||
"integrity": "sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==",
|
||||
"requires": {
|
||||
"ajv": "^8.0.0"
|
||||
},
|
||||
"requires": {},
|
||||
"dependencies": {
|
||||
"ajv": {
|
||||
"version": "8.12.0",
|
||||
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.12.0.tgz",
|
||||
"version": "https://registry.npmjs.org/ajv/-/ajv-8.12.0.tgz",
|
||||
"integrity": "sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"fast-deep-equal": "^3.1.1",
|
||||
"json-schema-traverse": "^1.0.0",
|
||||
@@ -16358,7 +16361,9 @@
|
||||
"json-schema-traverse": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -53,6 +53,11 @@ module.exports = {
|
||||
label: "Poseidon Hash",
|
||||
id: "icicle/primitives/poseidon",
|
||||
},
|
||||
{
|
||||
type: "doc",
|
||||
label: "Poseidon2 Hash",
|
||||
id: "icicle/primitives/poseidon2",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# Icicle example: Multi-Scalar Multiplication (MSM)
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` provides CUDA C++ template function `MSM` to accelerate [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
#include "api/bn254.h"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
using namespace poseidon;
|
||||
using namespace bn254;
|
||||
|
||||
@@ -20,31 +23,20 @@ void checkCudaError(cudaError_t error)
|
||||
// these global constants go into template calls
|
||||
const int size_col = 11;
|
||||
|
||||
// this function executes the Poseidon thread
|
||||
void threadPoseidon(
|
||||
device_context::DeviceContext ctx,
|
||||
unsigned size_partition,
|
||||
scalar_t* layers,
|
||||
scalar_t* column_hashes,
|
||||
PoseidonConstants<scalar_t>* constants)
|
||||
Poseidon<scalar_t> * poseidon)
|
||||
{
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
// CHK_IF_RETURN(); I can't use it in a standard thread function
|
||||
PoseidonConfig column_config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputes_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // input_is_a_state
|
||||
false, // aligned
|
||||
false, // loop_state
|
||||
false, // is_async
|
||||
};
|
||||
cudaError_t err =
|
||||
bn254_poseidon_hash_cuda(layers, column_hashes, (size_t)size_partition, size_col, *constants, column_config);
|
||||
HashConfig column_config = default_hash_config(ctx);
|
||||
cudaError_t err = poseidon->hash_many(layers, column_hashes, (size_t) size_partition, size_col, 1, column_config);
|
||||
checkCudaError(err);
|
||||
}
|
||||
|
||||
@@ -59,6 +51,11 @@ using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::p
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
#define CHECK_ALLOC(ptr) if ((ptr) == nullptr) { \
|
||||
std::cerr << "Memory allocation for '" #ptr "' failed." << std::endl; \
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
const unsigned size_row = (1 << 30);
|
||||
@@ -116,19 +113,18 @@ int main()
|
||||
scalar_t* column_hash1 = static_cast<scalar_t*>(malloc(size_partition * sizeof(scalar_t)));
|
||||
CHECK_ALLOC(column_hash1);
|
||||
|
||||
PoseidonConstants<scalar_t> column_constants0, column_constants1;
|
||||
bn254_init_optimized_poseidon_constants_cuda(size_col, ctx0, &column_constants0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
bn254_init_optimized_poseidon_constants_cuda(size_col, ctx1, &column_constants1);
|
||||
Poseidon<scalar_t> column_poseidon0(size_col, ctx0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
Poseidon<scalar_t> column_poseidon1(size_col, ctx1);
|
||||
|
||||
std::cout << "Parallel execution of Poseidon threads" << std::endl;
|
||||
START_TIMER(parallel);
|
||||
std::thread thread0(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_constants0);
|
||||
std::thread thread1(threadPoseidon, ctx1, size_partition, layers1, column_hash1, &column_constants1);
|
||||
std::thread thread0(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_poseidon0);
|
||||
std::thread thread1(threadPoseidon, ctx1, size_partition, layers1, column_hash1, &column_poseidon1);
|
||||
|
||||
// Wait for the threads to finish
|
||||
thread0.join();
|
||||
@@ -141,9 +137,9 @@ int main()
|
||||
|
||||
std::cout << "Sequential execution of Poseidon threads" << std::endl;
|
||||
START_TIMER(sequential);
|
||||
std::thread thread2(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_constants0);
|
||||
std::thread thread2(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_poseidon0);
|
||||
thread2.join();
|
||||
std::thread thread3(threadPoseidon, ctx0, size_partition, layers1, column_hash1, &column_constants0);
|
||||
std::thread thread3(threadPoseidon, ctx0, size_partition, layers1, column_hash1, &column_poseidon0);
|
||||
thread3.join();
|
||||
END_TIMER(sequential, "1 GPU");
|
||||
std::cout << "Output Data from Thread 2: ";
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# Icicle example: Multiplication
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` accelerates multiplication operation `*` using [Karatsuba algorithm](https://en.wikipedia.org/wiki/Karatsuba_algorithm)
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# Icicle example: Number-Theoretical Transform (NTT)
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` provides CUDA C++ template function NTT for [Number Theoretical Transform](https://github.com/ingonyama-zk/ingopedia/blob/master/src/fft.md), also known as Discrete Fourier Transform.
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# ICICLE example: Pedersen Commitment
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
A Pedersen Commitment is a cryptographic primitive to commit to a value or a vector of values while keeping it hidden, yet enabling the committer to reveal the value later. It provides both hiding (the commitment does not reveal any information about the value) and binding properties (once a value is committed, it cannot be changed without detection).
|
||||
|
||||
@@ -23,5 +23,8 @@ set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
|
||||
# can link to another curve/field by changing the following lib and FIELD_ID
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a)
|
||||
target_link_libraries(example
|
||||
${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_curve_bn254.a
|
||||
${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a
|
||||
)
|
||||
target_compile_definitions(example PUBLIC FIELD_ID BN254)
|
||||
@@ -1,9 +1,5 @@
|
||||
# ICICLE examples: computations with polynomials
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
Polynomials are crucial for Zero-Knowledge Proofs (ZKPs): they enable efficient representation and verification of computational statements, facilitate privacy-preserving protocols, and support complex mathematical operations essential for constructing and verifying proofs without revealing underlying data. Polynomial API is documented [here](https://dev.ingonyama.com/icicle/polynomials/overview)
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <cassert>
|
||||
#include "polynomials/polynomials.h"
|
||||
#include "polynomials/cuda_backend/polynomial_cuda_backend.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
// using namespace field_config;
|
||||
#include "api/bn254.h"
|
||||
#include <chrono>
|
||||
|
||||
using namespace polynomials;
|
||||
using namespace merkle;
|
||||
using namespace bn254;
|
||||
|
||||
// define the polynomial type
|
||||
typedef Polynomial<scalar_t> Polynomial_t;
|
||||
@@ -21,6 +22,27 @@ const auto four = scalar_t::from(4);
|
||||
const auto five = scalar_t::from(5);
|
||||
const auto minus_one = zero - one;
|
||||
|
||||
// Builds the sequence of successive powers of `tau`:
//   {1, tau, tau^2, ..., tau^(size-1)}
//
// tau   base scalar whose powers are generated
// size  number of powers to produce; 0 yields an empty (zero-length) array
//
// Returns a host-side array owning `size` scalars.
static std::unique_ptr<scalar_t[]> generate_pows(scalar_t tau, uint32_t size){
  auto vec = std::make_unique<scalar_t[]>(size);
  if (size == 0) {
    // Nothing to fill: writing vec[0] on a zero-length array would be out of bounds.
    return vec;
  }
  vec[0] = scalar_t::one();
  for (size_t i = 1; i < size; ++i) {
    vec[i] = vec[i-1] * tau;
  }
  // Return the local by name: wrapping it in std::move would inhibit copy elision.
  return vec;
}
|
||||
|
||||
static std::unique_ptr<affine_t[]> generate_SRS(uint32_t size) {
|
||||
auto secret_scalar = scalar_t::rand_host();
|
||||
auto gen = projective_t::generator();
|
||||
auto pows_of_tau = generate_pows(secret_scalar,size);
|
||||
auto SRS = std::make_unique<affine_t[]>(size);
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
SRS[i] = projective_t::to_affine(pows_of_tau[i] * gen);
|
||||
}
|
||||
return std::move(SRS);
|
||||
}
|
||||
|
||||
|
||||
void example_evaluate()
|
||||
{
|
||||
std::cout << std::endl << "Example: Polynomial evaluation on random value" << std::endl;
|
||||
@@ -298,6 +320,102 @@ void example_device_memory_view()
|
||||
ntt::ntt(d_coeffs.get(), size, ntt::NTTDir::kForward, ntt_config, coset_evals.get());
|
||||
}
|
||||
|
||||
|
||||
void example_commit_with_device_memory_view()
|
||||
{
|
||||
//declare time vars
|
||||
std::chrono::time_point<std::chrono::high_resolution_clock> start, end;
|
||||
std::chrono::milliseconds duration;
|
||||
|
||||
std::cout << std::endl << "Example: a) commit with Polynomial views [(f1+f2)^2 + (f1-f2)^2 ]_1 = [4 (f1^2+ f_2^2)]_1" << std::endl;
|
||||
std::cout<< "Example: b) commit with Polynomial views [(f1+f2)^2 - (f1-f2)^2 ]_1 = [4 f1 *f_2]_1" << std::endl;
|
||||
int N = 1025;
|
||||
|
||||
//generate group elements string of length N: (1, beta,beta^2....,beta^{N-1}). g
|
||||
std::cout << "Setup: Generating mock SRS" << std::endl;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
auto SRS = generate_SRS(2*N);
|
||||
//Allocate memory on device (points)
|
||||
affine_t* points_d;
|
||||
cudaMalloc(&points_d, sizeof(affine_t)* 2 * N);
|
||||
// copy SRS to device (could have generated on device, but gives an indicator)
|
||||
cudaMemcpy(points_d, SRS.get(), sizeof(affine_t)* 2 * N, cudaMemcpyHostToDevice);
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Setup: SRS of length "<< N << " generated and loaded to device. Took: " << duration.count() << " milliseconds" << std::endl;
|
||||
|
||||
//goal:
|
||||
//test commitment equality [(f1+f2)^2 + (f1-f2)^2 ]_1 = [4 (f1^2+ f_2^2)]_1
|
||||
//test commitment equality [(f1+f2)^2 - (f1-f2)^2 ]_1 = [4 f1 *f_2]_1
|
||||
//note: using polyapi to gen scalars: already on device.
|
||||
std::cout << "Setup: Generating polys (on device) f1,f2 of log degree " << log2(N-1) << std::endl;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
auto f1 = randomize_polynomial(N);
|
||||
auto f2 = randomize_polynomial(N);
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Setup: Gen poly done. Took: " << duration.count() << " milliseconds" << std::endl;
|
||||
|
||||
//deg 2N constraints (f1+f2)^2 + (f1-f2)^2 = 2 (f1^2+ f_2^2)
|
||||
std::cout << "Computing constraints..start "<< std::endl;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
auto L1 = (f1+f2)*(f1+f2) + (f1-f2)*(f1-f2);
|
||||
auto R1 = scalar_t::from(2) * (f1*f1 + f2*f2);
|
||||
//deg 2N constraints (f1+f2)^2 - (f1-f2)^2 = 4 f1 *f_2
|
||||
auto L2 = (f1+f2)*(f1+f2) - (f1-f2)*(f1-f2);
|
||||
auto R2 = scalar_t::from(4) * f1 * f2;
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Computing constraints..done. Took: " << duration.count() << " milliseconds"<< std::endl;
|
||||
|
||||
// extract coeff using coeff view
|
||||
auto [viewL1, sizeL1, device_idL1] = L1.get_coefficients_view();
|
||||
auto [viewL2, sizeL2, device_idL2] = L2.get_coefficients_view();
|
||||
auto [viewR1, sizeR1, device_idR1] = R1.get_coefficients_view();
|
||||
auto [viewR2, sizeR2, device_idR2] = R2.get_coefficients_view();
|
||||
|
||||
std::cout << "Computing Commitments with poly view"<< std::endl;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
msm::MSMConfig config = msm::default_msm_config();
|
||||
config.are_points_on_device = true;
|
||||
config.are_scalars_on_device = true;
|
||||
|
||||
//host vars (for result)
|
||||
projective_t hL1{}, hL2{}, hR1{}, hR2{};
|
||||
|
||||
//straightforward msm bn254 api: no batching
|
||||
bn254_msm_cuda(viewL1.get(),points_d,N,config,&hL1);
|
||||
bn254_msm_cuda(viewL2.get(),points_d,N,config,&hL2);
|
||||
bn254_msm_cuda(viewR1.get(),points_d,N,config,&hR1);
|
||||
bn254_msm_cuda(viewR2.get(),points_d,N,config,&hR2);
|
||||
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Commitments done. Took: " << duration.count() << " milliseconds"<< std::endl;
|
||||
|
||||
//sanity checks
|
||||
auto affL1 = projective_t::to_affine(hL1);
|
||||
auto affR1 = projective_t::to_affine(hR1);
|
||||
|
||||
auto affL2 = projective_t::to_affine(hL2);
|
||||
auto affR2 = projective_t::to_affine(hR2);
|
||||
|
||||
//test commitment equality [(f1+f2)^2 + (f1-f2)^2]_1 = [4 (f_1^2+f_2^2]_1
|
||||
assert(affL1.x==affR1.x && affL1.y==affR1.y);
|
||||
std::cout << "commitment [(f1+f2)^2 + (f1-f2)^2]_1:" << std::endl;
|
||||
std::cout << "[x: " << affL1.x << ", y: " << affL1.y << "]" << std::endl;
|
||||
std::cout << "commitment [[2 (f_1^2+f_2^2]_1:" <<std::endl;
|
||||
std::cout << "[x: " << affR1.x << ", y: " << affR1.y << "]" << std::endl;
|
||||
|
||||
assert(affL2.x==affR2.x && affL2.y==affR2.y);
|
||||
std::cout << "commitment [(f1+f2)^2 - (f1-f2)^2]_1:"<< std::endl;
|
||||
std::cout << "[x: " << affL2.x << ", y: " << affL2.y << "]" << std::endl;
|
||||
std::cout << "commitment [4 f_1*f_2]_1:"<<std::endl;
|
||||
std::cout << "[x: " << affR2.x << ", y: " << affR2.y << "]" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
// Initialize NTT. TODO: can we hide this in the library?
|
||||
@@ -324,6 +442,7 @@ int main(int argc, char** argv)
|
||||
example_even_odd();
|
||||
example_slice();
|
||||
example_device_memory_view();
|
||||
example_commit_with_device_memory_view();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,9 +1,5 @@
|
||||
# Icicle example: build a Merkle tree using Poseidon hash
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` provides CUDA C++ template `poseidon_hash` to accelerate the popular [Poseidon hash function](https://www.poseidon-hash.info/).
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
using namespace poseidon;
|
||||
using namespace bn254;
|
||||
|
||||
@@ -14,13 +16,12 @@ inline uint32_t tree_index(uint32_t level, uint32_t offset) { return (1 << level
|
||||
|
||||
// We assume the tree has leaves already set, compute all other levels
|
||||
void build_tree(
|
||||
const uint32_t tree_height, scalar_t* tree, PoseidonConstants<scalar_t>* constants, PoseidonConfig config)
|
||||
const uint32_t tree_height, scalar_t* tree, Poseidon<scalar_t> &poseidon, HashConfig &config)
|
||||
{
|
||||
for (uint32_t level = tree_height - 1; level > 0; level--) {
|
||||
const uint32_t next_level = level - 1;
|
||||
const uint32_t next_level_width = 1 << next_level;
|
||||
bn254_poseidon_hash_cuda(
|
||||
&tree[tree_index(level, 0)], &tree[tree_index(next_level, 0)], next_level_width, 2, *constants, config);
|
||||
poseidon.hash_many(&tree[tree_index(level, 0)], &tree[tree_index(next_level, 0)], next_level_width, 2, 1, config);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,8 +66,8 @@ uint32_t validate_proof(
|
||||
const uint32_t tree_height,
|
||||
const uint32_t* proof_lr,
|
||||
const scalar_t* proof_hash,
|
||||
PoseidonConstants<scalar_t>* constants,
|
||||
PoseidonConfig config)
|
||||
Poseidon<scalar_t> &poseidon,
|
||||
HashConfig &config)
|
||||
{
|
||||
scalar_t hashes_in[2], hash_out[1], level_hash;
|
||||
level_hash = hash;
|
||||
@@ -79,7 +80,7 @@ uint32_t validate_proof(
|
||||
hashes_in[1] = level_hash;
|
||||
}
|
||||
// next level hash
|
||||
bn254_poseidon_hash_cuda(hashes_in, hash_out, 1, 2, *constants, config);
|
||||
poseidon.hash_many(hashes_in, hash_out, 1, 2, 1, config);
|
||||
level_hash = hash_out[0];
|
||||
}
|
||||
return proof_hash[0] == level_hash;
|
||||
@@ -109,16 +110,15 @@ int main(int argc, char* argv[])
|
||||
d = d + scalar_t::one();
|
||||
}
|
||||
std::cout << "Hashing blocks into tree leaves..." << std::endl;
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
bn254_init_optimized_poseidon_constants_cuda(data_arity, ctx, &constants);
|
||||
PoseidonConfig config = default_poseidon_config(data_arity + 1);
|
||||
bn254_poseidon_hash_cuda(data, &tree[tree_index(leaf_level, 0)], tree_width, 4, constants, config);
|
||||
|
||||
Poseidon<scalar_t> poseidon(data_arity, ctx);
|
||||
HashConfig config = default_hash_config(ctx);
|
||||
poseidon.hash_many(data, &tree[tree_index(leaf_level, 0)], tree_width, data_arity, 1, config);
|
||||
|
||||
std::cout << "3. Building Merkle tree" << std::endl;
|
||||
PoseidonConstants<scalar_t> tree_constants;
|
||||
bn254_init_optimized_poseidon_constants_cuda(tree_arity, ctx, &tree_constants);
|
||||
PoseidonConfig tree_config = default_poseidon_config(tree_arity + 1);
|
||||
build_tree(tree_height, tree, &tree_constants, tree_config);
|
||||
Poseidon<scalar_t> tree_poseidon(tree_arity, ctx);
|
||||
HashConfig tree_config = default_hash_config(ctx);
|
||||
build_tree(tree_height, tree, tree_poseidon, tree_config);
|
||||
|
||||
std::cout << "4. Generate membership proof" << std::endl;
|
||||
uint32_t position = tree_width - 1;
|
||||
@@ -133,13 +133,13 @@ int main(int argc, char* argv[])
|
||||
std::cout << "5. Validate the hash membership" << std::endl;
|
||||
uint32_t validated;
|
||||
const scalar_t hash = tree[tree_index(leaf_level, query_position)];
|
||||
validated = validate_proof(hash, tree_height, proof_lr, proof_hash, &tree_constants, tree_config);
|
||||
validated = validate_proof(hash, tree_height, proof_lr, proof_hash, tree_poseidon, tree_config);
|
||||
std::cout << "Validated: " << validated << std::endl;
|
||||
|
||||
std::cout << "6. Tamper the hash" << std::endl;
|
||||
const scalar_t tampered_hash = hash + scalar_t::one();
|
||||
validated = validate_proof(tampered_hash, tree_height, proof_lr, proof_hash, &tree_constants, tree_config);
|
||||
|
||||
validated = validate_proof(tampered_hash, tree_height, proof_lr, proof_hash, tree_poseidon, tree_config);
|
||||
|
||||
std::cout << "7. Invalidate tamper hash membership" << std::endl;
|
||||
std::cout << "Validated: " << validated << std::endl;
|
||||
return 0;
|
||||
|
||||
28
examples/c++/risc0/CMakeLists.txt
Normal file
28
examples/c++/risc0/CMakeLists.txt
Normal file
@@ -0,0 +1,28 @@
|
||||
cmake_minimum_required(VERSION 3.18)

# C++17 is required for both host and device code.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

# Pick the GPU architecture before project() enables the CUDA language.
if(CMAKE_VERSION VERSION_LESS "3.24.0")
  # Deliberately unquoted: when CUDA_ARCH is not provided this leaves the variable unset.
  set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
else()
  set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
endif()
project(example LANGUAGES CUDA CXX)

set(CMAKE_CUDA_FLAGS_RELEASE "")
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")

add_executable(
  example
  example.cu
)

set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_include_directories(example PRIVATE "../../../icicle/include")
target_compile_options(example PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)

# can link to another curve/field by changing the following lib and FIELD_ID
# NOTE(review): 1001 is presumably the numeric id of the babybear field linked below -- confirm against icicle's field ids.
target_compile_definitions(example PRIVATE FIELD_ID=1001)
target_link_libraries(example PRIVATE ${PROJECT_SOURCE_DIR}/build/icicle/lib/libingo_field_babybear.a)
|
||||
44
examples/c++/risc0/README.md
Normal file
44
examples/c++/risc0/README.md
Normal file
@@ -0,0 +1,44 @@
|
||||
# ICICLE example: RISC0's Fibonacci sequence proof using Polynomial API
|
||||
|
||||
## Why RISC0?
|
||||
|
||||
[RISC0 Protocol](https://www.risczero.com/) creates computational integrity proofs (a.k.a. Zero Knowledge Proofs) for programs executing on RISC-V architecture.
|
||||
The proofs are created for sequences of values in RISC-V registers, called execution traces.
|
||||
This approach is transparent to developers and enables the use of general purpose languages.
|
||||
|
||||
## Best-Practices
|
||||
|
||||
This example builds on the [ICICLE Polynomial API](../polynomial-api/README.md), so we recommend running that example first.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
RISC0 encodes execution traces into very large polynomials and commits them using Merkle trees.
|
||||
FRI speeds up validation of such commitments by recursively generating smaller polynomials (and trees) from larger ones.
|
||||
The key enabler for *recursion* is the *redundancy* of polynomial commitments, hence the use of Reed-Solomon codes.
|
||||
|
||||
## Running the example
|
||||
|
||||
To run the example from the project root directory:
|
||||
|
||||
```sh
|
||||
cd examples/c++/risc0
|
||||
./compile.sh
|
||||
./run.sh
|
||||
```
|
||||
|
||||
## What's in the example
|
||||
|
||||
The example follows [STARK by Hand](https://dev.risczero.com/proof-system/stark-by-hand), structured in the following Lessons:
|
||||
|
||||
1. The Execution Trace
|
||||
2. Rule checks to validate a computation
|
||||
3. Padding the Trace
|
||||
4. Constructing Trace Polynomials
|
||||
5. ZK Commitments of the Trace Data
|
||||
6. Constraint Polynomials
|
||||
7. Mixing Constraint Polynomials
|
||||
8. The Core of the RISC Zero STARK
|
||||
9. The DEEP Technique
|
||||
10. Mixing (Batching) for FRI
|
||||
11. FRI Protocol (Commit Phase)
|
||||
12. FRI Protocol (Query Phase)
|
||||
15
examples/c++/risc0/compile.sh
Executable file
15
examples/c++/risc0/compile.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DFIELD=babybear
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
275
examples/c++/risc0/example.cu
Normal file
275
examples/c++/risc0/example.cu
Normal file
@@ -0,0 +1,275 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
#include "polynomials/polynomials.h"
|
||||
#include "polynomials/cuda_backend/polynomial_cuda_backend.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
|
||||
using namespace polynomials;
|
||||
|
||||
// define the polynomial type
|
||||
typedef Polynomial<scalar_t> Polynomial_t;
|
||||
|
||||
// RISC-V register type
|
||||
typedef int64_t rv_t;
|
||||
|
||||
// Convert RISC-V registers to Finite Fields
|
||||
void to_ff(rv_t* rv, scalar_t* s, size_t n) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
s[i] = scalar_t::from(rv[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Prints `header`, then the value of polynomial `*p` at each of the 2^logn points
//   shift, shift*w, shift*w^2, ...
// where w = scalar_t::omega(logn). One "index: value" line is written per point.
void p_print(Polynomial_t * p, int logn, scalar_t shift, std::string header = "Print Vector") {
  std::cout << header << std::endl;
  auto count = 1 << logn;
  auto step = scalar_t::omega(logn);
  auto point = shift;
  int row = 0;
  while (row < count) {
    std::cout << row << ": " << (*p)(point) << std::endl;
    point = point * step; // advance to the next point of the (shifted) domain
    ++row;
  }
}
|
||||
|
||||
// value to polynomial
|
||||
// Wraps a single scalar as a polynomial with exactly one coefficient (`value`).
Polynomial_t p_value(scalar_t value) {
  return Polynomial_t::from_coefficients(&value, 1);
}
|
||||
|
||||
// Returns a new polynomial whose evaluations on the size-2^logn roots-of-unity
// domain are those of `*p` rotated right by one position (the last evaluation
// wraps around to index 0).
Polynomial_t p_rotate(Polynomial_t* p, int logn) {
  auto domain_size = 1 << logn;
  auto evals = std::make_unique<scalar_t[]>(domain_size);
  p->evaluate_on_rou_domain(logn, evals.get());

  // Save the element that wraps around, then shift everything up by one slot.
  scalar_t wrapped = evals[domain_size - 1];
  int dst = domain_size - 1;
  while (dst > 0) {
    evals[dst] = evals[dst - 1];
    --dst;
  }
  evals[0] = wrapped;

  return Polynomial_t::from_rou_evaluations(evals.get(), domain_size);
}
|
||||
|
||||
// mix polynomials (c.f. mix polynomial evaluations)
|
||||
// Mixes polynomials into one using powers of `mix_parameter`:
//   out = in[0] + m * in[1] + m^2 * in[2] + ... + m^(nmix-1) * in[nmix-1]
//
// in             array of `nmix` polynomial pointers; in[0] is dereferenced
//                unconditionally, so `nmix` must be at least 1
// nmix           number of polynomials in `in`
// mix_parameter  the mixing scalar m
//
// Returns the mixed polynomial (the inputs are not modified; in[0] is cloned).
Polynomial_t p_mix(Polynomial_t* in[], size_t nmix, scalar_t mix_parameter) {
  scalar_t factor = mix_parameter;
  Polynomial_t out = in[0]->clone();
  // size_t index: matches the type of `nmix` and avoids a signed/unsigned comparison
  for (size_t i = 1; i < nmix; ++i) {
    out += factor * (*in[i]);
    factor = factor * mix_parameter;
  }
  return out;
}
|
||||
|
||||
// Finds the line through (xa, ya) and (xb, yb) and stores it as
//   coeffs[0] + coeffs[1] * x
// i.e. coeffs[1] is the slope and coeffs[0] the constant term.
// `coeffs` must have room for two scalars.
void solve_linear(scalar_t xa, scalar_t ya, scalar_t xb, scalar_t yb, scalar_t * coeffs) {
  scalar_t dy = ya - yb;
  scalar_t dx = xa - xb;
  scalar_t slope = dy * scalar_t::inverse(dx);
  coeffs[1] = slope;
  coeffs[0] = ya - slope * xa;
}
|
||||
|
||||
// Evaluates polynomial `*p` on a size-`n` NTT domain shifted by `shift`.
//
// The coefficients of `*p` are copied into a zero-filled buffer of `n` scalars
// and a forward NTT is run with `coset_gen = shift`.
//
// p      polynomial to evaluate
// n      size of the target domain; must be larger than the degree of `*p`
// shift  coset generator passed to the NTT (defaults to one)
//
// Returns the `n` evaluations, or an empty pointer (after reporting on stderr)
// when `n` is too small to hold the coefficients or the NTT reports an error.
std::unique_ptr<scalar_t[]> InterpolateOnLargerDomain(Polynomial_t * p, int n, scalar_t shift = scalar_t::one()) {
  const int deg = p->degree();
  if (deg >= n) {
    // copy_coeffs below would write past the end of the n-element buffer
    std::cerr << "InterpolateOnLargerDomain: domain size " << n << " is too small for degree " << deg << std::endl;
    return nullptr;
  }
  auto input = std::make_unique<scalar_t[]>(n);
  // TBD: check if scalar_t constructor initializes to zero
  for (int i = 0; i < n; ++i) {
    input[i] = scalar_t::zero();
  }
  p->copy_coeffs(input.get(), 0/*start*/, deg);
  auto ntt_config = ntt::default_ntt_config<scalar_t>();
  ntt_config.coset_gen = shift;
  auto evals_h = std::make_unique<scalar_t[]>(n);
  auto err = ntt::ntt(input.get(), n, ntt::NTTDir::kForward, ntt_config, evals_h.get());
  if (err != cudaSuccess) {
    // do not hand back a buffer whose contents the NTT did not produce
    std::cerr << "InterpolateOnLargerDomain: NTT failed: " << cudaGetErrorString(err) << std::endl;
    return nullptr;
  }
  return evals_h;
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
std::cout << "This is an ICICLE C++ implementation of the STARK by Hand Explainer." << std::endl;
|
||||
std::cout << "https://dev.risczero.com/proof-system/stark-by-hand" << std::endl;
|
||||
|
||||
const int logn=3;
|
||||
const int n = 1 << logn;
|
||||
|
||||
std::cout << "Initializing NTT" << std::endl;
|
||||
static const int MAX_NTT_LOG_SIZE = 24;
|
||||
auto ntt_config = ntt::default_ntt_config<scalar_t>();
|
||||
const scalar_t basic_root = scalar_t::omega(MAX_NTT_LOG_SIZE);
|
||||
ntt::init_domain(basic_root, ntt_config.ctx);
|
||||
std::cout << "Initializing Polynomials" << std::endl;
|
||||
// Virtual factory design pattern: initializing polynomimals factory for CUDA backend
|
||||
Polynomial_t::initialize(std::make_unique<CUDAPolynomialFactory<>>());
|
||||
|
||||
std::cout << std::endl << "Lesson 1: The Execution Trace" << std::endl;
|
||||
// Trace: Data Columns
|
||||
rv_t rv_d1_trace[] = {24, 30, 54, 84, 78, 15, 29, 50};
|
||||
rv_t rv_d2_trace[] = {30, 54, 84, 138, 2, 77, 21, 36};
|
||||
rv_t rv_d3_trace[] = {54, 84, 138, 222, 71, 17, 92, 33};
|
||||
auto d1_trace = std::make_unique<scalar_t[]>(n);
|
||||
auto d2_trace = std::make_unique<scalar_t[]>(n);
|
||||
auto d3_trace = std::make_unique<scalar_t[]>(n);
|
||||
to_ff(rv_d1_trace, d1_trace.get(), n);
|
||||
to_ff(rv_d2_trace, d2_trace.get(), n);
|
||||
to_ff(rv_d3_trace, d3_trace.get(), n);
|
||||
// Trace: Control Columns
|
||||
// Init steps are flagged in c1_trace
|
||||
// Computation steps are flagged in c2_trace
|
||||
// Termination step is flagged in c3_trace
|
||||
// 0s at the end of each control column correspond to the padding of the trace
|
||||
rv_t rv_c1_trace[] = {1, 0, 0, 0, 0, 0, 0, 0};
|
||||
rv_t rv_c2_trace[] = {0, 1, 1, 1, 0, 0, 0, 0};
|
||||
rv_t rv_c3_trace[] = {0, 0, 0, 1, 0, 0, 0, 0};
|
||||
auto c1_trace = std::make_unique<scalar_t[]>(n);
|
||||
auto c2_trace = std::make_unique<scalar_t[]>(n);
|
||||
auto c3_trace = std::make_unique<scalar_t[]>(n);
|
||||
to_ff(rv_c1_trace, c1_trace.get(), n);
|
||||
to_ff(rv_c2_trace, c2_trace.get(), n);
|
||||
to_ff(rv_c3_trace, c3_trace.get(), n);
|
||||
|
||||
std::cout << "Lesson 2: Rule checks to validate a computation" << std::endl;
|
||||
std::cout << "We use rule-checking polynomials." << std::endl;
|
||||
|
||||
std::cout << "Lesson 3: Padding the Trace" << std::endl;
|
||||
// The trace is padded to a power of 2 size to allow for efficient NTT operations.
|
||||
// we already did this in the initialization of the trace data
|
||||
// We will construct a zero-knowledge proof that:
|
||||
// this trace represents a program that satisfies these 6 rules:
|
||||
// 1) Fibonacci words here
|
||||
// 2) d1_trace[0] == 24 (init 1 constraint)
|
||||
// 3) d2_trace[0] == 30 (init 2 constraint)
|
||||
// 4) d3_trace[3] == 222 (termination constraint)
|
||||
// 5) if c2_trace[i] == 1, then d2_trace[i] == d1_trace[i+1]
|
||||
// 6) if c2_trace[i] == 1, then d3_trace[i] == d2_trace[i+1]
|
||||
|
||||
std::cout << "Lesson 4: Constructing Trace Polynomials" << std::endl;
|
||||
auto p_d1 = Polynomial_t::from_rou_evaluations(d1_trace.get(), n);
|
||||
auto p_d2 = Polynomial_t::from_rou_evaluations(d2_trace.get(), n);
|
||||
auto p_d3 = Polynomial_t::from_rou_evaluations(d3_trace.get(), n);
|
||||
auto p_c1 = Polynomial_t::from_rou_evaluations(c1_trace.get(), n);
|
||||
auto p_c2 = Polynomial_t::from_rou_evaluations(c2_trace.get(), n);
|
||||
auto p_c3 = Polynomial_t::from_rou_evaluations(c3_trace.get(), n);
|
||||
|
||||
std::cout << "Lesson 5: ZK Commitments of the Trace Data" << std::endl;
|
||||
std::cout << "To maintain a zk protocol, the trace polynomials are evaluated over a zk commitment domain" << std::endl;
|
||||
std::cout << "zk commitment domain is a coset of Reed Solomon domain shifted by a basic root of unity" << std::endl;
|
||||
scalar_t xzk = basic_root;
|
||||
p_print(&p_d1, logn, xzk, "ZK commitment for d1 polynomial");
|
||||
std::cout << "Build Merkle Tree for ZK commitments (outside the scope of this example)" << std::endl;
|
||||
|
||||
std::cout << "Lesson 6: Constraint Polynomials" << std::endl;
|
||||
std::cout << "The constraints are used to check the correctness of the trace. In this example, we check 6 rules to establish the validity of the trace." << std::endl;
|
||||
auto p_fib_constraint = (p_d3 - p_d2 - p_d1) * (p_c1 + p_c2 + p_c3);
|
||||
auto fib_constraint_zkcommitment = InterpolateOnLargerDomain(&p_fib_constraint, 4*n, xzk);
|
||||
|
||||
auto p_init1_constraint = (p_d1 - p_value(scalar_t::from(24))) * p_c1;
|
||||
// sanity checks printing
|
||||
p_print(&p_init1_constraint, logn+2, scalar_t::one(), "Reed-Solomon constraint polynomial gives 0s in every 4th row");
|
||||
p_print(&p_init1_constraint, logn+2, xzk, "ZK Commitment constraint polynomial gives no 0s");
|
||||
auto p_init2_constraint = (p_d2 - p_value(scalar_t::from(30))) * p_c1;
|
||||
auto p_termination_constraint = (p_d3 - p_value(scalar_t::from(222))) * p_c3;
|
||||
auto p_recursion_constraint1 = (p_d1 - p_rotate(&p_d2, logn)) * p_c2;
|
||||
auto p_recursion_constraint2 = (p_d2 - p_rotate(&p_d3, logn)) * p_c2;
|
||||
|
||||
std::cout << std::endl << "Lesson 7: Mixing Constraint Polynomials" << std::endl;
|
||||
Polynomial_t * p_all_constraints[] = {&p_fib_constraint, &p_init1_constraint, &p_init2_constraint, &p_termination_constraint, &p_recursion_constraint1, &p_recursion_constraint2};
|
||||
const size_t nmix = sizeof(p_all_constraints) / sizeof(p_all_constraints[0]);
|
||||
auto p_mixed_constraints = p_mix(p_all_constraints, nmix, scalar_t::from(5));
|
||||
std::cout << "All constraint polynomials are low-degree:" << std::endl;
|
||||
for( int i = 0; i < nmix; ++i) {
|
||||
std::cout << i << ": " << p_all_constraints[i]->degree() << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Lesson 8: The Core of the RISC Zero STARK" << std::endl;
|
||||
std::cout << "Degree of the mixed constraints polynomial: " << p_mixed_constraints.degree() << std::endl;
|
||||
auto p_validity = p_mixed_constraints.divide_by_vanishing_polynomial(n);
|
||||
std::cout << "Degree of the validity polynomial: " << p_validity.degree() << std::endl;
|
||||
std::cout << "The Verifier should provide the Merke commitment for the above" << std::endl;
|
||||
|
||||
std::cout << "Lesson 9: The DEEP Technique" << std::endl;
|
||||
std::cout << "The DEEP technique improves the security of a single query by sampling outside of the commitment domain." << std::endl;
|
||||
// In the original STARK protocol, the Verifier tests validity polynomial at a number of test points;
|
||||
// the soundness of the protocol depends on the number of tests.
|
||||
// The DEEP-ALI technique allows us to achieve a high degree of soundness with a single test.
|
||||
// The details of DEEP are described in the following lesson.
|
||||
|
||||
auto DEEP_point = scalar_t::from(93);
|
||||
std::cout << "The prover convinces the verifier that V=C/Z at the DEEP_test_point, " << DEEP_point << std::endl;
|
||||
const scalar_t coeffs1[2] = {scalar_t::zero()-DEEP_point, scalar_t::one()};
|
||||
auto denom_DEEP1 = Polynomial_t::from_coefficients(coeffs1, 2);
|
||||
auto [p_d1_DEEP, r] = (p_d1 - p_value(DEEP_point)).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP d1 degree is: " << p_d1_DEEP.degree() << std::endl;
|
||||
// d2, d3 use recursion constraints and need the point corresponding to the previous state (clock cycle)
|
||||
auto omega = scalar_t::omega(logn);
|
||||
auto DEEP_prev_point = DEEP_point*scalar_t::inverse(omega);
|
||||
auto coeffs2 = std::make_unique<scalar_t[]>(2);
|
||||
coeffs2[0] = scalar_t::zero() - DEEP_prev_point;
|
||||
coeffs2[1] = scalar_t::one();
|
||||
auto denom_DEEP2 = Polynomial_t::from_coefficients(coeffs2.get(), 2);
|
||||
|
||||
auto coeffs_d2bar = std::make_unique<scalar_t[]>(2);
|
||||
solve_linear(DEEP_point, p_d2(DEEP_point), DEEP_prev_point, p_d2(DEEP_prev_point), coeffs_d2bar.get());
|
||||
auto d2bar = Polynomial_t::from_coefficients(coeffs_d2bar.get(), 2);
|
||||
auto [p_d2_DEEP, r2] = (p_d2 - d2bar).divide(denom_DEEP1*denom_DEEP2);
|
||||
std::cout << "The DEEP d2 degree is: " << p_d2_DEEP.degree() << std::endl;
|
||||
|
||||
auto coeffs_d3bar = std::make_unique<scalar_t[]>(2);
|
||||
solve_linear(DEEP_point, p_d3(DEEP_point), DEEP_prev_point, p_d3(DEEP_prev_point), coeffs_d3bar.get());
|
||||
auto d3bar = Polynomial_t::from_coefficients(coeffs_d3bar.get(), 2);
|
||||
auto [p_d3_DEEP, r3] = (p_d3 - d3bar).divide(denom_DEEP1*denom_DEEP2);
|
||||
std::cout << "The DEEP d3 degree is: " << p_d3_DEEP.degree() << std::endl;
|
||||
|
||||
// DEEP c{1,2,3} polynomials
|
||||
const scalar_t coeffs_c1bar[1] = {p_c1(DEEP_point)};
|
||||
auto c1bar = Polynomial_t::from_coefficients(coeffs_c1bar, 1);
|
||||
auto [p_c1_DEEP, r_c1] = (p_c1 - c1bar).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP c1 degree is: " << p_c1_DEEP.degree() << std::endl;
|
||||
const scalar_t coeffs_c2bar[1] = {p_c2(DEEP_point)};
|
||||
auto c2bar = Polynomial_t::from_coefficients(coeffs_c2bar, 1);
|
||||
auto [p_c2_DEEP, r_c2] = (p_c2 - c2bar).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP c2 degree is: " << p_c2_DEEP.degree() << std::endl;
|
||||
const scalar_t coeffs_c3bar[1] = {p_c3(DEEP_point)};
|
||||
auto c3bar = Polynomial_t::from_coefficients(coeffs_c3bar, 1);
|
||||
auto [p_c3_DEEP, r_c3] = (p_c3 - c3bar).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP c3 degree is: " << p_c3_DEEP.degree() << std::endl;
|
||||
// DEEP validity polynomial
|
||||
const scalar_t coeffs_vbar[1] = {p_validity(DEEP_point)};
|
||||
auto vbar = Polynomial_t::from_coefficients(coeffs_vbar, 1);
|
||||
auto [v_DEEP, r_v] = (p_validity - vbar).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP validity polynomial degree is: " << v_DEEP.degree() << std::endl;
|
||||
std::cout << "The Prover sends DEEP polynomials to the Verifier" << std::endl;
|
||||
|
||||
std::cout << "Lesson 10: Mixing (Batching) for FRI" << std::endl;
|
||||
std::cout << "The initial FRI polynomial is the mix of the 7 DEEP polynomials." << std::endl;
|
||||
Polynomial_t* all_DEEP[] = {&p_d1_DEEP, &p_d2_DEEP, &p_d3_DEEP, &p_c1_DEEP, &p_c2_DEEP, &p_c3_DEEP, &v_DEEP};
|
||||
Polynomial_t fri_input = p_mix(all_DEEP, 7, scalar_t::from(99));
|
||||
std::cout << "The degree of the mixed DEEP polynomial is: " << fri_input.degree() << std::endl;
|
||||
|
||||
std::cout << "Lesson 11: FRI Protocol (Commit Phase)" << std::endl;
|
||||
std::cout << "The prover provides information to convince the verifier that the DEEP polynomials are low-degree." << std::endl;
|
||||
int nof_rounds = 3;
|
||||
Polynomial_t feven[nof_rounds], fodd[nof_rounds], fri[nof_rounds+1];
|
||||
scalar_t rfri[nof_rounds];
|
||||
fri[0] = fri_input.clone();
|
||||
for (int i = 0; i < nof_rounds; ++i) {
|
||||
feven[i] = fri[i].even();
|
||||
fodd[i] = fri[i].odd();
|
||||
rfri[i] = scalar_t::rand_host();
|
||||
fri[i+1] = feven[i] + rfri[i]*fodd[i];
|
||||
std::cout << "The degree of the Round " << i << " polynomial is: " << fri[i+1].degree() << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Lesson 12: FRI Protocol (Query Phase)" << std::endl;
|
||||
// We use Polynomial API to evaluate the FRI polynomials
|
||||
// In practice, verifier will use Merkle commitments
|
||||
auto xp = scalar_t::rand_host();
|
||||
auto xm = scalar_t::zero() - xp;
|
||||
scalar_t lhs[nof_rounds], rhs[nof_rounds];
|
||||
for (int i = 0; i < nof_rounds; ++i) {
|
||||
rhs[i] = (rfri[i]+xp)*fri[i](xp)*scalar_t::inverse(scalar_t::from(2)*xp) + (rfri[i]+xm)*fri[i](xm)*scalar_t::inverse(scalar_t::from(2)*xm);
|
||||
lhs[i] = fri[i+1](xp*xp);
|
||||
std::cout << "Round " << i << std::endl << "rhs: " << rhs[i] << std::endl << "lhs: " << lhs[i] << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
2
examples/c++/risc0/run.sh
Executable file
2
examples/c++/risc0/run.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
@@ -2,10 +2,6 @@
|
||||
|
||||
`ICICLE` provides Rust bindings to CUDA-accelerated C++ implementation of [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).
|
||||
|
||||
## Best Practices
|
||||
|
||||
To save time and avoid setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
|
||||
|
||||
## Usage
|
||||
|
||||
```rust
|
||||
|
||||
@@ -4,10 +4,6 @@
|
||||
|
||||
`ICICLE` provides Rust bindings to CUDA-accelerated C++ implementation of [Number Theoretic Transform](https://github.com/ingonyama-zk/ingopedia/blob/master/src/fft.md).
|
||||
|
||||
## Best Practices
|
||||
|
||||
To save time and avoid setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
|
||||
|
||||
## Usage
|
||||
|
||||
```rust
|
||||
|
||||
@@ -2,7 +2,8 @@ use icicle_bls12_381::curve::ScalarField as F;
|
||||
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
|
||||
use icicle_core::poseidon::{load_optimized_poseidon_constants, poseidon_hash_many, PoseidonConfig};
|
||||
use icicle_core::hash::{SpongeHash, HashConfig};
|
||||
use icicle_core::poseidon::Poseidon;
|
||||
use icicle_core::traits::FieldImpl;
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
@@ -24,14 +25,14 @@ fn main() {
|
||||
let test_size = 1 << size;
|
||||
|
||||
println!("Running Icicle Examples: Rust Poseidon Hash");
|
||||
let arity = 2u32;
|
||||
let arity = 2;
|
||||
println!(
|
||||
"---------------------- Loading optimized Poseidon constants for arity={} ------------------------",
|
||||
arity
|
||||
);
|
||||
let ctx = DeviceContext::default();
|
||||
let constants = load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap();
|
||||
let config = PoseidonConfig::default();
|
||||
let poseidon = Poseidon::load(arity, &ctx).unwrap();
|
||||
let config = HashConfig::default();
|
||||
|
||||
println!(
|
||||
"---------------------- Input size 2^{}={} ------------------------",
|
||||
@@ -45,12 +46,12 @@ fn main() {
|
||||
println!("Executing BLS12-381 Poseidon Hash on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
poseidon_hash_many::<F>(
|
||||
poseidon.hash_many(
|
||||
input_slice,
|
||||
output_slice,
|
||||
test_size as u32,
|
||||
arity as u32,
|
||||
&constants,
|
||||
test_size,
|
||||
arity,
|
||||
1,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
function(check_field)
|
||||
set(SUPPORTED_FIELDS babybear;stark252)
|
||||
set(SUPPORTED_FIELDS babybear;stark252;m31)
|
||||
|
||||
set(IS_FIELD_SUPPORTED FALSE)
|
||||
set(I 1000)
|
||||
|
||||
@@ -9,46 +9,67 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/babybear.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" cudaError_t babybear_extension_ntt_cuda(
|
||||
const babybear::extension_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::extension_t* output);
|
||||
|
||||
extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
extern "C" cudaError_t babybear_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<babybear::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const babybear::scalar_t* round_constants,
|
||||
const babybear::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t babybear_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
extern "C" cudaError_t babybear_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<babybear::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t babybear_poseidon2_hash_cuda(
|
||||
const babybear::scalar_t* input,
|
||||
extern "C" cudaError_t babybear_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<babybear::scalar_t>* poseidon,
|
||||
const babybear::scalar_t* inputs,
|
||||
babybear::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<babybear::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t
|
||||
babybear_poseidon2_delete_cuda(poseidon2::Poseidon2<babybear::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t babybear_build_merkle_tree(
|
||||
const babybear::scalar_t* leaves,
|
||||
babybear::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* compression,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t babybear_mmcs_commit_cuda(
|
||||
const matrix::Matrix<babybear::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
babybear::scalar_t* digests,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* hasher,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t babybear_mul_cuda(
|
||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||
@@ -56,6 +77,9 @@ extern "C" cudaError_t babybear_mul_cuda(
|
||||
extern "C" cudaError_t babybear_add_cuda(
|
||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_accumulate_cuda(
|
||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t babybear_sub_cuda(
|
||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||
|
||||
@@ -69,10 +93,8 @@ extern "C" cudaError_t babybear_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t babybear_bit_reverse_cuda(
|
||||
const babybear::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
babybear::scalar_t* output);
|
||||
const babybear::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);
|
||||
|
||||
@@ -98,6 +120,9 @@ extern "C" cudaError_t babybear_extension_mul_cuda(
|
||||
extern "C" cudaError_t babybear_extension_add_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_accumulate_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_sub_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
@@ -110,4 +135,8 @@ extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_bit_reverse_cuda(
|
||||
const babybear::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::extension_t* output);
|
||||
|
||||
|
||||
#endif
|
||||
@@ -9,20 +9,18 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bls12_377.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_precompute_msm_bases_cuda(
|
||||
bls12_377::g2_affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
bls12_377::g2_affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_msm_cuda(
|
||||
@@ -30,11 +28,8 @@ extern "C" cudaError_t bls12_377_g2_msm_cuda(
|
||||
|
||||
extern "C" cudaError_t bls12_377_precompute_msm_bases_cuda(
|
||||
bls12_377::affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
bls12_377::affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t bls12_377_msm_cuda(
|
||||
@@ -71,32 +66,52 @@ extern "C" cudaError_t bls12_377_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bls12_377_projective_convert_montgomery(
|
||||
bls12_377::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bls12_377::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bls12_377::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bls12_377_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_377::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_hash_cuda(
|
||||
bls12_377::scalar_t* input,
|
||||
bls12_377::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t bls12_377_build_merkle_tree(
|
||||
const bls12_377::scalar_t* leaves,
|
||||
bls12_377::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* compression,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bls12_377::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bls12_377::scalar_t* digests,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* hasher,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bls12_377::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bls12_377::scalar_t* round_constants,
|
||||
const bls12_377::scalar_t* mds_matrix,
|
||||
const bls12_377::scalar_t* non_sparse_matrix,
|
||||
const bls12_377::scalar_t* sparse_matrices,
|
||||
const bls12_377::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bls12_377::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bls12_377::scalar_t>* poseidon,
|
||||
const bls12_377::scalar_t* inputs,
|
||||
bls12_377::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bls12_377_poseidon_delete_cuda(poseidon::Poseidon<bls12_377::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bls12_377_mul_cuda(
|
||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
||||
@@ -104,6 +119,9 @@ extern "C" cudaError_t bls12_377_mul_cuda(
|
||||
extern "C" cudaError_t bls12_377_add_cuda(
|
||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t bls12_377_accumulate_cuda(
|
||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_sub_cuda(
|
||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
||||
|
||||
@@ -117,10 +135,8 @@ extern "C" cudaError_t bls12_377_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t bls12_377_bit_reverse_cuda(
|
||||
const bls12_377::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
bls12_377::scalar_t* output);
|
||||
const bls12_377::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_377::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -9,20 +9,18 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bls12_381.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_precompute_msm_bases_cuda(
|
||||
bls12_381::g2_affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
bls12_381::g2_affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_msm_cuda(
|
||||
@@ -30,11 +28,8 @@ extern "C" cudaError_t bls12_381_g2_msm_cuda(
|
||||
|
||||
extern "C" cudaError_t bls12_381_precompute_msm_bases_cuda(
|
||||
bls12_381::affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
bls12_381::affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t bls12_381_msm_cuda(
|
||||
@@ -71,32 +66,52 @@ extern "C" cudaError_t bls12_381_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bls12_381_projective_convert_montgomery(
|
||||
bls12_381::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bls12_381::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bls12_381::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bls12_381_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_381::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_hash_cuda(
|
||||
bls12_381::scalar_t* input,
|
||||
bls12_381::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t bls12_381_build_merkle_tree(
|
||||
const bls12_381::scalar_t* leaves,
|
||||
bls12_381::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* compression,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bls12_381::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bls12_381::scalar_t* digests,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* hasher,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bls12_381::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bls12_381::scalar_t* round_constants,
|
||||
const bls12_381::scalar_t* mds_matrix,
|
||||
const bls12_381::scalar_t* non_sparse_matrix,
|
||||
const bls12_381::scalar_t* sparse_matrices,
|
||||
const bls12_381::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bls12_381::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bls12_381::scalar_t>* poseidon,
|
||||
const bls12_381::scalar_t* inputs,
|
||||
bls12_381::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bls12_381_poseidon_delete_cuda(poseidon::Poseidon<bls12_381::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bls12_381_mul_cuda(
|
||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
||||
@@ -104,6 +119,9 @@ extern "C" cudaError_t bls12_381_mul_cuda(
|
||||
extern "C" cudaError_t bls12_381_add_cuda(
|
||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t bls12_381_accumulate_cuda(
|
||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_sub_cuda(
|
||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
||||
|
||||
@@ -117,10 +135,8 @@ extern "C" cudaError_t bls12_381_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t bls12_381_bit_reverse_cuda(
|
||||
const bls12_381::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
bls12_381::scalar_t* output);
|
||||
const bls12_381::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_381::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -9,21 +9,19 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" cudaError_t bn254_g2_precompute_msm_bases_cuda(
|
||||
bn254::g2_affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
bn254::g2_affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_msm_cuda(
|
||||
@@ -31,11 +29,8 @@ extern "C" cudaError_t bn254_g2_msm_cuda(
|
||||
|
||||
extern "C" cudaError_t bn254_precompute_msm_bases_cuda(
|
||||
bn254::affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
bn254::affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t bn254_msm_cuda(
|
||||
@@ -72,63 +67,87 @@ extern "C" cudaError_t bn254_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bn254_projective_convert_montgomery(
|
||||
bn254::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
extern "C" cudaError_t bn254_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<bn254::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t bn254_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
extern "C" cudaError_t bn254_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<bn254::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon2_hash_cuda(
|
||||
const bn254::scalar_t* input,
|
||||
extern "C" cudaError_t bn254_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<bn254::scalar_t>* poseidon,
|
||||
const bn254::scalar_t* inputs,
|
||||
bn254::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<bn254::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t bn254_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t
|
||||
bn254_poseidon2_delete_cuda(poseidon2::Poseidon2<bn254::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bn254::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bn254::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bn254_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bn254::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_hash_cuda(
|
||||
bn254::scalar_t* input,
|
||||
bn254::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bn254_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t bn254_build_merkle_tree(
|
||||
const bn254::scalar_t* leaves,
|
||||
bn254::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* compression,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bn254_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bn254::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bn254::scalar_t* digests,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* hasher,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bn254::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* mds_matrix,
|
||||
const bn254::scalar_t* non_sparse_matrix,
|
||||
const bn254::scalar_t* sparse_matrices,
|
||||
const bn254::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bn254::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bn254::scalar_t>* poseidon,
|
||||
const bn254::scalar_t* inputs,
|
||||
bn254::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bn254_poseidon_delete_cuda(poseidon::Poseidon<bn254::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bn254_mul_cuda(
|
||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
||||
@@ -136,6 +155,9 @@ extern "C" cudaError_t bn254_mul_cuda(
|
||||
extern "C" cudaError_t bn254_add_cuda(
|
||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t bn254_accumulate_cuda(
|
||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t bn254_sub_cuda(
|
||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
||||
|
||||
@@ -149,10 +171,8 @@ extern "C" cudaError_t bn254_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t bn254_bit_reverse_cuda(
|
||||
const bn254::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
bn254::scalar_t* output);
|
||||
const bn254::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bn254::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -9,20 +9,18 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bw6_761.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_precompute_msm_bases_cuda(
|
||||
bw6_761::g2_affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
bw6_761::g2_affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_msm_cuda(
|
||||
@@ -30,11 +28,8 @@ extern "C" cudaError_t bw6_761_g2_msm_cuda(
|
||||
|
||||
extern "C" cudaError_t bw6_761_precompute_msm_bases_cuda(
|
||||
bw6_761::affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
bw6_761::affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t bw6_761_msm_cuda(
|
||||
@@ -71,32 +66,52 @@ extern "C" cudaError_t bw6_761_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bw6_761_projective_convert_montgomery(
|
||||
bw6_761::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bw6_761::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bw6_761::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bw6_761_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bw6_761::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_hash_cuda(
|
||||
bw6_761::scalar_t* input,
|
||||
bw6_761::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t bw6_761_build_merkle_tree(
|
||||
const bw6_761::scalar_t* leaves,
|
||||
bw6_761::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* compression,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bw6_761::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bw6_761::scalar_t* digests,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* hasher,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bw6_761::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bw6_761::scalar_t* round_constants,
|
||||
const bw6_761::scalar_t* mds_matrix,
|
||||
const bw6_761::scalar_t* non_sparse_matrix,
|
||||
const bw6_761::scalar_t* sparse_matrices,
|
||||
const bw6_761::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bw6_761::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bw6_761::scalar_t>* poseidon,
|
||||
const bw6_761::scalar_t* inputs,
|
||||
bw6_761::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bw6_761_poseidon_delete_cuda(poseidon::Poseidon<bw6_761::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bw6_761_mul_cuda(
|
||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
||||
@@ -104,6 +119,9 @@ extern "C" cudaError_t bw6_761_mul_cuda(
|
||||
extern "C" cudaError_t bw6_761_add_cuda(
|
||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t bw6_761_accumulate_cuda(
|
||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_sub_cuda(
|
||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
||||
|
||||
@@ -117,10 +135,8 @@ extern "C" cudaError_t bw6_761_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t bw6_761_bit_reverse_cuda(
|
||||
const bw6_761::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
bw6_761::scalar_t* output);
|
||||
const bw6_761::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bw6_761::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -9,19 +9,17 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/grumpkin.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t grumpkin_precompute_msm_bases_cuda(
|
||||
grumpkin::affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
grumpkin::affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t grumpkin_msm_cuda(
|
||||
@@ -41,32 +39,52 @@ extern "C" cudaError_t grumpkin_affine_convert_montgomery(
|
||||
extern "C" cudaError_t grumpkin_projective_convert_montgomery(
|
||||
grumpkin::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const grumpkin::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<grumpkin::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t grumpkin_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<grumpkin::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_hash_cuda(
|
||||
grumpkin::scalar_t* input,
|
||||
grumpkin::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_build_poseidon_merkle_tree(
|
||||
extern "C" cudaError_t grumpkin_build_merkle_tree(
|
||||
const grumpkin::scalar_t* leaves,
|
||||
grumpkin::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* compression,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_mmcs_commit_cuda(
|
||||
const matrix::Matrix<grumpkin::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
grumpkin::scalar_t* digests,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* hasher,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_create_cuda(
|
||||
poseidon::Poseidon<grumpkin::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const grumpkin::scalar_t* round_constants,
|
||||
const grumpkin::scalar_t* mds_matrix,
|
||||
const grumpkin::scalar_t* non_sparse_matrix,
|
||||
const grumpkin::scalar_t* sparse_matrices,
|
||||
const grumpkin::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_load_cuda(
|
||||
poseidon::Poseidon<grumpkin::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<grumpkin::scalar_t>* poseidon,
|
||||
const grumpkin::scalar_t* inputs,
|
||||
grumpkin::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
grumpkin_poseidon_delete_cuda(poseidon::Poseidon<grumpkin::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t grumpkin_mul_cuda(
|
||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
||||
@@ -74,6 +92,9 @@ extern "C" cudaError_t grumpkin_mul_cuda(
|
||||
extern "C" cudaError_t grumpkin_add_cuda(
|
||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t grumpkin_accumulate_cuda(
|
||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_sub_cuda(
|
||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
||||
|
||||
@@ -87,10 +108,8 @@ extern "C" cudaError_t grumpkin_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t grumpkin_bit_reverse_cuda(
|
||||
const grumpkin::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
grumpkin::scalar_t* output);
|
||||
const grumpkin::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, grumpkin::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -6,11 +6,25 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::KeccakConfig& config);
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::HashConfig& config);
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::KeccakConfig& config);
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::HashConfig& config);
|
||||
|
||||
extern "C" cudaError_t build_keccak256_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t build_keccak512_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
#endif
|
||||
94
icicle/include/api/m31.h
Normal file
94
icicle/include/api/m31.h
Normal file
@@ -0,0 +1,94 @@
|
||||
// WARNING: This file is auto-generated by a script.
|
||||
// Any changes made to this file may be overwritten.
|
||||
// Please modify the code generation script instead.
|
||||
// Path to the code generation script: scripts/gen_c_api.py
|
||||
|
||||
#pragma once
|
||||
#ifndef M31_API_H
|
||||
#define M31_API_H
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/m31.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
extern "C" cudaError_t m31_build_merkle_tree(
|
||||
const m31::scalar_t* leaves,
|
||||
m31::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* compression,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t m31_mmcs_commit_cuda(
|
||||
const matrix::Matrix<m31::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
m31::scalar_t* digests,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* hasher,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t m31_mul_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_add_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_accumulate_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t m31_sub_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_transpose_matrix_cuda(
|
||||
const m31::scalar_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
m31::scalar_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t m31_bit_reverse_cuda(
|
||||
const m31::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void m31_generate_scalars(m31::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_scalar_convert_montgomery(
|
||||
m31::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void m31_extension_generate_scalars(m31::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_extension_scalar_convert_montgomery(
|
||||
m31::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t m31_extension_mul_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_add_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_accumulate_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t m31_extension_sub_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_transpose_matrix_cuda(
|
||||
const m31::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
m31::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t m31_extension_bit_reverse_cuda(
|
||||
const m31::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::extension_t* output);
|
||||
|
||||
|
||||
#endif
|
||||
@@ -9,16 +9,38 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/stark252.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
extern "C" cudaError_t stark252_build_merkle_tree(
|
||||
const stark252::scalar_t* leaves,
|
||||
stark252::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* compression,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t stark252_mmcs_commit_cuda(
|
||||
const matrix::Matrix<stark252::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
stark252::scalar_t* digests,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* hasher,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t stark252_mul_cuda(
|
||||
stark252::scalar_t* vec_a, stark252::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, stark252::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t stark252_add_cuda(
|
||||
stark252::scalar_t* vec_a, stark252::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, stark252::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t stark252_accumulate_cuda(
|
||||
stark252::scalar_t* vec_a, stark252::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t stark252_sub_cuda(
|
||||
stark252::scalar_t* vec_a, stark252::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, stark252::scalar_t* result);
|
||||
|
||||
@@ -32,10 +54,8 @@ extern "C" cudaError_t stark252_transpose_matrix_cuda(
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t stark252_bit_reverse_cuda(
|
||||
const stark252::scalar_t* input,
|
||||
uint64_t n,
|
||||
vec_ops::BitReverseConfig& config,
|
||||
stark252::scalar_t* output);
|
||||
const stark252::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, stark252::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void stark252_generate_scalars(stark252::scalar_t* scalars, int size);
|
||||
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
extern "C" cudaError_t ${CURVE}_precompute_msm_bases_cuda(
|
||||
${CURVE}::affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
${CURVE}::affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t ${CURVE}_msm_cuda(
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
extern "C" cudaError_t ${CURVE}_g2_precompute_msm_bases_cuda(
|
||||
${CURVE}::g2_affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
int msm_size,
|
||||
msm::MSMConfig& config,
|
||||
${CURVE}::g2_affine_t* output_bases);
|
||||
|
||||
extern "C" cudaError_t ${CURVE}_g2_msm_cuda(
|
||||
|
||||
@@ -1,26 +1,29 @@
|
||||
extern "C" cudaError_t ${FIELD}_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const ${FIELD}::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_create_cuda(
|
||||
poseidon::Poseidon<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const ${FIELD}::scalar_t* round_constants,
|
||||
const ${FIELD}::scalar_t* mds_matrix,
|
||||
const ${FIELD}::scalar_t* non_sparse_matrix,
|
||||
const ${FIELD}::scalar_t* sparse_matrices,
|
||||
const ${FIELD}::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<${FIELD}::scalar_t>* constants);
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_load_cuda(
|
||||
poseidon::Poseidon<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_hash_cuda(
|
||||
${FIELD}::scalar_t* input,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<${FIELD}::scalar_t>* poseidon,
|
||||
const ${FIELD}::scalar_t* inputs,
|
||||
${FIELD}::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<${FIELD}::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_build_poseidon_merkle_tree(
|
||||
const ${FIELD}::scalar_t* leaves,
|
||||
${FIELD}::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<${FIELD}::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t
|
||||
${FIELD}_poseidon_delete_cuda(poseidon::Poseidon<${FIELD}::scalar_t>* poseidon);
|
||||
@@ -1,30 +1,34 @@
|
||||
extern "C" cudaError_t ${FIELD}_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const ${FIELD}::scalar_t* round_constants,
|
||||
const ${FIELD}::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_hash_cuda(
|
||||
const ${FIELD}::scalar_t* input,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<${FIELD}::scalar_t>* poseidon,
|
||||
const ${FIELD}::scalar_t* inputs,
|
||||
${FIELD}::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<${FIELD}::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t
|
||||
${FIELD}_poseidon2_delete_cuda(poseidon2::Poseidon2<${FIELD}::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
16
icicle/include/api/templates/fields/tree.h
Normal file
16
icicle/include/api/templates/fields/tree.h
Normal file
@@ -0,0 +1,16 @@
|
||||
extern "C" cudaError_t ${FIELD}_build_merkle_tree(
|
||||
const ${FIELD}::scalar_t* leaves,
|
||||
${FIELD}::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_mmcs_commit_cuda(
|
||||
const matrix::Matrix<${FIELD}::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
${FIELD}::scalar_t* digests,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* hasher,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
@@ -4,6 +4,9 @@ extern "C" cudaError_t ${FIELD}_mul_cuda(
|
||||
extern "C" cudaError_t ${FIELD}_add_cuda(
|
||||
${FIELD}::scalar_t* vec_a, ${FIELD}::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, ${FIELD}::scalar_t* result);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_accumulate_cuda(
|
||||
${FIELD}::scalar_t* vec_a, ${FIELD}::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_sub_cuda(
|
||||
${FIELD}::scalar_t* vec_a, ${FIELD}::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, ${FIELD}::scalar_t* result);
|
||||
|
||||
@@ -14,4 +17,7 @@ extern "C" cudaError_t ${FIELD}_transpose_matrix_cuda(
|
||||
${FIELD}::scalar_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_bit_reverse_cuda(
|
||||
const ${FIELD}::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, ${FIELD}::scalar_t* output);
|
||||
|
||||
@@ -4,6 +4,9 @@ extern "C" cudaError_t ${FIELD}_extension_mul_cuda(
|
||||
extern "C" cudaError_t ${FIELD}_extension_add_cuda(
|
||||
${FIELD}::extension_t* vec_a, ${FIELD}::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, ${FIELD}::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_extension_accumulate_cuda(
|
||||
${FIELD}::extension_t* vec_a, ${FIELD}::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_extension_sub_cuda(
|
||||
${FIELD}::extension_t* vec_a, ${FIELD}::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, ${FIELD}::extension_t* result);
|
||||
|
||||
@@ -14,4 +17,7 @@ extern "C" cudaError_t ${FIELD}_extension_transpose_matrix_cuda(
|
||||
${FIELD}::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_extension_bit_reverse_cuda(
|
||||
const ${FIELD}::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, ${FIELD}::extension_t* output);
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
|
||||
#define G2_CURVE_DEFINITIONS \
|
||||
typedef ExtensionField<fq_config> g2_point_field_t; \
|
||||
typedef ExtensionField<fq_config, point_field_t> g2_point_field_t; \
|
||||
static constexpr g2_point_field_t g2_generator_x = \
|
||||
g2_point_field_t{point_field_t{g2_gen_x_re}, point_field_t{g2_gen_x_im}}; \
|
||||
static constexpr g2_point_field_t g2_generator_y = \
|
||||
|
||||
@@ -175,7 +175,7 @@ public:
|
||||
UNROLL
|
||||
#endif
|
||||
for (int i = 0; i < SCALAR_FF::NBITS; i++) {
|
||||
if (i > 0) { res = res + res; }
|
||||
if (i > 0) { res = dbl(res); }
|
||||
if (scalar.get_scalar_digit(SCALAR_FF::NBITS - i - 1, 1)) { res = res + point; }
|
||||
}
|
||||
return res;
|
||||
|
||||
@@ -44,7 +44,7 @@ public:
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Field from(uint32_t value)
|
||||
{
|
||||
storage<TLC> scalar;
|
||||
storage<TLC> scalar{};
|
||||
scalar.limbs[0] = value;
|
||||
for (int i = 1; i < TLC; i++) {
|
||||
scalar.limbs[i] = 0;
|
||||
@@ -58,8 +58,10 @@ public:
|
||||
|
||||
if (logn > CONFIG::omegas_count) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Field: Invalid omega index"); }
|
||||
|
||||
storage_array<CONFIG::omegas_count, TLC> const omega = CONFIG::omega;
|
||||
return Field{omega.storages[logn - 1]};
|
||||
Field omega = Field{CONFIG::rou};
|
||||
for (int i = 0; i < CONFIG::omegas_count - logn; i++)
|
||||
omega = sqr(omega);
|
||||
return omega;
|
||||
}
|
||||
|
||||
static HOST_INLINE Field omega_inv(uint32_t logn)
|
||||
@@ -70,8 +72,10 @@ public:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Field: Invalid omega_inv index");
|
||||
}
|
||||
|
||||
storage_array<CONFIG::omegas_count, TLC> const omega_inv = CONFIG::omega_inv;
|
||||
return Field{omega_inv.storages[logn - 1]};
|
||||
Field omega = inverse(Field{CONFIG::rou});
|
||||
for (int i = 0; i < CONFIG::omegas_count - logn; i++)
|
||||
omega = sqr(omega);
|
||||
return omega;
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Field inv_log_size(uint32_t logn)
|
||||
@@ -182,7 +186,7 @@ public:
|
||||
if (REDUCTION_SIZE == 0) return xs;
|
||||
const ff_wide_storage modulus = get_modulus_squared<REDUCTION_SIZE>();
|
||||
Wide rs = {};
|
||||
return sub_limbs<true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
return sub_limbs<2 * TLC, true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
@@ -190,24 +194,24 @@ public:
|
||||
{
|
||||
const ff_wide_storage modulus = get_modulus_squared<MODULUS_MULTIPLE>();
|
||||
Wide rs = {};
|
||||
sub_limbs<false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
sub_limbs<2 * TLC, false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys)
|
||||
{
|
||||
Wide rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
add_limbs<2 * TLC, false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return sub_modulus_squared<1>(rs);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys)
|
||||
{
|
||||
Wide rs = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
uint32_t carry = sub_limbs<2 * TLC, true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
if (carry == 0) return rs;
|
||||
const ff_wide_storage modulus = get_modulus_squared<1>();
|
||||
add_limbs<false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
add_limbs<2 * TLC, false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
};
|
||||
@@ -228,12 +232,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned MULTIPLIER = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ff_wide_storage modulus_wide()
|
||||
{
|
||||
return CONFIG::modulus_wide;
|
||||
}
|
||||
|
||||
// return m
|
||||
static constexpr HOST_DEVICE_INLINE ff_storage get_m() { return CONFIG::m; }
|
||||
|
||||
@@ -253,12 +251,11 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t
|
||||
add_sub_u32_device(const uint32_t* x, const uint32_t* y, uint32_t* r, size_t n = (TLC >> 1))
|
||||
template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t add_sub_u32_device(const uint32_t* x, const uint32_t* y, uint32_t* r)
|
||||
{
|
||||
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
|
||||
for (unsigned i = 1; i < n; i++)
|
||||
for (unsigned i = 1; i < NLIMBS; i++)
|
||||
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
|
||||
if (!CARRY_OUT) {
|
||||
ptx::addc(0, 0);
|
||||
@@ -267,71 +264,35 @@ public:
|
||||
return SUBTRACT ? ptx::subc(0, 0) : ptx::addc(0, 0);
|
||||
}
|
||||
|
||||
// add or subtract limbs
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t
|
||||
add_sub_limbs_device(const ff_storage& xs, const ff_storage& ys, ff_storage& rs)
|
||||
add_sub_limbs_device(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
return add_sub_u32_device<SUBTRACT, CARRY_OUT>(x, y, r, TLC);
|
||||
return add_sub_u32_device<NLIMBS, SUBTRACT, CARRY_OUT>(x, y, r);
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t
|
||||
add_sub_limbs_device(const ff_wide_storage& xs, const ff_wide_storage& ys, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
return add_sub_u32_device<SUBTRACT, CARRY_OUT>(x, y, r, 2 * TLC);
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr HOST_INLINE uint32_t add_sub_limbs_host(const ff_storage& xs, const ff_storage& ys, ff_storage& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
uint32_t carry = 0;
|
||||
host_math::carry_chain<TLC, false, CARRY_OUT> chain;
|
||||
for (unsigned i = 0; i < TLC; i++)
|
||||
r[i] = SUBTRACT ? chain.sub(x[i], y[i], carry) : chain.add(x[i], y[i], carry);
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr HOST_INLINE uint32_t
|
||||
add_sub_limbs_host(const ff_wide_storage& xs, const ff_wide_storage& ys, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
uint32_t carry = 0;
|
||||
host_math::carry_chain<2 * TLC, false, CARRY_OUT> chain;
|
||||
for (unsigned i = 0; i < 2 * TLC; i++)
|
||||
r[i] = SUBTRACT ? chain.sub(x[i], y[i], carry) : chain.add(x[i], y[i], carry);
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
template <bool CARRY_OUT, typename T>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t add_limbs(const T& xs, const T& ys, T& rs)
|
||||
template <unsigned NLIMBS, bool CARRY_OUT>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t
|
||||
add_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return add_sub_limbs_device<false, CARRY_OUT>(xs, ys, rs);
|
||||
return add_sub_limbs_device<NLIMBS, false, CARRY_OUT>(xs, ys, rs);
|
||||
#else
|
||||
return add_sub_limbs_host<false, CARRY_OUT>(xs, ys, rs);
|
||||
return host_math::template add_sub_limbs<NLIMBS, false, CARRY_OUT>(xs, ys, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <bool CARRY_OUT, typename T>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t sub_limbs(const T& xs, const T& ys, T& rs)
|
||||
template <unsigned NLIMBS, bool CARRY_OUT>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t
|
||||
sub_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return add_sub_limbs_device<true, CARRY_OUT>(xs, ys, rs);
|
||||
return add_sub_limbs_device<NLIMBS, true, CARRY_OUT>(xs, ys, rs);
|
||||
#else
|
||||
return add_sub_limbs_host<true, CARRY_OUT>(xs, ys, rs);
|
||||
return host_math::template add_sub_limbs<NLIMBS, true, CARRY_OUT>(xs, ys, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -531,7 +492,7 @@ public:
|
||||
// are necessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to
|
||||
// less efficient SASS generated by nvcc, so this case needed separate handling.
|
||||
if (b[0] == UINT32_MAX) {
|
||||
add_sub_u32_device<true, false>(c, a, even, TLC);
|
||||
add_sub_u32_device<TLC, true, false>(c, a, even);
|
||||
for (i = 0; i < TLC - 1; i++)
|
||||
odd[i] = a[i];
|
||||
} else {
|
||||
@@ -639,17 +600,18 @@ public:
|
||||
__align__(16) uint32_t diffs[TLC];
|
||||
// Differences of halves \f$ a_{hi} - a_{lo}; b_{lo} - b_{hi} \$f are written into `diffs`, signs written to
|
||||
// `carry1` and `carry2`.
|
||||
uint32_t carry1 = add_sub_u32_device<true, true>(&a[TLC >> 1], a, diffs);
|
||||
uint32_t carry2 = add_sub_u32_device<true, true>(b, &b[TLC >> 1], &diffs[TLC >> 1]);
|
||||
uint32_t carry1 = add_sub_u32_device<(TLC >> 1), true, true>(&a[TLC >> 1], a, diffs);
|
||||
uint32_t carry2 = add_sub_u32_device<(TLC >> 1), true, true>(b, &b[TLC >> 1], &diffs[TLC >> 1]);
|
||||
// Compute the "middle part" of Karatsuba: \f$ a_{lo} \cdot b_{hi} + b_{lo} \cdot a_{hi} \f$.
|
||||
// This is where the assumption about unset high bit of `a` and `b` is relevant.
|
||||
multiply_and_add_short_raw_device(diffs, &diffs[TLC >> 1], middle_part, r, &r[TLC]);
|
||||
// Corrections that need to be performed when differences are negative.
|
||||
// Again, carry doesn't need to be propagated due to unset high bits of `a` and `b`.
|
||||
if (carry1) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], &diffs[TLC >> 1], &middle_part[TLC >> 1]);
|
||||
if (carry2) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], diffs, &middle_part[TLC >> 1]);
|
||||
if (carry1)
|
||||
add_sub_u32_device<(TLC >> 1), true, false>(&middle_part[TLC >> 1], &diffs[TLC >> 1], &middle_part[TLC >> 1]);
|
||||
if (carry2) add_sub_u32_device<(TLC >> 1), true, false>(&middle_part[TLC >> 1], diffs, &middle_part[TLC >> 1]);
|
||||
// Now that middle part is fully correct, it can be added to the result.
|
||||
add_sub_u32_device<false, true>(&r[TLC >> 1], middle_part, &r[TLC >> 1], TLC);
|
||||
add_sub_u32_device<TLC, false, true>(&r[TLC >> 1], middle_part, &r[TLC >> 1]);
|
||||
|
||||
// Carry from adding middle part has to be propagated to the highest limb.
|
||||
for (size_t i = TLC + (TLC >> 1); i < 2 * TLC; i++)
|
||||
@@ -673,25 +635,12 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
static HOST_INLINE void multiply_raw_host(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
for (unsigned i = 0; i < TLC; i++) {
|
||||
uint32_t carry = 0;
|
||||
for (unsigned j = 0; j < TLC; j++)
|
||||
r[j + i] = host_math::madc_cc(a[j], b[i], r[j + i], carry);
|
||||
r[TLC + i] = carry;
|
||||
}
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE void multiply_raw(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return multiply_raw_device(as, bs, rs);
|
||||
#else
|
||||
return multiply_raw_host(as, bs, rs);
|
||||
return host_math::template multiply_raw<TLC>(as, bs, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -702,9 +651,9 @@ public:
|
||||
return multiply_and_add_lsb_neg_modulus_raw_device(as, cs, rs);
|
||||
#else
|
||||
Wide r_wide = {};
|
||||
multiply_raw_host(as, get_neg_modulus(), r_wide.limbs_storage);
|
||||
host_math::template multiply_raw<TLC>(as, get_neg_modulus(), r_wide.limbs_storage);
|
||||
Field r = Wide::get_lower(r_wide);
|
||||
add_limbs<false>(cs, r.limbs_storage, rs);
|
||||
add_limbs<TLC, false>(cs, r.limbs_storage, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -713,7 +662,7 @@ public:
|
||||
#ifdef __CUDA_ARCH__
|
||||
return multiply_msb_raw_device(as, bs, rs);
|
||||
#else
|
||||
return multiply_raw_host(as, bs, rs);
|
||||
return host_math::template multiply_raw<TLC>(as, bs, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -759,7 +708,7 @@ public:
|
||||
if (REDUCTION_SIZE == 0) return xs;
|
||||
const ff_storage modulus = get_modulus<REDUCTION_SIZE>();
|
||||
Field rs = {};
|
||||
return sub_limbs<true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
return sub_limbs<TLC, true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const Field& xs)
|
||||
@@ -778,17 +727,17 @@ public:
|
||||
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys)
|
||||
{
|
||||
Field rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
add_limbs<TLC, false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return sub_modulus<1>(rs);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys)
|
||||
{
|
||||
Field rs = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
uint32_t carry = sub_limbs<TLC, true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
if (carry == 0) return rs;
|
||||
const ff_storage modulus = get_modulus<1>();
|
||||
add_limbs<false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
add_limbs<TLC, false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
@@ -838,15 +787,23 @@ public:
|
||||
uint32_t carry;
|
||||
// As mentioned, either 2 or 1 reduction can be performed depending on the field in question.
|
||||
if (num_of_reductions() == 2) {
|
||||
carry = sub_limbs<true>(r.limbs_storage, get_modulus<2>(), r_reduced);
|
||||
carry = sub_limbs<TLC, true>(r.limbs_storage, get_modulus<2>(), r_reduced);
|
||||
if (carry == 0) r = Field{r_reduced};
|
||||
}
|
||||
carry = sub_limbs<true>(r.limbs_storage, get_modulus<1>(), r_reduced);
|
||||
carry = sub_limbs<TLC, true>(r.limbs_storage, get_modulus<1>(), r_reduced);
|
||||
if (carry == 0) r = Field{r_reduced};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE Field& operator=(Field const& other)
|
||||
{
|
||||
for (int i = 0; i < TLC; i++) {
|
||||
this->limbs_storage.limbs[i] = other.limbs_storage.limbs[i];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator*(const Field& xs, const Field& ys)
|
||||
{
|
||||
Wide xy = mul_wide(xs, ys); // full mult
|
||||
@@ -933,7 +890,7 @@ public:
|
||||
{
|
||||
const ff_storage modulus = get_modulus<MODULUS_MULTIPLE>();
|
||||
Field rs = {};
|
||||
sub_limbs<false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
sub_limbs<TLC, false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
@@ -963,7 +920,7 @@ public:
|
||||
static constexpr HOST_DEVICE_INLINE bool lt(const Field& xs, const Field& ys)
|
||||
{
|
||||
ff_storage dummy = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, dummy);
|
||||
uint32_t carry = sub_limbs<TLC, true>(xs.limbs_storage, ys.limbs_storage, dummy);
|
||||
return carry;
|
||||
}
|
||||
|
||||
@@ -983,12 +940,12 @@ public:
|
||||
while (!(u == one) && !(v == one)) {
|
||||
while (is_even(u)) {
|
||||
u = div2(u);
|
||||
if (is_odd(b)) add_limbs<false>(b.limbs_storage, modulus, b.limbs_storage);
|
||||
if (is_odd(b)) add_limbs<TLC, false>(b.limbs_storage, modulus, b.limbs_storage);
|
||||
b = div2(b);
|
||||
}
|
||||
while (is_even(v)) {
|
||||
v = div2(v);
|
||||
if (is_odd(c)) add_limbs<false>(c.limbs_storage, modulus, c.limbs_storage);
|
||||
if (is_odd(c)) add_limbs<TLC, false>(c.limbs_storage, modulus, c.limbs_storage);
|
||||
c = div2(c);
|
||||
}
|
||||
if (lt(v, u)) {
|
||||
|
||||
@@ -33,6 +33,9 @@ namespace field_config = babybear;
|
||||
#elif FIELD_ID == STARK_252
|
||||
#include "fields/stark_fields/stark252.cuh"
|
||||
namespace field_config = stark252;
|
||||
#elif FIELD_ID == M31
|
||||
#include "fields/stark_fields/m31.cuh"
|
||||
namespace field_config = m31;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -5,25 +5,29 @@
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "storage.cuh"
|
||||
|
||||
namespace host_math {
|
||||
|
||||
// return x + y with uint32_t operands
|
||||
static __host__ uint32_t add(const uint32_t x, const uint32_t y) { return x + y; }
|
||||
static constexpr __host__ uint32_t add(const uint32_t x, const uint32_t y) { return x + y; }
|
||||
|
||||
// return x + y + carry with uint32_t operands
|
||||
static __host__ uint32_t addc(const uint32_t x, const uint32_t y, const uint32_t carry) { return x + y + carry; }
|
||||
static constexpr __host__ uint32_t addc(const uint32_t x, const uint32_t y, const uint32_t carry)
|
||||
{
|
||||
return x + y + carry;
|
||||
}
|
||||
|
||||
// return x + y and carry out with uint32_t operands
|
||||
static __host__ uint32_t add_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
static constexpr __host__ uint32_t add_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
uint32_t result;
|
||||
result = x + y;
|
||||
uint32_t result = x + y;
|
||||
carry = x > result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x + y + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t addc_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
static constexpr __host__ uint32_t addc_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
const uint32_t result = x + y + carry;
|
||||
carry = carry && x >= result || !carry && x > result;
|
||||
@@ -31,22 +35,24 @@ namespace host_math {
|
||||
}
|
||||
|
||||
// return x - y with uint32_t operands
|
||||
static __host__ uint32_t sub(const uint32_t x, const uint32_t y) { return x - y; }
|
||||
static constexpr __host__ uint32_t sub(const uint32_t x, const uint32_t y) { return x - y; }
|
||||
|
||||
// return x - y - borrow with uint32_t operands
|
||||
static __host__ uint32_t subc(const uint32_t x, const uint32_t y, const uint32_t borrow) { return x - y - borrow; }
|
||||
|
||||
// return x - y and borrow out with uint32_t operands
|
||||
static __host__ uint32_t sub_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
// return x - y - borrow with uint32_t operands
|
||||
static constexpr __host__ uint32_t subc(const uint32_t x, const uint32_t y, const uint32_t borrow)
|
||||
{
|
||||
uint32_t result;
|
||||
result = x - y;
|
||||
return x - y - borrow;
|
||||
}
|
||||
|
||||
// return x - y and borrow out with uint32_t operands
|
||||
static constexpr __host__ uint32_t sub_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
{
|
||||
uint32_t result = x - y;
|
||||
borrow = x < result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x - y - borrow and borrow out with uint32_t operands
|
||||
static __host__ uint32_t subc_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
// return x - y - borrow and borrow out with uint32_t operands
|
||||
static constexpr __host__ uint32_t subc_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
{
|
||||
const uint32_t result = x - y - borrow;
|
||||
borrow = borrow && x <= result || !borrow && x < result;
|
||||
@@ -54,12 +60,11 @@ namespace host_math {
|
||||
}
|
||||
|
||||
// return x * y + z + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t madc_cc(const uint32_t x, const uint32_t y, const uint32_t z, uint32_t& carry)
|
||||
static constexpr __host__ uint32_t madc_cc(const uint32_t x, const uint32_t y, const uint32_t z, uint32_t& carry)
|
||||
{
|
||||
uint32_t result;
|
||||
uint64_t r = static_cast<uint64_t>(x) * y + z + carry;
|
||||
carry = (uint32_t)(r >> 32);
|
||||
result = r & 0xffffffff;
|
||||
uint32_t result = r & 0xffffffff;
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -69,7 +74,7 @@ namespace host_math {
|
||||
|
||||
constexpr HOST_INLINE carry_chain() : index(0) {}
|
||||
|
||||
HOST_INLINE uint32_t add(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
constexpr HOST_INLINE uint32_t add(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
@@ -82,7 +87,7 @@ namespace host_math {
|
||||
return host_math::addc(x, y, carry);
|
||||
}
|
||||
|
||||
HOST_INLINE uint32_t sub(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
constexpr HOST_INLINE uint32_t sub(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
@@ -95,6 +100,89 @@ namespace host_math {
|
||||
return host_math::subc(x, y, carry);
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned NLIMBS_A, unsigned NLIMBS_B = NLIMBS_A>
|
||||
static constexpr HOST_INLINE void
|
||||
multiply_raw(const storage<NLIMBS_A>& as, const storage<NLIMBS_B>& bs, storage<NLIMBS_A + NLIMBS_B>& rs)
|
||||
{
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
for (unsigned i = 0; i < NLIMBS_B; i++) {
|
||||
uint32_t carry = 0;
|
||||
for (unsigned j = 0; j < NLIMBS_A; j++)
|
||||
r[j + i] = host_math::madc_cc(a[j], b[i], r[j + i], carry);
|
||||
r[NLIMBS_A + i] = carry;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr HOST_INLINE uint32_t
|
||||
add_sub_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
uint32_t carry = 0;
|
||||
carry_chain<NLIMBS, false, CARRY_OUT> chain;
|
||||
for (unsigned i = 0; i < NLIMBS; i++)
|
||||
r[i] = SUBTRACT ? chain.sub(x[i], y[i], carry) : chain.add(x[i], y[i], carry);
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned BITS>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> left_shift(const storage<NLIMBS>& xs)
|
||||
{
|
||||
if constexpr (BITS == 0)
|
||||
return xs;
|
||||
else {
|
||||
constexpr unsigned BITS32 = BITS % 32;
|
||||
constexpr unsigned LIMBS_GAP = BITS / 32;
|
||||
storage<NLIMBS> out{};
|
||||
if constexpr (LIMBS_GAP < NLIMBS) {
|
||||
out.limbs[LIMBS_GAP] = xs.limbs[0] << BITS32;
|
||||
for (unsigned i = 1; i < NLIMBS - LIMBS_GAP; i++)
|
||||
out.limbs[i + LIMBS_GAP] = (xs.limbs[i] << BITS32) + (xs.limbs[i - 1] >> (32 - BITS32));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned BITS>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> right_shift(const storage<NLIMBS>& xs)
|
||||
{
|
||||
if constexpr (BITS == 0)
|
||||
return xs;
|
||||
else {
|
||||
constexpr unsigned BITS32 = BITS % 32;
|
||||
constexpr unsigned LIMBS_GAP = BITS / 32;
|
||||
storage<NLIMBS> out{};
|
||||
if constexpr (LIMBS_GAP < NLIMBS - 1) {
|
||||
for (unsigned i = 0; i < NLIMBS - LIMBS_GAP - 1; i++)
|
||||
out.limbs[i] = (xs.limbs[i + LIMBS_GAP] >> BITS32) + (xs.limbs[i + LIMBS_GAP + 1] << (32 - BITS32));
|
||||
}
|
||||
if constexpr (LIMBS_GAP < NLIMBS) out.limbs[NLIMBS - LIMBS_GAP - 1] = (xs.limbs[NLIMBS - 1] >> BITS32);
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS_NUM, unsigned NLIMBS_DENOM, unsigned NLIMBS_Q = (NLIMBS_NUM - NLIMBS_DENOM)>
|
||||
static constexpr HOST_INLINE void integer_division(
|
||||
const storage<NLIMBS_NUM>& num, const storage<NLIMBS_DENOM>& denom, storage<NLIMBS_Q>& q, storage<NLIMBS_DENOM>& r)
|
||||
{
|
||||
storage<NLIMBS_DENOM> temp = {};
|
||||
for (int limb_idx = NLIMBS_NUM - 1; limb_idx >= 0; limb_idx--) {
|
||||
for (int bit_idx = 31; bit_idx >= 0; bit_idx--) {
|
||||
r = left_shift<NLIMBS_DENOM, 1>(r);
|
||||
r.limbs[0] |= ((num.limbs[limb_idx] >> bit_idx) & 1);
|
||||
uint32_t c = add_sub_limbs<NLIMBS_DENOM, true, true>(r, denom, temp);
|
||||
if (limb_idx < NLIMBS_Q & !c) {
|
||||
r = temp;
|
||||
q.limbs[limb_idx] |= 1 << bit_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace host_math
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@@ -10,5 +10,6 @@
|
||||
|
||||
#define BABY_BEAR 1001
|
||||
#define STARK_252 1002
|
||||
#define M31 1003
|
||||
|
||||
#endif
|
||||
129
icicle/include/fields/params_gen.cuh
Normal file
129
icicle/include/fields/params_gen.cuh
Normal file
@@ -0,0 +1,129 @@
|
||||
#pragma once
|
||||
#ifndef PARAMS_GEN_H
|
||||
#define PARAMS_GEN_H
|
||||
|
||||
#include "storage.cuh"
|
||||
#include "host_math.cuh"
|
||||
|
||||
namespace params_gen {
|
||||
template <unsigned NLIMBS, unsigned BIT_SHIFT>
|
||||
static constexpr HOST_INLINE storage<2 * NLIMBS> get_square(const storage<NLIMBS>& xs)
|
||||
{
|
||||
storage<2 * NLIMBS> rs = {};
|
||||
host_math::template multiply_raw<NLIMBS>(xs, xs, rs);
|
||||
return host_math::template left_shift<2 * NLIMBS, BIT_SHIFT>(rs);
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS>
|
||||
static constexpr HOST_INLINE storage<NLIMBS>
|
||||
get_difference_no_carry(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys)
|
||||
{
|
||||
storage<NLIMBS> rs = {};
|
||||
host_math::template add_sub_limbs<NLIMBS, true, false>(xs, ys, rs);
|
||||
return rs;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned EXP>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> get_m(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
storage<NLIMBS> rs = {};
|
||||
storage<NLIMBS> qs = {};
|
||||
storage<2 * NLIMBS> wide_one = {1};
|
||||
storage<2 * NLIMBS> pow_of_2 = host_math::template left_shift<2 * NLIMBS, EXP>(wide_one);
|
||||
host_math::template integer_division<2 * NLIMBS, NLIMBS>(pow_of_2, modulus, qs, rs);
|
||||
return qs;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, bool INV>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> get_montgomery_constant(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
storage<NLIMBS> rs = {1};
|
||||
for (int i = 0; i < 32 * NLIMBS; i++) {
|
||||
if (INV) {
|
||||
if (rs.limbs[0] & 1) host_math::template add_sub_limbs<NLIMBS, false, false>(rs, modulus, rs);
|
||||
rs = host_math::template right_shift<NLIMBS, 1>(rs);
|
||||
} else {
|
||||
rs = host_math::template left_shift<NLIMBS, 1>(rs);
|
||||
storage<NLIMBS> temp = {};
|
||||
rs = host_math::template add_sub_limbs<NLIMBS, true, true>(rs, modulus, temp) ? rs : temp;
|
||||
}
|
||||
}
|
||||
return rs;
|
||||
}
|
||||
|
||||
constexpr unsigned floorlog2(uint32_t x) { return x == 1 ? 0 : 1 + floorlog2(x >> 1); }
|
||||
|
||||
template <unsigned NLIMBS, unsigned NBITS>
|
||||
constexpr unsigned num_of_reductions(const storage<NLIMBS>& modulus, const storage<NLIMBS>& m)
|
||||
{
|
||||
storage<2 * NLIMBS> x1 = {};
|
||||
storage<3 * NLIMBS> x2 = {};
|
||||
storage<3 * NLIMBS> x3 = {};
|
||||
host_math::template multiply_raw<NLIMBS>(modulus, m, x1);
|
||||
host_math::template multiply_raw<NLIMBS, 2 * NLIMBS>(modulus, x1, x2);
|
||||
storage<2 * NLIMBS> one = {1};
|
||||
storage<2 * NLIMBS> pow_of_2 = host_math::template left_shift<2 * NLIMBS, NBITS>(one);
|
||||
host_math::template multiply_raw<NLIMBS, 2 * NLIMBS>(modulus, pow_of_2, x3);
|
||||
host_math::template add_sub_limbs<3 * NLIMBS, true, false>(x3, x2, x2);
|
||||
double err = (double)x2.limbs[2 * NLIMBS - 1] / pow_of_2.limbs[2 * NLIMBS - 1];
|
||||
err += (double)m.limbs[NLIMBS - 1] / 0xffffffff;
|
||||
err += (double)NLIMBS / 0x80000000;
|
||||
return unsigned(err) + 1;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS>
|
||||
constexpr unsigned two_adicity(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
unsigned two_adicity = 1;
|
||||
storage<NLIMBS> temp = host_math::template right_shift<NLIMBS, 1>(modulus);
|
||||
while (!(temp.limbs[0] & 1)) {
|
||||
temp = host_math::template right_shift<NLIMBS, 1>(temp);
|
||||
two_adicity++;
|
||||
}
|
||||
return two_adicity;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned TWO_ADICITY>
|
||||
constexpr storage_array<TWO_ADICITY, NLIMBS> get_invs(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
storage_array<TWO_ADICITY, NLIMBS> invs = {};
|
||||
storage<NLIMBS> rs = {1};
|
||||
for (int i = 0; i < TWO_ADICITY; i++) {
|
||||
if (rs.limbs[0] & 1) host_math::template add_sub_limbs<NLIMBS, false, false>(rs, modulus, rs);
|
||||
rs = host_math::template right_shift<NLIMBS, 1>(rs);
|
||||
invs.storages[i] = rs;
|
||||
}
|
||||
return invs;
|
||||
}
|
||||
} // namespace params_gen
|
||||
|
||||
// PARAMS(modulus): expands to a run of `static constexpr` members derived from
// `modulus` at compile time. Declaration order matters: later members use
// earlier ones.
//   limbs_count        - modulus.LC
//   modulus_bit_count  - 32 * (limbs_count - 1) + floorlog2(top limb) + 1
//   zero, one          - the constants 0 and 1 in limbs_count limbs
//   modulus_2/_4       - modulus shifted left by one bit, once and twice
//   neg_modulus        - zero - modulus with the borrow discarded
//   modulus_squared    - get_square<limbs_count, 0>(modulus), in
//                        2 * limbs_count limbs; _2/_4 are left shifts of it
//   m                  - get_m<limbs_count, 2 * modulus_bit_count>(modulus)
//   montgomery_r       - get_montgomery_constant<limbs_count, false>(modulus)
//   montgomery_r_inv   - get_montgomery_constant<limbs_count, true>(modulus)
//   num_of_reductions  - params_gen::num_of_reductions(modulus, m)
#define PARAMS(modulus) \
|
||||
static constexpr unsigned limbs_count = modulus.LC; \
|
||||
static constexpr unsigned modulus_bit_count = \
|
||||
32 * (limbs_count - 1) + params_gen::floorlog2(modulus.limbs[limbs_count - 1]) + 1; \
|
||||
static constexpr storage<limbs_count> zero = {}; \
|
||||
static constexpr storage<limbs_count> one = {1}; \
|
||||
static constexpr storage<limbs_count> modulus_2 = host_math::template left_shift<limbs_count, 1>(modulus); \
|
||||
static constexpr storage<limbs_count> modulus_4 = host_math::template left_shift<limbs_count, 1>(modulus_2); \
|
||||
static constexpr storage<limbs_count> neg_modulus = \
|
||||
params_gen::template get_difference_no_carry<limbs_count>(zero, modulus); \
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = \
|
||||
params_gen::template get_square<limbs_count, 0>(modulus); \
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = \
|
||||
host_math::template left_shift<2 * limbs_count, 1>(modulus_squared); \
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = \
|
||||
host_math::template left_shift<2 * limbs_count, 1>(modulus_squared_2); \
|
||||
static constexpr storage<limbs_count> m = params_gen::template get_m<limbs_count, 2 * modulus_bit_count>(modulus); \
|
||||
static constexpr storage<limbs_count> montgomery_r = \
|
||||
params_gen::template get_montgomery_constant<limbs_count, false>(modulus); \
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = \
|
||||
params_gen::template get_montgomery_constant<limbs_count, true>(modulus); \
|
||||
static constexpr unsigned num_of_reductions = \
|
||||
params_gen::template num_of_reductions<limbs_count, 2 * modulus_bit_count>(modulus, m);
|
||||
|
||||
// TWIDDLES(modulus, rou): expands to two `static constexpr` members.
//   omegas_count - params_gen::two_adicity<limbs_count>(modulus)
//   inv          - params_gen::get_invs<limbs_count, omegas_count>(modulus)
// The expansion refers to `limbs_count`, so it must appear where that name is
// already declared. The `rou` argument is not referenced by the expansion.
#define TWIDDLES(modulus, rou) \
|
||||
static constexpr unsigned omegas_count = params_gen::template two_adicity<limbs_count>(modulus); \
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = \
|
||||
params_gen::template get_invs<limbs_count, omegas_count>(modulus);
|
||||
|
||||
#endif
|
||||
@@ -4,13 +4,13 @@
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
|
||||
template <typename CONFIG>
|
||||
template <typename CONFIG, class T>
|
||||
class ExtensionField
|
||||
{
|
||||
private:
|
||||
friend Field<CONFIG>;
|
||||
friend T;
|
||||
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
typedef typename T::Wide FWide;
|
||||
|
||||
struct ExtensionWide {
|
||||
FWide real;
|
||||
@@ -28,7 +28,7 @@ private:
|
||||
};
|
||||
|
||||
public:
|
||||
typedef Field<CONFIG> FF;
|
||||
typedef T FF;
|
||||
static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
|
||||
|
||||
FF real;
|
||||
@@ -196,11 +196,11 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <class CONFIG>
|
||||
struct SharedMemory<ExtensionField<CONFIG>> {
|
||||
__device__ ExtensionField<CONFIG>* getPointer()
|
||||
template <typename CONFIG, class T>
|
||||
struct SharedMemory<ExtensionField<CONFIG, T>> {
|
||||
__device__ ExtensionField<CONFIG, T>* getPointer()
|
||||
{
|
||||
extern __shared__ ExtensionField<CONFIG> s_ext2_scalar_[];
|
||||
extern __shared__ ExtensionField<CONFIG, T> s_ext2_scalar_[];
|
||||
return s_ext2_scalar_;
|
||||
}
|
||||
};
|
||||
@@ -4,11 +4,11 @@
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
|
||||
template <typename CONFIG>
|
||||
template <typename CONFIG, class T>
|
||||
class ExtensionField
|
||||
{
|
||||
private:
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
typedef typename T::Wide FWide;
|
||||
|
||||
struct ExtensionWide {
|
||||
FWide real;
|
||||
@@ -28,7 +28,7 @@ private:
|
||||
};
|
||||
|
||||
public:
|
||||
typedef Field<CONFIG> FF;
|
||||
typedef T FF;
|
||||
static constexpr unsigned TLC = 4 * CONFIG::limbs_count;
|
||||
|
||||
FF real;
|
||||
@@ -49,15 +49,14 @@ public:
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField to_montgomery(const ExtensionField& xs)
|
||||
{
|
||||
return ExtensionField{
|
||||
xs.real * FF{CONFIG::montgomery_r}, xs.im1 * FF{CONFIG::montgomery_r}, xs.im2 * FF{CONFIG::montgomery_r},
|
||||
xs.im3 * FF{CONFIG::montgomery_r}};
|
||||
FF::to_montgomery(xs.real), FF::to_montgomery(xs.im1), FF::to_montgomery(xs.im2), FF::to_montgomery(xs.im3)};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField from_montgomery(const ExtensionField& xs)
|
||||
{
|
||||
return ExtensionField{
|
||||
xs.real * FF{CONFIG::montgomery_r_inv}, xs.im1 * FF{CONFIG::montgomery_r_inv},
|
||||
xs.im2 * FF{CONFIG::montgomery_r_inv}, xs.im3 * FF{CONFIG::montgomery_r_inv}};
|
||||
FF::from_montgomery(xs.real), FF::from_montgomery(xs.im1), FF::from_montgomery(xs.im2),
|
||||
FF::from_montgomery(xs.im3)};
|
||||
}
|
||||
|
||||
static HOST_INLINE ExtensionField rand_host()
|
||||
@@ -247,11 +246,11 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <class CONFIG>
|
||||
struct SharedMemory<ExtensionField<CONFIG>> {
|
||||
__device__ ExtensionField<CONFIG>* getPointer()
|
||||
template <class CONFIG, class T>
|
||||
struct SharedMemory<ExtensionField<CONFIG, T>> {
|
||||
__device__ ExtensionField<CONFIG, T>* getPointer()
|
||||
{
|
||||
extern __shared__ ExtensionField<CONFIG> s_ext4_scalar_[];
|
||||
extern __shared__ ExtensionField<CONFIG, T> s_ext4_scalar_[];
|
||||
return s_ext4_scalar_;
|
||||
}
|
||||
};
|
||||
@@ -3,337 +3,17 @@
|
||||
#define BLS12_377_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_377 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr unsigned omegas_count = 48;
|
||||
static constexpr unsigned modulus_bit_count = 377;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44,
|
||||
0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
|
||||
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88,
|
||||
0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6,
|
||||
0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510,
|
||||
0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc,
|
||||
0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0x7af73fff, 0xcfffffff, 0xe8f4a2bb,
|
||||
0x45f6b7ff, 0xe10c9dd0, 0xff0aec70, 0xe5dd260c,
|
||||
0x935eb6c4, 0x39c4fa3f, 0xe83aef15, 0xfe51c5b9};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
|
||||
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2,
|
||||
0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af,
|
||||
0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4,
|
||||
0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f,
|
||||
0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48,
|
||||
0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be,
|
||||
0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
|
||||
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488,
|
||||
0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffff68, 0x02cdffff, 0x7fffffb1, 0x51409f83,
|
||||
0x8a7d3ff2, 0x9f7db3a9, 0x6e7c6305, 0x7b4e97b7,
|
||||
0x803c84e8, 0x4cf495bf, 0xe2fdf49a, 0x008d6661};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x451269e8, 0xef129093, 0xe65839f5, 0x6e20bbcd,
|
||||
0xa5582c93, 0x852e3c88, 0xf7f2e657, 0xeeaaf41d,
|
||||
0xa4c49351, 0xeb89746c, 0x436b0736, 0x014212fc};
|
||||
static constexpr storage<12> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f,
|
||||
0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b,
|
||||
0xc63b05c0, 0x17c510ea, 0x01ae3a46},
|
||||
{0xf1391c63, 0x6e76d5ec, 0xbff27d8e, 0x99588459, 0x436b0f62, 0xbce649cf, 0x0ad1dec1, 0x400398f5, 0x1a79beb1,
|
||||
0xc0c534db, 0x796537ca, 0x01680a40},
|
||||
{0x554c85ba, 0x6cbff0e3, 0x0be8ff9d, 0xc07c7a91, 0x9dde4fa2, 0xc3c79f67, 0xb5726bde, 0x44bc6d1a, 0x76d6d607,
|
||||
0xad812919, 0x95e8fd0e, 0x001bc0c2},
|
||||
{0x6d5db237, 0xb8c206b0, 0xcabde6ba, 0x08fed85d, 0xcd92eb6f, 0xf2f54ffc, 0xe39c1788, 0xee81121f, 0x88e82edb,
|
||||
0x852def4d, 0xb95fdb80, 0x00bf1268},
|
||||
{0x192bf14f, 0x3663c26a, 0xe6351854, 0x99c859be, 0x159361b8, 0xf9430828, 0xfbe33d7d, 0x478ed715, 0xdb79c984,
|
||||
0x41e220cf, 0xd961f2be, 0x00cedb38},
|
||||
{0xcc724685, 0xb99caa69, 0x1388a46d, 0xc24087ba, 0x08f03491, 0xeb13a05a, 0x98fb0ff7, 0x558ab21e, 0x86bbd802,
|
||||
0x0166d08d, 0xf5b5728a, 0x00d1dec9},
|
||||
{0x92db32a2, 0x2e3951fe, 0x6014b201, 0x8f5a16c9, 0xa91fbb38, 0xa9e942b9, 0x17b4dbd2, 0xf7bf5b43, 0x81325c7d,
|
||||
0x57f3934a, 0x615ad019, 0x012be78e},
|
||||
{0xdce33f04, 0xb42b84a2, 0x0db0b91c, 0x7a0c1423, 0x88d9f8c8, 0xaed11a0c, 0xd484c501, 0x712d6bc0, 0xfa3f7633,
|
||||
0x50aca1e5, 0xb90f34d0, 0x01002f29},
|
||||
{0xf012f6a0, 0xbc3db054, 0x0d332ea7, 0x00d66897, 0xfd416167, 0x8278ef44, 0x20268e84, 0x1a1a3c4d, 0x4b11d215,
|
||||
0x7c976aa6, 0x63b6e925, 0x00949581},
|
||||
{0x339637c6, 0x9d73cf29, 0xa5642677, 0x8257d1a2, 0xcafd597c, 0xcb48f07f, 0x081435a3, 0x7a505010, 0xacbb9c39,
|
||||
0xaaa45ce1, 0x7431b9c8, 0x013f2b13},
|
||||
{0xd4710c0b, 0x9ef8bddb, 0x85047671, 0xb4c73188, 0x134695ba, 0x87a51d65, 0x022416dd, 0x67f3bc43, 0xcb2a157b,
|
||||
0x21d965b2, 0x5ce4195d, 0x013a57e4},
|
||||
{0xd2461368, 0xf2db3a9f, 0x3802aef2, 0x0595c232, 0x5ea85bd6, 0xa53d621a, 0xa34ee943, 0xce930fbc, 0x6b372bee,
|
||||
0x1d216665, 0xa4535740, 0x009f0159},
|
||||
{0x656bf68d, 0x73cf953a, 0xeac5c1d7, 0x50a5a5b5, 0xaa5355a9, 0x2697b2e1, 0x08de37d2, 0x6be70306, 0x44c5afab,
|
||||
0x907f6976, 0xd4ec46b1, 0x0155cfa2},
|
||||
{0x090e3e20, 0x034160c4, 0xf77a6fbb, 0xbc73cc59, 0x188e54f6, 0x437cd23b, 0x17e42614, 0x5a788edd, 0xebdc8eae,
|
||||
0xf1ad4f54, 0x2f129bcd, 0x005d1440},
|
||||
{0x4e269ee5, 0x5626c031, 0x0d1501ec, 0x5f97673e, 0x86d31c18, 0x4fe089bd, 0x62d1259a, 0x3e9fffcb, 0x1ff89d01,
|
||||
0xe1898f32, 0x59d01a38, 0x00fa1331},
|
||||
{0x38d427b1, 0xda80661b, 0xa814f14b, 0x1913027d, 0xcda4061d, 0xd3f61e24, 0x5da8fcb2, 0x9509e69d, 0x1f05e6d3,
|
||||
0x0e7493a5, 0xa5c6bd06, 0x00dcb8db},
|
||||
{0x61cff9ed, 0x88499d0a, 0x53718444, 0x0b317da2, 0x4b7eec5f, 0xc1624bfd, 0x5af10e6f, 0x6ffc3241, 0xd6c66ff2,
|
||||
0x27d0edf3, 0x73ab0f4a, 0x013019b5},
|
||||
{0x06027b24, 0x42dc7673, 0x3341b9e7, 0x018f8bbd, 0xa435f7e2, 0xd3b389d9, 0xea031176, 0x279739a5, 0x74c35801,
|
||||
0x3555ca51, 0x049dcf87, 0x00748c30},
|
||||
{0x81fe14de, 0x731b16f0, 0x333cc61a, 0x528d6ada, 0x5736dc15, 0x7ae87278, 0xc8bfd40c, 0xa94b9fd2, 0x299b0487,
|
||||
0x714dd8ed, 0xf1a53233, 0x00642b62},
|
||||
{0x5bc45170, 0x31270ddf, 0x7f72c758, 0x7efb6b06, 0xcf4973a8, 0x2eb9f2aa, 0xe556d234, 0xdcb534c9, 0x0e043fef,
|
||||
0xf0b1a210, 0x54dda04e, 0x00e79c44},
|
||||
{0x2d5f1bc2, 0x213b3f52, 0xfd933428, 0x9e115ba7, 0x434c9e2a, 0x7f77d57e, 0xcdb944ef, 0x47a78418, 0x699aa559,
|
||||
0x8cb01cbb, 0xb064c4d7, 0x0075bf81},
|
||||
{0x3fbfc66c, 0x0b6c2e65, 0x6fcab2f8, 0x7bece031, 0xb79dcd4d, 0x2ba7e325, 0xa5c6881b, 0x8c18f66a, 0x7283805a,
|
||||
0x4d893e5a, 0xfc296bfe, 0x0107d3c5},
|
||||
{0x948c881a, 0x53fbdbb4, 0x16803d18, 0xf27a9c14, 0xeddfafef, 0x8490f6c5, 0x3e57fa15, 0xfe068e1d, 0xd26b296b,
|
||||
0xbe923119, 0x9fa377a1, 0x00d56016},
|
||||
{0x6f5b2ad1, 0xb3bbaeb3, 0x11886a1c, 0x0efd4ba9, 0xdedb7083, 0x5911498f, 0x5bd0a90f, 0x0921fe19, 0x83d379cb,
|
||||
0x38e05d4e, 0xb7ba3c73, 0x006b39e2},
|
||||
{0xa55550ba, 0x61b560e4, 0xe7288461, 0xd9ac545b, 0xc6e3e282, 0xde8d2826, 0x7e49dd2c, 0x9e87a310, 0xc43080b7,
|
||||
0xf2edfc44, 0x95b7d300, 0x012b4875},
|
||||
{0x27591e60, 0x4048ddc3, 0xc5d21791, 0xb77c9738, 0x49826bea, 0xf2f82033, 0x42f97e95, 0xf60bb703, 0x5966139d,
|
||||
0xef8f6f16, 0xc0e95e39, 0x00327618},
|
||||
{0x441e395f, 0xf9059c8f, 0xbd087238, 0x29eab35f, 0x7dee5ff1, 0x5d4abeff, 0x771e60e9, 0x7222499b, 0x7ac324a2,
|
||||
0xb70c1ea3, 0x0da51ce8, 0x015b3af9},
|
||||
{0xe9a70026, 0xf7aa576b, 0x01c4a126, 0xb28733ef, 0xa3307647, 0x06b8e768, 0xe12588ce, 0x115500e1, 0x6c9f9b1d,
|
||||
0x7e8dd6b9, 0x6ec020b3, 0x014d091e},
|
||||
{0x8e5bbc8d, 0xd318265d, 0x141bee9b, 0x70b460ba, 0x1aa9df5b, 0x145dd6a6, 0xe3478cb3, 0xd9da2548, 0x7b509387,
|
||||
0x47250509, 0xe967973c, 0x00de53d3},
|
||||
{0xd2aa57b8, 0x5ff4399c, 0xa6ae9b07, 0x90360194, 0x6cfcdb7a, 0x68979991, 0x64e56abb, 0xf517467c, 0xad7a6573,
|
||||
0x44227491, 0xa35ebf55, 0x0001da0b},
|
||||
{0x4d80f6da, 0xd8b22d5a, 0x10ee1a06, 0x6e7b2bfb, 0x17faeac0, 0xac8d97e5, 0x7a12c923, 0x8b75540b, 0x5b42ce02,
|
||||
0xa2787368, 0xe98d9998, 0x008d30a5},
|
||||
{0x9dc292bb, 0xee29c02a, 0xc5b7e1c9, 0x9e7ea016, 0x9a908e5f, 0x62daf95d, 0x3e98eae9, 0x80a71c61, 0xfdda3bba,
|
||||
0x2d514723, 0x068ef829, 0x00f65844},
|
||||
{0x185b1ad6, 0xf62fdfa4, 0xf90ccbe6, 0x2ae7f104, 0x972ce78e, 0xfa435fb6, 0x45e59f91, 0x53a75d3c, 0x2f320b7a,
|
||||
0x7290cac2, 0xe7cb5108, 0x01a2022a},
|
||||
{0xd59dda24, 0xcf0a15be, 0xf2ec72b4, 0xbc77f6d4, 0x96c31202, 0xa8df0caf, 0xbb4f8842, 0xb95429c0, 0xd0087306,
|
||||
0xb989b210, 0x5571e9f0, 0x002b1694},
|
||||
{0x67ae536e, 0x7e84d4b5, 0xc8fb9b80, 0x3a920871, 0x1948ee86, 0x1a82df2b, 0xb3c66ed3, 0xdef79467, 0xef64d05a,
|
||||
0x58fd84f2, 0xd999f400, 0x00c6d5b7},
|
||||
{0x81ee0d53, 0x7639f9a2, 0xb5747565, 0x8ade807d, 0xe6235609, 0xfd9d6266, 0x53730f18, 0xea1948a3, 0xd890142e,
|
||||
0xa356108a, 0xe3e8a723, 0x00a48ac6},
|
||||
{0xd0ca5e04, 0x531c4b83, 0x2ba0a328, 0xff35ced6, 0xa4e563aa, 0x01613079, 0x1442dcd1, 0x6f52b3a3, 0x9e19b0a6,
|
||||
0x813b4616, 0x9536db26, 0x004828c5},
|
||||
{0x0bce1b4e, 0x8a9321a9, 0xae85d6ff, 0xb9759dbe, 0x5cb206e0, 0x1ce1d522, 0x35a1607a, 0x87df044f, 0x94e1329a,
|
||||
0x2ebabee7, 0x73586cc9, 0x01a73170},
|
||||
{0x3dd667f3, 0x69824754, 0x28fd63a2, 0x61a081a7, 0x99499385, 0x0b9f6d2e, 0x5c253e16, 0x6d45622b, 0x765a7f5f,
|
||||
0xcd672e4d, 0x7150d847, 0x01182798},
|
||||
{0x2742d2f6, 0x0af0bfd2, 0x3a02631d, 0x93616956, 0xac8a2203, 0x32dae751, 0x85cf4e2d, 0xea4ffbe7, 0x7dba6eb9,
|
||||
0x673424f4, 0x61f4060d, 0x002ec230},
|
||||
{0x5a5b5c2b, 0x226293ca, 0x0684dbc9, 0xbc0ca23e, 0x7d637c4f, 0x4510cf3a, 0x9b2f4a52, 0x7869c488, 0x2fd73a53,
|
||||
0xec009b90, 0xa8c99cca, 0x003499d6},
|
||||
{0xfd745afc, 0x9da60b0a, 0x41c5362e, 0xff0769ec, 0xfa9fd8ee, 0x487621e9, 0xab04558f, 0x138910d1, 0xc1ed03ce,
|
||||
0x870903cf, 0xed3ffb51, 0x002c1cfa},
|
||||
{0x42870c46, 0x271b1ff3, 0x13b4b491, 0x1e0a9cd1, 0x3c55c65e, 0x2d58cb1a, 0x74756f6e, 0xa6e12c32, 0x2e313bc4,
|
||||
0xf774a43d, 0xcc386ffc, 0x00ca156d},
|
||||
{0x4a67741c, 0x588f79b6, 0xc3590b63, 0xc0ae78b5, 0xc3576385, 0xad0bb97d, 0xb8473137, 0x0583dd49, 0x515d8604,
|
||||
0xb31d9631, 0xd3ba3b12, 0x015337bc},
|
||||
{0x8a458e8c, 0x976a14f5, 0xc3a26ae8, 0xc90809b4, 0x089acf15, 0x270a1575, 0x5013d4b1, 0x614a0d25, 0x6d09901e,
|
||||
0x1314e076, 0xf208945e, 0x0022f414},
|
||||
{0xc563b9a1, 0x7eca603c, 0x06fe0bc3, 0x06df0a43, 0x0ddff8c6, 0xb44d994a, 0x4512a3d4, 0x40fbe05b, 0x8aeffc9b,
|
||||
0x30f15248, 0x05198a80, 0x0036a92e}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b,
|
||||
0xc63b05c0, 0x17c510ea, 0x01ae3a46},
|
||||
{0x0ec6e39e, 0x1691ea13, 0x700d8272, 0x7db2d8ea, 0x769e389d, 0x620d1860, 0xf62334cd, 0xda1f40fd, 0x52278a89,
|
||||
0x0575d0e5, 0x9e5fd920, 0x00463005},
|
||||
{0x93997f11, 0x9403412c, 0xdfb2323f, 0x845557b3, 0x2d50c7fc, 0x66f2eaaa, 0xc103f92f, 0x992358fb, 0x5d7a3179,
|
||||
0x01d60217, 0xd2af5da0, 0x0077b354},
|
||||
{0xc1000ea4, 0x7ac2ca7a, 0x7f8d9495, 0x937db751, 0x0de62931, 0x401b3873, 0x980129ba, 0x59be7529, 0xa545a303,
|
||||
0x2ba8f85d, 0xb6705512, 0x00573e3a},
|
||||
{0x2c1b22e6, 0xb55712f9, 0x0f91cddd, 0x66cfc0f3, 0x8bb345d8, 0x8d5fcd42, 0x86c0abc3, 0x61e4cf98, 0x432fe8f3,
|
||||
0x93556354, 0xad005fb6, 0x00ff87d5},
|
||||
{0x7aba560e, 0x05065a97, 0x7918b9db, 0x333ff005, 0xdf6be708, 0x03938ae1, 0x7410a77b, 0x922d3376, 0x03a15063,
|
||||
0xa5aeaa56, 0x4aea89e5, 0x01542cb6},
|
||||
{0xe4d6a772, 0x61a6a2d6, 0x6e6239a7, 0xc18c9ef7, 0x04cac70f, 0x8772bb3f, 0x16c5916b, 0x8bbb4185, 0x46335dc0,
|
||||
0x4aa656e2, 0x842c1664, 0x008187ac},
|
||||
{0xdd4e93c5, 0xa002ea0a, 0x07458704, 0xb40a45e8, 0xbaa65f2a, 0xee9ee3ea, 0x8f3b8a87, 0xeffa4f9e, 0x95b5feba,
|
||||
0xb6e03897, 0x81751c63, 0x003c41de},
|
||||
{0x13043a4a, 0x50221a3b, 0xda73331a, 0x6537fca8, 0x8e85077c, 0x8b74cef4, 0x0e5bbe67, 0x65705341, 0xefa22d23,
|
||||
0xf0f56caa, 0xd1865d98, 0x001f8eb5},
|
||||
{0x3e26a605, 0xd9af8944, 0x6970166f, 0xad0efb6e, 0x2c7464ec, 0xc16d7972, 0xf788281b, 0xe0de4b04, 0xaa878b0e,
|
||||
0x0c049e55, 0x63e2e7cd, 0x0135383a},
|
||||
{0x6f6893f7, 0x6b12c42e, 0x44bbbf63, 0x831f38c0, 0x191be6c9, 0xa57797d4, 0x447475cb, 0x6af7f695, 0x4b8be189,
|
||||
0x3295e9e7, 0x350d0aad, 0x00a9a32b},
|
||||
{0x7656ef1d, 0xc2243f86, 0xf4211219, 0x3e4c3bc3, 0x3c9a3d21, 0xaa4db6e0, 0xe8a4c946, 0x29ac638a, 0xa4cf856e,
|
||||
0x21449f8b, 0x7d4c9c67, 0x018cf097},
|
||||
{0x6a8e0139, 0x18e472a2, 0xd6b1c835, 0xcc7c80fd, 0x6546fc0a, 0x1f760883, 0x4ea3417c, 0x5bcfc1fb, 0xe9acb8b0,
|
||||
0x52c9a29b, 0xd9f265a2, 0x01a6d8b2},
|
||||
{0xebb83ac0, 0x95eb1dc8, 0x9f390cf2, 0x1e8d70f5, 0xb0d85145, 0xf9e4955d, 0x89720ee1, 0xe9690d30, 0x50fc879f,
|
||||
0x629972a5, 0x69ccd670, 0x00456e23},
|
||||
{0x83f38be4, 0xfbfb11a1, 0x388e6726, 0xb90a19b9, 0xc860d62c, 0x3fc10bc7, 0xc3c4e575, 0xc9fe043e, 0x7396d780,
|
||||
0x67aeff74, 0x01cadaee, 0x019059fa},
|
||||
{0xfd581be8, 0x43506d6e, 0x018b1b76, 0xf09563e6, 0xe87f9d80, 0x5cd193b2, 0x0a933402, 0x18ba3260, 0x50524c77,
|
||||
0x4de839d9, 0xd90315ce, 0x0018c2ed},
|
||||
{0xa737701d, 0xf900eb81, 0x995e6672, 0x6874c90e, 0xa495900b, 0x69ade94a, 0xd07bd4b1, 0xd5f358e7, 0x6f88e8e4,
|
||||
0xbd437e9d, 0x1d6b88cf, 0x0130d706},
|
||||
{0xfc29b95f, 0x064629bd, 0xb546585c, 0x0a897bff, 0x54a80d9a, 0x856c8d4f, 0x944568ff, 0x85410cc4, 0x59fc4370,
|
||||
0xc1978c65, 0xc668dc52, 0x017c86c8},
|
||||
{0xf6109131, 0x65cecd55, 0x7d2f52e5, 0x6d7e892e, 0xb90b2403, 0xe9a09007, 0xae0a060d, 0x92ca9aac, 0xa22b1e96,
|
||||
0x5ce1cc4f, 0x45201e6f, 0x012eb33c},
|
||||
{0x20d1aac5, 0x9d2cb4cf, 0xded22997, 0x3e4a1e77, 0x07fae2e2, 0x09d692f7, 0xd49bdcbe, 0x6a6aa4f8, 0x09c01cab,
|
||||
0xa8e21ead, 0x6b03b72e, 0x01a19e81},
|
||||
{0x935650ca, 0xf3d94623, 0x2ffd937e, 0x4a688a46, 0xa622b139, 0xf55fd53a, 0x7a1a1e40, 0x227406aa, 0x9a3fea60,
|
||||
0x40dd4504, 0x1edbb584, 0x00fc2332},
|
||||
{0xf28db3fc, 0x9707402f, 0xc28593f1, 0x3d898bd7, 0xb30effcd, 0xcaee2dfd, 0x4fb6ec9d, 0xff1b0790, 0x09ed1120,
|
||||
0x9cb0597e, 0xb78d15e9, 0x005c73a5},
|
||||
{0xb0a8a3b9, 0x739a4c2e, 0xc57196ae, 0x083bde21, 0xba602f29, 0x247eb070, 0x1c2c7132, 0x4ba1dd6a, 0xe2187c6c,
|
||||
0x4ce59fb6, 0x606880b1, 0x0014a7b5},
|
||||
{0x484baf56, 0xdd0eccab, 0x4541b101, 0xe6c80eaf, 0xf7964f64, 0x35b8a558, 0xc50ccf94, 0xb3b824d4, 0x21c71aeb,
|
||||
0xe1f6b4c8, 0x23031df0, 0x01a8a647},
|
||||
{0x592a9620, 0x5338dc01, 0xd94a401b, 0xb217f96d, 0xf830b00e, 0xfefb6601, 0xafd3dee4, 0x1ec061b5, 0x05a199bd,
|
||||
0x0d5d4d3c, 0xc8489913, 0x0196c768},
|
||||
{0x1f980ca0, 0x4acb430e, 0x71c6821c, 0x8973a3cc, 0xb3e9aa75, 0x74414c20, 0x0c13f042, 0x79212a5f, 0x375c705b,
|
||||
0x5c44d226, 0x29439af2, 0x000a2fdd},
|
||||
{0xa387b60c, 0xf01901e6, 0x4561ff3d, 0xa7b1b7dc, 0x0558e085, 0x5d82d374, 0xf2bc1d29, 0x519298e5, 0x3d332207,
|
||||
0x0ad719a8, 0xea19a807, 0x0150a138},
|
||||
{0x9deb8e06, 0x7c6b3eb1, 0x28206b6c, 0x3a8f53c4, 0x7fed1065, 0x039f575f, 0x40c1f898, 0x31be74ba, 0x790ac003,
|
||||
0x76db938e, 0x5508c5e4, 0x0096d5e1},
|
||||
{0xb83f8358, 0x3e940e0e, 0x372a4b8b, 0x204d80e0, 0xa820b2ec, 0x956454b2, 0x2cc8078c, 0x8e2cb3d4, 0xc6f81363,
|
||||
0xdd0d3e12, 0x49041a64, 0x0052f327},
|
||||
{0x2aec0be2, 0x37ca2eb7, 0x555cc652, 0x05093570, 0xd2588d31, 0xe62f1adb, 0x798be240, 0x2fd2518e, 0x0ff6b579,
|
||||
0x9302d4e3, 0x6ee95e5d, 0x0025ca57},
|
||||
{0x233eed68, 0xcc664858, 0xece3a327, 0x600ca1ac, 0x93a2e34f, 0x330d1102, 0xdb5e3bb4, 0xc84ab55f, 0xe4d5576e,
|
||||
0x5179c101, 0x0938f714, 0x00efb20e},
|
||||
{0xfdddaf5c, 0x907f96e7, 0x1ffe49da, 0x348dab77, 0xc14ab779, 0x3eca44ad, 0x4cdc5d98, 0xe9b10b2e, 0xa95c5a36,
|
||||
0x65a25d16, 0x6e616518, 0x00c9f759},
|
||||
{0x7a5aff62, 0x9497d331, 0xb57cd01d, 0x21896195, 0x6c7ba745, 0xe09e22f7, 0x5a7acff0, 0xcc9f1064, 0xc93c46b0,
|
||||
0x7b867cdf, 0x23eba5ae, 0x01a05dcb},
|
||||
{0x4dcc71f4, 0xa56a8e33, 0xcbebdba2, 0xc480b083, 0x36ea43af, 0x748448fa, 0xe7859f3c, 0xee9b4b0e, 0x5af41919,
|
||||
0x9ab2bb09, 0x65caa0ea, 0x0127262d},
|
||||
{0x352a05cc, 0x77c7d12f, 0xdc7160c9, 0xb91ca5be, 0x5a3feda0, 0x245106da, 0x7669f7cd, 0xfd45012d, 0xdc5489fa,
|
||||
0xc4774629, 0x2872daa0, 0x00241273},
|
||||
{0x0d3e0b0b, 0x1838ae6f, 0xff67fc2c, 0x7fcc9b21, 0x23956100, 0xaedca59e, 0x1e79aa4b, 0x572ed634, 0xc7f0673c,
|
||||
0xaeeda160, 0xc8047256, 0x00360e2c},
|
||||
{0xe05044f9, 0xec5e4514, 0x7ec9b4ef, 0xe915b7e7, 0x9c4bec48, 0x9fb78cd8, 0xa38d95a3, 0xd7b84113, 0xb86fd119,
|
||||
0x7be64440, 0xe4f9e70a, 0x009e3a60},
|
||||
{0xc7435591, 0xc61cc546, 0xe5e94dc4, 0xea99a96f, 0xdb8ff17d, 0x5b10e2b4, 0x3dd0ff10, 0x13f8fb9d, 0xe118b9e9,
|
||||
0xcbb1c0ce, 0x7ebf8a0d, 0x00b37258},
|
||||
{0xce5943e7, 0xd44fdb9d, 0x79fa927a, 0xcb7d41ea, 0xdcee72ca, 0x9a4bcebf, 0x11634905, 0x2317799d, 0x584055ac,
|
||||
0x3f1c302e, 0xdc2d0017, 0x013ef021},
|
||||
{0xa78a1578, 0x345cb052, 0x5961b8fe, 0x1ed4d48a, 0x74a5e2af, 0x5858e93c, 0x0fd17e9f, 0xaf643f0a, 0x79d94009,
|
||||
0x61530753, 0xde7b2f53, 0x010a3393},
|
||||
{0x813925df, 0x548b1d28, 0xca3e79b6, 0xabab3a4e, 0x7e51071a, 0xb3c9c068, 0x6c5fcedb, 0x8014e879, 0x95d9facc,
|
||||
0x3ba5db77, 0x7f5c3d2f, 0x0105c419},
|
||||
{0x26bc1104, 0xbb9cbd28, 0xe03cc852, 0x27f09abb, 0x22e5be61, 0x02763b4a, 0xb94fa254, 0xa3940542, 0xff34c35f,
|
||||
0xcf058850, 0x1482533c, 0x019f538f},
|
||||
{0xb3f42de9, 0xf2126047, 0xbeb0a1b8, 0xdb0451c4, 0x9aabc291, 0x1a945bc0, 0x7fe3a6f2, 0x13d08312, 0x390e1c07,
|
||||
0xd8fb13f1, 0x6b30562b, 0x005a41c4},
|
||||
{0xe8b3d5dd, 0x1c60fcc5, 0x75b3a464, 0x5d7babba, 0xf3989910, 0x0d9f52c7, 0x9beec571, 0x464a2840, 0x79689d4b,
|
||||
0x139c496f, 0x099e64c4, 0x0022c6a3},
|
||||
{0x023e0cd1, 0x9df6c2d5, 0xa6b747de, 0x8e23def9, 0x90da6876, 0x7bc83eee, 0xc88bb007, 0xdaeac352, 0x68bb6a7f,
|
||||
0x45cabb6f, 0x94697b34, 0x001e7154},
|
||||
{0x0203d905, 0xffcee91d, 0xc99df56d, 0xd878ee01, 0x210d754c, 0xa0e882f9, 0x7d0aec6a, 0x26c96db8, 0x8ff7afe4,
|
||||
0x46e2e145, 0x54749283, 0x015cd1b0}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x42846000, 0x18000000, 0x0b85aea2, 0xdd04a400, 0x8f79b117, 0x807a89c7, 0x8d116cf9, 0x3650a49d,
|
||||
0x631d82e0, 0x0be28875, 0x00d71d23},
|
||||
{0x00000001, 0x63c69000, 0x24000000, 0x114885f3, 0xcb86f600, 0x573689a3, 0x40b7ceab, 0x539a2376, 0x5178f6ec,
|
||||
0x14ac4450, 0x91d3ccb0, 0x0142abb4},
|
||||
{0x00000001, 0x7467a800, 0xaa000000, 0x1429f19b, 0xc2c81f00, 0x3b14f5e9, 0xa0d6711d, 0xb6de7eb4, 0x5f0d2013,
|
||||
0x6d73a508, 0x54cc6ecd, 0x017872fd},
|
||||
{0x00000001, 0x7cb83400, 0xed000000, 0x159aa76f, 0xbe68b380, 0x2d042c0c, 0xd0e5c256, 0x6880ac53, 0x65d734a7,
|
||||
0x19d75564, 0xb648bfdc, 0x019356a1},
|
||||
{0x00000001, 0x80e07a00, 0x0e800000, 0x1653025a, 0x3c38fdc0, 0xa5fbc71e, 0x68ed6af2, 0x4151c323, 0x693c3ef1,
|
||||
0x70092d92, 0xe706e863, 0x01a0c873},
|
||||
{0x00000001, 0x82f49d00, 0x1f400000, 0x16af2fcf, 0xfb2122e0, 0xe27794a6, 0x34f13f40, 0x2dba4e8b, 0x6aeec416,
|
||||
0x1b2219a9, 0xff65fca7, 0x01a7815c},
|
||||
{0x00000001, 0x83feae80, 0xa7a00000, 0x16dd4689, 0x5a953570, 0x00b57b6b, 0x1af32968, 0xa3ee943f, 0xebc806a8,
|
||||
0xf0ae8fb4, 0x8b9586c8, 0x01aaddd1},
|
||||
{0x00000001, 0x8483b740, 0xebd00000, 0x16f451e6, 0x8a4f3eb8, 0x8fd46ecd, 0x0df41e7b, 0xdf08b719, 0xac34a7f1,
|
||||
0xdb74caba, 0xd1ad4bd9, 0x01ac8c0b},
|
||||
{0x00000001, 0x84c63ba0, 0x8de80000, 0x16ffd795, 0xa22c435c, 0x5763e87e, 0x07749905, 0x7c95c886, 0x8c6af896,
|
||||
0x50d7e83d, 0xf4b92e62, 0x01ad6328},
|
||||
{0x00000001, 0x84e77dd0, 0xdef40000, 0x17059a6c, 0x2e1ac5ae, 0x3b2ba557, 0x8434d64a, 0xcb5c513c, 0xfc8620e8,
|
||||
0x8b8976fe, 0x863f1fa6, 0x01adceb7},
|
||||
{0x00000001, 0x84f81ee8, 0x877a0000, 0x17087bd8, 0x741206d7, 0xad0f83c3, 0xc294f4ec, 0xf2bf9597, 0xb493b511,
|
||||
0xa8e23e5f, 0xcf021848, 0x01ae047e},
|
||||
{0x00000001, 0x85006f74, 0x5bbd0000, 0x9709ec8e, 0x970da76b, 0xe60172f9, 0x61c5043d, 0x867137c5, 0x109a7f26,
|
||||
0xb78ea210, 0x73639499, 0x01ae1f62},
|
||||
{0x00000001, 0x850497ba, 0x45de8000, 0xd70aa4e9, 0xa88b77b5, 0x827a6a94, 0x315d0be6, 0xd04a08dc, 0x3e9de430,
|
||||
0x3ee4d3e8, 0x459452c2, 0x01ae2cd4},
|
||||
{0x00000001, 0x8506abdd, 0xbaef4000, 0xf70b0116, 0x314a5fda, 0xd0b6e662, 0x99290fba, 0xf5367167, 0x559f96b5,
|
||||
0x828fecd4, 0x2eacb1d6, 0x01ae338d},
|
||||
{0x80000001, 0x8507b5ee, 0x7577a000, 0x870b2f2d, 0xf5a9d3ed, 0xf7d52448, 0x4d0f11a4, 0x87aca5ad, 0x61206ff8,
|
||||
0xa465794a, 0xa338e160, 0x01ae36e9},
|
||||
{0x40000001, 0x85083af7, 0xd2bbd000, 0xcf0b4638, 0x57d98df6, 0x0b64433c, 0x2702129a, 0xd0e7bfd0, 0x66e0dc99,
|
||||
0xb5503f85, 0xdd7ef925, 0x01ae3897},
|
||||
{0xa0000001, 0x85087d7b, 0x815de800, 0x730b51be, 0x08f16afb, 0x952bd2b6, 0x93fb9314, 0x75854ce1, 0xe9c112ea,
|
||||
0x3dc5a2a2, 0xfaa20508, 0x01ae396e},
|
||||
{0xd0000001, 0x85089ebd, 0x58aef400, 0xc50b5781, 0xe17d597d, 0xda0f9a72, 0x4a785351, 0xc7d4136a, 0xab312e12,
|
||||
0x82005431, 0x89338af9, 0x01ae39da},
|
||||
{0xe8000001, 0x8508af5e, 0xc4577a00, 0xee0b5a62, 0x4dc350be, 0x7c817e51, 0xa5b6b370, 0xf0fb76ae, 0x0be93ba6,
|
||||
0x241dacf9, 0x507c4df2, 0x01ae3a10},
|
||||
{0x74000001, 0x8508b7af, 0x7a2bbd00, 0x828b5bd3, 0x83e64c5f, 0xcdba7040, 0xd355e37f, 0x058f2850, 0xbc454271,
|
||||
0x752c595c, 0x3420af6e, 0x01ae3a2b},
|
||||
{0xba000001, 0x8508bbd7, 0xd515de80, 0xcccb5c8b, 0x1ef7ca2f, 0x7656e938, 0xea257b87, 0x0fd90121, 0x947345d6,
|
||||
0x9db3af8e, 0xa5f2e02c, 0x01ae3a38},
|
||||
{0xdd000001, 0x8508bdeb, 0x028aef40, 0xf1eb5ce8, 0xec808917, 0x4aa525b3, 0x758d478b, 0x94fded8a, 0x808a4788,
|
||||
0xb1f75aa7, 0x5edbf88b, 0x01ae3a3f},
|
||||
{0xee800001, 0x8508bef5, 0x194577a0, 0x047b5d16, 0xd344e88c, 0x34cc43f1, 0xbb412d8d, 0xd79063be, 0xf695c861,
|
||||
0x3c193033, 0xbb5084bb, 0x01ae3a42},
|
||||
{0xf7400001, 0x8508bf7a, 0x24a2bbd0, 0x0dc35d2d, 0xc6a71846, 0x29dfd310, 0xde1b208e, 0x78d99ed8, 0x319b88ce,
|
||||
0x012a1afa, 0x698acad3, 0x01ae3a44},
|
||||
{0x7ba00001, 0x8508bfbd, 0xaa515de8, 0x12675d38, 0x40583023, 0xa4699aa0, 0xef881a0e, 0xc97e3c65, 0x4f1e6904,
|
||||
0xe3b2905d, 0x40a7edde, 0x01ae3a45},
|
||||
{0xbdd00001, 0x8508bfde, 0x6d28aef4, 0x94b95d3e, 0xfd30bc11, 0xe1ae7e67, 0x783e96ce, 0xf1d08b2c, 0xdddfd91f,
|
||||
0xd4f6cb0e, 0xac367f64, 0x01ae3a45},
|
||||
{0x5ee80001, 0x8508bfef, 0x4e94577a, 0xd5e25d41, 0xdb9d0208, 0x0050f04b, 0xbc99d52f, 0x85f9b28f, 0xa540912d,
|
||||
0xcd98e867, 0xe1fdc827, 0x01ae3a45},
|
||||
{0xaf740001, 0x8508bff7, 0xbf4a2bbd, 0x7676dd42, 0xcad32504, 0x0fa2293d, 0x5ec7745f, 0x500e4641, 0x08f0ed34,
|
||||
0x49e9f714, 0xfce16c89, 0x01ae3a45},
|
||||
{0xd7ba0001, 0x0508bffb, 0x77a515df, 0x46c11d43, 0xc26e3682, 0x174ac5b6, 0x2fde43f7, 0xb518901a, 0x3ac91b37,
|
||||
0x08127e6a, 0x0a533eba, 0x01ae3a46},
|
||||
{0xebdd0001, 0xc508bffd, 0xd3d28aef, 0x2ee63d43, 0x3e3bbf41, 0x1b1f13f3, 0x9869abc3, 0x679db506, 0x53b53239,
|
||||
0x6726c215, 0x110c27d2, 0x01ae3a46},
|
||||
{0xf5ee8001, 0x2508bffe, 0x01e94578, 0xa2f8cd44, 0x7c2283a0, 0x1d093b11, 0xccaf5fa9, 0x40e0477c, 0xe02b3dba,
|
||||
0x96b0e3ea, 0x14689c5e, 0x01ae3a46},
|
||||
{0x7af74001, 0x5508bfff, 0x18f4a2bc, 0x5d021544, 0x9b15e5d0, 0x1dfe4ea0, 0xe6d2399c, 0xad8190b7, 0xa666437a,
|
||||
0xae75f4d5, 0x1616d6a4, 0x01ae3a46},
|
||||
{0xbd7ba001, 0x6d08bfff, 0x247a515e, 0x3a06b944, 0x2a8f96e8, 0x9e78d868, 0x73e3a695, 0xe3d23555, 0x0983c65a,
|
||||
0xba587d4b, 0x16edf3c7, 0x01ae3a46},
|
||||
{0xdebdd001, 0x7908bfff, 0x2a3d28af, 0x28890b44, 0xf24c6f74, 0x5eb61d4b, 0x3a6c5d12, 0xfefa87a4, 0xbb1287ca,
|
||||
0x4049c185, 0x17598259, 0x01ae3a46},
|
||||
{0xef5ee801, 0xff08bfff, 0x2d1e9457, 0x1fca3444, 0xd62adbba, 0xbed4bfbd, 0x9db0b850, 0x0c8eb0cb, 0x13d9e883,
|
||||
0x034263a3, 0x178f49a2, 0x01ae3a46},
|
||||
{0xf7af7401, 0x4208bfff, 0x2e8f4a2c, 0x1b6ac8c4, 0xc81a11dd, 0xeee410f6, 0x4f52e5ef, 0x1358c55f, 0xc03d98df,
|
||||
0x64beb4b1, 0x17aa2d46, 0x01ae3a46},
|
||||
{0xfbd7ba01, 0x6388bfff, 0x2f47a516, 0x993b1304, 0x4111acee, 0x86ebb993, 0x2823fcbf, 0x16bdcfa9, 0x166f710d,
|
||||
0x957cdd39, 0x17b79f18, 0x01ae3a46},
|
||||
{0xfdebdd01, 0x7448bfff, 0x2fa3d28b, 0x58233824, 0x7d8d7a77, 0x52ef8de1, 0x148c8827, 0x187054ce, 0xc1885d24,
|
||||
0xaddbf17c, 0x17be5801, 0x01ae3a46},
|
||||
{0xfef5ee81, 0xfca8bfff, 0x2fd1e945, 0xb7974ab4, 0x9bcb613b, 0x38f17808, 0x8ac0cddb, 0x99499760, 0x9714d32f,
|
||||
0x3a0b7b9e, 0x17c1b476, 0x01ae3a46},
|
||||
{0xff7af741, 0x40d8bfff, 0x2fe8f4a3, 0xe75153fc, 0x2aea549d, 0x2bf26d1c, 0xc5daf0b5, 0x59b638a9, 0x81db0e35,
|
||||
0x802340af, 0x17c362b0, 0x01ae3a46},
|
||||
{0xffbd7ba1, 0xe2f0bfff, 0x2ff47a51, 0xff2e58a0, 0xf279ce4e, 0x2572e7a5, 0x63680222, 0x39ec894e, 0xf73e2bb8,
|
||||
0xa32f2337, 0x17c439cd, 0x01ae3a46},
|
||||
{0xffdebdd1, 0x33fcbfff, 0x2ffa3d29, 0x8b1cdaf2, 0xd6418b27, 0xa23324ea, 0xb22e8ad8, 0xaa07b1a0, 0x31efba79,
|
||||
0x34b5147c, 0x17c4a55c, 0x01ae3a46},
|
||||
{0xffef5ee9, 0xdc82bfff, 0x2ffd1e94, 0xd1141c1b, 0x48256993, 0xe093438d, 0xd991cf33, 0x621545c9, 0x4f4881da,
|
||||
0x7d780d1e, 0x17c4db23, 0x01ae3a46},
|
||||
{0xfff7af75, 0xb0c5bfff, 0xaffe8f4a, 0xf40fbcaf, 0x811758c9, 0x7fc352de, 0x6d437161, 0xbe1c0fde, 0x5df4e58a,
|
||||
0x21d9896f, 0x17c4f607, 0x01ae3a46},
|
||||
{0xfffbd7bb, 0x9ae73fff, 0xefff47a5, 0x058d8cf9, 0x1d905065, 0x4f5b5a87, 0xb71c4278, 0xec1f74e8, 0xe54b1762,
|
||||
0xf40a4797, 0x17c50378, 0x01ae3a46},
|
||||
{0xfffdebde, 0x0ff7ffff, 0x0fffa3d3, 0x8e4c751f, 0x6bcccc32, 0xb7275e5b, 0xdc08ab03, 0x0321276d, 0x28f6304f,
|
||||
0xdd22a6ac, 0x17c50a31, 0x01ae3a46}}};
|
||||
static constexpr storage<12> rou = {0xc563b9a1, 0x7eca603c, 0x06fe0bc3, 0x06df0a43, 0x0ddff8c6, 0xb44d994a,
|
||||
0x4512a3d4, 0x40fbe05b, 0x8aeffc9b, 0x30f15248, 0x05198a80, 0x0036a92e};
|
||||
TWIDDLES(modulus, rou)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 5;
|
||||
|
||||
@@ -4,193 +4,17 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_377 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 47;
|
||||
static constexpr unsigned modulus_bit_count = 253;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<8> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd,
|
||||
0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb,
|
||||
0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0xf5ee7fff, 0x2ffffffe, 0xa6558901,
|
||||
0xa3c84ffe, 0x9f4bb2e1, 0x65d35aa9, 0xed549aa1};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3,
|
||||
0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7,
|
||||
0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f,
|
||||
0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b,
|
||||
0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffff3, 0x7d1c7fff, 0x6ffffff2, 0x7257f50f,
|
||||
0x512c0fee, 0x16d81575, 0x2bbb9a9d, 0x0d4bda32};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x1beeec02, 0x4122dd1a, 0x74fee875, 0xbd1eae95,
|
||||
0x27b28e2f, 0x838557e2, 0x2290c02c, 0x07b30191};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000},
|
||||
{0xfbfa0a01, 0x0f830f7e, 0xd75769a0, 0x20f8b46c, 0xf05d5033, 0x7108bd18, 0x0788de01, 0x07405e08},
|
||||
{0x60b9bdae, 0xc78085a6, 0x789094f5, 0x3116ec22, 0xce87d660, 0x0a02a81d, 0xc2a94856, 0x0ead8236},
|
||||
{0x3e83a7cc, 0x6ffc39d9, 0x958a0a74, 0x117d996e, 0x0b92e8c9, 0xc242289d, 0x29d977d6, 0x0484efb4},
|
||||
{0x0111ec3f, 0x15455b00, 0xc5f6be6f, 0x6b62d7af, 0x337f2d07, 0xfcba0365, 0x43fccd26, 0x0f151842},
|
||||
{0xc31ec69b, 0x57951b2e, 0x2a37ce1f, 0x3e0a4be7, 0xcf3b198a, 0x960aeb4a, 0x341fd5cd, 0x04fb0673},
|
||||
{0xa921851f, 0x71c1b78e, 0x7808f239, 0x3c26340c, 0x976fb990, 0xbcc8f69b, 0xe880dc71, 0x06a5edb2},
|
||||
{0xc0f5679e, 0x7619eab5, 0x0dc0b9cd, 0x1f4cd10e, 0xbf6a480a, 0x7e1b70aa, 0x7f5461bb, 0x0ffc66da},
|
||||
{0xec5cbab2, 0x8159806d, 0x498264a3, 0x14ea1333, 0xe3abfaa6, 0x56bbe1d8, 0x02aa031f, 0x09d2b5c4},
|
||||
{0xc010c48a, 0xd2aa9562, 0x3b004b60, 0x447e5c11, 0x11e243bb, 0xd5a21c13, 0x0ab418b1, 0x01eab23e},
|
||||
{0xacff6986, 0x08715ee8, 0xa93924d0, 0xab01878a, 0x6e9ae5c4, 0xbfbc5e71, 0x26b08d6e, 0x0f8000bf},
|
||||
{0x3ddbc679, 0x06bc13b0, 0x615256ce, 0x7269a1f1, 0x1f5221a2, 0xf7716fbf, 0x8c66c14f, 0x0fa1f02c},
|
||||
{0x906f531f, 0xdd40f131, 0x30728eff, 0xb06b29c7, 0x88839294, 0xc891fd19, 0x646978e8, 0x04e88447},
|
||||
{0x6e259cdc, 0xb1e4b769, 0x00514e5e, 0xbcb0b709, 0x05113e7f, 0x74edb7c0, 0xe92e22af, 0x10c88511},
|
||||
{0x240ede5b, 0xebb2e898, 0x42cd84c6, 0xc2639185, 0x9408f956, 0xf79e8391, 0x94e87a7d, 0x06872fa1},
|
||||
{0x260678ff, 0xf8522249, 0xa8de9973, 0x6148cb16, 0x5a4e8d56, 0x5750f3f4, 0xbaeaf0c3, 0x0e805156},
|
||||
{0x3d766f80, 0x1b4b71cf, 0x1069012d, 0x47d21195, 0x9151ebec, 0x5635235f, 0x2b13c808, 0x093f7d91},
|
||||
{0x4637701d, 0x0848f958, 0x4c8353af, 0x8a750076, 0x0ef6174a, 0x485f4e4f, 0xf38db632, 0x078d97a1},
|
||||
{0x66a16869, 0x50c487c1, 0xd1fd4525, 0x380a66ab, 0x265e8539, 0xd455a01a, 0x064b5334, 0x0cd62875},
|
||||
{0x3358eb25, 0xdbc547bc, 0x722037db, 0x8909d398, 0x5e705b6d, 0x8b7075b5, 0x9bdaf407, 0x02694bb2},
|
||||
{0xf45b9621, 0x102fbfb0, 0xf04faac0, 0xe80f4241, 0x7ca61177, 0x0b830bfd, 0x7033169d, 0x10521892},
|
||||
{0xcc943028, 0xed2576ad, 0xfa4c6090, 0x846e49bc, 0x0049d8e6, 0xc74c1865, 0x665d7be5, 0x0e9c5a12},
|
||||
{0xafeb494b, 0x97319dcd, 0x1d78404c, 0xab30c83e, 0xf26ffe90, 0x452d8a48, 0xa36452c7, 0x0bfc2e92},
|
||||
{0xedc626c3, 0xf30e312d, 0xcf1f3a94, 0x8367a7ca, 0x917a1b28, 0x621e15e1, 0xf2e93b82, 0x07cd59f8},
|
||||
{0xf02ba42c, 0x553085d9, 0x1119b10d, 0x59662159, 0x6b8ea03f, 0xaa670958, 0x7ce92983, 0x066f6f5f},
|
||||
{0x4dd87a5e, 0xf423a283, 0xd9a4c364, 0x1fe46601, 0xbfdc7e9b, 0xda4addbf, 0x3bf94b2b, 0x0a7f2bd8},
|
||||
{0xe5f8848a, 0x270a2326, 0xa727567d, 0x97d14afa, 0x48746fc7, 0x1a3a5a4e, 0xa42f077a, 0x0044e4b1},
|
||||
{0x20b7298a, 0xd7652451, 0x65013b06, 0xc7c9a0b7, 0xad0d8457, 0x479b82a9, 0x0c99f5ce, 0x0bef1e5a},
|
||||
{0x1912f7fa, 0x77d7da1d, 0x299fd7d6, 0xbcb7a5b2, 0x142a4480, 0x705e45dd, 0xb492dbd8, 0x0dc835fd},
|
||||
{0xa0234d2d, 0xe943054c, 0xe5f5be5e, 0x673b0ee0, 0x5048a19a, 0xcdd48e41, 0xabc3cb99, 0x0997d277},
|
||||
{0xa9966ac4, 0x1ae0ea67, 0xda83fb3b, 0x4e2dbb1c, 0x0b51380e, 0xf77cf749, 0xb28a7670, 0x048b4b0e},
|
||||
{0xb14361d4, 0x7f1db43f, 0x25ab6d51, 0x7927e578, 0x383bf21e, 0xb43e52a5, 0xd27fa99f, 0x077595e9},
|
||||
{0xa90a2740, 0xfe3ca4f0, 0x512a7c7a, 0xd259ff36, 0xb41fe696, 0xbca3176a, 0xf33132ce, 0x05bd5ea3},
|
||||
{0xf284f768, 0xdeee484b, 0xe26a0475, 0x2a02e015, 0x88d968c2, 0xf0eb4925, 0x82a391c9, 0x0620ce9e},
|
||||
{0xbd83a3da, 0xd3b69b29, 0xe02ce197, 0x9543950f, 0xc2f87783, 0x80799665, 0xc15be215, 0x11ce8199},
|
||||
{0x1b29736e, 0x8f267f19, 0x1d5a0c3a, 0xa2e04d58, 0x1ae99514, 0x76803064, 0x57f7c806, 0x12129439},
|
||||
{0xf32d6bac, 0xa0b973d4, 0xf0d81b72, 0xae951889, 0x2e2daa0a, 0x51dbe098, 0x40d9af8f, 0x04679474},
|
||||
{0x22df9f13, 0x56313de8, 0x599e7536, 0xe2e75200, 0x6d163e50, 0xa1b4fce7, 0xc8111763, 0x0aec2172},
|
||||
{0x355dd694, 0x4258374d, 0x44c76a20, 0x5c31e8ac, 0xaa5fd062, 0x9b473969, 0x1a37b6b4, 0x0a693d77},
|
||||
{0x44ddbbdc, 0xbafb92a6, 0x26b01974, 0x63c7a02d, 0x5f28a274, 0x0ff86e13, 0x867f2e29, 0x0a7b462a},
|
||||
{0xd5fba57b, 0x90684fea, 0xe0defe98, 0xed237883, 0x030ae924, 0xc502b692, 0xe7a1ec2c, 0x08aa58e8},
|
||||
{0x5e9020dd, 0xade9d4b4, 0x87db8813, 0x489259d2, 0x25051238, 0x5ddce740, 0xb5bc4d11, 0x0c775db1},
|
||||
{0x293f8481, 0xd52cc17a, 0x6f133205, 0x041178fb, 0xb2961832, 0xbbc70d18, 0x481760cd, 0x073d34d1},
|
||||
{0xfdacff58, 0x8215b91d, 0x98331645, 0xd8d9177d, 0x439e803c, 0xe85223ad, 0xcca42c1f, 0x04aa8ef0},
|
||||
{0x01ab3a4d, 0x006f60fa, 0x814ba450, 0xe6600e15, 0xdf9eb147, 0xbde4df36, 0x33760d7b, 0x055d58fa},
|
||||
{0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3, 0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e},
|
||||
{0xd60fb046, 0xc9fa190c, 0xc5b4674e, 0xdb5c179b, 0xbc7b8726, 0x2b2bce0b, 0xbf6e69bf, 0x0e4eb338},
|
||||
{0x8ffc4ed5, 0x74732d1f, 0xb7f2eefc, 0x42d9f590, 0xa24dd4dd, 0xf70461e5, 0xef64676f, 0x03b6eba4},
|
||||
{0x102bbab0, 0x5a21f98a, 0x8d8e2efb, 0xa6a147a9, 0x7612906f, 0x0eb4f005, 0x47d8d2e3, 0x0e1a5481},
|
||||
{0xd01e5aa8, 0x6e509add, 0x6e3f123d, 0xe1582468, 0x8274db24, 0xbd6313ee, 0xd173a634, 0x05d5836e},
|
||||
{0xe975c0cf, 0x6aab3344, 0x6f1dc38e, 0xca362e0e, 0x1dd1743a, 0x2fe72cda, 0xc1b4c4c2, 0x0c1c956e},
|
||||
{0xec89a64f, 0x59fe97a0, 0xe8de5d4c, 0x579617d7, 0xc9c1ea7b, 0x256a305b, 0x53fa131b, 0x01ffae4e},
|
||||
{0x29bcb088, 0x463a73ff, 0xe1438e80, 0xee9e9a5e, 0x3c9369e4, 0x2a00951f, 0x80a32052, 0x09711183},
|
||||
{0x4bec8dd2, 0xa36899db, 0x96393687, 0x2946872e, 0x842df3c8, 0xd4b5734f, 0x5f5cd8fb, 0x0834098f},
|
||||
{0xe3c711b9, 0x4bc485f6, 0x648d1d7e, 0xf43a2598, 0xee88abaa, 0x7f981a0e, 0xec6a3f27, 0x0c88c9c3},
|
||||
{0x49046b52, 0x42bcc6c2, 0x56ab9ecc, 0xcc77294a, 0xe4df3ddd, 0x02ecb41a, 0x67f76726, 0x0e567d22},
|
||||
{0x91c64fc2, 0x1cc56cc3, 0xd16a490b, 0x8cb71e65, 0x14fac366, 0x984be37e, 0xa25d7ba5, 0x0a08e032},
|
||||
{0xd4f5941e, 0x966d9739, 0xe5772a73, 0x5805deb6, 0x5c1f970c, 0xe4eb0d33, 0xbdf35409, 0x039715db},
|
||||
{0xcc6518ac, 0x8419686c, 0x9c7a2366, 0x96dec3a8, 0x71724384, 0xefbfcac6, 0xaf34c239, 0x0c44b99a},
|
||||
{0xc18ff4fd, 0xcb66fe1b, 0x86c8d586, 0x588e18b3, 0x1dfab57c, 0xc6e6d2a3, 0x7d7d4efd, 0x10918ad2},
|
||||
{0x97a18f58, 0x56d6cf22, 0xd0d7abd9, 0x11710758, 0x5eb7a9c5, 0xd1a6608b, 0xc4937e38, 0x04059bdb},
|
||||
{0x4b1b63a9, 0x12998cbc, 0xcf420c9f, 0x0f780c6c, 0x129289ad, 0xa5e48723, 0x240a141d, 0x0a3a1223},
|
||||
{0x00db2b48, 0xa43c0e02, 0x933d10ee, 0x76585489, 0xc0ba6a80, 0x12d64af1, 0x2fad8d8e, 0x01940f43},
|
||||
{0x1d75bec9, 0xe29ef6c0, 0xd4b0183b, 0xead287a2, 0xedfd3795, 0x75a017cf, 0x64427c8e, 0x107f8d0f},
|
||||
{0xa26c8c12, 0xa6f4e1d1, 0xf6610f7e, 0x13571553, 0x56701caf, 0xd95e5df6, 0x2263d69d, 0x050e7b89},
|
||||
{0xc161761f, 0x271d7caf, 0xc369a371, 0xf1001d6f, 0x00e60f51, 0x65286415, 0xb74d14b8, 0x00b918f9},
|
||||
{0x03ad3139, 0x01d3f431, 0xa137ce16, 0xe56f6002, 0x1deb42e8, 0x97f53369, 0xaa37cddd, 0x033fa9ac},
|
||||
{0x60cf1330, 0x840f913b, 0x1df5ed87, 0x5610cde6, 0x72b36ddf, 0x858381b0, 0x6f64e0b7, 0x109bf66c},
|
||||
{0x930cee0b, 0x432d3626, 0xf26e8ba3, 0x55ed3efb, 0x14c5457f, 0x802eebcc, 0xe2310f22, 0x00d300e3},
|
||||
{0x4b9ac952, 0x3d29f5ba, 0xc8ea8f94, 0x7c7f2662, 0xcefc3052, 0x736ccb63, 0x0981f3cb, 0x04bfce2f},
|
||||
{0x5d4e643c, 0x3da791ea, 0x85bff013, 0xb6a956ef, 0xd73de6a3, 0x86c629a8, 0x6b8c48a9, 0x0a5a5f55},
|
||||
{0x49c6284a, 0x9ba6aa00, 0xeacbdc63, 0x0b8429fb, 0xedafdf37, 0x9b9c6c5b, 0xad0c78c6, 0x009907e8},
|
||||
{0x3e47b53f, 0x50380ce2, 0x3a9613fc, 0x6ea3c2d3, 0x4c87ab50, 0xfe743105, 0xd192221c, 0x07871979},
|
||||
{0xe978594b, 0x4ddd3320, 0x3abe3f79, 0xe5f36fbe, 0xe4dcff8e, 0x5dba9ef2, 0x7105148f, 0x0bfc27e2},
|
||||
{0x498fb549, 0xd5993cd5, 0x09da9272, 0x718adcee, 0x72bd5bc0, 0x9e03cbb4, 0xc592813f, 0x07206942},
|
||||
{0x78fd3239, 0xaf29730b, 0x40c3e723, 0xbd907ac9, 0x77f214f7, 0x5dcc0aad, 0xb05fb3a1, 0x02d958da},
|
||||
{0xdf80223d, 0x55f432c9, 0x11a2fed9, 0x23daf2f6, 0x41ae8c34, 0x9e43e003, 0x95f22373, 0x0d51533b},
|
||||
{0x7998b62c, 0xbb53132b, 0x22c9b4aa, 0x064a9186, 0x71d61334, 0xd56de253, 0x04e416f6, 0x10fcf25f},
|
||||
{0xdddb58ec, 0x41f8042f, 0x10886d85, 0x7dd54384, 0x622ff4b4, 0x19544f90, 0x050cc539, 0x02f0b49a},
|
||||
{0xa39b02a3, 0x8a3de898, 0xdc94422c, 0x068b2992, 0xf493db31, 0x1c5f019a, 0x11b0f668, 0x066b1790},
|
||||
{0x78500f1a, 0x98310dd7, 0x735ccb27, 0x1c6050bf, 0xb2081df4, 0x07b6fa7f, 0xfa0f1e20, 0x003edf24},
|
||||
{0x89b0ca6f, 0xb4d938e2, 0x2c897570, 0x0214eb59, 0x2d4cf27a, 0x56c45327, 0x3ed546a4, 0x10a2f358},
|
||||
{0xef01ed78, 0xf2828212, 0xf103c9ca, 0xa66094ac, 0x7a2d5573, 0xdceb481d, 0x8af46aab, 0x0190fcde},
|
||||
{0x526bf9fc, 0x023031cc, 0x79c209ba, 0x0e4136c0, 0x3ec42e5c, 0xe5234df1, 0x1d455234, 0x00cb9592},
|
||||
{0x33bf2a1c, 0x842b0c9c, 0xa29b9236, 0x1fd43c95, 0xc06795d3, 0x6b37a603, 0x0c1b712a, 0x00017b17},
|
||||
{0xaf858193, 0x2b955be2, 0x5fb5e378, 0xa513d8be, 0xa326aeb9, 0x88c4ebeb, 0xf3d45990, 0x00c378e2},
|
||||
{0x6464580f, 0x33e6c8c0, 0x3c4aa09f, 0x9d560eb3, 0xcc98f404, 0xb3f1a899, 0x8ca24b48, 0x012c1ea5},
|
||||
{0xe3b4dc56, 0xa0594a67, 0x91b698e1, 0xc8e6b582, 0x8df78057, 0x711cadbf, 0x396466f8, 0x0049abdf},
|
||||
{0x4ffa086a, 0xecc89610, 0xca06afc6, 0x4db82291, 0x8f3a6426, 0x9ae7c68c, 0x2a874432, 0x0b3dae8c},
|
||||
{0x3b3625b6, 0x1e62401f, 0x28471e5a, 0xd0692164, 0x5cad6b77, 0xb85aa9ec, 0xaa95acf2, 0x063e4b66},
|
||||
{0xb9112c51, 0x2542c2b2, 0x6e23b3ce, 0x36ead8da, 0x76476754, 0x9a268d13, 0xa1ad7cf1, 0x121f44ad}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af},
|
||||
{0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06},
|
||||
{0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2},
|
||||
{0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08},
|
||||
{0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33},
|
||||
{0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9},
|
||||
{0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93},
|
||||
{0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9},
|
||||
{0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab},
|
||||
{0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85},
|
||||
{0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1},
|
||||
{0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8},
|
||||
{0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003},
|
||||
{0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1},
|
||||
{0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007},
|
||||
{0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3},
|
||||
{0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08},
|
||||
{0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3},
|
||||
{0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309},
|
||||
{0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433},
|
||||
{0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9},
|
||||
{0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513},
|
||||
{0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539},
|
||||
{0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b},
|
||||
{0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555},
|
||||
{0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559},
|
||||
{0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c},
|
||||
{0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d},
|
||||
{0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e},
|
||||
{0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e},
|
||||
{0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e},
|
||||
{0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e},
|
||||
{0x7af74001, 0xa2117fff, 0x232ac481, 0x2b8e9efe, 0x2bdd8972, 0x139dfa73, 0x90d6f2a7, 0x12ab655e},
|
||||
{0xbd7ba001, 0x56117fff, 0x79956241, 0xc29c8afe, 0xc40a9cb9, 0xba2923c8, 0x9581cbfe, 0x12ab655e},
|
||||
{0xdebdd001, 0x30117fff, 0xa4cab121, 0x8e2380fe, 0x9021265d, 0x8d6eb873, 0x97d738aa, 0x12ab655e},
|
||||
{0xef5ee801, 0x1d117fff, 0xba655891, 0x73e6fbfe, 0xf62c6b2f, 0x771182c8, 0x9901ef00, 0x12ab655e},
|
||||
{0xf7af7401, 0x13917fff, 0xc532ac49, 0x66c8b97e, 0xa9320d98, 0x6be2e7f3, 0x99974a2b, 0x12ab655e},
|
||||
{0xfbd7ba01, 0x0ed17fff, 0xca995625, 0xe039983e, 0x02b4decc, 0xe64b9a89, 0x99e1f7c0, 0x12ab655e},
|
||||
{0xfdebdd01, 0x0c717fff, 0xcd4cab13, 0x1cf2079e, 0xaf764767, 0xa37ff3d3, 0x9a074e8b, 0x12ab655e},
|
||||
{0xfef5ee81, 0x0b417fff, 0xcea6558a, 0x3b4e3f4e, 0x05d6fbb4, 0x021a2079, 0x9a19f9f1, 0x12ab655e},
|
||||
{0xff7af741, 0x8aa97fff, 0xcf532ac5, 0xca7c5b26, 0xb10755da, 0xb16736cb, 0x9a234fa3, 0x12ab655e},
|
||||
{0xffbd7ba1, 0x4a5d7fff, 0xcfa99563, 0x12136912, 0x069f82ee, 0x090dc1f5, 0x9a27fa7d, 0x12ab655e},
|
||||
{0xffdebdd1, 0x2a377fff, 0xcfd4cab2, 0xb5def008, 0xb16b9977, 0xb4e10789, 0x9a2a4fe9, 0x12ab655e},
|
||||
{0xffef5ee9, 0x9a247fff, 0xcfea6559, 0x87c4b383, 0x06d1a4bc, 0x0acaaa54, 0x9a2b7aa0, 0x12ab655e},
|
||||
{0xfff7af75, 0x521affff, 0x4ff532ad, 0xf0b79541, 0x3184aa5e, 0x35bf7bb9, 0x9a2c0ffb, 0x12ab655e},
|
||||
{0xfffbd7bb, 0x2e163fff, 0x0ffa9957, 0x25310620, 0xc6de2d30, 0xcb39e46b, 0x9a2c5aa8, 0x12ab655e},
|
||||
{0xfffdebde, 0x1c13dfff, 0x6ffd4cac, 0xbf6dbe8f, 0x118aee98, 0x95f718c5, 0x9a2c7fff, 0x12ab655e}}};
|
||||
static constexpr storage<8> rou = {0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3,
|
||||
0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,54 +3,14 @@
|
||||
#define BLS12_381_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_381 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr unsigned modulus_bit_count = 381;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe,
|
||||
0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd,
|
||||
0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709,
|
||||
0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa,
|
||||
0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13,
|
||||
0x0d2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x00005555, 0x46010000, 0x4eac0000, 0xe1540001,
|
||||
0x094f09db, 0x98cf2d5f, 0x0c7aed40, 0x9b88b47b,
|
||||
0xbcb45328, 0xb4e45849, 0xc6801965, 0xe5feee15};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7,
|
||||
0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x0002fffd, 0x76090000, 0xc40c0002, 0xebf4000b,
|
||||
0x53c758ba, 0x5f489857, 0x70525745, 0x77ce5853,
|
||||
0xa256ec6d, 0x5c071a97, 0xfa80e493, 0x15f65ec3};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x380b4820, 0xf4d38259, 0xd898fafb, 0x7fe11274,
|
||||
0x14956dc8, 0x343ea979, 0x58a88de9, 0x1797ab14,
|
||||
0x3c4f538b, 0xed5e6427, 0xe8fb0ce9, 0x14fec701};
|
||||
static constexpr storage<12> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0,
|
||||
0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
PARAMS(modulus)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 1;
|
||||
// true if nonresidue is negative
|
||||
@@ -58,4 +18,4 @@ namespace bls12_381 {
|
||||
};
|
||||
} // namespace bls12_381
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@@ -4,148 +4,17 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_381 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 32;
|
||||
static constexpr unsigned modulus_bit_count = 255;
|
||||
static constexpr unsigned num_of_reductions = 2;
|
||||
static constexpr storage<8> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805,
|
||||
0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0xfffffffc, 0xfff96ffb, 0x4ef6900b,
|
||||
0x26876015, 0xcce76020, 0xa675f520, 0xcfb69d4c};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0x00000000, 0x0001a401, 0xac425bfd,
|
||||
0xf65e27fa, 0xccc627f7, 0xd66282b7, 0x8c1258ac};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0xfffffff8, 0xfff2dffb, 0x9dfa401f, 0x736ec016, 0xa62b8008, 0x50cfdee1, 0x22a90579,
|
||||
0x2aa71985, 0x09847dbd, 0x664d2877, 0x3a564fe5, 0xbcb3083c, 0x48304f6f, 0xd2f09de1, 0xd1fd83cf};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad,
|
||||
0xc1f823b4, 0x0e2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffffe, 0x00000001, 0x00034802, 0x5884b7fa,
|
||||
0xecbc4ff5, 0x998c4fef, 0xacc5056f, 0x1824b159};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0xfe75c040, 0x13f75b69, 0x09dc705f, 0xab6fca8f,
|
||||
0x4f77266a, 0x7204078a, 0x30009d57, 0x1bbe8693};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753},
|
||||
{0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000},
|
||||
{0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d},
|
||||
{0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e},
|
||||
{0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb},
|
||||
{0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac},
|
||||
{0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802},
|
||||
{0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59},
|
||||
{0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667},
|
||||
{0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098},
|
||||
{0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b},
|
||||
{0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0},
|
||||
{0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8},
|
||||
{0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8},
|
||||
{0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911},
|
||||
{0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd},
|
||||
{0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333},
|
||||
{0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db},
|
||||
{0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83},
|
||||
{0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f},
|
||||
{0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5},
|
||||
{0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3},
|
||||
{0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd},
|
||||
{0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc},
|
||||
{0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd},
|
||||
{0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580},
|
||||
{0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d},
|
||||
{0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d},
|
||||
{0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f},
|
||||
{0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b},
|
||||
{0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72},
|
||||
{0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753},
|
||||
{0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753},
|
||||
{0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e},
|
||||
{0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4},
|
||||
{0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c},
|
||||
{0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee},
|
||||
{0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d},
|
||||
{0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25},
|
||||
{0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e},
|
||||
{0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508},
|
||||
{0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d},
|
||||
{0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63},
|
||||
{0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7},
|
||||
{0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1},
|
||||
{0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac},
|
||||
{0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003},
|
||||
{0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c},
|
||||
{0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7},
|
||||
{0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950},
|
||||
{0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2},
|
||||
{0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6},
|
||||
{0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5},
|
||||
{0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9},
|
||||
{0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960},
|
||||
{0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6},
|
||||
{0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf},
|
||||
{0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f},
|
||||
{0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533},
|
||||
{0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff},
|
||||
{0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287},
|
||||
{0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6},
|
||||
{0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9},
|
||||
{0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e},
|
||||
{0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268},
|
||||
{0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd},
|
||||
{0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18},
|
||||
{0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5},
|
||||
{0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04},
|
||||
{0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab},
|
||||
{0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f},
|
||||
{0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9},
|
||||
{0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e},
|
||||
{0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878},
|
||||
{0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5},
|
||||
{0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c},
|
||||
{0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77},
|
||||
{0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365},
|
||||
{0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c},
|
||||
{0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57},
|
||||
{0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5},
|
||||
{0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014},
|
||||
{0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3},
|
||||
{0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583},
|
||||
{0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b},
|
||||
{0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df},
|
||||
{0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719},
|
||||
{0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736},
|
||||
{0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744},
|
||||
{0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b},
|
||||
{0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f},
|
||||
{0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751},
|
||||
{0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752},
|
||||
{0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}}};
|
||||
static constexpr storage<8> rou = {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2,
|
||||
0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,42 +3,14 @@
|
||||
#define BN254_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bn254 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522,
|
||||
0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45,
|
||||
0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x278302b9, 0xc3df73e9, 0x978e3572, 0x687e956e,
|
||||
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95,
|
||||
0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a,
|
||||
0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55,
|
||||
0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17,
|
||||
0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xc58f0d9d, 0xd35d438d, 0xf5c70b3d, 0x0a78eb28,
|
||||
0x7879462c, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x014afa37, 0xed84884a, 0x0278edf8, 0xeb202285,
|
||||
0xb74492d9, 0xcf63e9cf, 0x59e5c639, 0x2e671571};
|
||||
static constexpr storage<8> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
PARAMS(modulus)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 1;
|
||||
// true if nonresidue is negative
|
||||
|
||||
@@ -4,136 +4,17 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bn254 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 28;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<8> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090,
|
||||
0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121,
|
||||
0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x0fffffff, 0xbc1e0a6c, 0x86468f6e, 0xd7cc17b7,
|
||||
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975,
|
||||
0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb,
|
||||
0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7,
|
||||
0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520,
|
||||
0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x4ffffffb, 0xac96341c, 0x9f60cd29, 0x36fc7695,
|
||||
0x7879462e, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x6db1194e, 0xdc5ba005, 0xe111ec87, 0x090ef5a9,
|
||||
0xaeb85d5d, 0xc8260de4, 0x82c5551c, 0x15ebf951};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72},
|
||||
{0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1},
|
||||
{0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2},
|
||||
{0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6},
|
||||
{0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d},
|
||||
{0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd},
|
||||
{0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561},
|
||||
{0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e},
|
||||
{0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1},
|
||||
{0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584},
|
||||
{0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596},
|
||||
{0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49},
|
||||
{0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651},
|
||||
{0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f},
|
||||
{0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb},
|
||||
{0xce8f58e5, 0x276e5858, 0x5655210e, 0x0512eca9, 0xe70e61f3, 0xc3708cc6, 0xa7d74902, 0x1bf82deb},
|
||||
{0x7dcdc0e0, 0x84c6bfa5, 0x13f4d1bd, 0xc57088ff, 0xb5b95e4d, 0x5c0176fb, 0x3a8d46c1, 0x19ddbcaf},
|
||||
{0x613f6cbd, 0x5c1d597f, 0x8357473a, 0x30525841, 0x968e4915, 0x51829353, 0x844bca52, 0x2260e724},
|
||||
{0x53337857, 0x53422da9, 0xdbed349f, 0xac616632, 0x06d1e303, 0x27508aba, 0x0a0ed063, 0x26125da1},
|
||||
{0xfcd0b523, 0xb2c87885, 0xca5a5ce3, 0x58f50577, 0x8598fc8c, 0x4222150e, 0xae2bdd1a, 0x1ded8980},
|
||||
{0xa219447e, 0xa76dde56, 0x359eebbb, 0xec1a1f05, 0x8be08215, 0xcda0ceb6, 0xb1f8d9a7, 0x1ad92f46},
|
||||
{0xab80c59d, 0xb54d4506, 0x22dd991f, 0x5680c640, 0xbc23a139, 0x6b7bcf70, 0x5ab4c74d, 0x0210fe63},
|
||||
{0xe32b045b, 0x1c25f1e3, 0x2e832696, 0x145e0db8, 0x71c6441f, 0x852e2a03, 0x845d50d2, 0x0c9fabc7},
|
||||
{0xb878331a, 0xeccd4f3e, 0x8dc6d26e, 0x7b26b748, 0xd9130cd4, 0xa19b0361, 0x326341ef, 0x2a734ebb},
|
||||
{0x2f4e9212, 0x1c79bd57, 0x3d68f9ae, 0x605b52b6, 0xb8d89d4a, 0x0113eff9, 0xf1ff73b2, 0x1067569a},
|
||||
{0x80928c44, 0x034afc45, 0xf6437da2, 0xb4823532, 0x6dc6e364, 0x5f256a9f, 0xb363ebe8, 0x049ae702},
|
||||
{0x725b19f0, 0x9bd61b6e, 0x41112ed4, 0x402d111e, 0x8ef62abc, 0x00e0a7eb, 0xa58a7e85, 0x2a3c09f0}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000},
|
||||
{0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711},
|
||||
{0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf},
|
||||
{0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136},
|
||||
{0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2},
|
||||
{0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053},
|
||||
{0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327},
|
||||
{0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821},
|
||||
{0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7},
|
||||
{0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21},
|
||||
{0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2},
|
||||
{0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58},
|
||||
{0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f},
|
||||
{0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766},
|
||||
{0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7},
|
||||
{0xd96f0d22, 0x20e75251, 0x6bd4e8c9, 0xc01c7f08, 0xf9dd50c4, 0x37d8b00b, 0xc43ca872, 0x244cf010},
|
||||
{0x66c5174c, 0x7a823174, 0x22d5ad70, 0x7dbe118c, 0x111119c5, 0xf8d7c71d, 0x83780e87, 0x036853f0},
|
||||
{0xca535321, 0xd98f9924, 0xe66e6c81, 0x22dbc0ef, 0x664ae1b7, 0xa15cf806, 0xa314fb67, 0x06e402c0},
|
||||
{0xe26c91f3, 0x0852a8fd, 0x3baca626, 0x521f45cb, 0x2c51bfca, 0xab6473bc, 0x2100895f, 0x100c332d},
|
||||
{0xa376d0f0, 0xf5fac783, 0x940797d3, 0x50fd246e, 0x145f5278, 0xab14ecc1, 0x41091b14, 0x19c6dfb8},
|
||||
{0x7faa1396, 0x43dc52e2, 0x4beced23, 0xd437be9d, 0x6d3c38c3, 0xecc11e9c, 0x0c74a876, 0x2eb58439},
|
||||
{0xd69ca83b, 0x811b03e7, 0xa1a6eadf, 0x126a786b, 0x4e2b8e61, 0x1dd75c9f, 0xbda6792b, 0x2165a1a5},
|
||||
{0x110b737b, 0x02e1d4d1, 0xb323a164, 0x7be1488d, 0x9cd06163, 0xa334d317, 0xdb50e9cd, 0x2710c370},
|
||||
{0x9550fe47, 0x45d2f3cb, 0xf6a8efc4, 0x5f43327b, 0xe993ee18, 0x5bcd0d50, 0xb21de952, 0x27f035bd},
|
||||
{0x232e3983, 0x1d63cbae, 0xaa1b58e2, 0xac815161, 0x6aeb019e, 0x531f42a5, 0x03ca2ef5, 0x2dcd51d9},
|
||||
{0x980db869, 0xa8b64ba8, 0xc9718f6c, 0x4c787f72, 0x15d27ced, 0x7746a25a, 0x435a46e9, 0x110bf78f},
|
||||
{0x9d18157e, 0x72394277, 0xfd399d5d, 0xec9d51f8, 0x49d5387f, 0x6117635d, 0x9c229cd5, 0x01b77519}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739},
|
||||
{0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6},
|
||||
{0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4},
|
||||
{0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b},
|
||||
{0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff},
|
||||
{0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39},
|
||||
{0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5},
|
||||
{0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24},
|
||||
{0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b},
|
||||
{0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f},
|
||||
{0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9},
|
||||
{0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d},
|
||||
{0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50},
|
||||
{0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1},
|
||||
{0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa},
|
||||
{0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e},
|
||||
{0xf5360801, 0x8b9953a2, 0x859533b4, 0x7c05542e, 0x5ea6179c, 0xe83b698e, 0xb9f82f90, 0x30643640},
|
||||
{0x729b0401, 0xe7bda49b, 0x7fa75222, 0xd21c9e3b, 0x7013b7fc, 0x5045d7a2, 0xcd94e7dd, 0x30644259},
|
||||
{0xb14d8201, 0x15cfcd17, 0xfcb0615a, 0xfd284341, 0x78ca882c, 0x844b0eac, 0x57634403, 0x30644866},
|
||||
{0xd0a6c101, 0xacd8e155, 0x3b34e8f5, 0x12ae15c5, 0x7d25f045, 0x9e4daa31, 0x9c4a7216, 0x30644b6c},
|
||||
{0xe0536081, 0x785d6b74, 0xda772cc3, 0x1d70ff06, 0xff53a451, 0x2b4ef7f3, 0xbebe0920, 0x30644cef},
|
||||
{0x6829b041, 0x5e1fb084, 0xaa184eaa, 0x22d273a7, 0x406a7e57, 0xf1cf9ed5, 0x4ff7d4a4, 0x30644db1},
|
||||
{0x2c14d821, 0xd100d30c, 0x11e8df9d, 0x25832df8, 0xe0f5eb5a, 0x550ff245, 0x1894ba67, 0x30644e12},
|
||||
{0x0e0a6c11, 0x8a716450, 0x45d12817, 0xa6db8b20, 0x313ba1db, 0x86b01bfe, 0x7ce32d48, 0x30644e42},
|
||||
{0xff053609, 0x6729acf1, 0x5fc54c54, 0x6787b9b4, 0x595e7d1c, 0x1f8030da, 0xaf0a66b9, 0x30644e5a},
|
||||
{0xf7829b05, 0xd585d142, 0x6cbf5e72, 0xc7ddd0fe, 0x6d6feabc, 0x6be83b48, 0xc81e0371, 0x30644e66},
|
||||
{0x73c14d83, 0x0cb3e36b, 0x733c6782, 0xf808dca3, 0x7778a18c, 0x921c407f, 0xd4a7d1cd, 0x30644e6c},
|
||||
{0xb1e0a6c2, 0xa84aec7f, 0xf67aec09, 0x101e6275, 0xfc7cfcf5, 0xa536431a, 0xdaecb8fb, 0x30644e6f}}};
|
||||
static constexpr storage<8> rou = {0x725b19f0, 0x9bd61b6e, 0x41112ed4, 0x402d111e,
|
||||
0x8ef62abc, 0x00e0a7eb, 0xa58a7e85, 0x2a3c09f0};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,76 +3,15 @@
|
||||
#define BW6_761_BASE_BASE_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bw6_761 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 24;
|
||||
static constexpr unsigned modulus_bit_count = 761;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {
|
||||
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
|
||||
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
|
||||
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
|
||||
static constexpr storage<limbs_count> modulus_2 = {
|
||||
0x00000116, 0xe93a0000, 0xe0000105, 0xcd227cd0, 0xd5e1486f, 0x2c19f15d, 0xaccf51f0, 0x31422d84,
|
||||
0xe7d7fe5d, 0xe3b9a7b8, 0x25f3fb20, 0x0d1391da, 0x4b684609, 0x079d75fe, 0xcb09d232, 0xe0f74c71,
|
||||
0x010f7c82, 0xa504ebdf, 0x03a28d10, 0x724c30d5, 0x09f5fe7d, 0xa30f9280, 0xf7079c15, 0x0245d049};
|
||||
static constexpr storage<limbs_count> modulus_4 = {
|
||||
0x0000022c, 0xd2740000, 0xc000020b, 0x9a44f9a1, 0xabc290df, 0x5833e2bb, 0x599ea3e0, 0x62845b09,
|
||||
0xcfaffcba, 0xc7734f71, 0x4be7f641, 0x1a2723b4, 0x96d08c12, 0x0f3aebfc, 0x9613a464, 0xc1ee98e3,
|
||||
0x021ef905, 0x4a09d7be, 0x07451a21, 0xe49861aa, 0x13ebfcfa, 0x461f2500, 0xee0f382b, 0x048ba093};
|
||||
static constexpr storage<limbs_count> neg_modulus = {
|
||||
0xffffff75, 0x0b62ffff, 0x8fffff7d, 0x196ec197, 0x150f5bc8, 0xe9f30751, 0xa9985707, 0x675ee93d,
|
||||
0x8c1400d1, 0x8e232c23, 0xed06026f, 0x79763712, 0xda4bdcfb, 0xfc314500, 0x1a7b16e6, 0x8f8459c7,
|
||||
0x7f7841be, 0xad7d8a10, 0x7e2eb977, 0x46d9e795, 0xfb0500c1, 0x2e7836bf, 0x047c31f5, 0xfedd17db};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
|
||||
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
|
||||
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00004b79, 0xa27e0000, 0xa0008e35, 0xbae96db2, 0x82ebf7b1, 0x4aaf1d22, 0x7224cb3d, 0x7908fd92,
|
||||
0x29b17ed1, 0x6fe68290, 0xafc968db, 0xfe1b7282, 0x9028bbf0, 0xe1e548cb, 0x3a8ffc03, 0x09094ed6,
|
||||
0x61e9cf95, 0xd63ea631, 0x54918abf, 0xe834ca62, 0x52aa651e, 0xe52594ed, 0xb4c46a4f, 0xe2423252,
|
||||
0x6c09aae4, 0xa8cf17d8, 0xc5f5cee5, 0x2d80ffb0, 0x55bbc10d, 0x2dede100, 0xe2360382, 0x1f4e7a7c,
|
||||
0xae2fe433, 0x586c3847, 0x78eadae1, 0x915c56e1, 0x69a5ce00, 0xa35b2945, 0x767c08ca, 0x9d66e7fe,
|
||||
0xd8b88c77, 0x7e44cf6a, 0x67c9c873, 0xb29bfc93, 0xbbc80af9, 0x6a24005a, 0xc64ce3d5, 0x00014a92};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x000096f2, 0x44fc0000, 0x40011c6b, 0x75d2db65, 0x05d7ef63, 0x955e3a45, 0xe449967a, 0xf211fb24,
|
||||
0x5362fda2, 0xdfcd0520, 0x5f92d1b6, 0xfc36e505, 0x205177e1, 0xc3ca9197, 0x751ff807, 0x12129dac,
|
||||
0xc3d39f2a, 0xac7d4c62, 0xa923157f, 0xd06994c4, 0xa554ca3d, 0xca4b29da, 0x6988d49f, 0xc48464a5,
|
||||
0xd81355c9, 0x519e2fb0, 0x8beb9dcb, 0x5b01ff61, 0xab77821a, 0x5bdbc200, 0xc46c0704, 0x3e9cf4f9,
|
||||
0x5c5fc866, 0xb0d8708f, 0xf1d5b5c2, 0x22b8adc2, 0xd34b9c01, 0x46b6528a, 0xecf81195, 0x3acdcffc,
|
||||
0xb17118ef, 0xfc899ed5, 0xcf9390e6, 0x6537f926, 0x779015f3, 0xd44800b5, 0x8c99c7aa, 0x00029525};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00012de4, 0x89f80000, 0x800238d6, 0xeba5b6ca, 0x0bafdec6, 0x2abc748a, 0xc8932cf5, 0xe423f649,
|
||||
0xa6c5fb45, 0xbf9a0a40, 0xbf25a36d, 0xf86dca0a, 0x40a2efc3, 0x8795232e, 0xea3ff00f, 0x24253b58,
|
||||
0x87a73e54, 0x58fa98c5, 0x52462aff, 0xa0d32989, 0x4aa9947b, 0x949653b5, 0xd311a93f, 0x8908c94a,
|
||||
0xb026ab93, 0xa33c5f61, 0x17d73b96, 0xb603fec3, 0x56ef0434, 0xb7b78401, 0x88d80e08, 0x7d39e9f3,
|
||||
0xb8bf90cc, 0x61b0e11e, 0xe3ab6b85, 0x45715b85, 0xa6973802, 0x8d6ca515, 0xd9f0232a, 0x759b9ff9,
|
||||
0x62e231de, 0xf9133dab, 0x9f2721cd, 0xca6ff24d, 0xef202be6, 0xa890016a, 0x19338f55, 0x00052a4b};
|
||||
static constexpr storage<limbs_count> m = {0x2507e899, 0x11629ccd, 0x2e4424dd, 0xab1eef5b, 0x481d2cfa, 0xb82146a9,
|
||||
0x34e4227b, 0xf3182afa, 0xbeb25621, 0xf615fdb5, 0xccc261d6, 0xc4d8988c,
|
||||
0xaaf4fab0, 0x3590d652, 0x2ab9ff30, 0x9c5d0a04, 0x6ec3f460, 0xf6e8534f,
|
||||
0x88075ab4, 0xe8d78b06, 0x6f3fc8fe, 0xa8d3675b, 0x7bc5cd4b, 0x03852086};
|
||||
static constexpr storage<limbs_count> one = {
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {
|
||||
0xffff85d5, 0x0202ffff, 0x8fff8ce7, 0x5a582635, 0x827faade, 0x9e996e43, 0x0ee47df4, 0xda6aff32,
|
||||
0x1d94b80b, 0xece9cb3e, 0x5248240b, 0xc0e667a2, 0xdcad3905, 0xa74da5bf, 0x462f2103, 0x2352e7fe,
|
||||
0x08b1c87c, 0x7b565880, 0xe711022f, 0x45848a63, 0x9f65a9df, 0xd7a81ebb, 0xf127e87d, 0x0051f77e};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {
|
||||
0x181fa3f1, 0x27c2b2a0, 0x25a0e1b8, 0x7d9ca9f9, 0x0a004a5d, 0x35a910f0, 0xdb6b8539, 0x54655b3f,
|
||||
0x7695ef18, 0x5e763565, 0x4fae56bb, 0x226022c2, 0xb70d7652, 0x80e7f067, 0x72116b89, 0x435a8b4a,
|
||||
0x5d84e0d4, 0xac258fd6, 0x4427c7b2, 0x47ee8ac5, 0xd04e621b, 0x478c4048, 0x2add3e93, 0x00e0aa7d};
|
||||
static constexpr storage<24> modulus = {0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae,
|
||||
0x5667a8f8, 0x98a116c2, 0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed,
|
||||
0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638, 0x8087be41, 0x528275ef,
|
||||
0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
|
||||
PARAMS(modulus)
|
||||
};
|
||||
} // namespace bw6_761
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/snark_fields/bls12_377_base.cuh"
|
||||
|
||||
namespace bw6_761 {
|
||||
|
||||
@@ -3,46 +3,15 @@
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quartic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace babybear {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 1;
|
||||
static constexpr unsigned omegas_count = 28;
|
||||
static constexpr unsigned modulus_bit_count = 31;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<1> modulus = {0x78000001};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x78000001};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xf0000002};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x87ffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x78000001, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {0xf0000001, 0x38400000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {0xe0000002, 0x70800001};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {0xc0000004, 0xe1000003};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x88888887};
|
||||
static constexpr storage<limbs_count> one = {0x00000001};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffffe};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x38400000};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x78000000}, {0x10faa3e0}, {0x6b615c47}, {0x21ceed5a}, {0x2c1c3348}, {0x36c54c86}, {0x701dd01c},
|
||||
{0x56a9a28e}, {0x03e4cabf}, {0x5bacde79}, {0x1eb53838}, {0x1cd781af}, {0x0961a0b7}, {0x65098a87},
|
||||
{0x77851a0b}, {0x5bcba331}, {0x053fc0f5}, {0x5bf816e5}, {0x4bb124ab}, {0x571e9d4e}, {0x313732cb},
|
||||
{0x28aca172}, {0x4e319b52}, {0x45692d95}, {0x14ff4ba1}, {0x00004951}, {0x00000089}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x78000000}, {0x67055c21}, {0x5ee99486}, {0x0bb4c4e4}, {0x4ab33b27}, {0x044b4497}, {0x410e23aa},
|
||||
{0x08a7ee2b}, {0x563cb93d}, {0x3d70b4b7}, {0x77d999f1}, {0x6ceb65b5}, {0x49e7f635}, {0x0eae3a8c},
|
||||
{0x238b8a78}, {0x70d71b0a}, {0x0eaacc45}, {0x5af0f193}, {0x47303308}, {0x573cbfad}, {0x29ff72c0},
|
||||
{0x05af9dac}, {0x00ef24df}, {0x26985530}, {0x22d1ce4b}, {0x08359375}, {0x2cabe994}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x3c000001}, {0x5a000001}, {0x69000001}, {0x70800001}, {0x74400001}, {0x76200001}, {0x77100001},
|
||||
{0x77880001}, {0x77c40001}, {0x77e20001}, {0x77f10001}, {0x77f88001}, {0x77fc4001}, {0x77fe2001},
|
||||
{0x77ff1001}, {0x77ff8801}, {0x77ffc401}, {0x77ffe201}, {0x77fff101}, {0x77fff881}, {0x77fffc41},
|
||||
{0x77fffe21}, {0x77ffff11}, {0x77ffff89}, {0x77ffffc5}, {0x77ffffe3}, {0x77fffff2}}};
|
||||
static constexpr storage<1> rou = {0x00000089};
|
||||
TWIDDLES(modulus, rou)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 11;
|
||||
@@ -58,5 +27,5 @@ namespace babybear {
|
||||
/**
|
||||
* Extension field of `scalar_t`, enabled when the `-DEXT_FIELD` build option is set.
|
||||
*/
|
||||
typedef ExtensionField<fp_config> extension_t;
|
||||
typedef ExtensionField<fp_config, scalar_t> extension_t;
|
||||
} // namespace babybear
|
||||
|
||||
224
icicle/include/fields/stark_fields/m31.cuh
Normal file
224
icicle/include/fields/stark_fields/m31.cuh
Normal file
@@ -0,0 +1,224 @@
|
||||
#pragma once
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quartic_extension.cuh"
|
||||
|
||||
namespace m31 {
|
||||
template <class CONFIG>
|
||||
class MersenneField : public Field<CONFIG>
|
||||
{
|
||||
public:
|
||||
// Copy constructor: forwards `other` to the Field<CONFIG> base.
HOST_DEVICE_INLINE MersenneField(const MersenneField& other) : Field<CONFIG>(other) {}
|
||||
// Builds an element from a single 32-bit word (defaults to 0); the value is passed to the base as `{x}`.
// No reduction of `x` is performed in this constructor.
HOST_DEVICE_INLINE MersenneField(const uint32_t& x = 0) : Field<CONFIG>({x}) {}
|
||||
// Builds an element directly from a limb storage object.
HOST_DEVICE_INLINE MersenneField(storage<CONFIG::limbs_count> x) : Field<CONFIG>{x} {}
|
||||
// Converting constructor from the generic Field<CONFIG> base type.
HOST_DEVICE_INLINE MersenneField(const Field<CONFIG>& other) : Field<CONFIG>(other) {}
|
||||
|
||||
// Returns the element constructed from CONFIG::zero.
static constexpr HOST_DEVICE_INLINE MersenneField zero() { return MersenneField(CONFIG::zero); }
|
||||
|
||||
// Returns the element constructed from the first limb of CONFIG::one.
static constexpr HOST_DEVICE_INLINE MersenneField one() { return MersenneField(CONFIG::one.limbs[0]); }
|
||||
|
||||
// Wraps a raw 32-bit value as a field element; the value is stored as given (no reduction here).
static constexpr HOST_DEVICE_INLINE MersenneField from(uint32_t value) { return MersenneField(value); }
|
||||
|
||||
// Host-only: wraps the result of Field<CONFIG>::rand_host() in a MersenneField.
static HOST_INLINE MersenneField rand_host() { return MersenneField(Field<CONFIG>::rand_host()); }
|
||||
|
||||
// Fills out[0 .. size-1] with elements produced by rand_host(), one call per slot.
// `out` must point to at least `size` writable elements; nothing is written when size <= 0.
static void rand_host_many(MersenneField* out, int size)
|
||||
{
|
||||
for (int i = 0; i < size; i++)
|
||||
out[i] = rand_host();
|
||||
}
|
||||
|
||||
// Assignment from the generic Field<CONFIG> base type.
// Delegates to Field<CONFIG>::operator= unless `other` is this very object, then returns *this.
HOST_DEVICE_INLINE MersenneField& operator=(const Field<CONFIG>& other)
|
||||
{
|
||||
if (this != &other) { Field<CONFIG>::operator=(other); }
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Returns limb 0 of the underlying storage as a raw 32-bit word.
HOST_DEVICE_INLINE uint32_t get_limb() const { return this->limbs_storage.limbs[0]; }
|
||||
|
||||
// The `Wide` struct represents a redundant 32-bit form of the Mersenne Field.
|
||||
// Working representation holding one uint32_t word. The operators below fold any bits above
// bit 31 of a 64-bit intermediate back into the low word with weight 2, which is valid because
// 2^32 == 2 (mod 2^31 - 1). Results are therefore congruent to the true value but are not
// necessarily fully reduced below the modulus.
// NOTE(review): the existing comments treat the modulus as 2^31 - 1; CONFIG itself is not
// visible here — confirm.
struct Wide {
|
||||
uint32_t storage;
|
||||
// Copies the single limb of a field element into a Wide value.
static constexpr HOST_DEVICE_INLINE Wide from_field(const MersenneField& xs)
|
||||
{
|
||||
Wide out{};
|
||||
out.storage = xs.get_limb();
|
||||
return out;
|
||||
}
|
||||
// Wraps a raw 32-bit word as a Wide value without modifying it.
static constexpr HOST_DEVICE_INLINE Wide from_number(const uint32_t& xs)
|
||||
{
|
||||
Wide out{};
|
||||
out.storage = xs;
|
||||
return out;
|
||||
}
|
||||
// Addition: the sum is formed in 64 bits, then the carry above bit 31 is folded back twice
// (high word * 2 + low word) so the result fits in 32 bits again.
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys)
|
||||
{
|
||||
uint64_t tmp = (uint64_t)xs.storage + ys.storage; // max: 2^33 - 2 = 2^32(1) + (2^32 - 2)
|
||||
tmp = ((tmp >> 32) << 1) + (uint32_t)(tmp); // 2(1)+(2^32-2) = 2^32(1)+(0)
|
||||
return from_number((uint32_t)((tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(1) + 0 = 2
|
||||
}
|
||||
// Subtraction: CONFIG::modulus_3 is added first so the difference cannot go negative, then the
// high word is folded back once.
// NOTE(review): the intermediate is only evaluated in 64 bits if CONFIG::modulus_3 is itself a
// 64-bit value; its declaration is not visible here — confirm.
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys)
|
||||
{
|
||||
uint64_t tmp = CONFIG::modulus_3 + xs.storage -
|
||||
ys.storage; // max: 3(2^31-1) + 2^32-1 - 0 = 2^33 + 2^31-4 = 2^32(2) + (2^31-4)
|
||||
return from_number(((uint32_t)(tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(2)+(2^31-4) = 2^31
|
||||
}
|
||||
// Negation: computes CONFIG::modulus_3 - xs and folds the high word back once.
// Note: the MODULUS_MULTIPLE template parameter is not used in the body.
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE Wide neg(const Wide& xs)
|
||||
{
|
||||
uint64_t tmp = CONFIG::modulus_3 - xs.storage; // max: 3(2^31-1) - 0 = 2^32(1) + (2^31 - 3)
|
||||
return from_number(((uint32_t)(tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(1)+(2^31-3) = 2^31 - 1
|
||||
}
|
||||
// Multiplication: the full 64-bit product is folded twice (high word * 2 + low word) so the
// result fits in 32 bits.
friend HOST_DEVICE_INLINE Wide operator*(Wide xs, const Wide& ys)
|
||||
{
|
||||
uint64_t t1 = (uint64_t)xs.storage * ys.storage; // max: 2^64 - 2^33+1 = 2^32(2^32 - 2) + 1
|
||||
t1 = ((t1 >> 32) << 1) + (uint32_t)(t1); // max: 2(2^32 - 2) + 1 = 2^32(1) + (2^32 - 3)
|
||||
return from_number((((uint32_t)(t1 >> 32)) << 1) + (uint32_t)(t1)); // max: 2(1) + (2^32 - 3) = 2^32 - 1
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField div2(const MersenneField& xs, const uint32_t& power = 1)
|
||||
{
|
||||
uint32_t t = xs.get_limb();
|
||||
return MersenneField{{((t >> power) | (t << (31 - power))) & MersenneField::get_modulus().limbs[0]}};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField neg(const MersenneField& xs)
|
||||
{
|
||||
uint32_t t = xs.get_limb();
|
||||
return MersenneField{{t == 0 ? t : MersenneField::get_modulus().limbs[0] - t}};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField reduce(Wide xs)
|
||||
{
|
||||
const uint32_t modulus = MersenneField::get_modulus().limbs[0];
|
||||
uint32_t tmp = (xs.storage >> 31) + (xs.storage & modulus); // max: 1 + 2^31-1 = 2^31
|
||||
tmp = (xs.storage >> 31) + (xs.storage & modulus); // max: 1 + 0 = 1
|
||||
return MersenneField{{tmp == modulus ? 0 : tmp}};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField inverse(const MersenneField& x)
|
||||
{
|
||||
uint32_t xs = x.limbs_storage.limbs[0];
|
||||
if (xs <= 1) return xs;
|
||||
uint32_t a = 1, b = 0, y = xs, z = MersenneField::get_modulus().limbs[0], e, m = z;
|
||||
while (1) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
e = __ffs(y) - 1;
|
||||
#else
|
||||
e = __builtin_ctz(y);
|
||||
#endif
|
||||
y >>= e;
|
||||
if (a >= m) {
|
||||
a = (a & m) + (a >> 31);
|
||||
if (a == m) a = 0;
|
||||
}
|
||||
a = ((a >> e) | (a << (31 - e))) & m;
|
||||
if (y == 1) return a;
|
||||
e = a + b;
|
||||
b = a;
|
||||
a = e;
|
||||
e = y + z;
|
||||
z = y;
|
||||
y = e;
|
||||
}
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE MersenneField operator+(MersenneField xs, const MersenneField& ys)
|
||||
{
|
||||
uint32_t m = MersenneField::get_modulus().limbs[0];
|
||||
uint32_t t = xs.get_limb() + ys.get_limb();
|
||||
if (t > m) t = (t & m) + (t >> 31);
|
||||
if (t == m) t = 0;
|
||||
return MersenneField{{t}};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE MersenneField operator-(MersenneField xs, const MersenneField& ys)
|
||||
{
|
||||
return xs + neg(ys);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE MersenneField operator*(MersenneField xs, const MersenneField& ys)
|
||||
{
|
||||
uint64_t x = (uint64_t)(xs.get_limb()) * ys.get_limb();
|
||||
uint32_t t = ((x >> 31) + (x & MersenneField::get_modulus().limbs[0]));
|
||||
uint32_t m = MersenneField::get_modulus().limbs[0];
|
||||
if (t > m) t = (t & m) + (t >> 31);
|
||||
if (t > m) t = (t & m) + (t >> 31);
|
||||
if (t == m) t = 0;
|
||||
return MersenneField{{t}};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Wide mul_wide(const MersenneField& xs, const MersenneField& ys)
|
||||
{
|
||||
return Wide::from_field(xs) * Wide::from_field(ys);
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE Wide sqr_wide(const MersenneField& xs)
|
||||
{
|
||||
return mul_wide(xs, xs);
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField sqr(const MersenneField& xs) { return xs * xs; }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField to_montgomery(const MersenneField& xs) { return xs; }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField from_montgomery(const MersenneField& xs) { return xs; }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField pow(MersenneField base, int exp)
|
||||
{
|
||||
MersenneField res = one();
|
||||
while (exp > 0) {
|
||||
if (exp & 1) res = res * base;
|
||||
base = base * base;
|
||||
exp >>= 1;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
};
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 1;
|
||||
static constexpr unsigned omegas_count = 1;
|
||||
static constexpr unsigned modulus_bit_count = 31;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x7fffffff};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xfffffffe};
|
||||
static constexpr uint64_t modulus_3 = 0x17ffffffd;
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffffffc};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x87ffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x7fffffff, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {0x00000001, 0x3fffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {0x00000002, 0x7ffffffe};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {0x00000004, 0xfffffffc};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x80000001};
|
||||
static constexpr storage<limbs_count> one = {0x00000001};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x00000001};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x00000001};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {{{0x7ffffffe}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {{{0x7ffffffe}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {{{0x40000000}}};
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 11;
|
||||
// true if nonresidue is negative.
|
||||
static constexpr bool nonresidue_is_negative = false;
|
||||
};
|
||||
|
||||
/**
|
||||
* Scalar field. Is always a prime field.
|
||||
*/
|
||||
typedef MersenneField<fp_config> scalar_t;
|
||||
|
||||
/**
|
||||
* Extension field of `scalar_t` enabled if `-DEXT_FIELD` env variable is.
|
||||
*/
|
||||
typedef ExtensionField<fp_config, scalar_t> extension_t;
|
||||
} // namespace m31
|
||||
@@ -2,626 +2,18 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
// modulus = 3618502788666131213697322783095070105623107215331596699973092056135872020481 (2^251+17*2^192+1)
|
||||
namespace stark252 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned modulus_bit_count = 252;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr unsigned omegas_count = 192;
|
||||
static constexpr storage<8> modulus = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000011, 0x08000000};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000011, 0x08000000};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000022, 0x10000000};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000044, 0x20000000};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffee, 0xf7ffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x08000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000022, 0x10000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000121, 0x10000000, 0x00000001, 0x00400000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000044, 0x20000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000242, 0x20000000, 0x00000002, 0x00800000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000088, 0x40000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000484, 0x40000000, 0x00000004, 0x01000000};
|
||||
static constexpr storage<limbs_count> m = {0x8c81fffb, 0x00000002, 0xfeccf000, 0xffffffff,
|
||||
0x0000907f, 0x00000000, 0xffffffbc, 0x1fffffff};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffffe1, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xfffffdf0, 0x07ffffff};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000121, 0x10000000, 0x00000001, 0x00400000};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x08000000},
|
||||
{0xf41337e3, 0x2a616626, 0xac8320da, 0xc5268e56, 0x4329f8c7, 0x53312066, 0x29a2995b, 0x06250239},
|
||||
{0xee6feebb, 0x3ada5e1d, 0xe4412e87, 0x98c62155, 0x2f9c676e, 0xc90adb1e, 0x0de874d9, 0x063365fe},
|
||||
{0x6021e539, 0x8337c45f, 0xbbf30245, 0xb0bdf467, 0x514425f3, 0x4537602d, 0x88826aba, 0x05ec467b},
|
||||
{0x9b48a8ab, 0x2225638f, 0x1a8e7981, 0x26da375d, 0xce6246af, 0xfcdca219, 0x9ecd5c85, 0x0789ad45},
|
||||
{0xb2703765, 0xd6871506, 0xf9e225ec, 0xd09bd064, 0x10826800, 0x5e869a07, 0xe82b2bb5, 0x0128f0fe},
|
||||
{0xdd4af20f, 0xfdab65db, 0x56f9ddbc, 0xefa66822, 0x1b03a097, 0x587781ce, 0x9556f9b8, 0x000fcad1},
|
||||
{0xff0cb347, 0x9f1bc8d7, 0xd0e87cd5, 0xc4d78992, 0xdd51a717, 0xbc7924d5, 0xfd121b58, 0x00c92ecb},
|
||||
{0xc13a1d0b, 0xcc4074a0, 0xe3bc8e32, 0xa1f811a9, 0x6d4b9bd4, 0x0234b46e, 0x7880b4dc, 0x011d07d9},
|
||||
{0xec89c4f1, 0xa206c054, 0xdc125289, 0x653d9e35, 0x711825f5, 0x72406af6, 0x46a03edd, 0x0659d839},
|
||||
{0x0fa30710, 0x45391692, 0x11b54c6c, 0xd439f572, 0xa3492c1e, 0xed5ebbf4, 0xb5d9a6de, 0x010f4d91},
|
||||
{0x7afd187f, 0x9273dbbc, 0x91ee171f, 0xdb5375bc, 0x6749ae3d, 0xc061f425, 0x6ec477cf, 0x003d14df},
|
||||
{0x3112b02d, 0x8171e1da, 0xadf9bf78, 0x5c4564eb, 0x5689b232, 0x68c34184, 0x6538624f, 0x0363d70a},
|
||||
{0x606082e1, 0x3e5a42f0, 0x76fc314a, 0x5edd09f0, 0x0f673d7c, 0xd650df25, 0x34832dba, 0x0393a32b},
|
||||
{0x13a77460, 0xe3efc75d, 0x62ef8a01, 0x93898bc8, 0x8bdbd9b3, 0x1c3a6e5c, 0x611b7206, 0x034b5d5d},
|
||||
{0x309d9da9, 0x80ee9837, 0xf51eddbc, 0x1646d633, 0x4901fab8, 0xb9d2cd85, 0x9978ee09, 0x01eb6d84},
|
||||
{0x2755bfac, 0xa7b1f98c, 0xeb7aa1c1, 0x9ec8116c, 0x3109e611, 0x0eeadedd, 0xc9761a8a, 0x06a6f98d},
|
||||
{0x9745a046, 0xce7b0a8b, 0xe411ee63, 0x7ff61841, 0x635f8799, 0x34f67453, 0xef852560, 0x04768803},
|
||||
{0xbffaa9db, 0x1727fce0, 0xf973dc22, 0x858f5918, 0x223f6558, 0x3e277fa0, 0xf71614e3, 0x02d25658},
|
||||
{0x8574e81f, 0xe3d47b99, 0x7fc4c648, 0xc727c9af, 0xee93dc85, 0x581d81ca, 0xca8a00d9, 0x0594beaf},
|
||||
{0x0e5ffcb8, 0x00654744, 0xe7c1b2fd, 0x030530a6, 0xecbf157b, 0x27e46d76, 0xbeea04f1, 0x01f4c2bf},
|
||||
{0x3e3a2f4b, 0xead33145, 0xd6482f17, 0xd841544d, 0x8d24a344, 0x9822fb10, 0x31eeac7c, 0x03e43835},
|
||||
{0xb40bdbe8, 0x01af11c3, 0xb32a3b23, 0xd7c9c0a1, 0xcd0be360, 0x81cb2e43, 0xafb3df1a, 0x01054544},
|
||||
{0x77156db2, 0xf6b13488, 0xddc0f211, 0x1ad6f3be, 0xd664f4da, 0xe643d3ea, 0x174a8e80, 0x071a47b8},
|
||||
{0x4ca88ffc, 0xb86b03a4, 0x8ef9a25a, 0x6e3398e6, 0xf5fa4665, 0xce9a0d37, 0x5c437763, 0x06e8e769},
|
||||
{0x4586dbc3, 0x32609f1d, 0xaa2da684, 0x03148f22, 0x4795d346, 0xa679e36b, 0x9e51225c, 0x03d8d2c7},
|
||||
{0xea5f81cf, 0xeac5be9e, 0x64c12e72, 0x102e16b2, 0xfee282e4, 0xce0bc0d9, 0xa93b28f3, 0x01f05206},
|
||||
{0xbb6422f9, 0x258e96d2, 0x617c5468, 0x751615d8, 0x6056f032, 0x27145cb6, 0x81c06d84, 0x057a7971},
|
||||
{0xb030713c, 0xf42231bb, 0x3a96c59e, 0xae9c3f9a, 0xf1ee840c, 0x5397e8e2, 0xf2b87657, 0x05e7deca},
|
||||
{0xf81f58b4, 0x209745aa, 0x91af248d, 0x74a64310, 0xc04b00b7, 0xe566a8e1, 0x80fb4cea, 0x022bde40},
|
||||
{0x5de74517, 0x8265b62b, 0xb9b9f2c9, 0x6a788149, 0xa9565d98, 0x6fec2239, 0x573f0c28, 0x060ac0c4},
|
||||
{0xd3ce8992, 0xc129d0f1, 0x81c43de5, 0x719252eb, 0x48221e1a, 0xfea566de, 0x0be8ced2, 0x050732ed},
|
||||
{0x2216f1c8, 0x9aae0db3, 0xd7220015, 0x95e231ac, 0x6340df6f, 0xbd6ae160, 0x16a6e39c, 0x0166c8e2},
|
||||
{0x76b0a92e, 0x3ccd9d2b, 0x7d671a9d, 0x1feb39d7, 0x2109fd56, 0x3c49a630, 0x5d4ec292, 0x07badc4b},
|
||||
{0x5dd8c4c3, 0x081c3166, 0xec14ba21, 0x9dca12d8, 0xcf93b2e5, 0xf58069e2, 0x571ddc34, 0x02399005},
|
||||
{0x08a616fc, 0x65a19cf4, 0x8aea6ff7, 0x860d442c, 0x6896a559, 0x4f24ab19, 0x3d7f5ae6, 0x0685db92},
|
||||
{0x622478c4, 0x051093f0, 0x3fab8962, 0x5c200627, 0x21254c39, 0x2aa7ae1b, 0x7b116fb9, 0x0100fff9},
|
||||
{0x00637050, 0x2693b834, 0x22440235, 0x3fef7c1b, 0x3481c4fe, 0x31150ac1, 0xf261b6de, 0x0772cb7a},
|
||||
{0xd990d491, 0x6966804c, 0xc7505f35, 0x46aba1bc, 0xaceeb7f7, 0x4f696cba, 0x6474b8f0, 0x02b73cad},
|
||||
{0xf39cd3e8, 0x7d13e948, 0x62a1db76, 0xd5c33593, 0x4d1be159, 0x7fd3b59b, 0x3676644e, 0x066d3f61},
|
||||
{0xb3bd8b7e, 0x5a896ef3, 0xba5762ab, 0x2319450a, 0x1a545f8b, 0x226f0a07, 0x55446d35, 0x02760973},
|
||||
{0x140e5623, 0x38eaa186, 0x94be15ba, 0x5a48d469, 0xad75d32a, 0xe4f1f15b, 0x2f14e2f1, 0x039ccdaa},
|
||||
{0xe6fcfdb2, 0xad7108d3, 0x9c9f7f04, 0xfadfc050, 0x9df95366, 0xdbb20071, 0xe555c739, 0x02c4d3fa},
|
||||
{0xc3111bcb, 0xb640956f, 0xbb11fb86, 0xcd942bbd, 0xa3db81cd, 0xa4b4eb09, 0x684fdb65, 0x041ed5ed},
|
||||
{0xdd5ca525, 0x462b41fa, 0x153c3d28, 0xbcc17ccd, 0x6b06db5c, 0x8a81d137, 0x4a050358, 0x05f5cf39},
|
||||
{0xcc60fb85, 0x374012a6, 0x34d1905d, 0x978f9785, 0x4e17ff38, 0x713383d4, 0x1055c25d, 0x07f3796f},
|
||||
{0x0643771f, 0x852ba56e, 0x86781a31, 0xadfa956c, 0xb26a3811, 0x2ee2fccf, 0xdbd56ba7, 0x009214ce},
|
||||
{0x68bc148c, 0xe2bf6c4b, 0x01c203ce, 0xd38dbf38, 0x97923b55, 0x27f73df4, 0x5081f7d9, 0x030a2e81},
|
||||
{0xf11422a0, 0xbe23b78f, 0x99cdc2e0, 0xd4f3510d, 0xaa13ffe5, 0xcb05b3da, 0xc724e0c5, 0x028d98a5},
|
||||
{0x96934000, 0x15277271, 0x588c8a51, 0x8013dd5e, 0x9ed55af8, 0x77772f7c, 0x03549e60, 0x020895f8},
|
||||
{0x34db29f8, 0xc0cc8556, 0x67455b5d, 0x5582a9ff, 0x8a9a38b5, 0x12862a43, 0xa59fd242, 0x059655bc},
|
||||
{0x94ceaf98, 0x39bc5131, 0xc71ccc0d, 0x99f4d1a0, 0x54acb87c, 0xc565794d, 0xc33590ef, 0x0593fcef},
|
||||
{0xe97bf51c, 0xa2922d09, 0x3200d367, 0xdbb866a2, 0x4ad9302d, 0x05849ed8, 0xdf93f2b5, 0x000c447e},
|
||||
{0x850fb317, 0x2755d6c2, 0xd45eb3f5, 0x36feeeea, 0xdfbc1d97, 0x4f4471d7, 0x4e3003f8, 0x07ec8926},
|
||||
{0xb6a791f1, 0x38b8dc2a, 0x27a1bbb1, 0x79d6de48, 0xcad54cf2, 0x78c40b06, 0xa43bc898, 0x036dd150},
|
||||
{0x1cc4133c, 0xefa72477, 0x477d39be, 0x5327d617, 0x2c5db3a4, 0xfd1de1f9, 0xc9a18a1c, 0x0147819b},
|
||||
{0xf8133966, 0x275e6b02, 0x87969b48, 0x82bc79b9, 0x5d1e2f0e, 0x85b1f9bd, 0xc819531b, 0x00f9ea29},
|
||||
{0x120edfab, 0x9e0392a5, 0xe3681a15, 0x07403ad4, 0x8a1c3817, 0xa8d469d8, 0x89f15c6f, 0x0395e7fc},
|
||||
{0x641826ac, 0x7f405a9f, 0x6861e2ce, 0xa566e755, 0xba82a050, 0x8a3a08ba, 0xea63598d, 0x071dd923},
|
||||
{0x5f65c188, 0x1d2b7538, 0xd6fc9625, 0xcb704d0f, 0xf59deccc, 0x18729111, 0x52fe1979, 0x07595020},
|
||||
{0x8a08756f, 0x0175aa1c, 0x7fa7c6c4, 0x9a76a312, 0x6e93f6f3, 0x0bfa523a, 0x258c2f23, 0x03d70de4},
|
||||
{0x8229376d, 0x8a0b9d02, 0x2c65c94e, 0x08421430, 0xd34b0aa6, 0x1160b441, 0xbbfb9491, 0x03b9eb75},
|
||||
{0x827caf53, 0x91874856, 0x37e8a006, 0xdfdcae7a, 0x04e3af6b, 0x6dcfc3f2, 0xba66ff37, 0x0592823d},
|
||||
{0x72fb8b0d, 0xb0a6628d, 0xa72b1f03, 0x7d3eef8b, 0x8dd54dbe, 0x5be965ba, 0x96d1fe4c, 0x0114a278},
|
||||
{0x06051d55, 0x0256d8e6, 0xb9fa9dcc, 0xbf152353, 0x44140d6e, 0x6ef2c68c, 0xc9c0fea6, 0x015f291a},
|
||||
{0xed992efc, 0xa1826724, 0x771da991, 0x9a58fd99, 0xd0b370a1, 0xce51a153, 0x826df846, 0x03c53bf5},
|
||||
{0xcc7bf8c3, 0x3909aad7, 0xb08ddfa2, 0xd408ae7d, 0xff94d9fc, 0x2e9ab5d6, 0xf11cbcf6, 0x0020a1b2},
|
||||
{0x3e257b43, 0x448fff07, 0x5fd9edca, 0x00f4a128, 0x7b429f71, 0x6f8987e3, 0x0fc8b522, 0x013336c1},
|
||||
{0x062bd860, 0xef78ac4c, 0xf5d787d2, 0x6539ee52, 0xbb65576e, 0x113b6071, 0x9f3d7f85, 0x0160e952},
|
||||
{0xf966d24e, 0x0c4e7c07, 0x318277e8, 0x011853d8, 0x7c287f58, 0x93bae650, 0xf64289f7, 0x00b974a1},
|
||||
{0x30408cb9, 0x66d19420, 0x0430b017, 0x709ca6c6, 0x23d95951, 0xb174ad46, 0x111f4192, 0x030762f8},
|
||||
{0xf246c901, 0xb9d70015, 0x57a1cdec, 0xd3616cb1, 0x0d732fdb, 0x61aab25e, 0x12d620d8, 0x0712858b},
|
||||
{0x16334e1a, 0x8ec7e113, 0xa96aeeab, 0x0021a55b, 0xfd639175, 0x8f4c1366, 0x69bc866a, 0x07acdde9},
|
||||
{0x23088fc7, 0x1fb24e5e, 0x92a88089, 0xcacd65df, 0x17343c48, 0x103ec3c8, 0xc387a3b5, 0x03d296b9},
|
||||
{0xcd9fedee, 0xae703c5b, 0x7853b30d, 0xd0c3e0c6, 0x12abaef5, 0xc1e326b3, 0x5d57bb23, 0x04f42d7f},
|
||||
{0x1824b92c, 0x19cd1b4e, 0x81ebc117, 0xc5daaff4, 0xb8183a1d, 0xeeedaa59, 0xe28baf8a, 0x069d8f0c},
|
||||
{0x9dc50729, 0x9733e8df, 0xf1b9f411, 0xd7e0dbb9, 0x50edf7ea, 0x59e4dbd2, 0x4059cb5f, 0x002259fe},
|
||||
{0xb79a92b1, 0x5e3197fc, 0x59086db1, 0xbfddf5c5, 0xdbea4a69, 0x234d8639, 0x4d0a367d, 0x05dd79b0},
|
||||
{0xa86eec0c, 0x8cc1d845, 0x573b44d7, 0x3cac8839, 0x7b0de880, 0x8b8d8735, 0x68c99722, 0x01c5ef12},
|
||||
{0xc2ba0f23, 0x12680395, 0x471f947e, 0xd43bcf85, 0xcc9d9b24, 0x19935b68, 0x108eec6a, 0x06263e1e},
|
||||
{0x5b7be972, 0x29617bad, 0xc55b1b68, 0x0ab73eef, 0x2544381e, 0x07f12359, 0x63a080a0, 0x0161444d},
|
||||
{0x312f9080, 0x07a4b921, 0x2f530413, 0x64c25a07, 0x7d71ca2f, 0x3f6903d7, 0x04838ba1, 0x06917cab},
|
||||
{0x10bdb6cc, 0xec7cfc1f, 0x3bcf85c7, 0x7046910d, 0x7bc3ff5f, 0x7ef09e22, 0x385306d4, 0x004b0b60},
|
||||
{0x3a41158a, 0x82d06d78, 0xaa690d1f, 0x37c4a361, 0x7117c44a, 0x700766e1, 0xab40d7e4, 0x031261d0},
|
||||
{0x91b88258, 0x384c5e8b, 0x009b84dc, 0xd777abd5, 0xe7eed224, 0x02102b55, 0xdbefe5e9, 0x03b22830},
|
||||
{0x8770a4be, 0xec982f60, 0x961f56ad, 0x4b92533d, 0xf428c4b9, 0x7df85fbb, 0x2d9291a4, 0x057e4876},
|
||||
{0xf4910a60, 0x6ace9477, 0x9fc63b7f, 0xdb5a705f, 0x72328369, 0x4cc157b4, 0xc282db6f, 0x05b8acbc},
|
||||
{0x57269216, 0x4c69edd9, 0xbfee24ac, 0xd04f1eeb, 0x2a069b18, 0xacda8418, 0x5990b523, 0x03761a4f},
|
||||
{0xc608d246, 0x7f2e2048, 0x4664959b, 0xd4f52ed2, 0x11c1d565, 0x354e3bf7, 0x457eabd3, 0x0156d837},
|
||||
{0xd455f483, 0xea8cbefd, 0x5d940684, 0x33cd5725, 0x8091a287, 0x2d89a777, 0x939b3ef3, 0x06159e4a},
|
||||
{0x4fa405aa, 0xe43439f1, 0xdbe5763d, 0xa258cfc7, 0x78d7b607, 0x9491173a, 0x9ad23eac, 0x01775d66},
|
||||
{0xd772d637, 0x2413e92c, 0x5eac4588, 0x22c99c9f, 0x71a0cdd2, 0xa2bd1d06, 0xfdd73a36, 0x05e88acb},
|
||||
{0xb2bfa1ad, 0x68886b35, 0x35d2dfb6, 0x7a969b62, 0x9767a44a, 0x359ddb45, 0x52e5da6d, 0x00f1a46e},
|
||||
{0x1c5a4861, 0x4ef9fe94, 0x1c841a89, 0x1540cf67, 0xa9bed4f5, 0x8b51336f, 0xf63c32ab, 0x0240fc41},
|
||||
{0x87086e50, 0x7f5c626d, 0x049c46e2, 0x38ec0386, 0x0c597ea7, 0x30b003fd, 0x6660a912, 0x07a8faa1},
|
||||
{0x7dac5d19, 0x2810d2b4, 0x80339f39, 0x040470c4, 0xc946ab30, 0x30d97769, 0x52667151, 0x019fa1f9},
|
||||
{0x5e7c57a2, 0x00e13c8e, 0x2a0fb7bd, 0x95490ca0, 0x08451e35, 0x6af2b76d, 0xcf78c579, 0x04c3a3a1},
|
||||
{0x55e39071, 0xa848b2f2, 0xf132ce21, 0x6831da1d, 0xe080e2ec, 0x439bdda4, 0xadd19a7d, 0x06680f09},
|
||||
{0x6be27786, 0xfebd2a8b, 0x093a5a7f, 0x2cdd8f78, 0xdcb004b3, 0xbc0746a1, 0xd12450ed, 0x005f950a},
|
||||
{0x39759f39, 0xe1462ca6, 0x7bbe087d, 0x0c37dca2, 0x0c8661cb, 0x198de347, 0x7e531b52, 0x03602655},
|
||||
{0x66d7eb25, 0xaf24ead2, 0x5ee6eb03, 0x27cea560, 0x4f6267c7, 0xe9aa6d50, 0xe5dd28e0, 0x00c962b1},
|
||||
{0xb11706c9, 0x3c3407a5, 0xcf0e1b88, 0x44370686, 0x9fbda5e3, 0x5d0e7af0, 0x41cf0a6b, 0x010d235f},
|
||||
{0x358cfcc2, 0x1fbc42a3, 0xc78f7dac, 0x5a2e6ea2, 0xa12773f2, 0x33e089ca, 0xed7788c1, 0x04bef156},
|
||||
{0xbea42f88, 0xdb150649, 0x5f3fb72a, 0x71329f69, 0x86b82de7, 0x7aa46ad0, 0xc6093912, 0x07913b17},
|
||||
{0xb3b67067, 0xb2b074ae, 0xc55f4455, 0x4f17674d, 0xdeb0740d, 0x9a112816, 0x316cc0d3, 0x06bd0cde},
|
||||
{0x1a264ab3, 0x962ceb6b, 0xd99f7159, 0xd5930255, 0x24a4096e, 0x7db961b0, 0x3e50dfed, 0x050c8e5c},
|
||||
{0x443af109, 0xc3eebe54, 0x86946633, 0x2ca03fcb, 0x04badff6, 0x6e6eef04, 0x82210754, 0x05d92ab7},
|
||||
{0xa5c0dca4, 0xcbadd8ad, 0x5ac103a0, 0x4cf688cf, 0x26e5d435, 0x571dbdb9, 0x220fc7db, 0x074ffc4d},
|
||||
{0x88740c3e, 0x70b80432, 0x03821aa8, 0x4a959d50, 0xe4df06d8, 0x3eb8c3a0, 0xcac57496, 0x025a425b},
|
||||
{0x55205413, 0xdcadfd29, 0x90b17b01, 0xda7456d2, 0x73696a28, 0x437c2fda, 0x329f6855, 0x00a8a188},
|
||||
{0xa828431e, 0x3cde2cdd, 0x9ed29340, 0x60e6c362, 0x7c13e145, 0xef00dfa9, 0xba288c0b, 0x04159bec},
|
||||
{0x9065f8ee, 0x41d351cd, 0xa4845868, 0x4e2e298f, 0xbdb3834a, 0xbcba6ac1, 0xea85f2ec, 0x042c8871},
|
||||
{0x1fda880f, 0xc4dc0d20, 0x26fc2d5c, 0x4f0f9dc4, 0x86839de7, 0x2c555343, 0xf698dd8f, 0x04d12da8},
|
||||
{0x21bd655a, 0x3a6299bd, 0x8cfd772f, 0x2e4aea22, 0xd2c2590d, 0x09716ad9, 0xb298587d, 0x053b143c},
|
||||
{0xa95e3cbf, 0xd35f3e32, 0x04eac3cf, 0xe380dee7, 0x0f7e3e6b, 0x27e6570a, 0xbed46774, 0x008cd288},
|
||||
{0x9583f023, 0xe42676b0, 0x75cfaa7e, 0x39d57dd6, 0x4f0bb727, 0x10d4a8d0, 0x27c81bdd, 0x016b03c9},
|
||||
{0x4decc603, 0x89b394f7, 0xd24690f4, 0xd7322ee9, 0x947a00fd, 0xbbc12961, 0x82e8fa75, 0x00886d23},
|
||||
{0xeb0faad4, 0x7b48a33b, 0x60e0b0c8, 0x4c11ef26, 0x36f0f791, 0x4163a401, 0xa4074faf, 0x07986fea},
|
||||
{0x31d9587e, 0x96044919, 0x9049fd2d, 0xb1cab341, 0x9c0eea09, 0xf28c83c9, 0x5c6620aa, 0x033b74dd},
|
||||
{0x13ee028c, 0xde558d16, 0x5d4233b0, 0x4dcf3932, 0x2e422803, 0x7bd46887, 0xe1261bff, 0x04b4757d},
|
||||
{0xd48e9b00, 0x6c80848f, 0x10b6a121, 0x937c1e6e, 0xe9f2008c, 0x7782f8b8, 0x2bc7171c, 0x00217358},
|
||||
{0x324228d8, 0xba523265, 0x682ee17c, 0x4ebe5506, 0x3be009f9, 0x6c646fe8, 0x8594b924, 0x046de7bc},
|
||||
{0x3b50645a, 0x270aa33a, 0x2a9c6282, 0x28fd23fd, 0xcfe96515, 0x5b2fa771, 0x3f812377, 0x063039de},
|
||||
{0xaba4060a, 0xa1da52b0, 0x0374be67, 0x7f191fd6, 0x0d7d2126, 0x14c64d05, 0xf7f77381, 0x00419cb7},
|
||||
{0xe4b19319, 0x07eda692, 0x0fef654e, 0x6190d3f6, 0x0b21ca7e, 0x893b0916, 0x073c48b4, 0x0367a3c7},
|
||||
{0xc520e3ea, 0x8fd405b2, 0x487e93c9, 0x73b4f714, 0xd5142cff, 0x70b7ee88, 0xa320eca2, 0x058fb800},
|
||||
{0x72ef3623, 0x3b5a8740, 0xaff370fd, 0xbff4af42, 0xe338258e, 0x64c137b0, 0xc7afafca, 0x05ac9917},
|
||||
{0x82ccc89a, 0x99c46a0d, 0x9ff87868, 0x05ae3209, 0xa489481f, 0x6249b2a4, 0xbaead348, 0x0056c235},
|
||||
{0xba0ea95e, 0x5a0640f3, 0xc03af976, 0x518db5cd, 0x5a250a06, 0x1c3223aa, 0xbc3442eb, 0x0397b942},
|
||||
{0xacf14a4f, 0x164f0705, 0x33eb6c0e, 0x386c2325, 0xd7264573, 0xdfaceff6, 0xd1e22f80, 0x00e94509},
|
||||
{0x9ff51bc7, 0x8964ee48, 0x57bbca04, 0x3e0f5037, 0x6510630c, 0xe78d6c8d, 0xdf0a61c1, 0x041d6351},
|
||||
{0x45aa1b58, 0x47892f3b, 0x915c1c70, 0x5a1787ba, 0x67f20d25, 0xbaa23359, 0x0c4bc4be, 0x00e1919f},
|
||||
{0xb9975332, 0x2a87c37a, 0xcdecebc9, 0x95db523f, 0x1d0db226, 0x703949ee, 0x4c3842dd, 0x03152c1d},
|
||||
{0xecfb6f72, 0x0eff7e6a, 0x9493a628, 0xb3a83455, 0xd596cd51, 0xced58dd1, 0x25ee51ff, 0x033dee78},
|
||||
{0x72a30547, 0x1f4047ca, 0xd40b6d0f, 0x9feefa06, 0x94db1b38, 0x836ffd80, 0xa0992ed5, 0x037c79f6},
|
||||
{0xceb3dffd, 0x7ffa095d, 0x768e2cb3, 0x23097a65, 0x373f6222, 0xd228b1f9, 0xc57feea2, 0x06309a6b},
|
||||
{0xecd4c6f7, 0x7a5bead4, 0x7e70f7de, 0xab92043c, 0x220db8d8, 0xf78f890e, 0x2865a07e, 0x052eeb98},
|
||||
{0xdf253531, 0x8e9a6336, 0xbafa937b, 0xb24b664a, 0x303b1f5a, 0xc89f660e, 0x876bd8c7, 0x07ea9749},
|
||||
{0x1d4c3fec, 0xd958e726, 0x06fbef31, 0xa5eb368f, 0xba6a027d, 0x0c911679, 0x5f80f992, 0x06321b51},
|
||||
{0x046b49b2, 0x3ca61d9e, 0x6aa9c29a, 0x616a47d6, 0x9e9462dc, 0x27a7ffeb, 0x8971b70e, 0x0794ed38},
|
||||
{0x9f47496f, 0xdb259a57, 0xa6b0481c, 0x7f3e3f90, 0x4afab47a, 0x76f42726, 0xc5a79505, 0x07b9da96},
|
||||
{0x57e7aeed, 0x908e6450, 0x81648127, 0xe86db2fb, 0x8dd76882, 0x53f3c573, 0x72327da6, 0x02b37324},
|
||||
{0x73a220ec, 0x82a941c9, 0x7f25beea, 0xb4cbecb7, 0xbfb061d6, 0x746ded71, 0x641b3f3d, 0x00f7af27},
|
||||
{0xcbd4ba67, 0x69b8f4df, 0x3d526981, 0x5ee3ac6f, 0x145cef8c, 0x9372af4e, 0x72a31ef1, 0x05cc1cc6},
|
||||
{0x62d1ba57, 0xce898b0d, 0xee3fa47e, 0x86ba0504, 0x4395b70d, 0xc68233b1, 0x80eb8d60, 0x024cfa58},
|
||||
{0x74d51c41, 0x8fa83850, 0x60f8f9da, 0x5824a285, 0xaf1bea48, 0xa7a2067e, 0x5455acc3, 0x04ba49f2},
|
||||
{0x324c6039, 0x0a1e223e, 0x7b18a9d0, 0x28312228, 0x88b6ecda, 0xb60c1f93, 0x687ba365, 0x053097d8},
|
||||
{0xa7dae551, 0x5604b398, 0xe2e11609, 0x51f02e33, 0xe58e2094, 0x0b51a085, 0x3a3ecc28, 0x078679d6},
|
||||
{0x92d52444, 0xe24b5528, 0x33d0fa70, 0xf77e35ad, 0x9bcbfb57, 0x8af5a7b7, 0x022748d2, 0x015c5f15},
|
||||
{0xc993b168, 0xc002185c, 0x293ad856, 0x5586addb, 0x8ec50726, 0x69c1bfcf, 0x5fd97ea1, 0x00d514fc},
|
||||
{0x8866c747, 0x52d7a9a2, 0x01d6ee05, 0x9bd77465, 0xc3a87a88, 0x576adf96, 0xfa69f0ec, 0x0693e89a},
|
||||
{0x05903be3, 0xcfe50d90, 0xcf739179, 0xbe651dd1, 0x2ae70678, 0xba80ffda, 0xb55b06cc, 0x051dbe40},
|
||||
{0x5585a6f0, 0x4adb5947, 0x9fa37e68, 0x14634b99, 0xa2a910a8, 0x27da5fbf, 0xa99c704d, 0x022a91ce},
|
||||
{0xe2ddaacd, 0xfabab7b8, 0x60cf9603, 0x1edf6a83, 0xbfadddd3, 0x20b04218, 0xa81dbffa, 0x03e0ddb6},
|
||||
{0xda25c9fd, 0xf9c1e3a3, 0xac57ece3, 0x41ff4e1e, 0xdd684055, 0x9ba50868, 0x46d8156a, 0x01b30314},
|
||||
{0xab76a462, 0x30e067cc, 0x08f1b99b, 0x2d84c4c2, 0x73edc56f, 0x6b399ae0, 0x62cfacb2, 0x02f187e1},
|
||||
{0x34fc5356, 0xb085758e, 0xf805fedf, 0xbafe9a1c, 0x95272d01, 0x0bcf423c, 0x1feca651, 0x01df4a81},
|
||||
{0x4c264e97, 0xd3bd9833, 0xc08b1798, 0xc0b192be, 0xdc3ed49e, 0x42724e80, 0xbaee9a58, 0x04100303},
|
||||
{0xe49749c9, 0xb653c919, 0x09f8e2fc, 0x07dbe557, 0xca71e551, 0xbb172d28, 0x7989c8fd, 0x07f5f801},
|
||||
{0xdf1d9004, 0x9412a9f3, 0xbe90d67e, 0xddcf6d66, 0x4692f803, 0x1dbfd679, 0x524c2944, 0x04f4fae1},
|
||||
{0x5707d134, 0xd413afdf, 0x887fd7e9, 0xf8a339cf, 0x84883580, 0xf74544f4, 0x851739e0, 0x0554f72a},
|
||||
{0x59824907, 0xe3827564, 0x421182c9, 0x352eab2a, 0x8f8530f2, 0x19138257, 0x20275950, 0x04e3bf44},
|
||||
{0x33f928b7, 0xef7660f9, 0xf5952362, 0xb7cb0619, 0xf17eb8d7, 0x5b24913b, 0x8e8b8082, 0x00f4804c},
|
||||
{0x5bd84f3e, 0xe7020613, 0x736a1659, 0x7ee777e1, 0x0795844b, 0x34ca7cb6, 0x7503ddc3, 0x07ce12e4},
|
||||
{0x6d8408a5, 0xbbbafb3f, 0x519dadca, 0xe0f02915, 0x0670f5d4, 0x5acba199, 0x4a93340f, 0x0056db45},
|
||||
{0xe404f6c5, 0x73f8a435, 0x01731858, 0x68cd3f7a, 0xd01f3de9, 0x214d3134, 0xd5d75a88, 0x05fb76be},
|
||||
{0xf976eb41, 0x3a66ad86, 0xcd08787a, 0x6401b6d3, 0x7d1e82a8, 0x575950f3, 0x55ee9d49, 0x00e34b33},
|
||||
{0x0cc5cbf4, 0xbff2f4e6, 0xec205dcd, 0x5a6b430d, 0xc94862af, 0xa8114ab3, 0x2fe8be1f, 0x0247ecf5},
|
||||
{0x8b98bf40, 0xded3bc57, 0xe26b66b3, 0xb658c8c4, 0x8d4220db, 0x8bd91c55, 0x94d2adea, 0x00d109f2},
|
||||
{0xedeaec42, 0x0fbfd336, 0x5d407ae8, 0xd94f928d, 0x727e74b5, 0xe5e4a16b, 0xc8c22dd8, 0x06a550df},
|
||||
{0x135e0ee9, 0xe378a012, 0x856a1aef, 0x5be86512, 0xd8febe77, 0x7de04ce2, 0xea43d59b, 0x03ddeed6},
|
||||
{0x005a1d86, 0xc04dc48c, 0x6f29053d, 0x64f4bbd2, 0x9be0aef5, 0x10b1b3db, 0xcc625a0b, 0x03745ca5},
|
||||
{0x1f4f0e85, 0x6c72bd40, 0xc2069cba, 0x4234afd0, 0xb99395f4, 0xc25b262f, 0xae0874e2, 0x0605f6a2},
|
||||
{0xdd756b6d, 0x9513e0d4, 0xf0c137cd, 0x5127a167, 0x7f01c538, 0x1a12a425, 0x00a4483b, 0x068b3aaf},
|
||||
{0x79bc6c86, 0x7a5b3e70, 0x375dc240, 0x5a337909, 0xe111d6ce, 0x46d6fe3c, 0x2ff2ca50, 0x02708b05},
|
||||
{0x1524ad8c, 0x1181eb95, 0x52294490, 0xd0744ddc, 0x848605cf, 0x88ed5b7b, 0xb478c12a, 0x04b9cb49},
|
||||
{0x27105dae, 0x98cb2411, 0xed5c1361, 0x3efa8fae, 0xd498e337, 0x6fa736a5, 0x1e369b4f, 0x038e3b07},
|
||||
{0x98c8db7f, 0xbc5915ae, 0x50425ae8, 0x1f3c8f96, 0xfa86658a, 0x77d60416, 0x28ec2dda, 0x02bc8b30},
|
||||
{0xb94bc10e, 0xad6794f2, 0x7e80093a, 0x7463b3f3, 0x90db4c79, 0x7bf5af53, 0x965c0cc4, 0x031531c6},
|
||||
{0x7cc1083d, 0x66425289, 0xa45d785f, 0x778ba471, 0xbbc94c16, 0xe3f5c599, 0x9b92e036, 0x02606413},
|
||||
{0xcf287faf, 0x191a2ea9, 0x823ddf07, 0xe6406a78, 0xaabe912b, 0xabcf2825, 0x7c48649a, 0x021dab44},
|
||||
{0x65375f6c, 0x9465d77c, 0x65370520, 0x924e189c, 0x918f0105, 0x8be0ca5f, 0xb1925509, 0x07586d27},
|
||||
{0x9302ac44, 0xe4fa93cb, 0xbf87d840, 0xf381ebbd, 0x44793049, 0x5027e7d9, 0xd3f09392, 0x0230b5c3},
|
||||
{0x31d48a82, 0x123e992e, 0x729d40e2, 0xef2990c6, 0x0f331903, 0x946813e3, 0x112a2c4d, 0x022f575e},
|
||||
{0xd4ee8cf7, 0x4b44764e, 0xdb576ebc, 0x4d44cff8, 0x0ab93ba1, 0xc6185d3a, 0x7e3f1e78, 0x0520c2d3},
|
||||
{0xbc46b8b4, 0xd9446736, 0x91e2ede1, 0xc7776293, 0x87689930, 0x0323845f, 0x379293ae, 0x061e359f},
|
||||
{0xb49b3a0a, 0x767a1747, 0x2b58f45e, 0x17e69346, 0x1425ad98, 0x10820519, 0x1b487ae5, 0x0367f384},
|
||||
{0x92f8ac25, 0xe0407696, 0x2beb71a6, 0x9ca9d269, 0x2f0c2471, 0x914017ea, 0xf421a10d, 0x07709cc3},
|
||||
{0xc3bb6a8f, 0x2c8ed622, 0xa2a1a8f2, 0x31c57cb6, 0x4bf6c316, 0x053924d5, 0x09563089, 0x0727b76a},
|
||||
{0x09dc6b5c, 0x567be37f, 0x9476eb5d, 0x57e36f45, 0xee5be5b6, 0xf68488dd, 0x2884c2d7, 0x05ac1ff1},
|
||||
{0x04173760, 0x0fc5b934, 0xda828f00, 0xe43272df, 0x2fad6e9c, 0x7e2ab5fe, 0x0a4995b3, 0x00e0a5eb},
|
||||
{0x42f8ef94, 0x6070024f, 0xe11a6161, 0xad187148, 0x9c8b0fa5, 0x3f046451, 0x87529cfa, 0x005282db}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x08000000},
|
||||
{0x0becc81e, 0xd59e99d9, 0x537cdf25, 0x3ad971a9, 0xbcd60738, 0xaccedf99, 0xd65d66b5, 0x01dafdc6},
|
||||
{0x4bc9ca34, 0xc8e6df6f, 0x5397aaca, 0xab8bfbc5, 0x94813e6e, 0xb5ea6773, 0xe295dda2, 0x0446ed3c},
|
||||
{0x8145aa75, 0xd7981c5b, 0x3d174c52, 0xb14011ea, 0xe4721c1e, 0x647c9ba3, 0x6f6ac6dd, 0x05c3ed0c},
|
||||
{0x6e0bef41, 0x9de8c5cf, 0xcee1b9b0, 0xec349cbb, 0x2121589c, 0xfe72ab05, 0x24c7669c, 0x03b1c96a},
|
||||
{0x246766d8, 0xb878549e, 0xb5a03ab4, 0x8c5d8531, 0x7f1ec75e, 0x334a83ab, 0x46b146d7, 0x01342b29},
|
||||
{0x31055652, 0x8c71bd50, 0x6081f8c3, 0x2eedac49, 0xab013740, 0x25164a76, 0xbca84bf7, 0x05c0a717},
|
||||
{0xd0a6b4f5, 0x1ad37af3, 0x8ca50294, 0x6dc49fe3, 0x5d9529c3, 0x8357a7ff, 0xcefe8efe, 0x02c161bc},
|
||||
{0x296fbf1c, 0x90a5fa7f, 0xc977b113, 0x18226a39, 0xc178262e, 0x9362d5c9, 0x40d28de5, 0x03a362d3},
|
||||
{0x125ca33a, 0x04eeb1c0, 0x8437c604, 0xaa47a4c0, 0xa4d6bafe, 0x064426a2, 0xb8cc76db, 0x00ffbb44},
|
||||
{0x179e2ebe, 0xecf0daf8, 0x2574403b, 0x942e643e, 0x6bf06f7c, 0x684d31aa, 0x244c675c, 0x003b2bde},
|
||||
{0xfeccfccc, 0x96bc19dc, 0x269130b4, 0xbb26f74e, 0xd511649f, 0x15d57a9f, 0x7dcde3c3, 0x02d852a4},
|
||||
{0x44ad0610, 0xb4a47f4c, 0x06fa1b55, 0xdc2f028f, 0xd25979ac, 0xd73ddcd4, 0x076e7f5d, 0x06ba7cbe},
|
||||
{0x349eea63, 0xb0f43dd2, 0x3e64660d, 0x5e64466c, 0xc3bb94ce, 0x7206f426, 0xed4327aa, 0x036cb7c6},
|
||||
{0xf248b36c, 0x6503e80b, 0xe36060ec, 0xb93dd56f, 0x95c2c067, 0x6d3b2763, 0x155023a7, 0x038e7d59},
|
||||
{0xcdf92351, 0x140437ad, 0x2a5ab630, 0xb7a6e1b4, 0xd48175a5, 0xaa80b742, 0xd4afae89, 0x06a50046},
|
||||
{0xaea51997, 0xe8cde2cd, 0x417e3754, 0x612806f6, 0xb940adf4, 0xe40a4a07, 0xa33929b2, 0x063f5efa},
|
||||
{0x0c07573f, 0x0c0926df, 0xd8d4bee3, 0xa84e9027, 0x6bcd79ea, 0xf3776dfa, 0x523f55a8, 0x043a8517},
|
||||
{0x66984d05, 0x5b7e4e45, 0xdb8c30c4, 0xb9381de7, 0xae86e4f6, 0xd7c15128, 0x809daae7, 0x0718f1ad},
|
||||
{0xc1eae1a6, 0xe4fb0a7d, 0xa90a0813, 0xe5484134, 0x895df525, 0x24cca8f9, 0x1cedd2ee, 0x035fd390},
|
||||
{0x82e87775, 0x0a87a942, 0x971f450b, 0x9f2b4b62, 0x8eae6f09, 0x1dc5aecd, 0x1c5686a6, 0x07547fa3},
|
||||
{0x2e35511a, 0x785975cc, 0xa085c456, 0x4266bc82, 0x3abd5bfd, 0x45cf52e1, 0x7bd95ece, 0x019e8e43},
|
||||
{0xae580194, 0xfad72a75, 0x2989ac16, 0xf2bb5a00, 0x55f2b4d0, 0x53fee728, 0x9c7a91e5, 0x02b9f95d},
|
||||
{0x71200963, 0xb0062d2c, 0x1ac57a23, 0xe16e9f91, 0xc4bd9d3e, 0xaae7b169, 0x7f505f35, 0x07462151},
|
||||
{0x57e31913, 0xcf7bd10e, 0x6a4d0ee4, 0x1a360a91, 0x31869e35, 0xb2ba4914, 0x18005db4, 0x07a62d5c},
|
||||
{0xb4344711, 0x431f11e2, 0x6192c47e, 0x0cc3049c, 0xeb9c1bc3, 0x375dff93, 0x42071ee8, 0x03a75790},
|
||||
{0x9ed81498, 0x4eb14251, 0x98b804ef, 0x5852dbc5, 0x56d7f20c, 0xe0c1be13, 0x20d69181, 0x023e7f68},
|
||||
{0xe34f2d55, 0xf2eeb9b5, 0x2aad6f84, 0x63459f16, 0xbe37dbea, 0xf12099e7, 0x11b1a0fd, 0x06e45493},
|
||||
{0x0d6c93ed, 0x63032f6a, 0x5a04829f, 0xd99cbcc8, 0x89608b5e, 0x80f20416, 0x9df329f4, 0x00bf4231},
|
||||
{0x2710f927, 0xc7fc3d1b, 0x90d8503e, 0xc72d19af, 0x9940e689, 0xa9dcd3b8, 0x2da77ac9, 0x06fd386e},
|
||||
{0x08b27bc2, 0xc800035f, 0x4dfacc03, 0xd98987cf, 0x1256e525, 0x24f8fdbf, 0x1f104273, 0x04c575f1},
|
||||
{0x256c604a, 0x68b16e90, 0x6eba097d, 0x7f51023a, 0x1aeba9c8, 0x52c7629c, 0x4809d8da, 0x0575e850},
|
||||
{0x4ac81249, 0x7439d2f9, 0x4fc31ff2, 0x351e4a62, 0xb3906ded, 0x68fb8313, 0x08507a35, 0x007d43d8},
|
||||
{0x98859a12, 0xa87902b8, 0x73af55b3, 0x2f0d13e0, 0x1b9783c2, 0x5a46c66a, 0x2f5f71d4, 0x01045b06},
|
||||
{0x604fce1e, 0x0c379595, 0x7fccc2b4, 0x20ab6eb8, 0xf1820ae7, 0xac0bc709, 0x93fb2b07, 0x07e7654f},
|
||||
{0x246c4bf0, 0xa0e40811, 0x816b15e0, 0xe12accf5, 0x17938138, 0xee417239, 0x2c9a34fb, 0x004e092e},
|
||||
{0xad2cd984, 0x6304351b, 0x4bf1aafc, 0x38546ca6, 0xf310e99f, 0x1fb81192, 0xb5376275, 0x07e89896},
|
||||
{0x7b2d141d, 0xe4376a0b, 0x6dac220c, 0xea1795e5, 0xb19e1901, 0xd778ab50, 0xa94c274f, 0x077df905},
|
||||
{0x16fcd6c7, 0x7039bab1, 0xa6ea1c94, 0x8eececb7, 0x0f122046, 0x84d26ab5, 0x22fd55a1, 0x053c5d48},
|
||||
{0x72f11f65, 0xd43eb7bb, 0xb2a566d6, 0xfb538785, 0x3f35cbf5, 0xccc2cdc6, 0x7112504a, 0x06df5a9e},
|
||||
{0x60ce9c30, 0x75efb55c, 0x3c541437, 0x991873ed, 0xdf0cbb3b, 0x37eaedcb, 0xb04c2858, 0x0278d7f0},
|
||||
{0x1a06866b, 0x5757dd4e, 0x6570fa7f, 0x15c176b1, 0xafe89a1d, 0x9981b57f, 0xee0cb14c, 0x03c57f4d},
|
||||
{0x503c31cd, 0x3438cd66, 0xc0736d4b, 0x34437e52, 0x2a9d1b28, 0xe825b769, 0x73c06ee7, 0x06955a3a},
|
||||
{0x5c5e530e, 0xbbf0995a, 0x6569a2f9, 0xdee304b3, 0x5bd1a886, 0x3b9c993c, 0xc9cd050a, 0x00f66017},
|
||||
{0xee755737, 0x3666e752, 0x74d0e317, 0xa13bfafc, 0x01d2f1bf, 0x17ab672a, 0x0778f525, 0x079dde3a},
|
||||
{0xed8a25e9, 0x96a003c2, 0x8f347cec, 0x45d258fe, 0x96ea14ac, 0x68ff148d, 0xe148eda9, 0x058f4ec7},
|
||||
{0xe2a700ab, 0x23baf732, 0x5202a945, 0x6434725a, 0x2e693363, 0xa19a338d, 0xbf2f39c6, 0x01d0ea7a},
|
||||
{0x3ab52589, 0x5e571cad, 0x92240361, 0xe2916bb2, 0xdff5e354, 0xe6f8897b, 0x2ffa4707, 0x02a62880},
|
||||
{0xef649a85, 0xaf446c62, 0xed4e461f, 0x14d8072f, 0x59993efa, 0x5a07f4e5, 0x72a3a652, 0x00dc28b6},
|
||||
{0xf21511df, 0x139299d7, 0x4854ebc3, 0x8914e707, 0xbfd102a9, 0x9f3b5913, 0x3a5af894, 0x009dc24f},
|
||||
{0x1f4ba4fa, 0x650e1d91, 0x1977bff0, 0x6ba67806, 0xaa9bbc1b, 0xffbdc531, 0x997408aa, 0x057b69b2},
|
||||
{0x65fb1a91, 0x25c03e81, 0x7fd22618, 0x8682f98b, 0xf46cb453, 0xcad67f13, 0x5a80e5c6, 0x060ca599},
|
||||
{0x94188f2a, 0xa7978a90, 0xdbb9338e, 0xd5fc8f0b, 0xcbdd84f0, 0xf8387e6d, 0xbbc743a3, 0x073ae131},
|
||||
{0x0415bbcc, 0xafd00c46, 0x0df4a52a, 0x1a00eb6c, 0x0b96b594, 0x1ec67c64, 0x8e26b699, 0x01cb82a5},
|
||||
{0x7f740f93, 0xf56319fb, 0x2e2f6ed7, 0xb40d559b, 0x75e19784, 0x63f96f04, 0xc31ba061, 0x06406929},
|
||||
{0xfa5a3239, 0x22349e8b, 0xb9ca6bf9, 0xe1236395, 0x9b0017a4, 0x76ae5a8b, 0x17b7af03, 0x06cfb4ce},
|
||||
{0xb51abfe6, 0x34938785, 0x1249edb6, 0x21f54c80, 0xab038972, 0x3bd1cc16, 0xa4a57a81, 0x0636b37f},
|
||||
{0xf88717cf, 0xfda4a9a1, 0xee19d402, 0xf8fcba35, 0x47c9ba1b, 0x1ac940f6, 0xdd991440, 0x013c0ab3},
|
||||
{0x3743adf4, 0x5082318a, 0x22440f94, 0x3293bae1, 0x8dd2d761, 0x4c2e6d7f, 0xcdc38c82, 0x07124118},
|
||||
{0x76198779, 0xb031f8b7, 0x1b6c1944, 0x6742f602, 0x894a6134, 0xa18290db, 0xaba037dc, 0x035289d8},
|
||||
{0x9f8a9b07, 0x4579e855, 0x4dca3764, 0x1e580662, 0xb8c8ef49, 0xda92152e, 0x8b54508a, 0x0444085a},
|
||||
{0x34696648, 0x7f670ce1, 0xc05768d9, 0x2f00108f, 0x390fb519, 0x2d00a444, 0x1cd6f914, 0x015c468b},
|
||||
{0xfe46c5f2, 0x00666cbf, 0x9f7174d6, 0xca4051c5, 0x8e4277f4, 0x1629882a, 0x6ee002a3, 0x00b3f261},
|
||||
{0xc1dbb4f6, 0x418a2b86, 0x9a6ca270, 0x9f453ccc, 0x1d457b20, 0x1966471f, 0x80fd1319, 0x00b4d831},
|
||||
{0x1c76c8b1, 0xa12f86a8, 0xc0125e48, 0x2772e424, 0x1459dfb8, 0x8d650644, 0xad06d01c, 0x02128e5c},
|
||||
{0x3472799c, 0xcc8cc7f6, 0x2f511cae, 0xfbd97f95, 0x5ebbff71, 0xadd8818b, 0x09af0983, 0x00520540},
|
||||
{0x8ec654cc, 0xcaab5dd4, 0x17ba15a9, 0xc05ad0a7, 0x36300a00, 0x4bda7469, 0x41bb0610, 0x02e486cd},
|
||||
{0x2d6be8b5, 0x077ba983, 0xfe89eb7d, 0xdd5e728f, 0x63f9c51f, 0xe3c872fb, 0xce639995, 0x01f2f7a8},
|
||||
{0xaa2ea7eb, 0xd82b1599, 0xa16489e0, 0x1be5d254, 0x173d3219, 0x19cb236a, 0x1fe63b23, 0x007dd45f},
|
||||
{0x19dba628, 0xa27cc4d3, 0x5fd2e061, 0xf04ac441, 0x9307a758, 0xc7405333, 0x28c40fe4, 0x0103c707},
|
||||
{0x54662aab, 0xb5129fd1, 0x59158f32, 0x2ec5b69b, 0x12c44eec, 0x6c7e6492, 0xe527abb2, 0x046e7c11},
|
||||
{0xe32d46fe, 0xb9bf4936, 0xb08ef006, 0xf23ae18c, 0xe6a5179e, 0x5352cc59, 0x5bf7c0b8, 0x0753a621},
|
||||
{0x9318db3a, 0x19f65bc2, 0x7e3d0014, 0x93ff3f79, 0x6beb580d, 0xf7f93c7f, 0xddd72603, 0x04fdb898},
|
||||
{0xe184a935, 0xf7e1f88f, 0x1ad510f0, 0x82a0f047, 0x4c9ab6ca, 0xce0f7c44, 0x5104a95a, 0x0552304e},
|
||||
{0x985bba5c, 0x06615580, 0xf487a1fb, 0x8ccd29a8, 0xeecf758d, 0xb3e15ed0, 0x857ce648, 0x05328783},
|
||||
{0x6cb042b0, 0x5d1d5a22, 0x0277083c, 0x64375cf4, 0x5fa82215, 0xe8947dab, 0x86932495, 0x05e72829},
|
||||
{0x8c3e2849, 0x5bf6f46a, 0x4924c8f4, 0x7e40314c, 0xdffd6118, 0x3c74a4ba, 0x2f8de20a, 0x05247cdd},
|
||||
{0xd0042d11, 0x25a418c5, 0x2f7da60c, 0x1b60ee9f, 0x02c0b69f, 0x61c041ad, 0x15670214, 0x0632d33a},
|
||||
{0x90e05a92, 0x32b03a5e, 0x78d1e8d6, 0xfb12a1b1, 0x5bc2f5d5, 0xb8af534e, 0xa032918a, 0x05ab4772},
|
||||
{0x0a711a9d, 0x096878a8, 0x6b083c8c, 0x87d070da, 0x87d06afb, 0x77931578, 0xf3104057, 0x03705277},
|
||||
{0xdf993e46, 0x502d2374, 0x35baf646, 0xc1cd2868, 0xe30aa213, 0xa61b54b6, 0xbce34b74, 0x02511017},
|
||||
{0x90a6b9b9, 0xcfb6c51a, 0x8be6ade8, 0x4e0b29ef, 0xd3832d74, 0xa8292467, 0x41ca1e45, 0x02ce7977},
|
||||
{0x3e672d5b, 0x25ee10aa, 0x28597504, 0xb0e60c63, 0xe263c827, 0x4a8d0567, 0xfadefeba, 0x01f4ec42},
|
||||
{0xa5a26158, 0x8b4b15e0, 0x88a71cf2, 0xa59b2df9, 0x5d734341, 0xde44f2e7, 0x4db8d2e8, 0x007a18a0},
|
||||
{0xb4d18100, 0x30fcf001, 0xf8ae0b4f, 0xcdaa5334, 0xe325615a, 0x67017b2b, 0xf0ccbf57, 0x016c6d47},
|
||||
{0xba937732, 0x66afc115, 0xc20be386, 0x917d4890, 0xa017c59d, 0x5dadccff, 0x986c39c1, 0x043fa44e},
|
||||
{0x08baa72a, 0xc57ec886, 0x052364ed, 0xe65a4680, 0x85f9a523, 0x0536b505, 0xfe744ee2, 0x03580609},
|
||||
{0x1bab1ab8, 0x88109415, 0x62f0fa74, 0x02244b19, 0x915618e0, 0x837fcd10, 0x942f12d2, 0x061b83d0},
|
||||
{0x687b7798, 0x823d0bba, 0x84a49784, 0x5f93174a, 0x2574af37, 0xcfd64159, 0xe108057c, 0x0290722e},
|
||||
{0x58a66036, 0x900a7031, 0x6153c2ae, 0xcb443378, 0xa6ccdffe, 0x4c48b8dd, 0xa06e955a, 0x049a9211},
|
||||
{0xea0b9dd9, 0x1b034532, 0x638c79ec, 0x11cba08f, 0x7c5b2d15, 0x16d00728, 0xbb9a759c, 0x05abcbcd},
|
||||
{0x1552d6af, 0x21b4f60e, 0xbed54865, 0x2f7ea9d2, 0x738befdb, 0x39378802, 0x97845360, 0x02adf76c},
|
||||
{0x4026bb92, 0x6e5eb2ca, 0xcbed5570, 0x18f3d8bf, 0xb655ac26, 0x2a5fc8cd, 0x3809a1c5, 0x0031cd25},
|
||||
{0x0ef5e011, 0x2d698950, 0xc018b82d, 0xc0668c45, 0xf520d325, 0xd180ff47, 0xa38122b1, 0x046714c7},
|
||||
{0x12df2cc7, 0x8dec8a4b, 0x963031f8, 0x5eb84a1b, 0x88525708, 0xb75ad701, 0x07df57bd, 0x02054a99},
|
||||
{0x82b2f616, 0xe0013d43, 0x7b385914, 0x2ad34c97, 0x11108f4b, 0xc9969223, 0x9c9fad59, 0x0183f639},
|
||||
{0x06b4dc38, 0xaca9dfbc, 0x962d5774, 0x85596bbc, 0x22f1cd7d, 0xd7023923, 0x2067b180, 0x04d3c939},
|
||||
{0xe4004173, 0x6d13e6ab, 0xaafe8726, 0x3495d095, 0x33dc3303, 0xa22d3e4a, 0x776d2e14, 0x0276dbb2},
|
||||
{0x68c539b6, 0xa03f83cb, 0x7b42a06e, 0xfd3fa839, 0xe8d45ac3, 0xea0f1f15, 0xa414b012, 0x061adb94},
|
||||
{0xb33fb188, 0xd22fc6e3, 0xf723dc18, 0xbebc7978, 0xf6c99f34, 0xa874b584, 0xf67ff454, 0x049beb53},
|
||||
{0x754bed16, 0x7c247948, 0xe50eac10, 0x4a84bcfb, 0xade97580, 0xc00d65df, 0xca79c5ae, 0x0763d73c},
|
||||
{0x7aadbe1a, 0x696e27af, 0x9d8e2a1f, 0x113535e0, 0x4c011766, 0x6953003f, 0xbb52558c, 0x0498a75f},
|
||||
{0x6e09cee7, 0xcf26e897, 0x299b63c7, 0x813a76f2, 0x0939904c, 0x67c02fa7, 0x7e0b9483, 0x045c41a9},
|
||||
{0x4af5adcc, 0xad979914, 0xc2c7c068, 0x7d9267f9, 0x21b4a0a7, 0xda4fa3f8, 0x3386c423, 0x03f4bcc9},
|
||||
{0xd1228595, 0xe5fcd634, 0x12fc8b7c, 0x5571b994, 0x244857f8, 0xd50dcd33, 0x263b93f0, 0x060dc1d6},
|
||||
{0xfee59c89, 0x7040a236, 0x78ceb168, 0x91a4301b, 0x19cdb36a, 0x973b55bd, 0x71008400, 0x06a1c58e},
|
||||
{0x6af1f351, 0x1d3c7ad7, 0xe8ad24dc, 0x8493c0c1, 0x48d5ffd9, 0x076f9dea, 0x5931555f, 0x00b9b2bf},
|
||||
{0xeaa5731c, 0xa3d54d89, 0xba84ee02, 0xfcc41a45, 0xcc1cdac8, 0x7c828f73, 0x5bfe9d23, 0x009c426b},
|
||||
{0x3f1f352c, 0x36fb314c, 0x9feb1120, 0x750a2a5f, 0xd7b06171, 0x3a2f19e8, 0x3b550cd9, 0x06de1885},
|
||||
{0xb69183f6, 0xefc03237, 0x979ee075, 0xb5a14fc3, 0x2dcb1d51, 0xbf114125, 0xb8eca2d3, 0x062364f7},
|
||||
{0x95375861, 0x575f1ea7, 0x80cc8dba, 0x30608586, 0xcf7a8f9f, 0x2beca9f5, 0x5fe60da4, 0x00dfc078},
|
||||
{0x0f86ded5, 0x312928eb, 0xb9c4f0cc, 0x646f5d3e, 0x2fbf14dd, 0x23c69382, 0xc44caa0e, 0x023aae90},
|
||||
{0x13e16243, 0xa7c92faf, 0x92efd5fc, 0x035a3e75, 0x86a744ea, 0x32f44d08, 0x1ea28333, 0x05b45217},
|
||||
{0xc41fdf22, 0xb557d203, 0x4bbc8f76, 0x9697570c, 0x81eaf742, 0x3a6a2cb5, 0xb0d03a0f, 0x07f2c08a},
|
||||
{0x2a18b73a, 0xca806385, 0xdb6a953d, 0xf2015d6d, 0xba5f67b9, 0x51d21a8e, 0x14807dd6, 0x051439d5},
|
||||
{0xf75051de, 0x7b6e0c13, 0x14dd1aa0, 0x114681fb, 0x0fd95a37, 0x72a1cccc, 0xa39e5bb8, 0x02f29d4c},
|
||||
{0x116529cd, 0x4808a0de, 0x5b941d1c, 0x1cf38580, 0xd70796f7, 0xc96a451e, 0x3f24e64f, 0x016d083f},
|
||||
{0x3cf155ee, 0xc71b78d0, 0x0c361b67, 0x0c04a134, 0x7756e4a9, 0xdb546edc, 0x2988eb2c, 0x03474404},
|
||||
{0xf30cef17, 0x1a0b3585, 0x864abd80, 0x63c1de29, 0xc0687c8e, 0x0c171d6e, 0xc9763a97, 0x0353aec8},
|
||||
{0x94192fb8, 0x0a2c9cff, 0x1a7f5bbf, 0x27320b93, 0xe5ceeb75, 0x465d2f9f, 0xd78f1cc3, 0x07ce6f99},
|
||||
{0xe8d1b26d, 0x0f899233, 0xb87a2984, 0xed4b44d2, 0x0bd6354a, 0x0c0712c6, 0xc7032f5c, 0x01eb2a31},
|
||||
{0x46b03b57, 0xc4c03fbd, 0x785ebbe8, 0x989b0ff3, 0x7f0bcb19, 0x5cada62a, 0xa97557c9, 0x01426410},
|
||||
{0x96fb0a26, 0xf1d2e82b, 0x1edb9ce3, 0xe270bc10, 0xfc7aaed8, 0x9549cfd0, 0xd90d7c9c, 0x03e8256c},
|
||||
{0x43ac9984, 0x14eef0ee, 0xa16d6770, 0x2903ff22, 0xa38fbfc0, 0xc66c2690, 0x8755440e, 0x0032a202},
|
||||
{0xf3601782, 0x46a07cf2, 0xaa71d137, 0x79f410f9, 0x8bcabc59, 0xc320c6f1, 0xf8ab64d8, 0x00a706cf},
|
||||
{0x8dbd8d4f, 0x8848a9f0, 0x0085061d, 0xeff89e69, 0xfee62fbe, 0x90e634a7, 0x2ffb456b, 0x03983046},
|
||||
{0xb272ed5c, 0x91ec28a8, 0xdc0cbb77, 0xf8529918, 0x3648d2c5, 0x8f896ddb, 0x74edaf19, 0x0668a86c},
|
||||
{0x128c9bd9, 0x341d5fc8, 0x6b3241c5, 0x592f87d8, 0xb2cc3c97, 0xf8cba6f2, 0x03f396ed, 0x03463bf1},
|
||||
{0xafd9d239, 0xcf3ae525, 0xea20b753, 0x06b8b7b9, 0x3408a993, 0xb2be1e49, 0x9f47063f, 0x02bcb200},
|
||||
{0xa0bd0bc8, 0x7ca02722, 0xb862774d, 0xce8b32ee, 0x5f8da059, 0x424ba5f0, 0x3bb422a0, 0x05c81961},
|
||||
{0x32fd8907, 0x137dad8c, 0xc95a3a5d, 0x301d5119, 0x8937ac08, 0x144b38c3, 0x39338de7, 0x00e66f0e},
|
||||
{0xcfc10885, 0xe68b8875, 0x96147e68, 0x4f24d49a, 0x43032c15, 0x5da9e6fd, 0x9bf25e12, 0x061ab0e6},
|
||||
{0x455c65ad, 0xeab29bbd, 0x2448be64, 0x1c7da0e7, 0x8eedfa1f, 0x8c2c1bcd, 0x698c1197, 0x0400e2d2},
|
||||
{0x04549c13, 0x335d3e9e, 0xd31585cc, 0x546f0d82, 0xe16dbbac, 0x350d5ed5, 0x113c53fd, 0x05f77544},
|
||||
{0x7d8f3b7e, 0x6aa75c04, 0x10a641ae, 0xc70851dd, 0x9a0750fe, 0x4d33edd4, 0xcd1b230f, 0x022802cf},
|
||||
{0xef8170e3, 0x59fa1903, 0x62995788, 0x464a73ef, 0x13369717, 0x338be7fd, 0x52d21278, 0x02e97589},
|
||||
{0x4856ddd5, 0x3f2deca8, 0xfced10e2, 0x969b10e2, 0x52860ee7, 0x09620dde, 0xb620fa3f, 0x04a169bf},
|
||||
{0xa03b49f1, 0xd9beb712, 0xe9af606e, 0x0798af09, 0x63e70b9a, 0xe37f9aea, 0xb35abd7c, 0x02542a44},
|
||||
{0xf6e78973, 0x335d4000, 0x76f1bb23, 0x7bc28fde, 0x1b30e9ca, 0x6cfdc907, 0x0400b651, 0x03ff88aa},
|
||||
{0x36433eaf, 0xfb862981, 0x4111cfa3, 0x15fdc659, 0xeab2909d, 0x569574b9, 0x3cd80f84, 0x01442360},
|
||||
{0xe85c4af3, 0xa8ed8f31, 0xe6aaf3da, 0xf7680fee, 0xc5c1772c, 0x2240e931, 0xaebeeb70, 0x04f44f6f},
|
||||
{0x8846e0af, 0x29de323f, 0x42c25319, 0x33f91593, 0x6cbadd58, 0x863099c1, 0xfd83e5b3, 0x06a603cf},
|
||||
{0x86c77703, 0x1bdd17f3, 0xe02db671, 0x8cee8e78, 0x0b6dffce, 0xed1627af, 0xa0d9b3cc, 0x04491984},
|
||||
{0xcb583661, 0x177f8f9c, 0x73d05bfc, 0x54122d0c, 0xebe37b4a, 0xa9231660, 0xd4826038, 0x06e885db},
|
||||
{0x13c253b9, 0x64cde875, 0x2fbc98a9, 0x8484bccb, 0x4885a9af, 0xbad877c5, 0x0cbc33b6, 0x03007c90},
|
||||
{0x47cfa357, 0x41eb9173, 0x325309ad, 0xb3f06289, 0xaa85421b, 0x029da7c1, 0x84de4bd4, 0x07b7eb0d},
|
||||
{0x56b831e2, 0x2c459a80, 0x321aba19, 0x2b99d098, 0xea73c0e1, 0x96237364, 0xe25ed0ed, 0x02f2c638},
|
||||
{0x9b388bf4, 0xfc8c3228, 0x82cd081d, 0xa4c371e4, 0xc85f75df, 0x11239026, 0x8892896e, 0x01f01c5e},
|
||||
{0x73457917, 0xce1dde59, 0x16dd8b49, 0xdfdaeb19, 0xbfd17b1e, 0x4289a976, 0xc842870a, 0x05e2cf7e},
|
||||
{0xc7705532, 0x72faa825, 0x8f7fe8c2, 0xd24bf942, 0xb695e31b, 0xb7403e13, 0xfc85a0c6, 0x02eac9e7},
|
||||
{0x1ddb2dff, 0xc47638e3, 0x799bb649, 0x78b91a13, 0x552588ed, 0x001800de, 0x9cd9425c, 0x01d0640c},
|
||||
{0xfb431e10, 0x159891e7, 0xa012b461, 0x2f2fb29a, 0xb3333e5d, 0xc1dca804, 0x9a47200d, 0x05b918ec},
|
||||
{0x2d5ce760, 0x379119b5, 0xda2ccdab, 0xf9911f75, 0x47b5c054, 0x92b09490, 0x7298d065, 0x0742a31e},
|
||||
{0x4a73d1f1, 0xe2a1046b, 0xc6ab4d9c, 0xbc85a747, 0xba0701f8, 0x79b0e699, 0xeebc6762, 0x05e5c2cb},
|
||||
{0xe0c0db50, 0xdc644b37, 0x2b8444d2, 0x26f7f083, 0x63479a84, 0x90acf2e7, 0x90ffe372, 0x0590d880},
|
||||
{0x83c0fc9c, 0x3dd1aba4, 0xcfb43020, 0x30a1051f, 0xaf5be716, 0x7d1ca380, 0x1ed8aed9, 0x01d56947},
|
||||
{0x0fa23690, 0x657df8c4, 0x32111be3, 0x61a12fe4, 0xe78236c9, 0xd6cc9942, 0x85e66191, 0x01709635},
|
||||
{0xc6a054f0, 0x96bf35ed, 0x004113cc, 0x9d1e411a, 0x1ac7a3ec, 0xccdb9bc3, 0xd08016b8, 0x07362425},
|
||||
{0x9721b035, 0x72744cce, 0x0beb72e3, 0xb87eb606, 0x60870c2e, 0x00c5e70c, 0x685d7c14, 0x029fa4d3},
|
||||
{0x86e52af4, 0x06d3a7a3, 0x70020878, 0x7b1c814a, 0x52e68007, 0x44373cb7, 0xe403540f, 0x041cf8c0},
|
||||
{0x76a27949, 0xd5dbc8bf, 0x27d9cd12, 0xb41449bc, 0xa7a667a1, 0x93740020, 0x0fbb4e77, 0x000bf807},
|
||||
{0x9969cfe9, 0x274ce281, 0x259ec27c, 0x3234d283, 0xe0b44f04, 0x9ff85b71, 0xffcc1006, 0x0298d060},
|
||||
{0x68ab54f8, 0x5cd8b289, 0x437eaab8, 0x42e3877f, 0x9318bd3e, 0x6490dc61, 0x4e54d968, 0x075b01f3},
|
||||
{0x7b64243c, 0x73100d65, 0x5c802f82, 0x692378be, 0x88184c0c, 0x00283dbb, 0xab6f4f0e, 0x0442efad},
|
||||
{0x72015722, 0xbe83b708, 0xe1cdcf0e, 0x2035319f, 0x398347da, 0x2b1b3351, 0x1a14b8dc, 0x061823d8},
|
||||
{0x378d9803, 0x1090948c, 0x4725c64b, 0x61a558cc, 0x7d7fcd91, 0x9e5bd3b5, 0x57ebda25, 0x061e02a0},
|
||||
{0xf8324dc8, 0x166b4a3c, 0x38133fda, 0xa25b9d11, 0x917171a5, 0x9d602950, 0x417d104e, 0x0632e48b},
|
||||
{0x6a61d5e0, 0x03b9f1b9, 0xe59cfbb7, 0xd906b740, 0x7892fbe4, 0x99a93267, 0xad1b8171, 0x06ddc2a6},
|
||||
{0x67fc3874, 0x6ae4355d, 0xb1ada695, 0x4fa456d8, 0x9f91ac43, 0x4e234065, 0x829d173e, 0x028da309},
|
||||
{0xfc695c2c, 0x1e08dd18, 0xfa687112, 0x1c0a2fad, 0xffd6302a, 0xeb5ebf01, 0xfd1d10f5, 0x012fd387},
|
||||
{0x236e65c9, 0x0b907f2e, 0xb1281d54, 0x92ba7a15, 0xc13f1d75, 0x07f0a6ad, 0xcd6d1e9c, 0x05dfe4e3},
|
||||
{0xc45f33f8, 0xd99cc41a, 0xd373165c, 0xc1c10a71, 0x2ce2936a, 0x6c809230, 0xa0498cf5, 0x018dc832},
|
||||
{0x7b222ad8, 0x8e881eab, 0xb6194efb, 0xc8b48774, 0x963c6b6b, 0x38452dfd, 0xe4c4e0f8, 0x02847f5a},
|
||||
{0x2bf4ad95, 0x2950bb4a, 0xdc39ffb0, 0x37f42c9b, 0x101253a8, 0x3814fa42, 0xb67f2ca5, 0x04d4a34c},
|
||||
{0xa9684ba0, 0x6c40fece, 0x3b13bca4, 0xc7108aad, 0xe7bff9be, 0x98ccc7ea, 0xe9b3b316, 0x048b3a6a},
|
||||
{0x08390a2b, 0x4d908260, 0x74b070bc, 0xd5a641d0, 0x910015c5, 0xc3b19274, 0xd5a998a7, 0x02ac8e74},
|
||||
{0x9698d605, 0x8de03acc, 0xa4c9137f, 0x3b8b720c, 0x354faf46, 0x5bbad6e4, 0xfd9e842f, 0x0054c120},
|
||||
{0xd65aead5, 0x305fa33f, 0x0fe296f9, 0xba02b164, 0x708efc94, 0x64cba43c, 0x8ad7f0ef, 0x034b9ffe},
|
||||
{0x13c2e8f4, 0x59e1179e, 0xc572f8a8, 0x5d823d59, 0x74003bce, 0x0cfdb6ee, 0x011c179e, 0x00763941},
|
||||
{0xa47999a8, 0x29b692ee, 0xbfcd80d8, 0x6436c3f1, 0x959768d7, 0x553444f3, 0x583896d4, 0x01d45a26},
|
||||
{0xc150b3f8, 0x0ce0791d, 0xf493c135, 0x7d3a0c1f, 0x5ede0712, 0x4d37cc23, 0x34fbae9c, 0x036a6a38},
|
||||
{0x2ca1eb78, 0xa8ee8204, 0x66d8b759, 0xc713a1dc, 0xac061800, 0x1813508d, 0x3b1f0da2, 0x05725ca0},
|
||||
{0xf2f391c1, 0xbe6826df, 0x232878f0, 0xeb85b046, 0xf7e1d662, 0xf5a96510, 0xe38c2b64, 0x0419a43b},
|
||||
{0xe69e791b, 0x4b54889b, 0xb5c95ea5, 0xb371eeb0, 0x0b2f26a3, 0x9f53ccca, 0x66f45f71, 0x0040592d},
|
||||
{0xad2e5d5b, 0x4ced12db, 0x0987b849, 0x5f57b16d, 0xd9ec045b, 0xcab0e2e9, 0x6cfbf4df, 0x03e4e405},
|
||||
{0x3ecb72a4, 0xd71a1eee, 0x03a13fb7, 0x6bd9f7ec, 0x5877c6c7, 0xb74a54c8, 0xa28236a5, 0x0377689b},
|
||||
{0x74b3354c, 0x6f558a20, 0x3f776b18, 0xb67f6d10, 0x01165ed8, 0x8c447df2, 0xf3889308, 0x056b8991},
|
||||
{0x0d306b7a, 0x9482eb10, 0xd441cd03, 0xdd738e0f, 0x2de5dfd7, 0x6d186de5, 0x75fd1833, 0x00781b3e},
|
||||
{0x77ec28e5, 0xdbc14748, 0xd26e050c, 0x02ceee41, 0x18457c96, 0x8e5aef74, 0x1823c60f, 0x0461a6e2},
|
||||
{0x2be17c8b, 0x172e551d, 0x49c6a7b8, 0x90e25fa2, 0xa1b3478f, 0x6219e63e, 0xd063a517, 0x00c412f8},
|
||||
{0x65a9b68e, 0xb136b848, 0x673c6cbc, 0x9a9b7169, 0xf8ec7473, 0x15fa1875, 0x3033a5d6, 0x022d72f6}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x80000000, 0x00000008, 0x04000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc0000000, 0x0000000c, 0x06000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xe0000000, 0x0000000e, 0x07000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xf0000000, 0x0000000f, 0x07800000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x78000000, 0x00000010, 0x07c00000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xbc000000, 0x00000010, 0x07e00000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xde000000, 0x00000010, 0x07f00000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xef000000, 0x00000010, 0x07f80000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xf7800000, 0x00000010, 0x07fc0000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfbc00000, 0x00000010, 0x07fe0000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfde00000, 0x00000010, 0x07ff0000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfef00000, 0x00000010, 0x07ff8000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff780000, 0x00000010, 0x07ffc000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffbc0000, 0x00000010, 0x07ffe000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffde0000, 0x00000010, 0x07fff000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffef0000, 0x00000010, 0x07fff800},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfff78000, 0x00000010, 0x07fffc00},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffbc000, 0x00000010, 0x07fffe00},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffde000, 0x00000010, 0x07ffff00},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffef000, 0x00000010, 0x07ffff80},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffff7800, 0x00000010, 0x07ffffc0},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffbc00, 0x00000010, 0x07ffffe0},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffde00, 0x00000010, 0x07fffff0},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffef00, 0x00000010, 0x07fffff8},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffff780, 0x00000010, 0x07fffffc},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffffbc0, 0x00000010, 0x07fffffe},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffffde0, 0x00000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffffef0, 0x80000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffff78, 0xc0000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffbc, 0xe0000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffde, 0xf0000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffef, 0xf8000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x80000000, 0xfffffff7, 0xfc000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xc0000000, 0xfffffffb, 0xfe000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xe0000000, 0xfffffffd, 0xff000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xf0000000, 0xfffffffe, 0xff800010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x78000000, 0xffffffff, 0xffc00010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xbc000000, 0xffffffff, 0xffe00010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xde000000, 0xffffffff, 0xfff00010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xef000000, 0xffffffff, 0xfff80010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xf7800000, 0xffffffff, 0xfffc0010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfbc00000, 0xffffffff, 0xfffe0010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfde00000, 0xffffffff, 0xffff0010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfef00000, 0xffffffff, 0xffff8010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xff780000, 0xffffffff, 0xffffc010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffbc0000, 0xffffffff, 0xffffe010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffde0000, 0xffffffff, 0xfffff010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffef0000, 0xffffffff, 0xfffff810, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfff78000, 0xffffffff, 0xfffffc10, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffbc000, 0xffffffff, 0xfffffe10, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffde000, 0xffffffff, 0xffffff10, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffef000, 0xffffffff, 0xffffff90, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffff7800, 0xffffffff, 0xffffffd0, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffbc00, 0xffffffff, 0xfffffff0, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffde00, 0xffffffff, 0x00000000, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffef00, 0xffffffff, 0x00000008, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffff780, 0xffffffff, 0x0000000c, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffffbc0, 0xffffffff, 0x0000000e, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffffde0, 0xffffffff, 0x0000000f, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffffef0, 0x7fffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffff78, 0xbfffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffbc, 0xdfffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffde, 0xefffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffef, 0xf7ffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x80000000, 0xfffffff7, 0xfbffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xc0000000, 0xfffffffb, 0xfdffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xe0000000, 0xfffffffd, 0xfeffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xf0000000, 0xfffffffe, 0xff7fffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x78000000, 0xffffffff, 0xffbfffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xbc000000, 0xffffffff, 0xffdfffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xde000000, 0xffffffff, 0xffefffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xef000000, 0xffffffff, 0xfff7ffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xf7800000, 0xffffffff, 0xfffbffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfbc00000, 0xffffffff, 0xfffdffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfde00000, 0xffffffff, 0xfffeffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfef00000, 0xffffffff, 0xffff7fff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xff780000, 0xffffffff, 0xffffbfff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffbc0000, 0xffffffff, 0xffffdfff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffde0000, 0xffffffff, 0xffffefff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffef0000, 0xffffffff, 0xfffff7ff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfff78000, 0xffffffff, 0xfffffbff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffbc000, 0xffffffff, 0xfffffdff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffde000, 0xffffffff, 0xfffffeff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffef000, 0xffffffff, 0xffffff7f, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffff7800, 0xffffffff, 0xffffffbf, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffbc00, 0xffffffff, 0xffffffdf, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffde00, 0xffffffff, 0xffffffef, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffef00, 0xffffffff, 0xfffffff7, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffff780, 0xffffffff, 0xfffffffb, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffffbc0, 0xffffffff, 0xfffffffd, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffffde0, 0xffffffff, 0xfffffffe, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffffef0, 0x7fffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffff78, 0xbfffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffffbc, 0xdfffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffffde, 0xefffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffffef, 0xf7ffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x80000000, 0xfffffff7, 0xfbffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xc0000000, 0xfffffffb, 0xfdffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xe0000000, 0xfffffffd, 0xfeffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xf0000000, 0xfffffffe, 0xff7fffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x78000000, 0xffffffff, 0xffbfffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xbc000000, 0xffffffff, 0xffdfffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xde000000, 0xffffffff, 0xffefffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xef000000, 0xffffffff, 0xfff7ffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xf7800000, 0xffffffff, 0xfffbffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfbc00000, 0xffffffff, 0xfffdffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfde00000, 0xffffffff, 0xfffeffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfef00000, 0xffffffff, 0xffff7fff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xff780000, 0xffffffff, 0xffffbfff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffbc0000, 0xffffffff, 0xffffdfff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffde0000, 0xffffffff, 0xffffefff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffef0000, 0xffffffff, 0xfffff7ff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfff78000, 0xffffffff, 0xfffffbff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffbc000, 0xffffffff, 0xfffffdff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffde000, 0xffffffff, 0xfffffeff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffef000, 0xffffffff, 0xffffff7f, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffff7800, 0xffffffff, 0xffffffbf, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffbc00, 0xffffffff, 0xffffffdf, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffde00, 0xffffffff, 0xffffffef, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffef00, 0xffffffff, 0xfffffff7, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffff780, 0xffffffff, 0xfffffffb, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffffbc0, 0xffffffff, 0xfffffffd, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffffde0, 0xffffffff, 0xfffffffe, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffffef0, 0x7fffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffff78, 0xbfffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffffbc, 0xdfffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffffde, 0xefffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffffef, 0xf7ffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x80000000, 0xfffffff7, 0xfbffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xc0000000, 0xfffffffb, 0xfdffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xe0000000, 0xfffffffd, 0xfeffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xf0000000, 0xfffffffe, 0xff7fffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x78000000, 0xffffffff, 0xffbfffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xbc000000, 0xffffffff, 0xffdfffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xde000000, 0xffffffff, 0xffefffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xef000000, 0xffffffff, 0xfff7ffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xf7800000, 0xffffffff, 0xfffbffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfbc00000, 0xffffffff, 0xfffdffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfde00000, 0xffffffff, 0xfffeffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfef00000, 0xffffffff, 0xffff7fff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xff780000, 0xffffffff, 0xffffbfff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffbc0000, 0xffffffff, 0xffffdfff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffde0000, 0xffffffff, 0xffffefff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffef0000, 0xffffffff, 0xfffff7ff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfff78000, 0xffffffff, 0xfffffbff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffbc000, 0xffffffff, 0xfffffdff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffde000, 0xffffffff, 0xfffffeff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffef000, 0xffffffff, 0xffffff7f, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffff7800, 0xffffffff, 0xffffffbf, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffbc00, 0xffffffff, 0xffffffdf, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffde00, 0xffffffff, 0xffffffef, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffef00, 0xffffffff, 0xfffffff7, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffff780, 0xffffffff, 0xfffffffb, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffffbc0, 0xffffffff, 0xfffffffd, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffffde0, 0xffffffff, 0xfffffffe, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffffef0, 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffff78, 0xbfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffffbc, 0xdfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffffde, 0xefffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffffef, 0xf7ffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x80000001, 0xfffffff7, 0xfbffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xc0000001, 0xfffffffb, 0xfdffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xe0000001, 0xfffffffd, 0xfeffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xf0000001, 0xfffffffe, 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x78000001, 0xffffffff, 0xffbfffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xbc000001, 0xffffffff, 0xffdfffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xde000001, 0xffffffff, 0xffefffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xef000001, 0xffffffff, 0xfff7ffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xf7800001, 0xffffffff, 0xfffbffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfbc00001, 0xffffffff, 0xfffdffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfde00001, 0xffffffff, 0xfffeffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfef00001, 0xffffffff, 0xffff7fff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xff780001, 0xffffffff, 0xffffbfff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffbc0001, 0xffffffff, 0xffffdfff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffde0001, 0xffffffff, 0xffffefff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffef0001, 0xffffffff, 0xfffff7ff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfff78001, 0xffffffff, 0xfffffbff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffbc001, 0xffffffff, 0xfffffdff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffde001, 0xffffffff, 0xfffffeff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffef001, 0xffffffff, 0xffffff7f, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffff7801, 0xffffffff, 0xffffffbf, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffbc01, 0xffffffff, 0xffffffdf, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffde01, 0xffffffff, 0xffffffef, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffef01, 0xffffffff, 0xfffffff7, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffff781, 0xffffffff, 0xfffffffb, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffbc1, 0xffffffff, 0xfffffffd, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffde1, 0xffffffff, 0xfffffffe, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffef1, 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffff79, 0xbfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffffbd, 0xdfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffffdf, 0xefffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffff0, 0xf7ffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000}}};
|
||||
static constexpr storage<8> rou = {0x42f8ef94, 0x6070024f, 0xe11a6161, 0xad187148,
|
||||
0x9c8b0fa5, 0x3f046451, 0x87529cfa, 0x005282db};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#define DEVICE_CONTEXT_H
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
namespace device_context {
|
||||
|
||||
@@ -30,6 +31,28 @@ namespace device_context {
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace device_context
|
||||
// checking whether a pointer is on host or device and asserts device matches provided device
|
||||
static bool is_host_ptr(const void* p, int device_id = 0)
|
||||
{
|
||||
cudaPointerAttributes attributes;
|
||||
CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
|
||||
const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
|
||||
attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
|
||||
const bool is_on_cur_device = !is_on_host && attributes.device == device_id;
|
||||
const bool is_valid_ptr = is_on_host || is_on_cur_device;
|
||||
if (!is_valid_ptr) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Invalid ptr"); }
|
||||
|
||||
return is_on_host;
|
||||
}
|
||||
|
||||
static int get_cuda_device(const void* p)
|
||||
{
|
||||
cudaPointerAttributes attributes;
|
||||
CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
|
||||
const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
|
||||
attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
|
||||
return is_on_host ? -1 : attributes.device;
|
||||
}
|
||||
|
||||
} // namespace device_context
|
||||
#endif
|
||||
174
icicle/include/hash/hash.cuh
Normal file
174
icicle/include/hash/hash.cuh
Normal file
@@ -0,0 +1,174 @@
|
||||
#pragma once
|
||||
#ifndef HASH_H
|
||||
#define HASH_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include <cassert>
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace hash
|
||||
* Includes classes and methods for describing hash functions.
|
||||
*/
|
||||
namespace hash {
|
||||
|
||||
/**
|
||||
* @struct HashConfig
|
||||
* Encodes hash operations parameters.
|
||||
*/
|
||||
struct HashConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool
|
||||
are_outputs_on_device; /**< True if outputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the hash operations asynchronously. If set to `true`, the functions will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false,
|
||||
* functions will block the current CPU thread. */
|
||||
};
|
||||
|
||||
/**
|
||||
* A function that returns the default value of [HashConfig](@ref HashConfig) for the [Hasher](@ref
|
||||
* Hasher) class.
|
||||
* @return Default value of [HashConfig](@ref HashConfig).
|
||||
*/
|
||||
static HashConfig
default_hash_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
{
  // Host-resident inputs and outputs, blocking execution.
  return HashConfig{
    ctx,   // ctx
    false, // are_inputs_on_device
    false, // are_outputs_on_device
    false, // is_async
  };
}
|
||||
|
||||
/**
|
||||
* @class Hasher
|
||||
*
|
||||
* An interface containing methods for hashing
|
||||
*
|
||||
* @tparam PreImage type of inputs elements
|
||||
* @tparam Image type of state elements. Also used to describe the type of hash output
|
||||
*/
|
||||
template <typename PreImage, typename Image>
|
||||
class Hasher
|
||||
{
|
||||
public:
|
||||
/// @brief the width of permutation state
|
||||
const unsigned int width;
|
||||
|
||||
/// @brief how many elements a state can fit per 1 permutation. Used with domain separation.
|
||||
const unsigned int preimage_max_length;
|
||||
|
||||
/// @brief portion of the state to absorb input into, or squeeze output from
|
||||
const unsigned int rate;
|
||||
|
||||
/// @brief start squeezing from this offset. Used with domain separation.
|
||||
const unsigned int offset;
|
||||
|
||||
Hasher(unsigned int width, unsigned int preimage_max_length, unsigned int rate, unsigned int offset)
|
||||
: width(width), preimage_max_length(preimage_max_length), rate(rate), offset(offset)
|
||||
{
|
||||
assert(
|
||||
rate * sizeof(PreImage) <= preimage_max_length * sizeof(Image) &&
|
||||
"Input rate can not be bigger than preimage max length");
|
||||
}
|
||||
|
||||
virtual cudaError_t hash_2d(
|
||||
const Matrix<PreImage>* inputs,
|
||||
Image* states,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int output_len,
|
||||
uint64_t number_of_rows,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Absorb 2d is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
};
|
||||
|
||||
virtual cudaError_t compress_and_inject(
|
||||
const Matrix<PreImage>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
uint64_t number_of_rows,
|
||||
const Image* prev_layer,
|
||||
Image* next_layer,
|
||||
unsigned int digest_elements,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Compress and inject is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
}
|
||||
|
||||
/// @param input pointer to input allocated on-device
|
||||
/// @param out pointer to output allocated on-device
|
||||
cudaError_t compress_many(
|
||||
const Image* input,
|
||||
Image* out,
|
||||
unsigned int number_of_states,
|
||||
unsigned int output_len,
|
||||
const HashConfig& cfg) const
|
||||
{
|
||||
return hash_many((const PreImage*)input, out, number_of_states, width, output_len, cfg);
|
||||
}
|
||||
|
||||
virtual cudaError_t run_hash_many_kernel(
|
||||
const PreImage* input,
|
||||
Image* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Hash many kernel is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
};
|
||||
|
||||
cudaError_t hash_many(
|
||||
const PreImage* input,
|
||||
Image* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const HashConfig& cfg) const
|
||||
{
|
||||
const PreImage* d_input;
|
||||
PreImage* d_alloc_input;
|
||||
Image* d_output;
|
||||
if (!cfg.are_inputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_input, number_of_states * input_len * sizeof(PreImage), cfg.ctx.stream));
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_alloc_input, input, number_of_states * input_len * sizeof(PreImage), cudaMemcpyHostToDevice,
|
||||
cfg.ctx.stream));
|
||||
d_input = d_alloc_input;
|
||||
} else {
|
||||
d_input = input;
|
||||
}
|
||||
|
||||
if (!cfg.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_output, number_of_states * output_len * sizeof(Image), cfg.ctx.stream));
|
||||
} else {
|
||||
d_output = output;
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(run_hash_many_kernel(d_input, d_output, number_of_states, input_len, output_len, cfg.ctx));
|
||||
|
||||
if (!cfg.are_inputs_on_device) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_input, cfg.ctx.stream)); }
|
||||
if (!cfg.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
output, d_output, number_of_states * output_len * sizeof(Image), cudaMemcpyDeviceToHost, cfg.ctx.stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(d_output, cfg.ctx.stream));
|
||||
}
|
||||
|
||||
if (!cfg.is_async) CHK_IF_RETURN(cudaStreamSynchronize(cfg.ctx.stream));
|
||||
|
||||
return CHK_LAST();
|
||||
};
|
||||
};
|
||||
} // namespace hash
|
||||
|
||||
#endif
|
||||
@@ -6,51 +6,67 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
using namespace hash;
|
||||
|
||||
namespace keccak {
|
||||
/**
|
||||
* @struct KeccakConfig
|
||||
* Struct that encodes various Keccak parameters.
|
||||
*/
|
||||
struct KeccakConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the Keccak asynchronously. If set to `true`, the keccak_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, keccak_hash
|
||||
* function will block the current CPU thread. */
|
||||
// Input rate in bytes
|
||||
const int KECCAK_256_RATE = 136;
|
||||
const int KECCAK_512_RATE = 72;
|
||||
|
||||
// Digest size in u64
|
||||
const int KECCAK_256_DIGEST = 4;
|
||||
const int KECCAK_512_DIGEST = 8;
|
||||
|
||||
// Number of state elements in u64
|
||||
const int KECCAK_STATE_SIZE = 25;
|
||||
|
||||
const int KECCAK_PADDING_CONST = 1;
|
||||
const int SHA3_PADDING_CONST = 6;
|
||||
|
||||
class Keccak : public Hasher<uint8_t, uint64_t>
|
||||
{
|
||||
public:
|
||||
const int PADDING_CONST;
|
||||
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const uint8_t* input,
|
||||
uint64_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const override;
|
||||
|
||||
Keccak(unsigned int rate, unsigned int padding_const)
|
||||
: Hasher<uint8_t, uint64_t>(KECCAK_STATE_SIZE, KECCAK_STATE_SIZE, rate, 0), PADDING_CONST(padding_const)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
KeccakConfig default_keccak_config()
|
||||
class Keccak256 : public Keccak
|
||||
{
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
KeccakConfig config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputes_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
public:
|
||||
Keccak256() : Keccak(KECCAK_256_RATE, KECCAK_PADDING_CONST) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* Compute the keccak hash over a sequence of preimages.
|
||||
* Takes {number_of_blocks * input_block_size} u64s of input and computes {number_of_blocks} outputs, each of size {D
|
||||
* / 64} u64
|
||||
* @tparam C - number of bits of capacity (c = b - r = 1600 - r). Only multiples of 64 are supported.
|
||||
* @tparam D - number of bits of output. Only multiples of 64 are supported.
|
||||
* @param input a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be of size [input_block_size](@ref input_block_size) * [number_of_blocks](@ref
|
||||
* number_of_blocks)}.
|
||||
* @param input_block_size - size of each input block in bytes. Should be divisible by 8.
|
||||
* @param number_of_blocks number of input and output blocks. One GPU thread processes one block
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be of size [output_block_size](@ref output_block_size) * [number_of_blocks](@ref
|
||||
* number_of_blocks)}
|
||||
*/
|
||||
template <int C, int D>
|
||||
cudaError_t
|
||||
keccak_hash(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config);
|
||||
class Keccak512 : public Keccak
|
||||
{
|
||||
public:
|
||||
Keccak512() : Keccak(KECCAK_512_RATE, KECCAK_PADDING_CONST) {}
|
||||
};
|
||||
|
||||
class Sha3_256 : public Keccak
|
||||
{
|
||||
public:
|
||||
Sha3_256() : Keccak(KECCAK_256_RATE, SHA3_PADDING_CONST) {}
|
||||
};
|
||||
|
||||
class Sha3_512 : public Keccak
|
||||
{
|
||||
public:
|
||||
Sha3_512() : Keccak(KECCAK_512_RATE, SHA3_PADDING_CONST) {}
|
||||
};
|
||||
} // namespace keccak
|
||||
|
||||
#endif
|
||||
14
icicle/include/matrix/matrix.cuh
Normal file
14
icicle/include/matrix/matrix.cuh
Normal file
@@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
#ifndef MATRIX_H
|
||||
#define MATRIX_H
|
||||
|
||||
namespace matrix {
|
||||
template <typename T>
|
||||
struct Matrix {
|
||||
T* values;
|
||||
size_t width;
|
||||
size_t height;
|
||||
};
|
||||
} // namespace matrix
|
||||
|
||||
#endif
|
||||
130
icicle/include/merkle-tree/merkle.cuh
Normal file
130
icicle/include/merkle-tree/merkle.cuh
Normal file
@@ -0,0 +1,130 @@
|
||||
#pragma once
|
||||
#ifndef MERKLE_H
|
||||
#define MERKLE_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace hash;
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace merkle_tree
|
||||
* Implementation of the [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) builder,
|
||||
* parallelized for the use on GPU
|
||||
*/
|
||||
namespace merkle_tree {
|
||||
static constexpr size_t GIGA = 1024 * 1024 * 1024;
|
||||
|
||||
/// Bytes per stream
|
||||
static constexpr uint64_t STREAM_CHUNK_SIZE = GIGA;
|
||||
|
||||
/// Flattens the tree digests and sum them up to get
|
||||
/// the memory needed to contain all the digests
|
||||
static size_t get_digests_len(uint32_t height, uint32_t arity, uint32_t digest_elements)
{
  // Level 0 contributes `digest_elements`; every further level is `arity` times larger.
  // Levels 0..height (inclusive) are summed.
  size_t digests_len = 0;
  size_t row_length = digest_elements;

  // The counter is unsigned and wider than `height`, so the comparison has no
  // signed/unsigned mismatch and terminates for every possible `height`.
  for (uint64_t level = 0; level <= height; level++) {
    digests_len += row_length;
    row_length *= arity;
  }

  return digests_len;
}
|
||||
|
||||
template <typename T>
void swap(T** r, T** s)
{
  // Exchange the two pointers stored at *r and *s; the pointees are untouched.
  T* const from_r = *r;
  T* const from_s = *s;
  *r = from_s;
  *s = from_r;
}
|
||||
|
||||
static unsigned int get_height(uint64_t number_of_elements)
{
  // Counts how many times the value can be halved before reaching zero,
  // i.e. floor(log2(n)) for n >= 1 and 0 for n == 0.
  unsigned int levels = 0;
  for (uint64_t remaining = number_of_elements >> 1; remaining != 0; remaining >>= 1) {
    ++levels;
  }
  return levels;
}
|
||||
|
||||
/**
|
||||
* @struct TreeBuilderConfig
|
||||
* Struct that encodes various Tree builder parameters.
|
||||
*/
|
||||
struct TreeBuilderConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
unsigned int arity;
|
||||
unsigned int
|
||||
keep_rows; /**< How many rows of the Merkle tree rows should be written to output. '0' means all of them */
|
||||
unsigned int
|
||||
digest_elements; /** @param digest_elements the size of output for each bottom layer hash and compression.
|
||||
* Will also be equal to the size of the root of the tree. Default value 1 */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool
|
||||
are_outputs_on_device; /**< True if outputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the tree builder asynchronously. If set to `true`, the build_merkle_tree
|
||||
* function will be non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static TreeBuilderConfig
default_merkle_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
{
  // Binary tree, all rows kept, one element per digest, host-resident data, blocking execution.
  constexpr unsigned int default_arity = 2;
  constexpr unsigned int default_keep_rows = 0;
  constexpr unsigned int default_digest_elements = 1;

  return TreeBuilderConfig{
    ctx,                     // ctx
    default_arity,           // arity
    default_keep_rows,       // keep_rows
    default_digest_elements, // digest_elements
    false,                   // are_inputs_on_device
    false,                   // are_outputs_on_device
    false,                   // is_async
  };
}
|
||||
|
||||
/**
|
||||
* Builds the Merkle tree
|
||||
*
|
||||
* @param leaves a pointer to the leaves layer. May be allocated on device or on host, regulated by the config
|
||||
* Expected to have arity ^ (height) * input_block_len elements
|
||||
* @param digests a pointer to the digests storage. May only be allocated on the host
|
||||
* Expected to have `sum(digests_len * (arity ^ (i))) for i in [0..keep_rows]`
|
||||
* @param height the height of the merkle tree
|
||||
* @param input_block_len the size of input vectors at the bottom layer of the tree
|
||||
* # Algorithm
|
||||
* The function will split large tree into many subtrees of size that will fit `STREAM_CHUNK_SIZE`.
|
||||
* Each subtree is built in its own stream (there is a maximum number of streams)
|
||||
* After all subtrees are constructed - the function will combine the resulting sub-digests into the final top-tree
|
||||
*/
|
||||
template <typename Leaf, typename Digest>
|
||||
cudaError_t build_merkle_tree(
|
||||
const Leaf* inputs,
|
||||
Digest* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const Hasher<Leaf, Digest>& compression,
|
||||
const Hasher<Leaf, Digest>& bottom_layer,
|
||||
const TreeBuilderConfig& config);
|
||||
|
||||
template <typename Leaf, typename Digest>
|
||||
cudaError_t mmcs_commit(
|
||||
const Matrix<Leaf>* inputs,
|
||||
const unsigned int number_of_inputs,
|
||||
Digest* digests,
|
||||
const Hasher<Leaf, Digest>& hasher,
|
||||
const Hasher<Leaf, Digest>& compression,
|
||||
const TreeBuilderConfig& tree_config);
|
||||
} // namespace merkle_tree
|
||||
|
||||
#endif
|
||||
@@ -43,7 +43,7 @@ namespace msm {
|
||||
* points, it should be set to the product of MSM size and [batch_size](@ref
|
||||
* batch_size). Default value: 0 (meaning it's equal to the MSM size). */
|
||||
int precompute_factor; /**< The number of extra points to pre-compute for each point. See the
|
||||
* [precompute_msm_bases](@ref precompute_msm_bases) function, `precompute_factor` passed
|
||||
* [precompute_msm_points](@ref precompute_msm_points) function, `precompute_factor` passed
|
||||
* there needs to be equal to the one used here. Larger values decrease the
|
||||
* number of computations to make, on-line memory footprint, but increase the static
|
||||
* memory footprint. Default value: 1 (i.e. don't pre-compute). */
|
||||
@@ -52,7 +52,7 @@ namespace msm {
|
||||
* means more on-line memory footprint but also more parallelism and less computational
|
||||
* complexity (up to a certain point). Currently pre-computation is independent of
|
||||
* \f$ c \f$, however in the future value of \f$ c \f$ here and the one passed into the
|
||||
* [precompute_msm_bases](@ref precompute_msm_bases) function will need to be identical.
|
||||
* [precompute_msm_points](@ref precompute_msm_points) function will need to be identical.
|
||||
* Default value: 0 (the optimal value of \f$ c \f$ is chosen automatically). */
|
||||
int bitsize; /**< Number of bits of the largest scalar. Typically equals the bitsize of scalar field,
|
||||
* but if a different (better) upper bound is known, it should be reflected in this
|
||||
@@ -127,6 +127,26 @@ namespace msm {
|
||||
template <typename S, typename A, typename P>
|
||||
cudaError_t msm(const S* scalars, const A* points, int msm_size, MSMConfig& config, P* results);
|
||||
|
||||
/**
|
||||
* A function that precomputes MSM bases by extending them with their shifted copies.
|
||||
* e.g.:
|
||||
* Original points: \f$ P_0, P_1, P_2, ... P_{size} \f$
|
||||
* Extended points: \f$ P_0, P_1, P_2, ... P_{size}, 2^{l}P_0, 2^{l}P_1, ..., 2^{l}P_{size},
|
||||
* 2^{2l}P_0, 2^{2l}P_1, ..., 2^{2cl}P_{size}, ... \f$
|
||||
* @param points Points \f$ P_i \f$. In case of batch MSM, all *unique* points are concatenated.
|
||||
* @param msm_size MSM size \f$ N \f$. If a batch of MSMs (which all need to have the same size) is computed, this is
|
||||
* the size of 1 MSM.
|
||||
* @param config [MSMConfig](@ref MSMConfig) used in this MSM.
|
||||
* @param output_points Device-allocated buffer of size config.points_size * precompute_factor for the extended
|
||||
* points.
|
||||
* @tparam A The type of points \f$ \{P_i\} \f$ which is typically an [affine
|
||||
* Weierstrass](https://hyperelliptic.org/EFD/g1p/auto-shortw.html) point.
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*
|
||||
*/
|
||||
template <typename A, typename P>
|
||||
cudaError_t precompute_msm_points(A* points, int msm_size, msm::MSMConfig& config, A* output_points);
|
||||
|
||||
/**
|
||||
* A function that precomputes MSM bases by extending them with their shifted copies.
|
||||
* e.g.:
|
||||
@@ -148,7 +168,7 @@ namespace msm {
|
||||
*
|
||||
*/
|
||||
template <typename A, typename P>
|
||||
cudaError_t precompute_msm_bases(
|
||||
[[deprecated("Use precompute_msm_points instead.")]] cudaError_t precompute_msm_bases(
|
||||
A* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
|
||||
114
icicle/include/poseidon/constants.cuh
Normal file
114
icicle/include/poseidon/constants.cuh
Normal file
@@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
#ifndef POSEIDON_CONSTANTS_H
|
||||
#define POSEIDON_CONSTANTS_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace poseidon {
|
||||
#define FIRST_FULL_ROUNDS true
|
||||
#define SECOND_FULL_ROUNDS false
|
||||
|
||||
/**
|
||||
* For most of the Poseidon configurations this is the case
|
||||
* TODO: Add support for different full rounds numbers
|
||||
*/
|
||||
const int FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConstants
|
||||
* These constants are enough to define a Poseidon instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
struct PoseidonConstants {
  // Scalar parameters are zero-initialised so a default-constructed instance
  // never exposes indeterminate values.
  unsigned int arity = 0;
  unsigned int alpha = 0;
  unsigned int partial_rounds = 0;
  unsigned int full_rounds_half = 0;

  // Non-owning pointers: copying the struct copies the pointer values only.
  S* round_constants = nullptr;
  S* mds_matrix = nullptr;
  S* non_sparse_matrix = nullptr;
  S* sparse_matrices = nullptr;
  S domain_tag = S::zero();

  PoseidonConstants() = default;
  PoseidonConstants(const PoseidonConstants& other) = default;

  // Member-wise copy assignment, matching the defaulted copy constructor.
  PoseidonConstants<S>& operator=(PoseidonConstants<S> const& other) = default;
};
|
||||
|
||||
/**
 * @class PoseidonKernelsConfiguration
 * Describes the logic of deriving CUDA kernels parameters
 * such as the number of threads and the number of blocks.
 *
 * All functions taking `width` require 0 < width <= 256: a width of 0 divides by zero,
 * and a width above 256 makes number_of_threads() return 0, which number_of_full_blocks()
 * then divides by.
 */
class PoseidonKernelsConfiguration
{
public:
  // The logic behind this is that 1 thread only works on 1 element.
  // We have {width} elements in each state, and {number_of_states} states total.
  // The result is the largest multiple of {width} that does not exceed 256,
  // so a block always covers a whole number of states.
  static int number_of_threads(unsigned int width) { return 256 / width * width; }

  // The partial rounds operates on the whole state, so we define
  // the parallelism params for processing a single hash preimage per thread
  static const int singlehash_block_size = 128;

  // Number of whole states handled by one block of number_of_threads(width) threads.
  static int hashes_per_block(unsigned int width) { return number_of_threads(width) / width; }

  // Number of blocks of number_of_threads(width) threads needed to give every element of
  // every state its own thread (rounded up).
  static int number_of_full_blocks(unsigned int width, size_t number_of_states)
  {
    // Keep the product in size_t: stored in an int it overflows once
    // number_of_states * width exceeds INT_MAX and the block count turns negative.
    const size_t total_number_of_threads = number_of_states * width;
    const size_t threads_per_block = static_cast<size_t>(number_of_threads(width));
    const size_t full_blocks = total_number_of_threads / threads_per_block;
    const size_t tail_block = (total_number_of_threads % threads_per_block != 0) ? 1 : 0;
    return static_cast<int>(full_blocks + tail_block);
  }

  // Number of blocks of singlehash_block_size threads needed when each thread
  // processes one whole state (rounded up).
  static int number_of_singlehash_blocks(size_t number_of_states)
  {
    const size_t block_size = static_cast<size_t>(singlehash_block_size);
    const size_t full_blocks = number_of_states / block_size;
    const size_t tail_block = (number_of_states % block_size != 0) ? 1 : 0;
    return static_cast<int>(full_blocks + tail_block);
  }
};
|
||||
|
||||
using PKC = PoseidonKernelsConfiguration;
|
||||
|
||||
/**
 * Declaration only; the definition is not part of this header.
 * NOTE(review): judging by the parameter names, this fills `*poseidon_constants` from the
 * given scalar parameters, constant arrays and domain tag — confirm against the definition,
 * including whether the input arrays are expected in host or device memory.
 * @return a cudaError_t status code
 */
template <typename S>
|
||||
cudaError_t create_optimized_poseidon_constants(
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
PoseidonConstants<S>* poseidon_constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
* NOTE(review): presumably `arity` selects which pre-calculated set is loaded and the result
|
||||
* is written to `*constants` - confirm against the definition, which is not in this header.
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t
|
||||
init_optimized_poseidon_constants(int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* constants);
|
||||
|
||||
/**
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably frees the device buffers referenced by `*constants` — confirm
 * against the definition before relying on the pointers afterwards.
 */
template <typename S>
|
||||
cudaError_t release_optimized_poseidon_constants(PoseidonConstants<S>* constants, device_context::DeviceContext& ctx);
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -8,17 +8,18 @@ import numpy as np
|
||||
from poseidon import round_constants as rc, round_numbers as rn
|
||||
|
||||
# Modify these
|
||||
arity = 11
|
||||
p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 # grumpkin
|
||||
arity = 2
|
||||
p = 2 ** 31 - 1 # m31 (Mersenne prime 2^31 - 1)
|
||||
# p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 # grumpkin
|
||||
# p = 0x73EDA753299D7D483339D80809A1D80553BDA402FFFE5BFEFFFFFFFF00000001 # bls12-381
|
||||
# p = 0x12ab655e9a2ca55660b44d1e5c37b00159aa76fed00000010a11800000000001 # bls12-377
|
||||
# p = 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001 # bn254
|
||||
# p = 0x1ae3a4617c510eac63b05c06ca1493b1a22d9f300f5138f1ef3622fba094800170b5d44300000008508c00000000001 # bw6-761
|
||||
prime_bit_len = 255
|
||||
field_bytes = 32
|
||||
prime_bit_len = 31
|
||||
field_bytes = 4
|
||||
|
||||
# leave set to -1 if not sure
|
||||
full_round = -1
|
||||
full_round = 8
|
||||
half_full_round = full_round // 2
|
||||
# leave set to -1 if not sure
|
||||
partial_round = -1
|
||||
@@ -31,12 +32,12 @@ security_level = 128
|
||||
# F = GF(p)
|
||||
# F.primitive_element()
|
||||
#
|
||||
# primitive_element = None
|
||||
primitive_element = None
|
||||
# primitive_element = 7 # bls12-381
|
||||
# primitive_element = 22 # bls12-377
|
||||
# primitive_element = 5 # bn254
|
||||
# primitive_element = 15 # bw6-761
|
||||
primitive_element = 3 # grumpkin
|
||||
# primitive_element = 3 # grumpkin
|
||||
|
||||
# currently we only support alpha 5; if you need an alpha other than 5, feel free to reach out
|
||||
alpha = 5
|
||||
|
||||
508
icicle/include/poseidon/constants/m31_poseidon.h
Normal file
508
icicle/include/poseidon/constants/m31_poseidon.h
Normal file
@@ -0,0 +1,508 @@
|
||||
#pragma once
|
||||
#ifndef M31_POSEIDON_H
|
||||
#define M31_POSEIDON_H
|
||||
|
||||
namespace poseidon_constants_m31 {
|
||||
/**
|
||||
* This inner namespace contains optimized constants for running Poseidon.
|
||||
* These constants were generated using an algorithm defined at
|
||||
* https://spec.filecoin.io/algorithms/crypto/poseidon/
|
||||
* The number in the name corresponds to the arity of hash function
|
||||
* Each array contains:
|
||||
* RoundConstants | MDSMatrix | Non-sparse matrix | Sparse matrices
|
||||
*/
|
||||
|
||||
// Partial-round counts, one per supported arity; the numeric suffix is the arity,
// matching the suffix of the poseidon_constants_<arity> arrays below.
int partial_rounds_2 = 7;
|
||||
|
||||
int partial_rounds_4 = 11;
|
||||
|
||||
int partial_rounds_8 = 12;
|
||||
|
||||
int partial_rounds_11 = 12;
|
||||
|
||||
unsigned char poseidon_constants_2[] = {
|
||||
0x33, 0x8b, 0x6d, 0x47, 0xbb, 0x97, 0x11, 0x67, 0x92, 0x9d, 0x55, 0x2d,
|
||||
0xee, 0x1e, 0x2e, 0x45, 0xfe, 0x35, 0x0e, 0x25, 0x7e, 0xc3, 0x4f, 0x70,
|
||||
0x4d, 0x0a, 0x8c, 0x18, 0xd9, 0x43, 0xa4, 0x61, 0xfb, 0x14, 0xd9, 0x14,
|
||||
0x99, 0x13, 0xb9, 0x30, 0xec, 0x3b, 0x8c, 0x16, 0xcc, 0xb2, 0x0b, 0x2e,
|
||||
0x9e, 0x18, 0xbf, 0x26, 0xb6, 0xb7, 0x2a, 0x44, 0x61, 0x29, 0xdb, 0x21,
|
||||
0x18, 0x84, 0x03, 0x4e, 0xef, 0x95, 0xf9, 0x45, 0xe3, 0xd8, 0xf2, 0x46,
|
||||
0x82, 0xb4, 0xc9, 0x5e, 0x5f, 0xf3, 0xb2, 0x4f, 0x61, 0x80, 0x50, 0x0f,
|
||||
0x0d, 0x7f, 0xe3, 0x1b, 0x23, 0xbd, 0x05, 0x2f, 0x0f, 0xb1, 0x60, 0x67,
|
||||
0xd8, 0x85, 0xdf, 0x57, 0x0c, 0x8c, 0xdf, 0x50, 0x9e, 0x65, 0x3c, 0x58,
|
||||
0x07, 0xbd, 0x29, 0x7e, 0xc5, 0xe5, 0xa7, 0x5a, 0x5a, 0x4b, 0x0c, 0x29,
|
||||
0x89, 0x9d, 0x14, 0x11, 0x8c, 0x20, 0xcb, 0x76, 0x4d, 0x56, 0x2d, 0x4a,
|
||||
0x10, 0xda, 0xaf, 0x0a, 0x65, 0x9d, 0x98, 0x3e, 0xa1, 0xac, 0x57, 0x46,
|
||||
0xcb, 0xe8, 0xfc, 0x5b, 0xd4, 0x43, 0x4b, 0x63, 0x1b, 0x13, 0x4b, 0x1f,
|
||||
0xed, 0xac, 0xbf, 0x30, 0x27, 0x15, 0xac, 0x53, 0x4b, 0x27, 0x61, 0x3e,
|
||||
0x37, 0xc3, 0x65, 0x74, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x00, 0x20,
|
||||
0x33, 0x33, 0x33, 0x33, 0x00, 0x00, 0x00, 0x20, 0x33, 0x33, 0x33, 0x33,
|
||||
0xaa, 0xaa, 0xaa, 0x6a, 0x33, 0x33, 0x33, 0x33, 0xaa, 0xaa, 0xaa, 0x6a,
|
||||
0x6d, 0xdb, 0xb6, 0x6d, 0x55, 0x55, 0x55, 0x55, 0xc0, 0x72, 0x8d, 0x36,
|
||||
0x2c, 0xe5, 0xc0, 0x51, 0x00, 0x00, 0x00, 0x20, 0x0b, 0xd5, 0x67, 0x6c,
|
||||
0x6c, 0x67, 0x2c, 0x13, 0x33, 0x33, 0x33, 0x33, 0x6c, 0x67, 0x2c, 0x13,
|
||||
0xe6, 0xb8, 0x2c, 0x62, 0x55, 0x55, 0x55, 0x55, 0x15, 0x1f, 0xaf, 0x6a,
|
||||
0xd9, 0xa8, 0x14, 0x44, 0xae, 0xb0, 0x38, 0x4b, 0x17, 0x76, 0xd9, 0x39,
|
||||
0x55, 0x55, 0x55, 0x55, 0x28, 0xef, 0x9d, 0x4f, 0xc7, 0x3b, 0xa6, 0x24,
|
||||
0x84, 0x5b, 0x79, 0x6f, 0xde, 0x4f, 0x8f, 0x3d, 0x55, 0x55, 0x55, 0x55,
|
||||
0x54, 0xc2, 0xb2, 0x00, 0x5a, 0xed, 0x68, 0x0c, 0xeb, 0xd4, 0xc4, 0x61,
|
||||
0x02, 0x8c, 0x85, 0x27, 0x55, 0x55, 0x55, 0x55, 0xe4, 0xc5, 0xbd, 0x0a,
|
||||
0xf6, 0xec, 0x75, 0x26, 0xe0, 0xdb, 0xd8, 0x52, 0xdf, 0x28, 0xff, 0x33,
|
||||
0x55, 0x55, 0x55, 0x55, 0xac, 0x68, 0x06, 0x00, 0xc9, 0xff, 0x91, 0x19,
|
||||
0xb1, 0x12, 0x2b, 0x19, 0xa2, 0xdd, 0x47, 0x39, 0x55, 0x55, 0x55, 0x55,
|
||||
0xd5, 0x03, 0x00, 0x00, 0x45, 0xc8, 0xcc, 0x4c, 0x55, 0x55, 0x55, 0x35,
|
||||
0x8d, 0xd6, 0x68, 0x3d, 0x55, 0x55, 0x55, 0x55, 0x03, 0x00, 0x00, 0x00,
|
||||
0x64, 0x66, 0x66, 0x26, 0x00, 0x00, 0x00, 0x20, 0x33, 0x33, 0x33, 0x33
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_4[] = {
|
||||
0xdb, 0x64, 0xa5, 0x32, 0xd6, 0x3d, 0x12, 0x6e, 0x65, 0x66, 0x46, 0x59,
|
||||
0x2a, 0x64, 0x51, 0x3b, 0xaf, 0xbe, 0x72, 0x0b, 0x66, 0x5f, 0x5c, 0x6c,
|
||||
0x66, 0x11, 0x8c, 0x61, 0x99, 0x24, 0x99, 0x14, 0x1d, 0x5f, 0x67, 0x0a,
|
||||
0x4d, 0xab, 0xc4, 0x1e, 0x43, 0xb2, 0x09, 0x58, 0xc0, 0x27, 0x4c, 0x5b,
|
||||
0xf0, 0x0c, 0xf5, 0x12, 0xc9, 0x2f, 0x88, 0x4f, 0x59, 0x52, 0x5b, 0x6a,
|
||||
0x73, 0x90, 0x55, 0x5b, 0xaf, 0x47, 0x55, 0x0d, 0xa7, 0xc2, 0x0c, 0x6e,
|
||||
0xe6, 0xd6, 0x4e, 0x30, 0x9e, 0x75, 0x47, 0x12, 0xca, 0x93, 0xd1, 0x5b,
|
||||
0x64, 0x27, 0xfc, 0x60, 0x6c, 0x16, 0x52, 0x20, 0xf5, 0xe0, 0x01, 0x15,
|
||||
0x27, 0xf9, 0x96, 0x7f, 0xa0, 0x38, 0xad, 0x3c, 0x95, 0xd3, 0xe4, 0x32,
|
||||
0x57, 0x95, 0x5a, 0x6b, 0x12, 0xcc, 0xdc, 0x18, 0x2b, 0xdd, 0xa4, 0x66,
|
||||
0xbf, 0xe7, 0x96, 0x15, 0x85, 0x87, 0x6a, 0x1f, 0x15, 0x19, 0x9c, 0x65,
|
||||
0xef, 0x24, 0xaa, 0x2c, 0x3f, 0x6b, 0xbc, 0x6b, 0x54, 0x24, 0x2c, 0x17,
|
||||
0xf1, 0x7a, 0x8d, 0x57, 0x90, 0xa4, 0xd4, 0x4a, 0x12, 0x06, 0x77, 0x6a,
|
||||
0xe8, 0x6b, 0xd9, 0x51, 0x80, 0x72, 0xa1, 0x31, 0xce, 0xa8, 0x59, 0x10,
|
||||
0x0c, 0x90, 0xd4, 0x10, 0x8e, 0x60, 0x54, 0x1c, 0xe7, 0xfd, 0x42, 0x3a,
|
||||
0x73, 0xc1, 0xcc, 0x4f, 0x58, 0xbb, 0x99, 0x7c, 0xd2, 0x51, 0xda, 0x43,
|
||||
0xea, 0x6e, 0xe8, 0x16, 0xb2, 0x51, 0x53, 0x61, 0x7e, 0x68, 0x44, 0x3c,
|
||||
0x33, 0x33, 0x33, 0x33, 0xaa, 0xaa, 0xaa, 0x6a, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71, 0xaa, 0xaa, 0xaa, 0x6a,
|
||||
0x6d, 0xdb, 0xb6, 0x6d, 0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x99, 0x99, 0x99, 0x59, 0x6d, 0xdb, 0xb6, 0x6d, 0x00, 0x00, 0x00, 0x10,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0x33, 0x33, 0x33, 0x33, 0xae, 0x9d, 0xba, 0x61,
|
||||
0x09, 0xf2, 0xee, 0x53, 0x5e, 0x5c, 0xe8, 0x61, 0x8e, 0x1a, 0x60, 0x6c,
|
||||
0xaa, 0xaa, 0xaa, 0x6a, 0xff, 0x1a, 0xb7, 0x09, 0x1d, 0x84, 0x75, 0x5e,
|
||||
0x88, 0x5e, 0x36, 0x25, 0x6b, 0xd4, 0xdd, 0x65, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x1d, 0x84, 0x75, 0x5e, 0x10, 0x9d, 0x2d, 0x63, 0xa7, 0x62, 0xfc, 0x1f,
|
||||
0xe2, 0x43, 0x63, 0x14, 0x00, 0x00, 0x00, 0x10, 0x88, 0x5e, 0x36, 0x25,
|
||||
0xa7, 0x62, 0xfc, 0x1f, 0x47, 0xa0, 0x19, 0x6f, 0x48, 0x1f, 0x4e, 0x22,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x6b, 0xd4, 0xdd, 0x65, 0xe2, 0x43, 0x63, 0x14,
|
||||
0x48, 0x1f, 0x4e, 0x22, 0xb7, 0x4e, 0x73, 0x01, 0x33, 0x33, 0x33, 0x33,
|
||||
0x84, 0xdd, 0xf7, 0x08, 0x6f, 0xc5, 0x14, 0x63, 0xb6, 0x22, 0x01, 0x3d,
|
||||
0xcd, 0xab, 0x7d, 0x62, 0xac, 0x7e, 0x61, 0x57, 0x40, 0x6b, 0xc5, 0x45,
|
||||
0x77, 0xbc, 0x02, 0x18, 0x8c, 0x66, 0xda, 0x74, 0x33, 0x33, 0x33, 0x33,
|
||||
0x01, 0x9d, 0x33, 0x55, 0xed, 0x7d, 0x75, 0x63, 0x41, 0x92, 0x33, 0x76,
|
||||
0x6b, 0xd5, 0x10, 0x23, 0x1a, 0xc4, 0x49, 0x5b, 0x0c, 0x86, 0x5a, 0x60,
|
||||
0x23, 0xe5, 0xd8, 0x1c, 0x43, 0xe9, 0xe2, 0x0d, 0x33, 0x33, 0x33, 0x33,
|
||||
0x1b, 0x68, 0xec, 0x17, 0x0e, 0x3f, 0x34, 0x1a, 0xb0, 0x28, 0xe9, 0x6c,
|
||||
0xc0, 0xf7, 0x3e, 0x79, 0xdc, 0x08, 0x9e, 0x32, 0x45, 0xde, 0xea, 0x73,
|
||||
0x7a, 0xc4, 0xb4, 0x0d, 0x65, 0xb6, 0x61, 0x04, 0x33, 0x33, 0x33, 0x33,
|
||||
0x41, 0x01, 0x02, 0x6b, 0xd8, 0x62, 0x6b, 0x47, 0x47, 0xd9, 0x7e, 0x72,
|
||||
0x4f, 0x80, 0x31, 0x54, 0x8b, 0x5e, 0x3e, 0x26, 0x64, 0x16, 0xe2, 0x51,
|
||||
0xf4, 0xa6, 0xed, 0x35, 0xc3, 0xe9, 0xc5, 0x41, 0x33, 0x33, 0x33, 0x33,
|
||||
0xd5, 0x3f, 0xed, 0x11, 0xf5, 0x0f, 0x56, 0x41, 0xf6, 0x0d, 0xf3, 0x78,
|
||||
0xb0, 0x78, 0xa1, 0x7d, 0x5d, 0x33, 0xc4, 0x5e, 0xa6, 0xd9, 0x47, 0x4c,
|
||||
0x07, 0xc3, 0x30, 0x5a, 0x91, 0x10, 0x31, 0x20, 0x33, 0x33, 0x33, 0x33,
|
||||
0xa5, 0xec, 0xe5, 0x25, 0xe6, 0xa7, 0x4e, 0x01, 0xee, 0x3a, 0xe7, 0x62,
|
||||
0x02, 0xfd, 0xf9, 0x08, 0xdd, 0x91, 0x3f, 0x2d, 0xca, 0xbc, 0xb5, 0x2c,
|
||||
0x54, 0x9e, 0xd4, 0x78, 0x6b, 0x18, 0x94, 0x21, 0x33, 0x33, 0x33, 0x33,
|
||||
0xe6, 0xb3, 0xd2, 0x2e, 0x49, 0xdb, 0xa8, 0x52, 0x5f, 0x6a, 0x75, 0x59,
|
||||
0xd5, 0x45, 0x5c, 0x73, 0x40, 0xe4, 0xd8, 0x2a, 0x8c, 0xe6, 0xda, 0x50,
|
||||
0x5f, 0x4f, 0x18, 0x5d, 0xf4, 0xa4, 0xf4, 0x46, 0x33, 0x33, 0x33, 0x33,
|
||||
0x3e, 0x90, 0x5b, 0x3a, 0x55, 0x96, 0x22, 0x7c, 0xd9, 0x64, 0x36, 0x4e,
|
||||
0x0b, 0xec, 0x66, 0x65, 0xac, 0x55, 0xa9, 0x19, 0x50, 0x87, 0x49, 0x1a,
|
||||
0x1f, 0x78, 0x89, 0x36, 0x25, 0x2a, 0x06, 0x55, 0x33, 0x33, 0x33, 0x33,
|
||||
0x6b, 0xf1, 0x61, 0x67, 0x67, 0x00, 0xc5, 0x24, 0x9e, 0xd1, 0x94, 0x6f,
|
||||
0xbf, 0x8b, 0xaf, 0x2d, 0x69, 0x9c, 0xb7, 0x62, 0xf8, 0x0a, 0x43, 0x13,
|
||||
0x3c, 0xc0, 0x48, 0x3e, 0x9f, 0x3f, 0xa8, 0x2c, 0x33, 0x33, 0x33, 0x33,
|
||||
0x9d, 0x5b, 0xb2, 0x2b, 0x62, 0x05, 0x39, 0x20, 0x52, 0x1f, 0xe8, 0x05,
|
||||
0x1b, 0x24, 0xc0, 0x13, 0x11, 0x11, 0x11, 0x11, 0x9c, 0x6a, 0x35, 0x45,
|
||||
0xf6, 0x7f, 0x5c, 0x4c, 0x9f, 0xc4, 0x8f, 0x1f, 0x33, 0x33, 0x33, 0x33,
|
||||
0xb1, 0xaa, 0xaa, 0x2a, 0xcb, 0xb6, 0x6d, 0x5b, 0x34, 0x49, 0x92, 0x24,
|
||||
0x90, 0x65, 0x59, 0x56, 0xaa, 0xaa, 0xaa, 0x6a, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_8[] = {
|
||||
0x90, 0xaf, 0x71, 0x3e, 0xa3, 0xbe, 0x5a, 0x30, 0xd4, 0x1b, 0x6f, 0x5d,
|
||||
0xeb, 0x36, 0x6b, 0x53, 0x14, 0xc0, 0x30, 0x13, 0xd5, 0xf8, 0x0b, 0x1c,
|
||||
0xa8, 0x66, 0xf1, 0x3c, 0xbd, 0x64, 0xa3, 0x6c, 0x06, 0x5e, 0x95, 0x7c,
|
||||
0xee, 0xc4, 0x0a, 0x0f, 0x37, 0x03, 0xba, 0x6d, 0x20, 0x85, 0xf1, 0x2c,
|
||||
0xee, 0x59, 0x21, 0x11, 0x42, 0xae, 0xb7, 0x3c, 0x73, 0xb4, 0xd6, 0x71,
|
||||
0x6a, 0x29, 0x40, 0x03, 0x86, 0xd8, 0x32, 0x68, 0x61, 0x62, 0x62, 0x32,
|
||||
0x44, 0x5d, 0xcc, 0x38, 0x76, 0x0f, 0xbc, 0x1f, 0xc9, 0x6e, 0x67, 0x1d,
|
||||
0x95, 0x35, 0x10, 0x79, 0x45, 0xaa, 0x0f, 0x7c, 0x73, 0xfa, 0x5d, 0x3f,
|
||||
0x53, 0xf2, 0xdc, 0x21, 0x37, 0xfa, 0x15, 0x04, 0xfd, 0x31, 0x3d, 0x5d,
|
||||
0x5d, 0xe6, 0x1d, 0x4a, 0xb3, 0x2b, 0xa2, 0x07, 0x2d, 0x48, 0x07, 0x2b,
|
||||
0x92, 0x1c, 0x31, 0x52, 0x6c, 0xd3, 0x32, 0x2f, 0x0f, 0xdd, 0x82, 0x7d,
|
||||
0x41, 0x0e, 0x81, 0x7e, 0x60, 0xfb, 0x49, 0x7b, 0xe5, 0x39, 0x3d, 0x75,
|
||||
0x6d, 0xcf, 0x02, 0x77, 0x0d, 0xf6, 0xf8, 0x0c, 0x43, 0xae, 0x62, 0x5e,
|
||||
0x26, 0x36, 0x9e, 0x3a, 0x10, 0xe3, 0x59, 0x4b, 0x3a, 0x59, 0x49, 0x73,
|
||||
0x31, 0x20, 0xb9, 0x40, 0x39, 0xed, 0xaf, 0x37, 0x6d, 0x5c, 0x4c, 0x6a,
|
||||
0xce, 0xca, 0xc4, 0x33, 0x53, 0x96, 0x92, 0x1d, 0xb2, 0xa1, 0xac, 0x65,
|
||||
0xbb, 0x43, 0xc4, 0x16, 0xf9, 0x38, 0x10, 0x67, 0x3d, 0xbb, 0x28, 0x7a,
|
||||
0x2b, 0x1e, 0x65, 0x36, 0x07, 0x14, 0x36, 0x3c, 0xcb, 0xdf, 0x03, 0x6b,
|
||||
0x03, 0x7b, 0xe6, 0x67, 0x79, 0x2a, 0x08, 0x47, 0xb7, 0x8f, 0x9c, 0x7e,
|
||||
0x54, 0xde, 0x08, 0x0a, 0xf8, 0x99, 0x24, 0x6f, 0x64, 0x78, 0x80, 0x5f,
|
||||
0x43, 0x76, 0x77, 0x40, 0x12, 0x62, 0x71, 0x10, 0x35, 0xf5, 0xdd, 0x0a,
|
||||
0x06, 0xff, 0x9b, 0x7b, 0xd8, 0x1a, 0xf3, 0x50, 0x1d, 0xc3, 0x8c, 0x60,
|
||||
0xe0, 0x61, 0xf5, 0x3d, 0xf9, 0xbf, 0xe4, 0x38, 0x78, 0xbf, 0x59, 0x0e,
|
||||
0xed, 0xc9, 0x4d, 0x0b, 0xb1, 0x7a, 0x10, 0x2b, 0x84, 0x27, 0x07, 0x70,
|
||||
0x5d, 0xc0, 0xa4, 0x7e, 0x9c, 0xf0, 0xf6, 0x69, 0x89, 0x6c, 0xc5, 0x39,
|
||||
0x4a, 0x7d, 0x5e, 0x26, 0x2f, 0x08, 0x9d, 0x05, 0xdc, 0x71, 0xec, 0x08,
|
||||
0x2b, 0xca, 0x68, 0x14, 0x42, 0xf6, 0xe6, 0x0a, 0x2f, 0xa5, 0x34, 0x6d,
|
||||
0x95, 0xaa, 0x80, 0x55, 0x23, 0x0f, 0x5f, 0x20, 0xbe, 0x4d, 0x0b, 0x20,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x50, 0x05, 0xd7, 0x30, 0x09, 0x94, 0x4f, 0x13,
|
||||
0x11, 0x86, 0x4b, 0x61, 0x74, 0x8b, 0x94, 0x0e, 0x7e, 0x5d, 0x93, 0x27,
|
||||
0xeb, 0xb6, 0x4b, 0x61, 0x90, 0x3f, 0x9b, 0x7d, 0x10, 0xe9, 0x16, 0x06,
|
||||
0x99, 0x99, 0x99, 0x59, 0x4f, 0xf6, 0x15, 0x6b, 0x84, 0x8c, 0xe0, 0x5f,
|
||||
0x88, 0x9e, 0xb2, 0x08, 0x32, 0x36, 0xe3, 0x25, 0x64, 0x0a, 0xf5, 0x6f,
|
||||
0x80, 0xff, 0x8e, 0x6f, 0xcd, 0xb5, 0x72, 0x12, 0x90, 0xa2, 0x7a, 0x09,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x84, 0x8c, 0xe0, 0x5f, 0xf5, 0x67, 0x02, 0x2d,
|
||||
0x71, 0x83, 0xf0, 0x55, 0x81, 0xa2, 0x81, 0x4b, 0xec, 0xff, 0xb0, 0x6b,
|
||||
0x17, 0x41, 0xd6, 0x36, 0xf3, 0x16, 0x58, 0x23, 0x49, 0x90, 0xa2, 0x17,
|
||||
0x55, 0x55, 0x55, 0x35, 0x88, 0x9e, 0xb2, 0x08, 0x71, 0x83, 0xf0, 0x55,
|
||||
0x27, 0x2a, 0xb0, 0x29, 0x0b, 0xe4, 0x53, 0x70, 0x7f, 0xeb, 0x60, 0x74,
|
||||
0xb9, 0x92, 0xa9, 0x4b, 0x51, 0x41, 0x0e, 0x56, 0x1b, 0xe4, 0x67, 0x43,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0x32, 0x36, 0xe3, 0x25, 0x81, 0xa2, 0x81, 0x4b,
|
||||
0x0b, 0xe4, 0x53, 0x70, 0x73, 0x99, 0xf0, 0x02, 0x1a, 0xf7, 0xe1, 0x40,
|
||||
0x18, 0xc4, 0x58, 0x3a, 0xcc, 0xf5, 0x0b, 0x18, 0xf0, 0x39, 0xab, 0x7a,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x64, 0x0a, 0xf5, 0x6f, 0xec, 0xff, 0xb0, 0x6b,
|
||||
0x7f, 0xeb, 0x60, 0x74, 0x1a, 0xf7, 0xe1, 0x40, 0xf7, 0xfc, 0xbe, 0x7f,
|
||||
0xbf, 0x63, 0xc5, 0x05, 0x15, 0x3c, 0x9f, 0x2b, 0x9b, 0x77, 0xb0, 0x44,
|
||||
0x11, 0x11, 0x11, 0x11, 0x80, 0xff, 0x8e, 0x6f, 0x17, 0x41, 0xd6, 0x36,
|
||||
0xb9, 0x92, 0xa9, 0x4b, 0x18, 0xc4, 0x58, 0x3a, 0xbf, 0x63, 0xc5, 0x05,
|
||||
0x2f, 0x5c, 0x3c, 0x09, 0x25, 0xaf, 0xdf, 0x11, 0x21, 0x7d, 0x95, 0x58,
|
||||
0x00, 0x00, 0x00, 0x08, 0xcd, 0xb5, 0x72, 0x12, 0xf3, 0x16, 0x58, 0x23,
|
||||
0x51, 0x41, 0x0e, 0x56, 0xcc, 0xf5, 0x0b, 0x18, 0x15, 0x3c, 0x9f, 0x2b,
|
||||
0x25, 0xaf, 0xdf, 0x11, 0x38, 0x50, 0xe9, 0x16, 0x12, 0xb8, 0xc8, 0x17,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x90, 0xa2, 0x7a, 0x09, 0x49, 0x90, 0xa2, 0x17,
|
||||
0x1b, 0xe4, 0x67, 0x43, 0xf0, 0x39, 0xab, 0x7a, 0x9b, 0x77, 0xb0, 0x44,
|
||||
0x21, 0x7d, 0x95, 0x58, 0x12, 0xb8, 0xc8, 0x17, 0x5a, 0xfc, 0xf7, 0x5c,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0xdb, 0x50, 0x89, 0x38, 0x5f, 0x88, 0xe3, 0x32,
|
||||
0x8b, 0xb4, 0x3b, 0x6c, 0x95, 0x0a, 0xf1, 0x41, 0xe6, 0x0a, 0x52, 0x7d,
|
||||
0xd1, 0x0d, 0xb1, 0x57, 0x9b, 0xd2, 0xf4, 0x1d, 0x80, 0x17, 0xb2, 0x42,
|
||||
0x9c, 0x40, 0x6e, 0x2f, 0x63, 0xa7, 0x42, 0x77, 0xf9, 0x37, 0xd1, 0x43,
|
||||
0x98, 0xd1, 0xec, 0x50, 0x91, 0x26, 0xfa, 0x4e, 0x0c, 0x9e, 0xcc, 0x31,
|
||||
0x52, 0xf4, 0x20, 0x5d, 0x2a, 0x20, 0xeb, 0x1b, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x54, 0x29, 0xf4, 0x4a, 0xde, 0x91, 0xf6, 0x54, 0x8b, 0xed, 0x18, 0x26,
|
||||
0x71, 0x24, 0x22, 0x34, 0xb7, 0xaf, 0x61, 0x27, 0x7a, 0x0a, 0x21, 0x7f,
|
||||
0x9f, 0xfe, 0xa1, 0x53, 0x26, 0x97, 0x6b, 0x5b, 0xf4, 0xea, 0xef, 0x4a,
|
||||
0x4b, 0x03, 0xa0, 0x7c, 0xe6, 0x64, 0x69, 0x47, 0x76, 0xf7, 0x2d, 0x0b,
|
||||
0x6f, 0xd5, 0x2c, 0x45, 0x52, 0xc1, 0x5c, 0x46, 0x25, 0x38, 0xab, 0x79,
|
||||
0x64, 0xed, 0xe7, 0x57, 0x71, 0x1c, 0xc7, 0x71, 0x94, 0xc2, 0xb7, 0x7f,
|
||||
0xaf, 0x0d, 0x61, 0x4c, 0xa3, 0x86, 0x8e, 0x45, 0xdc, 0x73, 0xe3, 0x77,
|
||||
0x71, 0xed, 0x21, 0x7d, 0x4b, 0x8e, 0xc7, 0x52, 0x39, 0x5d, 0x49, 0x1d,
|
||||
0x75, 0x35, 0xed, 0x09, 0xc6, 0x02, 0x3b, 0x22, 0xb8, 0x91, 0x07, 0x13,
|
||||
0x7f, 0xbf, 0x15, 0x7f, 0xb5, 0xbe, 0x0a, 0x5c, 0xbc, 0x75, 0x54, 0x61,
|
||||
0x6c, 0x2f, 0x28, 0x5f, 0xff, 0xf0, 0x7b, 0x67, 0x11, 0x8e, 0x70, 0x29,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0xe6, 0xfc, 0x29, 0x07, 0xbd, 0x0c, 0x4d, 0x5f,
|
||||
0x57, 0xb7, 0x87, 0x41, 0xec, 0x48, 0xda, 0x18, 0x78, 0x41, 0xb8, 0x6d,
|
||||
0xde, 0x7e, 0x47, 0x5a, 0x13, 0x03, 0xc5, 0x52, 0x2e, 0xee, 0xf3, 0x3f,
|
||||
0x06, 0xd0, 0xcd, 0x48, 0x77, 0x2a, 0xcd, 0x7e, 0x35, 0xee, 0x74, 0x63,
|
||||
0x3e, 0x26, 0x65, 0x64, 0x37, 0xa1, 0xfb, 0x7a, 0x03, 0x44, 0xa8, 0x70,
|
||||
0x2f, 0x03, 0x27, 0x1e, 0xb3, 0x02, 0x3e, 0x4a, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0xfd, 0xe1, 0xfe, 0x3c, 0x88, 0x1c, 0x36, 0x53, 0x36, 0x31, 0x5a, 0x32,
|
||||
0x88, 0x7b, 0xa6, 0x17, 0x40, 0x31, 0xe4, 0x0a, 0xb3, 0x70, 0x8f, 0x4f,
|
||||
0xc3, 0xa2, 0xd7, 0x06, 0x34, 0x9d, 0x4a, 0x71, 0x5b, 0xfa, 0x79, 0x25,
|
||||
0xe8, 0x6f, 0x05, 0x65, 0xc1, 0x4a, 0xee, 0x5c, 0x9a, 0xb2, 0x83, 0x05,
|
||||
0xb0, 0x89, 0x77, 0x2e, 0xc1, 0x56, 0x34, 0x08, 0x50, 0xf5, 0xde, 0x12,
|
||||
0xae, 0x68, 0xc2, 0x1b, 0x71, 0x1c, 0xc7, 0x71, 0xb3, 0x84, 0x6e, 0x4f,
|
||||
0xae, 0x74, 0x57, 0x4f, 0x56, 0xf3, 0xfc, 0x48, 0xfa, 0x73, 0xd7, 0x0e,
|
||||
0x8a, 0xc5, 0x35, 0x4d, 0xf6, 0x26, 0x15, 0x2a, 0xcf, 0xb5, 0x2d, 0x64,
|
||||
0xd1, 0x2a, 0x84, 0x43, 0xab, 0xc0, 0xec, 0x60, 0xa9, 0xbc, 0x09, 0x11,
|
||||
0xfd, 0x06, 0xea, 0x1e, 0xba, 0x29, 0x77, 0x6c, 0xb1, 0x37, 0xa5, 0x42,
|
||||
0x1c, 0x9b, 0x58, 0x37, 0xa8, 0xb7, 0xae, 0x3e, 0x6a, 0xf8, 0x63, 0x25,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x22, 0xa0, 0x75, 0x4e, 0x17, 0x33, 0x99, 0x7c,
|
||||
0x97, 0x97, 0x30, 0x04, 0xbc, 0x22, 0x6d, 0x7c, 0xb3, 0xd7, 0xd9, 0x56,
|
||||
0x4e, 0xef, 0x40, 0x5e, 0x02, 0x05, 0x51, 0x1e, 0x0c, 0x32, 0xb7, 0x06,
|
||||
0x41, 0x16, 0x80, 0x33, 0xc2, 0xdd, 0x8f, 0x18, 0x65, 0xa3, 0xe1, 0x4a,
|
||||
0xdb, 0xb4, 0x5d, 0x78, 0xf3, 0x99, 0x48, 0x3e, 0x04, 0x5b, 0xb9, 0x09,
|
||||
0xd2, 0x3d, 0x14, 0x05, 0x69, 0x50, 0xe9, 0x57, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x0d, 0x72, 0x37, 0x6c, 0xe3, 0xd1, 0x57, 0x2f, 0x9e, 0xb7, 0xe1, 0x30,
|
||||
0x22, 0xce, 0xe5, 0x66, 0x45, 0x7b, 0x06, 0x0e, 0x06, 0x66, 0xdd, 0x11,
|
||||
0xef, 0xdf, 0x61, 0x52, 0x7d, 0xb9, 0xcf, 0x1e, 0x97, 0xbe, 0x55, 0x00,
|
||||
0x94, 0xcb, 0x50, 0x7c, 0xa0, 0x83, 0x1c, 0x57, 0xf3, 0x72, 0x8c, 0x40,
|
||||
0x07, 0x32, 0x39, 0x54, 0xe8, 0x5a, 0x10, 0x7b, 0x09, 0xc2, 0x02, 0x58,
|
||||
0xb0, 0xeb, 0x23, 0x51, 0x71, 0x1c, 0xc7, 0x71, 0xf0, 0xfd, 0x78, 0x2c,
|
||||
0xe7, 0xa8, 0x53, 0x7c, 0xdd, 0xf6, 0xa3, 0x2b, 0xa9, 0x51, 0xf4, 0x33,
|
||||
0x1d, 0x4d, 0x13, 0x0e, 0x53, 0x6b, 0xde, 0x6b, 0x48, 0x46, 0xa0, 0x01,
|
||||
0xbf, 0x74, 0xf2, 0x14, 0xe5, 0x99, 0x3d, 0x72, 0x37, 0x8e, 0xa9, 0x44,
|
||||
0x61, 0xed, 0xdd, 0x3b, 0x7c, 0x11, 0x28, 0x12, 0xd5, 0xd6, 0x27, 0x78,
|
||||
0x4e, 0xf8, 0xe4, 0x3d, 0xdc, 0x5c, 0x92, 0x0c, 0xea, 0x5b, 0xe2, 0x44,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x64, 0x55, 0xb2, 0x0d, 0x54, 0x7f, 0x64, 0x72,
|
||||
0x8e, 0xe1, 0x7b, 0x52, 0xf5, 0xe4, 0x20, 0x13, 0xd1, 0xd4, 0x5d, 0x4c,
|
||||
0x33, 0x3d, 0xb6, 0x55, 0x26, 0xed, 0xb0, 0x75, 0xa0, 0xf2, 0x72, 0x51,
|
||||
0x6b, 0xc5, 0x37, 0x23, 0x0d, 0x1d, 0xf5, 0x6f, 0xa6, 0x83, 0x5f, 0x3e,
|
||||
0x1e, 0xb5, 0x18, 0x23, 0xc8, 0x40, 0xae, 0x63, 0x68, 0x79, 0x8e, 0x56,
|
||||
0xb0, 0x33, 0x43, 0x08, 0x5b, 0xac, 0x52, 0x39, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x9d, 0xf2, 0x00, 0x73, 0xf8, 0x96, 0xbb, 0x43, 0x5b, 0x59, 0xce, 0x07,
|
||||
0xbb, 0x11, 0xc8, 0x43, 0xde, 0xea, 0xb7, 0x34, 0x51, 0xbf, 0xa7, 0x2d,
|
||||
0x33, 0x35, 0xc2, 0x40, 0x1c, 0x81, 0x60, 0x63, 0x60, 0x0b, 0xb6, 0x60,
|
||||
0xbf, 0xb9, 0x38, 0x0c, 0x02, 0x54, 0x53, 0x20, 0xd9, 0xf9, 0xeb, 0x2f,
|
||||
0x7e, 0x5b, 0xdf, 0x58, 0x4b, 0x99, 0x8e, 0x04, 0x27, 0xb4, 0x18, 0x78,
|
||||
0xd6, 0x37, 0x16, 0x60, 0x71, 0x1c, 0xc7, 0x71, 0x74, 0x66, 0x66, 0x66,
|
||||
0xb2, 0xf1, 0x94, 0x20, 0xad, 0x2f, 0xba, 0x68, 0x6a, 0x33, 0xfe, 0x6e,
|
||||
0xa5, 0x51, 0xec, 0x44, 0xab, 0x05, 0x7e, 0x60, 0x48, 0x6b, 0xa5, 0x56,
|
||||
0x38, 0x3d, 0xc7, 0x24, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_11[] = {
|
||||
0xb0, 0xf1, 0x1f, 0x2e, 0xf8, 0x8b, 0xb5, 0x07, 0x8d, 0xc4, 0xe1, 0x46,
|
||||
0x99, 0x23, 0x9f, 0x06, 0xcc, 0x64, 0x13, 0x45, 0x9e, 0xb1, 0xdf, 0x5f,
|
||||
0xfa, 0x8e, 0x0f, 0x6f, 0x33, 0xd8, 0xfe, 0x19, 0x0a, 0x25, 0x8b, 0x20,
|
||||
0xe1, 0x2c, 0xcc, 0x36, 0x17, 0x3f, 0x03, 0x05, 0xe1, 0x13, 0xce, 0x35,
|
||||
0xd4, 0xc9, 0xe7, 0x65, 0x1f, 0x7f, 0x2c, 0x7a, 0x93, 0x9f, 0x34, 0x19,
|
||||
0x4d, 0x22, 0xf2, 0x7f, 0x8e, 0xa8, 0xb0, 0x51, 0x22, 0x8c, 0x91, 0x30,
|
||||
0xa5, 0x9c, 0xff, 0x31, 0x0e, 0x04, 0xc9, 0x19, 0x69, 0x60, 0xee, 0x0f,
|
||||
0xc5, 0xa5, 0xeb, 0x6b, 0xb0, 0xa4, 0xaa, 0x5d, 0x1c, 0x4e, 0xeb, 0x73,
|
||||
0xec, 0x94, 0xb7, 0x15, 0xce, 0x64, 0x1c, 0x60, 0x3e, 0xa3, 0x6b, 0x4a,
|
||||
0x87, 0x7a, 0x25, 0x2f, 0xfc, 0xc3, 0x17, 0x20, 0x06, 0xb6, 0x22, 0x7d,
|
||||
0xca, 0xea, 0x8b, 0x3b, 0xf9, 0xca, 0xa4, 0x32, 0xd2, 0xb7, 0x2e, 0x01,
|
||||
0x4f, 0x31, 0xc9, 0x2f, 0x10, 0xbf, 0x41, 0x4c, 0xe6, 0xfe, 0xba, 0x49,
|
||||
0xe5, 0x89, 0xbb, 0x77, 0x7e, 0xe8, 0x83, 0x1c, 0x72, 0xe7, 0x26, 0x58,
|
||||
0x24, 0x90, 0x9d, 0x1e, 0xb3, 0x20, 0xc8, 0x64, 0x84, 0xa3, 0x21, 0x5d,
|
||||
0x06, 0x64, 0x30, 0x4b, 0x19, 0x35, 0x96, 0x1e, 0xd1, 0x86, 0x57, 0x4a,
|
||||
0xb3, 0x8e, 0xd6, 0x7d, 0xaf, 0xd1, 0xde, 0x3f, 0xa2, 0x2c, 0x32, 0x0a,
|
||||
0xbb, 0xea, 0x4a, 0x46, 0x64, 0x1b, 0x72, 0x14, 0x75, 0x85, 0x1b, 0x4d,
|
||||
0x11, 0x02, 0x5f, 0x6f, 0x06, 0xdd, 0xd3, 0x6f, 0xbc, 0xcc, 0x77, 0x2e,
|
||||
0xb7, 0x43, 0xf4, 0x19, 0x9d, 0x2c, 0x4b, 0x2b, 0x0c, 0x41, 0xb9, 0x02,
|
||||
0xdc, 0x14, 0x5a, 0x67, 0xd4, 0x56, 0xca, 0x45, 0x65, 0xd2, 0x7d, 0x17,
|
||||
0xcd, 0x91, 0xdd, 0x45, 0xd8, 0xa8, 0xd8, 0x4b, 0xc9, 0x2b, 0xf2, 0x35,
|
||||
0xc1, 0x81, 0x6c, 0x33, 0xbc, 0xf4, 0x4d, 0x04, 0xfd, 0xb0, 0x91, 0x2b,
|
||||
0xcf, 0xad, 0x39, 0x45, 0x35, 0xb2, 0xac, 0x2e, 0x2f, 0x13, 0xe3, 0x0b,
|
||||
0x40, 0x59, 0x33, 0x07, 0xe3, 0xa5, 0xa1, 0x4d, 0x0e, 0x79, 0x05, 0x4c,
|
||||
0x36, 0x9b, 0xf1, 0x7f, 0x90, 0x50, 0x46, 0x25, 0x87, 0x10, 0x24, 0x3f,
|
||||
0x52, 0x5d, 0xff, 0x18, 0xad, 0xed, 0x78, 0x52, 0x00, 0x9c, 0xfe, 0x66,
|
||||
0x22, 0x24, 0xe0, 0x62, 0x13, 0xe2, 0x6f, 0x67, 0xd9, 0xe3, 0x6c, 0x64,
|
||||
0x6b, 0xa6, 0xea, 0x53, 0x61, 0x56, 0x8a, 0x33, 0x81, 0x35, 0xe5, 0x0f,
|
||||
0x35, 0xc9, 0xf3, 0x59, 0xc2, 0xa8, 0x92, 0x73, 0x69, 0x66, 0x05, 0x70,
|
||||
0xa1, 0x5f, 0xec, 0x4e, 0x3d, 0x6b, 0xc0, 0x78, 0xa4, 0xcb, 0xfc, 0x7e,
|
||||
0x44, 0x8c, 0xc4, 0x1b, 0x25, 0x70, 0x8f, 0x27, 0x87, 0x76, 0x2d, 0x4f,
|
||||
0x70, 0xb0, 0xea, 0x7a, 0x92, 0x43, 0x8c, 0x00, 0xed, 0xfd, 0x3b, 0x23,
|
||||
0x69, 0x71, 0x8e, 0x49, 0x83, 0xc3, 0x4e, 0x37, 0xab, 0x18, 0xd9, 0x30,
|
||||
0x4d, 0x48, 0x5e, 0x7e, 0xbc, 0x5a, 0x1a, 0x24, 0x34, 0xed, 0x19, 0x57,
|
||||
0xf4, 0xf4, 0x0d, 0x02, 0x0c, 0x57, 0xde, 0x6d, 0x40, 0x39, 0x1f, 0x71,
|
||||
0x9c, 0xa1, 0xb0, 0x28, 0x2d, 0x05, 0xb9, 0x6b, 0x85, 0x7a, 0x4c, 0x47,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11,
|
||||
0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b, 0x00, 0x00, 0x00, 0x04,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48,
|
||||
0xbd, 0xf7, 0xde, 0x7b, 0x00, 0x00, 0x00, 0x04, 0xc1, 0x07, 0x1f, 0x7c,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b,
|
||||
0x00, 0x00, 0x00, 0x04, 0xc1, 0x07, 0x1f, 0x7c, 0x87, 0x87, 0x87, 0x47,
|
||||
0x55, 0x55, 0x55, 0x35, 0x7c, 0xec, 0xe8, 0x54, 0x5f, 0xc4, 0x1c, 0x7e,
|
||||
0x02, 0x38, 0x4e, 0x55, 0x86, 0x80, 0x6d, 0x71, 0xc3, 0xa8, 0x98, 0x4a,
|
||||
0x2b, 0xaa, 0x86, 0x63, 0x60, 0xd7, 0x4f, 0x2e, 0xb4, 0xac, 0xce, 0x78,
|
||||
0xbd, 0x1c, 0x4f, 0x55, 0x6b, 0x2c, 0x33, 0x64, 0x8c, 0x56, 0x30, 0x43,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xdd, 0x29, 0xc3, 0x15, 0x02, 0x15, 0x5b, 0x4f,
|
||||
0xdc, 0xb9, 0x0c, 0x03, 0x9a, 0x8d, 0x4d, 0x53, 0x6e, 0xf2, 0x33, 0x15,
|
||||
0xed, 0x3f, 0x16, 0x06, 0x43, 0xab, 0x59, 0x54, 0x1a, 0x62, 0xcd, 0x3a,
|
||||
0xda, 0x77, 0xa8, 0x51, 0x42, 0x58, 0x05, 0x55, 0x39, 0xeb, 0xd1, 0x45,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x02, 0x15, 0x5b, 0x4f, 0xb9, 0x5a, 0x8c, 0x36,
|
||||
0x9a, 0x63, 0x3e, 0x3c, 0xe6, 0x28, 0x72, 0x36, 0x51, 0x89, 0xdb, 0x3b,
|
||||
0xfa, 0xe0, 0x07, 0x07, 0x30, 0xb3, 0x56, 0x39, 0x91, 0x42, 0x86, 0x38,
|
||||
0xda, 0xd2, 0x8f, 0x67, 0x75, 0xca, 0x3e, 0x69, 0xe9, 0xd8, 0x07, 0x6f,
|
||||
0x11, 0x11, 0x11, 0x11, 0xdc, 0xb9, 0x0c, 0x03, 0x9a, 0x63, 0x3e, 0x3c,
|
||||
0x54, 0xdc, 0x52, 0x1f, 0xf3, 0xc8, 0xb6, 0x6b, 0x96, 0x31, 0xf8, 0x1b,
|
||||
0x20, 0xee, 0x0b, 0x07, 0x4c, 0x37, 0x80, 0x4b, 0x31, 0x99, 0xd0, 0x09,
|
||||
0xb8, 0xa5, 0x62, 0x5f, 0xa2, 0x72, 0xfb, 0x33, 0x11, 0xd8, 0x0e, 0x65,
|
||||
0x00, 0x00, 0x00, 0x08, 0x9a, 0x8d, 0x4d, 0x53, 0xe6, 0x28, 0x72, 0x36,
|
||||
0xf3, 0xc8, 0xb6, 0x6b, 0xef, 0x80, 0xab, 0x77, 0x4d, 0x49, 0x25, 0x2b,
|
||||
0x7e, 0x10, 0x08, 0x1b, 0x70, 0x22, 0x72, 0x66, 0x8b, 0xe6, 0x06, 0x3a,
|
||||
0x58, 0xb9, 0x7e, 0x02, 0x97, 0xf4, 0xc2, 0x4f, 0x6b, 0x9a, 0x68, 0x53,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x6e, 0xf2, 0x33, 0x15, 0x51, 0x89, 0xdb, 0x3b,
|
||||
0x96, 0x31, 0xf8, 0x1b, 0x4d, 0x49, 0x25, 0x2b, 0xe2, 0xe0, 0x5c, 0x64,
|
||||
0xb6, 0x1d, 0x73, 0x13, 0x38, 0x1b, 0xfd, 0x49, 0xe1, 0x2c, 0xce, 0x5d,
|
||||
0x2a, 0x6b, 0xb4, 0x17, 0x7e, 0xa9, 0x6e, 0x72, 0x2f, 0x77, 0x47, 0x79,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0xed, 0x3f, 0x16, 0x06, 0xfa, 0xe0, 0x07, 0x07,
|
||||
0x20, 0xee, 0x0b, 0x07, 0x7e, 0x10, 0x08, 0x1b, 0xb6, 0x1d, 0x73, 0x13,
|
||||
0xca, 0x4a, 0x44, 0x68, 0x1c, 0x93, 0xbc, 0x37, 0xfa, 0x14, 0x8b, 0x55,
|
||||
0xae, 0xe0, 0xac, 0x31, 0xcb, 0x04, 0x09, 0x46, 0x27, 0x8f, 0x96, 0x07,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0x43, 0xab, 0x59, 0x54, 0x30, 0xb3, 0x56, 0x39,
|
||||
0x4c, 0x37, 0x80, 0x4b, 0x70, 0x22, 0x72, 0x66, 0x38, 0x1b, 0xfd, 0x49,
|
||||
0x1c, 0x93, 0xbc, 0x37, 0xfb, 0xdd, 0xff, 0x41, 0x73, 0x22, 0xa8, 0x31,
|
||||
0xd4, 0xc3, 0x26, 0x2b, 0xe7, 0x8c, 0xce, 0x35, 0x03, 0x29, 0x9c, 0x43,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x1a, 0x62, 0xcd, 0x3a, 0x91, 0x42, 0x86, 0x38,
|
||||
0x31, 0x99, 0xd0, 0x09, 0x8b, 0xe6, 0x06, 0x3a, 0xe1, 0x2c, 0xce, 0x5d,
|
||||
0xfa, 0x14, 0x8b, 0x55, 0x73, 0x22, 0xa8, 0x31, 0xaf, 0x9f, 0x0d, 0x2d,
|
||||
0xd8, 0xf1, 0xd2, 0x43, 0x41, 0x60, 0x7a, 0x48, 0xca, 0xa1, 0x4c, 0x7c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xda, 0x77, 0xa8, 0x51, 0xda, 0xd2, 0x8f, 0x67,
|
||||
0xb8, 0xa5, 0x62, 0x5f, 0x58, 0xb9, 0x7e, 0x02, 0x2a, 0x6b, 0xb4, 0x17,
|
||||
0xae, 0xe0, 0xac, 0x31, 0xd4, 0xc3, 0x26, 0x2b, 0xd8, 0xf1, 0xd2, 0x43,
|
||||
0x38, 0xc4, 0xc5, 0x55, 0x39, 0x3d, 0x1f, 0x4c, 0x81, 0xa8, 0x99, 0x14,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0x42, 0x58, 0x05, 0x55, 0x75, 0xca, 0x3e, 0x69,
|
||||
0xa2, 0x72, 0xfb, 0x33, 0x97, 0xf4, 0xc2, 0x4f, 0x7e, 0xa9, 0x6e, 0x72,
|
||||
0xcb, 0x04, 0x09, 0x46, 0xe7, 0x8c, 0xce, 0x35, 0x41, 0x60, 0x7a, 0x48,
|
||||
0x39, 0x3d, 0x1f, 0x4c, 0xc3, 0x27, 0xbb, 0x1a, 0x86, 0xb4, 0x97, 0x00,
|
||||
0xc8, 0x42, 0x16, 0x32, 0x39, 0xeb, 0xd1, 0x45, 0xe9, 0xd8, 0x07, 0x6f,
|
||||
0x11, 0xd8, 0x0e, 0x65, 0x6b, 0x9a, 0x68, 0x53, 0x2f, 0x77, 0x47, 0x79,
|
||||
0x27, 0x8f, 0x96, 0x07, 0x03, 0x29, 0x9c, 0x43, 0xca, 0xa1, 0x4c, 0x7c,
|
||||
0x81, 0xa8, 0x99, 0x14, 0x86, 0xb4, 0x97, 0x00, 0x0c, 0xd8, 0x29, 0x37,
|
||||
0x55, 0x55, 0x55, 0x35, 0xcc, 0xab, 0xe7, 0x58, 0x82, 0xaa, 0xb7, 0x06,
|
||||
0x3c, 0x2a, 0x3d, 0x61, 0x45, 0xbd, 0xcc, 0x4b, 0xa9, 0x83, 0x44, 0x56,
|
||||
0x16, 0xe6, 0x58, 0x6e, 0x70, 0x4b, 0x3a, 0x44, 0xe2, 0x3b, 0x37, 0x60,
|
||||
0xf0, 0x3b, 0x41, 0x1e, 0x44, 0x40, 0x84, 0x5a, 0x63, 0x5d, 0x4d, 0x78,
|
||||
0x22, 0x80, 0xb3, 0x0f, 0xe0, 0x85, 0xec, 0x77, 0xe5, 0x3d, 0xda, 0x27,
|
||||
0x55, 0xf9, 0xfd, 0x44, 0x38, 0xa7, 0x0f, 0x0a, 0x2f, 0xec, 0xda, 0x34,
|
||||
0x24, 0xef, 0x00, 0x40, 0x54, 0x9a, 0x0b, 0x27, 0xf9, 0x85, 0xf4, 0x16,
|
||||
0x14, 0x1f, 0x17, 0x30, 0x1d, 0xb0, 0xdf, 0x31, 0x55, 0x55, 0x55, 0x35,
|
||||
0x98, 0x36, 0x7e, 0x31, 0xd0, 0xda, 0x0a, 0x16, 0xae, 0xb0, 0x6a, 0x00,
|
||||
0x0e, 0x7a, 0x7e, 0x6d, 0x93, 0x81, 0x4d, 0x21, 0x45, 0x5a, 0x4d, 0x20,
|
||||
0x42, 0x5d, 0xfd, 0x49, 0x28, 0xc5, 0xe2, 0x75, 0x45, 0x85, 0x03, 0x2c,
|
||||
0xfc, 0x78, 0x72, 0x15, 0x98, 0x9c, 0x88, 0x0b, 0xed, 0x8f, 0x6f, 0x2b,
|
||||
0x55, 0x75, 0x17, 0x5f, 0xe5, 0xed, 0x21, 0x52, 0x5a, 0x34, 0x10, 0x7d,
|
||||
0x42, 0x25, 0x57, 0x6a, 0xa4, 0xb2, 0xe6, 0x2e, 0x05, 0xa8, 0xc4, 0x17,
|
||||
0xff, 0x9c, 0x7f, 0x6f, 0x23, 0x64, 0x17, 0x44, 0x85, 0xa9, 0x6b, 0x46,
|
||||
0x66, 0x58, 0x1b, 0x3b, 0x55, 0x55, 0x55, 0x35, 0x55, 0xf6, 0xca, 0x06,
|
||||
0x68, 0x75, 0xa9, 0x55, 0x54, 0x44, 0x4f, 0x61, 0x65, 0x3b, 0x96, 0x37,
|
||||
0xa9, 0x89, 0xb6, 0x47, 0x70, 0x8a, 0x8d, 0x74, 0x09, 0x53, 0x9e, 0x5e,
|
||||
0x92, 0x56, 0x2b, 0x34, 0x3e, 0x9d, 0x12, 0x0a, 0x54, 0x98, 0xf8, 0x29,
|
||||
0xde, 0xa0, 0xdd, 0x11, 0x46, 0x3e, 0x0f, 0x70, 0xff, 0xee, 0x0d, 0x7c,
|
||||
0x48, 0xe0, 0xe1, 0x6d, 0xb6, 0x5a, 0x2f, 0x7c, 0xb1, 0xb2, 0xf7, 0x2f,
|
||||
0xda, 0x64, 0x33, 0x7e, 0x87, 0x48, 0x48, 0x7e, 0x95, 0x6c, 0xd5, 0x5c,
|
||||
0x26, 0x8f, 0xc9, 0x3e, 0xf9, 0x5e, 0x99, 0x38, 0xf5, 0x32, 0xc2, 0x66,
|
||||
0x55, 0x55, 0x55, 0x35, 0x7f, 0xb1, 0x0f, 0x47, 0xac, 0x5d, 0xec, 0x76,
|
||||
0xba, 0x59, 0xc4, 0x7f, 0xfb, 0xdc, 0x32, 0x46, 0xe8, 0x83, 0xe0, 0x0a,
|
||||
0xf4, 0xb8, 0x56, 0x36, 0x07, 0x4f, 0x7f, 0x29, 0x31, 0xb8, 0xf4, 0x2c,
|
||||
0x7e, 0x42, 0xbd, 0x3e, 0xf1, 0x9d, 0x40, 0x73, 0x51, 0xf1, 0xce, 0x31,
|
||||
0x35, 0x7b, 0x0e, 0x48, 0x9e, 0xb9, 0x6e, 0x3b, 0x37, 0x00, 0x57, 0x0c,
|
||||
0x15, 0x25, 0x74, 0x64, 0xdd, 0x39, 0x64, 0x5c, 0x0a, 0x5d, 0x08, 0x2b,
|
||||
0xf5, 0xe6, 0x0c, 0x3f, 0xe6, 0xce, 0x30, 0x2d, 0x27, 0xc4, 0x07, 0x19,
|
||||
0x82, 0xfb, 0x44, 0x08, 0x7b, 0x94, 0x23, 0x69, 0x55, 0x55, 0x55, 0x35,
|
||||
0xc7, 0xbe, 0xaf, 0x49, 0xa6, 0x9a, 0x26, 0x30, 0x7c, 0xb2, 0x66, 0x35,
|
||||
0xe4, 0x83, 0x46, 0x62, 0xe3, 0x1c, 0x23, 0x07, 0x36, 0x2e, 0xd3, 0x00,
|
||||
0xe2, 0x65, 0xc8, 0x51, 0x0c, 0x09, 0x5c, 0x74, 0x13, 0x94, 0xf9, 0x67,
|
||||
0x4e, 0x07, 0x26, 0x03, 0xba, 0xb4, 0x3a, 0x7f, 0x38, 0xb4, 0x7c, 0x6a,
|
||||
0x44, 0x7a, 0x1c, 0x7b, 0xeb, 0xf9, 0x8b, 0x0b, 0x16, 0xf8, 0x23, 0x36,
|
||||
0x7b, 0x89, 0x79, 0x44, 0x80, 0xfe, 0x33, 0x2a, 0x7d, 0x59, 0xe2, 0x1b,
|
||||
0x7b, 0xe1, 0xb0, 0x15, 0x21, 0xcb, 0x47, 0x77, 0x23, 0x1a, 0xc0, 0x14,
|
||||
0x5b, 0x86, 0x06, 0x2d, 0x55, 0x55, 0x55, 0x35, 0x04, 0xb5, 0x47, 0x27,
|
||||
0x1d, 0xb7, 0x22, 0x44, 0xcc, 0x9e, 0xce, 0x7d, 0xf2, 0x75, 0x78, 0x78,
|
||||
0x7b, 0x98, 0x99, 0x12, 0xbd, 0x34, 0xe4, 0x43, 0xf0, 0x0a, 0x96, 0x43,
|
||||
0xf1, 0x50, 0x1d, 0x0b, 0x86, 0x78, 0xc9, 0x59, 0xc7, 0x78, 0xec, 0x16,
|
||||
0x71, 0xaa, 0x0c, 0x56, 0xbf, 0x92, 0xe2, 0x3a, 0xb5, 0x6e, 0x2d, 0x18,
|
||||
0xe2, 0xc7, 0x31, 0x67, 0x10, 0xab, 0x9f, 0x27, 0x27, 0x1e, 0xf3, 0x69,
|
||||
0xaf, 0x57, 0x42, 0x4c, 0x4f, 0xb4, 0x30, 0x35, 0x00, 0x54, 0xb0, 0x4a,
|
||||
0xa2, 0x00, 0x2a, 0x4a, 0x3d, 0x49, 0x58, 0x73, 0xf9, 0x16, 0xb0, 0x01,
|
||||
0x55, 0x55, 0x55, 0x35, 0xe4, 0xd5, 0x3f, 0x2e, 0xee, 0x84, 0x47, 0x51,
|
||||
0x3f, 0x84, 0xb9, 0x6b, 0x49, 0xb9, 0xae, 0x57, 0x32, 0x5a, 0x04, 0x02,
|
||||
0xe1, 0x6a, 0xf1, 0x4b, 0x30, 0x53, 0xf1, 0x05, 0x29, 0x74, 0x75, 0x76,
|
||||
0x4a, 0x15, 0x5b, 0x5d, 0xe1, 0xaa, 0x15, 0x1b, 0x62, 0xf5, 0xe8, 0x76,
|
||||
0x03, 0xc1, 0xaa, 0x06, 0x13, 0x59, 0xc8, 0x40, 0x84, 0x49, 0xc8, 0x1f,
|
||||
0x85, 0x98, 0x55, 0x6b, 0xed, 0x38, 0x45, 0x17, 0xb8, 0xc7, 0xf7, 0x69,
|
||||
0xc3, 0x87, 0xd0, 0x17, 0x0a, 0x93, 0xb7, 0x35, 0xc2, 0x45, 0x75, 0x34,
|
||||
0x7a, 0x78, 0xff, 0x51, 0x26, 0xd2, 0x59, 0x13, 0x55, 0x55, 0x55, 0x35,
|
||||
0x48, 0x38, 0xf7, 0x6e, 0x4f, 0x7d, 0xc7, 0x70, 0x32, 0x5d, 0x5b, 0x7a,
|
||||
0x85, 0x35, 0x9c, 0x07, 0x40, 0x08, 0x30, 0x5c, 0x64, 0x69, 0x27, 0x7a,
|
||||
0x07, 0x34, 0x90, 0x6c, 0x6e, 0xa6, 0x8e, 0x70, 0xd4, 0xf2, 0xf7, 0x59,
|
||||
0x0f, 0x13, 0x17, 0x5d, 0xa8, 0xa9, 0x01, 0x29, 0xad, 0xfd, 0x9a, 0x77,
|
||||
0x3c, 0x77, 0xc7, 0x67, 0xd0, 0x43, 0xb1, 0x3f, 0x97, 0x76, 0xe4, 0x72,
|
||||
0xd4, 0x82, 0x9a, 0x25, 0xec, 0xef, 0xc3, 0x03, 0xdc, 0xf9, 0x94, 0x3f,
|
||||
0xa4, 0x76, 0x88, 0x5a, 0xb8, 0x0f, 0x03, 0x76, 0x58, 0x87, 0x42, 0x11,
|
||||
0x28, 0xb7, 0xb0, 0x1d, 0x55, 0x55, 0x55, 0x35, 0x2f, 0xe6, 0x44, 0x75,
|
||||
0xf3, 0x0b, 0xe8, 0x68, 0x59, 0x72, 0x1f, 0x16, 0x8c, 0xd0, 0xe3, 0x3c,
|
||||
0xcc, 0xfc, 0x77, 0x05, 0xd6, 0x4b, 0x48, 0x78, 0x51, 0x88, 0x4c, 0x5f,
|
||||
0x30, 0x43, 0x9c, 0x2f, 0x49, 0x72, 0xba, 0x01, 0xba, 0xae, 0xfe, 0x0b,
|
||||
0x94, 0x3f, 0xe7, 0x71, 0x9d, 0xfa, 0x37, 0x06, 0xfc, 0xa2, 0x99, 0x6f,
|
||||
0xe2, 0x0d, 0xcf, 0x4b, 0x63, 0x76, 0xec, 0x49, 0xa8, 0xb5, 0x84, 0x0b,
|
||||
0x84, 0xa3, 0x75, 0x4f, 0x5e, 0x56, 0xdd, 0x37, 0x1a, 0x7d, 0x6e, 0x34,
|
||||
0x95, 0x39, 0x80, 0x1e, 0x58, 0x2e, 0x22, 0x50, 0xd3, 0x46, 0x93, 0x1e,
|
||||
0x55, 0x55, 0x55, 0x35, 0xf5, 0x96, 0x5a, 0x5f, 0x9b, 0xc8, 0x58, 0x50,
|
||||
0x3e, 0x03, 0xab, 0x16, 0xd5, 0xc6, 0x4c, 0x7f, 0x3f, 0x82, 0xf6, 0x34,
|
||||
0x1c, 0x29, 0x22, 0x16, 0x40, 0xdb, 0xe7, 0x71, 0x8b, 0x8a, 0x4b, 0x55,
|
||||
0x45, 0xbf, 0xd1, 0x68, 0x4c, 0xbb, 0xe3, 0x43, 0x1b, 0x96, 0x28, 0x3d,
|
||||
0x36, 0x4f, 0xdb, 0x58, 0xa8, 0x39, 0xac, 0x38, 0xd3, 0xeb, 0x90, 0x18,
|
||||
0x2f, 0xb7, 0x06, 0x1a, 0x5a, 0x82, 0x53, 0x13, 0x77, 0xaf, 0xe0, 0x4d,
|
||||
0x9e, 0xe9, 0x39, 0x79, 0xb7, 0xf6, 0xa2, 0x3c, 0x41, 0x9d, 0x14, 0x59,
|
||||
0x01, 0x33, 0x36, 0x20, 0x15, 0xe0, 0xe4, 0x15, 0x55, 0x55, 0x55, 0x35,
|
||||
0x58, 0x48, 0x07, 0x36, 0x3f, 0x43, 0x1e, 0x05, 0x33, 0x9e, 0x14, 0x45,
|
||||
0x69, 0xc8, 0x16, 0x63, 0x5f, 0xab, 0x77, 0x26, 0xf4, 0x08, 0xb0, 0x2e,
|
||||
0xf8, 0x31, 0x79, 0x29, 0x37, 0xc9, 0x37, 0x28, 0x55, 0x62, 0xcc, 0x43,
|
||||
0xeb, 0x6b, 0xe4, 0x03, 0xfe, 0x82, 0x50, 0x20, 0x2d, 0xdf, 0xf2, 0x7d,
|
||||
0xba, 0x07, 0xe2, 0x0e, 0x88, 0x1e, 0x82, 0x2b, 0x87, 0x54, 0x26, 0x39,
|
||||
0xdd, 0xee, 0x3e, 0x0b, 0xdc, 0xbf, 0x93, 0x1a, 0x8a, 0xce, 0xa6, 0x39,
|
||||
0x5b, 0xaf, 0x8f, 0x00, 0x7a, 0xad, 0x27, 0x71, 0x1e, 0x76, 0xd8, 0x58,
|
||||
0x96, 0x36, 0xa3, 0x14, 0x55, 0x55, 0x55, 0x35, 0x76, 0x27, 0x76, 0x62,
|
||||
0xa4, 0x9f, 0x05, 0x5a, 0x41, 0x28, 0x49, 0x12, 0x24, 0x18, 0x49, 0x12,
|
||||
0x4f, 0xc2, 0xa5, 0x25, 0x0e, 0x0e, 0x3c, 0x3c, 0x01, 0xa7, 0x65, 0x00,
|
||||
0x92, 0x9e, 0x17, 0x36, 0xa1, 0x7a, 0x92, 0x27, 0xcf, 0x74, 0xba, 0x4d,
|
||||
0xcb, 0x6f, 0x66, 0x68, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32
|
||||
};
|
||||
} // namespace poseidon_constants
|
||||
#endif
|
||||
@@ -1,9 +1,13 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#pragma once
|
||||
#ifndef POSEIDON_KERNELS_H
|
||||
#define POSEIDON_KERNELS_H
|
||||
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
|
||||
namespace poseidon {
|
||||
template <typename S, int T>
|
||||
__global__ void prepare_poseidon_states(S* states, size_t number_of_states, S domain_tag, bool aligned)
|
||||
__global__ void prepare_poseidon_states(const S* input, S* states, unsigned int number_of_states, const S domain_tag)
|
||||
{
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int state_number = idx / T;
|
||||
@@ -16,27 +20,27 @@ namespace poseidon {
|
||||
if (element_number == 0) {
|
||||
prepared_element = domain_tag;
|
||||
} else {
|
||||
if (aligned) {
|
||||
prepared_element = states[idx];
|
||||
} else {
|
||||
prepared_element = states[idx - 1];
|
||||
}
|
||||
prepared_element = input[idx - state_number - 1];
|
||||
}
|
||||
|
||||
// We need __syncthreads here if the state is not aligned
|
||||
// because then we need to shift the vector [A, B, 0] -> [D, A, B]
|
||||
if (!aligned) { __syncthreads(); }
|
||||
|
||||
// Store element in state
|
||||
states[idx] = prepared_element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
DEVICE_INLINE S sbox_alpha_five(S element)
|
||||
DEVICE_INLINE S sbox_el(S element, const int alpha)
|
||||
{
|
||||
S result = S::sqr(element);
|
||||
result = S::sqr(result);
|
||||
return result * element;
|
||||
S result2 = S::sqr(element);
|
||||
switch (alpha) {
|
||||
case 3:
|
||||
return result2 * element;
|
||||
case 5:
|
||||
return S::sqr(result2) * element;
|
||||
case 7:
|
||||
return S::sqr(result2) * result2 * element;
|
||||
case 11:
|
||||
return S::sqr(S::sqr(result2)) * result2 * element;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
@@ -71,7 +75,7 @@ namespace poseidon {
|
||||
element = element + constants.round_constants[rc_offset + element_number];
|
||||
rc_offset += T;
|
||||
}
|
||||
element = sbox_alpha_five(element);
|
||||
element = sbox_el(element, constants.alpha);
|
||||
if (!skip_rc) { element = element + constants.round_constants[rc_offset + element_number]; }
|
||||
|
||||
// Multiply all the states by mds matrix
|
||||
@@ -111,7 +115,7 @@ namespace poseidon {
|
||||
__device__ S partial_round(S state[T], size_t rc_offset, int round_number, const PoseidonConstants<S>& constants)
|
||||
{
|
||||
S element = state[0];
|
||||
element = sbox_alpha_five(element);
|
||||
element = sbox_el(element, constants.alpha);
|
||||
element = element + constants.round_constants[rc_offset];
|
||||
|
||||
S* sparse_matrix = &constants.sparse_matrices[(T * 2 - 1) * round_number];
|
||||
@@ -155,22 +159,58 @@ namespace poseidon {
|
||||
}
|
||||
}
|
||||
|
||||
// These function is just doing copy from the states to the output
|
||||
template <typename S, int T>
|
||||
__global__ void get_hash_results(S* states, size_t number_of_states, S* out)
|
||||
__global__ void
|
||||
squeeze_states_kernel(const S* states, unsigned int number_of_states, unsigned int rate, unsigned int offset, S* out)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
out[idx] = states[idx * T + 1];
|
||||
for (int i = 0; i < rate; i++) {
|
||||
out[idx * rate + i] = states[idx * T + offset + i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void copy_recursive(S* state, size_t number_of_states, S* out)
|
||||
cudaError_t poseidon_permutation_kernel(
|
||||
const S* input,
|
||||
S* out,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const PoseidonConstants<S>& constants,
|
||||
cudaStream_t& stream)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
S* states;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states, number_of_states * T * sizeof(S), stream));
|
||||
|
||||
state[(idx / (T - 1) * T) + (idx % (T - 1)) + 1] = out[idx];
|
||||
prepare_poseidon_states<S, T>
|
||||
<<<PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T), 0, stream>>>(
|
||||
input, states, number_of_states, constants.domain_tag);
|
||||
|
||||
size_t rc_offset = 0;
|
||||
full_rounds<S, T><<<
|
||||
PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T),
|
||||
sizeof(S) * PKC::hashes_per_block(T) * T, stream>>>(
|
||||
states, number_of_states, rc_offset, FIRST_FULL_ROUNDS, constants);
|
||||
rc_offset += T * (constants.full_rounds_half + 1);
|
||||
|
||||
partial_rounds<S, T><<<PKC::number_of_singlehash_blocks(number_of_states), PKC::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, rc_offset, constants);
|
||||
rc_offset += constants.partial_rounds;
|
||||
|
||||
full_rounds<S, T><<<
|
||||
PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T),
|
||||
sizeof(S) * PKC::hashes_per_block(T) * T, stream>>>(
|
||||
states, number_of_states, rc_offset, SECOND_FULL_ROUNDS, constants);
|
||||
|
||||
squeeze_states_kernel<S, T>
|
||||
<<<PKC::number_of_singlehash_blocks(number_of_states), PKC::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, output_len, 1, out);
|
||||
|
||||
CHK_IF_RETURN(cudaFreeAsync(states, stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -8,132 +8,87 @@
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "poseidon/kernels.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
using namespace hash;
|
||||
|
||||
/**
|
||||
* @namespace poseidon
|
||||
* Implementation of the [Poseidon hash function](https://eprint.iacr.org/2019/458.pdf)
|
||||
* Specifically, the optimized [Filecoin version](https://spec.filecoin.io/algorithms/crypto/poseidon/)
|
||||
*/
|
||||
namespace poseidon {
|
||||
#define FIRST_FULL_ROUNDS true
|
||||
#define SECOND_FULL_ROUNDS false
|
||||
|
||||
/**
|
||||
* For most of the Poseidon configurations this is the case
|
||||
* TODO: Add support for different full rounds numbers
|
||||
*/
|
||||
const int FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConstants
|
||||
* This constants are enough to define a Poseidon instantce
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
|
||||
struct PoseidonConstants {
|
||||
int arity;
|
||||
int partial_rounds;
|
||||
int full_rounds_half;
|
||||
S* round_constants = nullptr;
|
||||
S* mds_matrix = nullptr;
|
||||
S* non_sparse_matrix = nullptr;
|
||||
S* sparse_matrices = nullptr;
|
||||
S domain_tag;
|
||||
};
|
||||
|
||||
/**
|
||||
* @class PoseidonKernelsConfiguration
|
||||
* Describes the logic of deriving CUDA kernels parameters
|
||||
* such as the number of threads and the number of blocks
|
||||
*/
|
||||
template <int T>
|
||||
class PoseidonKernelsConfiguration
|
||||
class Poseidon : public Hasher<S, S>
|
||||
{
|
||||
public:
|
||||
// The logic behind this is that 1 thread only works on 1 element
|
||||
// We have {T} elements in each state, and {number_of_states} states total
|
||||
static const int number_of_threads = 256 / T * T;
|
||||
const std::size_t device_id;
|
||||
PoseidonConstants<S> constants;
|
||||
|
||||
// The partial rounds operates on the whole state, so we define
|
||||
// the parallelism params for processing a single hash preimage per thread
|
||||
static const int singlehash_block_size = 128;
|
||||
|
||||
static const int hashes_per_block = number_of_threads / T;
|
||||
|
||||
static int number_of_full_blocks(size_t number_of_states)
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
int total_number_of_threads = number_of_states * T;
|
||||
return total_number_of_threads / number_of_threads +
|
||||
static_cast<bool>(total_number_of_threads % number_of_threads);
|
||||
cudaError_t permutation_error;
|
||||
#define P_PERM_T(width) \
|
||||
case width: \
|
||||
permutation_error = poseidon_permutation_kernel<S, width>( \
|
||||
input, output, number_of_states, input_len, output_len, this->constants, ctx.stream); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P_PERM_T(3)
|
||||
P_PERM_T(5)
|
||||
P_PERM_T(9)
|
||||
P_PERM_T(12)
|
||||
default:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [3, 5, 9, 12]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(permutation_error);
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
static int number_of_singlehash_blocks(size_t number_of_states)
|
||||
Poseidon(
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
device_context::DeviceContext& ctx)
|
||||
: Hasher<S, S>(arity + 1, arity, arity, 1), device_id(ctx.device_id)
|
||||
{
|
||||
return number_of_states / singlehash_block_size + static_cast<bool>(number_of_states % singlehash_block_size);
|
||||
PoseidonConstants<S> constants;
|
||||
CHK_STICKY(create_optimized_poseidon_constants(
|
||||
arity, alpha, partial_rounds, full_rounds_half, round_constants, mds_matrix, non_sparse_matrix, sparse_matrices,
|
||||
domain_tag, &constants, ctx));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
Poseidon(int arity, device_context::DeviceContext& ctx)
|
||||
: Hasher<S, S>(arity + 1, arity, arity, 1), device_id(ctx.device_id)
|
||||
{
|
||||
PoseidonConstants<S> constants{};
|
||||
CHK_STICKY(init_optimized_poseidon_constants(arity, ctx, &constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
~Poseidon()
|
||||
{
|
||||
auto ctx = device_context::get_default_device_context();
|
||||
ctx.device_id = this->device_id;
|
||||
CHK_STICKY(release_optimized_poseidon_constants<S>(&this->constants, ctx));
|
||||
}
|
||||
};
|
||||
|
||||
template <int T>
|
||||
using PKC = PoseidonKernelsConfiguration<T>;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConfig
|
||||
* Struct that encodes various Poseidon parameters.
|
||||
*/
|
||||
struct PoseidonConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
bool input_is_a_state; /**< If true, input is considered to be a states vector, holding the preimages
|
||||
* in aligned or not aligned format. Memory under the input pointer will be used for states
|
||||
* If false, fresh states memory will be allocated and input will be copied into it */
|
||||
bool aligned; /**< If true - input should be already aligned for poseidon permutation.
|
||||
* Aligned format: [0, A, B, 0, C, D, ...] (as you might get by using loop_state)
|
||||
* not aligned format: [A, B, 0, C, D, 0, ...] (as you might get from cudaMemcpy2D) */
|
||||
bool loop_state; /**< If true, hash results will also be copied in the input pointer in aligned format */
|
||||
bool is_async; /**< Whether to run the Poseidon asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the poseidon_hash
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static PoseidonConfig default_poseidon_config(
|
||||
int t, const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
PoseidonConfig config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputes_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // input_is_a_state
|
||||
false, // aligned
|
||||
false, // loop_state
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t
|
||||
init_optimized_poseidon_constants(int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* constants);
|
||||
|
||||
/**
|
||||
* Compute the poseidon hash over a sequence of preimages.
|
||||
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
|
||||
* @param T size of the poseidon state, should be equal to {arity + 1}
|
||||
* @param input a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. May point to a string of preimages or a string of states filled with preimages.
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be at least of size [number_of_states](@ref number_of_states)
|
||||
* @param number_of_states number of input blocks of size T-1 (arity)
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon_hash(
|
||||
S* input, S* output, size_t number_of_states, const PoseidonConstants<S>& constants, const PoseidonConfig& config);
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -1,74 +0,0 @@
|
||||
#pragma once
|
||||
#ifndef MERKLE_H
|
||||
#define MERKLE_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace poseidon;
|
||||
|
||||
/**
|
||||
* @namespace merkle
|
||||
* Implementation of the [Poseidon](@ref poseidon) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) builder,
|
||||
* parallelized for the use on GPU
|
||||
*/
|
||||
namespace merkle {
|
||||
static constexpr size_t GIGA = 1024 * 1024 * 1024;
|
||||
|
||||
/// Bytes per stream
|
||||
static constexpr size_t STREAM_CHUNK_SIZE = 1024 * 1024 * 1024;
|
||||
|
||||
/**
|
||||
* @struct TreeBuilderConfig
|
||||
* Struct that encodes various Tree builder parameters.
|
||||
*/
|
||||
struct TreeBuilderConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
int keep_rows; /**< How many rows of the Merkle tree rows should be written to output. '0' means all of them */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the tree builder asynchronously. If set to `true`, the build_merkle_tree
|
||||
* function will be non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static TreeBuilderConfig
|
||||
default_merkle_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
TreeBuilderConfig config = {
|
||||
ctx, // ctx
|
||||
0, // keep_rows
|
||||
false, // are_inputes_on_device
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the Poseidon Merkle tree
|
||||
*
|
||||
* @param leaves a pointer to the leaves layer. May be allocated on device or on host, regulated by the config
|
||||
* Expected to have arity ^ (height - 1) elements
|
||||
* @param digests a pointer to the digests storage. May only be allocated on the host
|
||||
* Expected to have `sum(arity ^ (i)) for i in [0..height-1]`
|
||||
* @param height the height of the merkle tree
|
||||
* # Algorithm
|
||||
* The function will split a large tree into many subtrees, each sized to fit within `STREAM_CHUNK_SIZE`.
|
||||
* Each subtree is built in its own stream (there is a maximum number of streams)
|
||||
* After all subtrees are constructed - the function will combine the resulting sub-digests into the final top-tree
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t build_merkle_tree(
|
||||
const S* leaves,
|
||||
S* digests,
|
||||
uint32_t height,
|
||||
const PoseidonConstants<S>& poseidon,
|
||||
const TreeBuilderConfig& config);
|
||||
} // namespace merkle
|
||||
|
||||
#endif
|
||||
65
icicle/include/poseidon2/constants.cuh
Normal file
65
icicle/include/poseidon2/constants.cuh
Normal file
@@ -0,0 +1,65 @@
|
||||
#pragma once
|
||||
#ifndef POSEIDON2_CONSTANTS_H
|
||||
#define POSEIDON2_CONSTANTS_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
namespace poseidon2 {
|
||||
/**
|
||||
* For most of the Poseidon2 configurations this is the case
|
||||
*/
|
||||
const int EXTERNAL_ROUNDS_DEFAULT = 8;
|
||||
|
||||
enum DiffusionStrategy {
|
||||
DEFAULT_DIFFUSION,
|
||||
MONTGOMERY,
|
||||
};
|
||||
|
||||
enum MdsType { DEFAULT_MDS, PLONKY };
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Constants
|
||||
* These constants are enough to define a Poseidon2 instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
|
||||
struct Poseidon2Constants {
|
||||
int width;
|
||||
int alpha;
|
||||
int internal_rounds;
|
||||
int external_rounds;
|
||||
S* round_constants = nullptr;
|
||||
S* internal_matrix_diag = nullptr;
|
||||
MdsType mds_type;
|
||||
DiffusionStrategy diffusion;
|
||||
};
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon_constants);
|
||||
|
||||
template <typename S>
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon2_constants);
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
1077
icicle/include/poseidon2/constants/m31_poseidon2.h
Normal file
1077
icicle/include/poseidon2/constants/m31_poseidon2.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,13 +3,14 @@ from sage.rings.polynomial.polynomial_gf2x import GF2X_BuildIrred_list
|
||||
from math import *
|
||||
import itertools
|
||||
|
||||
CURVE_NAME = "bn254"
|
||||
CURVE_NAME = "m31"
|
||||
|
||||
###########################################################################
|
||||
# p = 18446744069414584321 # GoldiLocks
|
||||
# p = 2013265921 # BabyBear
|
||||
p = 2**31 - 1 # M31
|
||||
# p = 52435875175126190479447740508185965837690552500527637822603658699938581184513 # BLS12-381
|
||||
p = 21888242871839275222246405745257275088548364400416034343698204186575808495617 # BN254/BN256
|
||||
# p = 21888242871839275222246405745257275088548364400416034343698204186575808495617 # BN254/BN256
|
||||
# p = 28948022309329048855892746252171976963363056481941560715954676764349967630337 # Pasta (Pallas)
|
||||
# p = 28948022309329048855892746252171976963363056481941647379679742748393362948097 # Pasta (Vesta)
|
||||
|
||||
@@ -617,6 +618,8 @@ print(f"namespace poseidon2_constants_{CURVE_NAME} {{")
|
||||
for t in TS:
|
||||
NUM_CELLS = t
|
||||
R_F_FIXED, R_P_FIXED, _, _ = poseidon_calc_final_numbers_fixed(p, t, alpha, 128, True)
|
||||
if t == 16:
|
||||
R_P_FIXED = 14
|
||||
|
||||
INIT_SEQUENCE = []
|
||||
|
||||
|
||||
@@ -1,7 +1,28 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#pragma once
|
||||
#ifndef POSEIDON2_KERNELS_H
|
||||
#define POSEIDON2_KERNELS_H
|
||||
|
||||
#include "utils/utils.h"
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "poseidon2/constants.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace poseidon2 {
|
||||
static DEVICE_INLINE unsigned int d_next_pow_of_two(unsigned int v)
|
||||
{
|
||||
v--;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
v++;
|
||||
return v;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
DEVICE_INLINE S sbox_el(S element, const int alpha)
|
||||
{
|
||||
@@ -19,7 +40,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
DEVICE_INLINE S sbox(S state[T], const int alpha)
|
||||
DEVICE_INLINE void sbox(S state[T], const int alpha)
|
||||
{
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = sbox_el(state[i], alpha);
|
||||
@@ -27,7 +48,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
DEVICE_INLINE S add_rc(S state[T], size_t rc_offset, const S* rc)
|
||||
DEVICE_INLINE void add_rc(S state[T], size_t rc_offset, const S* rc)
|
||||
{
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = state[i] + rc[rc_offset + i];
|
||||
@@ -35,7 +56,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S mds_light_4x4(S s[4])
|
||||
__device__ void mds_light_4x4(S s[4])
|
||||
{
|
||||
S t0 = s[0] + s[1];
|
||||
S t1 = s[2] + s[3];
|
||||
@@ -56,7 +77,7 @@ namespace poseidon2 {
|
||||
// [ 3 1 1 2 ].
|
||||
// https://github.com/Plonky3/Plonky3/blob/main/poseidon2/src/matrix.rs#L36
|
||||
template <typename S>
|
||||
__device__ S mds_light_plonky_4x4(S s[4])
|
||||
__device__ void mds_light_plonky_4x4(S s[4])
|
||||
{
|
||||
S t01 = s[0] + s[1];
|
||||
S t23 = s[2] + s[3];
|
||||
@@ -70,7 +91,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ S mds_light(S state[T], MdsType mds)
|
||||
__device__ void mds_light(S state[T], MdsType mds)
|
||||
{
|
||||
S sum;
|
||||
switch (T) {
|
||||
@@ -123,7 +144,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ S internal_round(S state[T], size_t rc_offset, const Poseidon2Constants<S>& constants)
|
||||
__device__ void internal_round(S state[T], size_t rc_offset, const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
S element = state[0];
|
||||
element = element + constants.round_constants[rc_offset];
|
||||
@@ -176,17 +197,8 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void poseidon2_permutation_kernel(
|
||||
const S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S> constants)
|
||||
__device__ void permute_state(S state[T], const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
S state[T];
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = states[idx * T + i];
|
||||
}
|
||||
unsigned int rn;
|
||||
|
||||
mds_light<S, T>(state, constants.mds_type);
|
||||
@@ -213,6 +225,22 @@ namespace poseidon2 {
|
||||
mds_light<S, T>(state, constants.mds_type);
|
||||
rc_offset += T;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void permutation_kernel(
|
||||
const S* states, S* states_out, unsigned int number_of_states, const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
S state[T];
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = states[idx * T + i];
|
||||
}
|
||||
|
||||
permute_state<S, T>(state, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
@@ -220,13 +248,120 @@ namespace poseidon2 {
|
||||
}
|
||||
}
|
||||
|
||||
// This function just copies from the states to the output
|
||||
template <typename S, int T>
|
||||
__global__ void get_hash_results(const S* states, size_t number_of_states, int index, S* out)
|
||||
__global__ void hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
uint64_t number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
uint64_t idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
out[idx] = states[idx * T + index];
|
||||
S state[T] = {0};
|
||||
UNROLL
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
state[i] = input[idx * input_len + i];
|
||||
}
|
||||
|
||||
permute_state<S, T>(state, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
output[idx * output_len + i] = state[i];
|
||||
}
|
||||
}
|
||||
} // namespace poseidon2
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ void absorb_2d_state(
|
||||
const Matrix<S>* inputs,
|
||||
S state[T],
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int rate,
|
||||
uint64_t row_idx,
|
||||
const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
unsigned int index = 0;
|
||||
for (int i = 0; i < number_of_inputs; i++) {
|
||||
const Matrix<S>* input = inputs + i;
|
||||
for (int j = 0; j < input->width; j++) {
|
||||
state[index] = input->values[row_idx * input->width + j];
|
||||
index++;
|
||||
if (index == rate) {
|
||||
permute_state<S, T>(state, constants);
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (index) { permute_state<S, T>(state, constants); }
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void hash_2d_kernel(
|
||||
const Matrix<S>* inputs,
|
||||
S* output,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int rate,
|
||||
unsigned int output_len,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
uint64_t idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= inputs[0].height) { return; }
|
||||
|
||||
S state[T] = {0};
|
||||
|
||||
absorb_2d_state<S, T>(inputs, state, number_of_inputs, rate, idx, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
output[idx * output_len + i] = state[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void compress_and_inject_kernel(
|
||||
const Matrix<S>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
const S* prev_layer,
|
||||
S* next_layer,
|
||||
unsigned int rate,
|
||||
unsigned int digest_elements,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
uint64_t number_of_rows = d_next_pow_of_two(matrices_to_inject[0].height);
|
||||
if (idx >= number_of_rows) { return; }
|
||||
|
||||
size_t next_layer_len = matrices_to_inject[0].height;
|
||||
S state_to_compress[T] = {S::zero()};
|
||||
|
||||
for (int i = 0; i < digest_elements * 2; i++) {
|
||||
state_to_compress[i] = prev_layer[idx * 2 * digest_elements + i];
|
||||
}
|
||||
permute_state<S, T>(state_to_compress, constants);
|
||||
|
||||
S injected_state[T] = {S::zero()};
|
||||
if (idx < next_layer_len) {
|
||||
absorb_2d_state<S, T>(matrices_to_inject, injected_state, number_of_inputs, rate, idx, constants);
|
||||
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
injected_state[digest_elements + i] = injected_state[i];
|
||||
injected_state[i] = state_to_compress[i];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
injected_state[i] = state_to_compress[i];
|
||||
}
|
||||
}
|
||||
permute_state<S, T>(injected_state, constants);
|
||||
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
next_layer[idx * digest_elements + i] = injected_state[i];
|
||||
}
|
||||
}
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
@@ -8,124 +8,172 @@
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
|
||||
#include "poseidon2/constants.cuh"
|
||||
#include "poseidon2/kernels.cuh"
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace poseidon2
|
||||
* Implementation of the [Poseidon2 hash function](https://eprint.iacr.org/2019/458.pdf)
|
||||
* Specifically, the optimized [Filecoin version](https://spec.filecoin.io/algorithms/crypto/poseidon/)
|
||||
*/
|
||||
namespace poseidon2 {
|
||||
/**
|
||||
* For most of the Poseidon2 configurations this is the case
|
||||
*/
|
||||
const int EXTERNAL_ROUNDS_DEFAULT = 8;
|
||||
|
||||
enum DiffusionStrategy {
|
||||
DEFAULT_DIFFUSION,
|
||||
MONTGOMERY,
|
||||
};
|
||||
|
||||
enum MdsType { DEFAULT_MDS, PLONKY };
|
||||
|
||||
enum PoseidonMode {
|
||||
COMPRESSION,
|
||||
PERMUTATION,
|
||||
};
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Constants
|
||||
* These constants are enough to define a Poseidon2 instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
|
||||
struct Poseidon2Constants {
|
||||
int width;
|
||||
int alpha;
|
||||
int internal_rounds;
|
||||
int external_rounds;
|
||||
S* round_constants = nullptr;
|
||||
S* internal_matrix_diag = nullptr;
|
||||
MdsType mds_type;
|
||||
DiffusionStrategy diffusion;
|
||||
};
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Config
|
||||
* Struct that encodes various Poseidon2 parameters.
|
||||
*/
|
||||
struct Poseidon2Config {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_states_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
PoseidonMode mode;
|
||||
int output_index;
|
||||
bool
|
||||
is_async; /**< Whether to run the Poseidon2 asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the poseidon_hash
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static Poseidon2Config default_poseidon2_config(
|
||||
int t, const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
class Poseidon2 : public hash::Hasher<S, S>
|
||||
{
|
||||
Poseidon2Config config = {
|
||||
ctx, // ctx
|
||||
false, // are_states_on_device
|
||||
false, // are_outputs_on_device
|
||||
PoseidonMode::COMPRESSION,
|
||||
1, // output_index
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
static const int POSEIDON_BLOCK_SIZE = 32;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon_constants);
|
||||
static inline int poseidon_number_of_blocks(size_t number_of_states)
|
||||
{
|
||||
return number_of_states / POSEIDON_BLOCK_SIZE + static_cast<bool>(number_of_states % POSEIDON_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* constants);
|
||||
public:
|
||||
const std::size_t device_id;
|
||||
Poseidon2Constants<S> constants;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
|
||||
cudaError_t hash_2d(
|
||||
const Matrix<S>* inputs,
|
||||
S* output,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int output_len,
|
||||
uint64_t number_of_rows,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
#define P2_HASH_2D_T(width) \
|
||||
case width: \
|
||||
hash_2d_kernel<S, width><<<poseidon_number_of_blocks(number_of_rows), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>( \
|
||||
inputs, output, number_of_inputs, this->rate, output_len, this->constants); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P2_HASH_2D_T(2)
|
||||
P2_HASH_2D_T(3)
|
||||
P2_HASH_2D_T(4)
|
||||
P2_HASH_2D_T(8)
|
||||
P2_HASH_2D_T(12)
|
||||
P2_HASH_2D_T(16)
|
||||
P2_HASH_2D_T(20)
|
||||
P2_HASH_2D_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonAbsorb2d: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
#define P2_HASH_MANY_T(width) \
|
||||
case width: \
|
||||
hash_many_kernel<S, width><<<poseidon_number_of_blocks(number_of_states), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>( \
|
||||
input, output, number_of_states, input_len, output_len, this->constants); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P2_HASH_MANY_T(2)
|
||||
P2_HASH_MANY_T(3)
|
||||
P2_HASH_MANY_T(4)
|
||||
P2_HASH_MANY_T(8)
|
||||
P2_HASH_MANY_T(12)
|
||||
P2_HASH_MANY_T(16)
|
||||
P2_HASH_MANY_T(20)
|
||||
P2_HASH_MANY_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
cudaError_t compress_and_inject(
|
||||
const Matrix<S>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
uint64_t number_of_rows,
|
||||
const S* prev_layer,
|
||||
S* next_layer,
|
||||
unsigned int digest_elements,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
#define P2_COMPRESS_AND_INJECT_T(width) \
|
||||
case width: \
|
||||
compress_and_inject_kernel<S, width> \
|
||||
<<<poseidon_number_of_blocks(number_of_rows), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>( \
|
||||
matrices_to_inject, number_of_inputs, prev_layer, next_layer, this->rate, digest_elements, this->constants); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P2_COMPRESS_AND_INJECT_T(2)
|
||||
P2_COMPRESS_AND_INJECT_T(3)
|
||||
P2_COMPRESS_AND_INJECT_T(4)
|
||||
P2_COMPRESS_AND_INJECT_T(8)
|
||||
P2_COMPRESS_AND_INJECT_T(12)
|
||||
P2_COMPRESS_AND_INJECT_T(16)
|
||||
P2_COMPRESS_AND_INJECT_T(20)
|
||||
P2_COMPRESS_AND_INJECT_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
Poseidon2(
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx)
|
||||
: hash::Hasher<S, S>(width, width, rate, 0), device_id(ctx.device_id)
|
||||
{
|
||||
Poseidon2Constants<S> constants;
|
||||
CHK_STICKY(create_poseidon2_constants(
|
||||
width, alpha, internal_rounds, external_rounds, round_constants, internal_matrix_diag, mds_type, diffusion, ctx,
|
||||
&constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
Poseidon2(
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx)
|
||||
: hash::Hasher<S, S>(width, width, rate, 0), device_id(ctx.device_id)
|
||||
{
|
||||
Poseidon2Constants<S> constants;
|
||||
CHK_STICKY(init_poseidon2_constants(width, mds_type, diffusion, ctx, &constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
~Poseidon2()
|
||||
{
|
||||
auto ctx = device_context::get_default_device_context();
|
||||
ctx.device_id = this->device_id;
|
||||
CHK_STICKY(release_poseidon2_constants<S>(&this->constants, ctx));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Compute the poseidon hash over a sequence of preimages.
|
||||
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
|
||||
* @param T size of the poseidon state, should be equal to {arity + 1}
|
||||
* @param states a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. May point to a string of preimages or a string of states filled with preimages.
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be at least of size [number_of_states](@ref number_of_states)
|
||||
* @param number_of_states number of input blocks of size T-1 (arity)
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon2_hash(
|
||||
const S* states,
|
||||
S* output,
|
||||
size_t number_of_states,
|
||||
const Poseidon2Constants<S>& constants,
|
||||
const Poseidon2Config& config);
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
@@ -5,4 +5,15 @@
|
||||
#define CONCAT_DIRECT(a, b) a##_##b
|
||||
#define CONCAT_EXPAND(a, b) CONCAT_DIRECT(a, b) // expand a,b before concatenation
|
||||
|
||||
static unsigned int next_pow_of_two(unsigned int v) {
|
||||
v--;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
v++;
|
||||
return v;
|
||||
}
|
||||
|
||||
#endif // ICICLE_UTILS_H
|
||||
@@ -105,12 +105,12 @@ namespace vec_ops {
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
template <typename E>
|
||||
cudaError_t transpose_batch(
|
||||
cudaError_t transpose_matrix(
|
||||
const E* mat_in,
|
||||
E* mat_out,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
device_context::DeviceContext& ctx,
|
||||
const device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
|
||||
@@ -20,6 +20,11 @@ extern "C" void CONCAT_EXPAND(CURVE, to_affine)(projective_t* point, affine_t* p
|
||||
*point_out = projective_t::to_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, from_affine)(affine_t* point, projective_t* point_out)
|
||||
{
|
||||
*point_out = projective_t::from_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, generate_projective_points)(projective_t* points, int size)
|
||||
{
|
||||
projective_t::rand_host_many(points, size);
|
||||
|
||||
@@ -20,6 +20,11 @@ extern "C" void CONCAT_EXPAND(CURVE, g2_to_affine)(g2_projective_t* point, g2_af
|
||||
*point_out = g2_projective_t::to_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, g2_from_affine)(g2_affine_t* point, g2_projective_t* point_out)
|
||||
{
|
||||
*point_out = g2_projective_t::from_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, g2_generate_projective_points)(g2_projective_t* points, int size)
|
||||
{
|
||||
g2_projective_t::rand_host_many(points, size);
|
||||
|
||||
@@ -2,8 +2,8 @@ if (EXT_FIELD)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DEXT_FIELD")
|
||||
endif ()
|
||||
|
||||
SET(SUPPORTED_FIELDS_WITHOUT_NTT grumpkin)
|
||||
SET(SUPPORTED_FIELDS_WITHOUT_POSEIDON2 bls12_381;bls12_377;grumpkin;bw6_761;stark252)
|
||||
SET(SUPPORTED_FIELDS_WITHOUT_NTT grumpkin;m31)
|
||||
SET(SUPPORTED_FIELDS_WITHOUT_POSEIDON2 bls12_381;bls12_377;grumpkin;bw6_761;stark252;m31)
|
||||
|
||||
set(TARGET icicle_field)
|
||||
|
||||
@@ -11,9 +11,14 @@ set(SRC ${CMAKE_SOURCE_DIR}/src)
|
||||
|
||||
set(FIELD_SOURCE ${SRC}/fields/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/merkle-tree/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/merkle-tree/extern_mmcs.cu)
|
||||
|
||||
if(EXT_FIELD)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/fields/extern_extension.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/ntt/extern_extension.cu)
|
||||
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_NTT)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/ntt/extern_extension.cu)
|
||||
endif()
|
||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern_extension.cu)
|
||||
endif()
|
||||
|
||||
@@ -25,8 +30,6 @@ set(POLYNOMIAL_SOURCE_FILES
|
||||
# TODO: impl poseidon for small fields. note that it needs to be defined over the extension field!
|
||||
if (DEFINED CURVE)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/poseidon.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/tree/merkle.cu)
|
||||
endif()
|
||||
|
||||
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_POSEIDON2)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
set(TARGET icicle_hash)
|
||||
|
||||
add_library(${TARGET} STATIC keccak/keccak.cu)
|
||||
add_library(${TARGET} STATIC keccak/extern.cu)
|
||||
target_include_directories(${TARGET} PUBLIC ${CMAKE_SOURCE_DIR}/include/)
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "ingo_hash")
|
||||
2
icicle/src/hash/keccak/.gitignore
vendored
Normal file
2
icicle/src/hash/keccak/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
test_keccak
|
||||
test_keccak_tree
|
||||
@@ -1,2 +1,10 @@
|
||||
test_keccak_tree: test_tree.cu keccak.cu ../../merkle-tree/merkle.cu
|
||||
nvcc -DMERKLE_DEBUG -o test_keccak_tree -I../../../include test_tree.cu
|
||||
./test_keccak_tree
|
||||
|
||||
test_keccak: test.cu keccak.cu
|
||||
nvcc -o test_keccak -I. -I../.. test.cu
|
||||
nvcc -o test_keccak -I../../../include test.cu
|
||||
./test_keccak
|
||||
|
||||
clear:
|
||||
rm test_keccak test_keccak_tree
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user